#define DEBUG_TYPE "si-wqm"

// ...
  StateStrict = StateStrictWWM | StateStrictWQM,
// ...

  explicit PrintState(int State) : State(State) {}

// ...
  static const std::pair<char, const char *> Mapping[] = {
      std::pair(StateWQM, "WQM"), std::pair(StateStrictWWM, "StrictWWM"),
      std::pair(StateStrictWQM, "StrictWQM"), std::pair(StateExact, "Exact")};
  char State = PS.State;
  for (auto M : Mapping) {
    if (State & M.first) {
// ...

  char MarkedStates = 0;
// ...
  char InitialState = 0;
  bool NeedsLowering = false;
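
// A standalone sketch (plain C++, not the LLVM types above) of the same
// bitmask pretty-printing idiom: walk a table of (bit, name) pairs and
// stream the names of whichever bits are set.
#include <iostream>
#include <utility>

namespace example {

enum : char {
  StateWQM = 1,
  StateStrictWWM = 2,
  StateStrictWQM = 4,
  StateExact = 8,
};

std::ostream &printState(std::ostream &OS, char State) {
  static const std::pair<char, const char *> Mapping[] = {
      {StateWQM, "WQM"},
      {StateStrictWWM, "StrictWWM"},
      {StateStrictWQM, "StrictWQM"},
      {StateExact, "Exact"}};
  bool First = true;
  for (auto M : Mapping) {
    if (State & M.first) {
      OS << (First ? "" : "|") << M.second;
      First = false;
    }
  }
  return OS;
}

} // namespace example

int main() {
  example::printState(std::cout, example::StateWQM | example::StateExact)
      << '\n'; // prints "WQM|Exact"
}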
class SIWholeQuadMode {
// ...
  void markInstruction(MachineInstr &MI, char Flag,
                       std::vector<WorkItem> &Worklist);
  void markDefs(const MachineInstr &UseMI, LiveRange &LR, Register Reg,
                unsigned SubReg, char Flag, std::vector<WorkItem> &Worklist);
  void markOperand(const MachineInstr &MI, const MachineOperand &Op, char Flag,
                   std::vector<WorkItem> &Worklist);
  void markInstructionUses(const MachineInstr &MI, char Flag,
                           std::vector<WorkItem> &Worklist);
  char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
  void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
// ...
  void toStrictMode(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
                    Register SaveOrig, char StrictStateNeeded);
  void fromStrictMode(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator Before, Register SavedOrig,
                      char NonStrictState, char CurrentStrictState);
// ...
  bool lowerLiveMaskQueries();
  bool lowerCopyInstrs();
  bool lowerKillInstrs(bool IsWQM);
// ...
};

// ...
class SIWholeQuadModeLegacy : public MachineFunctionPass {
// ...
  StringRef getPassName() const override { return "SI Whole Quad Mode"; }
// ...
};
char SIWholeQuadModeLegacy::ID = 0;

// ...
  return new SIWholeQuadModeLegacy;
  for (const auto &BII : Blocks) {
    // ...
        << " InNeeds = " << PrintState(BII.second.InNeeds)
        << ", Needs = " << PrintState(BII.second.Needs)
        << ", OutNeeds = " << PrintState(BII.second.OutNeeds) << "\n\n";
    // ...
      auto III = Instructions.find(&MI);
      if (III != Instructions.end()) {
        dbgs() << " " << MI << " Needs = " << PrintState(III->second.Needs)
               << ", OutNeeds = " << PrintState(III->second.OutNeeds) << '\n';
void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
                                      std::vector<WorkItem> &Worklist) {
  InstrInfo &II = Instructions[&MI];
// ...
  assert(!(Flag & StateExact) && Flag != 0);
// ...
  Flag &= ~II.Disabled;

  // If an instruction already needs everything in Flag, there is nothing to do.
  if ((II.Needs & Flag) == Flag)
    return;
// ...
  Worklist.emplace_back(&MI);
}
void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
                               Register Reg, unsigned SubReg, char Flag,
                               std::vector<WorkItem> &Worklist) {
// ...
  LiveQueryResult UseLRQ = LR.Query(LIS->getInstructionIndex(UseMI));
  const VNInfo *Value = UseLRQ.valueIn();
  if (!Value)
    return;

  const LaneBitmask UseLanes =
      SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
             : (Reg.isVirtual() ? MRI->getMaxLaneMaskForVReg(Reg)
                                : LaneBitmask::getNone());

  // Perform a depth-first iteration of the LiveRange and mark defs.
  // Stop processing of a given branch when all use lanes have been defined.
  struct PhiEntry {
    const VNInfo *Phi;
    unsigned PredIdx;
    LaneBitmask DefinedLanes;

    PhiEntry(const VNInfo *Phi, unsigned PredIdx, LaneBitmask DefinedLanes)
        : Phi(Phi), PredIdx(PredIdx), DefinedLanes(DefinedLanes) {}
  };
  using VisitKey = std::pair<const VNInfo *, LaneBitmask>;
  SmallVector<PhiEntry, 2> PhiStack;
  SmallSet<VisitKey, 4> Visited;
  LaneBitmask DefinedLanes;
  unsigned NextPredIdx = 0; // Only used for processing phi nodes
  do {
    const VNInfo *NextValue = nullptr;
    const VisitKey Key(Value, DefinedLanes);

    if (Visited.insert(Key).second) {
      // On first visit to a phi, start processing at the first predecessor.
      NextPredIdx = 0;
    }

    if (Value->isPHIDef()) {
      // Each predecessor node in the phi must be processed as a subgraph.
      const MachineBasicBlock *MBB = LIS->getMBBFromIndex(Value->def);
      assert(MBB && "Phi-def has no defining MBB");

      // Find the next predecessor to process.
      unsigned Idx = NextPredIdx;
      const auto *PI = MBB->pred_begin() + Idx;
      const auto *PE = MBB->pred_end();
      for (; PI != PE && !NextValue; ++PI, ++Idx) {
        if (const VNInfo *VN = LR.getVNInfoBefore(LIS->getMBBEndIdx(*PI))) {
          if (!Visited.count(VisitKey(VN, DefinedLanes)))
            NextValue = VN;
        }
      }

      // If there are more predecessors to process, push the phi on the stack.
      if (PI != PE)
        PhiStack.emplace_back(Value, Idx, DefinedLanes);
    } else {
      MachineInstr *MI = LIS->getInstructionFromIndex(Value->def);
      assert(MI && "Def has no defining instruction");

      if (Reg.isVirtual()) {
        // Iterate over all operands to find relevant definitions.
        bool HasDef = false;
        for (const MachineOperand &Op : MI->all_defs()) {
          if (Op.getReg() != Reg)
            continue;

          // Compute lanes defined and their overlap with the use.
          LaneBitmask OpLanes =
              Op.isUndef() ? LaneBitmask::getAll()
                           : TRI->getSubRegIndexLaneMask(Op.getSubReg());
          LaneBitmask Overlap = (UseLanes & OpLanes);

          // Record if this instruction defined any lanes of the use.
          HasDef |= Overlap.any();

          // Mark any lanes defined.
          DefinedLanes |= OpLanes;
        }

        // Check whether all lanes of the use have been defined.
        if ((DefinedLanes & UseLanes) != UseLanes) {
          // Definition not complete; need to process the input value.
          LiveQueryResult LRQ = LR.Query(LIS->getInstructionIndex(*MI));
          if (const VNInfo *VN = LRQ.valueIn()) {
            if (!Visited.count(VisitKey(VN, DefinedLanes)))
              NextValue = VN;
          }
        }

        // Only mark the instruction if it defines some part of the use.
        if (HasDef)
          markInstruction(*MI, Flag, Worklist);
      } else {
        // For physical registers, simply mark the defining instruction.
        markInstruction(*MI, Flag, Worklist);
      }
    }

    if (!NextValue && !PhiStack.empty()) {
      // Reached the end of a chain; revert to processing the last phi.
      PhiEntry &Entry = PhiStack.back();
      NextValue = Entry.Phi;
      NextPredIdx = Entry.PredIdx;
      DefinedLanes = Entry.DefinedLanes;
      PhiStack.pop_back();
    }

    Value = NextValue;
  } while (Value);
}
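
// A standalone sketch (plain C++, simplified data model) of the traversal
// shape markDefs uses: an iterative walk over a value graph where phis fan
// out to predecessor values, with the visited set keyed on the pair
// (value, lanes-defined-so-far) so phi cycles terminate even though the same
// value may legitimately be revisited with a different lane set.
#include <cstdint>
#include <iostream>
#include <set>
#include <utility>
#include <vector>

struct Value {
  uint32_t DefLanes = 0;  // lanes written by this def (unused for phis)
  std::vector<int> Preds; // predecessor value ids (non-empty => phi)
};

std::vector<int> markDefsSketch(const std::vector<Value> &Vals, int Root,
                                uint32_t UseLanes) {
  std::vector<int> Marked;
  std::set<std::pair<int, uint32_t>> Visited;
  struct Frame {
    int Val;
    uint32_t Lanes;
  };
  std::vector<Frame> Stack{{Root, 0}};
  while (!Stack.empty()) {
    Frame F = Stack.back();
    Stack.pop_back();
    if (!Visited.insert({F.Val, F.Lanes}).second)
      continue; // already processed with this lane set
    const Value &V = Vals[F.Val];
    if (!V.Preds.empty()) {
      // Phi: each predecessor must be processed as its own subgraph.
      for (int P : V.Preds)
        Stack.push_back({P, F.Lanes});
      continue;
    }
    if (V.DefLanes & UseLanes)
      Marked.push_back(F.Val); // this def contributes lanes of the use
    // A fuller model would keep walking to the incoming value while
    // (Lanes | DefLanes) does not yet cover UseLanes, as the pass does.
  }
  return Marked;
}

int main() {
  // Value 0 is phi(1, 2); values 1 and 2 are defs of disjoint lane sets.
  std::vector<Value> Vals = {{0, {1, 2}}, {0x3, {}}, {0xC, {}}};
  for (int Id : markDefsSketch(Vals, 0, 0xF))
    std::cout << "marked def " << Id << '\n';
}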
void SIWholeQuadMode::markOperand(const MachineInstr &MI,
                                  const MachineOperand &Op, char Flag,
                                  std::vector<WorkItem> &Worklist) {
// ...
  case AMDGPU::EXEC_LO:
    return;
// ...
    markDefs(MI, LR, Reg, Op.getSubReg(), Flag, Worklist);
// ...
      markDefs(MI, LR, Unit, AMDGPU::NoSubRegister, Flag, Worklist);
// ...
}
void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
                                          std::vector<WorkItem> &Worklist) {
  LLVM_DEBUG(dbgs() << "markInstructionUses " << PrintState(Flag) << ": "
                    << MI);

  for (const MachineOperand &Use : MI.all_uses())
    markOperand(MI, Use, Flag, Worklist);
}
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
                                       std::vector<WorkItem> &Worklist) {
  char GlobalFlags = 0;
// ...
  SmallVector<MachineInstr *, 4> SoftWQMInstrs;
  bool HasImplicitDerivatives =
      MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS;

  // We need to visit the basic blocks in reverse post-order so that we visit
  // defs before uses, in particular so that we don't accidentally mark an
  // instruction as needing WQM before visiting it.
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (MachineBasicBlock *MBB : RPOT) {
    BlockInfo &BBI = Blocks[MBB];

    for (MachineInstr &MI : *MBB) {
      InstrInfo &III = Instructions[&MI];
      unsigned Opcode = MI.getOpcode();
      char Flags = 0;

      if (TII->isWQM(Opcode)) {
        // ...
        markInstructionUses(MI, StateWQM, Worklist);
        GlobalFlags |= StateWQM;
      } else if (Opcode == AMDGPU::WQM) {
        // ...
        LowerToCopyInstrs.insert(&MI);
      } else if (Opcode == AMDGPU::SOFT_WQM) {
        LowerToCopyInstrs.insert(&MI);
        SoftWQMInstrs.push_back(&MI);
      } else if (Opcode == AMDGPU::STRICT_WWM) {
        // ...
        markInstructionUses(MI, StateStrictWWM, Worklist);
        GlobalFlags |= StateStrictWWM;
        LowerToMovInstrs.push_back(&MI);
      } else if (Opcode == AMDGPU::STRICT_WQM ||
                 TII->isDualSourceBlendEXP(MI)) {
        // ...
        markInstructionUses(MI, StateStrictWQM, Worklist);
        GlobalFlags |= StateStrictWQM;

        if (Opcode == AMDGPU::STRICT_WQM) {
          LowerToMovInstrs.push_back(&MI);
        } else {
          // Dual-source blend export: the sources are shuffled in strict WQM,
          // but the export itself must run in exact mode.
          BBI.Needs |= StateExact;
          if (!(BBI.InNeeds & StateExact)) {
            BBI.InNeeds |= StateExact;
            Worklist.emplace_back(MBB);
          }
          GlobalFlags |= StateExact;
          III.Disabled = StateWQM | StateStrict;
        }
      } else if (Opcode == AMDGPU::LDS_PARAM_LOAD ||
                 Opcode == AMDGPU::DS_PARAM_LOAD ||
                 Opcode == AMDGPU::LDS_DIRECT_LOAD ||
                 Opcode == AMDGPU::DS_DIRECT_LOAD) {
        // Mark these for StrictWQM on the instruction only, not its operands.
        III.Needs |= StateStrictWQM;
        GlobalFlags |= StateStrictWQM;
      } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32) {
        // Disable strict states; StrictWWM will be added as required later.
        III.Disabled = StateStrict;
        MachineOperand &Inactive = MI.getOperand(4);
        if (Inactive.isReg()) {
          if (Inactive.isUndef() && MI.getOperand(3).getImm() == 0)
            LowerToCopyInstrs.insert(&MI);
          else
            markOperand(MI, Inactive, StateStrictWWM, Worklist);
        }
        SetInactiveInstrs.push_back(&MI);
        BBI.NeedsLowering = true;
      } else if (TII->isDisableWQM(MI)) {
        BBI.Needs |= StateExact;
        if (!(BBI.InNeeds & StateExact)) {
          BBI.InNeeds |= StateExact;
          Worklist.emplace_back(MBB);
        }
        GlobalFlags |= StateExact;
        III.Disabled = StateWQM | StateStrict;
      } else if (Opcode == AMDGPU::SI_PS_LIVE ||
                 Opcode == AMDGPU::SI_LIVE_MASK) {
        LiveMaskQueries.push_back(&MI);
      } else if (Opcode == AMDGPU::SI_KILL_I1_TERMINATOR ||
                 Opcode == AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR ||
                 Opcode == AMDGPU::SI_DEMOTE_I1) {
        KillInstrs.push_back(&MI);
        BBI.NeedsLowering = true;
      } else if (Opcode == AMDGPU::SI_INIT_EXEC ||
                 Opcode == AMDGPU::SI_INIT_EXEC_FROM_INPUT ||
                 Opcode == AMDGPU::SI_INIT_WHOLE_WAVE) {
        InitExecInstrs.push_back(&MI);
      } else if (WQMOutputs) {
        // ...
        for (const MachineOperand &MO : MI.defs()) {
          Register Reg = MO.getReg();
          if (Reg.isPhysical() &&
              TRI->hasVectorRegisters(TRI->getPhysRegBaseClass(Reg))) {
            Flags = StateWQM;
            break;
          }
        }
      }
// ...
      markInstruction(MI, Flags, Worklist);
      GlobalFlags |= Flags;
    }
  }

// ...
  if (GlobalFlags & StateWQM) {
    for (MachineInstr *MI : SetInactiveInstrs)
      markInstruction(*MI, StateWQM, Worklist);
    for (MachineInstr *MI : SoftWQMInstrs)
      markInstruction(*MI, StateWQM, Worklist);
  }

  return GlobalFlags;
}
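
// A standalone sketch of why scanInstructions iterates the CFG with
// ReversePostOrderTraversal: reverse post-order visits every block before
// its successors (ignoring back edges), so definitions are classified
// before the instructions that use them.
#include <algorithm>
#include <iostream>
#include <vector>

static void postOrder(int BB, const std::vector<std::vector<int>> &Succs,
                      std::vector<bool> &Seen, std::vector<int> &Out) {
  Seen[BB] = true;
  for (int S : Succs[BB])
    if (!Seen[S])
      postOrder(S, Succs, Seen, Out);
  Out.push_back(BB);
}

std::vector<int> reversePostOrder(const std::vector<std::vector<int>> &Succs) {
  std::vector<bool> Seen(Succs.size(), false);
  std::vector<int> Order;
  postOrder(0, Succs, Seen, Order); // block 0 is the entry
  std::reverse(Order.begin(), Order.end());
  return Order;
}

int main() {
  // Diamond CFG: 0 -> {1, 2}, 1 -> 3, 2 -> 3.
  std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};
  for (int BB : reversePostOrder(Succs))
    std::cout << BB << ' '; // e.g. "0 2 1 3": the join block 3 comes last
  std::cout << '\n';
}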
void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
                                           std::vector<WorkItem> &Worklist) {
  MachineBasicBlock *MBB = MI.getParent();
  InstrInfo II = Instructions[&MI]; // take a copy; we modify the map below
  BlockInfo &BI = Blocks[MBB];

  // Control-flow instructions and stores to temporary memory that are
  // followed by WQM computations must themselves be in WQM.
  if ((II.OutNeeds & StateWQM) && !(II.Disabled & StateWQM) &&
      (MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) {
    Instructions[&MI].Needs = StateWQM;
    II.Needs = StateWQM;
  }

  // Propagate to block level.
  if (II.Needs & StateWQM) {
    BI.Needs |= StateWQM;
    if (!(BI.InNeeds & StateWQM)) {
      BI.InNeeds |= StateWQM;
      Worklist.emplace_back(MBB);
    }
  }

  // Propagate backwards within the block.
  if (MachineInstr *PrevMI = MI.getPrevNode()) {
    char InNeeds = (II.Needs & ~StateStrict) | II.OutNeeds;
    if (!PrevMI->isPHI()) {
      InstrInfo &PrevII = Instructions[PrevMI];
      if ((PrevII.OutNeeds | InNeeds) != PrevII.OutNeeds) {
        PrevII.OutNeeds |= InNeeds;
        Worklist.emplace_back(PrevMI);
      }
    }
  }

  // Propagate the WQM flag to instruction inputs.
// ...
  markInstructionUses(MI, II.Needs, Worklist);

  // Ensure we process a block containing StrictWWM/StrictWQM even if it does
  // not require any WQM transitions.
  if (II.Needs & StateStrictWWM)
    BI.Needs |= StateStrictWWM;
  if (II.Needs & StateStrictWQM)
    BI.Needs |= StateStrictWQM;
}
void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
                                     std::vector<WorkItem> &Worklist) {
  BlockInfo BI = Blocks[&MBB]; // make a copy to prevent dangling references

  // Propagate through instructions.
// ...
    InstrInfo &LastII = Instructions[LastMI];
    if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
      LastII.OutNeeds |= BI.OutNeeds;
      Worklist.emplace_back(LastMI);
    }
// ...
  // Predecessor blocks must provide for our WQM/Exact needs.
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
    BlockInfo &PredBI = Blocks[Pred];
    if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
      continue;

    PredBI.OutNeeds |= BI.InNeeds;
    PredBI.InNeeds |= BI.InNeeds;
    Worklist.emplace_back(Pred);
  }

  // All successors must be prepared to accept the same set of WQM/Exact data.
  for (MachineBasicBlock *Succ : MBB.successors()) {
    BlockInfo &SuccBI = Blocks[Succ];
    if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
      continue;

    SuccBI.InNeeds |= BI.OutNeeds;
    Worklist.emplace_back(Succ);
  }
}
char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {
  std::vector<WorkItem> Worklist;
  char GlobalFlags = scanInstructions(MF, Worklist);

  while (!Worklist.empty()) {
    WorkItem WI = Worklist.back();
    Worklist.pop_back();

    if (WI.MI)
      propagateInstruction(*WI.MI, Worklist);
    else
      propagateBlock(*WI.MBB, Worklist);
  }

  return GlobalFlags;
}
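
// A standalone sketch of the fixed-point loop above: state requirements flow
// backwards from blocks to their predecessors until nothing changes.
// Termination is guaranteed because the Needs bitmasks only ever gain bits.
#include <iostream>
#include <vector>

struct SketchBlock {
  char InNeeds = 0, OutNeeds = 0;
  std::vector<int> Preds;
};

void propagateSketch(std::vector<SketchBlock> &Blocks,
                     std::vector<int> Worklist) {
  while (!Worklist.empty()) {
    int B = Worklist.back();
    Worklist.pop_back();
    for (int P : Blocks[B].Preds) {
      char NewOut = Blocks[P].OutNeeds | Blocks[B].InNeeds;
      if (NewOut == Blocks[P].OutNeeds)
        continue; // predecessor already provides what we need
      Blocks[P].OutNeeds = NewOut;
      Blocks[P].InNeeds |= Blocks[B].InNeeds;
      Worklist.push_back(P); // re-examine the predecessor's own inputs
    }
  }
}

int main() {
  // Chain 0 -> 1 -> 2; block 2 needs WQM (bit 1) on entry.
  std::vector<SketchBlock> Blocks(3);
  Blocks[1].Preds = {0};
  Blocks[2].Preds = {1};
  Blocks[2].InNeeds = 1;
  propagateSketch(Blocks, {2});
  std::cout << int(Blocks[0].OutNeeds) << '\n'; // 1: requirement reached entry
}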
MachineBasicBlock::iterator
SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator Before) {
  Register SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
// ...
  MachineInstr *Restore =
      BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::SCC)
          .addReg(SaveReg);
// ...
}
void SIWholeQuadMode::splitBlock(MachineInstr *TermMI) {
  MachineBasicBlock *BB = TermMI->getParent();
// ...
  MachineBasicBlock *SplitBB =
      BB->splitAt(*TermMI, /*UpdateLiveIns*/ true, LIS);

  // Convert the last instruction of the block to a terminator variant.
  unsigned NewOpcode = 0;
  switch (TermMI->getOpcode()) {
  case AMDGPU::S_AND_B32:
    NewOpcode = AMDGPU::S_AND_B32_term;
    break;
  case AMDGPU::S_AND_B64:
    NewOpcode = AMDGPU::S_AND_B64_term;
    break;
  case AMDGPU::S_MOV_B32:
    NewOpcode = AMDGPU::S_MOV_B32_term;
    break;
  case AMDGPU::S_MOV_B64:
    NewOpcode = AMDGPU::S_MOV_B64_term;
    break;
  case AMDGPU::S_ANDN2_B32:
    NewOpcode = AMDGPU::S_ANDN2_B32_term;
    break;
  case AMDGPU::S_ANDN2_B64:
    NewOpcode = AMDGPU::S_ANDN2_B64_term;
    break;
  // ...
  }
// ...
  for (MachineBasicBlock *Succ : SplitBB->successors()) {
    DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
    DTUpdates.push_back({DomTreeT::Delete, BB, Succ});
  }
  DTUpdates.push_back({DomTreeT::Insert, BB, SplitBB});
// ...
}
MachineInstr *SIWholeQuadMode::lowerKillF32(MachineInstr &MI) {
// ...
  unsigned Opcode = 0;

  // Each assignment selects the V_CMP opcode that computes the *killed*
  // lanes, i.e. the inverse of the kill condition; the corresponding
  // "case ISD::SET*:" labels are elided in this excerpt.
  switch (MI.getOperand(2).getImm()) {
  // ...
    Opcode = AMDGPU::V_CMP_LG_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_GE_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_GT_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_LE_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_LT_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_EQ_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_O_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_U_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_NEQ_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_NLT_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_NLE_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_NGT_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_NGE_F32_e64;
    break;
  // ...
    Opcode = AMDGPU::V_CMP_NLG_F32_e64;
    break;
  // ...
  }

  MachineBasicBlock &MBB = *MI.getParent();
// ...
  MachineInstr *VcmpMI;
  const MachineOperand &Op0 = MI.getOperand(0);
  const MachineOperand &Op1 = MI.getOperand(1);
// ...
  MachineInstr *MaskUpdateMI =
      // ...
  MachineInstr *EarlyTermMI =
      // ...
  MachineInstr *ExecMaskMI =
      // ...
}
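
// A standalone demonstration of why inverted float comparisons use the
// "NLT"/"NLE"-style opcodes rather than ">="/">" variants: negating an IEEE
// comparison flips its ordered/unordered behaviour, and the two differ
// whenever an input is NaN.
#include <cmath>
#include <iostream>

int main() {
  float a = std::nanf(""), b = 1.0f;
  std::cout << std::boolalpha;
  std::cout << "a < b    = " << (a < b) << '\n';  // false
  std::cout << "!(a < b) = " << !(a < b) << '\n'; // true  ("not less than")
  std::cout << "a >= b   = " << (a >= b) << '\n'; // false (ordered >=)
}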
MachineInstr *SIWholeQuadMode::lowerKillI1(MachineInstr &MI, bool IsWQM) {
// ...
  MachineBasicBlock &MBB = *MI.getParent();
// ...
  MachineInstr *MaskUpdateMI = nullptr;

  const bool IsDemote = IsWQM && (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1);
  const MachineOperand &Op = MI.getOperand(0);
  int64_t KillVal = MI.getOperand(1).getImm();
  MachineInstr *ComputeKilledMaskMI = nullptr;
// ...
    if (Op.getImm() == KillVal) {
// ...
  bool IsLastTerminator = std::next(MI.getIterator()) == MBB.end();
  if (!IsLastTerminator) {
// ...
    TmpReg = MRI->createVirtualRegister(TRI->getBoolRC());
// ...
  MachineInstr *EarlyTermMI =
      // ...
  MachineInstr *NewTerm;
  MachineInstr *WQMMaskMI = nullptr;
// ...
    LiveMaskWQM = MRI->createVirtualRegister(TRI->getBoolRC());
// ...
  if (ComputeKilledMaskMI)
    // ...
}
void SIWholeQuadMode::lowerBlock(MachineBasicBlock &MBB, BlockInfo &BI) {
  if (!BI.NeedsLowering)
    return;
// ...
  SmallVector<MachineInstr *, 4> SplitPoints;
// ...
  char State = BI.InitialState;
// ...
    auto MIState = StateTransition.find(&MI);
    if (MIState != StateTransition.end())
      State = MIState->second;

    MachineInstr *SplitPoint = nullptr;
    switch (MI.getOpcode()) {
    case AMDGPU::SI_DEMOTE_I1:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
      SplitPoint = lowerKillI1(MI, State == StateWQM);
      break;
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
      SplitPoint = lowerKillF32(MI);
      break;
    case AMDGPU::ENTER_STRICT_WWM:
      ActiveLanesReg = MI.getOperand(0).getReg();
      break;
    case AMDGPU::EXIT_STRICT_WWM:
      ActiveLanesReg = 0;
      break;
    case AMDGPU::V_SET_INACTIVE_B32:
      if (ActiveLanesReg) {
        LiveInterval &LI = LIS->getInterval(MI.getOperand(5).getReg());
        MRI->constrainRegClass(ActiveLanesReg, TRI->getWaveMaskRegClass());
        MI.getOperand(5).setReg(ActiveLanesReg);
        // ...
      } else {
        assert(State == StateExact || State == StateWQM);
      }
      break;
// ...
    }
// ...
  for (MachineInstr *MI : SplitPoints)
    splitBlock(MI);
}
MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator First,
    MachineBasicBlock::iterator Last, bool PreferLast, bool SaveSCC) {
// ...
  SlotIndex Idx = PreferLast ? LastIdx : FirstIdx;
  const LiveRange::Segment *S;
// ...
    if (Next < FirstIdx)
      break;
// ...
  assert(EndMI && "Segment does not end on valid instruction");
// ...
  bool IsExecDef = false;
  for (const MachineOperand &MO : MBBI->all_defs()) {
    IsExecDef |=
        MO.getReg() == AMDGPU::EXEC_LO || MO.getReg() == AMDGPU::EXEC;
  }
// ...
}
void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator Before,
                              Register SaveWQM) {
// ...
  bool IsTerminator = Before == MBB.end();
  if (!IsTerminator) {
    auto FirstTerm = MBB.getFirstTerminator();
    if (FirstTerm != MBB.end()) {
      SlotIndex FirstTermIdx = LIS->getInstructionIndex(*FirstTerm);
      SlotIndex BeforeIdx = LIS->getInstructionIndex(*Before);
      IsTerminator = BeforeIdx > FirstTermIdx;
    }
  }
// ...
  StateTransition[MI] = StateExact;
}
void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator Before,
                            Register SavedWQM) {
// ...
  StateTransition[MI] = StateWQM;
}
void SIWholeQuadMode::toStrictMode(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator Before,
                                   Register SaveOrig, char StrictStateNeeded) {
// ...
  assert(StrictStateNeeded == StateStrictWWM ||
         StrictStateNeeded == StateStrictWQM);

  if (StrictStateNeeded == StateStrictWWM) {
// ...
  }
// ...
  StateTransition[MI] = StrictStateNeeded;
}
void SIWholeQuadMode::fromStrictMode(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator Before,
                                     Register SavedOrig, char NonStrictState,
                                     char CurrentStrictState) {
// ...
  assert(CurrentStrictState == StateStrictWWM ||
         CurrentStrictState == StateStrictWQM);

  if (CurrentStrictState == StateStrictWWM) {
// ...
  }
// ...
  StateTransition[MI] = NonStrictState;
}
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, BlockInfo &BI,
                                   bool IsEntry) {
  // A non-entry block that is WQM throughout and doesn't have to switch to
  // Exact on exit needs no transitions at all.
  if (!IsEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact) {
    BI.InitialState = StateWQM;
    return;
  }
// ...
  Register SavedWQMReg;
  Register SavedNonStrictReg;
  bool WQMFromExec = IsEntry;
  char State = (IsEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
  char NonStrictState = 0;
  const TargetRegisterClass *BoolRC = TRI->getBoolRC();

  auto II = MBB.getFirstNonPHI(), IE = MBB.end();
// ...
  // Skip the instruction that saves LiveMask.
  if (II != IE && II->getOpcode() == AMDGPU::COPY &&
      II->getOperand(1).getReg() == LMC.ExecReg)
    ++II;
// ...
  BI.InitialState = State;

  for (unsigned Idx = 0;; ++Idx) {
    char Needs = StateExact | StateWQM; // Strict mode is disallowed by default.
    char OutNeeds = 0;
// ...
    if (FirstStrict == IE)
      FirstStrict = II;

    // ...
    if (IsEntry && Idx == 0 && (BI.InNeeds & StateWQM))
      // ...

    if (II != IE) {
      MachineInstr &MI = *II;

      if (MI.isTerminator() || TII->mayReadEXEC(*MRI, MI)) {
        auto III = Instructions.find(&MI);
        if (III != Instructions.end()) {
          if (III->second.Needs & StateStrictWWM)
            Needs = StateStrictWWM;
          else if (III->second.Needs & StateStrictWQM)
            Needs = StateStrictWQM;
          else if (III->second.Needs & StateWQM)
            Needs = StateWQM;
          else
            Needs &= ~III->second.Disabled;
          OutNeeds = III->second.OutNeeds;
        }
      } else {
        // If the instruction doesn't actually need a correct EXEC, then we
        // can safely leave Strict mode enabled.
        Needs = StateExact | StateWQM | StateStrict;
      }

      // Exact mode exit can occur in terminators, but must be before branches.
      if (MI.isBranch() && OutNeeds == StateExact)
        Needs = StateExact;
// ...
    } else {
      // End of basic block.
      if (BI.OutNeeds & StateWQM)
        Needs = StateWQM;
      else if (BI.OutNeeds == StateExact)
        Needs = StateExact;
      else
        Needs = StateWQM | StateExact;
    }

    // Now, transition if necessary.
    if (!(Needs & State)) {
      MachineBasicBlock::iterator First;
      if (State == StateStrictWWM || Needs == StateStrictWWM ||
          State == StateStrictWQM || Needs == StateStrictWQM) {
        // We must switch to or from strict mode.
        First = FirstStrict;
      } else {
        // We only need to switch to/from WQM.
        First = FirstWQM;
      }

      // Whether we need to save SCC depends on start and end states.
      bool SaveSCC = false;
      switch (State) {
      case StateExact:
      case StateStrictWWM:
      case StateStrictWQM:
        // Exact/Strict -> Strict: save SCC.
        // Exact/Strict -> WQM: save SCC if WQM comes from exec.
        // Exact/Strict -> Exact: no save.
        SaveSCC = (Needs & StateStrict) || ((Needs & StateWQM) && WQMFromExec);
        break;
      case StateWQM:
        // WQM -> Exact/Strict: save SCC.
        SaveSCC = !(Needs & StateWQM);
        break;
      default:
        llvm_unreachable("Unknown state");
      }

      char StartState = State & StateStrict ? NonStrictState : State;
      bool WQMToExact =
          StartState == StateWQM && (Needs & StateExact) && !(Needs & StateWQM);
      bool ExactToWQM = StartState == StateExact && (Needs & StateWQM) &&
                        !(Needs & StateExact);
      bool PreferLast = Needs == StateWQM;
      // Exact regions in divergent control flow may run with EXEC=0, so try
      // to exclude instructions with unwanted effects from them.
      if ((WQMToExact && (OutNeeds & StateWQM)) || ExactToWQM) {
        // ...
        if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) {
          PreferLast = WQMToExact;
          // ...
        }
      }
// ...
      MachineBasicBlock::iterator Before =
          prepareInsertion(MBB, First, II, PreferLast, SaveSCC);

      if (State & StateStrict) {
        assert(State == StateStrictWWM || State == StateStrictWQM);
        assert(SavedNonStrictReg);
        fromStrictMode(MBB, Before, SavedNonStrictReg, NonStrictState, State);
        // ...
        SavedNonStrictReg = 0;
        State = NonStrictState;
      }

      if (Needs & StateStrict) {
        NonStrictState = State;
        assert(Needs == StateStrictWWM || Needs == StateStrictWQM);
        assert(!SavedNonStrictReg);
        SavedNonStrictReg = MRI->createVirtualRegister(BoolRC);
        // ...
        toStrictMode(MBB, Before, SavedNonStrictReg, Needs);
        State = Needs;
      } else {
        if (WQMToExact) {
          if (!WQMFromExec && (OutNeeds & StateWQM)) {
            assert(!SavedWQMReg);
            SavedWQMReg = MRI->createVirtualRegister(BoolRC);
          }
          toExact(MBB, Before, SavedWQMReg);
          State = StateExact;
        } else if (ExactToWQM) {
          assert(WQMFromExec == (SavedWQMReg == 0));
          toWQM(MBB, Before, SavedWQMReg);
          // ...
        }
      }
    }

    if (Needs != (StateExact | StateWQM | StateStrict)) {
      if (Needs != (StateExact | StateWQM))
        FirstWQM = IE;
      FirstStrict = IE;
    }
// ...
  }

  assert(!SavedNonStrictReg);
}
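
// A minimal standalone model (plain C++, not the LLVM API) of the
// save/restore discipline processBlock enforces through toExact/toWQM:
// switching from WQM to Exact saves the current exec mask and narrows EXEC
// to the live lanes; switching back restores the saved mask.
#include <cassert>
#include <cstdint>
#include <iostream>

struct WaveState {
  uint64_t Exec;          // current EXEC mask
  uint64_t SavedExec = 0; // copy taken before narrowing
};

void sketchToExact(WaveState &W, uint64_t LiveMask) {
  W.SavedExec = W.Exec; // plays the role of SavedWQMReg above
  W.Exec &= LiveMask;   // only truly live lanes keep running
}

void sketchToWQM(WaveState &W) {
  assert(W.SavedExec && "no saved exec mask to restore");
  W.Exec = W.SavedExec;
  W.SavedExec = 0;
}

int main() {
  WaveState W{0xFF};        // whole quads enabled (WQM)
  uint64_t LiveMask = 0x0F; // lanes that are actually live
  sketchToExact(W, LiveMask);
  std::cout << std::hex << W.Exec << '\n'; // f
  sketchToWQM(W);
  std::cout << std::hex << W.Exec << '\n'; // ff
}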
bool SIWholeQuadMode::lowerLiveMaskQueries() {
  for (MachineInstr *MI : LiveMaskQueries) {
    const DebugLoc &DL = MI->getDebugLoc();
    Register Dest = MI->getOperand(0).getReg();

    MachineInstr *Copy =
        BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
            .addReg(LiveMaskReg);

    LIS->ReplaceMachineInstrInMaps(*MI, *Copy);
    MI->eraseFromParent();
  }
  return !LiveMaskQueries.empty();
}
bool SIWholeQuadMode::lowerCopyInstrs() {
  for (MachineInstr *MI : LowerToMovInstrs) {
    assert(MI->getNumExplicitOperands() == 2);
// ...
    const TargetRegisterClass *regClass =
        TRI->getRegClassForOperandReg(*MRI, MI->getOperand(0));
    if (TRI->isVGPRClass(regClass)) {
      const unsigned MovOp = TII->getMovOpcode(regClass);
      MI->setDesc(TII->get(MovOp));

      // Check that it already implicitly depends on exec, like all VALU movs
      // should do.
      assert(any_of(MI->implicit_operands(), [](const MachineOperand &MO) {
        return MO.isUse() && MO.getReg() == AMDGPU::EXEC;
      }));
    } else {
// ...
      if (MI->getOperand(0).isEarlyClobber()) {
// ...
        MI->getOperand(0).setIsEarlyClobber(false);
// ...
      }
      int Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC, nullptr);
      while (Index >= 0) {
        MI->removeOperand(Index);
        Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC, nullptr);
      }
      MI->setDesc(TII->get(AMDGPU::COPY));
    }
  }

  for (MachineInstr *MI : LowerToCopyInstrs) {
// ...
    if (MI->getOpcode() == AMDGPU::V_SET_INACTIVE_B32) {
      assert(MI->getNumExplicitOperands() == 6);

      LiveInterval *RecomputeLI = nullptr;
      if (MI->getOperand(4).isReg())
        RecomputeLI = &LIS->getInterval(MI->getOperand(4).getReg());

      MI->removeOperand(5);
      MI->removeOperand(4);
      MI->removeOperand(3);
      MI->removeOperand(1);
// ...
    }

    assert(MI->getNumExplicitOperands() == 2);
// ...
    unsigned CopyOp = MI->getOperand(1).isReg()
                          ? (unsigned)AMDGPU::COPY
                          : TII->getMovOpcode(TRI->getRegClassForOperandReg(
                                *MRI, MI->getOperand(0)));
    MI->setDesc(TII->get(CopyOp));
// ...
  }
  return !LowerToCopyInstrs.empty() || !LowerToMovInstrs.empty();
}
bool SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
  for (MachineInstr *MI : KillInstrs) {
    MachineInstr *SplitPoint = nullptr;
    switch (MI->getOpcode()) {
    case AMDGPU::SI_DEMOTE_I1:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
      SplitPoint = lowerKillI1(*MI, IsWQM);
      break;
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
      SplitPoint = lowerKillF32(*MI);
      break;
    }
    if (SplitPoint)
      splitBlock(SplitPoint);
  }
  return !KillInstrs.empty();
}
void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
  MachineBasicBlock *MBB = MI.getParent();
// ...
  if (MI.getOpcode() == AMDGPU::SI_INIT_WHOLE_WAVE) {
    assert(MBB == &MBB->getParent()->front() &&
           "init whole wave not in entry block");
    Register EntryExec = MRI->createVirtualRegister(TRI->getBoolRC());
// ...
    MRI->replaceRegWith(MI.getOperand(0).getReg(), EntryExec);
// ...
    MI.eraseFromParent();
// ...
  }

  if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
    // ...
        .addImm(MI.getOperand(0).getImm());
// ...
    MI.eraseFromParent();
    return;
  }

  // Extract the thread count from an SGPR input and set EXEC accordingly.
// ...
  Register InputReg = MI.getOperand(0).getReg();
  MachineInstr *FirstMI = &*MBB->begin();
// ...
  MachineInstr *DefInstr = MRI->getVRegDef(InputReg);
// ...
  if (DefInstr != FirstMI) {
// ...
  }
// ...
  const DebugLoc &DL = MI.getDebugLoc();
// ...
  Register CountReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg)
                   // ...
                   .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
// ...
  auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
                   // ...
// ...
    MI.eraseFromParent();
// ...
  MI.eraseFromParent();
}
MachineBasicBlock::iterator
SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
  MachineBasicBlock::iterator InsertPt = Entry.getFirstNonPHI();

  for (MachineInstr *MI : InitExecInstrs) {
    // ...
    if (MI->getParent() == &Entry)
      InsertPt = std::next(MI->getIterator());

    lowerInitExec(*MI);
    Changed = true;
  }
// ...
  return InsertPt;
}
bool SIWholeQuadMode::run(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
                    << " ------------- \n");
// ...
  Instructions.clear();
  Blocks.clear();
  LiveMaskQueries.clear();
  LowerToCopyInstrs.clear();
  LowerToMovInstrs.clear();
  KillInstrs.clear();
  InitExecInstrs.clear();
  SetInactiveInstrs.clear();
  StateTransition.clear();
// ...
  char GlobalFlags = analyzeFunction(MF);
  bool Changed = false;

  MachineBasicBlock &Entry = MF.front();
  MachineBasicBlock::iterator EntryMI = lowerInitExecInstrs(Entry, Changed);

  const bool HasLiveMaskQueries = !LiveMaskQueries.empty();
  const bool HasWaveModes = GlobalFlags & ~StateExact;
  const bool HasKills = !KillInstrs.empty();
  const bool UsesWQM = GlobalFlags & StateWQM;
  if (HasKills || UsesWQM || (HasWaveModes && HasLiveMaskQueries)) {
    LiveMaskReg = MRI->createVirtualRegister(TRI->getBoolRC());
// ...
  }
// ...
  for (MachineInstr *MI : SetInactiveInstrs) {
    if (LowerToCopyInstrs.contains(MI))
      continue;
    auto &Info = Instructions[MI];
    if (Info.MarkedStates & StateStrict) {
      Info.Needs |= StateStrictWWM;
      Info.Disabled &= ~StateStrictWWM;
      Blocks[MI->getParent()].Needs |= StateStrictWWM;
    } else {
      // ...
      LowerToCopyInstrs.insert(MI);
    }
  }
// ...
  Changed |= lowerLiveMaskQueries();
// ...
  if (!HasWaveModes) {
    // No wave-mode execution.
    Changed |= lowerKillInstrs(false);
  } else if (GlobalFlags == StateWQM) {
    // The shader only needs WQM.
    // ...
    lowerKillInstrs(true);
// ...
  } else {
    // Mark the entry block for WQM if required.
    if (GlobalFlags & StateWQM)
      Blocks[&Entry].InNeeds |= StateWQM;
    // Wave-mode switching requires a full lowering pass.
    for (auto &BII : Blocks)
      processBlock(*BII.first, BII.second, BII.first == &Entry);
    // Lowering blocks causes block splitting, so perform it as a second pass.
    for (auto &BII : Blocks)
      lowerBlock(*BII.first, BII.second);
// ...
  }

  // Compute the live range for the live mask.
  if (LiveMaskReg != LMC.ExecReg)
    LIS->createAndComputeVirtRegInterval(LiveMaskReg);
// ...
  if (!KillInstrs.empty() || !InitExecInstrs.empty())
    LIS->removeAllRegUnitsForPhysReg(LMC.ExecReg);

  return Changed;
}
bool SIWholeQuadModeLegacy::runOnMachineFunction(MachineFunction &MF) {
  LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
  MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
  auto *PDTWrapper =
      getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
  MachinePostDominatorTree *PDT =
      PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
  SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
  return Impl.run(MF);
}

// ...
  SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
// ...