#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
  StringRef getPassName() const override {
    return "AMDGPU GlobalISel divergence lowering";
  }
class DivergenceLoweringHelper : public PhiLoweringHelper {
  // ...
public:
  void markAsLaneMask(Register DstReg) const override;
  void getCandidatesForLowering(
      SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
  void collectIncomingValuesFromPhi(
      const MachineInstr *MI,
      SmallVectorImpl<Incoming> &Incomings) const override;
  // ...
  void constrainAsLaneMask(Incoming &In) override;

  bool lowerTemporalDivergence();
  bool lowerTemporalDivergenceI1();
};
DivergenceLoweringHelper::DivergenceLoweringHelper(
    MachineFunction *MF, MachineDominatorTree *DT,
    MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
    : PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}
// Lane masks must live in the subtarget's "bool" register class
// (SReg_32 for wave32, SReg_64 for wave64).
void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
  if (MRI->getRegClassOrNull(DstReg)) {
    if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
      return;
    llvm_unreachable("Failed to constrain register class");
  }

  MRI->setRegClass(DstReg, ST->getBoolRC());
}
void DivergenceLoweringHelper::getCandidatesForLowering(
    SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
  // ... collect divergent S1 phis into Vreg1Phis ...
}
void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
    const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
  // Phi operands come in (value, predecessor-block) pairs after the def.
  for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
    Incomings.emplace_back(MI->getOperand(i).getReg(),
                           MI->getOperand(i + 1).getMBB(), Register());
  }
}
void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
                                             MachineBasicBlock *MBB) {
  BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
      .addReg(NewReg);
}
Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
  // ...
  B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
  B.buildCopy(LaneMask, Reg);
  // ...
}
void DivergenceLoweringHelper::buildMergeLaneMasks(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
    Register DstReg, Register PrevReg, Register CurReg) {
  // DstReg = (PrevReg & ~EXEC) | (CurReg & EXEC)
  Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
  Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
  Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
  Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);

  B.setInsertPt(MBB, I);
  B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
  B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
  B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
}
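// For reference (illustrative sketch, not code from this pass): the three
// instructions above compute the usual lane-mask merge: inactive lanes keep
// their previous value, active lanes take the new one. On plain 64-bit masks
// (wave64-sized, assuming <cstdint>) the same computation is:
static uint64_t mergeLaneMasksSketch(uint64_t Prev, uint64_t Cur,
                                     uint64_t Exec) {
  return (Prev & ~Exec) | (Cur & Exec); // ANDN2, AND, OR
}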
void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) {
  B.setInsertPt(*In.Block, In.Block->getFirstTerminator());

  auto Copy = B.buildCopy(LLT::scalar(1), In.Reg);
  MRI->setRegClass(Copy.getReg(0), ST->getBoolRC());
  In.Reg = Copy.getReg(0);
}
static void replaceUsesOfRegInInstWith(Register Reg, MachineInstr *UseInst,
                                       Register NewReg) {
  for (MachineOperand &Op : UseInst->operands()) {
    if (Op.isReg() && Op.getReg() == Reg)
      Op.setReg(NewReg);
  }
}
bool DivergenceLoweringHelper::lowerTemporalDivergence() {
  // ...
    // Reuse a VGPR copy that was already created for this register.
    replaceUsesOfRegInInstWith(Reg, UseInst, CachedTDCopy);
  // ...
    // Otherwise copy the value to a VGPR right after its definition, reading
    // EXEC implicitly, rewrite the temporal-divergent use, and cache the copy.
    B.buildInstr(AMDGPU::COPY, {VgprReg}, {Reg})
        .addUse(ExecReg, RegState::Implicit);

    replaceUsesOfRegInInstWith(Reg, UseInst, VgprReg);
    TDCache[Reg] = VgprReg;
  // ...
}
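// Conceptual illustration only (plain C++, not LLVM API): "temporal
// divergence" as handled above. Imagine every lane running this function with
// its own 'n'; the loop-exit condition is divergent, so lanes leave on
// different iterations and the value of 'v' observed after the loop differs
// per lane, even though each assignment inside the loop is uniform. That is
// why uses outside the cycle are rewritten to a VGPR copy made at the
// defining instruction.
static int temporalDivergenceSketch(int n) {
  int v = 0;
  for (int i = 0; i < n; ++i) // divergent exit: 'n' differs per lane
    v = i * 2;                // uniform while the lane stays in the cycle
  return v;                   // use outside the cycle sees per-lane values
}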
bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
  MachineRegisterInfo::VRegAttrs BoolS1 = {ST->getBoolRC(), LLT::scalar(1)};
  initializeLaneMaskRegisterAttributes(BoolS1);

  // For each temporal-divergent i1, record the largest relevant cycle and the
  // lane mask to be merged across it.
  // ...
    auto &CycleMergedMask = LRCCacheIter->getSecond();
    // ...
    if (RegNotCached || LRC->contains(CachedLRC)) {
      // ...
    }
  // ...

  for (auto &LRCCacheEntry : LRCCache) {
    // ...
    auto &CycleMergedMask = LRCCacheEntry.getSecond();
    // ...
    Register MergedMask = MRI->createVirtualRegister(BoolS1);
    // ...

    // Give the lane mask an initial (undefined) value in every predecessor
    // that enters the cycle from outside.
    for (auto Entry : Cycle->getEntries()) {
      for (MachineBasicBlock *Pred : Entry->predecessors()) {
        if (!Cycle->contains(Pred)) {
          B.setInsertPt(*Pred, Pred->getFirstTerminator());
          auto ImplDef = B.buildInstr(AMDGPU::IMPLICIT_DEF, {BoolS1}, {});
          // ...
        }
      }
    }
    // ...

    buildMergeLaneMasks(*MBB, MBB->getFirstTerminator(), {}, MergedMask,
                        /* PrevReg, CurReg not shown in this excerpt */);
    // ...
    CycleMergedMask.second = MergedMask;
  }

  // ...
  replaceUsesOfRegInInstWith(Reg, UseInst, LRCCache.lookup(Reg).second);
  // ...
}
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                      "AMDGPU GlobalISel divergence lowering", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                    "AMDGPU GlobalISel divergence lowering", false, false)
char AMDGPUGlobalISelDivergenceLowering::ID = 0;

char &llvm::AMDGPUGlobalISelDivergenceLoweringID =
    AMDGPUGlobalISelDivergenceLowering::ID;
FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
  return new AMDGPUGlobalISelDivergenceLowering();
}
bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
    MachineFunction &MF) {
  MachineDominatorTree &DT =
      getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  MachinePostDominatorTree &PDT =
      getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  MachineUniformityInfo &MUI =
      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();

  DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);

  bool Changed = false;
  // Non-i1 temporal divergence lowering must run before phi lowering, which
  // deletes and recreates the phis it lowers.
  Changed |= Helper.lowerTemporalDivergence();
  Changed |= Helper.lowerPhis();
  Changed |= Helper.lowerTemporalDivergenceI1();
  return Changed;
}
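// Usage note (not part of this file): INITIALIZE_PASS registers the pass
// under DEBUG_TYPE, so the lowering can typically be exercised in isolation
// on MIR input, e.g. with
//   llc -mtriple=amdgcn -run-pass=amdgpu-global-isel-divergence-lowering
// which is how targeted tests for passes registered this way are usually run.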