29#define DEBUG_TYPE "si-i1-copies"
48 void markAsLaneMask(
Register DstReg)
const override;
49 void getCandidatesForLowering(
51 void collectIncomingValuesFromPhi(
60 void constrainAsLaneMask(
Incoming &In)
override;
62 bool lowerCopiesFromI1();
63 bool lowerCopiesToI1();
64 bool cleanConstrainRegs(
bool Changed);
66 return Reg.isVirtual() &&
MRI->getRegClass(
Reg) == &AMDGPU::VReg_1RegClass;
75bool Vreg1LoweringHelper::cleanConstrainRegs(
bool Changed) {
78 MRI->constrainRegClass(
Reg, &AMDGPU::SReg_1_XEXECRegClass);
79 ConstrainRegs.clear();
106class PhiIncomingAnalysis {
107 MachinePostDominatorTree &PDT;
108 const SIInstrInfo *
TII;
112 MapVector<MachineBasicBlock *, bool> ReachableMap;
113 SmallVector<MachineBasicBlock *, 4> Stack;
114 SmallVector<MachineBasicBlock *, 4> Predecessors;
117 PhiIncomingAnalysis(MachinePostDominatorTree &PDT,
const SIInstrInfo *
TII)
122 bool isSource(MachineBasicBlock &
MBB)
const {
123 return ReachableMap.
find(&
MBB)->second;
130 ReachableMap.
clear();
131 Predecessors.
clear();
137 for (
auto Incoming : Incomings) {
138 MachineBasicBlock *
MBB = Incoming.Block;
139 if (
MBB == &DefBlock) {
140 ReachableMap[&DefBlock] =
true;
152 while (!
Stack.empty()) {
153 MachineBasicBlock *
MBB =
Stack.pop_back_val();
158 for (
auto &[
MBB, Reachable] : ReachableMap) {
159 bool HaveReachablePred =
false;
161 if (ReachableMap.count(Pred)) {
162 HaveReachablePred =
true;
164 Stack.push_back(Pred);
167 if (!HaveReachablePred)
169 if (HaveReachablePred) {
170 for (MachineBasicBlock *UnreachablePred : Stack) {
213 MachineDominatorTree &DT;
214 MachinePostDominatorTree &PDT;
219 DenseMap<MachineBasicBlock *, unsigned> Visited;
223 SmallVector<MachineBasicBlock *, 4> CommonDominators;
226 MachineBasicBlock *VisitedPostDom =
nullptr;
231 unsigned FoundLoopLevel = ~0
u;
233 MachineBasicBlock *DefBlock =
nullptr;
234 SmallVector<MachineBasicBlock *, 4>
Stack;
235 SmallVector<MachineBasicBlock *, 4> NextLevel;
238 LoopFinder(MachineDominatorTree &DT, MachinePostDominatorTree &PDT)
239 : DT(DT), PDT(PDT) {}
243 CommonDominators.
clear();
246 VisitedPostDom =
nullptr;
247 FoundLoopLevel = ~0
u;
256 unsigned findLoop(MachineBasicBlock *PostDom) {
263 while (PDNode->
getBlock() != PostDom) {
264 if (PDNode->
getBlock() == VisitedPostDom)
268 if (FoundLoopLevel == Level)
278 void addLoopEntries(
unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
279 MachineRegisterInfo &
MRI,
280 MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs,
284 MachineBasicBlock *Dom = CommonDominators[LoopLevel];
285 for (
auto &Incoming : Incomings)
288 if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
295 if (!inLoopLevel(*Pred, LoopLevel, Incomings))
303 bool inLoopLevel(MachineBasicBlock &
MBB,
unsigned LoopLevel,
305 auto DomIt = Visited.
find(&
MBB);
306 if (DomIt != Visited.
end() && DomIt->second <= LoopLevel)
309 for (
auto &Incoming : Incomings)
310 if (Incoming.Block == &
MBB)
316 void advanceLevel() {
317 MachineBasicBlock *VisitedDom;
319 if (!VisitedPostDom) {
320 VisitedPostDom = DefBlock;
321 VisitedDom = DefBlock;
322 Stack.push_back(DefBlock);
324 VisitedPostDom = PDT.
getNode(VisitedPostDom)->getIDom()->getBlock();
325 VisitedDom = CommonDominators.
back();
327 for (
unsigned i = 0; i < NextLevel.
size();) {
328 if (PDT.
dominates(VisitedPostDom, NextLevel[i])) {
329 Stack.push_back(NextLevel[i]);
331 NextLevel[i] = NextLevel.
back();
339 unsigned Level = CommonDominators.
size();
340 while (!
Stack.empty()) {
341 MachineBasicBlock *
MBB =
Stack.pop_back_val();
349 if (Succ == DefBlock) {
350 if (
MBB == VisitedPostDom)
351 FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
353 FoundLoopLevel = std::min(FoundLoopLevel, Level);
358 if (
MBB == VisitedPostDom)
361 Stack.push_back(Succ);
375 return MRI->createVirtualRegister(LaneMaskRegAttrs);
385 BuildMI(*
MBB,
MBB->getFirstTerminator(), {},
TII->get(AMDGPU::IMPLICIT_DEF),
399bool Vreg1LoweringHelper::lowerCopiesFromI1() {
401 SmallVector<MachineInstr *, 4> DeadCopies;
403 for (MachineBasicBlock &
MBB : *MF) {
404 for (MachineInstr &
MI :
MBB) {
405 if (
MI.getOpcode() != AMDGPU::COPY)
410 if (!isVreg1(SrcReg))
413 if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
423 assert(!
MI.getOperand(0).getSubReg());
425 ConstrainRegs.insert(SrcReg);
435 for (MachineInstr *
MI : DeadCopies)
436 MI->eraseFromParent();
446 MRI = &
MF->getRegInfo();
449 TII =
ST->getInstrInfo();
454 MovOp = AMDGPU::S_MOV_B32;
455 AndOp = AMDGPU::S_AND_B32;
456 OrOp = AMDGPU::S_OR_B32;
457 XorOp = AMDGPU::S_XOR_B32;
459 OrN2Op = AMDGPU::S_ORN2_B32;
462 MovOp = AMDGPU::S_MOV_B64;
463 AndOp = AMDGPU::S_AND_B64;
464 OrOp = AMDGPU::S_OR_B64;
465 XorOp = AMDGPU::S_XOR_B64;
467 OrN2Op = AMDGPU::S_ORN2_B64;
473 LoopFinder LF(*
DT, *
PDT);
474 PhiIncomingAnalysis PIA(*
PDT,
TII);
479 if (Vreg1Phis.
empty())
482 DT->updateDFSNumbers();
486 if (&
MBB != PrevMBB) {
504 return DT->getNode(LHS.Block)->getDFSNumIn() <
505 DT->getNode(RHS.Block)->getDFSNumIn();
514 std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
516 DomBlocks.push_back(
Use.getParent());
519 PDT->findNearestCommonDominator(DomBlocks);
525 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
529 if (FoundLoopLevel) {
547 PIA.analyze(
MBB, Incomings);
555 if (PIA.isSource(IMBB)) {
576 if (NewReg != DstReg) {
578 MI->eraseFromParent();
586bool Vreg1LoweringHelper::lowerCopiesToI1() {
589 LoopFinder LF(*DT, *PDT);
596 if (
MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
597 MI.getOpcode() != AMDGPU::COPY)
601 if (!isVreg1(DstReg))
606 if (
MRI->use_empty(DstReg)) {
613 markAsLaneMask(DstReg);
614 initializeLaneMaskRegisterAttributes(DstReg);
616 if (
MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
621 assert(!
MI.getOperand(1).getSubReg());
623 if (!SrcReg.
isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
624 assert(
TII->getRegisterInfo().getRegSizeInBits(SrcReg, *
MRI) == 32);
629 MI.getOperand(1).setReg(TmpReg);
633 MI.getOperand(1).setIsKill(
false);
638 std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
640 DomBlocks.push_back(
Use.getParent());
644 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
645 if (FoundLoopLevel) {
648 LF.addLoopEntries(FoundLoopLevel,
SSAUpdater, *
MRI, LaneMaskRegAttrs);
650 buildMergeLaneMasks(
MBB,
MI,
DL, DstReg,
657 MI->eraseFromParent();
666 MI =
MRI->getUniqueVRegDef(Reg);
667 if (
MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
670 if (
MI->getOpcode() != AMDGPU::COPY)
673 Reg =
MI->getOperand(1).getReg();
674 if (!Reg.isVirtual())
683 if (!
MI->getOperand(1).isImm())
686 int64_t Imm =
MI->getOperand(1).getImm();
704 if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
717 auto InsertionPt =
MBB.getFirstTerminator();
718 bool TerminatorsUseSCC =
false;
719 for (
auto I = InsertionPt, E =
MBB.end();
I != E; ++
I) {
722 if (TerminatorsUseSCC || DefsSCC)
726 if (!TerminatorsUseSCC)
729 while (InsertionPt !=
MBB.begin()) {
743void Vreg1LoweringHelper::markAsLaneMask(
Register DstReg)
const {
744 MRI->setRegClass(DstReg, ST->getBoolRC());
747void Vreg1LoweringHelper::getCandidatesForLowering(
751 if (isVreg1(
MI.getOperand(0).getReg()))
757void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
759 for (
unsigned i = 1; i <
MI->getNumOperands(); i += 2) {
760 assert(i + 1 <
MI->getNumOperands());
761 Register IncomingReg =
MI->getOperand(i).getReg();
765 if (IncomingDef->
getOpcode() == AMDGPU::COPY) {
767 assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
769 }
else if (IncomingDef->
getOpcode() == AMDGPU::IMPLICIT_DEF) {
772 assert(IncomingDef->
isPHI() || PhiRegisters.count(IncomingReg));
781 MRI->replaceRegWith(NewReg, OldReg);
789 bool PrevVal =
false;
790 bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
792 bool CurConstant = isConstantLaneMask(CurReg, CurVal);
794 if (PrevConstant && CurConstant) {
795 if (PrevVal == CurVal) {
810 if (CurConstant && CurVal) {
811 PrevMaskedReg = PrevReg;
821 if (PrevConstant && PrevVal) {
822 CurMaskedReg = CurReg;
831 if (PrevConstant && !PrevVal) {
834 }
else if (CurConstant && !CurVal) {
837 }
else if (PrevConstant && PrevVal) {
844 .
addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
848void Vreg1LoweringHelper::constrainAsLaneMask(
Incoming &In) {}
865 Vreg1LoweringHelper Helper(&MF, &MDT, &MPDT);
867 Changed |= Helper.lowerCopiesFromI1();
869 Changed |= Helper.lowerCopiesToI1();
870 return Helper.cleanConstrainRegs(
Changed);
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use)
static Register insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
static bool runFixI1Copies(MachineFunction &MF, MachineDominatorTree &MDT, MachinePostDominatorTree &MPDT)
Lower all instructions that def or use vreg_1 registers.
static bool isVRegCompatibleReg(const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI, Register Reg)
Interface definition of the PhiLoweringHelper class that implements lane mask merging algorithm for d...
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Implements a dense probed hash-table based set.
DomTreeNodeBase * getIDom() const
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
FunctionPass class - This class is used to implement most global optimizations.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass(char &ID)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
Register getReg() const
getReg - Returns the register number.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
LLVM_ABI MachineBasicBlock * findNearestCommonDominator(ArrayRef< MachineBasicBlock * > Blocks) const
Returns the nearest common dominator of the given blocks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
void AddAvailableValue(MachineBasicBlock *BB, Register V)
AddAvailableValue - Indicate that a rewritten value is available at the end of the specified block wi...
iterator find(const KeyT &Key)
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
PhiLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT, MachinePostDominatorTree *PDT)
bool isLaneMaskReg(Register Reg) const
MachineRegisterInfo * MRI
MachineDominatorTree * DT
DenseSet< Register > PhiRegisters
virtual void getCandidatesForLowering(SmallVectorImpl< MachineInstr * > &Vreg1Phis) const =0
virtual void constrainAsLaneMask(Incoming &In)=0
virtual void collectIncomingValuesFromPhi(const MachineInstr *MI, SmallVectorImpl< Incoming > &Incomings) const =0
virtual void markAsLaneMask(Register DstReg) const =0
MachinePostDominatorTree * PDT
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs
MachineBasicBlock::iterator getSaluInsertionAtEnd(MachineBasicBlock &MBB) const
Return a point at the end of the given MBB to insert SALU instructions for lane mask calculation.
void initializeLaneMaskRegisterAttributes(Register LaneMask)
bool isConstantLaneMask(Register Reg, bool &Val) const
virtual void buildMergeLaneMasks(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg)=0
virtual void replaceDstReg(Register NewReg, Register OldReg, MachineBasicBlock *MBB)=0
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Helper class for SSA formation on a set of values defined in multiple blocks.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A Use represents the edge between a Value definition and its users.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Register createLaneMaskReg(MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
void initializeSILowerI1CopiesLegacyPass(PassRegistry &)
ArrayRef(const T &OneElt) -> ArrayRef< T >
FunctionPass * createSILowerI1CopiesLegacyPass()
char & SILowerI1CopiesLegacyID
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
MachineBasicBlock * Block
All attributes(register class or bank and low-level type) a virtual register can have.