33#define DEBUG_TYPE "amdgpu-regbanklegalize"
50 return "AMDGPU Register Bank Legalize";
70 "AMDGPU Register Bank Legalize",
false,
false)
77char AMDGPURegBankLegalize::
ID = 0;
82 return new AMDGPURegBankLegalize();
87 static std::mutex GlobalMutex;
90 std::lock_guard<std::mutex> Lock(GlobalMutex);
91 auto [It, Inserted] = CacheForRuleSet.
try_emplace(ST.getGeneration());
93 It->second = std::make_unique<RegBankLegalizeRules>(ST,
MRI);
95 It->second->refreshRefs(ST,
MRI);
115 : B(B), MRI(*B.getMRI()), TRI(TRI),
116 SgprRB(&RBI.getRegBank(
AMDGPU::SGPRRegBankID)),
117 VgprRB(&RBI.getRegBank(
AMDGPU::VGPRRegBankID)),
118 VccRB(&RBI.getRegBank(
AMDGPU::VCCRegBankID)) {};
121 std::pair<MachineInstr *, Register>
tryMatch(
Register Src,
unsigned Opcode);
133 if (RB && RB->
getID() == AMDGPU::VCCRegBankID)
137 return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) ==
LLT::scalar(1);
140std::pair<MachineInstr *, Register>
148std::pair<GUnmerge *, int>
151 if (ReadAnyLane->
getOpcode() != AMDGPU::G_AMDGPU_READANYLANE)
152 return {
nullptr, -1};
156 return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc,
nullptr)};
158 return {
nullptr, -1};
163 auto [RAL, RALSrc] =
tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
173 unsigned NumElts =
Merge->getNumSources();
175 if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
179 for (
unsigned i = 1; i < NumElts; ++i) {
181 if (UnmergeI != Unmerge || (
unsigned)IdxI != i)
184 return Unmerge->getSourceReg();
194 int Idx = UnMerge->findRegisterDefOperandIdx(Src,
nullptr);
196 if (!
Merge || UnMerge->getNumDefs() !=
Merge->getNumSources())
200 if (MRI.getType(Src) != MRI.getType(SrcRegIdx))
203 auto [RALEl, RALElSrc] =
tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE);
213 MRI.replaceRegWith(Dst, Src);
215 B.buildCopy(Dst, Src);
220 Register Dst = Copy.getOperand(0).getReg();
221 Register Src = Copy.getOperand(1).getReg();
224 if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB)
225 : !TRI.isVGPR(MRI, Dst))
229 if (!Src.isVirtual() || MRI.getRegClassOrNull(Src))
234 if (SrcMI.
getOpcode() == AMDGPU::G_BITCAST)
242 if (SrcMI.
getOpcode() != AMDGPU::G_BITCAST) {
255 auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
270 if (!Dst.isVirtual() || !Src.isVirtual())
280 if (
isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) {
281 auto [Trunc, TruncS32Src] =
tryMatch(Src, AMDGPU::G_TRUNC);
282 assert(Trunc && MRI.getType(TruncS32Src) == S32 &&
283 "sgpr S1 must be result of G_TRUNC of sgpr S32");
287 auto One = B.buildConstant({SgprRB, S32}, 1);
288 auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One);
289 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc});
301 if (MRI.getType(Src) != S1)
304 auto [Trunc, TruncSrc] =
tryMatch(Src, AMDGPU::G_TRUNC);
308 LLT DstTy = MRI.getType(Dst);
309 LLT TruncSrcTy = MRI.getType(TruncSrc);
311 if (DstTy == TruncSrcTy) {
312 MRI.replaceRegWith(Dst, TruncSrc);
319 if (DstTy == S32 && TruncSrcTy == S64) {
320 auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc);
321 MRI.replaceRegWith(Dst, Unmerge.getReg(0));
326 if (DstTy == S64 && TruncSrcTy == S32) {
327 B.buildMergeLikeInstr(
MI.getOperand(0).getReg(),
328 {TruncSrc, B.buildUndef({SgprRB, S32})});
333 if (DstTy ==
S32 && TruncSrcTy ==
S16) {
334 B.buildAnyExt(Dst, TruncSrc);
339 if (DstTy ==
S16 && TruncSrcTy ==
S32) {
340 B.buildTrunc(Dst, TruncSrc);
351 for (
unsigned i = 0; i <
MRI.getNumVirtRegs(); ++i) {
357 if (RB && RB->
getID() == AMDGPU::SGPRRegBankID) {
372 const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
373 GISelCSEAnalysisWrapper &
Wrapper =
374 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
376 GISelObserverWrapper Observer;
380 B.setCSEInfo(&CSEInfo);
381 B.setChangeObserver(Observer);
383 RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
384 RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);
388 const RegisterBankInfo &RBI = *
ST.getRegBankInfo();
390 getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
393 const RegBankLegalizeRules &RBLRules =
getRules(ST,
MRI);
396 RegBankLegalizeHelper RBLHelper(
B, MUI, RBI, RBLRules);
400 for (MachineBasicBlock &
MBB : MF) {
401 for (MachineInstr &
MI :
MBB) {
406 for (MachineInstr *
MI : AllInst) {
407 if (!
MI->isPreISelOpcode())
410 unsigned Opc =
MI->getOpcode();
412 if (
Opc == AMDGPU::G_PHI) {
413 RBLHelper.applyMappingPHI(*
MI);
419 if (
Opc == AMDGPU::G_BUILD_VECTOR ||
Opc == AMDGPU::G_UNMERGE_VALUES ||
420 Opc == AMDGPU::G_MERGE_VALUES ||
Opc == AMDGPU::G_BITCAST) {
421 RBLHelper.applyMappingTrivial(*
MI);
426 if (
Opc == G_FREEZE &&
428 RBLHelper.applyMappingTrivial(*
MI);
432 if ((
Opc == AMDGPU::G_CONSTANT ||
Opc == AMDGPU::G_FCONSTANT ||
433 Opc == AMDGPU::G_IMPLICIT_DEF)) {
437 assert(
MRI.getRegBank(Dst)->getID() == AMDGPU::SGPRRegBankID);
444 RBLHelper.findRuleAndApplyMapping(*
MI);
470 AMDGPURegBankLegalizeCombiner Combiner(
B, *
ST.getRegisterInfo(), RBI);
472 for (MachineBasicBlock &
MBB : MF) {
474 if (
MI.getOpcode() == AMDGPU::COPY) {
475 Combiner.tryCombineCopy(
MI);
478 if (
MI.getOpcode() == AMDGPU::G_ANYEXT) {
479 Combiner.tryCombineS1AnyExt(
MI);
486 "Registers with sgpr reg bank and S1 LLT are not legal after "
487 "AMDGPURegBankLegalize. Should lower to sgpr S32");
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static Register getAnySgprS1(const MachineRegisterInfo &MRI)
const RegBankLegalizeRules & getRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Target-Independent Code Generator Pass Configuration Options pass.
std::pair< GUnmerge *, int > tryMatchRALFromUnmerge(Register Src)
void replaceRegWithOrBuildCopy(Register Dst, Register Src)
AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
bool isLaneMask(Register Reg)
void tryCombineS1AnyExt(MachineInstr &MI)
std::pair< MachineInstr *, Register > tryMatch(Register Src, unsigned Opcode)
Register getReadAnyLaneSrc(Register Src)
void tryCombineCopy(MachineInstr &MI)
bool tryEliminateReadAnyLane(MachineInstr &Copy)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
FunctionPass class - This class is used to implement most global optimizations.
The actual analysis pass wrapper.
void addObserver(GISelChangeObserver *O)
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Holds all the information related to register banks.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
Target-Independent Code Generator Pass Configuration Options.
virtual std::unique_ptr< CSEConfigBase > getCSEConfig() const
Returns the CSEConfig object to use for the current optimization level.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FunctionPass * createAMDGPURegBankLegalizePass()
LLVM_ABI void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver=nullptr)
char & AMDGPURegBankLegalizeID