#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace AMDGPU;
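// RegBankLLTMapping pairs each destination and source operand with a
// register-bank/LLT apply ID, plus an optional lowering method.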
RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}
PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}
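// The return statements below are case bodies from matchUniformityAndLLT;
// the enclosing switch over UniformityLLTOpPredicateID and its case labels
// are elided in this excerpt. First, checks that only match the type size: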
    return MRI.getType(Reg).getSizeInBits() == 32;
    return MRI.getType(Reg).getSizeInBits() == 64;
    return MRI.getType(Reg).getSizeInBits() == 96;
    return MRI.getType(Reg).getSizeInBits() == 128;
    return MRI.getType(Reg).getSizeInBits() == 256;
    return MRI.getType(Reg).getSizeInBits() == 512;
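  // Cases that additionally require the register to be uniform: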
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
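  // Cases that additionally require the register to be divergent: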
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
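// Per-operand register checks, most likely from PredicateMapping::match; the
// enclosing loop and the branch bodies are elided in this excerpt.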
    if (MI.getOperand(i).isReg())

    if (!MI.getOperand(i).isReg())
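// SetOfRulesForOpcode holds a predicate-matched rule list plus "fast rule"
// slots (Uni/Div arrays indexed by a type predicate). findMappingForMI first
// tries the fast slots, then falls back to matching each rule's predicate.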
SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}
    Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));

    Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

  return MUI.isUniform(Reg) ? Uni[Slot] : Div[Slot];

    if (Rule.Predicate.match(MI, MUI, MRI))
      return Rule.OperandMapping;
void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = RuleApplyIDs;
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = RuleApplyIDs;
}
int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
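// Rule-set registration: generic opcodes and intrinsic IDs map, via alias
// tables, to a shared SetOfRulesForOpcode.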
RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}
const SetOfRulesForOpcode &
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end()) {
      // ... (no rules registered for this intrinsic)
    }
    return IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end()) {
    // ... (no rules registered for this generic opcode)
  }
  return GRules.at(GRAIt->second);
}
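// Predicate combines per-instruction checks into a small boolean expression;
// each element records how far evaluation jumps on a true or false result.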
  unsigned TJumpOffset;
  unsigned FJumpOffset;
  } while ((Idx != ResultIdx));
    NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                             ExprElt.TJumpOffset});

  return Predicate(std::move(NegExpression));
  unsigned RHSSize = RHS.Expression.size();

  for (unsigned i = 0; i < ResultIdx; ++i) {
    if (i + AndExpression[i].FJumpOffset == ResultIdx)
      AndExpression[i].FJumpOffset += RHSSize;
  }

  return Predicate(std::move(AndExpression));
  unsigned RHSSize = RHS.Expression.size();

  for (unsigned i = 0; i < ResultIdx; ++i) {
    if (i + OrExpression[i].TJumpOffset == ResultIdx)
      OrExpression[i].TJumpOffset += RHSSize;
  }

  return Predicate(std::move(OrExpression));
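// The RegBankLegalizeRules constructor below registers the per-opcode rule
// tables; most of the chained mappings (e.g. the .Any(...) call shown for
// G_BR) are elided in this excerpt.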
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {
  addRulesForGOpcs({G_ADD, G_SUB}, Standard)

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)

  addRulesForGOpcs({G_LSHR}, Standard)

  addRulesForGOpcs({G_ASHR}, Standard)

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)

  addRulesForGOpcs({G_CONSTANT})

  addRulesForGOpcs({G_ICMP})

  addRulesForGOpcs({G_FCMP})

  addRulesForGOpcs({G_BRCOND})

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_ANYEXT})

  addRulesForGOpcs({G_TRUNC})

  addRulesForGOpcs({G_ZEXT})

  addRulesForGOpcs({G_SEXT})

  addRulesForGOpcs({G_SEXT_INREG})
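  // MMO-based Predicate lambdas (alignment, atomicity, volatility, invariance)
  // that are combined into the isUL predicate below; most lambda headers are
  // elided in this excerpt.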
    return (*MI.memoperands_begin())->getAlign() >= Align(16);

    return (*MI.memoperands_begin())->getAlign() >= Align(4);

    return (*MI.memoperands_begin())->isAtomic();

    return (*MI.memoperands_begin())->isVolatile();

    return (*MI.memoperands_begin())->isInvariant();

  Predicate isNaturalAlignedSmall([](const MachineInstr &MI) -> bool {
    const unsigned MemSize = 8 * MMO->getSize().getValue();

  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);
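  // Memory, pointer, and FP conversion rules; chained mappings elided.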
  addRulesForGOpcs({G_LOAD})

  addRulesForGOpcs({G_ZEXTLOAD})

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, StandardB)

  addRulesForGOpcs({G_STORE})

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})

  addRulesForGOpcs({G_PTR_ADD})

  addRulesForGOpcs({G_INTTOPTR})

  addRulesForGOpcs({G_PTRTOINT})

  addRulesForGOpcs({G_FADD}, Standard)

  addRulesForGOpcs({G_FPTOUI})

  addRulesForGOpcs({G_UITOFP})
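  // Intrinsic rules are keyed by intrinsic ID rather than by generic opcode.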
  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_if_break}, Standard)

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)

  addRulesForIOpcs({amdgcn_readfirstlane})