31#define DEBUG_TYPE "amdgpu-resource-usage"
41 "amdgpu-assume-external-call-stack-size",
46 "amdgpu-assume-dynamic-stack-object-size",
47 cl::desc(
"Assumed extra stack use if there are any "
48 "variable sized objects (in bytes)"),
52 "Function register usage analysis",
true,
true)
59 return cast<Function>(
Op.getGlobal()->stripPointerCastsAndAliases());
65 if (!UseOp.isImplicit() || !
TII.isFLAT(*UseOp.getParent()))
74 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
83 uint32_t AssumedStackSizeForDynamicSizeObjects =
90 AssumedStackSizeForDynamicSizeObjects = 0;
92 AssumedStackSizeForExternalCall = 0;
96 MF, AssumedStackSizeForDynamicSizeObjects,
97 AssumedStackSizeForExternalCall);
110 uint32_t AssumedStackSizeForDynamicSizeObjects =
117 AssumedStackSizeForDynamicSizeObjects = 0;
119 AssumedStackSizeForExternalCall = 0;
123 MF, AssumedStackSizeForDynamicSizeObjects,
124 AssumedStackSizeForExternalCall);
130 uint32_t AssumedStackSizeForExternalCall)
const {
140 Info.UsesFlatScratch =
MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
141 MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
157 Info.UsesFlatScratch =
false;
160 Info.PrivateSegmentSize = FrameInfo.getStackSize();
163 Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
164 if (
Info.HasDynamicallySizedStack)
165 Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
168 Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
171 MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
MRI.isPhysRegUsed(AMDGPU::VCC_HI);
172 Info.NumExplicitSGPR =
TRI.getNumUsedPhysRegs(
MRI, AMDGPU::SGPR_32RegClass,
174 if (ST.hasMAIInsts())
175 Info.NumAGPR =
TRI.getNumUsedPhysRegs(
MRI, AMDGPU::AGPR_32RegClass,
181 if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
182 Info.NumVGPR =
TRI.getNumUsedPhysRegs(
MRI, AMDGPU::VGPR_32RegClass,
187 int32_t MaxVGPR = -1;
188 Info.CalleeSegmentSize = 0;
192 for (
unsigned I = 0;
I <
MI.getNumOperands(); ++
I) {
200 case AMDGPU::NoRegister:
202 "Instruction uses invalid noreg register");
205 case AMDGPU::XNACK_MASK:
206 case AMDGPU::XNACK_MASK_LO:
207 case AMDGPU::XNACK_MASK_HI:
210 case AMDGPU::LDS_DIRECT:
221 case AMDGPU::SRC_VCCZ:
224 case AMDGPU::SRC_EXECZ:
227 case AMDGPU::SRC_SCC:
235 assert((!RC ||
TRI.isVGPRClass(RC) ||
TRI.isSGPRClass(RC) ||
236 TRI.isAGPRClass(RC) || AMDGPU::TTMP_32RegClass.contains(Reg) ||
237 AMDGPU::TTMP_64RegClass.contains(Reg) ||
238 AMDGPU::TTMP_128RegClass.contains(Reg) ||
239 AMDGPU::TTMP_256RegClass.contains(Reg) ||
240 AMDGPU::TTMP_512RegClass.contains(Reg)) &&
241 "Unknown register class");
243 if (!RC || !
TRI.isVGPRClass(RC))
246 if (
MI.isCall() ||
MI.isMetaInstruction())
250 unsigned HWReg =
TRI.getHWRegIndex(Reg);
251 int MaxUsed = HWReg + Width - 1;
252 MaxVGPR = std::max(MaxUsed, MaxVGPR);
260 TII->getNamedOperand(
MI, AMDGPU::OpName::callee);
262 const Function *Callee = getCalleeFunction(*CalleeOp);
275 if (Callee && !isSameFunction(MF, Callee))
276 Info.Callees.push_back(Callee);
278 bool IsIndirect = !Callee || Callee->isDeclaration();
281 if (!Callee || !Callee->doesNotRecurse()) {
282 Info.HasRecursion =
true;
286 if (!
MI.isReturn()) {
293 Info.CalleeSegmentSize = std::max(
294 Info.CalleeSegmentSize,
295 static_cast<uint64_t>(AssumedStackSizeForExternalCall));
300 Info.CalleeSegmentSize =
301 std::max(
Info.CalleeSegmentSize,
302 static_cast<uint64_t>(AssumedStackSizeForExternalCall));
306 Info.UsesFlatScratch = ST.hasFlatAddressSpace();
307 Info.HasDynamicallySizedStack =
true;
308 Info.HasIndirectCall =
true;
314 Info.NumVGPR = MaxVGPR + 1;
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< uint32_t > clAssumedStackSizeForDynamicSizeObjects("amdgpu-assume-dynamic-stack-object-size", cl::desc("Assumed extra stack use if there are any " "variable sized objects (in bytes)"), cl::Hidden, cl::init(4096))
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
static cl::opt< uint32_t > clAssumedStackSizeForExternalCall("amdgpu-assume-external-call-stack-size", cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden, cl::init(16384))
Analyzes how many registers and other resources are used by functions.
Analysis containing CSE Info
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options pass.
uint32_t getNumNamedBarriers() const
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A container for analyses that lazily runs them and caches their results.
This class represents an Operation in the Expression.
bool hasFlatScratchInit() const
Module * getParent()
Get the module that this global value is contained inside of...
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
bool isStackRealigned() const
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Primary interface to the complete machine description for the target machine.
const MCSubtargetInfo * getMCSubtargetInfo() const
OSType getOS() const
Get the parsed operating system type of this triple.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
char & AMDGPUResourceUsageAnalysisID
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF, uint32_t AssumedStackSizeForDynamicSizeObjects, uint32_t AssumedStackSizeForExternalCall) const
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
FunctionResourceInfo ResourceInfo
A special type used by analysis passes to provide an address that identifies that particular analysis...