LLVM 22.0.0git
SIProgramInfo.cpp
Go to the documentation of this file.
1//===-- SIProgramInfo.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10///
11/// The SIProgramInfo tracks resource usage and hardware flags for kernels and
12/// entry functions.
13//
14//===----------------------------------------------------------------------===//
15//
16
17#include "SIProgramInfo.h"
18#include "GCNSubtarget.h"
19#include "SIDefines.h"
21#include "llvm/MC/MCExpr.h"
22
23using namespace llvm;
24
26 MCContext &Ctx = MF.getContext();
27
28 const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
29
30 CodeSizeInBytes.reset();
31
32 VGPRBlocks = ZeroExpr;
33 SGPRBlocks = ZeroExpr;
34 Priority = 0;
35 FloatMode = 0;
36 Priv = 0;
37 DX10Clamp = 0;
38 DebugMode = 0;
39 IEEEMode = 0;
40 WgpMode = 0;
41 MemOrdered = 0;
42 FwdProgress = 0;
43 RrWgMode = 0;
44 ScratchSize = ZeroExpr;
45
46 LDSBlocks = 0;
47 ScratchBlocks = ZeroExpr;
48
49 ScratchEnable = ZeroExpr;
50 UserSGPR = 0;
52 TGIdXEnable = 0;
53 TGIdYEnable = 0;
54 TGIdZEnable = 0;
55 TGSizeEnable = 0;
57 EXCPEnMSB = 0;
58 LdsSize = 0;
59 EXCPEnable = 0;
60
61 ComputePGMRSrc3 = ZeroExpr;
62
63 NumVGPR = ZeroExpr;
64 NumArchVGPR = ZeroExpr;
65 NumAccVGPR = ZeroExpr;
66 AccumOffset = ZeroExpr;
67 TgSplit = 0;
68 NumSGPR = ZeroExpr;
69 SGPRSpill = 0;
70 VGPRSpill = 0;
71 LDSSize = 0;
72 FlatUsed = ZeroExpr;
73
74 NumSGPRsForWavesPerEU = ZeroExpr;
75 NumVGPRsForWavesPerEU = ZeroExpr;
76 NamedBarCnt = ZeroExpr;
77 Occupancy = ZeroExpr;
78 DynamicCallStack = ZeroExpr;
79 VCCUsed = ZeroExpr;
80}
81
83 const GCNSubtarget &ST) {
84 uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
86 S_00B848_PRIV(ProgInfo.Priv) |
88 S_00B848_WGP_MODE(ProgInfo.WgpMode) |
91
92 if (ST.hasDX10ClampMode())
93 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
94
95 if (ST.hasIEEEMode())
96 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
97
98 if (ST.hasRrWGMode())
99 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
100
101 return Reg;
102}
103
/// Assemble the integer-constant bits of the value returned by this helper
/// for the calling convention \p CC.
///
/// The result starts from the priority and priv fields of \p ProgInfo, gains
/// the DX10_CLAMP, IEEE_MODE and RR_WG_MODE fields only when \p ST reports
/// the corresponding capability, and finally gains MEM_ORDERED / WGP_MODE
/// fields chosen by the switch on \p CC.
///
/// NOTE(review): in this listing some operands of the initial OR chain and
/// the case labels of the switch are not visible -- confirm the full field
/// list and the calling-convention mapping against the upstream file.
104static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
105 CallingConv::ID CC, const GCNSubtarget &ST) {
106 uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
108 S_00B848_PRIV(ProgInfo.Priv) |
110
// Each of the following fields is encoded only if the subtarget says it
// has that mode; otherwise the bits are left as they are.
111 if (ST.hasDX10ClampMode())
112 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
113
114 if (ST.hasIEEEMode())
115 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
116
117 if (ST.hasRrWGMode())
118 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
119
// The remaining fields use a different macro family (S_00B028 / S_00B128 /
// S_00B228 / S_00B428) per switch arm; calling conventions that match no
// arm fall through to the default and add nothing.
120 switch (CC) {
122 Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
123 break;
125 Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
126 break;
128 Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
130 break;
132 Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
134 break;
135 default:
136 break;
137 }
138 return Reg;
139}
140
142 uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
150 S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
152
153 return Reg;
154}
155
156static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
157 MCContext &Ctx) {
158 if (Mask) {
159 const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
160 Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
161 }
162 if (Shift) {
163 const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
164 Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
165 }
166 return Val;
167}
168
170 MCContext &Ctx) const {
171 uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
172 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
173 const MCExpr *Res = MCBinaryExpr::createOr(
174 MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
175 MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
176 return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
177}
178
180 const GCNSubtarget &ST,
181 MCContext &Ctx) const {
182 if (AMDGPU::isCompute(CC)) {
183 return getComputePGMRSrc1(ST, Ctx);
184 }
185
186 uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
187 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
188 const MCExpr *Res = MCBinaryExpr::createOr(
189 MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
190 MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
191 return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
192}
193
195 uint64_t Reg = getComputePGMRSrc2Reg(*this);
196 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
197 return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
198}
199
201 MCContext &Ctx) const {
202 if (AMDGPU::isCompute(CC))
203 return getComputePGMRSrc2(Ctx);
204
205 return MCConstantExpr::create(0, Ctx);
206}
207
209 bool IsLowerBound) {
210 if (!IsLowerBound && CodeSizeInBytes.has_value())
211 return *CodeSizeInBytes;
212
213 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
214 const SIInstrInfo *TII = STM.getInstrInfo();
215
216 uint64_t CodeSize = 0;
217
218 for (const MachineBasicBlock &MBB : MF) {
219 // The amount of padding needed to align code can be both underestimated
220 // and overestimated. If inline asm is used, getInstSizeInBytes() will
221 // return a maximum size of a single instruction, whereas the real size may
222 // differ. At this point CodeSize may already be off.
223 if (!IsLowerBound)
225
226 for (const MachineInstr &MI : MBB) {
227 // TODO: CodeSize should account for multiple functions.
228
229 if (MI.isMetaInstruction())
230 continue;
231
232 // We cannot properly estimate inline asm size. It can be as small as zero
233 // if that is just a comment.
234 if (IsLowerBound && MI.isInlineAsm())
235 continue;
236
237 CodeSize += TII->getInstSizeInBytes(MI);
238 }
239 }
240
242 return CodeSize;
243}
MachineBasicBlock & MBB
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define S_00B84C_EXCP_EN(x)
Definition: SIDefines.h:1175
#define S_00B428_MEM_ORDERED(x)
Definition: SIDefines.h:1136
#define S_00B028_MEM_ORDERED(x)
Definition: SIDefines.h:1112
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:1158
#define S_00B228_WGP_MODE(x)
Definition: SIDefines.h:1124
#define S_00B848_MEM_ORDERED(x)
Definition: SIDefines.h:1213
#define S_00B228_MEM_ORDERED(x)
Definition: SIDefines.h:1127
#define S_00B848_RR_WG_MODE(x)
Definition: SIDefines.h:1201
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:1152
#define S_00B848_DEBUG_MODE(x)
Definition: SIDefines.h:1204
#define S_00B428_WGP_MODE(x)
Definition: SIDefines.h:1133
#define S_00B848_PRIV(x)
Definition: SIDefines.h:1195
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:1161
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:1164
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:1172
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:1146
#define S_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:1149
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:1155
#define S_00B128_MEM_ORDERED(x)
Definition: SIDefines.h:1119
#define S_00B848_WGP_MODE(x)
Definition: SIDefines.h:1210
#define S_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:1168
#define S_00B848_DX10_CLAMP(x)
Definition: SIDefines.h:1198
#define S_00B848_PRIORITY(x)
Definition: SIDefines.h:1189
#define S_00B848_IEEE_MODE(x)
Definition: SIDefines.h:1207
#define S_00B848_FWD_PROGRESS(x)
Definition: SIDefines.h:1216
#define S_00B848_FLOAT_MODE(x)
Definition: SIDefines.h:1192
static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo)
static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo, CallingConv::ID CC, const GCNSubtarget &ST)
static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo, const GCNSubtarget &ST)
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Defines struct to track resource usage and hardware flags for kernels and entry functions.
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:308
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:348
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:408
static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:413
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:212
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Align getAlignment() const
Return alignment of the basic block.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MCContext & getContext() const
Representation of each machine instruction.
Definition: MachineInstr.h:72
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition: CallingConv.h:188
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:206
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:32
const MCExpr * NumSGPR
Definition: SIProgramInfo.h:74
const MCExpr * getPGMRSrc2(CallingConv::ID CC, MCContext &Ctx) const
const MCExpr * NumArchVGPR
Definition: SIProgramInfo.h:70
uint64_t getFunctionCodeSize(const MachineFunction &MF, bool IsLowerBound=false)
const MCExpr * getComputePGMRSrc2(MCContext &Ctx) const
Compute the value of the ComputePGMRsrc2 register.
const MCExpr * VGPRBlocks
Definition: SIProgramInfo.h:36
const MCExpr * ScratchBlocks
Definition: SIProgramInfo.h:52
const MCExpr * ComputePGMRSrc3
Definition: SIProgramInfo.h:67
const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc1 register.
const MCExpr * VCCUsed
Definition: SIProgramInfo.h:97
const MCExpr * FlatUsed
Definition: SIProgramInfo.h:78
uint32_t TrapHandlerEnable
Definition: SIProgramInfo.h:57
const MCExpr * NamedBarCnt
Definition: SIProgramInfo.h:87
const MCExpr * ScratchEnable
Definition: SIProgramInfo.h:55
const MCExpr * AccumOffset
Definition: SIProgramInfo.h:72
const MCExpr * NumAccVGPR
Definition: SIProgramInfo.h:71
const MCExpr * DynamicCallStack
Definition: SIProgramInfo.h:94
const MCExpr * SGPRBlocks
Definition: SIProgramInfo.h:37
const MCExpr * NumVGPRsForWavesPerEU
Definition: SIProgramInfo.h:84
std::optional< uint64_t > CodeSizeInBytes
Definition: SIProgramInfo.h:33
const MCExpr * NumVGPR
Definition: SIProgramInfo.h:69
const MCExpr * getPGMRSrc1(CallingConv::ID CC, const GCNSubtarget &ST, MCContext &Ctx) const
const MCExpr * Occupancy
Definition: SIProgramInfo.h:90
const MCExpr * ScratchSize
Definition: SIProgramInfo.h:48
const MCExpr * NumSGPRsForWavesPerEU
Definition: SIProgramInfo.h:81
void reset(const MachineFunction &MF)