84 std::unique_ptr<MCStreamer> &&Streamer) {
97 std::unique_ptr<MCStreamer> Streamer)
103 return "AMDGPU Assembly Printer";
107 return TM.getMCSubtargetInfo();
120void AMDGPUAsmPrinter::initTargetStreamer(
Module &M) {
126 initializeTargetID(M);
147 initTargetStreamer(M);
155 HSAMetadataStream->end();
170 STM.getCPU() +
" is only available on code object version 6 or better");
176 initializeTargetID(*
F.getParent());
178 const auto &FunctionTargetID = STM.getTargetID();
181 if (FunctionTargetID.isXnackSupported() &&
183 FunctionTargetID.getXnackSetting() !=
getTargetStreamer()->getTargetID()->getXnackSetting()) {
185 "' function does not match module xnack setting");
190 if (FunctionTargetID.isSramEccSupported() &&
192 FunctionTargetID.getSramEccSetting() !=
getTargetStreamer()->getTargetID()->getSramEccSetting()) {
194 "' function does not match module sramecc setting");
201 if (STM.isMesaKernel(
F) &&
205 getAmdKernelCode(KernelCode, CurrentProgramInfo, *
MF);
210 if (STM.isAmdHsaOS())
211 HSAMetadataStream->emitKernel(*
MF, CurrentProgramInfo);
227 Streamer.pushSection();
228 Streamer.switchSection(&ReadOnlySection);
232 Streamer.emitValueToAlignment(
Align(64), 0, 1, 0);
240 STM, KernelName, getAmdhsaKernelDescriptor(*
MF, CurrentProgramInfo),
241 CurrentProgramInfo.NumVGPRsForWavesPerEU,
243 CurrentProgramInfo.NumSGPRsForWavesPerEU,
245 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
248 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
250 Streamer.popSection();
258 OS <<
"implicit-def: "
259 <<
printReg(RegNo,
MF->getSubtarget().getRegisterInfo());
262 OS <<
" : SGPR spill to VGPR lane";
282 if (DumpCodeInstEmitter) {
297 +
"_" +
Twine(
MBB.getNumber()) +
":").str());
309 ": unsupported initializer for address space");
323 "' is already defined");
332 TS->emitAMDGPULDS(GVSym,
Size, Alignment);
343 switch (CodeObjectVersion) {
345 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV4>();
348 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV5>();
351 HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV6>();
361void AMDGPUAsmPrinter::validateMCResourceInfo(
Function &
F) {
368 bool IsLocal =
F.hasLocalLinkage();
372 if (
Value->evaluateAsAbsolute(Val)) {
379 const uint64_t MaxScratchPerWorkitem =
381 MCSymbol *ScratchSizeSymbol = RI.getSymbol(
382 FnSym->getName(), RIK::RIK_PrivateSegSize,
OutContext, IsLocal);
383 uint64_t ScratchSize;
386 ScratchSize > MaxScratchPerWorkitem) {
387 DiagnosticInfoStackSize DiagStackSize(
F, ScratchSize, MaxScratchPerWorkitem,
389 F.getContext().diagnose(DiagStackSize);
395 RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR,
OutContext, IsLocal);
402 NumSgpr > MaxAddressableNumSGPRs) {
403 DiagnosticInfoResourceLimit Diag(
F,
"addressable scalar registers",
404 NumSgpr, MaxAddressableNumSGPRs,
406 F.getContext().diagnose(Diag);
412 RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC,
OutContext, IsLocal);
413 MCSymbol *FlatUsedSymbol = RI.getSymbol(
414 FnSym->getName(), RIK::RIK_UsesFlatScratch,
OutContext, IsLocal);
415 uint64_t VCCUsed, FlatUsed, NumSgpr;
426 &STM, VCCUsed, FlatUsed,
431 if (NumSgpr > MaxAddressableNumSGPRs) {
432 DiagnosticInfoResourceLimit Diag(
F,
"scalar registers", NumSgpr,
435 F.getContext().diagnose(Diag);
441 RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR,
OutContext, IsLocal);
443 RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR,
OutContext, IsLocal);
444 uint64_t NumVgpr, NumAgpr;
446 MachineModuleInfo &
MMI =
448 MachineFunction *
MF =
MMI.getMachineFunction(
F);
452 const SIMachineFunctionInfo &MFI = *
MF->getInfo<SIMachineFunctionInfo>();
454 uint64_t TotalNumVgpr =
456 uint64_t NumVGPRsForWavesPerEU =
457 std::max({TotalNumVgpr, (uint64_t)1,
460 uint64_t NumSGPRsForWavesPerEU = std::max(
470 F,
"amdgpu-waves-per-eu", {0, 0},
true);
472 if (TryGetMCExprValue(OccupancyExpr, Occupancy) && Occupancy < MinWEU) {
473 DiagnosticInfoOptimizationFailure Diag(
474 F,
F.getSubprogram(),
475 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
477 F.getName() +
"': desired occupancy was " + Twine(MinWEU) +
478 ", final occupancy is " + Twine(Occupancy));
479 F.getContext().diagnose(Diag);
517 validateMCResourceInfo(
F);
535void AMDGPUAsmPrinter::emitCommonFunctionComments(
540 OutStreamer->emitRawComment(
" TotalNumSgprs: " + getMCExprStr(NumSGPR),
542 OutStreamer->emitRawComment(
" NumVgprs: " + getMCExprStr(NumVGPR),
false);
543 if (NumAGPR && TotalNumVGPR) {
544 OutStreamer->emitRawComment(
" NumAgprs: " + getMCExprStr(NumAGPR),
false);
545 OutStreamer->emitRawComment(
" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR),
548 OutStreamer->emitRawComment(
" ScratchSize: " + getMCExprStr(ScratchSize),
554const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
556 const SIMachineFunctionInfo &MFI = *
MF.getInfo<SIMachineFunctionInfo>();
557 MCContext &Ctx =
MF.getContext();
558 uint16_t KernelCodeProperties = 0;
562 KernelCodeProperties |=
563 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
566 KernelCodeProperties |=
567 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
570 KernelCodeProperties |=
571 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
574 KernelCodeProperties |=
575 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
578 KernelCodeProperties |=
579 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
582 KernelCodeProperties |=
583 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
586 KernelCodeProperties |=
587 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
589 if (
MF.getSubtarget<GCNSubtarget>().isWave32()) {
590 KernelCodeProperties |=
591 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
598 const MCExpr *KernelCodePropExpr =
601 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx);
606 return KernelCodePropExpr;
612 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
614 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
615 MCContext &Ctx =
MF.getContext();
617 MCKernelDescriptor KernelDescriptor;
623 Align MaxKernArgAlign;
631 int64_t PGRM_Rsrc3 = 1;
632 bool EvaluatableRsrc3 =
633 CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGRM_Rsrc3);
635 (void)EvaluatableRsrc3;
638 static_cast<uint64_t
>(PGRM_Rsrc3) == 0);
645 return KernelDescriptor;
652 initTargetStreamer(*
MF.getFunction().getParent());
656 CurrentProgramInfo.reset(
MF);
669 bool IsLocal =
MF.getFunction().hasLocalLinkage();
680 getSIProgramInfo(CurrentProgramInfo,
MF);
685 EmitPALMetadata(
MF, CurrentProgramInfo);
687 emitPALFunctionMetadata(
MF);
689 EmitProgramInfoSI(
MF, CurrentProgramInfo);
692 DumpCodeInstEmitter =
nullptr;
719 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_NumNamedBarrier,
721 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
725 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
727 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
731 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
745 OutStreamer->emitRawComment(
" Function info:",
false);
747 emitCommonFunctionComments(
750 ->getVariableValue(),
752 ? RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_NumAGPR,
756 RI.createTotalNumVGPRs(
MF, Ctx),
757 RI.createTotalNumSGPRs(
761 RI.getSymbol(
CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
763 ->getVariableValue(),
764 CurrentProgramInfo.getFunctionCodeSize(
MF), MFI);
768 OutStreamer->emitRawComment(
" Kernel info:",
false);
769 emitCommonFunctionComments(
770 CurrentProgramInfo.NumArchVGPR,
771 STM.
hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR :
nullptr,
772 CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,
773 CurrentProgramInfo.ScratchSize,
774 CurrentProgramInfo.getFunctionCodeSize(
MF), MFI);
777 " FloatMode: " +
Twine(CurrentProgramInfo.FloatMode),
false);
779 " IeeeMode: " +
Twine(CurrentProgramInfo.IEEEMode),
false);
781 " LDSByteSize: " +
Twine(CurrentProgramInfo.LDSSize) +
782 " bytes/workgroup (compile time only)",
false);
785 " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.SGPRBlocks),
false);
788 " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.VGPRBlocks),
false);
791 " NumSGPRsForWavesPerEU: " +
792 getMCExprStr(CurrentProgramInfo.NumSGPRsForWavesPerEU),
795 " NumVGPRsForWavesPerEU: " +
796 getMCExprStr(CurrentProgramInfo.NumVGPRsForWavesPerEU),
805 " AccumOffset: " + getMCExprStr(AdjustedAccum),
false);
810 " NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt),
814 " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy),
false);
820 " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
821 getMCExprStr(CurrentProgramInfo.ScratchEnable),
823 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:USER_SGPR: " +
824 Twine(CurrentProgramInfo.UserSGPR),
826 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
827 Twine(CurrentProgramInfo.TrapHandlerEnable),
829 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
830 Twine(CurrentProgramInfo.TGIdXEnable),
832 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
833 Twine(CurrentProgramInfo.TGIdYEnable),
835 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
836 Twine(CurrentProgramInfo.TGIdZEnable),
838 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
839 Twine(CurrentProgramInfo.TIdIGCompCount),
842 [[maybe_unused]] int64_t PGMRSrc3;
845 (CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGMRSrc3) &&
846 static_cast<uint64_t>(PGMRSrc3) == 0));
849 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
851 CurrentProgramInfo.ComputePGMRSrc3,
852 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
853 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
856 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
858 CurrentProgramInfo.ComputePGMRSrc3,
859 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
860 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
865 if (DumpCodeInstEmitter) {
871 std::string Comment =
"\n";
874 Comment +=
" ; " +
HexLines[i] +
"\n";
900 NumVGPRs,
nullptr) ||
901 !NumVGPRs.isAbsolute()) {
911 "too many DVGPR blocks for _dvgpr$ symbol for '" +
915 unsigned EncodedNumBlocks = (NumBlocks - 1) << 3;
922 OutStreamer->emitAssignment(DVgprFuncSym, DVgprFuncVal);
929void AMDGPUAsmPrinter::initializeTargetID(
const Module &M) {
943 if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
944 (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
947 const GCNSubtarget &STM =
TM.getSubtarget<GCNSubtarget>(
F);
948 const IsaInfo::AMDGPUTargetID &STMTargetID = STM.
getTargetID();
949 if (TSTargetID->isXnackSupported())
950 if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
952 if (TSTargetID->isSramEccSupported())
953 if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
954 TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
965 const MCExpr *MaximumTaken =
976void AMDGPUAsmPrinter::getSIProgramInfo(
SIProgramInfo &ProgInfo,
978 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
979 bool IsLocal =
MF.getFunction().hasLocalLinkage();
980 MCContext &Ctx =
MF.getContext();
982 auto CreateExpr = [&Ctx](int64_t
Value) {
986 auto TryGetMCExprValue = [](
const MCExpr *
Value, uint64_t &Res) ->
bool {
988 if (
Value->evaluateAsAbsolute(Val)) {
1003 ProgInfo.
NumArchVGPR = GetSymRefExpr(RIK::RIK_NumVGPR);
1004 ProgInfo.
NumAccVGPR = GetSymRefExpr(RIK::RIK_NumAGPR);
1010 ProgInfo.
NumSGPR = GetSymRefExpr(RIK::RIK_NumSGPR);
1011 ProgInfo.
ScratchSize = GetSymRefExpr(RIK::RIK_PrivateSegSize);
1012 ProgInfo.
VCCUsed = GetSymRefExpr(RIK::RIK_UsesVCC);
1013 ProgInfo.
FlatUsed = GetSymRefExpr(RIK::RIK_UsesFlatScratch);
1016 GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);
1020 GetSymRefExpr(RIK::RIK_NumNamedBarrier), BarBlkConst, Ctx);
1023 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1037 if (TryGetMCExprValue(ProgInfo.
NumSGPR, NumSgpr) &&
1038 NumSgpr > MaxAddressableNumSGPRs) {
1040 LLVMContext &Ctx =
MF.getFunction().getContext();
1041 DiagnosticInfoResourceLimit Diag(
1042 MF.getFunction(),
"addressable scalar registers", NumSgpr,
1045 ProgInfo.
NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);
1059 if (WaveDispatchNumSGPR) {
1067 if (WaveDispatchNumVGPR) {
1069 {ProgInfo.
NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);
1092 if (TryGetMCExprValue(ProgInfo.
NumSGPR, NumSgpr) &&
1093 NumSgpr > MaxAddressableNumSGPRs) {
1096 LLVMContext &Ctx =
MF.getFunction().getContext();
1097 DiagnosticInfoResourceLimit Diag(
MF.getFunction(),
"scalar registers",
1098 NumSgpr, MaxAddressableNumSGPRs,
1101 ProgInfo.
NumSGPR = CreateExpr(MaxAddressableNumSGPRs);
1114 LLVMContext &Ctx =
MF.getFunction().getContext();
1115 DiagnosticInfoResourceLimit Diag(
MF.getFunction(),
"user SGPRs",
1122 LLVMContext &Ctx =
MF.getFunction().getContext();
1123 DiagnosticInfoResourceLimit Diag(
1130 auto GetNumGPRBlocks = [&CreateExpr, &Ctx](
const MCExpr *NumGPR,
1132 const MCExpr *OneConst = CreateExpr(1ul);
1133 const MCExpr *GranuleConst = CreateExpr(Granule);
1135 const MCExpr *AlignToGPR =
1137 const MCExpr *DivGPR =
1152 const SIModeRegisterDefaults
Mode = MFI->
getMode();
1163 unsigned LDSAlignShift;
1164 if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize327680)) {
1167 }
else if (STM.getFeatureBits().test(
1168 FeatureAddressableLocalMemorySize163840)) {
1171 }
else if (STM.getFeatureBits().test(
1172 FeatureAddressableLocalMemorySize65536)) {
1185 alignTo(ProgInfo.
LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
1188 auto DivideCeil = [&Ctx](
const MCExpr *Numerator,
const MCExpr *Denominator) {
1189 const MCExpr *Ceil =
1195 unsigned ScratchAlignShift =
1203 CreateExpr(1ULL << ScratchAlignShift));
1215 unsigned TIDIGCompCnt = 0;
1246 auto SetBits = [&Ctx](
const MCExpr *Dst,
const MCExpr *
Value, uint32_t
Mask,
1259 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
1260 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);
1263 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1264 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
1270 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
1271 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT);
1278 const auto [MinWEU, MaxWEU] =
1281 if (TryGetMCExprValue(ProgInfo.
Occupancy, Occupancy) && Occupancy < MinWEU) {
1282 DiagnosticInfoOptimizationFailure Diag(
1283 F,
F.getSubprogram(),
1284 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
1286 F.getName() +
"': desired occupancy was " + Twine(MinWEU) +
1287 ", final occupancy is " + Twine(Occupancy));
1288 F.getContext().diagnose(Diag);
1292 uint32_t CodeSizeInBytes = (uint32_t)std::min(
1294 (uint64_t)std::numeric_limits<uint32_t>::max());
1295 uint32_t CodeSizeInLines =
divideCeil(CodeSizeInBytes, 128);
1296 uint32_t
Field, Shift, Width;
1298 Field = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE;
1299 Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT;
1300 Width = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH;
1302 Field = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE;
1303 Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT;
1304 Width = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH;
1306 uint64_t InstPrefSize = std::min(CodeSizeInLines, (1u << Width) - 1);
1308 CreateExpr(InstPrefSize),
Field, Shift);
1314 default: [[fallthrough]];
1327 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1328 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1329 unsigned RsrcReg =
getRsrcReg(
MF.getFunction().getCallingConv());
1330 MCContext &Ctx =
MF.getContext();
1333 auto SetBits = [&Ctx](
const MCExpr *
Value, uint32_t
Mask, uint32_t Shift) {
1340 auto EmitResolvedOrExpr = [
this](
const MCExpr *
Value,
unsigned Size) {
1342 if (
Value->evaluateAsAbsolute(Val))
1351 EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx),
1355 EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc2(Ctx), 4);
1362 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1366 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1370 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1381 SetBits(CurrentProgramInfo.VGPRBlocks, 0x3F, 0),
1382 SetBits(CurrentProgramInfo.SGPRBlocks, 0x0F, 6),
1384 EmitResolvedOrExpr(GPRBlocks, 4);
1390 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1394 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1398 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1407 ?
divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1408 : CurrentProgramInfo.LDSBlocks;
1426 unsigned DynamicVGPRBlockSize) {
1427 if (ST.hasIEEEMode())
1439 if (DynamicVGPRBlockSize != 0)
1456 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1457 auto CC =
MF.getFunction().getCallingConv();
1459 auto &Ctx =
MF.getContext();
1461 MD->setEntryPoint(CC,
MF.getFunction().getName());
1462 MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx);
1466 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1469 MD->setHwStage(CC,
".dynamic_vgpr_saved_count",
1474 MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
1477 MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx);
1478 if (MD->getPALMajorVersion() < 3) {
1479 MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);
1481 MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
1483 const MCExpr *HasScratchBlocks =
1487 MD->setRsrc2(CC,
maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
1490 MD->setHwStage(CC,
".debug_mode", (
bool)CurrentProgramInfo.DebugMode);
1492 CurrentProgramInfo.ScratchEnable);
1506 ?
divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1507 : CurrentProgramInfo.LDSBlocks;
1508 if (MD->getPALMajorVersion() < 3) {
1517 const unsigned ExtraLdsDwGranularity =
1519 MD->setGraphicsRegisters(
1520 ".ps_extra_lds_size",
1521 (
unsigned)(ExtraLDSSize * ExtraLdsDwGranularity *
sizeof(uint32_t)));
1524 static StringLiteral
const PsInputFields[] = {
1525 ".persp_sample_ena",
".persp_center_ena",
1526 ".persp_centroid_ena",
".persp_pull_model_ena",
1527 ".linear_sample_ena",
".linear_center_ena",
1528 ".linear_centroid_ena",
".line_stipple_tex_ena",
1529 ".pos_x_float_ena",
".pos_y_float_ena",
1530 ".pos_z_float_ena",
".pos_w_float_ena",
1531 ".front_face_ena",
".ancillary_ena",
1532 ".sample_coverage_ena",
".pos_fixed_pt_ena"};
1536 MD->setGraphicsRegisters(
".spi_ps_input_ena",
Field,
1537 (
bool)((PSInputEna >> Idx) & 1));
1538 MD->setGraphicsRegisters(
".spi_ps_input_addr",
Field,
1539 (
bool)((PSInputAddr >> Idx) & 1));
1545 if (MD->getPALMajorVersion() < 3 && STM.
isWave32())
1546 MD->setWave32(
MF.getFunction().getCallingConv());
1549void AMDGPUAsmPrinter::emitPALFunctionMetadata(
const MachineFunction &MF) {
1551 const MachineFrameInfo &MFI =
MF.getFrameInfo();
1552 StringRef FnName =
MF.getFunction().getName();
1553 MD->setFunctionScratchSize(FnName, MFI.
getStackSize());
1554 const GCNSubtarget &
ST =
MF.getSubtarget<GCNSubtarget>();
1555 MCContext &Ctx =
MF.getContext();
1557 if (MD->getPALMajorVersion() < 3) {
1563 CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
1567 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize());
1571 MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
1572 MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1573 MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1590void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
1597 const SIMachineFunctionInfo *MFI =
MF.getInfo<SIMachineFunctionInfo>();
1598 const GCNSubtarget &STM =
MF.getSubtarget<GCNSubtarget>();
1599 MCContext &Ctx =
MF.getContext();
1604 CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
1606 CurrentProgramInfo.getComputePGMRSrc2(Ctx);
1637 if (STM.isXNACKEnabled())
1640 Align MaxKernArgAlign;
1659 if (ExtraCode && ExtraCode[0]) {
1660 if (ExtraCode[1] != 0)
1663 switch (ExtraCode[0]) {
1675 *
MF->getSubtarget().getRegisterInfo());
1679 int64_t Val = MO.
getImm();
1702void AMDGPUAsmPrinter::emitResourceUsageRemarks(
1708 const char *Name =
"kernel-resource-usage";
1709 const char *Indent =
" ";
1713 if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
1720 auto EmitResourceUsageRemark = [&](
StringRef RemarkName,
1725 std::string LabelStr = RemarkLabel.str() +
": ";
1726 if (RemarkName !=
"FunctionName")
1727 LabelStr = Indent + LabelStr;
1742 EmitResourceUsageRemark(
"FunctionName",
"Function Name",
1743 MF.getFunction().getName());
1744 EmitResourceUsageRemark(
"NumSGPR",
"TotalSGPRs",
1745 getMCExprStr(CurrentProgramInfo.NumSGPR));
1746 EmitResourceUsageRemark(
"NumVGPR",
"VGPRs",
1747 getMCExprStr(CurrentProgramInfo.NumArchVGPR));
1749 EmitResourceUsageRemark(
"NumAGPR",
"AGPRs",
1750 getMCExprStr(CurrentProgramInfo.NumAccVGPR));
1752 EmitResourceUsageRemark(
"ScratchSize",
"ScratchSize [bytes/lane]",
1753 getMCExprStr(CurrentProgramInfo.ScratchSize));
1755 bool DynStackEvaluatable =
1756 CurrentProgramInfo.DynamicCallStack->evaluateAsAbsolute(DynStack);
1757 StringRef DynamicStackStr =
1758 DynStackEvaluatable && DynStack ?
"True" :
"False";
1759 EmitResourceUsageRemark(
"DynamicStack",
"Dynamic Stack", DynamicStackStr);
1760 EmitResourceUsageRemark(
"Occupancy",
"Occupancy [waves/SIMD]",
1761 getMCExprStr(CurrentProgramInfo.Occupancy));
1762 EmitResourceUsageRemark(
"SGPRSpill",
"SGPRs Spill",
1763 CurrentProgramInfo.SGPRSpill);
1764 EmitResourceUsageRemark(
"VGPRSpill",
"VGPRs Spill",
1765 CurrentProgramInfo.VGPRSpill);
1766 if (isModuleEntryFunction)
1767 EmitResourceUsageRemark(
"BytesLDS",
"LDS Size [bytes/block]",
1768 CurrentProgramInfo.LDSSize);
1774 "AMDGPU Assembly Printer",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, const SIProgramInfo &CurrentProgramInfo, CallingConv::ID CC, const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize)
static unsigned getRsrcReg(CallingConv::ID CallConv)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter()
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
static uint32_t getFPMode(SIModeRegisterDefaults Mode)
static const MCExpr * computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx)
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
AMDGPU Assembly printer class.
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
MC infrastructure to propagate the function level resource usage info.
Analyzes how many registers and other resources are used by functions.
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
#define AMD_HSA_BITS_SET(dst, mask, val)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_IS_PTR64
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
#define LLVM_EXTERNAL_VISIBILITY
AMD GCN specific subclass of TargetSubtarget.
OptimizedStructLayoutField Field
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
R600 Assembly printer class.
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define R_0286E8_SPI_TMPRING_SIZE
#define FP_ROUND_MODE_DP(x)
#define C_00B84C_SCRATCH_EN
#define FP_ROUND_ROUND_TO_NEAREST
#define R_0286D0_SPI_PS_INPUT_ADDR
#define R_00B860_COMPUTE_TMPRING_SIZE
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_0286CC_SPI_PS_INPUT_ENA
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
#define FP_DENORM_MODE_DP(x)
#define R_00B848_COMPUTE_PGM_RSRC1
#define FP_ROUND_MODE_SP(x)
#define FP_DENORM_MODE_SP(x)
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define S_00B02C_EXTRA_LDS_SIZE(x)
#define R_00B84C_COMPUTE_PGM_RSRC2
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
static const int BlockSize
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
const MCSubtargetInfo * getGlobalSTI() const
void emitImplicitDef(const MachineInstr *MI) const override
Targets can override this to customize the output of IMPLICIT_DEF instructions in verbose mode.
std::vector< std::string > DisasmLines
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
std::vector< std::string > HexLines
bool IsTargetStreamerInitialized
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void emitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
bool doFinalization(Module &M) override
doFinalization - Virtual method overridden by subclasses to do any necessary clean up after all passes...
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
AMDGPUTargetStreamer * getTargetStreamer() const
static void printRegOperand(MCRegister Reg, raw_ostream &O, const MCRegisterInfo &MRI)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, unsigned DynamicVGPRBlockSize, const GCNSubtarget &STM, MCContext &Ctx)
Mimics GCNSubtarget::computeOccupancy for MCExpr.
static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
uint32_t getLDSSize() const
bool isMemoryBound() const
bool needsWaveLimiter() const
bool isEntryFunction() const
bool isModuleEntryFunction() const
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr)
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitISAVersion()
void initializeTargetID(const MCSubtargetInfo &STI)
virtual void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall)
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)
virtual void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR)
virtual void EmitDirectiveAMDGCNTarget()
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
bool isXnackOnOrAny() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
This class is intended to be used as a driving class for all asm writers.
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
MCSymbol * getSymbol(const GlobalValue *GV) const
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
TargetMachine & TM
Target machine description.
const MCAsmInfo * MAI
Target Asm Printer information.
MachineFunction * MF
The current machine function.
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
void emitFunctionBody()
This method emits the body and trailer for a function.
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MachineOptimizationRemarkEmitter * ORE
Optimization remark emitter.
AsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer, char &ID=AsmPrinter::ID)
MCSymbol * CurrentFnSym
The symbol for the current function.
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
MCContext & OutContext
This is the context for the output file that we are streaming.
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
bool isVerbose() const
Return true if assembly output should contain comments.
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
virtual void emitFunctionEntryLabel()
emitFunctionEntryLabel - Emit the label that is the entry point for the function.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
A parsed version of the target data layout string, and methods for querying it.
DISubprogram * getSubprogram() const
Get the attached subprogram.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasGFX90AInsts() const
bool hasSGPRInitBug() const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool isTgSplitEnabled() const
bool isCuModeEnabled() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool isTrapHandlerEnabled() const
unsigned getMaxNumUserSGPRs() const
Generation getGeneration() const
unsigned getAddressableNumSGPRs() const
unsigned getMaxWaveScratchSize() const
bool hasKernargSegmentPtr() const
bool hasDispatchID() const
bool hasPrivateSegmentBuffer() const
bool hasPrivateSegmentSize() const
bool hasDispatchPtr() const
bool hasFlatScratchInit() const
VisibilityTypes getVisibility() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
unsigned getAddressSpace() const
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MCCodeEmitter * getEmitterPtr() const
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createLOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createGT(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCObjectFileInfo * getObjectFileInfo() const
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name inside this context.
Base class for the full range of assembler expressions which are needed for parsing.
LLVM_ABI bool evaluateAsRelocatable(MCValue &Res, const MCAssembler *Asm) const
Try to evaluate the expression to a relocatable value.
MCSection * getReadOnlySection() const
MCSection * getTextSection() const
MCContext & getContext() const
This represents a section on linux, lots of unix variants and some bare metal systems.
Instances of this class represent a uniqued identifier for a section in the current translation unit.
void ensureMinAlignment(Align MinAlignment)
Makes sure that Alignment is at least MinAlignment.
bool hasInstructions() const
MCContext & getContext() const
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
StringRef getName() const
getName - Get the symbol name.
bool isVariable() const
isVariable - Check if this is a variable symbol.
void redefineIfPossible()
Prepare this symbol to be redefined.
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
MCStreamer & getStreamer()
static const MCUnaryExpr * createNot(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
MCContext & getContext() const
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
A Module instance is used to store all the information related to an LLVM module.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getNumWaveDispatchVGPRs() const
unsigned getNumSpilledVGPRs() const
unsigned getNumWaveDispatchSGPRs() const
unsigned getNumSpilledSGPRs() const
GCNUserSGPRUsageInfo & getUserSGPRInfo()
unsigned getDynamicVGPRBlockSize() const
unsigned getMaxWavesPerEU() const
bool hasWorkGroupIDZ() const
bool hasWorkGroupIDY() const
SIModeRegisterDefaults getMode() const
bool hasWorkGroupInfo() const
bool hasWorkItemIDY() const
bool hasWorkGroupIDX() const
unsigned getNumUserSGPRs() const
unsigned getScratchReservedForDynamicVGPRs() const
bool isDynamicVGPREnabled() const
unsigned getPSInputAddr() const
bool hasWorkItemIDZ() const
unsigned getPSInputEnable() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
StringRef - Represent a constant reference to a string.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
const STC & getSubtarget(const Function &F) const
This method returns a reference to the specified type of TargetSubtargetInfo.
MCSymbol * getSymbol(const GlobalValue *GV) const
OSType getOS() const
Get the parsed operating system type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to a SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
LLVM_READNONE constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX11(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
const MCExpr * maskShiftSet(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Provided with the MCExpr * Val, uint32 Mask and Shift, will return the masked and left shifted,...
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
bool isGFX10Plus(const MCSubtargetInfo &STI)
constexpr std::pair< unsigned, unsigned > getShiftMask(unsigned Value)
Deduce the least significant bit aligned shift and mask values for a binary Complement Value (as they...
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Target & getTheR600Target()
The target for R600 GPUs.
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
@ Success
The lock was released successfully.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Target & getTheGCNTarget()
The target for GCN GPUs.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Implement std::hash so that hash_code can be used in STL containers.
uint64_t kernarg_segment_byte_size
const MCExpr * workitem_private_segment_byte_size
const MCExpr * compute_pgm_resource2_registers
uint8_t kernarg_segment_alignment
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * wavefront_sgpr_count
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
const MCExpr * workitem_vgpr_count
const MCExpr * is_dynamic_callstack
uint32_t workgroup_group_segment_byte_size
const MCExpr * compute_pgm_resource1_registers
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * compute_pgm_rsrc1
const MCExpr * group_segment_fixed_size
const MCExpr * kernel_code_properties
This struct is a compact representation of a valid (non-zero power of two) alignment.
Track resource usage for kernels / entry functions.
const MCExpr * NumArchVGPR
uint64_t getFunctionCodeSize(const MachineFunction &MF, bool IsLowerBound=false)
const MCExpr * getComputePGMRSrc2(MCContext &Ctx) const
Compute the value of the ComputePGMRsrc2 register.
const MCExpr * VGPRBlocks
const MCExpr * ScratchBlocks
const MCExpr * ComputePGMRSrc3
const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc1 register.
uint32_t TrapHandlerEnable
const MCExpr * NamedBarCnt
const MCExpr * ScratchEnable
const MCExpr * AccumOffset
const MCExpr * NumAccVGPR
const MCExpr * DynamicCallStack
const MCExpr * SGPRBlocks
const MCExpr * NumVGPRsForWavesPerEU
const MCExpr * ScratchSize
const MCExpr * NumSGPRsForWavesPerEU
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.