24#include "llvm/IR/IntrinsicsAMDGPU.h"
26#define DEBUG_TYPE "amdgpu-call-lowering"
47 : OutgoingValueHandler(
B,
MRI), MIB(MIB) {}
65 Register ExtReg = extendRegisterMin32(*
this, ValVReg, VA);
72 if (
TRI->isSGPRReg(
MRI, PhysReg)) {
73 LLT Ty =
MRI.getType(ExtReg);
80 ExtReg = MIRBuilder.buildPtrToInt(
S32, ExtReg).getReg(0);
82 ExtReg = MIRBuilder.buildBitcast(
S32, ExtReg).getReg(0);
85 auto ToSGPR = MIRBuilder
86 .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
87 {
MRI.getType(ExtReg)})
89 ExtReg = ToSGPR.getReg(0);
92 MIRBuilder.buildCopy(PhysReg, ExtReg);
101 : IncomingValueHandler(
B,
MRI) {}
106 auto &MFI = MIRBuilder.getMF().getFrameInfo();
110 const bool IsImmutable = !Flags.isByVal();
111 int FI = MFI.CreateFixedObject(
Size,
Offset, IsImmutable);
113 auto AddrReg = MIRBuilder.buildFrameIndex(
115 StackUsed = std::max(StackUsed,
Size +
Offset);
116 return AddrReg.getReg(0);
121 markPhysRegUsed(PhysReg);
126 auto Copy = MIRBuilder.buildCopy(
LLT::scalar(32), PhysReg);
132 MIRBuilder.buildTrunc(ValVReg, Extended);
147 MIRBuilder.buildLoad(ValVReg,
Addr, *MMO);
153 virtual void markPhysRegUsed(
unsigned PhysReg) = 0;
158 : AMDGPUIncomingArgHandler(
B,
MRI) {}
160 void markPhysRegUsed(
unsigned PhysReg)
override {
161 MIRBuilder.getMBB().addLiveIn(PhysReg);
165struct CallReturnHandler :
public AMDGPUIncomingArgHandler {
168 : AMDGPUIncomingArgHandler(MIRBuilder,
MRI), MIB(MIB) {}
170 void markPhysRegUsed(
unsigned PhysReg)
override {
177struct AMDGPUOutgoingArgHandler :
public AMDGPUOutgoingValueHandler {
189 bool IsTailCall =
false,
int FPDiff = 0)
190 : AMDGPUOutgoingValueHandler(MIRBuilder,
MRI, MIB), FPDiff(FPDiff),
191 IsTailCall(IsTailCall) {}
205 return FIReg.getReg(0);
212 if (ST.enableFlatScratch()) {
220 SPReg = MIRBuilder.
buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
227 auto AddrReg = MIRBuilder.
buildPtrAdd(PtrTy, SPReg, OffsetReg);
229 return AddrReg.getReg(0);
250 ? extendRegister(Arg.
Regs[ValRegIndex], VA)
251 : Arg.
Regs[ValRegIndex];
252 assignValueToAddress(ValVReg,
Addr, MemTy, MPO, VA);
264 case TargetOpcode::G_SEXT:
266 case TargetOpcode::G_ZEXT:
268 case TargetOpcode::G_ANYEXT:
278 bool IsVarArg)
const {
285 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
299 auto &MF =
B.getMF();
311 "For each split Type there should be exactly one VReg.");
315 for (
unsigned i = 0; i < SplitEVTs.
size(); ++i) {
316 EVT VT = SplitEVTs[i];
322 unsigned ExtendOp = TargetOpcode::G_ANYEXT;
323 if (RetInfo.Flags[0].isSExt()) {
324 assert(RetInfo.Regs.size() == 1 &&
"expect only simple return values");
325 ExtendOp = TargetOpcode::G_SEXT;
326 }
else if (RetInfo.Flags[0].isZExt()) {
327 assert(RetInfo.Regs.size() == 1 &&
"expect only simple return values");
328 ExtendOp = TargetOpcode::G_ZEXT;
340 if (Reg != RetInfo.Regs[0]) {
341 RetInfo.Regs[0] =
Reg;
351 OutgoingValueAssigner Assigner(AssignFn);
352 AMDGPUOutgoingValueHandler RetHandler(
B, *
MRI, Ret);
365 assert(!Val == VRegs.
empty() &&
"Return value without a vreg");
369 const bool IsWaveEnd =
372 B.buildInstr(AMDGPU::S_ENDPGM)
378 unsigned ReturnOpc = IsWholeWave ? AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN
379 : IsShader ? AMDGPU::SI_RETURN_TO_EPILOG
381 auto Ret =
B.buildInstrNoInsert(ReturnOpc);
385 else if (!lowerReturnVal(
B, Val, VRegs, Ret))
389 addOriginalExecToReturn(
B.getMF(), Ret);
404 Register KernArgSegmentVReg =
MRI.getLiveInVirtReg(KernArgSegmentPtr);
408 B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
413 Align Alignment)
const {
426 for (
ArgInfo &SplitArg : SplitArgs) {
427 Register PtrReg =
B.getMRI()->createGenericVirtualRegister(PtrTy);
428 lowerParameterPtr(PtrReg,
B,
Offset + FieldOffsets[
Idx]);
431 if (SplitArg.Flags[0].isPointer()) {
445 assert(SplitArg.Regs.size() == 1);
447 B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
462 MF.
addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
468 MF.
addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
474 MF.
addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
482 Register VReg =
MRI.createGenericVirtualRegister(P4);
483 MRI.addLiveIn(InputPtrReg, VReg);
484 B.getMBB().addLiveIn(InputPtrReg);
485 B.buildCopy(VReg, InputPtrReg);
491 MF.
addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
497 MF.
addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
503 MF.
addLiveIn(PrivateSegmentSizeReg, &AMDGPU::SGPR_32RegClass);
523 CCState CCInfo(
F.getCallingConv(),
F.isVarArg(), MF, ArgLocs,
F.getContext());
528 const Align KernArgBaseAlign(16);
533 for (
auto &Arg :
F.args()) {
535 if (Arg.hasAttribute(
"amdgpu-hidden-argument")) {
536 LLVM_DEBUG(
dbgs() <<
"Preloading hidden arguments is not supported\n");
540 const bool IsByRef = Arg.hasByRefAttr();
541 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
542 unsigned AllocSize =
DL.getTypeAllocSize(ArgTy);
546 MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
547 Align ABIAlign =
DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
549 uint64_t ArgOffset =
alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
550 ExplicitArgOffset =
alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
552 if (Arg.use_empty()) {
560 unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();
563 "expected only one register for byval pointers");
565 lowerParameterPtr(VRegs[i][0],
B, ArgOffset);
568 Register PtrReg =
MRI.createGenericVirtualRegister(ConstPtrTy);
569 lowerParameterPtr(PtrReg,
B, ArgOffset);
571 B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
574 ArgInfo OrigArg(VRegs[i], Arg, i);
577 lowerParameter(
B, OrigArg, ArgOffset, Alignment);
583 if (
Info->getNumKernargPreloadedSGPRs())
584 Info->setNumWaveDispatchSGPRs(
Info->getNumUserSGPRs());
614 CCState CCInfo(CC,
F.isVarArg(), MF, ArgLocs,
F.getContext());
619 MF.
addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
626 MF.
addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
632 unsigned PSInputNum = 0;
639 for (
auto &Arg :
F.args()) {
640 if (
DL.getTypeStoreSize(Arg.getType()) == 0)
643 if (
Info->isWholeWaveFunction() &&
Idx == 0) {
644 assert(VRegs[
Idx].
size() == 1 &&
"Expected only one register");
647 B.buildInstr(AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
648 .addDef(VRegs[
Idx][0]);
654 const bool InReg = Arg.hasAttribute(Attribute::InReg);
656 if (Arg.hasAttribute(Attribute::SwiftSelf) ||
657 Arg.hasAttribute(Attribute::SwiftError) ||
658 Arg.hasAttribute(Attribute::Nest))
662 const bool ArgUsed = !Arg.use_empty();
663 bool SkipArg = !ArgUsed && !
Info->isPSInputAllocated(PSInputNum);
666 Info->markPSInputAllocated(PSInputNum);
668 Info->markPSInputEnabled(PSInputNum);
704 if ((
Info->getPSInputAddr() & 0x7F) == 0 ||
705 ((
Info->getPSInputAddr() & 0xF) == 0 &&
706 Info->isPSInputAllocated(11))) {
709 Info->markPSInputAllocated(0);
710 Info->markPSInputEnabled(0);
713 if (Subtarget.isAmdPalOS()) {
722 unsigned PsInputBits =
Info->getPSInputAddr() &
Info->getPSInputEnable();
723 if ((PsInputBits & 0x7F) == 0 ||
724 ((PsInputBits & 0xF) == 0 &&
725 (PsInputBits >> 11 & 1)))
736 if (!IsEntryFunc && !IsGraphics) {
740 if (!Subtarget.enableFlatScratch())
752 Info->setNumWaveDispatchSGPRs(
754 Info->setNumWaveDispatchVGPRs(
772 Info->setBytesInStackArgArea(StackSize);
813 "amdgpu-no-dispatch-ptr",
814 "amdgpu-no-queue-ptr",
815 "amdgpu-no-implicitarg-ptr",
816 "amdgpu-no-dispatch-id",
817 "amdgpu-no-workgroup-id-x",
818 "amdgpu-no-workgroup-id-y",
819 "amdgpu-no-workgroup-id-z",
820 "amdgpu-no-lds-kernel-id",
830 for (
auto InputID : InputRegs) {
836 if (
Info.CB->hasFnAttr(ImplicitAttrNames[
I++]))
839 std::tie(OutgoingArg, ArgRC, ArgTy) =
846 std::tie(IncomingArg, IncomingArgRC, ArgTy) =
847 CallerArgInfo.getPreloadedValue(InputID);
848 assert(IncomingArgRC == ArgRC);
850 Register InputReg =
MRI.createGenericVirtualRegister(ArgTy);
853 LI->buildLoadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
855 LI->getImplicitArgPtr(InputReg,
MRI, MIRBuilder);
857 std::optional<uint32_t> Id =
871 ArgRegs.emplace_back(OutgoingArg->
getRegister(), InputReg);
875 LLVM_DEBUG(
dbgs() <<
"Unhandled stack passed implicit input argument\n");
886 std::tie(OutgoingArg, ArgRC, ArgTy) =
889 std::tie(OutgoingArg, ArgRC, ArgTy) =
892 std::tie(OutgoingArg, ArgRC, ArgTy) =
904 const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
905 const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
906 const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
909 const bool NeedWorkItemIDX = !
Info.CB->hasFnAttr(
"amdgpu-no-workitem-id-x");
910 const bool NeedWorkItemIDY = !
Info.CB->hasFnAttr(
"amdgpu-no-workitem-id-y");
911 const bool NeedWorkItemIDZ = !
Info.CB->hasFnAttr(
"amdgpu-no-workitem-id-z");
918 if (ST.getMaxWorkitemID(MF.
getFunction(), 0) != 0) {
919 InputReg =
MRI.createGenericVirtualRegister(
S32);
920 LI->buildLoadInputValue(InputReg, MIRBuilder, IncomingArgX,
921 std::get<1>(WorkitemIDX),
922 std::get<2>(WorkitemIDX));
929 NeedWorkItemIDY && ST.getMaxWorkitemID(MF.
getFunction(), 1) != 0) {
931 LI->buildLoadInputValue(
Y, MIRBuilder, IncomingArgY,
932 std::get<1>(WorkitemIDY), std::get<2>(WorkitemIDY));
939 NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.
getFunction(), 2) != 0) {
941 LI->buildLoadInputValue(Z, MIRBuilder, IncomingArgZ,
942 std::get<1>(WorkitemIDZ), std::get<2>(WorkitemIDZ));
949 (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
950 InputReg =
MRI.createGenericVirtualRegister(
S32);
951 if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
961 IncomingArgX ? *IncomingArgX :
962 IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
963 LI->buildLoadInputValue(InputReg, MIRBuilder, &IncomingArg,
964 &AMDGPU::VGPR_32RegClass,
S32);
970 ArgRegs.emplace_back(OutgoingArg->
getRegister(), InputReg);
975 LLVM_DEBUG(
dbgs() <<
"Unhandled stack passed implicit input argument\n");
984static std::pair<CCAssignFn *, CCAssignFn *>
990 bool IsTailCall,
bool IsWave32,
992 bool IsDynamicVGPRChainCall =
false) {
995 "Indirect calls can't be tail calls, "
996 "because the address can be divergent");
998 return AMDGPU::G_SI_CALL;
1001 if (IsDynamicVGPRChainCall)
1002 return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32_DVGPR
1003 : AMDGPU::SI_CS_CHAIN_TC_W64_DVGPR;
1004 return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;
1008 AMDGPU::SI_TCRETURN;
1015 bool IsDynamicVGPRChainCall =
false) {
1016 if (
Info.Callee.isReg()) {
1019 }
else if (
Info.Callee.isGlobal() &&
Info.Callee.getOffset() == 0) {
1027 if (IsDynamicVGPRChainCall) {
1046 if (CalleeCC == CallerCC)
1052 const auto *
TRI = ST.getRegisterInfo();
1054 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
1055 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
1056 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1063 std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
1068 std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
1074 CalleeAssignFnVarArg);
1076 CallerAssignFnVarArg);
1084 if (OutArgs.
empty())
1109 LLVM_DEBUG(
dbgs() <<
"... Cannot fit call operands on caller's stack.\n");
1116 const uint32_t *CallerPreservedMask =
TRI->getCallPreservedMask(MF, CallerCC);
1125 if (!
Info.IsTailCall)
1130 if (
Info.Callee.isReg())
1139 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
1142 if (!CallerPreserved)
1146 LLVM_DEBUG(
dbgs() <<
"... Calling convention cannot be tail called.\n");
1151 return A.hasByValAttr() || A.hasSwiftErrorAttr();
1153 LLVM_DEBUG(
dbgs() <<
"... Cannot tail call from callers with byval "
1154 "or swifterror arguments\n");
1169 <<
"... Caller and callee have incompatible calling conventions.\n");
1179 LLVM_DEBUG(
dbgs() <<
"... Call is eligible for tail call optimization.\n");
1190 ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs)
const {
1191 if (!ST.enableFlatScratch()) {
1198 ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
1199 : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
1201 MIRBuilder.
buildCopy(CalleeRSrcReg, ScratchRSrcReg);
1205 for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
1214enum ChainCallArgIdx {
1246 CallSeqStart = MIRBuilder.
buildInstr(AMDGPU::ADJCALLSTACKUP);
1249 bool IsDynamicVGPRChainCall =
false;
1252 ArgInfo FlagsArg =
Info.OrigArgs[ChainCallArgIdx::Flags];
1253 const APInt &FlagsValue = cast<ConstantInt>(FlagsArg.
OrigValue)->getValue();
1254 if (FlagsValue.
isZero()) {
1255 if (
Info.OrigArgs.size() != 5) {
1256 LLVM_DEBUG(
dbgs() <<
"No additional args allowed if flags == 0\n");
1260 IsDynamicVGPRChainCall =
true;
1262 if (
Info.OrigArgs.size() != 8) {
1268 if (!ST.isWave32()) {
1270 F,
"dynamic VGPR mode is only supported for wave32"));
1274 ArgInfo FallbackExecArg =
Info.OrigArgs[ChainCallArgIdx::FallbackExec];
1276 "Expected single register for fallback EXEC");
1277 if (!FallbackExecArg.
Ty->
isIntegerTy(ST.getWavefrontSize())) {
1285 ST.isWave32(), CalleeCC, IsDynamicVGPRChainCall);
1297 auto AddRegOrImm = [&](
const ArgInfo &Arg) {
1298 if (
auto CI = dyn_cast<ConstantInt>(Arg.OrigValue)) {
1299 MIB.
addImm(CI->getSExtValue());
1302 unsigned Idx = MIB->getNumOperands() - 1;
1304 MF, *
TRI,
MRI, *
TII, *ST.getRegBankInfo(), *MIB, MIB->getDesc(),
1305 MIB->getOperand(
Idx),
Idx));
1309 ArgInfo ExecArg =
Info.OrigArgs[ChainCallArgIdx::Exec];
1310 assert(ExecArg.
Regs.size() == 1 &&
"Too many regs for EXEC");
1317 AddRegOrImm(ExecArg);
1318 if (IsDynamicVGPRChainCall)
1319 std::for_each(
Info.OrigArgs.begin() + ChainCallArgIdx::NumVGPRs,
1320 Info.OrigArgs.end(), AddRegOrImm);
1324 const uint32_t *Mask =
TRI->getCallPreservedMask(MF, CalleeCC);
1325 MIB.addRegMask(Mask);
1337 unsigned NumBytes = 0;
1342 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1344 CCState OutInfo(CalleeCC,
false, MF, OutLocs,
F.getContext());
1358 FPDiff = NumReusableBytes - NumBytes;
1366 "unaligned stack on tail call");
1391 AMDGPUOutgoingArgHandler Handler(MIRBuilder,
MRI, MIB,
true, FPDiff);
1395 if (
Info.ConvergenceCtrlToken) {
1404 MIB->getOperand(1).setImm(FPDiff);
1422 if (MIB->getOperand(0).isReg()) {
1425 *MIB, MIB->getDesc(), MIB->getOperand(0), 0));
1429 Info.LoweredTailCall =
true;
1446 const Value *CalleeV = Callee.OrigValue->stripPointerCasts();
1447 if (
const Function *
F = dyn_cast<Function>(CalleeV)) {
1449 Info.CallConv =
F->getCallingConv();
1451 assert(Callee.Regs.size() == 1 &&
"Too many regs for the callee");
1458 Info.IsVarArg =
false;
1462 "SGPR arguments should be marked inreg");
1465 "VGPR arguments should not be marked inreg");
1471 Info.IsMustTailCall =
true;
1478 if (
F->isIntrinsic()) {
1479 switch (
F->getIntrinsicID()) {
1480 case Intrinsic::amdgcn_cs_chain:
1482 case Intrinsic::amdgcn_call_whole_wave:
1488 cast<GlobalValue>(
Info.CB->getOperand(0)), 0);
1489 Info.OrigArgs.erase(
Info.OrigArgs.begin());
1490 Info.IsVarArg =
false;
1497 if (
Info.IsVarArg) {
1512 for (
auto &OrigArg :
Info.OrigArgs)
1516 if (
Info.CanLowerReturn && !
Info.OrigRet.Ty->isVoidTy())
1520 bool CanTailCallOpt =
1524 if (
Info.IsMustTailCall && !CanTailCallOpt) {
1525 LLVM_DEBUG(
dbgs() <<
"Failed to lower musttail call as tail call\n");
1529 Info.IsTailCall = CanTailCallOpt;
1536 std::tie(AssignFnFixed, AssignFnVarArg) =
1539 MIRBuilder.
buildInstr(AMDGPU::ADJCALLSTACKUP)
1549 MIB.
addDef(
TRI->getReturnAddressReg(MF));
1551 if (!
Info.IsConvergent)
1559 MIB.addRegMask(Mask);
1581 AMDGPUOutgoingArgHandler Handler(MIRBuilder,
MRI, MIB,
false);
1587 if (
Info.ConvergenceCtrlToken) {
1602 if (MIB->getOperand(1).isReg()) {
1604 MF, *
TRI,
MRI, *ST.getInstrInfo(),
1605 *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
1615 if (
Info.CanLowerReturn && !
Info.OrigRet.Ty->isVoidTy()) {
1619 CallReturnHandler Handler(MIRBuilder,
MRI, MIB);
1625 uint64_t CalleePopBytes = NumBytes;
1627 MIRBuilder.
buildInstr(AMDGPU::ADJCALLSTACKDOWN)
1631 if (!
Info.CanLowerReturn) {
1633 Info.DemoteRegister,
Info.DemoteStackIndex);
1639void AMDGPUCallLowering::addOriginalExecToReturn(
1644 Ret.addReg(Setup->getOperand(0).getReg());
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
static std::pair< CCAssignFn *, CCAssignFn * > getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI)
Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for CC.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc)
static void allocateHSAUserSGPRs(CCState &CCInfo, MachineIRBuilder &B, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
static bool addCallTargetOperands(MachineInstrBuilder &CallInst, MachineIRBuilder &MIRBuilder, AMDGPUCallLowering::CallLoweringInfo &Info, bool IsDynamicVGPRChainCall=false)
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Interface definition for SIRegisterInfo.
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &OutArgs) const
bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &InArgs, SmallVectorImpl< ArgInfo > &OutArgs) const
Returns true if the call can be lowered as a tail call.
bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register > > VRegs) const
bool lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef< Register > VRegs, FunctionLoweringInfo &FLI) const override
This hook behaves as the extended lowerReturn function, but for targets that do not support swifterro...
void handleImplicitCallArguments(MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst, const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI, CallingConv::ID CalleeCC, ArrayRef< std::pair< MCRegister, Register > > ImplicitArgRegs) const
bool areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &OutArgs) const
bool lowerChainCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const
Lower a call to the @llvm.amdgcn.cs.chain intrinsic.
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
bool passSpecialInputs(MachineIRBuilder &MIRBuilder, CCState &CCInfo, SmallVectorImpl< std::pair< MCRegister, Register > > &ArgRegs, CallLoweringInfo &Info) const
bool lowerFormalArguments(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register > > VRegs, FunctionLoweringInfo &FLI) const override
This hook must be implemented to lower the incoming (formal) arguments, described by VRegs,...
bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override
This hook must be implemented to lower the given call instruction, including argument and return valu...
bool doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs) const
static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override
Return the type that should be used to zero or sign extend a zeroext/signext integer return value.
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
Class for arbitrary precision integers.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
bool isOneBitSet(unsigned BitNo) const
Determine if this APInt Value only has the specified bit set.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
LocInfo getLocInfo() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
void insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg, int FI) const
Load the returned value from the stack into virtual registers in VRegs.
bool handleAssignments(ValueHandler &Handler, SmallVectorImpl< ArgInfo > &Args, CCState &CCState, SmallVectorImpl< CCValAssign > &ArgLocs, MachineIRBuilder &MIRBuilder, ArrayRef< Register > ThisReturnRegs={}) const
Use Handler to insert code to handle the argument/return values represented by Args.
bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs, ValueAssigner &CalleeAssigner, ValueAssigner &CallerAssigner) const
void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl< ArgInfo > &SplitArgs, const DataLayout &DL, CallingConv::ID CallConv, SmallVectorImpl< uint64_t > *Offsets=nullptr) const
Break OrigArgInfo into one or more pieces the calling convention can process, returned in SplitArgs.
void insertSRetIncomingArgument(const Function &F, SmallVectorImpl< ArgInfo > &SplitArgs, Register &DemoteReg, MachineRegisterInfo &MRI, const DataLayout &DL) const
Insert the hidden sret ArgInfo to the beginning of SplitArgs.
bool determineAndHandleAssignments(ValueHandler &Handler, ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, bool IsVarArg, ArrayRef< Register > ThisReturnRegs={}) const
Invoke ValueAssigner::assignArg on each of the given Args and then use Handler to move them to the as...
void insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg) const
Store the return value given by VRegs into stack starting at the offset specified in DemoteReg.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< ArgInfo > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool determineAssignments(ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, CCState &CCInfo) const
Analyze the argument list in Args, using Assigner to populate CCInfo.
bool checkReturn(CCState &CCInfo, SmallVectorImpl< BaseArgInfo > &Outs, CCAssignFn *Fn) const
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const
A parsed version of the target data layout string in and methods for querying it.
Diagnostic information for unsupported feature in backend.
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Register DemoteRegister
DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg allocated to hold a pointer to ...
bool CanLowerReturn
CanLowerReturn - true iff the function's return value can be lowered to registers.
iterator_range< arg_iterator > args()
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
const SIRegisterInfo * getRegisterInfo() const override
bool hasKernargSegmentPtr() const
bool hasDispatchID() const
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
bool hasPrivateSegmentSize() const
bool hasDispatchPtr() const
bool hasFlatScratchInit() const
unsigned getAddressSpace() const
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
This is an important class for using LLVM in a threaded context.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setHasTailCall(bool V=true)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV)
Build and insert Res = G_GLOBAL_VALUE GV.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
unsigned getBytesInStackArgArea() const
void setIfReturnsVoid(bool Value)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
AMDGPUFunctionArgInfo & getArgInfo()
void allocateSpecialInputSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
void allocateSpecialInputVGPRsFixed(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments in fixed registers.
void allocateSpecialEntryInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader) const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
LLVM_READNONE constexpr bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)
LLVM_READNONE constexpr bool canGuaranteeTCO(CallingConv::ID CC)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ SIGN_EXTEND
Conversion operators.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed as an argument (RegClass).
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual underlying non-aggregate types that comprise it.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
LLVM_ABI Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO)
ArgDescriptor WorkItemIDZ
ArgDescriptor WorkItemIDY
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(PreloadedValue Value) const
ArgDescriptor WorkItemIDX
This struct is a compact representation of a valid (non-zero power of two) alignment.
MCRegister getRegister() const
static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
Helper struct shared between Function Specialization and SCCP Solver.
const Value * OrigValue
Optionally track the original IR value for the argument.
SmallVector< Register, 4 > Regs
SmallVector< ISD::ArgFlagsTy, 4 > Flags
Base class for ValueHandlers used for arguments coming into the current function, or for return values received from a call.
void assignValueToReg(Register ValVReg, Register PhysReg, const CCValAssign &VA) override
Provides a default implementation for argument handling.
Register buildExtensionHint(const CCValAssign &VA, Register SrcReg, LLT NarrowTy)
Insert G_ASSERT_ZEXT/G_ASSERT_SEXT or other hint instruction based on VA, returning the new register if a hint was inserted.
Base class for ValueHandlers used for arguments passed to a function call, or for return values.
uint64_t StackSize
The size of the currently allocated portion of the stack.
MachineIRBuilder & MIRBuilder
virtual Register getStackAddress(uint64_t MemSize, int64_t Offset, MachinePointerInfo &MPO, ISD::ArgFlagsTy Flags)=0
Materialize a VReg containing the address of the specified stack-based object.
virtual void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, const MachinePointerInfo &MPO, const CCValAssign &VA)=0
The specified value has been assigned to a stack location.
Register extendRegister(Register ValReg, const CCValAssign &VA, unsigned MaxSizeBits=0)
Extend a register to the location type given in VA, capped at extending to at most MaxSize bits.
virtual void assignValueToReg(Register ValVReg, Register PhysReg, const CCValAssign &VA)=0
The specified value has been assigned to a physical register, handle the appropriate COPY (either to or from a physical register).
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR value.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.