40#include "llvm/IR/IntrinsicsAMDGPU.h"
41#include "llvm/IR/IntrinsicsR600.h"
48#define DEBUG_TYPE "amdgpu-promote-alloca"
55 DisablePromoteAllocaToVector(
"disable-promote-alloca-to-vector",
56 cl::desc(
"Disable promote alloca to vector"),
60 DisablePromoteAllocaToLDS(
"disable-promote-alloca-to-lds",
61 cl::desc(
"Disable promote alloca to LDS"),
65 "amdgpu-promote-alloca-to-vector-limit",
66 cl::desc(
"Maximum byte size to consider promote alloca to vector"),
70 "amdgpu-promote-alloca-to-vector-max-regs",
72 "Maximum vector size (in 32b registers) to use when promoting alloca"),
78 "amdgpu-promote-alloca-to-vector-vgpr-ratio",
79 cl::desc(
"Ratio of VGPRs to budget for promoting alloca to vectors"),
83 LoopUserWeight(
"promote-alloca-vector-loop-user-weight",
84 cl::desc(
"The bonus weight of users of allocas within loop "
85 "when sorting profitable allocas"),
89class AMDGPUPromoteAllocaImpl {
100 unsigned VGPRBudgetRatio;
101 unsigned MaxVectorRegs;
103 bool IsAMDGCN =
false;
104 bool IsAMDHSA =
false;
106 std::pair<Value *, Value *> getLocalSizeYZ(
IRBuilder<> &Builder);
111 bool collectUsesWithPtrTypes(
Value *BaseAlloca,
Value *Val,
112 std::vector<Value *> &WorkList)
const;
118 bool binaryOpIsDerivedFromSameAlloca(
Value *Alloca,
Value *Val,
123 bool hasSufficientLocalMem(
const Function &
F);
126 bool tryPromoteAllocaToLDS(
AllocaInst &
I,
bool SufficientLDS);
130 void setFunctionLimits(
const Function &
F);
135 const Triple &TT = TM.getTargetTriple();
136 IsAMDGCN = TT.isAMDGCN();
140 bool run(
Function &
F,
bool PromoteToLDS);
153 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
154 return AMDGPUPromoteAllocaImpl(
156 getAnalysis<LoopInfoWrapperPass>().getLoopInfo())
161 StringRef getPassName()
const override {
return "AMDGPU Promote Alloca"; }
170static unsigned getMaxVGPRs(
unsigned LDSBytes,
const TargetMachine &TM,
172 if (!TM.getTargetTriple().isAMDGCN())
180 if (DynamicVGPRBlockSize == 0 && ST.isDynamicVGPREnabled())
181 DynamicVGPRBlockSize = ST.getDynamicVGPRBlockSize();
183 unsigned MaxVGPRs = ST.getMaxNumVGPRs(
184 ST.getWavesPerEU(ST.getFlatWorkGroupSizes(
F), LDSBytes,
F).first,
185 DynamicVGPRBlockSize);
190 if (!
F.hasFnAttribute(Attribute::AlwaysInline) &&
192 MaxVGPRs = std::min(MaxVGPRs, 32u);
198char AMDGPUPromoteAlloca::ID = 0;
201 "AMDGPU promote alloca to vector or LDS",
false,
false)
214 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(
F,
true);
226 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(
F,
false);
236 return new AMDGPUPromoteAlloca();
242 while (!WorkList.empty()) {
243 auto *Cur = WorkList.pop_back_val();
244 for (
auto &U : Cur->uses()) {
253void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
257 for (
auto *Alloca : Allocas) {
259 unsigned &Score = Scores[Alloca];
263 for (
auto *U :
Uses) {
268 1 + (LoopUserWeight * LI.getLoopDepth(Inst->
getParent()));
269 LLVM_DEBUG(
dbgs() <<
" [+" << UserScore <<
"]:\t" << *Inst <<
"\n");
276 return Scores.
at(
A) > Scores.
at(
B);
281 dbgs() <<
"Sorted Worklist:\n";
282 for (
auto *
A: Allocas)
283 dbgs() <<
" " << *
A <<
"\n";
288void AMDGPUPromoteAllocaImpl::setFunctionLimits(
const Function &
F) {
292 const int R600MaxVectorRegs = 16;
293 MaxVectorRegs =
F.getFnAttributeAsParsedInteger(
294 "amdgpu-promote-alloca-to-vector-max-regs",
295 IsAMDGCN ? PromoteAllocaToVectorMaxRegs : R600MaxVectorRegs);
296 if (PromoteAllocaToVectorMaxRegs.getNumOccurrences())
297 MaxVectorRegs = PromoteAllocaToVectorMaxRegs;
298 VGPRBudgetRatio =
F.getFnAttributeAsParsedInteger(
299 "amdgpu-promote-alloca-to-vector-vgpr-ratio",
300 PromoteAllocaToVectorVGPRRatio);
301 if (PromoteAllocaToVectorVGPRRatio.getNumOccurrences())
302 VGPRBudgetRatio = PromoteAllocaToVectorVGPRRatio;
305bool AMDGPUPromoteAllocaImpl::run(
Function &
F,
bool PromoteToLDS) {
307 DL = &
Mod->getDataLayout();
310 if (!
ST.isPromoteAllocaEnabled())
313 bool SufficientLDS = PromoteToLDS && hasSufficientLocalMem(
F);
314 MaxVGPRs = getMaxVGPRs(CurrentLocalMemUsage, TM,
F);
315 setFunctionLimits(
F);
317 unsigned VectorizationBudget =
318 (PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
327 if (!AI->isStaticAlloca() || AI->isArrayAllocation())
333 sortAllocasToPromote(Allocas);
337 const unsigned AllocaCost =
DL->getTypeSizeInBits(AI->getAllocatedType());
339 if (AllocaCost <= VectorizationBudget) {
342 if (tryPromoteAllocaToVector(*AI)) {
344 assert((VectorizationBudget - AllocaCost) < VectorizationBudget &&
346 VectorizationBudget -= AllocaCost;
348 << VectorizationBudget <<
"\n");
353 << AllocaCost <<
", budget:" << VectorizationBudget
354 <<
"): " << *AI <<
"\n");
357 if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
386 return I->getOperand(0) == AI &&
391 Value *
Ptr,
const std::map<GetElementPtrInst *, WeakTrackingVH> &GEPIdx) {
396 auto I = GEPIdx.find(
GEP);
397 assert(
I != GEPIdx.end() &&
"Must have entry for GEP!");
399 Value *IndexValue =
I->second;
400 assert(IndexValue &&
"index value missing from GEP index map");
410 unsigned BW =
DL.getIndexTypeSizeInBits(
GEP->getType());
412 APInt ConstOffset(BW, 0);
433 if (!CurGEP->collectOffset(
DL, BW, VarOffsets, ConstOffset))
437 CurPtr = CurGEP->getPointerOperand();
440 assert(CurPtr == Alloca &&
"GEP not based on alloca");
442 int64_t VecElemSize =
DL.getTypeAllocSize(VecElemTy);
443 if (VarOffsets.
size() > 1)
451 if (VarOffsets.
size() == 0)
452 return ConstantInt::get(Ctx, IndexQuot);
456 const auto &VarOffset = VarOffsets.
front();
459 if (Rem != 0 || OffsetQuot.
isZero())
467 if (!OffsetQuot.
isOne()) {
469 ConstantInt::get(Ctx, OffsetQuot.
sext(OffsetType->getBitWidth()));
478 ConstantInt::get(Ctx, IndexQuot.
sext(OffsetType->getBitWidth()));
479 Value *IndexAdd = Builder.CreateAdd(
Offset, ConstIndex);
502 unsigned VecStoreSize,
unsigned ElementSize,
504 std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
Value *CurVal,
510 Builder.SetInsertPoint(Inst);
512 const auto GetOrLoadCurrentVectorValue = [&]() ->
Value * {
520 "promotealloca.dummyload");
525 const auto CreateTempPtrIntCast = [&Builder,
DL](
Value *Val,
527 assert(
DL.getTypeStoreSize(Val->getType()) ==
DL.getTypeStoreSize(PtrTy));
528 const unsigned Size =
DL.getTypeStoreSizeInBits(PtrTy);
529 if (!PtrTy->isVectorTy())
530 return Builder.CreateBitOrPointerCast(Val, Builder.getIntNTy(
Size));
534 assert((
Size % NumPtrElts == 0) &&
"Vector size not divisble");
536 return Builder.CreateBitOrPointerCast(
543 case Instruction::Load: {
555 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
557 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
559 CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
561 CurVal = CreateTempPtrIntCast(CurVal, CurVal->
getType());
562 Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
571 const unsigned NumLoadedElts = AccessSize /
DL.getTypeStoreSize(VecEltTy);
573 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
576 for (
unsigned K = 0; K < NumLoadedElts; ++K) {
578 Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
579 SubVec = Builder.CreateInsertElement(
580 SubVec, Builder.CreateExtractElement(CurVal, CurIdx), K);
584 SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
585 else if (SubVecTy->isPtrOrPtrVectorTy())
586 SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);
588 SubVec = Builder.CreateBitOrPointerCast(SubVec, AccessTy);
594 Value *ExtractElement = Builder.CreateExtractElement(CurVal, Index);
595 if (AccessTy != VecEltTy)
596 ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
601 case Instruction::Store: {
608 Value *Val =
SI->getValueOperand();
612 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
614 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
616 Val = CreateTempPtrIntCast(Val, AccessTy);
618 Val = CreateTempPtrIntCast(Val, VectorTy);
619 return Builder.CreateBitOrPointerCast(Val, VectorTy);
626 const unsigned NumWrittenElts =
627 AccessSize /
DL.getTypeStoreSize(VecEltTy);
630 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
632 if (SubVecTy->isPtrOrPtrVectorTy())
633 Val = CreateTempPtrIntCast(Val, SubVecTy);
635 Val = CreateTempPtrIntCast(Val, AccessTy);
637 Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
639 Value *CurVec = GetOrLoadCurrentVectorValue();
640 for (
unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
643 Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
644 CurVec = Builder.CreateInsertElement(
645 CurVec, Builder.CreateExtractElement(Val, K), CurIdx);
650 if (Val->
getType() != VecEltTy)
651 Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
652 return Builder.CreateInsertElement(GetOrLoadCurrentVectorValue(), Val,
655 case Instruction::Call: {
659 unsigned NumCopied =
Length->getZExtValue() / ElementSize;
666 if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
675 return Builder.CreateShuffleVector(GetOrLoadCurrentVectorValue(), Mask);
681 Value *Elt = MSI->getOperand(1);
682 const unsigned BytesPerElt =
DL.getTypeStoreSize(VecEltTy);
683 if (BytesPerElt > 1) {
684 Value *EltBytes = Builder.CreateVectorSplat(BytesPerElt, Elt);
690 Elt = Builder.CreateBitCast(EltBytes, PtrInt);
691 Elt = Builder.CreateIntToPtr(Elt, VecEltTy);
693 Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
700 if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
701 Intr->replaceAllUsesWith(
702 Builder.getIntN(Intr->getType()->getIntegerBitWidth(),
703 DL.getTypeAllocSize(VectorTy)));
732 TypeSize AccTS =
DL.getTypeStoreSize(AccessTy);
736 if (AccTS * 8 !=
DL.getTypeSizeInBits(AccessTy))
748template <
typename InstContainer>
760 auto &BlockUses = UsesByBlock[BB];
763 if (BlockUses.empty())
767 if (BlockUses.size() == 1) {
774 if (!BlockUses.contains(&Inst))
795bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(
AllocaInst &Alloca) {
796 LLVM_DEBUG(
dbgs() <<
"Trying to promote to vector: " << Alloca <<
'\n');
798 if (DisablePromoteAllocaToVector) {
806 uint64_t NumElems = 1;
809 NumElems *= ArrayTy->getNumElements();
810 ElemTy = ArrayTy->getElementType();
816 NumElems *= InnerVectorTy->getNumElements();
817 ElemTy = InnerVectorTy->getElementType();
821 unsigned ElementSize =
DL->getTypeSizeInBits(ElemTy) / 8;
822 if (ElementSize > 0) {
823 unsigned AllocaSize =
DL->getTypeStoreSize(AllocaTy);
828 if (NumElems * ElementSize != AllocaSize)
829 NumElems = AllocaSize / ElementSize;
830 if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
841 const unsigned MaxElements =
842 (MaxVectorRegs * 32) /
DL->getTypeSizeInBits(VectorTy->getElementType());
844 if (VectorTy->getNumElements() > MaxElements ||
845 VectorTy->getNumElements() < 2) {
847 <<
" has an unsupported number of elements\n");
851 std::map<GetElementPtrInst *, WeakTrackingVH> GEPVectorIdx;
859 LLVM_DEBUG(
dbgs() <<
" Cannot promote alloca to vector: " << Msg <<
"\n"
860 <<
" " << *Inst <<
"\n");
861 for (
auto *Inst :
reverse(NewGEPInsts))
869 LLVM_DEBUG(
dbgs() <<
" Attempting promotion to: " << *VectorTy <<
"\n");
871 Type *VecEltTy = VectorTy->getElementType();
872 unsigned ElementSizeInBits =
DL->getTypeSizeInBits(VecEltTy);
873 if (ElementSizeInBits !=
DL->getTypeAllocSizeInBits(VecEltTy)) {
874 LLVM_DEBUG(
dbgs() <<
" Cannot convert to vector if the allocation size "
875 "does not match the type's size\n");
878 unsigned ElementSize = ElementSizeInBits / 8;
880 for (
auto *U :
Uses) {
887 return RejectUser(Inst,
"pointer is being stored");
891 return RejectUser(Inst,
"unsupported load/store as aggregate");
898 return RejectUser(Inst,
"not a simple load or store");
900 Ptr =
Ptr->stripPointerCasts();
904 DL->getTypeStoreSize(AccessTy)) {
910 return RejectUser(Inst,
"not a supported access type");
921 return RejectUser(Inst,
"cannot compute vector index for GEP");
935 if (TransferInst->isVolatile())
936 return RejectUser(Inst,
"mem transfer inst is volatile");
939 if (!Len || (
Len->getZExtValue() % ElementSize))
940 return RejectUser(Inst,
"mem transfer inst length is non-constant or "
941 "not a multiple of the vector element size");
943 if (TransferInfo.
try_emplace(TransferInst).second) {
950 if (
Ptr != &Alloca && !GEPVectorIdx.count(
GEP))
956 unsigned OpNum =
U->getOperandNo();
957 MemTransferInfo *TI = &TransferInfo[TransferInst];
959 Value *Dest = TransferInst->getDest();
962 return RejectUser(Inst,
"could not calculate constant dest index");
966 Value *Src = TransferInst->getSource();
969 return RejectUser(Inst,
"could not calculate constant src index");
976 if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
985 return RejectUser(Inst,
"assume-like intrinsic cannot have any users");
991 return isAssumeLikeIntrinsic(cast<Instruction>(U));
997 return RejectUser(Inst,
"unhandled alloca user");
1000 while (!DeferredInsts.
empty()) {
1005 MemTransferInfo &
Info = TransferInfo[TransferInst];
1006 if (!
Info.SrcIndex || !
Info.DestIndex)
1008 Inst,
"mem transfer inst is missing constant src and/or dst index");
1011 LLVM_DEBUG(
dbgs() <<
" Converting alloca to vector " << *AllocaTy <<
" -> "
1012 << *VectorTy <<
'\n');
1013 const unsigned VecStoreSize =
DL->getTypeStoreSize(VectorTy);
1018 Updater.
Initialize(VectorTy,
"promotealloca");
1024 Value *AllocaInitValue =
1026 AllocaInitValue->
takeName(&Alloca);
1037 I, *
DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1050 I, *
DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1054 assert(NewDLs.
empty() &&
"No more deferred loads should be queued!");
1060 InstsToDelete.insert_range(DeferredLoads);
1063 I->eraseFromParent();
1068 I->dropDroppableUses();
1070 I->eraseFromParent();
1079std::pair<Value *, Value *>
1080AMDGPUPromoteAllocaImpl::getLocalSizeYZ(
IRBuilder<> &Builder) {
1090 ST.makeLIDRangeMetadata(LocalSizeY);
1091 ST.makeLIDRangeMetadata(LocalSizeZ);
1093 return std::pair(LocalSizeY, LocalSizeZ);
1134 F.removeFnAttr(
"amdgpu-no-dispatch-ptr");
1151 LoadXY->
setMetadata(LLVMContext::MD_invariant_load, MD);
1152 LoadZU->
setMetadata(LLVMContext::MD_invariant_load, MD);
1153 ST.makeLIDRangeMetadata(LoadZU);
1158 return std::pair(
Y, LoadZU);
1170 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
1172 AttrName =
"amdgpu-no-workitem-id-x";
1175 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
1177 AttrName =
"amdgpu-no-workitem-id-y";
1181 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
1183 AttrName =
"amdgpu-no-workitem-id-z";
1191 ST.makeLIDRangeMetadata(CI);
1192 F->removeFnAttr(AttrName);
1202 switch (
II->getIntrinsicID()) {
1203 case Intrinsic::memcpy:
1204 case Intrinsic::memmove:
1205 case Intrinsic::memset:
1206 case Intrinsic::lifetime_start:
1207 case Intrinsic::lifetime_end:
1208 case Intrinsic::invariant_start:
1209 case Intrinsic::invariant_end:
1210 case Intrinsic::launder_invariant_group:
1211 case Intrinsic::strip_invariant_group:
1212 case Intrinsic::objectsize:
1219bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
1241 if (OtherObj != BaseAlloca) {
1243 dbgs() <<
"Found a binary instruction with another alloca object\n");
1250bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
1251 Value *BaseAlloca,
Value *Val, std::vector<Value *> &WorkList)
const {
1261 WorkList.push_back(
User);
1266 if (UseInst->
getOpcode() == Instruction::PtrToInt)
1270 if (LI->isVolatile())
1276 if (
SI->isVolatile())
1280 if (
SI->getPointerOperand() != Val)
1286 if (RMW->isVolatile())
1292 if (CAS->isVolatile())
1300 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
1304 WorkList.push_back(ICmp);
1311 if (!
GEP->isInBounds())
1316 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val,
SI, 1, 2))
1323 switch (
Phi->getNumIncomingValues()) {
1327 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
1344 WorkList.push_back(
User);
1345 if (!collectUsesWithPtrTypes(BaseAlloca,
User, WorkList))
1352bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(
const Function &
F) {
1360 for (
Type *ParamTy : FTy->params()) {
1364 LLVM_DEBUG(
dbgs() <<
"Function has local memory argument. Promoting to "
1365 "local memory disabled.\n");
1370 LocalMemLimit =
ST.getAddressableLocalMemorySize();
1371 if (LocalMemLimit == 0)
1381 if (
Use->getParent()->getParent() == &
F)
1385 if (VisitedConstants.
insert(
C).second)
1397 if (visitUsers(&GV, &GV)) {
1405 while (!
Stack.empty()) {
1407 if (visitUsers(&GV,
C)) {
1428 LLVM_DEBUG(
dbgs() <<
"Function has a reference to externally allocated "
1429 "local memory. Promoting to local memory "
1444 CurrentLocalMemUsage = 0;
1450 for (
auto Alloc : AllocatedSizes) {
1451 CurrentLocalMemUsage =
alignTo(CurrentLocalMemUsage,
Alloc.second);
1452 CurrentLocalMemUsage +=
Alloc.first;
1455 unsigned MaxOccupancy =
1456 ST.getWavesPerEU(
ST.getFlatWorkGroupSizes(
F), CurrentLocalMemUsage,
F)
1460 unsigned MaxSizeWithWaveCount =
1461 ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy,
F);
1464 if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
1467 LocalMemLimit = MaxSizeWithWaveCount;
1470 <<
" bytes of LDS\n"
1471 <<
" Rounding size to " << MaxSizeWithWaveCount
1472 <<
" with a maximum occupancy of " << MaxOccupancy <<
'\n'
1473 <<
" and " << (LocalMemLimit - CurrentLocalMemUsage)
1474 <<
" available for promotion\n");
1480bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(
AllocaInst &
I,
1481 bool SufficientLDS) {
1484 if (DisablePromoteAllocaToLDS) {
1505 <<
" promote alloca to LDS not supported with calling convention.\n");
1514 unsigned WorkGroupSize =
ST.getFlatWorkGroupSizes(ContainingFunction).second;
1517 DL.getValueOrABITypeAlignment(
I.getAlign(),
I.getAllocatedType());
1525 uint32_t NewSize =
alignTo(CurrentLocalMemUsage, Alignment);
1526 uint32_t AllocSize =
1527 WorkGroupSize *
DL.getTypeAllocSize(
I.getAllocatedType());
1528 NewSize += AllocSize;
1530 if (NewSize > LocalMemLimit) {
1532 <<
" bytes of local memory not available to promote\n");
1536 CurrentLocalMemUsage = NewSize;
1538 std::vector<Value *> WorkList;
1540 if (!collectUsesWithPtrTypes(&
I, &
I, WorkList)) {
1552 Twine(
F->getName()) +
Twine(
'.') +
I.getName(),
nullptr,
1557 Value *TCntY, *TCntZ;
1559 std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
1560 Value *TIdX = getWorkitemID(Builder, 0);
1561 Value *TIdY = getWorkitemID(Builder, 1);
1562 Value *TIdZ = getWorkitemID(Builder, 2);
1576 I.eraseFromParent();
1582 for (
Value *V : WorkList) {
1604 assert(
V->getType()->isPtrOrPtrVectorTy());
1606 Type *NewTy =
V->getType()->getWithNewType(NewPtrTy);
1607 V->mutateType(NewTy);
1617 for (
unsigned I = 0,
E =
Phi->getNumIncomingValues();
I !=
E; ++
I) {
1619 Phi->getIncomingValue(
I)))
1630 case Intrinsic::lifetime_start:
1631 case Intrinsic::lifetime_end:
1635 case Intrinsic::memcpy:
1636 case Intrinsic::memmove:
1642 case Intrinsic::memset: {
1650 case Intrinsic::invariant_start:
1651 case Intrinsic::invariant_end:
1652 case Intrinsic::launder_invariant_group:
1653 case Intrinsic::strip_invariant_group: {
1671 case Intrinsic::objectsize: {
1675 Intrinsic::objectsize,
1691 assert(
ID == Intrinsic::memcpy ||
ID == Intrinsic::memmove);
1695 ID,
MI->getRawDest(),
MI->getDestAlign(),
MI->getRawSource(),
1696 MI->getSourceAlign(),
MI->getLength(),
MI->isVolatile());
1698 for (
unsigned I = 0;
I != 2; ++
I) {
1700 B->addDereferenceableParamAttr(
I, Bytes);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
static bool runOnFunction(Function &F, bool PostInlining)
AMD GCN specific subclass of TargetSubtarget.
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Target-Independent Code Generator Pass Configuration Options pass.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Class for arbitrary precision integers.
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isOne() const
Determine if this is a value of 1.
an instruction to allocate memory on the stack
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
An instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
Represents analyses that only rely on functions' control flow.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
void addDereferenceableRetAttr(uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Class to represent function types.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool hasExternalLinkage() const
void setUnnamedAddr(UnnamedAddr Val)
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
@ InternalLinkage
Rename collisions when linking (static functions).
Type * getValueType() const
MaybeAlign getAlign() const
Returns the alignment of the given variable.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, MaybeAlign Align, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Create and insert a memset to the specified pointer and the specified value.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI CallInst * CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a parent module.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
The legacy pass manager's analysis pass to compute loop information.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
std::pair< KeyT, ValueT > & front()
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Class to represent pointers.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Helper class for SSA formation on a set of values defined in multiple blocks.
Value * FindValueForBlock(BasicBlock *BB) const
Return the value for the specified block if the SSAUpdater has one, otherwise return nullptr.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' size.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isArrayTy() const
True if this is an instance of ArrayType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isAggregateType() const
Return true if the type is an aggregate type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Type * getElementType() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getDynamicVGPRBlockSize(const Function &F)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ C
The default llvm calling convention, compatible with C.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAMDGPUPromoteAlloca()
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
char & AMDGPUPromoteAllocaID
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
This struct is a compact representation of a valid (non-zero power of two) alignment.
A MapVector that performs no allocations if smaller than a certain size.
Function object to check whether the second component of a container supported by std::get (like std:...