#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-lower-buffer-fat-pointers"
  void clear() { Map.clear(); }

class BufferFatPtrToIntTypeMap : public BufferFatPtrTypeLoweringBase {
  using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
  // ...
};

class BufferFatPtrToStructTypeMap : public BufferFatPtrTypeLoweringBase {
  using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
  // ...
};
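// A sketch of the two lowerings these maps implement (widths follow this
// pass's conventions: a 128-bit addrspace(8) resource plus a 32-bit offset):
//   ptr addrspace(7)        -->  i160                        (int map, the
//                                in-memory form used for loads/stores)
//   ptr addrspace(7)        -->  { ptr addrspace(8), i32 }   (struct map, the
//                                SSA-value form)
//   <2 x ptr addrspace(7)>  -->  { <2 x ptr addrspace(8)>, <2 x i32> }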
Type *BufferFatPtrTypeLoweringBase::remapTypeImpl(Type *Ty) {
  Type **Entry = &Map[Ty];
  if (*Entry)
    return *Entry;
  if (auto *PT = dyn_cast<PointerType>(Ty)) {
    if (PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER)
      return *Entry = remapScalar(PT);
  }
  if (auto *VT = dyn_cast<VectorType>(Ty)) {
    auto *PT = dyn_cast<PointerType>(VT->getElementType());
    if (PT && PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER)
      return *Entry = remapVector(VT);
  }
  // ...
  StructType *TyAsStruct = dyn_cast<StructType>(Ty);
  bool IsUniqued = !TyAsStruct || TyAsStruct->isLiteral();
  // ...
  SmallVector<Type *> ElementTypes(Ty->getNumContainedTypes());
  bool Changed = false;
  for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) {
    Type *OldElem = Ty->getContainedType(I);
    Type *NewElem = remapTypeImpl(OldElem);
    ElementTypes[I] = NewElem;
    Changed |= (OldElem != NewElem);
  }
  // ...
  if (auto *ArrTy = dyn_cast<ArrayType>(Ty))
    return *Entry = ArrayType::get(ElementTypes[0], ArrTy->getNumElements());
  if (auto *FnTy = dyn_cast<FunctionType>(Ty))
    return *Entry = FunctionType::get(ElementTypes[0],
                                      ArrayRef(ElementTypes).slice(1),
                                      FnTy->isVarArg());
  if (auto *STy = dyn_cast<StructType>(Ty)) {
    // ...
  }
  // ...
}
Type *BufferFatPtrTypeLoweringBase::remapType(Type *SrcTy) {
  return remapTypeImpl(SrcTy);
}
/// Returns true if Ty is the struct form this pass produces for a buffer fat
/// pointer: a literal two-element struct of a buffer resource pointer and a
/// 32-bit offset (or vectors thereof).
static bool isSplitFatPtr(Type *Ty) {
  auto *ST = dyn_cast<StructType>(Ty);
  if (!ST)
    return false;
  if (!ST->isLiteral() || ST->getNumElements() != 2)
    return false;
  auto *MaybeRsrc =
      dyn_cast<PointerType>(ST->getElementType(0)->getScalarType());
  auto *MaybeOff =
      dyn_cast<IntegerType>(ST->getElementType(1)->getScalarType());
  return MaybeRsrc && MaybeOff &&
         MaybeRsrc->getAddressSpace() == AMDGPUAS::BUFFER_RESOURCE &&
         MaybeOff->getBitWidth() == BufferOffsetWidth;
}

// ... (a Use-based variant of the operand predicate, header elided:)
//   return isBufferFatPtrOrVector(U.get()->getType());
/// Rewrites loads and stores of buffer fat pointers as integer loads/stores
/// (with casts at the boundaries), and expands mem-transfer intrinsics that
/// touch fat pointers into loops.
class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
    : public InstVisitor<StoreFatPtrsAsIntsAndExpandMemcpyVisitor, bool> {
  BufferFatPtrToIntTypeMap *TypeMap;
  // ...
public:
  StoreFatPtrsAsIntsAndExpandMemcpyVisitor(BufferFatPtrToIntTypeMap *TypeMap,
                                           const DataLayout &DL,
                                           LLVMContext &Ctx,
                                           const TargetMachine *TM)
  // ...
Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::fatPtrsToInts(
    Value *V, Type *From, Type *To, const Twine &Name) {
  // ...
  auto Find = ConvertedForStore.find(V);
  if (Find != ConvertedForStore.end())
    return Find->second;
  if (isBufferFatPtrOrVector(From)) {
    Value *Cast = IRB.CreatePtrToInt(V, To, Name + ".int");
    ConvertedForStore[V] = Cast;
    return Cast;
  }
  if (From->getNumContainedTypes() == 0)
    return V;
  // ...
  if (auto *AT = dyn_cast<ArrayType>(From)) {
    Type *FromPart = AT->getArrayElementType();
    Type *ToPart = cast<ArrayType>(To)->getElementType();
    for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) {
      // ... convert each field with a recursive fatPtrsToInts() call ...
      Ret = IRB.CreateInsertValue(Ret, NewField, I);
    }
  } else {
    for (auto [Idx, FromPart, ToPart] :
         enumerate(From->subtypes(), To->subtypes())) {
      // ...
      Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
    }
  }
  ConvertedForStore[V] = Ret;
  return Ret;
}
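// Illustrative IR for this conversion (a sketch; the addrspace(5) slot is an
// assumption for the example): storing a fat pointer becomes an integer store
// of its in-memory form,
//   store ptr addrspace(7) %p, ptr addrspace(5) %slot
// -->
//   %p.int = ptrtoint ptr addrspace(7) %p to i160
//   store i160 %p.int, ptr addrspace(5) %slot
// The ".int" suffix matches the CreatePtrToInt() call above.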
Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::intsToFatPtrs(
    Value *V, Type *From, Type *To, const Twine &Name) {
  // ...
  if (isBufferFatPtrOrVector(To)) {
    Value *Cast = IRB.CreateIntToPtr(V, To, Name + ".ptr");
    return Cast;
  }
  if (From->getNumContainedTypes() == 0)
    return V;
  // ...
  if (auto *AT = dyn_cast<ArrayType>(From)) {
    Type *FromPart = AT->getArrayElementType();
    Type *ToPart = cast<ArrayType>(To)->getElementType();
    for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) {
      // ... convert each field with a recursive intsToFatPtrs() call ...
      Ret = IRB.CreateInsertValue(Ret, NewField, I);
    }
  } else {
    for (auto [Idx, FromPart, ToPart] :
         enumerate(From->subtypes(), To->subtypes())) {
      // ...
      Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
    }
  }
  return Ret;
}
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::processFunction(Function &F) {
  bool Changed = false;
  // Mem-transfer intrinsics are queued and rewritten last, since expanding
  // them into loops invalidates the instruction iterator.
  // ...
    if (isa<MemTransferInst, MemSetInst, MemSetPatternInst>(I))
      // ... queue I (as a weak tracking handle) ...
    else
      Changed |= visit(I);
  // ...
    Changed |= visit(cast<Instruction>(VH));
  ConvertedForStore.clear();
  return Changed;
}
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitAllocaInst(AllocaInst &I) {
  Type *Ty = I.getAllocatedType();
  Type *NewTy = TypeMap->remapType(Ty);
  if (Ty == NewTy)
    return false;
  I.setAllocatedType(NewTy);
  return true;
}
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitGetElementPtrInst(
    GetElementPtrInst &I) {
  Type *Ty = I.getSourceElementType();
  Type *NewTy = TypeMap->remapType(Ty);
  if (Ty == NewTy)
    return false;
  // ...
  I.setSourceElementType(NewTy);
  I.setResultElementType(TypeMap->remapType(I.getResultElementType()));
  return true;
}
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitLoadInst(LoadInst &LI) {
  Type *Ty = LI.getType();
  Type *IntTy = TypeMap->remapType(Ty);
  if (Ty == IntTy)
    return false;

  IRB.SetInsertPoint(&LI);
  auto *NLI = cast<LoadInst>(LI.clone());
  NLI->mutateType(IntTy);
  NLI = IRB.Insert(NLI);
  // ...
  Value *CastBack = intsToFatPtrs(NLI, IntTy, Ty, NLI->getName());
  LI.replaceAllUsesWith(CastBack);
  LI.eraseFromParent();
  return true;
}
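// The load direction mirrors the store rewrite (a sketch, again assuming an
// addrspace(5) slot): the value is loaded as an integer and cast back,
//   %v = load ptr addrspace(7), ptr addrspace(5) %slot
// -->
//   %v1 = load i160, ptr addrspace(5) %slot
//   %v1.ptr = inttoptr i160 %v1 to ptr addrspace(7)
// so no load ever produces a fat-pointer-typed value directly.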
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitStoreInst(StoreInst &SI) {
  Value *V = SI.getValueOperand();
  Type *Ty = V->getType();
  Type *IntTy = TypeMap->remapType(Ty);
  if (Ty == IntTy)
    return false;

  IRB.SetInsertPoint(&SI);
  Value *IntV = fatPtrsToInts(V, Ty, IntTy, V->getName());
  // ...
  SI.setOperand(0, IntV);
  return true;
}
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemCpyInst(
    MemCpyInst &MCI) {
  // ... expanded as a loop when either operand is a buffer fat pointer ...
}

bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemMoveInst(
    MemMoveInst &MMI) {
  // ...
  reportFatalUsageError(
      "memmove() on buffer descriptors is not implemented because pointer "
      "comparison on buffer descriptors isn't implemented\n");
}

bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetInst(
    MemSetInst &MSI) {
  // ...
}

bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetPatternInst(
    MemSetPatternInst &MSPI) {
  // ...
}
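// Note on the four mem-intrinsic visitors above: there is no
// buffer-descriptor-aware memcpy/memset operation to call, so any transfer
// that reads or writes through ptr addrspace(7) is expanded into an explicit
// load/store loop using LLVM's generic expandMemCpyAsLoop() /
// expandMemSetAsLoop() / expandMemSetPatternAsLoop() utilities; the loop
// bodies are then legalized like any other load or store. memmove stays
// unimplemented because it would need the (unimplemented) fat-pointer
// ordering comparison.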
class LegalizeBufferContentTypesVisitor
    : public InstVisitor<LegalizeBufferContentTypesVisitor, bool> {
  // ...
  Type *scalarArrayTypeAsVector(Type *MaybeArrayType);
  // ...
  Type *intrinsicTypeFor(Type *LegalType);
  // ...
  std::pair<bool, bool> visitStoreImpl(StoreInst &OrigSI, Type *PartType,
                                       SmallVectorImpl<uint32_t> &AggIdxs,
                                       uint64_t AggByteOff, const Twine &Name);
  // ...
Type *LegalizeBufferContentTypesVisitor::scalarArrayTypeAsVector(Type *T) {
  ArrayType *AT = dyn_cast<ArrayType>(T);
  if (!AT)
    return T;
  Type *ET = AT->getElementType();
  if (/* ... non-vectorizable element types ... */)
    reportFatalUsageError(/* ... */ "should have recursed");
  if (!DL.typeSizeEqualsStoreSize(AT))
    reportFatalUsageError(
        "loading padded arrays from buffer fat pointers should have recursed");
  return FixedVectorType::get(ET, AT->getNumElements());
}
Value *LegalizeBufferContentTypesVisitor::arrayToVector(Value *V,
                                                        Type *TargetType,
                                                        const Twine &Name) {
  // ...
  auto *VT = cast<FixedVectorType>(TargetType);
  unsigned EC = VT->getNumElements();
  // ... for each index I in [0, EC):
    VectorRes = IRB.CreateInsertElement(VectorRes, Elem, I,
                                        Name + ".elem." + Twine(I));
  // ...
  return VectorRes;
}

Value *LegalizeBufferContentTypesVisitor::vectorToArray(Value *V,
                                                        Type *OrigType,
                                                        const Twine &Name) {
  // ...
  ArrayType *AT = cast<ArrayType>(OrigType);
  unsigned EC = AT->getNumElements();
  // ... for each index I in [0, EC):
    ArrayRes = IRB.CreateInsertValue(ArrayRes, Elem, I,
                                     Name + ".elem." + Twine(I));
  // ...
  return ArrayRes;
}
Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(Type *T) {
  TypeSize Size = DL.getTypeStoreSizeInBits(T);
  // Implicitly zero-extend to the next byte if needed.
  if (!DL.typeSizeEqualsStoreSize(T))
    T = IRB.getIntNTy(Size.getFixedValue());
  Type *ElemTy = T->getScalarType();
  if (isa<PointerType, ScalableVectorType>(ElemTy)) {
    // ... these are already (or must be left) legal ...
    return T;
  }
  unsigned ElemSize = DL.getTypeSizeInBits(ElemTy).getFixedValue();
  if (isPowerOf2_32(ElemSize) && ElemSize >= 16 && ElemSize <= 128) {
    // ... power-of-two elements between 16 and 128 bits are already legal ...
    return T;
  }
  Type *BestVectorElemType = nullptr;
  if (Size.isKnownMultipleOf(32))
    BestVectorElemType = IRB.getInt32Ty();
  else if (Size.isKnownMultipleOf(16))
    BestVectorElemType = IRB.getInt16Ty();
  else
    BestVectorElemType = IRB.getInt8Ty();
  unsigned NumCastElems =
      Size.getFixedValue() / BestVectorElemType->getIntegerBitWidth();
  if (NumCastElems == 1)
    return BestVectorElemType;
  return FixedVectorType::get(BestVectorElemType, NumCastElems);
}
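// Worked example for legalNonAggregateFor(): a <6 x i8> value is 48 bits.
// i8 elements are below the 16-bit minimum, and 48 is not a multiple of 32
// but is a multiple of 16, so BestVectorElemType is i16 and the legal form
// is <3 x i16>. An i48 scalar lands in the same bucket and also becomes
// <3 x i16>.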
Value *LegalizeBufferContentTypesVisitor::makeLegalNonAggregate(
    Value *V, Type *TargetType, const Twine &Name) {
  Type *SourceType = V->getType();
  TypeSize SourceSize = DL.getTypeSizeInBits(SourceType);
  TypeSize TargetSize = DL.getTypeSizeInBits(TargetType);
  if (SourceSize != TargetSize) {
    Type *ShortScalarTy = IRB.getIntNTy(SourceSize.getFixedValue());
    Type *ByteScalarTy = IRB.getIntNTy(TargetSize.getFixedValue());
    Value *AsScalar = IRB.CreateBitCast(V, ShortScalarTy, Name + ".as.scalar");
    Value *Zext = IRB.CreateZExt(AsScalar, ByteScalarTy, Name + ".zext");
    V = Zext;
    SourceType = ByteScalarTy;
  }
  return IRB.CreateBitCast(V, TargetType, Name + ".legal");
}
Value *LegalizeBufferContentTypesVisitor::makeIllegalNonAggregate(
    Value *V, Type *OrigType, const Twine &Name) {
  Type *LegalType = V->getType();
  TypeSize LegalSize = DL.getTypeSizeInBits(LegalType);
  TypeSize OrigSize = DL.getTypeSizeInBits(OrigType);
  if (LegalSize != OrigSize) {
    Type *ShortScalarTy = IRB.getIntNTy(OrigSize.getFixedValue());
    Type *ByteScalarTy = IRB.getIntNTy(LegalSize.getFixedValue());
    Value *AsScalar = IRB.CreateBitCast(V, ByteScalarTy, Name + ".bytes.cast");
    Value *Trunc = IRB.CreateTrunc(AsScalar, ShortScalarTy, Name + ".trunc");
    return IRB.CreateBitCast(Trunc, OrigType, Name + ".orig");
  }
  return IRB.CreateBitCast(V, OrigType, Name + ".real.ty");
}
Type *LegalizeBufferContentTypesVisitor::intrinsicTypeFor(Type *LegalType) {
  auto *VT = dyn_cast<FixedVectorType>(LegalType);
  if (!VT)
    return LegalType;
  Type *ET = VT->getElementType();
  // ...
  if (VT->getNumElements() == 1)
    return ET;
  if (DL.getTypeSizeInBits(LegalType) == 96 && DL.getTypeSizeInBits(ET) < 32)
    return FixedVectorType::get(IRB.getInt32Ty(), 3);
  switch (VT->getNumElements()) {
  // ... (map the remaining element counts onto the vector shapes the buffer
  //      intrinsics accept) ...
  }
  return LegalType;
}
void LegalizeBufferContentTypesVisitor::getVecSlices(
    Type *T, SmallVectorImpl<VecSlice> &Slices) {
  auto *VT = dyn_cast<FixedVectorType>(T);
  if (!VT)
    return;
  uint64_t ElemBitWidth =
      DL.getTypeSizeInBits(VT->getElementType()).getFixedValue();

  uint64_t ElemsPer4Words = 128 / ElemBitWidth;
  uint64_t ElemsPer2Words = ElemsPer4Words / 2;
  uint64_t ElemsPerWord = ElemsPer2Words / 2;
  uint64_t ElemsPerShort = ElemsPerWord / 2;
  uint64_t ElemsPerByte = ElemsPerShort / 2;
  // Three-word (96-bit) buffer accesses also exist, so allow 3-word slices.
  uint64_t ElemsPer3Words = ElemsPerWord * 3;

  uint64_t TotalElems = VT->getNumElements();
  uint64_t Index = 0;
  auto TrySlice = [&](unsigned MaybeLen) {
    if (MaybeLen > 0 && Index + MaybeLen <= TotalElems) {
      VecSlice Slice{Index, MaybeLen};
      Slices.push_back(Slice);
      Index += MaybeLen;
      return true;
    }
    return false;
  };
  while (Index < TotalElems) {
    TrySlice(ElemsPer4Words) || TrySlice(ElemsPer3Words) ||
        TrySlice(ElemsPer2Words) || TrySlice(ElemsPerWord) ||
        TrySlice(ElemsPerShort) || TrySlice(ElemsPerByte);
  }
}
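// Worked example for getVecSlices(): with T = <7 x i32>, ElemBitWidth is 32,
// so ElemsPer4Words = 4 and ElemsPer3Words = 3. The greedy loop emits the
// slices {Index=0, Length=4} and {Index=4, Length=3}, i.e. one dwordx4
// access followed by one dwordx3 access.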
Value *LegalizeBufferContentTypesVisitor::extractSlice(Value *Vec, VecSlice S,
                                                       const Twine &Name) {
  auto *VecVT = dyn_cast<FixedVectorType>(Vec->getType());
  // ...
  if (S.Length == VecVT->getNumElements() && S.Index == 0)
    return Vec;
  if (S.Length == 1)
    return IRB.CreateExtractElement(Vec, S.Index,
                                    Name + ".slice." + Twine(S.Index));
  // ... build a sequential shuffle mask over [S.Index, S.Index + S.Length) ...
  return IRB.CreateShuffleVector(Vec, Mask, Name + ".slice." + Twine(S.Index));
}
Value *LegalizeBufferContentTypesVisitor::insertSlice(Value *Whole, Value *Part,
                                                      VecSlice S,
                                                      const Twine &Name) {
  auto *WholeVT = dyn_cast<FixedVectorType>(Whole->getType());
  // ...
  if (S.Length == WholeVT->getNumElements() && S.Index == 0)
    return Part;
  if (S.Length == 1)
    return IRB.CreateInsertElement(Whole, Part, S.Index,
                                   Name + ".slice." + Twine(S.Index));
  int NumElems = cast<FixedVectorType>(Whole->getType())->getNumElements();
  // First widen Part to NumElems lanes with a one-input shuffle, then blend
  // it into Whole with a two-input shuffle.
  // ...
  Value *ExtPart = IRB.CreateShuffleVector(Part, ExtPartMask,
                                           /* ... */);
  // ...
  return IRB.CreateShuffleVector(Whole, ExtPart, Mask,
                                 /* ... */);
}
bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
    LoadInst &OrigLI, Type *PartType, SmallVectorImpl<uint32_t> &AggIdxs,
    uint64_t AggByteOff, Value *&Result, const Twine &Name) {
  if (auto *ST = dyn_cast<StructType>(PartType)) {
    const StructLayout *Layout = DL.getStructLayout(ST);
    bool Changed = false;
    for (auto [I, ElemTy, Offset] :
         enumerate(ST->elements(), Layout->getMemberOffsets())) {
      AggIdxs.push_back(I);
      Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,
                               AggByteOff + Offset.getFixedValue(), Result,
                               Name + "." + Twine(I));
      AggIdxs.pop_back();
    }
    return Changed;
  }
  if (auto *AT = dyn_cast<ArrayType>(PartType)) {
    Type *ElemTy = AT->getElementType();
    // ...
    TypeSize ElemStoreSize = DL.getTypeStoreSize(ElemTy);
    bool Changed = false;
    for (uint64_t I = 0, E = AT->getNumElements(); I != E; ++I) {
      AggIdxs.push_back(I);
      Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,
                               AggByteOff + I * ElemStoreSize.getFixedValue(),
                               Result, Name + Twine(I));
      AggIdxs.pop_back();
    }
    return Changed;
  }

  // Typical case: this part is a scalar, a vector, or a scalar array that
  // can be handled as a vector.
  Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType);
  Type *LegalType = legalNonAggregateFor(ArrayAsVecType);

  SmallVector<VecSlice> Slices;
  getVecSlices(LegalType, Slices);
  bool HasSlices = Slices.size() > 1;
  bool IsAggPart = !AggIdxs.empty();
  Value *LoadsRes;
  if (!HasSlices && !IsAggPart) {
    Type *LoadableType = intrinsicTypeFor(LegalType);
    if (LoadableType == PartType)
      return false;

    IRB.SetInsertPoint(&OrigLI);
    auto *NLI = cast<LoadInst>(OrigLI.clone());
    NLI->mutateType(LoadableType);
    NLI = IRB.Insert(NLI);
    NLI->setName(Name + ".loadable");

    LoadsRes = IRB.CreateBitCast(NLI, LegalType, Name + ".from.loadable");
  } else {
    IRB.SetInsertPoint(&OrigLI);
    LoadsRes = PoisonValue::get(LegalType);
    // ...
    Type *ElemType = LegalType->getScalarType();
    unsigned ElemBytes = DL.getTypeStoreSize(ElemType);
    // A scalar member of an aggregate still needs its own slice.
    if (IsAggPart && Slices.empty())
      Slices.push_back(VecSlice{/*Index=*/0, /*Length=*/1});
    for (VecSlice S : Slices) {
      Type *SliceType =
          S.Length != 1 ? FixedVectorType::get(ElemType, S.Length) : ElemType;
      int64_t ByteOffset = AggByteOff + S.Index * ElemBytes;
      // GEP to the byte offset this slice starts at.
      Value *NewPtr = IRB.CreateGEP(
          IRB.getInt8Ty(), OrigLI.getPointerOperand(),
          IRB.getInt32(ByteOffset) /* ... */);
      Type *LoadableType = intrinsicTypeFor(SliceType);
      LoadInst *NewLI = IRB.CreateAlignedLoad(
          LoadableType, NewPtr,
          commonAlignment(OrigLI.getAlign(), ByteOffset) /* ... */);
      // ...
      Value *Loaded = IRB.CreateBitCast(NewLI, SliceType,
                                        NewLI->getName() + ".from.loadable");
      LoadsRes = insertSlice(LoadsRes, Loaded, S, Name);
    }
  }
  if (LegalType != ArrayAsVecType)
    LoadsRes = makeIllegalNonAggregate(LoadsRes, ArrayAsVecType, Name);
  if (ArrayAsVecType != PartType)
    LoadsRes = vectorToArray(LoadsRes, PartType, Name);

  if (IsAggPart)
    Result = IRB.CreateInsertValue(Result, LoadsRes, AggIdxs, Name);
  else
    Result = LoadsRes;
  return true;
}
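// Sketch of the recursion above for a {i64, i64, i64} struct load: the struct
// branch pushes aggregate indices 0, 1, 2 and recurses with byte offsets 0,
// 8, and 16; each i64 leaf becomes its own smaller load of the fat pointer at
// that offset (later turned into a buffer intrinsic by SplitPtrStructs), and
// the leaves are reassembled into the struct through the Result
// out-parameter via insertvalue.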
bool LegalizeBufferContentTypesVisitor::visitLoadInst(LoadInst &LI) {
  // ...
  SmallVector<uint32_t> AggIdxs;
  Type *OrigType = LI.getType();
  Value *Result = PoisonValue::get(OrigType);
  bool Changed = visitLoadImpl(LI, OrigType, AggIdxs, 0, Result, LI.getName());
  if (!Changed)
    return false;
  // ...
  LI.replaceAllUsesWith(Result);
  LI.eraseFromParent();
  return true;
}
std::pair<bool, bool> LegalizeBufferContentTypesVisitor::visitStoreImpl(
    StoreInst &OrigSI, Type *PartType, SmallVectorImpl<uint32_t> &AggIdxs,
    uint64_t AggByteOff, const Twine &Name) {
  if (auto *ST = dyn_cast<StructType>(PartType)) {
    const StructLayout *Layout = DL.getStructLayout(ST);
    bool Changed = false;
    for (auto [I, ElemTy, Offset] :
         enumerate(ST->elements(), Layout->getMemberOffsets())) {
      AggIdxs.push_back(I);
      Changed |= std::get<0>(visitStoreImpl(OrigSI, ElemTy, AggIdxs,
                                            AggByteOff + Offset.getFixedValue(),
                                            Name + "." + Twine(I)));
      AggIdxs.pop_back();
    }
    return std::make_pair(Changed, /*ModifiedInPlace=*/false);
  }
  if (auto *AT = dyn_cast<ArrayType>(PartType)) {
    Type *ElemTy = AT->getElementType();
    // ...
    TypeSize ElemStoreSize = DL.getTypeStoreSize(ElemTy);
    bool Changed = false;
    for (uint64_t I = 0, E = AT->getNumElements(); I != E; ++I) {
      AggIdxs.push_back(I);
      Changed |= std::get<0>(visitStoreImpl(
          OrigSI, ElemTy, AggIdxs,
          AggByteOff + I * ElemStoreSize.getFixedValue(), Name + Twine(I)));
      AggIdxs.pop_back();
    }
    return std::make_pair(Changed, /*ModifiedInPlace=*/false);
  }

  Value *OrigData = OrigSI.getValueOperand();
  Value *NewData = OrigData;

  bool IsAggPart = !AggIdxs.empty();
  if (IsAggPart)
    NewData = IRB.CreateExtractValue(NewData, AggIdxs, Name);

  Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType);
  if (ArrayAsVecType != PartType) {
    NewData = arrayToVector(NewData, ArrayAsVecType, Name);
  }

  Type *LegalType = legalNonAggregateFor(ArrayAsVecType);
  if (LegalType != ArrayAsVecType) {
    NewData = makeLegalNonAggregate(NewData, LegalType, Name);
  }

  SmallVector<VecSlice> Slices;
  getVecSlices(LegalType, Slices);
  bool NeedToSplit = Slices.size() > 1 || IsAggPart;
  if (!NeedToSplit) {
    Type *StorableType = intrinsicTypeFor(LegalType);
    if (StorableType == PartType)
      return std::make_pair(false, false);
    NewData = IRB.CreateBitCast(NewData, StorableType, Name + ".storable");
    OrigSI.setOperand(0, NewData);
    return std::make_pair(true, true);
  }

  Value *OrigPtr = OrigSI.getPointerOperand();
  if (IsAggPart && Slices.empty())
    Slices.push_back(VecSlice{/*Index=*/0, /*Length=*/1});
  Type *ElemType = LegalType->getScalarType();
  unsigned ElemBytes = DL.getTypeStoreSize(ElemType);
  for (VecSlice S : Slices) {
    Type *SliceType =
        S.Length != 1 ? FixedVectorType::get(ElemType, S.Length) : ElemType;
    int64_t ByteOffset = AggByteOff + S.Index * ElemBytes;
    Value *NewPtr =
        IRB.CreateGEP(IRB.getInt8Ty(), OrigPtr, IRB.getInt32(ByteOffset)
                      /* ... */);
    Value *DataSlice = extractSlice(NewData, S, Name);
    Type *StorableType = intrinsicTypeFor(SliceType);
    DataSlice = IRB.CreateBitCast(DataSlice, StorableType,
                                  DataSlice->getName() + ".storable");
    auto *NewSI = cast<StoreInst>(OrigSI.clone());
    NewSI->setAlignment(commonAlignment(OrigSI.getAlign(), ByteOffset));
    IRB.Insert(NewSI);
    NewSI->setOperand(0, DataSlice);
    NewSI->setOperand(1, NewPtr);
  }
  return std::make_pair(true, false);
}
bool LegalizeBufferContentTypesVisitor::visitStoreInst(StoreInst &SI) {
  // ...
  IRB.SetInsertPoint(&SI);
  SmallVector<uint32_t> AggIdxs;
  Value *OrigData = SI.getValueOperand();
  auto [Changed, ModifiedInPlace] =
      visitStoreImpl(SI, OrigData->getType(), AggIdxs, 0, OrigData->getName());
  if (Changed && !ModifiedInPlace)
    SI.eraseFromParent();
  return Changed;
}
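// The two booleans from visitStoreImpl() distinguish "rewritten into new
// slice stores" from "patched in place" (only the stored operand was bitcast
// to a storable type). Only the former case leaves a dead original store,
// which is why it is erased exactly when Changed && !ModifiedInPlace.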
bool LegalizeBufferContentTypesVisitor::processFunction(Function &F) {
  bool Changed = false;
  // ...
  return Changed;
}
/// Return the ptr addrspace(8) and i32 (resource and offset parts) in a
/// lowered buffer fat pointer constant.
static std::pair<Constant *, Constant *>
splitLoweredFatBufferConst(Constant *C) {
  assert(isSplitFatPtr(C->getType()) && "Not a split fat buffer pointer");
  return std::make_pair(C->getAggregateElement(0u), C->getAggregateElement(1u));
}
class FatPtrConstMaterializer final : public ValueMaterializer {
  BufferFatPtrToStructTypeMap *TypeMap;
  // ...
public:
  FatPtrConstMaterializer(BufferFatPtrToStructTypeMap *TypeMap,
                          ValueToValueMapTy &UnderlyingMap)
      : TypeMap(TypeMap),
        InternalMapper(UnderlyingMap, RF_None, TypeMap, this) {}
  ~FatPtrConstMaterializer() = default;
  // ...
Constant *FatPtrConstMaterializer::materializeBufferFatPtrConst(Constant *C) {
  Type *SrcTy = C->getType();
  auto *NewTy = dyn_cast<StructType>(TypeMap->remapType(SrcTy));
  if (C->isNullValue())
    return ConstantAggregateZero::getNullValue(NewTy);
  if (isa<PoisonValue>(C)) {
    // ... split into {poison rsrc, poison offset} ...
  }
  if (isa<UndefValue>(C)) {
    // ... split into {undef rsrc, undef offset} ...
  }

  if (auto *VC = dyn_cast<ConstantVector>(C)) {
    if (Constant *S = VC->getSplatValue()) {
      Constant *NewS = InternalMapper.mapConstant(*S);
      if (!NewS)
        return nullptr;
      auto [Rsrc, Off] = splitLoweredFatBufferConst(NewS);
      auto EC = VC->getType()->getElementCount();
      return ConstantStruct::get(NewTy, {ConstantVector::getSplat(EC, Rsrc),
                                         ConstantVector::getSplat(EC, Off)});
    }
    SmallVector<Constant *> Rsrcs;
    SmallVector<Constant *> Offs;
    for (Value *Op : VC->operand_values()) {
      auto *NewOp = dyn_cast_or_null<Constant>(InternalMapper.mapValue(*Op));
      if (!NewOp)
        return nullptr;
      auto [Rsrc, Off] = splitLoweredFatBufferConst(NewOp);
      Rsrcs.push_back(Rsrc);
      Offs.push_back(Off);
    }
    // ... rebuild a struct of the two constant vectors ...
  }

  if (isa<GlobalValue>(C))
    reportFatalUsageError(/* ... */ "fat pointer) values are not supported");

  if (isa<ConstantExpr>(C))
    reportFatalUsageError("constant exprs containing ptr addrspace(7) (buffer "
                          "fat pointer) values should have been expanded "
                          "earlier");
  // ...
}
Value *FatPtrConstMaterializer::materialize(Value *V) {
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return nullptr;
  // ...
  return materializeBufferFatPtrConst(C);
}
class SplitPtrStructs : public InstVisitor<SplitPtrStructs, PtrParts> {
  // ...
  void processConditionals();
  // ...
};
void SplitPtrStructs::copyMetadata(Value *Dest, Value *Src) {
  auto *DestI = dyn_cast<Instruction>(Dest);
  auto *SrcI = dyn_cast<Instruction>(Src);

  if (!DestI || !SrcI)
    return;

  DestI->copyMetadata(*SrcI);
}

PtrParts SplitPtrStructs::getPtrParts(Value *V) {
  assert(isSplitFatPtr(V->getType()) && "it's not meaningful to get the parts "
                                        "of something that wasn't rewritten");
  auto *RsrcEntry = &RsrcParts[V];
  auto *OffEntry = &OffParts[V];
  if (*RsrcEntry && *OffEntry)
    return {*RsrcEntry, *OffEntry};

  if (auto *C = dyn_cast<Constant>(V)) {
    auto [Rsrc, Off] = splitLoweredFatBufferConst(C);
    return {*RsrcEntry = Rsrc, *OffEntry = Off};
  }

  // ...
  if (auto *I = dyn_cast<Instruction>(V)) {
    // ...
    auto [Rsrc, Off] = visit(*I);
    if (Rsrc && Off)
      return {*RsrcEntry = Rsrc, *OffEntry = Off};
    // If the visitor didn't split this value, extract the parts right after
    // its definition.
    IRB.SetInsertPoint(*I->getInsertionPointAfterDef());
    IRB.SetCurrentDebugLocation(I->getDebugLoc());
  } else if (auto *A = dyn_cast<Argument>(V)) {
    IRB.SetInsertPointPastAllocas(A->getParent());
    IRB.SetCurrentDebugLocation(DebugLoc());
  }
  Value *Rsrc = IRB.CreateExtractValue(V, 0, V->getName() + ".rsrc");
  Value *Off = IRB.CreateExtractValue(V, 1, V->getName() + ".off");
  return {*RsrcEntry = Rsrc, *OffEntry = Off};
}
/// Returns the instruction that defines the resource part of the value V.
static Value *rsrcPartRoot(Value *V) {
  while (auto *GEP = dyn_cast<GEPOperator>(V))
    V = GEP->getPointerOperand();
  while (auto *ASC = dyn_cast<AddrSpaceCastOperator>(V))
    V = ASC->getPointerOperand();
  return V;
}
void SplitPtrStructs::getPossibleRsrcRoots(Instruction *I,
                                           SmallPtrSetImpl<Value *> &Roots,
                                           SmallPtrSetImpl<Value *> &Seen) {
  if (auto *PHI = dyn_cast<PHINode>(I)) {
    if (!Seen.insert(I).second)
      return;
    for (Value *In : PHI->incoming_values()) {
      In = rsrcPartRoot(In);
      Roots.insert(In);
      if (isa<PHINode, SelectInst>(In))
        getPossibleRsrcRoots(cast<Instruction>(In), Roots, Seen);
    }
  } else if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (!Seen.insert(SI).second)
      return;
    Value *TrueVal = rsrcPartRoot(SI->getTrueValue());
    Value *FalseVal = rsrcPartRoot(SI->getFalseValue());
    Roots.insert(TrueVal);
    Roots.insert(FalseVal);
    if (isa<PHINode, SelectInst>(TrueVal))
      getPossibleRsrcRoots(cast<Instruction>(TrueVal), Roots, Seen);
    if (isa<PHINode, SelectInst>(FalseVal))
      getPossibleRsrcRoots(cast<Instruction>(FalseVal), Roots, Seen);
  }
  // ...
}
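// Why this walk exists: for pointer-valued PHIs and selects, the resource
// part is frequently uniform (e.g. in a loop, only the i32 offset advances
// between iterations). Collecting every possible root of the rsrc part lets
// processConditionals() below detect the single-root case and reuse that
// rsrc directly instead of building a redundant PHI or select over the whole
// 128-bit resource.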
void SplitPtrStructs::processConditionals() {
  SmallDenseMap<Value *, Value *> FoundRsrcs;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 4> Seen;
  for (Instruction *I : Conditionals) {
    // These have to exist by now because the conditionals themselves were
    // visited before their parts were requested.
    Value *Rsrc = RsrcParts[I];
    Value *Off = OffParts[I];
    assert(Rsrc && Off && "must have visited conditionals by now");

    std::optional<Value *> MaybeRsrc;
    auto MaybeFoundRsrc = FoundRsrcs.find(I);
    if (MaybeFoundRsrc != FoundRsrcs.end()) {
      MaybeRsrc = MaybeFoundRsrc->second;
    } else {
      Roots.clear();
      Seen.clear();
      getPossibleRsrcRoots(I, Roots, Seen);
      // ...
      for (Value *V : Roots) {
        // ...
      }
      for (Value *V : Seen) {
        // ...
      }
      auto Diff = set_difference(Roots, Seen);
      if (Diff.size() == 1) {
        Value *RootVal = *Diff.begin();
        // If the root is another conditional we've already split, reuse its
        // rsrc part; otherwise the root itself is the rsrc.
        if (isSplitFatPtr(RootVal->getType()))
          MaybeRsrc = std::get<0>(getPtrParts(RootVal));
        else
          MaybeRsrc = RootVal;
      }
    }

    if (auto *PHI = dyn_cast<PHINode>(I)) {
      Value *NewRsrc;
      StructType *PHITy = cast<StructType>(PHI->getType());
      IRB.SetInsertPoint(*PHI->getInsertionPointAfterDef());
      IRB.SetCurrentDebugLocation(PHI->getDebugLoc());
      if (MaybeRsrc) {
        NewRsrc = *MaybeRsrc;
      } else {
        Type *RsrcTy = PHITy->getElementType(0);
        auto *RsrcPHI = IRB.CreatePHI(RsrcTy, PHI->getNumIncomingValues());
        RsrcPHI->takeName(Rsrc);
        for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) {
          Value *VRsrc = std::get<0>(getPtrParts(V));
          RsrcPHI->addIncoming(VRsrc, BB);
        }
        copyMetadata(RsrcPHI, PHI);
        NewRsrc = RsrcPHI;
      }

      Type *OffTy = PHITy->getElementType(1);
      auto *NewOff = IRB.CreatePHI(OffTy, PHI->getNumIncomingValues());
      NewOff->takeName(Off);
      for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) {
        assert(OffParts.count(V) && "An offset part had to be created by now");
        Value *VOff = std::get<1>(getPtrParts(V));
        NewOff->addIncoming(VOff, BB);
      }
      copyMetadata(NewOff, PHI);

      // Replace the temporary extractvalue parts with the real ones.
      if (auto *RsrcInst = dyn_cast<Instruction>(Rsrc)) {
        ConditionalTemps.push_back(RsrcInst);
        RsrcInst->replaceAllUsesWith(NewRsrc);
      }
      if (auto *OffInst = dyn_cast<Instruction>(Off)) {
        ConditionalTemps.push_back(OffInst);
        OffInst->replaceAllUsesWith(NewOff);
      }
      // ...
      for (Value *V : Seen)
        FoundRsrcs[V] = NewRsrc;
    } else if (isa<SelectInst>(I)) {
      if (MaybeRsrc) {
        if (auto *RsrcInst = dyn_cast<Instruction>(Rsrc)) {
          ConditionalTemps.push_back(RsrcInst);
          RsrcInst->replaceAllUsesWith(*MaybeRsrc);
        }
        for (Value *V : Seen)
          FoundRsrcs[V] = *MaybeRsrc;
      }
    }
    // ...
  }
}
void SplitPtrStructs::killAndReplaceSplitInstructions(
    SmallVectorImpl<Instruction *> &Origs) {
  for (Instruction *I : ConditionalTemps)
    I->eraseFromParent();

  for (Instruction *I : Origs) {
    if (!SplitUsers.contains(I))
      continue;

    // Redistribute debug records that described the fat pointer onto its
    // resource and offset parts.
    SmallVector<DbgVariableRecord *> Dbgs;
    findDbgValues(I, Dbgs);
    for (DbgVariableRecord *Dbg : Dbgs) {
      auto &DL = I->getDataLayout();
      assert(isSplitFatPtr(I->getType()) &&
             "We should've RAUW'd away loads, stores, etc. at this point");
      // ...
      auto [Rsrc, Off] = getPtrParts(I);
      int64_t RsrcSz = DL.getTypeSizeInBits(Rsrc->getType());
      int64_t OffSz = DL.getTypeSizeInBits(Off->getType());

      std::optional<DIExpression *> RsrcExpr =
          DIExpression::createFragmentExpression(/* ... */);
      std::optional<DIExpression *> OffExpr =
          DIExpression::createFragmentExpression(/* ... */);
      if (RsrcExpr) {
        // ...
        Dbg->setExpression(*RsrcExpr);
        Dbg->replaceVariableLocationOp(I, Rsrc);
      }
      // ... (and likewise for OffExpr with Off) ...
    }

    Value *Poison = PoisonValue::get(I->getType());
    I->replaceUsesWithIf(Poison, [&](const Use &U) -> bool {
      if (const auto *UI = dyn_cast<Instruction>(U.getUser()))
        return SplitUsers.contains(UI);
      return false;
    });

    if (I->use_empty()) {
      I->eraseFromParent();
      continue;
    }
    IRB.SetInsertPoint(*I->getInsertionPointAfterDef());
    IRB.SetCurrentDebugLocation(I->getDebugLoc());
    auto [Rsrc, Off] = getPtrParts(I);
    // ... reassemble a {rsrc, off} struct for any remaining users ...
    I->replaceAllUsesWith(Struct);
    I->eraseFromParent();
  }
}
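// Note on the debug-info handling above: a variable that used to be described
// by the fat pointer is re-described as two fragments built with
// DIExpression::createFragmentExpression(), one covering the rsrc part and
// one covering the offset part (RsrcSz and OffSz bits respectively), so
// debuggers can still reconstruct the original 160-bit value.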
void SplitPtrStructs::insertPreMemOpFence(AtomicOrdering Order,
                                          SyncScope::ID SSID) {
  switch (Order) {
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    IRB.CreateFence(AtomicOrdering::Release, SSID);
    break;
  default:
    break;
  }
}

void SplitPtrStructs::insertPostMemOpFence(AtomicOrdering Order,
                                           SyncScope::ID SSID) {
  switch (Order) {
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    IRB.CreateFence(AtomicOrdering::Acquire, SSID);
    break;
  default:
    break;
  }
}
Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
                                         Type *Ty, Align Alignment,
                                         AtomicOrdering Order, bool IsVolatile,
                                         SyncScope::ID SSID) {
  IRB.SetInsertPoint(I);

  auto [Rsrc, Off] = getPtrParts(Ptr);
  SmallVector<Value *, 5> Args;
  if (Arg)
    Args.push_back(Arg);
  Args.push_back(Rsrc);
  Args.push_back(Off);
  insertPreMemOpFence(Order, SSID);

  // soffset is always 0 for raw buffer operations.
  Args.push_back(IRB.getInt32(0));
  unsigned Aux = 0;
  // ... (volatility, nontemporal-ness, and atomicity map onto the aux bits)
  Args.push_back(IRB.getInt32(Aux));

  Intrinsic::ID IID = Intrinsic::not_intrinsic;
  if (isa<LoadInst>(I))
    IID = Order == AtomicOrdering::NotAtomic
              ? Intrinsic::amdgcn_raw_ptr_buffer_load
              : Intrinsic::amdgcn_raw_ptr_atomic_buffer_load;
  else if (isa<StoreInst>(I))
    IID = Intrinsic::amdgcn_raw_ptr_buffer_store;
  else if (auto *RMW = dyn_cast<AtomicRMWInst>(I)) {
    switch (RMW->getOperation()) {
    case AtomicRMWInst::Xchg:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap;
      break;
    case AtomicRMWInst::Add:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
      break;
    case AtomicRMWInst::Sub:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub;
      break;
    case AtomicRMWInst::And:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_and;
      break;
    case AtomicRMWInst::Or:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_or;
      break;
    case AtomicRMWInst::Xor:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor;
      break;
    case AtomicRMWInst::Max:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax;
      break;
    case AtomicRMWInst::Min:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin;
      break;
    case AtomicRMWInst::UMax:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax;
      break;
    case AtomicRMWInst::UMin:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin;
      break;
    case AtomicRMWInst::FAdd:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
      break;
    case AtomicRMWInst::FMax:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
      break;
    case AtomicRMWInst::FMin:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin;
      break;
    case AtomicRMWInst::FSub:
      reportFatalUsageError(
          "atomic floating point subtraction not supported for "
          "buffer resources and should've been expanded away");
      break;
    case AtomicRMWInst::FMaximum:
      reportFatalUsageError(
          "atomic floating point fmaximum not supported for "
          "buffer resources and should've been expanded away");
      break;
    case AtomicRMWInst::FMinimum:
      reportFatalUsageError(
          "atomic floating point fminimum not supported for "
          "buffer resources and should've been expanded away");
      break;
    case AtomicRMWInst::Nand:
      reportFatalUsageError(
          "atomic nand not supported for buffer resources and "
          "should've been expanded away");
      break;
    default:
      // ... (the remaining operations are likewise rejected as not supported
      //      for buffer resources and should've been expanded away) ...
      break;
    }
  }

  auto *Call = IRB.CreateIntrinsic(IID, Ty, Args);
  copyMetadata(Call, I);
  setAlign(Call, Alignment, Arg ? 1 : 0);
  Call->takeName(I);

  insertPostMemOpFence(Order, SSID);
  // ...
  SplitUsers.insert(I);
  I->replaceAllUsesWith(Call);
  return Call;
}
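// Sketch of the output for a simple non-atomic i32 load (illustrative value
// names):
//   %v = load i32, ptr addrspace(7) %p
// -->
//   %v = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(
//            ptr addrspace(8) %p.rsrc, i32 %p.off, i32 0, i32 0)
// where the trailing constants are the always-zero soffset and the aux
// (cache-policy) bits computed above.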
PtrParts SplitPtrStructs::visitInstruction(Instruction &I) {
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitLoadInst(LoadInst &LI) {
  if (!isSplitFatPtr(LI.getPointerOperandType()))
    return {nullptr, nullptr};
  handleMemoryInst(&LI, nullptr, LI.getPointerOperand(), LI.getType(),
                   LI.getAlign(), LI.getOrdering(), LI.isVolatile(),
                   LI.getSyncScopeID());
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitStoreInst(StoreInst &SI) {
  if (!isSplitFatPtr(SI.getPointerOperandType()))
    return {nullptr, nullptr};
  Value *Arg = SI.getValueOperand();
  handleMemoryInst(&SI, Arg, SI.getPointerOperand(), Arg->getType(),
                   SI.getAlign(), SI.getOrdering(), SI.isVolatile(),
                   SI.getSyncScopeID());
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitAtomicRMWInst(AtomicRMWInst &AI) {
  if (!isSplitFatPtr(AI.getPointerOperand()->getType()))
    return {nullptr, nullptr};
  Value *Arg = AI.getValOperand();
  handleMemoryInst(&AI, Arg, AI.getPointerOperand(), Arg->getType(),
                   AI.getAlign(), AI.getOrdering(), AI.isVolatile(),
                   AI.getSyncScopeID());
  return {nullptr, nullptr};
}
PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) {
  Value *Ptr = AI.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&AI);

  Type *Ty = AI.getNewValOperand()->getType();
  AtomicOrdering Order = AI.getMergedOrdering();
  SyncScope::ID SSID = AI.getSyncScopeID();
  bool IsNonTemporal = AI.getMetadata(LLVMContext::MD_nontemporal);
  unsigned Aux = 0;
  // ... (volatile / nontemporal map onto the aux bits) ...
  auto [Rsrc, Off] = getPtrParts(Ptr);
  insertPreMemOpFence(Order, SSID);

  Value *Call =
      IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
                          {AI.getNewValOperand(), AI.getCompareOperand(), Rsrc,
                           Off, IRB.getInt32(0), IRB.getInt32(Aux)});
  copyMetadata(Call, &AI);
  // ...
  Call->takeName(&AI);
  insertPostMemOpFence(Order, SSID);

  Value *Res = PoisonValue::get(AI.getType());
  Res = IRB.CreateInsertValue(Res, Call, 0);
  if (!AI.isWeak()) {
    Value *Succeeded = IRB.CreateICmpEQ(Call, AI.getCompareOperand());
    Res = IRB.CreateInsertValue(Res, Succeeded, 1);
  }
  SplitUsers.insert(&AI);
  AI.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}
PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  Value *Ptr = GEP.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&GEP);

  auto [Rsrc, Off] = getPtrParts(Ptr);
  const DataLayout &DL = GEP.getDataLayout();
  bool IsNUW = GEP.hasNoUnsignedWrap();
  bool IsNUSW = GEP.hasNoUnsignedSignedWrap();

  StructType *ResTy = cast<StructType>(GEP.getType());
  Type *ResRsrcTy = ResTy->getElementType(0);
  VectorType *ResRsrcVecTy = dyn_cast<VectorType>(ResRsrcTy);
  bool BroadcastsPtr = ResRsrcVecTy && !isa<VectorType>(Off->getType());

  // In order to call emitGEPOffset() and thus not have to reimplement it,
  // the GEP temporarily gets its fat-pointer type back.
  Type *FatPtrTy = IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER);
  if (ResRsrcVecTy)
    FatPtrTy = VectorType::get(FatPtrTy, ResRsrcVecTy->getElementCount());
  GEP.mutateType(FatPtrTy);
  Value *OffAccum = emitGEPOffset(&IRB, DL, &GEP);
  GEP.mutateType(ResTy);

  if (BroadcastsPtr) {
    Rsrc = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Rsrc,
                                 Rsrc->getName());
    Off = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Off,
                                Off->getName());
  }
  if (match(OffAccum, m_Zero())) { // Constant-zero offset: nothing to add.
    SplitUsers.insert(&GEP);
    return {Rsrc, Off};
  }

  bool HasNonNegativeOff = false;
  if (auto *CI = dyn_cast<ConstantInt>(OffAccum)) {
    HasNonNegativeOff = !CI->isNegative();
  }
  Value *NewOff;
  if (match(Off, m_Zero())) {
    NewOff = OffAccum;
  } else {
    NewOff = IRB.CreateAdd(Off, OffAccum, "",
                           /*HasNUW=*/IsNUW || (IsNUSW && HasNonNegativeOff),
                           /*HasNSW=*/false);
  }
  copyMetadata(NewOff, &GEP);
  NewOff->takeName(&GEP);
  SplitUsers.insert(&GEP);
  return {Rsrc, NewOff};
}
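// Net effect (illustrative names): since pointer arithmetic can never change
// the resource part,
//   %q = getelementptr i8, ptr addrspace(7) %p, i32 %idx
// lowers to a plain 32-bit add on the offset,
//   %q = add i32 %p.off, %idx
// while %q's resource part simply reuses %p.rsrc.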
PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) {
  Value *Ptr = PI.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&PI);

  Type *ResTy = PI.getType();
  unsigned Width = ResTy->getScalarSizeInBits();
  auto [Rsrc, Off] = getPtrParts(Ptr);
  const DataLayout &DL = PI.getDataLayout();
  unsigned FatPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_FAT_POINTER);

  Value *Res;
  if (Width <= BufferOffsetWidth) {
    Res = IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false,
                            PI.getName() + ".off");
  } else {
    Value *RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.getName() + ".rsrc");
    Value *Shl = IRB.CreateShl(
        RsrcInt, ConstantInt::get(ResTy, BufferOffsetWidth),
        "", Width >= FatPtrWidth, Width > FatPtrWidth);
    Value *OffCast = IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false,
                                       PI.getName() + ".off");
    Res = IRB.CreateOr(Shl, OffCast);
  }

  copyMetadata(Res, &PI);
  Res->takeName(&PI);
  SplitUsers.insert(&PI);
  PI.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}
PtrParts SplitPtrStructs::visitPtrToAddrInst(PtrToAddrInst &PA) {
  Value *Ptr = PA.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&PA);

  auto [Rsrc, Off] = getPtrParts(Ptr);
  Value *Res = IRB.CreateIntCast(Off, PA.getType(), /*isSigned=*/false);
  copyMetadata(Res, &PA);
  Res->takeName(&PA);
  SplitUsers.insert(&PA);
  PA.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}
PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) {
  if (!isSplitFatPtr(IP.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&IP);
  Value *Int = IP.getOperand(0);
  // ...
  auto *RetTy = cast<StructType>(IP.getType());
  Type *RsrcTy = RetTy->getElementType(0);
  Type *OffTy = RetTy->getElementType(1);
  // Shift the high (resource) bits down and rebuild the two parts.
  // ... (RsrcIntTy: an integer type as wide as the rsrc part) ...
  Value *RsrcPart = IRB.CreateLShr(
      Int, ConstantInt::get(Int->getType(), BufferOffsetWidth));
  Value *RsrcInt = IRB.CreateIntCast(RsrcPart, RsrcIntTy, /*isSigned=*/false);
  Value *Rsrc = IRB.CreateIntToPtr(RsrcInt, RsrcTy, IP.getName() + ".rsrc");
  Value *Off =
      IRB.CreateIntCast(Int, OffTy, /*isSigned=*/false, IP.getName() + ".off");

  copyMetadata(Rsrc, &IP);
  SplitUsers.insert(&IP);
  return {Rsrc, Off};
}
PtrParts SplitPtrStructs::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *In = I.getPointerOperand();
  // No-op casts preserve parts.
  if (In->getType() == I.getType()) {
    auto [Rsrc, Off] = getPtrParts(In);
    SplitUsers.insert(&I);
    return {Rsrc, Off};
  }

  auto *ResTy = cast<StructType>(I.getType());
  Type *RsrcTy = ResTy->getElementType(0);
  Type *OffTy = ResTy->getElementType(1);
  Constant *ZeroOff = Constant::getNullValue(OffTy);

  auto *InConst = dyn_cast<Constant>(In);
  if (InConst && InConst->isNullValue()) {
    Constant *NullRsrc = Constant::getNullValue(RsrcTy);
    SplitUsers.insert(&I);
    return {NullRsrc, ZeroOff};
  }
  if (isa<PoisonValue>(In)) {
    Constant *PoisonRsrc = PoisonValue::get(RsrcTy);
    Constant *PoisonOff = PoisonValue::get(OffTy);
    SplitUsers.insert(&I);
    return {PoisonRsrc, PoisonOff};
  }
  if (isa<UndefValue>(In)) {
    Constant *UndefRsrc = UndefValue::get(RsrcTy);
    Constant *UndefOff = UndefValue::get(OffTy);
    SplitUsers.insert(&I);
    return {UndefRsrc, UndefOff};
  }

  if (In->getType()->getPointerAddressSpace() != AMDGPUAS::BUFFER_RESOURCE)
    reportFatalUsageError(
        "only buffer resources (addrspace 8) and null/poison pointers can be "
        "cast to buffer fat pointers (addrspace 7)");
  SplitUsers.insert(&I);
  return {In, ZeroOff};
}
PtrParts SplitPtrStructs::visitICmpInst(ICmpInst &Cmp) {
  Value *Lhs = Cmp.getOperand(0);
  if (!isBufferFatPtrOrVector(Lhs->getType()))
    return {nullptr, nullptr};
  Value *Rhs = Cmp.getOperand(1);
  IRB.SetInsertPoint(&Cmp);
  ICmpInst::Predicate Pred = Cmp.getPredicate();

  assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
         "Pointer comparison is only equal or unequal");
  auto [LhsRsrc, LhsOff] = getPtrParts(Lhs);
  auto [RhsRsrc, RhsOff] = getPtrParts(Rhs);
  Value *RsrcCmp =
      IRB.CreateICmp(Pred, LhsRsrc, RhsRsrc, Cmp.getName() + ".rsrc");
  copyMetadata(RsrcCmp, &Cmp);
  Value *OffCmp = IRB.CreateICmp(Pred, LhsOff, RhsOff, Cmp.getName() + ".off");
  copyMetadata(OffCmp, &Cmp);

  Value *Res = nullptr;
  if (Pred == ICmpInst::ICMP_EQ)
    Res = IRB.CreateAnd(RsrcCmp, OffCmp);
  else if (Pred == ICmpInst::ICMP_NE)
    Res = IRB.CreateOr(RsrcCmp, OffCmp);
  copyMetadata(Res, &Cmp);
  Res->takeName(&Cmp);
  SplitUsers.insert(&Cmp);
  Cmp.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}
PtrParts SplitPtrStructs::visitFreezeInst(FreezeInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  auto [Rsrc, Off] = getPtrParts(I.getOperand(0));

  Value *RsrcRes = IRB.CreateFreeze(Rsrc, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes = IRB.CreateFreeze(Off, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}
PtrParts SplitPtrStructs::visitExtractElementInst(ExtractElementInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *Vec = I.getVectorOperand();
  Value *Idx = I.getIndexOperand();
  auto [Rsrc, Off] = getPtrParts(Vec);

  Value *RsrcRes = IRB.CreateExtractElement(Rsrc, Idx, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes = IRB.CreateExtractElement(Off, Idx, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}
PtrParts SplitPtrStructs::visitInsertElementInst(InsertElementInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *Vec = I.getOperand(0);
  Value *Elem = I.getOperand(1);
  Value *Idx = I.getOperand(2);
  auto [VecRsrc, VecOff] = getPtrParts(Vec);
  auto [ElemRsrc, ElemOff] = getPtrParts(Elem);

  Value *RsrcRes =
      IRB.CreateInsertElement(VecRsrc, ElemRsrc, Idx, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes =
      IRB.CreateInsertElement(VecOff, ElemOff, Idx, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}
PtrParts SplitPtrStructs::visitShuffleVectorInst(ShuffleVectorInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);

  Value *V1 = I.getOperand(0);
  Value *V2 = I.getOperand(1);
  ArrayRef<int> Mask = I.getShuffleMask();
  auto [V1Rsrc, V1Off] = getPtrParts(V1);
  auto [V2Rsrc, V2Off] = getPtrParts(V2);

  Value *RsrcRes =
      IRB.CreateShuffleVector(V1Rsrc, V2Rsrc, Mask, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes =
      IRB.CreateShuffleVector(V1Off, V2Off, Mask, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}
PtrParts SplitPtrStructs::visitPHINode(PHINode &PHI) {
  if (!isSplitFatPtr(PHI.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(*PHI.getInsertionPointAfterDef());
  // PHI splitting is deferred to processConditionals(): the incoming values
  // may not have been visited yet, so temporary extractvalues stand in for
  // the parts until then.
  Value *TmpRsrc = IRB.CreateExtractValue(&PHI, 0, PHI.getName() + ".rsrc");
  Value *TmpOff = IRB.CreateExtractValue(&PHI, 1, PHI.getName() + ".off");
  Conditionals.push_back(&PHI);
  SplitUsers.insert(&PHI);
  return {TmpRsrc, TmpOff};
}
PtrParts SplitPtrStructs::visitSelectInst(SelectInst &SI) {
  if (!isSplitFatPtr(SI.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&SI);

  Value *Cond = SI.getCondition();
  Value *True = SI.getTrueValue();
  Value *False = SI.getFalseValue();
  auto [TrueRsrc, TrueOff] = getPtrParts(True);
  auto [FalseRsrc, FalseOff] = getPtrParts(False);

  Value *RsrcRes =
      IRB.CreateSelect(Cond, TrueRsrc, FalseRsrc, SI.getName() + ".rsrc", &SI);
  copyMetadata(RsrcRes, &SI);
  Conditionals.push_back(&SI);
  Value *OffRes =
      IRB.CreateSelect(Cond, TrueOff, FalseOff, SI.getName() + ".off", &SI);
  copyMetadata(OffRes, &SI);
  SplitUsers.insert(&SI);
  return {RsrcRes, OffRes};
}
/// Returns true if this intrinsic needs to be removed when it is applied to
/// ptr addrspace(7) values.
static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
  switch (IID) {
  default:
    return false;
  case Intrinsic::amdgcn_make_buffer_rsrc:
  case Intrinsic::ptrmask:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::memcpy:
  case Intrinsic::memcpy_inline:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::memset_inline:
  case Intrinsic::experimental_memset_pattern:
  case Intrinsic::amdgcn_load_to_lds:
    return true;
  }
}
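// Every intrinsic listed above either takes or returns a ptr addrspace(7)
// and so cannot survive the split. The memcpy/memset family has already been
// expanded into loops by StoreFatPtrsAsIntsAndExpandMemcpyVisitor; the rest
// are rewritten onto the {rsrc, off} pair by visitIntrinsicInst() below.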
PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
  Intrinsic::ID IID = I.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::amdgcn_make_buffer_rsrc: {
    if (!isSplitFatPtr(I.getType()))
      return {nullptr, nullptr};
    Value *Base = I.getArgOperand(0);
    Value *Stride = I.getArgOperand(1);
    Value *NumRecords = I.getArgOperand(2);
    Value *Flags = I.getArgOperand(3);
    auto *SplitType = cast<StructType>(I.getType());
    Type *RsrcType = SplitType->getElementType(0);
    Type *OffType = SplitType->getElementType(1);
    IRB.SetInsertPoint(&I);
    Value *Rsrc = IRB.CreateIntrinsic(IID, {RsrcType, Base->getType()},
                                      {Base, Stride, NumRecords, Flags});
    copyMetadata(Rsrc, &I);
    Rsrc->takeName(&I);
    Constant *Zero = Constant::getNullValue(OffType);
    SplitUsers.insert(&I);
    return {Rsrc, Zero};
  }
  case Intrinsic::ptrmask: {
    Value *Ptr = I.getArgOperand(0);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    Value *Mask = I.getArgOperand(1);
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    if (Mask->getType() != Off->getType())
      reportFatalUsageError(/* ... */ "pointer (data layout not set up "
                            "correctly?)");
    Value *OffRes = IRB.CreateAnd(Off, Mask, I.getName() + ".off");
    copyMetadata(OffRes, &I);
    SplitUsers.insert(&I);
    return {Rsrc, OffRes};
  }
  // Pointer annotation intrinsics are forwarded to the resource part.
  case Intrinsic::invariant_start: {
    Value *Ptr = I.getArgOperand(1);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    Type *NewTy = IRB.getPtrTy(AMDGPUAS::BUFFER_RESOURCE);
    auto *NewRsrc = IRB.CreateIntrinsic(IID, {NewTy}, {I.getOperand(0), Rsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    I.replaceAllUsesWith(NewRsrc);
    return {nullptr, nullptr};
  }
  case Intrinsic::invariant_end: {
    Value *RealPtr = I.getArgOperand(2);
    if (!isSplitFatPtr(RealPtr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    Value *RealRsrc = getPtrParts(RealPtr).first;
    Value *InvPtr = I.getArgOperand(0);
    Value *Size = I.getArgOperand(1);
    Value *NewRsrc = IRB.CreateIntrinsic(IID, {RealRsrc->getType()},
                                         {InvPtr, Size, RealRsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    I.replaceAllUsesWith(NewRsrc);
    return {nullptr, nullptr};
  }
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group: {
    Value *Ptr = I.getArgOperand(0);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    Value *NewRsrc = IRB.CreateIntrinsic(IID, {Rsrc->getType()}, {Rsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    return {NewRsrc, Off};
  }
  case Intrinsic::amdgcn_load_to_lds: {
    Value *Ptr = I.getArgOperand(0);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    Value *LDSPtr = I.getArgOperand(1);
    Value *LoadSize = I.getArgOperand(2);
    Value *ImmOff = I.getArgOperand(3);
    Value *Aux = I.getArgOperand(4);
    Value *SOffset = IRB.getInt32(0);
    Value *NewLoad = IRB.CreateIntrinsic(
        Intrinsic::amdgcn_raw_ptr_buffer_load_lds, {},
        {Rsrc, LDSPtr, LoadSize, Off, SOffset, ImmOff, Aux});
    copyMetadata(NewLoad, &I);
    SplitUsers.insert(&I);
    I.replaceAllUsesWith(NewLoad);
    return {nullptr, nullptr};
  }
  }
  return {nullptr, nullptr};
}
void SplitPtrStructs::processFunction(Function &F) {
  // ...
  LLVM_DEBUG(dbgs() << "Splitting pointer structs in function: " << F.getName()
                    << "\n");
  // Collect the instructions to rewrite up front so the visitors can freely
  // mutate the function.
  SmallVector<Instruction *> Originals;
  // ...
  for (Instruction *I : Originals) {
    auto [Rsrc, Off] = visit(*I);
    assert(((Rsrc && Off) || (!Rsrc && !Off)) &&
           "Can't have a resource but no offset");
    RsrcParts[I] = Rsrc;
    OffParts[I] = Off;
  }
  processConditionals();
  killAndReplaceSplitInstructions(Originals);

  RsrcParts.clear();
  OffParts.clear();
  SplitUsers.clear();
  Conditionals.clear();
  ConditionalTemps.clear();
}
class AMDGPULowerBufferFatPointers : public ModulePass {
  // ...
};

/// Returns true if there are values that have a buffer fat pointer in them,
/// which means we'll need to rewrite this function.
static bool containsBufferFatPointers(const Function &F,
                                      BufferFatPtrToStructTypeMap *TypeMap) {
  bool HasFatPointers = false;
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      HasFatPointers |= (I.getType() != TypeMap->remapType(I.getType()));
      for (const Value *V : I.operand_values())
        HasFatPointers |= (V->getType() != TypeMap->remapType(V->getType()));
    }
  }
  return HasFatPointers;
}

static bool hasFatPointerInterface(const Function &F,
                                   BufferFatPtrToStructTypeMap *TypeMap) {
  Type *Ty = F.getFunctionType();
  return Ty != TypeMap->remapType(Ty);
}
/// Move the body of OldF into a new function, returning it.
static Function *moveFunctionAdaptingType(Function *OldF, FunctionType *NewTy,
                                          ValueToValueMapTy &CloneMap) {
  // ...
  while (!OldF->empty()) {
    // ... splice each basic block from OldF into the new function ...
  }
  // ... for each (OldArg, NewArg) pair of arguments:
  CloneMap[&NewArg] = &OldArg;
  NewArg.takeName(&OldArg);
  Type *OldArgTy = OldArg.getType(), *NewArgTy = NewArg.getType();
  // The mapper gets confused if argument types change, so briefly give each
  // new argument its old type while redirecting uses.
  NewArg.mutateType(OldArgTy);
  OldArg.replaceAllUsesWith(&NewArg);
  NewArg.mutateType(NewArgTy);
  // ...
  if (OldArgTy != NewArgTy && !IsIntrinsic) {
    // ... drop parameter attributes that became type-incompatible ...
  }
  // ...
}

static void makeCloneInPraceMap(Function *F, ValueToValueMapTy &CloneMap) {
  // ... every block (and value) maps to itself:
  CloneMap[&BB] = &BB;
  // ...
}
// ... (module-level lowering driver; its header is elided in this excerpt)
  bool Changed = false;
  const DataLayout &DL = M.getDataLayout();
  LLVMContext &Ctx = M.getContext();
  // ...
  BufferFatPtrToStructTypeMap StructTM(DL);
  BufferFatPtrToIntTypeMap IntTM(DL);
  for (const GlobalVariable &GV : M.globals()) {
    if (GV.getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER)
      Ctx.emitError("global variables with a buffer fat pointer address "
                    "space (7) are not supported");
    Type *VT = GV.getValueType();
    if (VT != StructTM.remapType(VT))
      Ctx.emitError("global variables that contain buffer fat pointers "
                    "(address space 7 pointers) are unsupported. Use "
                    "buffer resource pointers (address space 8) instead");
  }

  // Rewrite fat-pointer-bearing constant expressions into instructions first.
  SmallVector<Constant *> Worklist;
  SmallPtrSet<Constant *, 8> Visited;
  // ... seed the worklist from instruction operands:
    if (isa<ConstantExpr, ConstantAggregate>(Op))
      Worklist.push_back(cast<Constant>(Op));
  while (!Worklist.empty()) {
    Constant *C = Worklist.pop_back_val();
    if (!Visited.insert(C).second)
      continue;
    // ...
    for (Value *Op : C->operand_values())
      if (isa<ConstantExpr, ConstantAggregate>(Op))
        Worklist.push_back(cast<Constant>(Op));
  }
  // ...

  StoreFatPtrsAsIntsAndExpandMemcpyVisitor MemOpsRewrite(&IntTM, DL,
                                                         M.getContext(), &TM);
  LegalizeBufferContentTypesVisitor BufferContentsTypeRewrite(DL,
                                                              M.getContext());
  SmallVector<std::pair<Function *, bool>> NeedsRemap;
  for (Function &F : M.functions()) {
    bool InterfaceChange = hasFatPointerInterface(F, &StructTM);
    bool BodyChanges = containsBufferFatPointers(F, &StructTM);
    Changed |= MemOpsRewrite.processFunction(F);
    if (InterfaceChange || BodyChanges) {
      NeedsRemap.push_back(std::make_pair(&F, InterfaceChange));
      Changed |= BufferContentsTypeRewrite.processFunction(F);
    }
  }
  if (NeedsRemap.empty())
    return Changed;

  ValueToValueMapTy CloneMap;
  FatPtrConstMaterializer Materializer(&StructTM, CloneMap);
  // ... (LowerInFuncs: a ValueMapper wired up with CloneMap, the struct type
  //      map, and the materializer) ...
  for (auto [F, InterfaceChange] : NeedsRemap) {
    Function *NewF = F;
    if (InterfaceChange)
      NewF = moveFunctionAdaptingType(
          F, cast<FunctionType>(StructTM.remapType(F->getFunctionType())),
          CloneMap);
    // ...
    LowerInFuncs.remapFunction(*NewF);
    // ...
    if (InterfaceChange) {
      F->replaceAllUsesWith(NewF);
      F->eraseFromParent();
    }
  }
  // ...
  SplitPtrStructs Splitter(DL, M.getContext(), &TM);
  for (Function *F : NeedsPostProcess)
    Splitter.processFunction(*F);
  // Clean up intrinsic declarations that mention the old fat-pointer type.
  for (Function *F : Intrinsics) {
    if (isRemovablePointerIntrinsic(F->getIntrinsicID())) {
      F->eraseFromParent();
    } else if (std::optional<Function *> NewF =
                   Intrinsic::remangleIntrinsicFunction(F)) {
      F->replaceAllUsesWith(*NewF);
    }
  }
  return Changed;
bool AMDGPULowerBufferFatPointers::runOnModule(Module &M) {
  // ...
}

char AMDGPULowerBufferFatPointers::ID = 0;

void AMDGPULowerBufferFatPointers::getAnalysisUsage(AnalysisUsage &AU) const {
  // ...
}

#define PASS_DESC "Lower buffer fat pointer operations to buffer resources"
// ... (INITIALIZE_PASS registration boilerplate elided) ...

ModulePass *llvm::createAMDGPULowerBufferFatPointersPass() {
  return new AMDGPULowerBufferFatPointers();
}