#define DEBUG_TYPE "systemztti"

  bool UsedAsMemCpySource = false;
  for (const User *U : V->users())
      if (isa<BitCastInst>(User) || isa<GetElementPtrInst>(User)) {
        if (Memcpy->getOperand(1) == V && !Memcpy->isVolatile()) {
          UsedAsMemCpySource = true;
  return UsedAsMemCpySource;
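// The walk above visits every user of V, looking through bitcasts and GEPs,
// and records whether V is used as the source operand of a non-volatile
// memcpy; the result feeds the inlining-bonus heuristic further below.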
  if (!isa<PointerType>(Ptr->getType()))
  for (const User *U : Ptr->users())
      if (User->getParent()->getParent() == F) {
        if (const auto *SI = dyn_cast<StoreInst>(User)) {
          if (SI->getPointerOperand() == Ptr && !SI->isVolatile())
        } else if (const auto *LI = dyn_cast<LoadInst>(User)) {
          if (LI->getPointerOperand() == Ptr && !LI->isVolatile())
        } else if (const auto *GEP = dyn_cast<GetElementPtrInst>(User)) {
          if (GEP->getPointerOperand() == Ptr)
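// This loop counts the non-volatile loads and stores of Ptr inside F,
// following GetElementPtr users so that accesses made through derived
// pointers can be counted as well.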
  for (const Argument &Arg : Callee->args()) {
    bool OtherUse = false;

      if (const auto *SI = dyn_cast<StoreInst>(&I)) {
        if (!SI->isVolatile())
          if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand()))
      } else if (const auto *LI = dyn_cast<LoadInst>(&I)) {
        if (!LI->isVolatile())
          if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()))
      } else if (const auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
        if (auto *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand())) {
          unsigned NumStores = 0, NumLoads = 0;
          Ptr2NumUses[GV] += NumLoads + NumStores;

  for (auto [Ptr, NumCalleeUses] : Ptr2NumUses)
    if (NumCalleeUses > 10) {
      unsigned CallerStores = 0, CallerLoads = 0;
      if (CallerStores + CallerLoads > 10) {

  unsigned NumStores = 0;
  unsigned NumLoads = 0;
    if (isa<AllocaInst>(CallerArg))
    Bonus += NumLoads * 50;
    Bonus += NumStores * 50;
  Bonus = std::min(Bonus, unsigned(1000));

  dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);

  // Taken together, these checks grant an inlining bonus when a callee
  // argument is used only as a memcpy source, when a global variable is
  // accessed heavily (more than 10 times) in both callee and caller, or when
  // the callee performs many loads and stores through an alloca passed in by
  // the caller; the final bonus is capped at 1000.
  if ((!ST->hasVector() && BitSize > 64) || BitSize > 128)

  if (Imm.getBitWidth() <= 64) {
    if (isInt<32>(Imm.getSExtValue()))
    if (isUInt<32>(Imm.getZExtValue()))
    if ((Imm.getZExtValue() & 0xffffffff) == 0)
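// getIntImmCost() treats immediates that fit a signed or unsigned 32-bit
// encoding, or whose low 32 bits are all zero, as cheap to materialize.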
  case Instruction::GetElementPtr:
  case Instruction::Store:
    if (Idx == 0 && Imm.getBitWidth() <= 64) {
      if (isInt<16>(Imm.getSExtValue()))
  case Instruction::ICmp:
    if (Idx == 1 && Imm.getBitWidth() <= 64) {
      if (isInt<32>(Imm.getSExtValue()))
      if (isUInt<32>(Imm.getZExtValue()))
  case Instruction::Add:
  case Instruction::Sub:
    if (Idx == 1 && Imm.getBitWidth() <= 64) {
      if (isUInt<32>(Imm.getZExtValue()))
      if (isUInt<32>(-Imm.getSExtValue()))
  case Instruction::Mul:
    if (Idx == 1 && Imm.getBitWidth() <= 64) {
      if (isInt<32>(Imm.getSExtValue()))
  case Instruction::Or:
  case Instruction::Xor:
    if (Idx == 1 && Imm.getBitWidth() <= 64) {
      if (isUInt<32>(Imm.getZExtValue()))
      if ((Imm.getZExtValue() & 0xffffffff) == 0)
  case Instruction::And:
    if (Idx == 1 && Imm.getBitWidth() <= 64) {
      if (isUInt<32>(~Imm.getZExtValue()))
      if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
      if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
    if (Idx == 1 && Imm.getBitWidth() <= 64) {
      if (isUInt<32>(Imm.getZExtValue()))
      if (isUInt<32>(-Imm.getSExtValue()))
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1 && Imm.getBitWidth() <= 64) {
      if (isInt<32>(Imm.getSExtValue()))
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
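// Illustrative sketch (not part of this file): a client could query these
// immediate-cost hooks through the TargetTransformInfo interface. The TTI
// reference, LLVMContext and the chosen constant are assumptions made purely
// for illustration:
//
//   APInt Imm(/*numBits=*/64, /*val=*/40000);
//   InstructionCost Cost = TTI.getIntImmCostInst(
//       Instruction::Add, /*Idx=*/1, Imm, Type::getInt64Ty(Ctx),
//       TargetTransformInfo::TCK_RecipThroughput);
//   // A free or very low result indicates the constant can be encoded
//   // directly in the add instruction.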
  if (ST->hasPopulationCount() && TyWidth <= 64)

  bool HasCall = false;
  for (auto &BB : L->blocks())
    for (auto &I : *BB) {
      if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
          if (F->getIntrinsicID() == Intrinsic::memcpy ||
              F->getIntrinsicID() == Intrinsic::memset)
      if (isa<StoreInst>(&I)) {
        Type *MemAccessTy = I.getOperand(0)->getType();

    unsigned const NumStoresVal = NumStores.getValue();
    unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX);
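// The cap above limits unrolling so that an unrolled iteration contains at
// most roughly 12 stores (Max = 12 / NumStores); loops without stores are
// left uncapped.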
  bool Vector = (ClassID == 1);

                                              unsigned NumStridedMemAccesses,
                                              unsigned NumPrefetches,
                                              bool HasCall) const {
  if (NumPrefetches > 16)

  if (NumStridedMemAccesses > 32 && !HasCall &&
      (NumMemAccesses - NumStridedMemAccesses) * 32 <= NumStridedMemAccesses)

  return ST->hasMiscellaneousExtensions3() ? 8192 : 2048;
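// getMinPrefetchStride() normally returns a minimum stride of 8192 bytes
// (2048 without miscellaneous-extensions-3); the earlier checks special-case
// loops that already issue many prefetches and call-free loops whose accesses
// are almost all strided.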
  if (isa<LoadInst>(Op) && Op->hasOneUse()) {
    const Instruction *UserI = cast<Instruction>(*Op->user_begin());
    return !isa<StoreInst>(UserI);

  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
         "Type does not match the number of values.");
  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {

  assert(Size > 0 && "Element must have non-zero size.");

  auto *VTy = cast<FixedVectorType>(Ty);
  assert(WideBits > 0 && "Could not compute size of vector");
  return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
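// Vector types are modeled in 128-bit vector registers; the expression above
// is simply divideCeil(WideBits, 128U). For example, an <8 x i32> value is
// 256 bits wide and therefore occupies two vector registers.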
                                         Op2Info, Args, CxtI);

  const unsigned DivInstrCost = 20;
  const unsigned DivMulSeqCost = 10;
  const unsigned SDivPow2Cost = 4;

      Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
  bool UnsignedDivRem =
      Opcode == Instruction::UDiv || Opcode == Instruction::URem;

  bool DivRemConst = false;
  bool DivRemConstPow2 = false;
  if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) {
    if (const Constant *C = dyn_cast<Constant>(Args[1])) {
          (C->getType()->isVectorTy()
               ? dyn_cast_or_null<const ConstantInt>(C->getSplatValue())
               : dyn_cast<const ConstantInt>(C));
        DivRemConstPow2 = true;
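// Division or remainder by a constant is classified here: a (possibly
// splatted) power-of-two divisor sets DivRemConstPow2 and is costed much more
// cheaply below (SDivPow2Cost, or 1 for the unsigned case), other constant
// divisors use the multiply-based sequence cost (DivMulSeqCost), and
// everything else pays the full divide cost (DivInstrCost).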
    if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
        Opcode == Instruction::FMul || Opcode == Instruction::FDiv)

    if (Opcode == Instruction::FRem)

    if (Args.size() == 2) {
      if (Opcode == Instruction::Xor) {
        for (const Value *A : Args) {
            if (I->hasOneUse() &&
                (I->getOpcode() == Instruction::Or ||
                 I->getOpcode() == Instruction::And ||
                 I->getOpcode() == Instruction::Xor))
              if ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
                   (I->getOpcode() == Instruction::Or || ST->hasVectorEnhancements1())))
      else if (Opcode == Instruction::And || Opcode == Instruction::Or) {
        for (const Value *A : Args) {
            if ((I->hasOneUse() && I->getOpcode() == Instruction::Xor) &&
                ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
                  (Opcode == Instruction::And || ST->hasVectorEnhancements1()))))

    if (Opcode == Instruction::Or)

    if (Opcode == Instruction::Xor && ScalarBits == 1) {
      if (ST->hasLoadStoreOnCond2())

      return (SignedDivRem ? SDivPow2Cost : 1);
      return DivMulSeqCost;
    if (SignedDivRem || UnsignedDivRem)

  else if (ST->hasVector()) {
    auto *VTy = cast<FixedVectorType>(Ty);
    unsigned VF = VTy->getNumElements();

    if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
        Opcode == Instruction::AShr) {

        return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
        return VF * DivMulSeqCost +
    if (SignedDivRem || UnsignedDivRem) {
      if (ST->hasVectorEnhancements3() && ScalarBits >= 32)
        return NumVectors * DivInstrCost;

    if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
        Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
      switch (ScalarBits) {
        if (ST->hasVectorEnhancements1())

    if (Opcode == Instruction::FRem) {
          (VF * LIBCALL_COST) +
      if (VF == 2 && ScalarBits == 32)
  if (ST->hasVector()) {
      return (Index == 0 ? 0 : NumVectors);
    return NumVectors - 1;

         "Packing must reduce size of vector type.");
         "Packing should not change number of elements.");
  unsigned VF = cast<FixedVectorType>(SrcTy)->getNumElements();
  for (unsigned P = 0; P < Log2Diff; ++P) {

         "Should only be called with vector types.");
  unsigned PackCost = 0;
  if (SrcScalarBits > DstScalarBits)
  else if (SrcScalarBits < DstScalarBits) {
    PackCost = Log2Diff * DstNumParts;
    PackCost += DstNumParts - 1;
  Type *OpTy = nullptr;
  if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
    OpTy = CI->getOperand(0)->getType();
  else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
    if (LogicI->getNumOperands() == 2)
      if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
        if (isa<CmpInst>(LogicI->getOperand(1)))
          OpTy = CI0->getOperand(0)->getType();

  if (OpTy != nullptr) {

  auto *DstVTy = cast<FixedVectorType>(Dst);
  unsigned VF = DstVTy->getNumElements();
  if (CmpOpTy != nullptr)
  if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP)
  return BaseCost == 0 ? BaseCost : 1;

  unsigned DstScalarBits = Dst->getScalarSizeInBits();
  unsigned SrcScalarBits = Src->getScalarSizeInBits();

  if (!Src->isVectorTy()) {
    if (Dst->isVectorTy())

    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
      if (Src->isIntegerTy(128))
      if (SrcScalarBits >= 32 ||
          (I != nullptr && isa<LoadInst>(I->getOperand(0))))
      return SrcScalarBits > 1 ? 2 : 5;

    if ((Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) &&
        Dst->isIntegerTy(128))

    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt)) {
      if (Src->isIntegerTy(1)) {
        if (DstScalarBits == 128) {
          if (Opcode == Instruction::SExt && ST->hasVectorEnhancements3())
        if (ST->hasLoadStoreOnCond2())
        if (Opcode == Instruction::SExt)
          Cost = (DstScalarBits < 64 ? 3 : 4);
        if (Opcode == Instruction::ZExt)
      else if (isInt128InVR(Dst)) {
        if (Opcode == Instruction::ZExt && I != nullptr)
          if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))

    if (Opcode == Instruction::Trunc && isInt128InVR(Src) && I != nullptr) {
      if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
      bool OnlyTruncatingStores = true;
      for (const User *U : I->users())
        if (!isa<StoreInst>(U)) {
          OnlyTruncatingStores = false;
      if (OnlyTruncatingStores)
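// A truncation of an i128 held in a vector register can be treated as
// essentially free when its only users are stores: the narrowing then happens
// as part of the truncating store itself rather than as a separate operation.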
  else if (ST->hasVector()) {
    auto *SrcVecTy = cast<FixedVectorType>(Src);
    auto *DstVecTy = dyn_cast<FixedVectorType>(Dst);
    unsigned VF = SrcVecTy->getNumElements();

    if (Opcode == Instruction::Trunc) {
      if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())

    if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
      if (SrcScalarBits >= 8) {
        if (Opcode == Instruction::ZExt)
          return NumDstVectors;
        unsigned NumSrcVectorOps =
            (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors)
                            : (NumDstVectors / 2));
        return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
      else if (SrcScalarBits == 1)

    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
        Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
      if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) {
        if (SrcScalarBits == DstScalarBits)
          return NumDstVectors;
      if (SrcScalarBits == 1)
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind);
      bool NeedsInserts = true, NeedsExtracts = true;
      if (DstScalarBits == 128 &&
          (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
        NeedsInserts = false;
      if (SrcScalarBits == 128 &&
          (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
        NeedsExtracts = false;
      if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)

    if (Opcode == Instruction::FPTrunc) {
      if (SrcScalarBits == 128)
      return VF / 2 + std::max(1U, VF / 4);

    if (Opcode == Instruction::FPExt) {
      if (SrcScalarBits == 32 && DstScalarBits == 64) {
  unsigned ExtCost = 0;
    if (!isa<LoadInst>(Op) && !isa<ConstantInt>(Op))

    case Instruction::ICmp: {
      if (I != nullptr && (ScalarBits == 32 || ScalarBits == 64))
        if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
          if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
            if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&

    case Instruction::Select:
      if (ICmpInst *CI = dyn_cast<ICmpInst>(I->getOperand(0)))
        if (CI->getOperand(0)->getType()->isIntegerTy(128))
          return ST->hasVectorEnhancements3() ? 1 : 4;
      return !isInt128InVR(ValTy) ? 1 : 4;

  else if (ST->hasVector()) {
    unsigned VF = cast<FixedVectorType>(ValTy)->getNumElements();

    if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
      unsigned PredicateExtraCost = 0;
        switch (cast<CmpInst>(I)->getPredicate()) {
          PredicateExtraCost = 1;
          PredicateExtraCost = 2;
      unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));

      assert(Opcode == Instruction::Select);
      unsigned PackCost = 0;
      if (CmpOpTy != nullptr)

                                               const Value *Op1) const {
  if (Opcode == Instruction::InsertElement) {
      return ((Index % 2 == 0) ? 1 : 0);

  if (Opcode == Instruction::ExtractElement) {
  unsigned TruncBits = 0;
  unsigned SExtBits = 0;
  unsigned ZExtBits = 0;
    if (isa<TruncInst>(UserI))
      TruncBits = UserBits;
    else if (isa<SExtInst>(UserI))
      SExtBits = UserBits;
    else if (isa<ZExtInst>(UserI))
      ZExtBits = UserBits;
  if (TruncBits || SExtBits || ZExtBits) {
    FoldedValue = UserI;
    UserI = cast<Instruction>(*UserI->user_begin());

  if ((UserI->getOpcode() == Instruction::Sub ||
       UserI->getOpcode() == Instruction::SDiv ||
       UserI->getOpcode() == Instruction::UDiv) &&

  unsigned LoadOrTruncBits =
      ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits));
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::ICmp:
    if (LoadedBits == 32 && ZExtBits == 64)
  case Instruction::Mul:
    if (UserI->getOpcode() != Instruction::ICmp) {
      if (LoadedBits == 16 &&
           (SExtBits == 64 && ST->hasMiscellaneousExtensions2())))
      if (LoadOrTruncBits == 16)
  case Instruction::SDiv:
    if (LoadedBits == 32 && SExtBits == 64)
  case Instruction::UDiv:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    if (UserI->getOpcode() == Instruction::ICmp)
      if (CI->getValue().isIntN(16))
    return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
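// isFoldableLoad() decides whether a load (possibly seen through a single
// trunc/sext/zext user) can be folded into the register-memory form of its
// user instruction; such loads are then treated as folded into their user
// rather than costed separately.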
  if (auto *CI = dyn_cast<CallInst>(I))
    if (auto *F = CI->getCalledFunction())
      if (F->getIntrinsicID() == Intrinsic::bswap)

  assert(!Src->isVoidTy() && "Invalid type");

  if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
    for (unsigned i = 0; i < 2; ++i) {
      LoadInst *OtherLoad = dyn_cast<LoadInst>(OtherOp);
          (isa<TruncInst>(OtherOp) || isa<SExtInst>(OtherOp) ||
           isa<ZExtInst>(OtherOp)))
        OtherLoad = dyn_cast<LoadInst>(OtherOp->getOperand(0));

  if (Src->isFP128Ty() && !ST->hasVectorEnhancements1())

  if (((!Src->isVectorTy() && NumOps == 1) || ST->hasVectorEnhancements2()) &&
    if (Opcode == Instruction::Load && I->hasOneUse()) {
      const Instruction *LdUser = cast<Instruction>(*I->user_begin());
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
      const Value *StoredVal = SI->getValueOperand();
                                                  bool UseMaskForCond,
                                                  bool UseMaskForGaps) const {
  if (UseMaskForCond || UseMaskForGaps)
                                             UseMaskForCond, UseMaskForGaps);
  assert(isa<VectorType>(VecTy) &&
         "Expect a vector type for interleaved memory op");

  unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
  unsigned VF = NumElts / Factor;
  unsigned NumPermutes = 0;

  if (Opcode == Instruction::Load) {
    BitVector UsedInsts(NumVectorMemOps, false);
    std::vector<BitVector> ValueVecs(Factor, BitVector(NumVectorMemOps, false));
    for (unsigned Index : Indices)
      for (unsigned Elt = 0; Elt < VF; ++Elt) {
        unsigned Vec = (Index + Elt * Factor) / NumEltsPerVecReg;
        ValueVecs[Index].set(Vec);
    NumVectorMemOps = UsedInsts.count();

    for (unsigned Index : Indices) {
      unsigned NumSrcVecs = ValueVecs[Index].count();
      assert(NumSrcVecs >= NumDstVecs && "Expected at least as many sources");
      NumPermutes += std::max(1U, NumSrcVecs - NumDstVecs);

    unsigned NumSrcVecs = std::min(NumEltsPerVecReg, Factor);
    unsigned NumDstVecs = NumVectorMemOps;
    NumPermutes += (NumDstVecs * NumSrcVecs) - NumDstVecs;

  return NumVectorMemOps + NumPermutes;
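// Worked example (illustrative, not from the source): an interleaved load of
// <16 x i32> with Factor = 4 and all four indices used gives VF = 4 and
// NumEltsPerVecReg = 4, so four 128-bit loads are issued and every extracted
// value draws elements from all four of them. Assuming each extracted
// <4 x i32> result fits a single destination vector, each index contributes
// 4 - 1 = 3 permutes, for a total cost of 4 + 4 * 3 = 16.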
  Cost += (ScalarBits < 32) ? 3 : 2;

                                          unsigned ScalarBits) {

  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
         Opcode == Instruction::Add || Opcode == Instruction::Mul;

                                           std::optional<FastMathFlags> FMF,
  if (Opcode == Instruction::Add)
  if ((Opcode == Instruction::FAdd) || (Opcode == Instruction::FMul))
  if (ST->hasVectorEnhancements1()) {
    Cost += NumVectors - 1;

  if (RetTy->isVectorTy() && ID == Intrinsic::bswap)

  if (!ST->hasVector())
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
    auto *VType = cast<FixedVectorType>(II->getOperand(0)->getType());
    return VType->getScalarSizeInBits() >= 64 ||