28#include "llvm/IR/IntrinsicsAMDGPU.h"
37#define DEBUG_TYPE "amdgpu-codegenprepare"
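// Command-line knobs controlling the individual transforms performed by this
// pass (load widening, large-PHI breaking, mul24 formation, and the integer /
// floating-point division expansions below).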
45 "amdgpu-codegenprepare-widen-constant-loads",
46 cl::desc(
"Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
51 BreakLargePHIs(
"amdgpu-codegenprepare-break-large-phis",
52 cl::desc(
"Break large PHI nodes for DAGISel"),
56 ForceBreakLargePHIs(
"amdgpu-codegenprepare-force-break-large-phis",
57 cl::desc(
"For testing purposes, always break large "
58 "PHIs even if it isn't profitable."),
62 "amdgpu-codegenprepare-break-large-phis-threshold",
63 cl::desc(
"Minimum type size in bits for breaking large PHI nodes"),
67 "amdgpu-codegenprepare-mul24",
68 cl::desc(
"Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
74 "amdgpu-codegenprepare-expand-div64",
75 cl::desc(
"Expand 64-bit division in AMDGPUCodeGenPrepare"),
82 "amdgpu-codegenprepare-disable-idiv-expansion",
83 cl::desc(
"Prevent expanding integer division in AMDGPUCodeGenPrepare"),
89 "amdgpu-codegenprepare-disable-fdiv-expansion",
90 cl::desc(
"Prevent expanding floating point division in AMDGPUCodeGenPrepare"),
class AMDGPUCodeGenPrepareImpl
    : public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {

  const bool HasFP32DenormalFlush;
  bool FlowChanged = false;
  mutable Function *SqrtF32 = nullptr;
  mutable Function *LdexpF32 = nullptr;

      : F(F), ST(TM.getSubtarget<GCNSubtarget>(F)), TM(TM), TLI(TLI), AC(AC),
        DT(DT), UA(UA), DL(F.getDataLayout()),

        F.getParent(), Intrinsic::amdgcn_sqrt, {Type::getFloatTy(Ctx)});

        F.getParent(), Intrinsic::ldexp,
        {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
  bool canBreakPHINode(const PHINode &I);

  bool isLegalFloatingTy(const Type *T) const;

  bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const {
    return HasFP32DenormalFlush ||

  unsigned numBitsUnsigned(Value *Op) const;
  unsigned numBitsSigned(Value *Op) const;

                            unsigned MaxDivBits, bool Signed) const;
                         bool IsDiv, bool IsSigned) const;
                             bool IsDiv, bool IsSigned) const;

  bool canWidenScalarExtLoad(LoadInst &I) const;

                          float ReqdAccuracy) const;
                             float ReqdAccuracy) const;

  std::pair<Value *, Value *> getFrexpResults(IRBuilder<> &Builder,
                         bool IsNegative) const;
  if (!ExpandDiv64InIR)

bool AMDGPUCodeGenPrepareImpl::run() {
  BreakPhiNodesCache.clear();
  bool MadeChange = false;

    NextBB = std::next(FI);

      if (NextInstBB != BB) {
bool AMDGPUCodeGenPrepareImpl::isSigned(const BinaryOperator &I) const {
  return I.getOpcode() == Instruction::AShr ||
         I.getOpcode() == Instruction::SDiv ||
         I.getOpcode() == Instruction::SRem;
}

bool AMDGPUCodeGenPrepareImpl::isSigned(const SelectInst &I) const {
  return isa<ICmpInst>(I.getOperand(0)) &&
         cast<ICmpInst>(I.getOperand(0))->isSigned();
}

bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
  Type *Ty = I.getType();
  int TySize = DL.getTypeSizeInBits(Ty);
  Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);

  return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {

unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const {

  auto *VT = dyn_cast<FixedVectorType>(V->getType());

  for (int I = 0, E = VT->getNumElements(); I != E; ++I)

  for (int I = 0, E = Values.size(); I != E; ++I)
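// replaceMulWithMul24: if both multiply operands are known to fit in 24 bits
// (numBitsUnsigned / numBitsSigned above), the multiply is rewritten with the
// 24-bit hardware multiplier via amdgcn.mul.u24 / amdgcn.mul.i24, element-wise
// for vectors. Roughly (sketch, not taken verbatim from the file):
//   %r = mul i32 %a, %b  -->  %r = call i32 @llvm.amdgcn.mul.u24(i32 %a, i32 %b)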
bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
  if (I.getOpcode() != Instruction::Mul)

  Type *Ty = I.getType();
  if (Size <= 16 && ST.has16BitInsts())

  if (UA.isUniform(&I))

  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  unsigned LHSBits = 0, RHSBits = 0;
  bool IsSigned = false;

  if (ST.hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
      (RHSBits = numBitsUnsigned(RHS)) <= 24) {
  } else if (ST.hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
             (RHSBits = numBitsSigned(RHS)) <= 24) {

  Type *DstTy = LHSVals[0]->getType();

  for (int I = 0, E = LHSVals.size(); I != E; ++I) {
    Value *LHS = IsSigned ? Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty)
                          : Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
    Value *RHS = IsSigned ? Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty)
                          : Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
        IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
    Result = IsSigned ? Builder.CreateSExtOrTrunc(Result, DstTy)
                      : Builder.CreateZExtOrTrunc(Result, DstTy);

  I.replaceAllUsesWith(NewVal);
  if (SelectInst *Sel = dyn_cast<SelectInst>(V))

  if ((Cast = dyn_cast<CastInst>(V))) {
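// foldBinOpIntoSelect: when one operand of a binary operator is a select (or a
// cast of one) with constant arms and the other operand is also constant, the
// operation is constant-folded into each arm and only the select remains.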
bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {
  if (!CBO || !CT || !CF)

  if (!FoldedT || isa<ConstantExpr>(FoldedT))

  if (!FoldedF || isa<ConstantExpr>(FoldedF))

  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&BO))
    Builder.setFastMathFlags(FPOp->getFastMathFlags());
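// getFrexpResults: split a value into mantissa and exponent (frexp) so the
// reciprocal, division, sqrt and rsq expansions below can operate on a scaled
// mantissa and fold the exponent back in with ldexp.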
std::pair<Value *, Value *>
AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
  Type *Ty = Src->getType();

  return {FrexpMant, FrexpExp};

                                            bool IsNegative) const {
  auto [FrexpMant, FrexpExp] = getFrexpResults(Builder, Src);

  return Builder.CreateCall(getLdexpF32(), {Rcp, ScaleFactor});

  if (HasFP32DenormalFlush && ST.hasFractBug() && !ST.hasFastFMAF32() &&

  auto [FrexpMantRHS, FrexpExpRHS] = getFrexpResults(Builder, RHS);

  auto [FrexpMantLHS, FrexpExpLHS] = getFrexpResults(Builder, LHS);

  Type *Ty = Src->getType();
      Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));

  Value *InputScaleFactor =

  Value *OutputScaleFactor =

  return Builder.CreateCall(getLdexpF32(), {Sqrt, OutputScaleFactor});

  Type *Ty = Src->getType();
      Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));
  Constant *One = ConstantFP::get(Ty, 1.0);
  Constant *InputScale = ConstantFP::get(Ty, 0x1.0p+24);
      ConstantFP::get(Ty, IsNegative ? -0x1.0p+12 : 0x1.0p+12);
      NeedScale, OutputScale, IsNegative ? ConstantFP::get(Ty, -1.0) : One);

  return Builder.CreateFMul(Rsq, OutputScaleFactor);
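// optimizeWithRsq / optimizeWithRcp: replace 1.0 / sqrt(x) and c / x with the
// hardware rsq/rcp approximations when the fast-math flags or the required
// accuracy allow it; when denormal inputs must be honoured, fall back to the
// scaled emitRcpIEEE1ULP / emitRsqIEEE1ULP expansions above, which compare
// against the smallest normalized value and rescale with ldexp.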
bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,

Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
  const ConstantFP *CLHS = dyn_cast<ConstantFP>(Num);
  bool IsNegative = false;
      canIgnoreDenormalInput(Den, CtxI)) {

  if (const ConstantFP *CLHS = dyn_cast<ConstantFP>(Num)) {
    bool IsNegative = false;
      if (HasFP32DenormalFlush || FMF.approxFunc()) {

      return emitRcpIEEE1ULP(Builder, Src, IsNegative);

  if (HasFP32DenormalFlush || FMF.approxFunc()) {
    Value *Recip = emitRcpIEEE1ULP(Builder, Den, false);
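// optimizeWithFDivFast: use amdgcn.fdiv.fast when only 2.5 ulp of accuracy is
// required; without denormal flushing it is restricted to numerators of +/-1.0.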
Value *AMDGPUCodeGenPrepareImpl::optimizeWithFDivFast(
  if (ReqdAccuracy < 2.5f)

  bool NumIsOne = false;
  if (const ConstantFP *CNum = dyn_cast<ConstantFP>(Num)) {
    if (CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0))

  if (!HasFP32DenormalFlush && !NumIsOne)

  return Builder.CreateIntrinsic(Intrinsic::amdgcn_fdiv_fast, {Num, Den});
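// visitFDivElement: per scalar element, prefer the rsq path (when the
// denominator is a one-use sqrt), then rcp, then fdiv.fast, and finally the
// frexp-based division expansion.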
Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
                                                  float ReqdDivAccuracy) const {
        optimizeWithRsq(Builder, Num, RsqOp, DivFMF, SqrtFMF, FDivInst);

  Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);

  Value *FDivFast = optimizeWithFDivFast(Builder, Num, Den, ReqdDivAccuracy);

  return emitFrexpDiv(Builder, Num, Den, DivFMF);

  if (DisableFDivExpand)

  Value *RsqOp = nullptr;
  auto *DenII = dyn_cast<IntrinsicInst>(Den);
  if (DenII && DenII->getIntrinsicID() == Intrinsic::sqrt &&
      DenII->hasOneUse()) {
    const auto *SqrtOp = cast<FPMathOperator>(DenII);
    if (canOptimizeWithRsq(SqrtOp, DivFMF, SqrtFMF))

  const bool AllowInaccurateRcp = DivFMF.approxFunc();
  if (!RsqOp && AllowInaccurateRcp)

  if (ReqdAccuracy < 1.0f)

  for (int I = 0, E = NumVals.size(); I != E; ++I) {
    Value *NumElt = NumVals[I];
    Value *DenElt = DenVals[I];
    Value *RsqDenElt = RsqOp ? RsqDenVals[I] : nullptr;

        visitFDivElement(Builder, NumElt, DenElt, DivFMF, SqrtFMF, RsqDenElt,
                         cast<Instruction>(FPOp), ReqdAccuracy);

      if (auto *NewEltInst = dyn_cast<Instruction>(NewElt))
        NewEltInst->copyMetadata(FDiv);

    ResultVals[I] = NewElt;
  return std::pair(Lo, Hi);
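// getDivNumBits: bound the number of bits a division result really needs
// using sign-bit and leading-zero information; callers give up on narrowing
// when the returned width exceeds their MaxDivBits limit.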
                                                bool IsSigned) const {
    unsigned DivBits = SSBits - RHSSignBits + 1;
    if (DivBits > MaxDivBits)

    unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
    DivBits = SSBits - SignBits + 1;

  unsigned DivBits = SSBits - RHSSignBits;
  if (DivBits > MaxDivBits)

  unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
  DivBits = SSBits - SignBits;
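// expandDivRem24: division whose operands fit in 24 bits is performed through
// the 32-bit float pipeline (reciprocal plus a fixup multiply-add), then the
// result is truncated back to DivBits with a shift or mask.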
                                              Value *Den, bool IsDiv,
                                              bool IsSigned) const {
  unsigned DivBits = getDivNumBits(I, Num, Den, 24, IsSigned);

  return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned);

Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
    unsigned DivBits, bool IsDiv, bool IsSigned) const {

  auto FMAD = !ST.hasMadMacF32Insts()
                                 {FQNeg->getType()}, {FQNeg, FB, FA}, FQ);

  if (DivBits != 0 && DivBits < 32) {
      int InRegBits = 32 - DivBits;

      Res = Builder.CreateShl(Res, InRegBits);
          = Builder.getInt32((UINT64_C(1) << DivBits) - 1);
      Res = Builder.CreateAnd(Res, TruncMask);
bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
  if (Constant *C = dyn_cast<Constant>(Den)) {
    if (C->getType()->getScalarSizeInBits() <= 32)

    if (BinOpDen->getOpcode() == Instruction::Shl &&
        isa<Constant>(BinOpDen->getOperand(0)) &&
  assert(Opc == Instruction::URem || Opc == Instruction::UDiv ||
         Opc == Instruction::SRem || Opc == Instruction::SDiv);

  if (divHasSpecialOptimization(I, X, Y))

  bool IsDiv = Opc == Instruction::UDiv || Opc == Instruction::SDiv;
  bool IsSigned = Opc == Instruction::SRem || Opc == Instruction::SDiv;

  Type *Ty = X->getType();

  if (Value *Res = expandDivRem24(Builder, I, X, Y, IsDiv, IsSigned)) {

  Value *Sign = nullptr;
    Sign = IsDiv ? Builder.CreateXor(SignX, SignY) : SignX;

  Constant *Scale = ConstantFP::get(F32Ty, llvm::bit_cast<float>(0x4F7FFFFE));
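// shrinkDivRem64: if a 64-bit division provably needs at most 32 significant
// bits, perform it in the narrower width (24- or 32-bit expansion) and extend
// the result.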
  if (!ExpandDiv64InIR && divHasSpecialOptimization(I, Num, Den))

  bool IsDiv = Opc == Instruction::SDiv || Opc == Instruction::UDiv;
  bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;

  unsigned NumDivBits = getDivNumBits(I, Num, Den, 32, IsSigned);
  if (NumDivBits > 32)

  Value *Narrowed = nullptr;
  if (NumDivBits <= 24) {
    Narrowed = expandDivRem24Impl(Builder, I, Num, Den, NumDivBits,
  } else if (NumDivBits <= 32) {
    Narrowed = expandDivRem32(Builder, I, Num, Den);
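// expandDivRem64: full 64-bit cases are handed to the generic IR expanders
// (expandDivisionUpTo64Bits / expandRemainderUpTo64Bits) in a late pass over
// the queued instructions.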
void AMDGPUCodeGenPrepareImpl::expandDivRem64(BinaryOperator &I) const {
  if (Opc == Instruction::UDiv || Opc == Instruction::SDiv) {

  if (Opc == Instruction::URem || Opc == Instruction::SRem) {
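// tryNarrowMathIfNoOverflow: shrink an add/mul to the smallest legal integer
// type when known bits prove the result cannot overflow, but only if the
// target cost model says the narrow op plus the zext back is cheaper.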
  unsigned Opc = I->getOpcode();
  Type *OldType = I->getType();

  if (Opc != Instruction::Add && Opc != Instruction::Mul)

  if (Opc != Instruction::Add && Opc != Instruction::Mul)
                     "Instruction::Mul.");

  MaxBitsNeeded = std::max<unsigned>(bit_ceil(MaxBitsNeeded), 8);
  Type *NewType = DL.getSmallestLegalIntType(I->getContext(), MaxBitsNeeded);

  if (NewBit >= OrigBit)
    NewType = I->getType()->getWithNewBitWidth(NewBit);

  int NumOfNonConstOps = 2;
  if (isa<Constant>(I->getOperand(0)) || isa<Constant>(I->getOperand(1))) {
    NumOfNonConstOps = 1;

  if (NewCost >= OldCost)

  I->replaceAllUsesWith(Zext);
  I->eraseFromParent();
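// visitBinaryOperator: driver for the integer transforms -- fold into selects,
// form mul24, try narrowing, and expand signed/unsigned div/rem (scalarizing
// vector cases); 64-bit divisions are either shrunk here or queued for
// expandDivRem64 when ExpandDiv64InIR is set.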
bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
  if (foldBinOpIntoSelect(I))

  if (UseMul24Intrin && replaceMulWithMul24(I))

                              TM.getTargetTransformInfo(F), DL))

  bool Changed = false;
  Type *Ty = I.getType();
  Value *NewDiv = nullptr;

  if ((Opc == Instruction::URem || Opc == Instruction::UDiv ||
       Opc == Instruction::SRem || Opc == Instruction::SDiv) &&
      !DisableIDivExpand) {
    Value *Num = I.getOperand(0);
    Value *Den = I.getOperand(1);

    if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
      for (unsigned N = 0, E = VT->getNumElements(); N != E; ++N) {
        if (ScalarSize <= 32) {
          NewElt = expandDivRem32(Builder, I, NumEltN, DenEltN);
          NewElt = shrinkDivRem64(Builder, I, NumEltN, DenEltN);

        if (auto *NewEltBO = dyn_cast<BinaryOperator>(NewElt))

        if (auto *NewEltI = dyn_cast<Instruction>(NewElt))
          NewEltI->copyIRFlags(&I);

      if (ScalarSize <= 32)
        NewDiv = expandDivRem32(Builder, I, Num, Den);
        NewDiv = shrinkDivRem64(Builder, I, Num, Den);

    I.replaceAllUsesWith(NewDiv);
    I.eraseFromParent();

  if (ExpandDiv64InIR) {
      expandDivRem64(*Div);
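// visitLoadInst: widen uniform, sufficiently aligned sub-dword loads from the
// constant address space to 32 bits, adjusting any !range metadata and
// truncating the result back to the original type.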
bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
      canWidenScalarExtLoad(I)) {

    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
          mdconst::extract<ConstantInt>(Range->getOperand(0));

      if (Lower->isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);

        WidenLoad->setMetadata(LLVMContext::MD_range,

    int TySize = DL.getTypeSizeInBits(I.getType());

    I.replaceAllUsesWith(ValOrig);
    I.eraseFromParent();
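// visitSelectInst: a NaN-guarded select around the value matched by
// matchFractPat (in either arm order) is collapsed into the fract expansion
// produced by applyFractPat.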
bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
  auto *IITrue = dyn_cast<IntrinsicInst>(TrueVal);
  auto *IIFalse = dyn_cast<IntrinsicInst>(FalseVal);

  Value *Fract = nullptr;
  if (Pred == FCmpInst::FCMP_UNO && TrueVal == CmpVal && IIFalse &&
      CmpVal == matchFractPat(*IIFalse)) {
    Fract = applyFractPat(Builder, CmpVal);
  } else if (Pred == FCmpInst::FCMP_ORD && FalseVal == CmpVal && IITrue &&
             CmpVal == matchFractPat(*IITrue)) {
    Fract = applyFractPat(Builder, CmpVal);

  I.replaceAllUsesWith(Fract);
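// isInterestingPHIIncomingValue: an incoming value is worth slicing when it is
// a chain of insertelements covering the vector within a single block, a
// constant, or a shufflevector built from such values.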
  const auto *IA = dyn_cast<Instruction>(A);
  const auto *IB = dyn_cast<Instruction>(B);
  return IA && IB && IA->getParent() == IB->getParent();
}

  const auto *FVT = dyn_cast<FixedVectorType>(V->getType());

  const Value *CurVal = V;

  BitVector EltsCovered(FVT->getNumElements());
  while (const auto *IE = dyn_cast<InsertElementInst>(CurVal)) {
    const auto *Idx = dyn_cast<ConstantInt>(IE->getOperand(2));

    if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())

    const auto *VecSrc = IE->getOperand(0);

    if (isa<Instruction>(VecSrc) && !areInSameBB(VecSrc, IE))

    EltsCovered.set(Idx->getZExtValue());

    if (EltsCovered.all())

  if (isa<Constant>(CurVal))

  if (const auto *SV = dyn_cast<ShuffleVectorInst>(CurVal)) {
    return isa<Constant>(SV->getOperand(1)) ||
  const auto [It, Inserted] = SeenPHIs.insert(&I);

  for (const Value *Inc : I.incoming_values()) {
    if (const auto *PhiInc = dyn_cast<PHINode>(Inc))

  for (const User *U : I.users()) {
    if (const auto *PhiU = dyn_cast<PHINode>(U))
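// canBreakPHINode: collect the whole web of PHIs feeding and fed by this one
// (collectPHINodes above) and break them only if at least two thirds of the
// web has interesting incoming values; the verdict is cached for every PHI in
// the web.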
bool AMDGPUCodeGenPrepareImpl::canBreakPHINode(const PHINode &I) {
  if (const auto It = BreakPhiNodesCache.find(&I);
      It != BreakPhiNodesCache.end())

  for (const PHINode *WLP : WorkList) {
    assert(BreakPhiNodesCache.count(WLP) == 0);

  const auto Threshold = (alignTo(WorkList.size() * 2, 3) / 3);
  unsigned NumBreakablePHIs = 0;
  bool CanBreak = false;
  for (const PHINode *Cur : WorkList) {
      if (++NumBreakablePHIs >= Threshold) {

  for (const PHINode *Cur : WorkList)
    BreakPhiNodesCache[Cur] = CanBreak;
  Value *&Res = SlicedVals[{BB, Inc}];

  if (Instruction *IncInst = dyn_cast<Instruction>(Inc))
    B.SetCurrentDebugLocation(IncInst->getDebugLoc());

    Res = B.CreateShuffleVector(Inc, Mask, NewValName);

    Res = B.CreateExtractElement(Inc, Idx, NewValName);
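// visitPHINode: break a large vector PHI into per-slice PHIs (32-bit
// subvectors for 8/16-bit elements, single elements otherwise) and rebuild the
// original value with insertvector/insertelement, so DAGISel never sees the
// huge PHI.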
bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
      DL.getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)

  if (!ForceBreakLargePHIs && !canBreakPHINode(I))

  std::vector<VectorSlice> Slices;

    const unsigned EltSize = DL.getTypeSizeInBits(EltTy);

    if (EltSize == 8 || EltSize == 16) {
      const unsigned SubVecSize = (32 / EltSize);

        Slices.emplace_back(SubVecTy, Idx, SubVecSize);

    for (; Idx < NumElts; ++Idx)
      Slices.emplace_back(EltTy, Idx, 1);

  assert(Slices.size() > 1);

  B.SetCurrentDebugLocation(I.getDebugLoc());

  unsigned IncNameSuffix = 0;

    B.SetInsertPoint(I.getParent()->getFirstNonPHIIt());
    S.NewPHI = B.CreatePHI(S.Ty, I.getNumIncomingValues());

      S.NewPHI->addIncoming(S.getSlicedVal(BB, I.getIncomingValue(Idx),
                                           "largephi.extractslice" +
                                               std::to_string(IncNameSuffix++)),

  unsigned NameSuffix = 0;

    const auto ValName = "largephi.insertslice" + std::to_string(NameSuffix++);
      Vec = B.CreateInsertVector(FVT, Vec, S.NewPHI, S.Idx, ValName);
      Vec = B.CreateInsertElement(Vec, S.NewPHI, S.Idx, ValName);

  I.replaceAllUsesWith(Vec);
  I.eraseFromParent();
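// isPtrKnownNeverNull / visitAddrSpaceCastInst: when the source pointer of an
// addrspacecast is provably non-null (alloca/global, nonnull argument or load
// metadata, or known bits excluding the null value), the cast is lowered to
// amdgcn.addrspacecast.nonnull so the backend can drop the null check.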
  if (isa<BlockAddress, GlobalValue, AllocaInst>(V))

  if (const auto *Arg = dyn_cast<Argument>(V); Arg && Arg->hasNonNullAttr())

  if (const auto *Load = dyn_cast<LoadInst>(V);
      Load && Load->hasMetadata(LLVMContext::MD_nonnull))

  if (AS != cast<PointerType>(V->getType())->getAddressSpace())

  const auto NullVal = TM.getNullPointerValue(AS);

  assert(SrcPtrKB.getBitWidth() == DL.getPointerSizeInBits(AS));
  assert((NullVal == 0 || NullVal == -1) &&
         "don't know how to check for this null value!");
  return NullVal ? !SrcPtrKB.getMaxValue().isAllOnes() : SrcPtrKB.isNonZero();
  if (I.getType()->isVectorTy())

  const unsigned SrcAS = I.getSrcAddressSpace();
  const unsigned DstAS = I.getDestAddressSpace();

  bool CanLower = false;

  auto *Intrin = B.CreateIntrinsic(
      I.getType(), Intrinsic::amdgcn_addrspacecast_nonnull, {I.getOperand(0)});
  I.replaceAllUsesWith(Intrin);
  I.eraseFromParent();
bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
  switch (I.getIntrinsicID()) {
  case Intrinsic::minnum:
  case Intrinsic::minimumnum:
  case Intrinsic::minimum:
    return visitFMinLike(I);
  case Intrinsic::sqrt:
    return visitSqrt(I);
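// matchFractPat / visitFMinLike: recognize minnum/minimum of x - floor(x)
// against the value just below 1.0 (skipped on subtargets with the fract
// hardware bug) and rewrite it with the fract expansion, element-wise for
// vectors.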
  if (ST.hasFractBug())

  if (IID != Intrinsic::minnum && IID != Intrinsic::minimum &&
      IID != Intrinsic::minimumnum)

  Type *Ty = I.getType();

  Value *Arg0 = I.getArgOperand(0);
  Value *Arg1 = I.getArgOperand(1);

  One.convert(C->getSemantics(), APFloat::rmNearestTiesToEven, &LosesInfo);

                        m_Intrinsic<Intrinsic::floor>(m_Deferred(FloorSrc)))))

  for (unsigned I = 0, E = FractVals.size(); I != E; ++I) {

  Value *FractArg = matchFractPat(I);

  Value *Fract = applyFractPat(Builder, FractArg);

  I.replaceAllUsesWith(Fract);
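// visitSqrt: f32 sqrt calls that can ignore denormal inputs are lowered
// directly to amdgcn.sqrt; otherwise a scaled expansion good to 2 ulp is
// emitted. Sqrts feeding a sufficiently accurate fdiv are left alone so the
// rsq combine above can still see them.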
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
  if (ReqdAccuracy < 1.0f)

  if (FDiv && FDiv->getOpcode() == Instruction::FDiv &&
      FDiv->getFPAccuracy() >= 1.0f &&

  bool CanTreatAsDAZ = canIgnoreDenormalInput(SrcVal, &Sqrt);

  if (!CanTreatAsDAZ && ReqdAccuracy < 2.0f)

  for (int I = 0, E = SrcVals.size(); I != E; ++I) {
      ResultVals[I] = Builder.CreateCall(getSqrtF32(), SrcVals[I]);
      ResultVals[I] = emitSqrtIEEE2ULP(Builder, SrcVals[I], SqrtFMF);
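// Legacy pass-manager entry point: gather TargetLibraryInfo, AssumptionCache,
// an optional DominatorTree and UniformityInfo, then run the shared
// implementation. The new pass manager wrapper below does the same through the
// FunctionAnalysisManager.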
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();

      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
      &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
  return AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run();

  AMDGPUCodeGenPrepareImpl Impl(F, ATM, TLI, AC, DT, UA);

  if (!Impl.FlowChanged)

                    "AMDGPU IR optimizations", false, false)

char AMDGPUCodeGenPrepare::ID = 0;

  return new AMDGPUCodeGenPrepare();
// File-local static helpers referenced above:
static Value *insertValues(IRBuilder<> &Builder, Type *Ty,
                           SmallVectorImpl<Value *> &Values);
static bool isOneOrNegOne(const Value *Val);
static void extractValues(IRBuilder<> &Builder,
                          SmallVectorImpl<Value *> &Values, Value *V);
static Value *getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS);
static bool isInterestingPHIIncomingValue(const Value *V);
static bool tryNarrowMathIfNoOverflow(Instruction *I,
                                      const SITargetLowering *TLI,
                                      const TargetTransformInfo &TTI,
                                      const DataLayout &DL);
static SelectInst *findSelectThroughCast(Value *V, CastInst *&Cast);
static std::pair<Value *, Value *> getMul64(IRBuilder<> &Builder, Value *LHS,
                                            Value *RHS);
// Emit an expansion of 1.0 / sqrt(Src) good for 1 ulp that supports denormals.
static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
                              bool IsNegative);
static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout DL);
static void collectPHINodes(const PHINode &I,
                            SmallPtrSet<const PHINode *, 8> &SeenPHIs);
static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
                                const AMDGPUTargetMachine &TM, unsigned AS);
static bool areInSameBB(const Value *A, const Value *B);