#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "AMDGPUtti"

struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr;
};

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
#include "InstCombineTables.inc"
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
                           const APFloat &Src2) {
  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);

  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp0 == APFloat::cmpEqual)
    return maxnum(Src1, Src2);

  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp1 == APFloat::cmpEqual)
    return maxnum(Src0, Src2);

  return maxnum(Src0, Src1);
}
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
  Type *VTy = V.getType();
  if (IsFloat) {
    if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
      // Check that the constant survives narrowing to half exactly.
      APFloat FloatValue(ConstFloat->getValueAPF());
      bool LosesInfo = true;
      FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
                         &LosesInfo);
      return !LosesInfo;
    }
  } else {
    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
      // Check that the constant fits in an i16 without losing bits.
      APInt IntValue(ConstInt->getValue());
      return IntValue.getActiveBits() <= 16;
    }
  }
  return false;
}
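// --- Illustrative sketch (not part of this file): the exactness test above,
// isolated. A constant is safe to narrow when APFloat::convert to IEEE half
// reports no information loss. The helper name is ours, not LLVM's; requires
// "llvm/ADT/APFloat.h".
static bool fitsInHalfExactly(const llvm::APFloat &F) {
  llvm::APFloat Tmp(F);
  bool LosesInfo = true;
  Tmp.convert(llvm::APFloat::IEEEhalf(), llvm::APFloat::rmTowardZero,
              &LosesInfo); // the rounding mode is irrelevant when it fits
  return !LosesInfo;
}
// --- end sketch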
static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
  Type *VTy = V.getType();
  if (isa<FPExtInst, SExtInst, ZExtInst>(&V))
    return cast<Instruction>(&V)->getOperand(0);
  if (VTy->isIntegerTy())
    return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
  if (VTy->isFloatingPointTy())
    return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));

  llvm_unreachable("Should never be called!");
}
  // (within modifyIntrinsicCall) Preserve fast-math flags on the rebuilt call
  // and decide whether the old intrinsic needs to be erased separately.
  if (isa<FPMathOperator>(NewCall))
    NewCall->copyFastMathFlags(&OldIntr);

  bool RemoveOldIntr = &OldIntr != &InstToReplace;
static std::optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                             const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                             IntrinsicInst &II, InstCombiner &IC) {
  // Optimize _L to _LZ when the LOD is a known constant zero.
  if (const auto *LZMappingInfo =
          AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantLod =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->LodIndex);
            });
      }
    }
  }
  // Optimize _mip away when the MIP level is a known constant zero.
  if (const auto *MIPMappingInfo =
          AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantMip =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
      if (ConstantMip->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->MipIndex);
            });
      }
    }
  }
  // Optimize _bias away when the bias is a known constant zero.
  if (const auto *BiasMappingInfo =
          AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantBias =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
      if (ConstantBias->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
              ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
            });
      }
    }
  }
  // Optimize _offset away when the offset is a known constant zero.
  if (const auto *OffsetMappingInfo =
          AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantOffset =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
      if (ConstantOffset->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(
                OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
            });
      }
    }
  }
  // Try to use D16.
  if (ST->hasD16Images()) {
    // If the only use of the image intrinsic is an fptrunc to half, fold the
    // truncation into a D16 variant of the intrinsic.
    if (II.hasOneUse()) {
      Instruction *User = II.user_back();
      if (User->getOpcode() == Instruction::FPTrunc &&
          User->getType()->getScalarType()->isHalfTy()) {
        return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
                                   [&](auto &Args, auto &ArgTys) {
                                     // Change the return type to half.
                                     ArgTys[0] = User->getType();
                                   });
      }
    }

    bool AllHalfExtracts = true;
    SmallVector<std::pair<ExtractElementInst *, FPTruncInst *>, 4>
        ExtractTruncPairs;
    for (User *U : II.users()) {
      auto *Ext = dyn_cast<ExtractElementInst>(U);
      if (!Ext || !Ext->hasOneUse()) {
        AllHalfExtracts = false;
        break;
      }

      auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin());
      if (!Tr || !Tr->getType()->isHalfTy()) {
        AllHalfExtracts = false;
        break;
      }

      ExtractTruncPairs.emplace_back(Ext, Tr);
    }
    if (!ExtractTruncPairs.empty() && AllHalfExtracts) {
      auto *VecTy = cast<VectorType>(II.getType());
      Type *HalfVecTy =
          VecTy->getWithNewType(Type::getHalfTy(II.getContext()));

      // Swap the call's signature over to the half-vector return type.
      SmallVector<Type *, 8> SigTys;
      Intrinsic::getIntrinsicSignature(II.getCalledFunction(), SigTys);
      SigTys[0] = HalfVecTy;
      Function *HalfDecl = Intrinsic::getOrInsertDeclaration(
          II.getModule(), ImageDimIntr->Intr, SigTys);

      II.mutateType(HalfVecTy);
      II.setCalledFunction(HalfDecl);
      IRBuilder<> Builder(IC.Builder.getContext());
      for (auto &[Ext, Tr] : ExtractTruncPairs) {
        Value *Idx = Ext->getIndexOperand();
        Builder.SetInsertPoint(Tr);
        Value *HalfExtract = Builder.CreateExtractElement(&II, Idx);
        HalfExtract->takeName(Tr);
        Tr->replaceAllUsesWith(HalfExtract);
      }
      for (auto &[Ext, Tr] : ExtractTruncPairs) {
        IC.eraseInstFromFunction(*Tr);
        IC.eraseInstFromFunction(*Ext);
      }
      return &II;
    }
  }
  // Try to use A16 or G16.
  if (!ST->hasA16() && !ST->hasG16())
    return std::nullopt;

  // The address is interpreted as float if the instruction has a sampler, and
  // as unsigned int if there is no sampler.
  bool HasSampler =
      AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
  bool FloatCoord = false;
  // True means derivatives can be converted to 16 bit, coordinates not.
  bool OnlyDerivatives = false;

  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
       OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
    Value *Coord = II.getOperand(OperandIndex);
    // If the values are not derived from 16-bit values, we cannot optimize.
    if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
      if (OperandIndex < ImageDimIntr->CoordStart ||
          ImageDimIntr->GradientStart == ImageDimIntr->CoordStart)
        return std::nullopt;
      // All gradients can be converted, so convert only them.
      OnlyDerivatives = true;
      break;
    }

    assert(Coord->getType()->isFloatingPointTy() ||
           Coord->getType()->isIntegerTy());
    FloatCoord = Coord->getType()->isFloatingPointTy();
  }

  if (!OnlyDerivatives && !ST->hasA16())
    OnlyDerivatives = true; // Only supports G16
  // Check whether there is a bias parameter and if it can be converted to f16.
  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
    Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
    assert(HasSampler &&
           "Only image instructions with a sampler can have a bias");
    if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
      OnlyDerivatives = true;
  }

  if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
                                               ImageDimIntr->CoordStart))
    return std::nullopt;

  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                               : Type::getInt16Ty(II.getContext());
  return modifyIntrinsicCall(
      II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
        ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
        if (!OnlyDerivatives) {
          ArgTys[ImageDimIntr->CoordTyArg] = CoordType;

          // Change the bias type.
          if (ImageDimIntr->NumBiasArgs != 0)
            ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
        }
        unsigned EndIndex =
            OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
        for (unsigned OperandIndex = ImageDimIntr->GradientStart;
             OperandIndex < EndIndex; OperandIndex++) {
          Args[OperandIndex] =
              convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
        }

        if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs) {
          Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
          Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
        }
      });
}
static Value *matchFPExtFromF16(Value *Arg) {
  Value *Src = nullptr;
  if (match(Arg, m_OneUse(m_FPExt(m_Value(Src)))))
    if (Src->getType()->isHalfTy())
      return Src;
  // (constant conversion case elided)
  return nullptr;
}
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
                                       Instruction *I) {
  auto *VTy = cast<FixedVectorType>(UseV->getType());
  unsigned VWidth = VTy->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);

  for (int i = VWidth - 1; i > 0; --i) {
    auto *Elt = findScalarElement(UseV, i);
    if (!Elt)
      break;
    if (auto *ConstElt = dyn_cast<Constant>(Elt)) {
      if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt))
        break;
    } else {
      break;
    }
    DemandedElts.clearBit(i);
  }

  return DemandedElts;
}
static APInt defaultComponentBroadcast(Value *V) {
  auto *VTy = cast<FixedVectorType>(V->getType());
  unsigned VWidth = VTy->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);
  Value *FirstComponent = findScalarElement(V, 0);

  SmallVector<int> ShuffleMask;
  if (auto *SVI = dyn_cast<ShuffleVectorInst>(V))
    SVI->getShuffleMask(ShuffleMask);

  for (int I = VWidth - 1; I > 0; --I) {
    if (ShuffleMask.empty()) {
      auto *Elt = findScalarElement(V, I);
      if (!Elt || (Elt != FirstComponent && !isa<UndefValue>(Elt)))
        break;
    } else {
      // A shuffle mask of 0 means this element is a broadcast of element 0.
      if (ShuffleMask[I] != 0 && ShuffleMask[I] != PoisonMaskElem)
        break;
    }
    DemandedElts.clearBit(I);
  }

  return DemandedElts;
}
static bool isTriviallyUniform(const Use &U) {
  Value *V = U.get();
  if (isa<Constant>(V))
    return true;
  if (const auto *A = dyn_cast<Argument>(V))
    return AMDGPU::isArgPassedInSGPR(A);
  if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
    if (!AMDGPU::isIntrinsicAlwaysUniform(II->getIntrinsicID()))
      return false;
    // Always-uniform intrinsics like readfirstlane are only uniform within
    // the block they are defined in: another block may run under a different
    // exec mask.
    return II->getParent() == cast<Instruction>(U.getUser())->getParent();
  }
  return false;
}
bool GCNTTIImpl::simplifyDemandedLaneMaskArg(InstCombiner &IC,
                                             IntrinsicInst &II,
                                             unsigned LaneArgIdx) const {
  unsigned MaskBits = ST->getWavefrontSizeLog2();
  APInt DemandedMask(32, maskTrailingOnes<unsigned>(MaskBits));

  KnownBits Known(32);
  if (IC.SimplifyDemandedBits(&II, LaneArgIdx, DemandedMask, Known,
                              IC.getSimplifyQuery().getWithInstruction(&II)))
    return true;
  if (!Known.isConstant())
    return false;

  // Out-of-bounds lane indices behave like the end of the wave; mask them.
  Value *LaneArg = II.getArgOperand(LaneArgIdx);
  Constant *MaskedConst =
      ConstantInt::get(LaneArg->getType(), Known.getConstant() & DemandedMask);
  if (MaskedConst != LaneArg) {
    II.getOperandUse(LaneArgIdx).set(MaskedConst);
    return true;
  }
  return false;
}
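// --- Illustrative sketch (not part of this file): what
// maskTrailingOnes<unsigned>(MaskBits) produces. For wave32,
// getWavefrontSizeLog2() == 5, so only the low 5 bits of a lane index are
// demanded. Plain-C++ equivalent, with the shift-by-32 edge case guarded:
#include <cstdint>
constexpr uint32_t lowBitMask(unsigned N) {
  return N >= 32 ? ~UINT32_C(0) : (UINT32_C(1) << N) - 1;
}
static_assert(lowBitMask(5) == 0x1f, "wave32 lane indices use 5 bits");
static_assert(lowBitMask(6) == 0x3f, "wave64 lane indices use 6 bits");
// --- end sketch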
static CallInst *rewriteCall(IRBuilderBase &B, CallInst &Old,
                             Function &NewCallee, ArrayRef<Value *> Ops) {
  SmallVector<OperandBundleDef, 2> OpBundles;
  Old.getOperandBundlesAsDefs(OpBundles);

  CallInst *NewCall = B.CreateCall(&NewCallee, Ops, OpBundles);
  NewCall->takeName(&Old);
  return NewCall;
}
Instruction *
GCNTTIImpl::hoistLaneIntrinsicThroughOperand(InstCombiner &IC,
                                             IntrinsicInst &II) const {
  const auto IID = II.getIntrinsicID();
  assert(IID == Intrinsic::amdgcn_readlane ||
         IID == Intrinsic::amdgcn_readfirstlane ||
         IID == Intrinsic::amdgcn_permlane64);

  Instruction *OpInst = dyn_cast<Instruction>(II.getOperand(0));

  // Only hoist through a single-use instruction in the same block.
  if (!OpInst || !OpInst->hasOneUser() || OpInst->getParent() != II.getParent())
    return nullptr;

  const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);

  // For readlane, the lane operand must be safe to use at the new position.
  Value *LaneID = nullptr;
  if (IsReadLane) {
    LaneID = II.getOperand(1);
    if (auto *LaneIDInst = dyn_cast<Instruction>(LaneID)) {
      if (!IC.getDominatorTree().dominates(LaneIDInst, OpInst))
        return nullptr;
    }
  }

  const auto DoIt = [&](unsigned OpIdx,
                        Function *NewIntrinsic) -> Instruction * {
    SmallVector<Value *, 2> Ops{OpInst->getOperand(OpIdx)};
    if (IsReadLane)
      Ops.push_back(LaneID);

    // Rewrite the intrinsic call, then rewrite OpInst to consume its result.
    CallInst *NewII = rewriteCall(IC.Builder, II, *NewIntrinsic, Ops);
    Instruction *NewOp = OpInst->clone();
    NewOp->setOperand(OpIdx, NewII);
    return NewOp;
  };

  if (IID == Intrinsic::amdgcn_permlane64 && !isa<BitCastInst>(OpInst))
    return nullptr;

  if (isa<UnaryOperator>(OpInst))
    return DoIt(0, II.getCalledFunction());

  if (isa<CastInst>(OpInst)) {
    Value *Src = OpInst->getOperand(0);
    Type *SrcTy = Src->getType();
    if (!isTypeLegal(SrcTy))
      return nullptr;

    Function *Remangled =
        Intrinsic::getOrInsertDeclaration(II.getModule(), IID, {SrcTy});
    return DoIt(0, Remangled);
  }

  // Hoist through binary operators when the other operand is uniform.
  if (isa<BinaryOperator>(OpInst)) {
    if (isTriviallyUniform(OpInst->getOperandUse(0)))
      return DoIt(1, II.getCalledFunction());
    if (isTriviallyUniform(OpInst->getOperandUse(1)))
      return DoIt(0, II.getCalledFunction());
  }

  return nullptr;
}
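// --- Illustrative sketch (not part of this file): a toy model of why the
// hoist is sound for binary operators. Reading one lane of (a + b) equals
// adding the per-lane reads, so the cross-lane read can move past the add
// when the other operand is uniform. Types and names here are ours.
#include <array>
#include <cstddef>
using Wave = std::array<int, 32>;
static int readLane(const Wave &V, std::size_t L) { return V[L]; }
static bool hoistThroughAddHolds(const Wave &A, const Wave &B, std::size_t L) {
  Wave Sum{};
  for (std::size_t I = 0; I < Sum.size(); ++I)
    Sum[I] = A[I] + B[I];
  return readLane(Sum, L) == readLane(A, L) + readLane(B, L);
}
// --- end sketch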
std::optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  case Intrinsic::amdgcn_rcp: {
    Value *Src = II.getArgOperand(0);
    if (isa<PoisonValue>(Src))
      return IC.replaceInstUsesWith(II, Src);

    // rcp(undef) folds to a quiet NaN.
    if (isa<UndefValue>(Src)) {
      Type *Ty = II.getType();
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      const APFloat &ArgVal = C->getValueAPF();
      // (exact constant folding of 1.0 / ArgVal elided)
    }

    // Fold rcp(sqrt(x)) -> rsq(x) when contraction is allowed.
    auto *SrcCI = dyn_cast<IntrinsicInst>(Src);
    if (!SrcCI)
      break;

    auto IID = SrcCI->getIntrinsicID();
    if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
      Function *NewDecl = Intrinsic::getOrInsertDeclaration(
          SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
      // InnerFMF: the combined fast-math flags (computation elided).
      II.setFastMathFlags(InnerFMF);
      II.setCalledFunction(NewDecl);
      return IC.replaceOperand(II, 0, SrcCI->getArgOperand(0));
    }
    break;
  }
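// --- Illustrative sketch (not part of this file): the in-place rewrite shape
// used by the rcp(sqrt(x)) -> rsq(x) fold above, assuming LLVM headers.
// `II` is the outer rcp call, `SrcCI` the inner sqrt call; FMF handling is
// omitted here.
static void rewriteRcpOfSqrtToRsq(llvm::IntrinsicInst &II,
                                  llvm::IntrinsicInst *SrcCI) {
  llvm::Function *NewDecl = llvm::Intrinsic::getOrInsertDeclaration(
      SrcCI->getModule(), llvm::Intrinsic::amdgcn_rsq, {SrcCI->getType()});
  II.setCalledFunction(NewDecl);                // retarget rcp -> rsq
  II.setArgOperand(0, SrcCI->getArgOperand(0)); // consume sqrt's operand
}
// --- end sketch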
  case Intrinsic::amdgcn_sqrt:
  case Intrinsic::amdgcn_rsq:
  case Intrinsic::amdgcn_tanh: {
    Value *Src = II.getArgOperand(0);
    if (isa<PoisonValue>(Src))
      return IC.replaceInstUsesWith(II, Src);

    if (isa<UndefValue>(Src)) {
      Type *Ty = II.getType();
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    // f16 amdgcn.sqrt is identical to regular sqrt.
    if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
      Function *NewDecl = Intrinsic::getOrInsertDeclaration(
          II.getModule(), Intrinsic::sqrt, {II.getType()});
      II.setCalledFunction(NewDecl);
      return &II;
    }
    break;
  }
  case Intrinsic::amdgcn_log:
  case Intrinsic::amdgcn_exp2: {
    const bool IsLog = IID == Intrinsic::amdgcn_log;
    const bool IsExp = IID == Intrinsic::amdgcn_exp2;
    Value *Src = II.getArgOperand(0);
    Type *Ty = II.getType();

    if (isa<PoisonValue>(Src))
      return IC.replaceInstUsesWith(II, Src);

    if (ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      if (C->isInfinity()) {
        // exp2(+inf) -> +inf, log2(+inf) -> +inf
        if (!C->isNegative())
          return IC.replaceInstUsesWith(II, C);

        // exp2(-inf) -> 0
        if (IsExp && C->isNegative())
          return IC.replaceInstUsesWith(II, ConstantFP::getZero(Ty));
      }

      if (C->getValue().isNaN()) {
        Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
        return IC.replaceInstUsesWith(II, Quieted);
      }

      // The f32 instruction flushes denormals: exp2(+-0) -> 1, log2(+-0) -> -inf.
      if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
        Constant *FoldedValue = IsLog ? ConstantFP::getInfinity(Ty, true)
                                      : ConstantFP::get(Ty, 1.0);
        return IC.replaceInstUsesWith(II, FoldedValue);
      }

      // log2 of a negative value is NaN.
      if (IsLog && C->isNegative())
        return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));
    }
    break;
  }
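// --- Illustrative check (not part of this file): the special values folded
// above match the IEEE semantics of log2/exp2, shown in plain C++.
#include <cassert>
#include <cmath>
#include <limits>
static void logExp2SpecialCases() {
  const double Inf = std::numeric_limits<double>::infinity();
  assert(std::log2(Inf) == Inf);                            // log2(+inf) -> +inf
  assert(std::exp2(-Inf) == 0.0);                           // exp2(-inf) -> +0
  assert(std::isinf(std::log2(0.0)) && std::log2(0.0) < 0); // log2(0) -> -inf
  assert(std::isnan(std::log2(-1.0)));                      // log2(x<0) -> NaN
}
// --- end sketch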
  case Intrinsic::amdgcn_frexp_mant:
  case Intrinsic::amdgcn_frexp_exp: {
    Value *Src = II.getArgOperand(0);
    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      int Exp;
      APFloat Significand =
          frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);

      if (IID == Intrinsic::amdgcn_frexp_mant) {
        return IC.replaceInstUsesWith(
            II, ConstantFP::get(II.getContext(), Significand));
      }
      // (frexp_exp constant fold elided)
    }

    if (isa<PoisonValue>(Src))
      return IC.replaceInstUsesWith(II, Src);

    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }
    break;
  }
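// --- Illustrative sketch (not part of this file): std::frexp computes the
// same decomposition the constant fold above uses: X == Mant * 2^Exp with
// Mant in [0.5, 1). (The amdgcn variants differ on inf/nan inputs.)
#include <cmath>
static void frexpExample() {
  int Exp;
  double Mant = std::frexp(8.0, &Exp); // Mant == 0.5, Exp == 4
  (void)Mant;
  (void)Exp;
}
// --- end sketch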
  case Intrinsic::amdgcn_class: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
    if (CMask) {
      // Lower to the generic is.fpclass intrinsic.
      II.setCalledOperand(Intrinsic::getOrInsertDeclaration(
          II.getModule(), Intrinsic::is_fpclass, Src0->getType()));

      // Clamp any excess bits, as they're illegal for the generic intrinsic.
      II.setArgOperand(1, ConstantInt::get(Src1->getType(),
                                           CMask->getZExtValue() & fcAllFlags));
      return &II;
    }

    // Propagate poison.
    if (isa<PoisonValue>(Src0) || isa<PoisonValue>(Src1))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
    break;
  }
  case Intrinsic::amdgcn_cvt_pkrtz: {
    auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
      Type *HalfTy = Type::getHalfTy(Arg->getContext());

      if (isa<PoisonValue>(Arg))
        return PoisonValue::get(HalfTy);
      if (isa<UndefValue>(Arg))
        return UndefValue::get(HalfTy);

      // Constants fold via a round-toward-zero conversion to half.
      ConstantFP *CFP = nullptr;
      if (match(Arg, m_ConstantFP(CFP))) {
        bool LosesInfo;
        APFloat Val(CFP->getValueAPF());
        Val.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
        return ConstantFP::get(HalfTy, Val);
      }

      // fpext from half: use the half source directly.
      Value *Src = nullptr;
      if (match(Arg, m_FPExt(m_Value(Src))))
        if (Src->getType()->isHalfTy())
          return Src;

      return nullptr;
    };

    if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
      if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
        Value *V = PoisonValue::get(II.getType());
        V = IC.Builder.CreateInsertElement(V, Src0, (uint64_t)0);
        V = IC.Builder.CreateInsertElement(V, Src1, (uint64_t)1);
        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;
  }
  case Intrinsic::amdgcn_cvt_pknorm_i16:
  case Intrinsic::amdgcn_cvt_pknorm_u16:
  case Intrinsic::amdgcn_cvt_pk_i16:
  case Intrinsic::amdgcn_cvt_pk_u16: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    // Propagate poison/undef when both operands are poison/undef.
    if (isa<PoisonValue>(Src0) && isa<PoisonValue>(Src1))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));

    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }
    break;
  }
  case Intrinsic::amdgcn_cvt_off_f32_i4: {
    Value *Arg = II.getArgOperand(0);
    if (isa<PoisonValue>(Arg))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));

    ConstantInt *CArg = dyn_cast<ConstantInt>(II.getArgOperand(0));
    if (!CArg)
      break;

    // Tabulated conversion: the low 4 bits select a fixed offset.
    Type *Ty = II.getType();
    constexpr size_t ResValsSize = 16;
    static constexpr float ResVals[ResValsSize] = {
        0.0,  0.0625,  0.125,  0.1875,  0.25,  0.3125,  0.375,  0.4375,
        -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
    Constant *Res = ConstantFP::get(
        Ty, ResVals[CArg->getZExtValue() & (ResValsSize - 1)]);
    return IC.replaceInstUsesWith(II, Res);
  }
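// --- Illustrative check (not part of this file): the table above is exactly
// a signed 4-bit value scaled by 1/16, i.e. entry k is ((k<8) ? k : k-16)/16.
static constexpr float cvtOffF32I4(unsigned K) {
  return ((K & 15u) < 8 ? int(K & 15u) : int(K & 15u) - 16) / 16.0f;
}
static_assert(cvtOffF32I4(1) == 0.0625f, "matches ResVals[1]");
static_assert(cvtOffF32I4(8) == -0.5f, "matches ResVals[8]");
static_assert(cvtOffF32I4(15) == -0.0625f, "matches ResVals[15]");
// --- end sketch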
  case Intrinsic::amdgcn_ubfe:
  case Intrinsic::amdgcn_sbfe: {
    // Decompose simple cases into standard shifts.
    Value *Src = II.getArgOperand(0);
    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, Src);
    }

    unsigned Width;
    Type *Ty = II.getType();
    unsigned IntSize = Ty->getIntegerBitWidth();

    ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
    if (CWidth) {
      Width = CWidth->getZExtValue();
      if ((Width & (IntSize - 1)) == 0) {
        return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
      }

      // The hardware ignores the high bits, so remove them.
      if (Width >= IntSize) {
        return IC.replaceOperand(
            II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
      }
    }

    unsigned Offset;
    ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
    if (COffset) {
      Offset = COffset->getZExtValue();
      if (Offset >= IntSize) {
        return IC.replaceOperand(
            II, 1,
            ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
      }
    }

    bool Signed = IID == Intrinsic::amdgcn_sbfe;

    if (!CWidth || !COffset)
      break;

    // Width == 0 was handled above, so the shifts below cannot be poison.
    if (Offset + Width < IntSize) {
      Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
      Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
                                 : IC.Builder.CreateLShr(Shl, IntSize - Width);
      RightShift->takeName(&II);
      return IC.replaceInstUsesWith(II, RightShift);
    }

    Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
                               : IC.Builder.CreateLShr(Src, Offset);
    RightShift->takeName(&II);
    return IC.replaceInstUsesWith(II, RightShift);
  }
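// --- Illustrative sketch (not part of this file): the shift decomposition the
// fold emits, in plain C++ for the unsigned (ubfe) case; sbfe would use an
// arithmetic right shift instead.
#include <cstdint>
static uint32_t ubfe(uint32_t Src, unsigned Offset, unsigned Width) {
  if (Width == 0)
    return 0; // handled before any shifts are formed
  if (Offset + Width < 32)
    return (Src << (32 - Offset - Width)) >> (32 - Width);
  return Src >> Offset; // the field reaches the top bit
}
// --- end sketch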
  case Intrinsic::amdgcn_exp:
  case Intrinsic::amdgcn_exp_row:
  case Intrinsic::amdgcn_exp_compr: {
    // Mark disabled export sources as poison.
    ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
    unsigned EnBits = En->getZExtValue();
    if (EnBits == 0xf)
      break; // All inputs enabled.

    bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
    bool Changed = false;
    for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
      if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
          (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
        Value *Src = II.getArgOperand(I + 2);
        if (!isa<PoisonValue>(Src)) {
          II.setArgOperand(I + 2, PoisonValue::get(Src->getType()));
          Changed = true;
        }
      }
    }

    if (Changed)
      return &II;
    break;
  }
  case Intrinsic::amdgcn_fmed3: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    Value *Src2 = II.getArgOperand(2);

    for (Value *Src : {Src0, Src1, Src2})
      if (isa<PoisonValue>(Src))
        return IC.replaceInstUsesWith(II, Src);

    if (II.isStrictFP())
      break;

    const APFloat *ConstSrc0 = nullptr;
    const APFloat *ConstSrc1 = nullptr;
    const APFloat *ConstSrc2 = nullptr;
    // If an operand is a known NaN (or undef), fmed3 degenerates to a
    // two-input min/max of the remaining operands; the exact replacement
    // depends on whether the other constant is +inf and on the FP environment.
    if ((ConstSrc0 && ConstSrc0->isNaN()) || isa<UndefValue>(Src0)) {
      const bool IsPosInfinity = ConstSrc0 && ConstSrc0->isPosInfinity();
      // (fold to min/max of Src1 and Src2 elided)
    }
    if ((ConstSrc1 && ConstSrc1->isNaN()) || isa<UndefValue>(Src1)) {
      const bool IsPosInfinity = ConstSrc1 && ConstSrc1->isPosInfinity();
      // (fold to min/max of Src0 and Src2 elided)
    }
    if ((ConstSrc2 && ConstSrc2->isNaN()) || isa<UndefValue>(Src2)) {
      if (ConstSrc2 && ConstSrc2->isNaN()) {
        auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet());
        return IC.replaceInstUsesWith(II, Quieted);
      }
      // (undef case elided)
    }

    // V: the min/max replacement value built above (construction elided);
    // propagate the call's fast-math flags onto it.
    if (auto *CI = dyn_cast<CallInst>(V)) {
      CI->copyFastMathFlags(&II);
    }
    // Canonicalize constants to the RHS: fmed3(c0, x, c1) -> fmed3(x, c0, c1).
    bool Swap = false;
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }
    if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
      std::swap(Src1, Src2);
      Swap = true;
    }
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }
    if (Swap) {
      II.setArgOperand(0, Src0);
      II.setArgOperand(1, Src1);
      II.setArgOperand(2, Src2);
      return &II;
    }
    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
          APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
                                       C2->getValueAPF());
          return IC.replaceInstUsesWith(
              II, ConstantFP::get(II.getType(), Result));
        }
      }
    }

    // fmed3(fpext x, fpext y, fpext z) -> fpext(fmed3(x, y, z)); X, Y, Z are
    // the matched half operands (match elided).
    Value *NewCall = IC.Builder.CreateIntrinsic(IID, {X->getType()}, {X, Y, Z},
                                                &II, II.getName());
    return IC.replaceInstUsesWith(II, NewCall);
  }
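// --- Illustrative sketch (not part of this file): for inputs with no NaNs,
// fmed3 is the median of three, expressible as max(min(a,b), min(max(a,b), c));
// the APFloat constant folder above implements the same selection.
#include <algorithm>
static float med3(float A, float B, float C) {
  return std::max(std::min(A, B), std::min(std::max(A, B), C));
}
// --- end sketch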
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp: {
    const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
    // Guard against invalid arguments.
    int64_t CCVal = CC->getZExtValue();
    bool IsInteger = IID == Intrinsic::amdgcn_icmp;
    if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
                       CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
        (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
                        CCVal > CmpInst::LAST_FCMP_PREDICATE)))
      break;

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
      if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
        Constant *CCmp = ConstantFoldCompareInstOperands(
            (ICmpInst::Predicate)CCVal, CSrc0, CSrc1, IC.getDataLayout());
        if (CCmp && CCmp->isNullValue())
          return IC.replaceInstUsesWith(
              II, IC.Builder.CreateSExt(CCmp, II.getType()));

        // A compare that is always true is a read of EXEC (one bit per live
        // lane).
        Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
        MDNode *MD = MDNode::get(II.getContext(), MDArgs);
        Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
        CallInst *NewCall = IC.Builder.CreateIntrinsic(
            Intrinsic::read_register, II.getType(), Args);
        NewCall->addFnAttr(Attribute::Convergent);
        NewCall->takeName(&II);
        return IC.replaceInstUsesWith(II, NewCall);
      }

      // Canonicalize constants to the RHS.
      CmpInst::Predicate SwapPred =
          CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
      II.setArgOperand(0, Src1);
      II.setArgOperand(1, Src0);
      II.setArgOperand(
          2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
      return &II;
    }
    // SrcLHS/SrcRHS/SrcPred come from a matched extended-compare pattern
    // (match elided).
    Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
                               ? Intrinsic::amdgcn_fcmp
                               : Intrinsic::amdgcn_icmp;

    Type *Ty = SrcLHS->getType();
    if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
      // Promote to the next legal compare width.
      unsigned Width = CmpType->getBitWidth();
      unsigned NewWidth = Width;
      if (Width <= 16)
        NewWidth = 16;
      else if (Width <= 32)
        NewWidth = 32;
      else if (Width <= 64)
        NewWidth = 64;
      else
        break; // Can't handle this.

      if (Width != NewWidth) {
        IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
        SrcLHS = IC.Builder.CreateIntCast(SrcLHS, CmpTy,
                                          CmpInst::isSigned(SrcPred));
        SrcRHS = IC.Builder.CreateIntCast(SrcRHS, CmpTy,
                                          CmpInst::isSigned(SrcPred));
      }
    }

    Value *Args[] = {SrcLHS, SrcRHS,
                     ConstantInt::get(CC->getType(), SrcPred)};
    CallInst *NewCall = IC.Builder.CreateIntrinsic(
        NewIID, {II.getType(), SrcLHS->getType()}, Args);
    NewCall->takeName(&II);
    return IC.replaceInstUsesWith(II, NewCall);
  }
  case Intrinsic::amdgcn_mbcnt_hi: {
    // exec_hi is all 0, so this is just a copy on wave32.
    if (ST->isWave32())
      return IC.replaceInstUsesWith(II, II.getArgOperand(1));
    break;
  }
  case Intrinsic::amdgcn_ballot: {
    Value *Arg = II.getArgOperand(0);
    if (isa<PoisonValue>(Arg))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));

    if (auto *Src = dyn_cast<ConstantInt>(Arg)) {
      if (Src->isZero()) {
        // amdgcn.ballot(i1 0) is zero.
        return IC.replaceInstUsesWith(II,
                                      Constant::getNullValue(II.getType()));
      }
    }
    if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
      // %b64 = call i64 ballot.i64(...)
      // =>
      // %b32 = call i32 ballot.i32(...)
      // %b64 = zext i32 %b32 to i64
      Value *Call = IC.Builder.CreateZExt(
          IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
                                     {IC.Builder.getInt32Ty()},
                                     {II.getArgOperand(0)}),
          II.getType());
      Call->takeName(&II);
      return IC.replaceInstUsesWith(II, Call);
    }
    break;
  }
  case Intrinsic::amdgcn_wavefrontsize: {
    if (ST->isWaveSizeKnown())
      return IC.replaceInstUsesWith(
          II, ConstantInt::get(II.getType(), ST->getWavefrontSize()));
    break;
  }
  case Intrinsic::amdgcn_wqm_vote: {
    // wqm_vote is identity when the argument is constant.
    if (!isa<Constant>(II.getArgOperand(0)))
      break;
    return IC.replaceInstUsesWith(II, II.getArgOperand(0));
  }
  case Intrinsic::amdgcn_kill: {
    const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
    if (!C || !C->getZExtValue())
      break;
    // amdgcn.kill(i1 1) is a no-op.
    return IC.eraseInstFromFunction(II);
  }
  case Intrinsic::amdgcn_update_dpp: {
    Value *Old = II.getArgOperand(0);

    auto *BC = cast<ConstantInt>(II.getArgOperand(5));
    auto *RM = cast<ConstantInt>(II.getArgOperand(3));
    auto *BM = cast<ConstantInt>(II.getArgOperand(4));
    if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
        BM->getZExtValue() != 0xF || isa<PoisonValue>(Old))
      break;

    // If bound_ctrl = 1 and row mask = bank mask = 0xf, the old value is
    // never read and can be discarded.
    return IC.replaceOperand(II, 0, PoisonValue::get(Old->getType()));
  }
  case Intrinsic::amdgcn_permlane16:
  case Intrinsic::amdgcn_permlane16_var:
  case Intrinsic::amdgcn_permlanex16:
  case Intrinsic::amdgcn_permlanex16_var: {
    // Discard vdst_in if it's not going to be read.
    Value *VDstIn = II.getArgOperand(0);
    if (isa<PoisonValue>(VDstIn))
      break;

    // FetchInvalid operand index: 4 for permlane16/permlanex16, 3 for the
    // *_var forms.
    unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
                          IID == Intrinsic::amdgcn_permlanex16)
                             ? 4
                             : 3;

    // The BoundCtrl operand follows FetchInvalid.
    unsigned int BcIdx = FiIdx + 1;

    ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(FiIdx));
    ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(BcIdx));
    if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
      break;

    return IC.replaceOperand(II, 0, PoisonValue::get(VDstIn->getType()));
  }
  case Intrinsic::amdgcn_permlane64:
  case Intrinsic::amdgcn_readfirstlane:
  case Intrinsic::amdgcn_readlane:
  case Intrinsic::amdgcn_ds_bpermute: {
    // If the data argument is uniform these intrinsics return it unchanged.
    unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
    const Use &Src = II.getArgOperandUse(SrcIdx);
    if (isTriviallyUniform(Src))
      return IC.replaceInstUsesWith(II, Src.get());

    if (IID == Intrinsic::amdgcn_readlane &&
        simplifyDemandedLaneMaskArg(IC, II, 1))
      return &II;

    // If the lane argument of bpermute is uniform, change it to readlane. This
    // generates better code and can enable further optimizations because
    // readlane is AlwaysUniform.
    if (IID == Intrinsic::amdgcn_ds_bpermute) {
      const Use &Lane = II.getArgOperandUse(0);
      if (isTriviallyUniform(Lane)) {
        Value *NewLane = IC.Builder.CreateLShr(Lane, 2);
        Function *NewDecl = Intrinsic::getOrInsertDeclaration(
            II.getModule(), Intrinsic::amdgcn_readlane, II.getType());
        II.setCalledFunction(NewDecl);
        II.setOperand(0, Src);
        II.setOperand(1, NewLane);
        return &II;
      }
    }

    if (IID != Intrinsic::amdgcn_ds_bpermute) {
      if (Instruction *Res = hoistLaneIntrinsicThroughOperand(IC, II))
        return Res;
    }

    return std::nullopt;
  }
  case Intrinsic::amdgcn_writelane: {
    if (simplifyDemandedLaneMaskArg(IC, II, 1))
      return &II;
    return std::nullopt;
  }
  case Intrinsic::amdgcn_trig_preop: {
    // The intrinsic is declared with name mangling, but currently the
    // instruction only exists for f64.
    if (!II.getType()->isDoubleTy())
      break;

    Value *Src = II.getArgOperand(0);
    Value *Segment = II.getArgOperand(1);
    if (isa<PoisonValue>(Src) || isa<PoisonValue>(Segment))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));

    if (isa<UndefValue>(Src)) {
      auto *QNaN = ConstantFP::get(
          II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src);
    if (!Csrc)
      break;

    if (II.isStrictFP())
      break;

    const APFloat &Fsrc = Csrc->getValueAPF();
    if (Fsrc.isNaN()) {
      auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet());
      return IC.replaceInstUsesWith(II, Quieted);
    }

    const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment);
    if (!Cseg)
      break;

    // Each segment selects a 53-bit window of the 2/pi bit string.
    unsigned SegmentVal = Cseg->getValue().trunc(5).getZExtValue();
    unsigned Shift = SegmentVal * 53;
    // (additional shift for large input exponents elided)
    static const uint32_t TwoByPi[] = {
        0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
        0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
        0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
        0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
        0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
        0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
        // (remaining table words elided)
    };
    unsigned Idx = Shift >> 5;
    if (Idx + 2 >= std::size(TwoByPi)) {
      // The window is shifted past the table.
      return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
    }

    unsigned BShift = Shift & 0x1f;
    uint64_t Thi = Make_64(TwoByPi[Idx], TwoByPi[Idx + 1]);
    uint64_t Tlo = Make_64(TwoByPi[Idx + 2], 0);
    if (BShift)
      Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
    Thi = Thi >> 11;

    APFloat Result = APFloat((double)Thi);
    int Scale = -53 - Shift;
    Result = scalbn(Result, Scale, APFloat::rmNearestTiesToEven);
    return IC.replaceInstUsesWith(II, ConstantFP::get(Src->getType(), Result));
  }
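// --- Illustrative sketch (not part of this file): the window extraction in
// plain C++. Each segment selects 53 bits of the 2/pi bit string starting at
// Shift = SegmentVal * 53; two table words are fused and realigned.
#include <cstdint>
static uint64_t twoByPiWindow(const uint32_t *Tbl, unsigned Shift) {
  unsigned Idx = Shift >> 5, BShift = Shift & 0x1f;
  uint64_t Thi = (uint64_t(Tbl[Idx]) << 32) | Tbl[Idx + 1];
  uint64_t Tlo = uint64_t(Tbl[Idx + 2]) << 32;
  if (BShift)
    Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
  return Thi >> 11; // keep the top 53 bits
}
// --- end sketch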
  case Intrinsic::amdgcn_fmul_legacy: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);

    for (Value *Src : {Op0, Op1})
      if (isa<PoisonValue>(Src))
        return IC.replaceInstUsesWith(II, Src);

    // Legacy rule: multiplying anything (even NaN/inf) by +/-0.0 gives +0.0.
    if (match(Op0, m_AnyZeroFP()) || match(Op1, m_AnyZeroFP()))
      return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));

    if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
      auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
      FMul->takeName(&II);
      return IC.replaceInstUsesWith(II, FMul);
    }
    break;
  }
  case Intrinsic::amdgcn_fma_legacy: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Op2 = II.getArgOperand(2);

    for (Value *Src : {Op0, Op1, Op2})
      if (isa<PoisonValue>(Src))
        return IC.replaceInstUsesWith(II, Src);

    // When the multiply can't hit the legacy +/-0.0 special case, this
    // behaves like the regular fma intrinsic.
    if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
      II.setCalledOperand(Intrinsic::getOrInsertDeclaration(
          II.getModule(), Intrinsic::fma, II.getType()));
      return &II;
    }
    break;
  }
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private: {
    Value *Src = II.getArgOperand(0);
    if (isa<PoisonValue>(Src))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
    if (isa<UndefValue>(Src))
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));

    if (isa<ConstantPointerNull>(II.getArgOperand(0)))
      return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
    break;
  }
  case Intrinsic::amdgcn_make_buffer_rsrc: {
    Value *Src = II.getArgOperand(0);
    if (isa<PoisonValue>(Src))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
    return std::nullopt;
  }
  case Intrinsic::amdgcn_raw_buffer_store_format:
  case Intrinsic::amdgcn_struct_buffer_store_format:
  case Intrinsic::amdgcn_raw_tbuffer_store:
  case Intrinsic::amdgcn_struct_tbuffer_store:
  case Intrinsic::amdgcn_image_store_1d:
  case Intrinsic::amdgcn_image_store_1darray:
  case Intrinsic::amdgcn_image_store_2d:
  case Intrinsic::amdgcn_image_store_2darray:
  case Intrinsic::amdgcn_image_store_2darraymsaa:
  case Intrinsic::amdgcn_image_store_2dmsaa:
  case Intrinsic::amdgcn_image_store_3d:
  case Intrinsic::amdgcn_image_store_cube:
  case Intrinsic::amdgcn_image_store_mip_1d:
  case Intrinsic::amdgcn_image_store_mip_1darray:
  case Intrinsic::amdgcn_image_store_mip_2d:
  case Intrinsic::amdgcn_image_store_mip_2darray:
  case Intrinsic::amdgcn_image_store_mip_3d:
  case Intrinsic::amdgcn_image_store_mip_cube: {
    if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
      break;

    APInt DemandedElts;
    if (ST->hasDefaultComponentBroadcast())
      DemandedElts = defaultComponentBroadcast(II.getArgOperand(0));
    else if (ST->hasDefaultComponentZero())
      DemandedElts = trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
    else
      break;

    int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
    if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,
                                              false))
      return IC.eraseInstFromFunction(II);
    break;
  }
  case Intrinsic::amdgcn_prng_b32: {
    auto *Src = II.getArgOperand(0);
    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, Src);
    }
    return std::nullopt;
  }
  case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
  case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    uint64_t CBSZ = cast<ConstantInt>(II.getArgOperand(3))->getZExtValue();
    uint64_t BLGP = cast<ConstantInt>(II.getArgOperand(4))->getZExtValue();
    auto *Src0Ty = cast<FixedVectorType>(Src0->getType());
    auto *Src1Ty = cast<FixedVectorType>(Src1->getType());

    auto getFormatNumRegs = [](unsigned FormatVal) {
      switch (FormatVal) {
      case AMDGPU::MFMAScaleFormats::FP6_E2M3:
      case AMDGPU::MFMAScaleFormats::FP6_E3M2:
        return 6u;
      case AMDGPU::MFMAScaleFormats::FP4_E2M1:
        return 4u;
      case AMDGPU::MFMAScaleFormats::FP8_E4M3:
      case AMDGPU::MFMAScaleFormats::FP8_E5M2:
        return 8u;
      default:
        llvm_unreachable("invalid format value");
      }
    };

    bool MadeChange = false;
    unsigned Src0NumElts = getFormatNumRegs(CBSZ);
    unsigned Src1NumElts = getFormatNumRegs(BLGP);

    // Depending on the used format, fewer registers are required, so shrink
    // the vector type.
    if (Src0Ty->getNumElements() > Src0NumElts) {
      Src0 = IC.Builder.CreateExtractVector(
          FixedVectorType::get(Src0Ty->getElementType(), Src0NumElts), Src0,
          IC.Builder.getInt64(0));
      MadeChange = true;
    }

    if (Src1Ty->getNumElements() > Src1NumElts) {
      Src1 = IC.Builder.CreateExtractVector(
          FixedVectorType::get(Src1Ty->getElementType(), Src1NumElts), Src1,
          IC.Builder.getInt64(0));
      MadeChange = true;
    }

    if (!MadeChange)
      return std::nullopt;
    // (rebuilding the call with the shrunken operands elided)
  }
  case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
  case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
  case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
    Value *Src0 = II.getArgOperand(1);
    Value *Src1 = II.getArgOperand(3);
    unsigned FmtA = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();
    uint64_t FmtB = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
    auto *Src0Ty = cast<FixedVectorType>(Src0->getType());
    auto *Src1Ty = cast<FixedVectorType>(Src1->getType());

    bool MadeChange = false;
    unsigned Src0NumElts = AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(FmtA);
    unsigned Src1NumElts = AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(FmtB);

    // Depending on the used format, fewer registers are required, so shrink
    // the vector type.
    if (Src0Ty->getNumElements() > Src0NumElts) {
      Src0 = IC.Builder.CreateExtractVector(
          FixedVectorType::get(Src0Ty->getElementType(), Src0NumElts), Src0,
          IC.Builder.getInt64(0));
      MadeChange = true;
    }

    if (Src1Ty->getNumElements() > Src1NumElts) {
      Src1 = IC.Builder.CreateExtractVector(
          FixedVectorType::get(Src1Ty->getElementType(), Src1NumElts), Src1,
          IC.Builder.getInt64(0));
      MadeChange = true;
    }

    if (!MadeChange)
      return std::nullopt;
    // (rebuilding the call with the shrunken operands elided)
  }
  }
  return std::nullopt;
}
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                    IntrinsicInst &II,
                                                    APInt DemandedElts,
                                                    int DMaskIdx,
                                                    bool IsLoad) {
  auto *IIVTy = cast<FixedVectorType>(IsLoad ? II.getType()
                                             : II.getOperand(0)->getType());
  unsigned VWidth = IIVTy->getNumElements();
  if (VWidth == 1)
    return nullptr;
  Type *EltTy = IIVTy->getElementType();

  IRBuilderBase::InsertPointGuard Guard(IC.Builder);
  IC.Builder.SetInsertPoint(&II);

  // Assume the arguments are unchanged and later override them, if needed.
  SmallVector<Value *, 16> Args(II.args());

  if (DMaskIdx < 0) {
    // Buffer case.
    const unsigned ActiveBits = DemandedElts.getActiveBits();
    const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();

    // Start assuming the prefix of elements is demanded, but possibly clear
    // some other bits if there are trailing zeros (unused components at
    // front) and update the offset.
    DemandedElts = (1 << ActiveBits) - 1;

    if (UnusedComponentsAtFront > 0) {
      static const unsigned InvalidOffsetIdx = 0xf;

      unsigned OffsetIdx;
      switch (II.getIntrinsicID()) {
      case Intrinsic::amdgcn_raw_buffer_load:
      case Intrinsic::amdgcn_raw_ptr_buffer_load:
        OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_s_buffer_load:
        // A vec3 result would most likely be widened back to vec4 during
        // lowering, so don't bother trimming it with an updated offset.
        if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
          OffsetIdx = InvalidOffsetIdx;
        else
          OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_struct_buffer_load:
      case Intrinsic::amdgcn_struct_ptr_buffer_load:
        OffsetIdx = 2;
        break;
      default:
        OffsetIdx = InvalidOffsetIdx;
        break;
      }
      if (OffsetIdx != InvalidOffsetIdx) {
        // Clear the demanded bits for the skipped components and bump the
        // offset past them.
        DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
        auto *Offset = Args[OffsetIdx];
        unsigned SingleComponentSizeInBits =
            IC.getDataLayout().getTypeSizeInBits(EltTy);
        unsigned OffsetAdd =
            UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
        auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
        Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
      }
    }
  } else {
    // Image case.
    ConstantInt *DMask = cast<ConstantInt>(Args[DMaskIdx]);
    unsigned DMaskVal = DMask->getZExtValue() & 0xf;

    // Mask off values that are undefined because the dmask doesn't cover them.
    DemandedElts &= (1 << popcount(DMaskVal)) - 1;

    unsigned NewDMaskVal = 0;
    unsigned OrigLdStIdx = 0;
    for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
      const unsigned Bit = 1 << SrcIdx;
      if (!!(DMaskVal & Bit)) {
        if (!!DemandedElts[OrigLdStIdx])
          NewDMaskVal |= Bit;
        OrigLdStIdx++;
      }
    }

    if (DMaskVal != NewDMaskVal)
      Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
  }
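// --- Illustrative sketch (not part of this file): the dmask recomputation in
// plain C++. A dmask bit survives only if it was set before and the vector
// component it produced is still demanded.
#include <cstdint>
static uint32_t recomputeDMask(uint32_t DMaskVal, uint32_t DemandedElts) {
  uint32_t NewDMask = 0;
  unsigned OrigLdStIdx = 0;
  for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
    uint32_t Bit = UINT32_C(1) << SrcIdx;
    if (DMaskVal & Bit) {
      if (DemandedElts & (UINT32_C(1) << OrigLdStIdx))
        NewDMask |= Bit;
      ++OrigLdStIdx;
    }
  }
  return NewDMask;
}
// --- end sketch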
  unsigned NewNumElts = DemandedElts.popcount();
  if (!NewNumElts)
    return PoisonValue::get(IIVTy);

  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
    if (DMaskIdx >= 0)
      II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
    return nullptr;
  }

  // Recompute the overloaded types with the new return/value type.
  SmallVector<Type *, 6> OverloadTys;
  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
    return nullptr;

  Type *NewTy =
      (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
  OverloadTys[0] = NewTy;

  if (!IsLoad) {
    SmallVector<int, 8> EltMask;
    for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
      if (DemandedElts[OrigStoreIdx])
        EltMask.push_back(OrigStoreIdx);

    if (NewNumElts == 1)
      Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]);
    else
      Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
  }
  // NewCall: the rebuilt, narrower intrinsic call (creation elided).
  if (NewNumElts == 1) {
    return IC.Builder.CreateInsertElement(PoisonValue::get(IIVTy), NewCall,
                                          (uint64_t)0);
  }

  SmallVector<int, 8> EltMask;
  unsigned NewLoadIdx = 0;
  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
    if (!!DemandedElts[OrigLoadIdx])
      EltMask.push_back(NewLoadIdx++);
    else
      EltMask.push_back(NewNumElts);
  }

  return IC.Builder.CreateShuffleVector(NewCall, EltMask);
}
Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
    InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts,
    APInt &UndefElts) const {
  auto *VT = dyn_cast<FixedVectorType>(II.getType());
  if (!VT)
    return nullptr;

  const unsigned FirstElt = DemandedElts.countr_zero();
  const unsigned LastElt = DemandedElts.getActiveBits() - 1;
  const unsigned MaskLen = LastElt - FirstElt + 1;

  unsigned OldNumElts = VT->getNumElements();
  if (MaskLen == OldNumElts && MaskLen != 1)
    return nullptr;

  Type *EltTy = VT->getElementType();
  Type *NewVT = MaskLen == 1 ? EltTy : FixedVectorType::get(EltTy, MaskLen);

  // Avoid introducing operand types that aren't direct register types, like
  // v3i16.
  if (!isTypeLegal(NewVT))
    return nullptr;

  Value *Src = II.getArgOperand(0);

  SmallVector<OperandBundleDef, 1> OpBundles;
  II.getOperandBundlesAsDefs(OpBundles);
  // Extract the demanded window, run the intrinsic on it, and scatter the
  // result back into a vector of the original width.
  Value *Extract;
  if (MaskLen == 1) {
    Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
  } else {
    SmallVector<int> ExtractMask(MaskLen, -1);
    for (unsigned I = 0; I != MaskLen; ++I) {
      if (DemandedElts[FirstElt + I])
        ExtractMask[I] = FirstElt + I;
    }
    Extract = IC.Builder.CreateShuffleVector(Src, ExtractMask);
  }

  Function *NewIntrin = Intrinsic::getOrInsertDeclaration(
      II.getModule(), II.getIntrinsicID(), NewVT);
  CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, {Extract}, OpBundles);
  NewCall->takeName(&II);

  if (MaskLen == 1)
    return IC.Builder.CreateInsertElement(PoisonValue::get(VT), NewCall,
                                          FirstElt);

  SmallVector<int> InsertMask(OldNumElts, -1);
  for (unsigned I = 0; I != MaskLen; ++I) {
    if (DemandedElts[FirstElt + I])
      InsertMask[FirstElt + I] = I;
  }
  return IC.Builder.CreateShuffleVector(NewCall, InsertMask);
}
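// --- Illustrative sketch (not part of this file): the two shuffle masks are
// inverses of each other. Extract pulls the demanded window to the front;
// insert scatters the narrow result back, leaving -1 (poison) elsewhere.
#include <utility>
#include <vector>
static std::pair<std::vector<int>, std::vector<int>>
windowMasks(unsigned FirstElt, unsigned MaskLen, unsigned OldNumElts) {
  std::vector<int> Extract(MaskLen, -1), Insert(OldNumElts, -1);
  for (unsigned I = 0; I != MaskLen; ++I) {
    Extract[I] = int(FirstElt + I);
    Insert[FirstElt + I] = int(I);
  }
  return {Extract, Insert};
}
// --- end sketch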
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::amdgcn_readfirstlane:
    SimplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    return simplifyAMDGCNLaneIntrinsicDemanded(IC, II, DemandedElts,
                                               UndefElts);
  case Intrinsic::amdgcn_raw_buffer_load:
  case Intrinsic::amdgcn_raw_ptr_buffer_load:
  case Intrinsic::amdgcn_raw_buffer_load_format:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
  case Intrinsic::amdgcn_raw_tbuffer_load:
  case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
  case Intrinsic::amdgcn_s_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load:
  case Intrinsic::amdgcn_struct_ptr_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load_format:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
  case Intrinsic::amdgcn_struct_tbuffer_load:
  case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
    return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
  default: {
    if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
      return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
    }
    break;
  }
  }
  return std::nullopt;
}