43#define DEBUG_TYPE "legalizer"
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
56static std::pair<int, int>
62 unsigned NumParts =
Size / NarrowSize;
63 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
66 if (LeftoverSize == 0)
71 if (LeftoverSize % EltSize != 0)
81 return std::make_pair(NumParts, NumLeftover);
108 : MIRBuilder(Builder), Observer(Observer),
MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
115 : MIRBuilder(
B), Observer(Observer),
MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
125 if (isa<GIntrinsic>(
MI))
128 switch (Step.Action) {
143 return bitcast(
MI, Step.TypeIdx, Step.NewType);
146 return lower(
MI, Step.TypeIdx, Step.NewType);
163void LegalizerHelper::insertParts(
Register DstReg,
185 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
187 AllRegs.append(LeftoverRegs.
begin(), LeftoverRegs.
end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
193 for (
auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
209void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
212 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
219 appendVectorElts(AllElts, Leftover);
227 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
229 const int StartIdx = Regs.
size();
230 const int NumResults =
MI.getNumOperands() - 1;
232 for (
int I = 0;
I != NumResults; ++
I)
233 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
239 if (SrcTy == GCDTy) {
254 extractGCDType(Parts, GCDTy, SrcReg);
258LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
260 unsigned PadStrategy) {
265 int NumOrigSrc = VRegs.
size();
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 assert(PadStrategy == TargetOpcode::G_SEXT);
298 for (
int I = 0;
I != NumParts; ++
I) {
299 bool AllMergePartsArePadding =
true;
302 for (
int J = 0; J != NumSubParts; ++J) {
303 int Idx =
I * NumSubParts + J;
304 if (
Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
309 SubMerge[J] = VRegs[
Idx];
312 AllMergePartsArePadding =
false;
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
331 Remerge[
I] = AllPadReg;
335 if (NumSubParts == 1)
336 Remerge[
I] = SubMerge[0];
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[
I];
345 VRegs = std::move(Remerge);
349void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
356 if (DstTy == LCMTy) {
370 UnmergeDefs[0] = DstReg;
371 for (
unsigned I = 1;
I != NumDefs; ++
I)
383#define RTLIBCASE_INT(LibcallPrefix) \
387 return RTLIB::LibcallPrefix##32; \
389 return RTLIB::LibcallPrefix##64; \
391 return RTLIB::LibcallPrefix##128; \
393 llvm_unreachable("unexpected size"); \
397#define RTLIBCASE(LibcallPrefix) \
401 return RTLIB::LibcallPrefix##32; \
403 return RTLIB::LibcallPrefix##64; \
405 return RTLIB::LibcallPrefix##80; \
407 return RTLIB::LibcallPrefix##128; \
409 llvm_unreachable("unexpected size"); \
414 case TargetOpcode::G_LROUND:
416 case TargetOpcode::G_LLROUND:
418 case TargetOpcode::G_MUL:
420 case TargetOpcode::G_SDIV:
422 case TargetOpcode::G_UDIV:
424 case TargetOpcode::G_SREM:
426 case TargetOpcode::G_UREM:
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
430 case TargetOpcode::G_FADD:
432 case TargetOpcode::G_FSUB:
434 case TargetOpcode::G_FMUL:
436 case TargetOpcode::G_FDIV:
438 case TargetOpcode::G_FEXP:
440 case TargetOpcode::G_FEXP2:
442 case TargetOpcode::G_FEXP10:
444 case TargetOpcode::G_FREM:
446 case TargetOpcode::G_FPOW:
448 case TargetOpcode::G_FPOWI:
450 case TargetOpcode::G_FMA:
452 case TargetOpcode::G_FSIN:
454 case TargetOpcode::G_FCOS:
456 case TargetOpcode::G_FTAN:
458 case TargetOpcode::G_FASIN:
460 case TargetOpcode::G_FACOS:
462 case TargetOpcode::G_FATAN:
464 case TargetOpcode::G_FATAN2:
466 case TargetOpcode::G_FSINH:
468 case TargetOpcode::G_FCOSH:
470 case TargetOpcode::G_FTANH:
472 case TargetOpcode::G_FLOG10:
474 case TargetOpcode::G_FLOG:
476 case TargetOpcode::G_FLOG2:
478 case TargetOpcode::G_FLDEXP:
480 case TargetOpcode::G_FCEIL:
482 case TargetOpcode::G_FFLOOR:
484 case TargetOpcode::G_FMINNUM:
486 case TargetOpcode::G_FMAXNUM:
488 case TargetOpcode::G_FSQRT:
490 case TargetOpcode::G_FRINT:
492 case TargetOpcode::G_FNEARBYINT:
494 case TargetOpcode::G_INTRINSIC_TRUNC:
496 case TargetOpcode::G_INTRINSIC_ROUND:
498 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
500 case TargetOpcode::G_INTRINSIC_LRINT:
502 case TargetOpcode::G_INTRINSIC_LLRINT:
530 if (CallerAttrs.
hasRetAttr(Attribute::ZExt) ||
542 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
549 if (!VReg.
isVirtual() || VReg != Next->getOperand(1).getReg())
552 Register PReg = Next->getOperand(0).getReg();
560 if (Ret->getNumImplicitOperands() != 1)
563 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
587 Info.OrigRet = Result;
590 (Result.Ty->isVoidTy() ||
595 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
596 if (!CLI.lowerCall(MIRBuilder,
Info))
599 if (
MI &&
Info.LoweredTailCall) {
600 assert(
Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
611 "Expected instr following MI to be return or debug inst?");
615 }
while (
MI->getNextNode());
645 Args.push_back({MO.getReg(), OpType, 0});
647 {
MI.getOperand(0).
getReg(), OpType, 0}, Args,
658 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
662 LLT OpLLT =
MRI.getType(Reg);
663 Type *OpTy =
nullptr;
668 Args.push_back({Reg, OpTy, 0});
674 unsigned Opc =
MI.getOpcode();
676 case TargetOpcode::G_BZERO:
677 RTLibcall = RTLIB::BZERO;
679 case TargetOpcode::G_MEMCPY:
680 RTLibcall = RTLIB::MEMCPY;
681 Args[0].Flags[0].setReturned();
683 case TargetOpcode::G_MEMMOVE:
684 RTLibcall = RTLIB::MEMMOVE;
685 Args[0].Flags[0].setReturned();
687 case TargetOpcode::G_MEMSET:
688 RTLibcall = RTLIB::MEMSET;
689 Args[0].Flags[0].setReturned();
694 const char *
Name = TLI.getLibcallName(RTLibcall);
704 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
708 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
711 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
712 if (!CLI.lowerCall(MIRBuilder,
Info))
715 if (
Info.LoweredTailCall) {
716 assert(
Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
727 "Expected instr following MI to be return or debug inst?");
731 }
while (
MI.getNextNode());
741 unsigned Opc =
MI.getOpcode();
742 auto &AtomicMI = cast<GMemOperation>(
MI);
743 auto &MMO = AtomicMI.getMMO();
744 auto Ordering = MMO.getMergedOrdering();
745 LLT MemType = MMO.getMemoryType();
748 return RTLIB::UNKNOWN_LIBCALL;
750#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
752 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
754 case TargetOpcode::G_ATOMIC_CMPXCHG:
755 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
757 return getOutlineAtomicHelper(LC, Ordering, MemSize);
759 case TargetOpcode::G_ATOMICRMW_XCHG: {
761 return getOutlineAtomicHelper(LC, Ordering, MemSize);
763 case TargetOpcode::G_ATOMICRMW_ADD:
764 case TargetOpcode::G_ATOMICRMW_SUB: {
766 return getOutlineAtomicHelper(LC, Ordering, MemSize);
768 case TargetOpcode::G_ATOMICRMW_AND: {
770 return getOutlineAtomicHelper(LC, Ordering, MemSize);
772 case TargetOpcode::G_ATOMICRMW_OR: {
774 return getOutlineAtomicHelper(LC, Ordering, MemSize);
776 case TargetOpcode::G_ATOMICRMW_XOR: {
778 return getOutlineAtomicHelper(LC, Ordering, MemSize);
781 return RTLIB::UNKNOWN_LIBCALL;
794 unsigned Opc =
MI.getOpcode();
796 case TargetOpcode::G_ATOMIC_CMPXCHG:
797 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
800 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
801 MI.getFirst4RegLLTs();
804 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
805 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
806 NewLLT) =
MI.getFirst5RegLLTs();
816 case TargetOpcode::G_ATOMICRMW_XCHG:
817 case TargetOpcode::G_ATOMICRMW_ADD:
818 case TargetOpcode::G_ATOMICRMW_SUB:
819 case TargetOpcode::G_ATOMICRMW_AND:
820 case TargetOpcode::G_ATOMICRMW_OR:
821 case TargetOpcode::G_ATOMICRMW_XOR: {
822 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
825 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
829 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
844 const char *
Name = TLI.getLibcallName(RTLibcall);
854 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
858 std::copy(Args.begin(), Args.end(), std::back_inserter(
Info.OrigArgs));
859 if (!CLI.lowerCall(MIRBuilder,
Info))
871 case TargetOpcode::G_FPEXT:
873 case TargetOpcode::G_FPTRUNC:
875 case TargetOpcode::G_FPTOSI:
877 case TargetOpcode::G_FPTOUI:
879 case TargetOpcode::G_SITOFP:
881 case TargetOpcode::G_UITOFP:
892 if (FromType->isIntegerTy()) {
894 Arg.
Flags[0].setSExt();
896 Arg.
Flags[0].setZExt();
901 {
MI.getOperand(0).
getReg(), ToType, 0}, Arg, LocObserver,
908 switch (
MI.getOpcode()) {
909 case TargetOpcode::G_GET_FPENV:
910 RTLibcall = RTLIB::FEGETENV;
912 case TargetOpcode::G_SET_FPENV:
913 case TargetOpcode::G_RESET_FPENV:
914 RTLibcall = RTLIB::FESETENV;
916 case TargetOpcode::G_GET_FPMODE:
917 RTLibcall = RTLIB::FEGETMODE;
919 case TargetOpcode::G_SET_FPMODE:
920 case TargetOpcode::G_RESET_FPMODE:
921 RTLibcall = RTLIB::FESETMODE;
950 auto &Ctx = MF.getFunction().getContext();
961 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
968 LocObserver,
nullptr);
990 auto &Ctx = MF.getFunction().getContext();
1006 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1012 LocObserver,
nullptr);
1018static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1020#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1024 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1026 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1028 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1030 llvm_unreachable("unexpected size"); \
1073 const auto Cond =
Cmp->getCond();
1087 {{
Cmp->getLHSReg(), OpType, 0}, {
Cmp->getRHSReg(), OpType, 1}},
1100 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1102 if (BuildLibcall(
Libcall, ICmpPred, DstReg)) {
1115 const auto [OeqLibcall, OeqPred] =
1117 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1119 const auto [UnoLibcall, UnoPred] =
1121 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1136 const auto [OeqLibcall, OeqPred] =
1141 const auto [UnoLibcall, UnoPred] =
1146 if (NotOeq && NotUno)
1165 const auto [InversedLibcall, InversedPred] =
1167 if (!BuildLibcall(InversedLibcall,
1193 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1195 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1212 switch (
MI.getOpcode()) {
1215 case TargetOpcode::G_MUL:
1216 case TargetOpcode::G_SDIV:
1217 case TargetOpcode::G_UDIV:
1218 case TargetOpcode::G_SREM:
1219 case TargetOpcode::G_UREM:
1220 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1229 case TargetOpcode::G_FADD:
1230 case TargetOpcode::G_FSUB:
1231 case TargetOpcode::G_FMUL:
1232 case TargetOpcode::G_FDIV:
1233 case TargetOpcode::G_FMA:
1234 case TargetOpcode::G_FPOW:
1235 case TargetOpcode::G_FREM:
1236 case TargetOpcode::G_FCOS:
1237 case TargetOpcode::G_FSIN:
1238 case TargetOpcode::G_FTAN:
1239 case TargetOpcode::G_FACOS:
1240 case TargetOpcode::G_FASIN:
1241 case TargetOpcode::G_FATAN:
1242 case TargetOpcode::G_FATAN2:
1243 case TargetOpcode::G_FCOSH:
1244 case TargetOpcode::G_FSINH:
1245 case TargetOpcode::G_FTANH:
1246 case TargetOpcode::G_FLOG10:
1247 case TargetOpcode::G_FLOG:
1248 case TargetOpcode::G_FLOG2:
1249 case TargetOpcode::G_FEXP:
1250 case TargetOpcode::G_FEXP2:
1251 case TargetOpcode::G_FEXP10:
1252 case TargetOpcode::G_FCEIL:
1253 case TargetOpcode::G_FFLOOR:
1254 case TargetOpcode::G_FMINNUM:
1255 case TargetOpcode::G_FMAXNUM:
1256 case TargetOpcode::G_FSQRT:
1257 case TargetOpcode::G_FRINT:
1258 case TargetOpcode::G_FNEARBYINT:
1259 case TargetOpcode::G_INTRINSIC_TRUNC:
1260 case TargetOpcode::G_INTRINSIC_ROUND:
1261 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1266 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1274 case TargetOpcode::G_LROUND:
1275 case TargetOpcode::G_LLROUND:
1276 case TargetOpcode::G_INTRINSIC_LRINT:
1277 case TargetOpcode::G_INTRINSIC_LLRINT: {
1284 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1290 {{
MI.getOperand(1).
getReg(), HLTy, 0}}, LocObserver, &
MI);
1293 MI.eraseFromParent();
1296 case TargetOpcode::G_FPOWI:
1297 case TargetOpcode::G_FLDEXP: {
1304 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1309 {
MI.getOperand(1).getReg(), HLTy, 0},
1310 {
MI.getOperand(2).getReg(), ITy, 1}};
1311 Args[1].Flags[0].setSExt();
1314 Args, LocObserver, &
MI);
1319 case TargetOpcode::G_FPEXT:
1320 case TargetOpcode::G_FPTRUNC: {
1323 if (!FromTy || !ToTy)
1331 case TargetOpcode::G_FCMP: {
1335 MI.eraseFromParent();
1338 case TargetOpcode::G_FPTOSI:
1339 case TargetOpcode::G_FPTOUI: {
1344 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1352 case TargetOpcode::G_SITOFP:
1353 case TargetOpcode::G_UITOFP: {
1357 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1359 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SITOFP;
1362 LocObserver, TLI, IsSigned);
1367 case TargetOpcode::G_ATOMICRMW_XCHG:
1368 case TargetOpcode::G_ATOMICRMW_ADD:
1369 case TargetOpcode::G_ATOMICRMW_SUB:
1370 case TargetOpcode::G_ATOMICRMW_AND:
1371 case TargetOpcode::G_ATOMICRMW_OR:
1372 case TargetOpcode::G_ATOMICRMW_XOR:
1373 case TargetOpcode::G_ATOMIC_CMPXCHG:
1374 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1380 case TargetOpcode::G_BZERO:
1381 case TargetOpcode::G_MEMCPY:
1382 case TargetOpcode::G_MEMMOVE:
1383 case TargetOpcode::G_MEMSET: {
1388 MI.eraseFromParent();
1391 case TargetOpcode::G_GET_FPENV:
1392 case TargetOpcode::G_GET_FPMODE: {
1398 case TargetOpcode::G_SET_FPENV:
1399 case TargetOpcode::G_SET_FPMODE: {
1405 case TargetOpcode::G_RESET_FPENV:
1406 case TargetOpcode::G_RESET_FPMODE: {
1415 MI.eraseFromParent();
1425 switch (
MI.getOpcode()) {
1428 case TargetOpcode::G_IMPLICIT_DEF: {
1438 if (SizeOp0 % NarrowSize != 0) {
1439 LLT ImplicitTy = NarrowTy;
1446 MI.eraseFromParent();
1450 int NumParts = SizeOp0 / NarrowSize;
1453 for (
int i = 0; i < NumParts; ++i)
1460 MI.eraseFromParent();
1463 case TargetOpcode::G_CONSTANT: {
1465 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1468 int NumParts = TotalSize / NarrowSize;
1471 for (
int I = 0;
I != NumParts; ++
I) {
1472 unsigned Offset =
I * NarrowSize;
1479 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1481 if (LeftoverBits != 0) {
1485 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1489 insertParts(
MI.getOperand(0).getReg(),
1490 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1492 MI.eraseFromParent();
1495 case TargetOpcode::G_SEXT:
1496 case TargetOpcode::G_ZEXT:
1497 case TargetOpcode::G_ANYEXT:
1499 case TargetOpcode::G_TRUNC: {
1505 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1511 MI.eraseFromParent();
1514 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1515 case TargetOpcode::G_FREEZE: {
1526 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1533 MI.eraseFromParent();
1536 case TargetOpcode::G_ADD:
1537 case TargetOpcode::G_SUB:
1538 case TargetOpcode::G_SADDO:
1539 case TargetOpcode::G_SSUBO:
1540 case TargetOpcode::G_SADDE:
1541 case TargetOpcode::G_SSUBE:
1542 case TargetOpcode::G_UADDO:
1543 case TargetOpcode::G_USUBO:
1544 case TargetOpcode::G_UADDE:
1545 case TargetOpcode::G_USUBE:
1547 case TargetOpcode::G_MUL:
1548 case TargetOpcode::G_UMULH:
1550 case TargetOpcode::G_EXTRACT:
1552 case TargetOpcode::G_INSERT:
1554 case TargetOpcode::G_LOAD: {
1555 auto &LoadMI = cast<GLoad>(
MI);
1556 Register DstReg = LoadMI.getDstReg();
1561 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1565 LoadMI.eraseFromParent();
1571 case TargetOpcode::G_ZEXTLOAD:
1572 case TargetOpcode::G_SEXTLOAD: {
1573 auto &LoadMI = cast<GExtLoad>(
MI);
1574 Register DstReg = LoadMI.getDstReg();
1575 Register PtrReg = LoadMI.getPointerReg();
1578 auto &MMO = LoadMI.getMMO();
1581 if (MemSize == NarrowSize) {
1583 }
else if (MemSize < NarrowSize) {
1585 }
else if (MemSize > NarrowSize) {
1590 if (isa<GZExtLoad>(LoadMI))
1595 LoadMI.eraseFromParent();
1598 case TargetOpcode::G_STORE: {
1599 auto &StoreMI = cast<GStore>(
MI);
1601 Register SrcReg = StoreMI.getValueReg();
1606 int NumParts = SizeOp0 / NarrowSize;
1608 unsigned LeftoverBits = SrcTy.
getSizeInBits() - HandledSize;
1609 if (SrcTy.
isVector() && LeftoverBits != 0)
1612 if (8 * StoreMI.getMemSize().getValue() != SrcTy.
getSizeInBits()) {
1616 StoreMI.eraseFromParent();
1622 case TargetOpcode::G_SELECT:
1624 case TargetOpcode::G_AND:
1625 case TargetOpcode::G_OR:
1626 case TargetOpcode::G_XOR: {
1638 case TargetOpcode::G_SHL:
1639 case TargetOpcode::G_LSHR:
1640 case TargetOpcode::G_ASHR:
1642 case TargetOpcode::G_CTLZ:
1643 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1644 case TargetOpcode::G_CTTZ:
1645 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1646 case TargetOpcode::G_CTPOP:
1648 switch (
MI.getOpcode()) {
1649 case TargetOpcode::G_CTLZ:
1650 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1652 case TargetOpcode::G_CTTZ:
1653 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1655 case TargetOpcode::G_CTPOP:
1665 case TargetOpcode::G_INTTOPTR:
1673 case TargetOpcode::G_PTRTOINT:
1681 case TargetOpcode::G_PHI: {
1684 if (SizeOp0 % NarrowSize != 0)
1687 unsigned NumParts = SizeOp0 / NarrowSize;
1691 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1699 for (
unsigned i = 0; i < NumParts; ++i) {
1703 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1704 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1709 MI.eraseFromParent();
1712 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1713 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1717 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1723 case TargetOpcode::G_ICMP: {
1737 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1738 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1751 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1752 auto LHS = std::get<0>(LHSAndRHS);
1753 auto RHS = std::get<1>(LHSAndRHS);
1761 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1762 auto LHS = std::get<0>(LHSAndRHS);
1763 auto RHS = std::get<1>(LHSAndRHS);
1765 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1766 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1767 TargetOpcode::G_ZEXT);
1774 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1776 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1781 for (
unsigned I = 0, E = LHSPartRegs.
size();
I != E; ++
I) {
1785 if (
I == E - 1 && LHSLeftoverRegs.
empty()) {
1800 LHSPartRegs[
I], RHSPartRegs[
I]);
1807 for (
unsigned I = 0, E = LHSLeftoverRegs.
size();
I != E; ++
I) {
1811 if (
I == E - 1 && LHSLeftoverRegs.
empty()) {
1821 RHSLeftoverRegs[
I]);
1824 RHSLeftoverRegs[
I]);
1827 LHSLeftoverRegs[
I], RHSLeftoverRegs[
I]);
1834 MI.eraseFromParent();
1837 case TargetOpcode::G_FCMP:
1846 case TargetOpcode::G_SEXT_INREG: {
1850 int64_t SizeInBits =
MI.getOperand(2).getImm();
1860 MO1.
setReg(TruncMIB.getReg(0));
1875 if (SizeOp0 % NarrowSize != 0)
1877 int NumParts = SizeOp0 / NarrowSize;
1885 for (
int i = 0; i < NumParts; ++i) {
1901 for (
int i = 0; i < NumParts; ++i) {
1904 PartialExtensionReg = DstRegs.
back();
1906 assert(PartialExtensionReg &&
1907 "Expected to visit partial extension before full");
1908 if (FullExtensionReg) {
1915 FullExtensionReg = DstRegs.
back();
1920 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1923 PartialExtensionReg = DstRegs.
back();
1930 MI.eraseFromParent();
1933 case TargetOpcode::G_BSWAP:
1934 case TargetOpcode::G_BITREVERSE: {
1935 if (SizeOp0 % NarrowSize != 0)
1940 unsigned NumParts = SizeOp0 / NarrowSize;
1941 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1944 for (
unsigned i = 0; i < NumParts; ++i) {
1946 {SrcRegs[NumParts - 1 - i]});
1953 MI.eraseFromParent();
1956 case TargetOpcode::G_PTR_ADD:
1957 case TargetOpcode::G_PTRMASK: {
1965 case TargetOpcode::G_FPTOUI:
1966 case TargetOpcode::G_FPTOSI:
1967 case TargetOpcode::G_FPTOUI_SAT:
1968 case TargetOpcode::G_FPTOSI_SAT:
1970 case TargetOpcode::G_FPEXT:
1977 case TargetOpcode::G_FLDEXP:
1978 case TargetOpcode::G_STRICT_FLDEXP:
1980 case TargetOpcode::G_VSCALE: {
1991 MI.eraseFromParent();
2019 unsigned OpIdx,
unsigned ExtOpcode) {
2022 MO.
setReg(ExtB.getReg(0));
2029 MO.
setReg(ExtB.getReg(0));
2033 unsigned OpIdx,
unsigned TruncOpcode) {
2042 unsigned OpIdx,
unsigned ExtOpcode) {
2081LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2086 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
2087 if (DstTy.isVector())
2094 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2096 unsigned NumOps =
MI.getNumOperands();
2097 unsigned NumSrc =
MI.getNumOperands() - 1;
2098 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2100 if (WideSize >= DstSize) {
2104 for (
unsigned I = 2;
I != NumOps; ++
I) {
2105 const unsigned Offset = (
I - 1) * PartSize;
2112 Register NextResult =
I + 1 == NumOps && WideTy == DstTy ? DstReg :
2118 ResultReg = NextResult;
2121 if (WideSize > DstSize)
2123 else if (DstTy.isPointer())
2126 MI.eraseFromParent();
2151 const int GCD = std::gcd(SrcSize, WideSize);
2162 if (GCD == SrcSize) {
2166 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2172 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
2174 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
2178 const int PartsPerGCD = WideSize / GCD;
2182 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2197 MI.eraseFromParent();
2202LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2207 int NumDst =
MI.getNumOperands() - 1;
2208 Register SrcReg =
MI.getOperand(NumDst).getReg();
2213 Register Dst0Reg =
MI.getOperand(0).getReg();
2223 dbgs() <<
"Not casting non-integral address space integer\n");
2244 for (
int I = 1;
I != NumDst; ++
I) {
2250 MI.eraseFromParent();
2261 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2286 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2291 if (PartsPerRemerge == 1) {
2294 for (
int I = 0;
I != NumUnmerge; ++
I) {
2297 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2298 int Idx =
I * PartsPerUnmerge + J;
2300 MIB.addDef(
MI.getOperand(
Idx).getReg());
2307 MIB.addUse(Unmerge.getReg(
I));
2311 for (
int J = 0; J != NumUnmerge; ++J)
2312 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2315 for (
int I = 0;
I != NumDst; ++
I) {
2316 for (
int J = 0; J < PartsPerRemerge; ++J) {
2317 const int Idx =
I * PartsPerRemerge + J;
2322 RemergeParts.
clear();
2326 MI.eraseFromParent();
2331LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2333 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2334 unsigned Offset =
MI.getOperand(2).getImm();
2337 if (SrcTy.
isVector() || DstTy.isVector())
2353 if (DstTy.isPointer())
2360 MI.eraseFromParent();
2365 LLT ShiftTy = SrcTy;
2374 MI.eraseFromParent();
2405LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2407 if (TypeIdx != 0 || WideTy.
isVector())
2417LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2421 std::optional<Register> CarryIn;
2422 switch (
MI.getOpcode()) {
2425 case TargetOpcode::G_SADDO:
2426 Opcode = TargetOpcode::G_ADD;
2427 ExtOpcode = TargetOpcode::G_SEXT;
2429 case TargetOpcode::G_SSUBO:
2430 Opcode = TargetOpcode::G_SUB;
2431 ExtOpcode = TargetOpcode::G_SEXT;
2433 case TargetOpcode::G_UADDO:
2434 Opcode = TargetOpcode::G_ADD;
2435 ExtOpcode = TargetOpcode::G_ZEXT;
2437 case TargetOpcode::G_USUBO:
2438 Opcode = TargetOpcode::G_SUB;
2439 ExtOpcode = TargetOpcode::G_ZEXT;
2441 case TargetOpcode::G_SADDE:
2442 Opcode = TargetOpcode::G_UADDE;
2443 ExtOpcode = TargetOpcode::G_SEXT;
2444 CarryIn =
MI.getOperand(4).getReg();
2446 case TargetOpcode::G_SSUBE:
2447 Opcode = TargetOpcode::G_USUBE;
2448 ExtOpcode = TargetOpcode::G_SEXT;
2449 CarryIn =
MI.getOperand(4).getReg();
2451 case TargetOpcode::G_UADDE:
2452 Opcode = TargetOpcode::G_UADDE;
2453 ExtOpcode = TargetOpcode::G_ZEXT;
2454 CarryIn =
MI.getOperand(4).getReg();
2456 case TargetOpcode::G_USUBE:
2457 Opcode = TargetOpcode::G_USUBE;
2458 ExtOpcode = TargetOpcode::G_ZEXT;
2459 CarryIn =
MI.getOperand(4).getReg();
2480 LLT CarryOutTy = MRI.
getType(
MI.getOperand(1).getReg());
2483 {LHSExt, RHSExt, *CarryIn})
2495 MI.eraseFromParent();
2500LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2502 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2503 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2504 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2505 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2506 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2531 {ShiftL, ShiftR},
MI.getFlags());
2539 MI.eraseFromParent();
2544LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2553 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2556 LLT OverflowTy = MRI.
getType(OriginalOverflow);
2563 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2572 WideMulCanOverflow ?
MI.getOpcode() : (
unsigned)TargetOpcode::G_MUL;
2575 if (WideMulCanOverflow)
2577 {LeftOperand, RightOperand});
2598 if (WideMulCanOverflow) {
2606 MI.eraseFromParent();
2612 unsigned Opcode =
MI.getOpcode();
2616 case TargetOpcode::G_ATOMICRMW_XCHG:
2617 case TargetOpcode::G_ATOMICRMW_ADD:
2618 case TargetOpcode::G_ATOMICRMW_SUB:
2619 case TargetOpcode::G_ATOMICRMW_AND:
2620 case TargetOpcode::G_ATOMICRMW_OR:
2621 case TargetOpcode::G_ATOMICRMW_XOR:
2622 case TargetOpcode::G_ATOMICRMW_MIN:
2623 case TargetOpcode::G_ATOMICRMW_MAX:
2624 case TargetOpcode::G_ATOMICRMW_UMIN:
2625 case TargetOpcode::G_ATOMICRMW_UMAX:
2626 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2632 case TargetOpcode::G_ATOMIC_CMPXCHG:
2633 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2640 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2650 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2655 case TargetOpcode::G_EXTRACT:
2656 return widenScalarExtract(
MI, TypeIdx, WideTy);
2657 case TargetOpcode::G_INSERT:
2658 return widenScalarInsert(
MI, TypeIdx, WideTy);
2659 case TargetOpcode::G_MERGE_VALUES:
2660 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2661 case TargetOpcode::G_UNMERGE_VALUES:
2662 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2663 case TargetOpcode::G_SADDO:
2664 case TargetOpcode::G_SSUBO:
2665 case TargetOpcode::G_UADDO:
2666 case TargetOpcode::G_USUBO:
2667 case TargetOpcode::G_SADDE:
2668 case TargetOpcode::G_SSUBE:
2669 case TargetOpcode::G_UADDE:
2670 case TargetOpcode::G_USUBE:
2671 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2672 case TargetOpcode::G_UMULO:
2673 case TargetOpcode::G_SMULO:
2674 return widenScalarMulo(
MI, TypeIdx, WideTy);
2675 case TargetOpcode::G_SADDSAT:
2676 case TargetOpcode::G_SSUBSAT:
2677 case TargetOpcode::G_SSHLSAT:
2678 case TargetOpcode::G_UADDSAT:
2679 case TargetOpcode::G_USUBSAT:
2680 case TargetOpcode::G_USHLSAT:
2681 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2682 case TargetOpcode::G_CTTZ:
2683 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2684 case TargetOpcode::G_CTLZ:
2685 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2686 case TargetOpcode::G_CTPOP: {
2697 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2698 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2699 ? TargetOpcode::G_ANYEXT
2700 : TargetOpcode::G_ZEXT;
2703 unsigned NewOpc = Opcode;
2704 if (NewOpc == TargetOpcode::G_CTTZ) {
2713 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2718 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2730 if (Opcode == TargetOpcode::G_CTLZ) {
2737 MI.eraseFromParent();
2740 case TargetOpcode::G_BSWAP: {
2749 MI.getOperand(0).setReg(DstExt);
2762 case TargetOpcode::G_BITREVERSE: {
2771 MI.getOperand(0).setReg(DstExt);
2780 case TargetOpcode::G_FREEZE:
2781 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2788 case TargetOpcode::G_ABS:
2795 case TargetOpcode::G_ADD:
2796 case TargetOpcode::G_AND:
2797 case TargetOpcode::G_MUL:
2798 case TargetOpcode::G_OR:
2799 case TargetOpcode::G_XOR:
2800 case TargetOpcode::G_SUB:
2801 case TargetOpcode::G_SHUFFLE_VECTOR:
2812 case TargetOpcode::G_SBFX:
2813 case TargetOpcode::G_UBFX:
2827 case TargetOpcode::G_SHL:
2843 case TargetOpcode::G_ROTR:
2844 case TargetOpcode::G_ROTL:
2853 case TargetOpcode::G_SDIV:
2854 case TargetOpcode::G_SREM:
2855 case TargetOpcode::G_SMIN:
2856 case TargetOpcode::G_SMAX:
2864 case TargetOpcode::G_SDIVREM:
2873 case TargetOpcode::G_ASHR:
2874 case TargetOpcode::G_LSHR:
2878 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2879 : TargetOpcode::G_ZEXT;
2892 case TargetOpcode::G_UDIV:
2893 case TargetOpcode::G_UREM:
2900 case TargetOpcode::G_UDIVREM:
2908 case TargetOpcode::G_UMIN:
2909 case TargetOpcode::G_UMAX: {
2916 ? TargetOpcode::G_SEXT
2917 : TargetOpcode::G_ZEXT;
2927 case TargetOpcode::G_SELECT:
2944 case TargetOpcode::G_FPTOSI:
2945 case TargetOpcode::G_FPTOUI:
2946 case TargetOpcode::G_INTRINSIC_LRINT:
2947 case TargetOpcode::G_INTRINSIC_LLRINT:
2948 case TargetOpcode::G_IS_FPCLASS:
2958 case TargetOpcode::G_SITOFP:
2968 case TargetOpcode::G_UITOFP:
2978 case TargetOpcode::G_FPTOSI_SAT:
2979 case TargetOpcode::G_FPTOUI_SAT:
2987 MI.getOperand(0).setReg(ExtReg);
2991 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3019 case TargetOpcode::G_LOAD:
3020 case TargetOpcode::G_SEXTLOAD:
3021 case TargetOpcode::G_ZEXTLOAD:
3027 case TargetOpcode::G_STORE: {
3041 MI.setMemRefs(MF, {NewMMO});
3049 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3055 case TargetOpcode::G_CONSTANT: {
3059 MRI.
getType(
MI.getOperand(0).getReg()));
3060 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3061 ExtOpc == TargetOpcode::G_ANYEXT) &&
3064 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3068 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3074 case TargetOpcode::G_FCONSTANT: {
3082 MI.eraseFromParent();
3085 case TargetOpcode::G_IMPLICIT_DEF: {
3091 case TargetOpcode::G_BRCOND:
3097 case TargetOpcode::G_FCMP:
3108 case TargetOpcode::G_ICMP:
3118 unsigned ExtOpcode =
3122 ? TargetOpcode::G_SEXT
3123 : TargetOpcode::G_ZEXT;
3130 case TargetOpcode::G_PTR_ADD:
3131 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
3137 case TargetOpcode::G_PHI: {
3138 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
3141 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
3153 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3161 TargetOpcode::G_ANYEXT);
3176 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3212 case TargetOpcode::G_FADD:
3213 case TargetOpcode::G_FMUL:
3214 case TargetOpcode::G_FSUB:
3215 case TargetOpcode::G_FMA:
3216 case TargetOpcode::G_FMAD:
3217 case TargetOpcode::G_FNEG:
3218 case TargetOpcode::G_FABS:
3219 case TargetOpcode::G_FCANONICALIZE:
3220 case TargetOpcode::G_FMINNUM:
3221 case TargetOpcode::G_FMAXNUM:
3222 case TargetOpcode::G_FMINNUM_IEEE:
3223 case TargetOpcode::G_FMAXNUM_IEEE:
3224 case TargetOpcode::G_FMINIMUM:
3225 case TargetOpcode::G_FMAXIMUM:
3226 case TargetOpcode::G_FDIV:
3227 case TargetOpcode::G_FREM:
3228 case TargetOpcode::G_FCEIL:
3229 case TargetOpcode::G_FFLOOR:
3230 case TargetOpcode::G_FCOS:
3231 case TargetOpcode::G_FSIN:
3232 case TargetOpcode::G_FTAN:
3233 case TargetOpcode::G_FACOS:
3234 case TargetOpcode::G_FASIN:
3235 case TargetOpcode::G_FATAN:
3236 case TargetOpcode::G_FATAN2:
3237 case TargetOpcode::G_FCOSH:
3238 case TargetOpcode::G_FSINH:
3239 case TargetOpcode::G_FTANH:
3240 case TargetOpcode::G_FLOG10:
3241 case TargetOpcode::G_FLOG:
3242 case TargetOpcode::G_FLOG2:
3243 case TargetOpcode::G_FRINT:
3244 case TargetOpcode::G_FNEARBYINT:
3245 case TargetOpcode::G_FSQRT:
3246 case TargetOpcode::G_FEXP:
3247 case TargetOpcode::G_FEXP2:
3248 case TargetOpcode::G_FEXP10:
3249 case TargetOpcode::G_FPOW:
3250 case TargetOpcode::G_INTRINSIC_TRUNC:
3251 case TargetOpcode::G_INTRINSIC_ROUND:
3252 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3256 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3262 case TargetOpcode::G_FPOWI:
3263 case TargetOpcode::G_FLDEXP:
3264 case TargetOpcode::G_STRICT_FLDEXP: {
3266 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3287 case TargetOpcode::G_FFREXP: {
3300 case TargetOpcode::G_INTTOPTR:
3308 case TargetOpcode::G_PTRTOINT:
3316 case TargetOpcode::G_BUILD_VECTOR: {
3320 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3334 case TargetOpcode::G_SEXT_INREG:
3343 case TargetOpcode::G_PTRMASK: {
3351 case TargetOpcode::G_VECREDUCE_ADD: {
3360 case TargetOpcode::G_VECREDUCE_FADD:
3361 case TargetOpcode::G_VECREDUCE_FMUL:
3362 case TargetOpcode::G_VECREDUCE_FMIN:
3363 case TargetOpcode::G_VECREDUCE_FMAX:
3364 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3365 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3379 case TargetOpcode::G_VSCALE: {
3386 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3391 case TargetOpcode::G_SPLAT_VECTOR: {
3400 case TargetOpcode::G_INSERT_SUBVECTOR: {
3423 MI.eraseFromParent();
3432 auto Unmerge =
B.buildUnmerge(Ty, Src);
3433 for (
int I = 0, E = Unmerge->getNumOperands() - 1;
I != E; ++
I)
3442 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3444 LLT DstLLT =
MRI.getType(DstReg);
3465 MI.eraseFromParent();
3476 MI.eraseFromParent();
3483 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3488 if (DstTy.isVector()) {
3489 int NumDstElt = DstTy.getNumElements();
3493 LLT DstCastTy = DstEltTy;
3494 LLT SrcPartTy = SrcEltTy;
3498 if (NumSrcElt < NumDstElt) {
3508 SrcPartTy = SrcEltTy;
3509 }
else if (NumSrcElt > NumDstElt) {
3520 DstCastTy = DstEltTy;
3530 MI.eraseFromParent();
3534 if (DstTy.isVector()) {
3538 MI.eraseFromParent();
3554 unsigned NewEltSize,
3555 unsigned OldEltSize) {
3556 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3557 LLT IdxTy =
B.getMRI()->getType(
Idx);
3560 auto OffsetMask =
B.buildConstant(
3562 auto OffsetIdx =
B.buildAnd(IdxTy,
Idx, OffsetMask);
3563 return B.buildShl(IdxTy, OffsetIdx,
3564 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3579 auto [Dst, DstTy, SrcVec, SrcVecTy,
Idx, IdxTy] =
MI.getFirst3RegLLTs();
3583 unsigned OldNumElts = SrcVecTy.getNumElements();
3590 if (NewNumElts > OldNumElts) {
3601 if (NewNumElts % OldNumElts != 0)
3605 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3614 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3618 NewOps[
I] = Elt.getReg(0);
3623 MI.eraseFromParent();
3627 if (NewNumElts < OldNumElts) {
3628 if (NewEltSize % OldEltSize != 0)
3650 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3669 MI.eraseFromParent();
3683 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3684 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3685 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3686 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3689 auto EltMask =
B.buildConstant(
3693 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3694 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3697 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3701 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3715 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy,
Idx, IdxTy] =
3716 MI.getFirst4RegLLTs();
3728 if (NewNumElts < OldNumElts) {
3729 if (NewEltSize % OldEltSize != 0)
3738 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3758 CastTy, CastVec, InsertedElt, ScaledIdx).
getReg(0);
3762 MI.eraseFromParent();
3786 auto ConcatMI = dyn_cast<GConcatVectors>(&
MI);
3792 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3796 if (!LI.
isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3797 return UnableToLegalize;
3802 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3804 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3813 MI.eraseFromParent();
3830 auto ShuffleMI = cast<GShuffleVector>(&
MI);
3831 LLT DstTy = MRI.
getType(ShuffleMI->getReg(0));
3832 LLT SrcTy = MRI.
getType(ShuffleMI->getReg(1));
3848 MI.eraseFromParent();
3864 auto ES = cast<GExtractSubvector>(&
MI);
3878 LLT DstTy =
MRI.getType(Dst);
3879 LLT SrcTy =
MRI.getType(Src);
3885 if (DstTy == CastTy)
3893 if (CastEltSize < DstEltSize)
3896 auto AdjustAmt = CastEltSize / DstEltSize;
3897 if (
Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3898 SrcTyMinElts % AdjustAmt != 0)
3927 auto ES = cast<GInsertSubvector>(&
MI);
3942 LLT DstTy =
MRI.getType(Dst);
3943 LLT BigVecTy =
MRI.getType(BigVec);
3944 LLT SubVecTy =
MRI.getType(SubVec);
3946 if (DstTy == CastTy)
3961 if (CastEltSize < DstEltSize)
3964 auto AdjustAmt = CastEltSize / DstEltSize;
3965 if (
Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3966 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
3994 if (MemSizeInBits != MemStoreSizeInBits) {
4014 if (isa<GSExtLoad>(LoadMI)) {
4017 }
else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4026 if (DstTy != LoadTy)
4052 uint64_t LargeSplitSize, SmallSplitSize;
4057 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4067 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4092 LargeSplitSize / 8);
4096 SmallPtr, *SmallMMO);
4101 if (AnyExtTy == DstTy)
4136 if (StoreWidth != StoreSizeInBits && !SrcTy.
isVector()) {
4167 uint64_t LargeSplitSize, SmallSplitSize;
4170 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.
getSizeInBits());
4177 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4260 switch (
MI.getOpcode()) {
4261 case TargetOpcode::G_LOAD: {
4279 case TargetOpcode::G_STORE: {
4295 case TargetOpcode::G_SELECT: {
4301 dbgs() <<
"bitcast action not implemented for vector select\n");
4312 case TargetOpcode::G_AND:
4313 case TargetOpcode::G_OR:
4314 case TargetOpcode::G_XOR: {
4322 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4324 case TargetOpcode::G_INSERT_VECTOR_ELT:
4326 case TargetOpcode::G_CONCAT_VECTORS:
4328 case TargetOpcode::G_SHUFFLE_VECTOR:
4330 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4332 case TargetOpcode::G_INSERT_SUBVECTOR:
4340void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
4348 using namespace TargetOpcode;
4350 switch(
MI.getOpcode()) {
4353 case TargetOpcode::G_FCONSTANT:
4355 case TargetOpcode::G_BITCAST:
4357 case TargetOpcode::G_SREM:
4358 case TargetOpcode::G_UREM: {
4362 {MI.getOperand(1), MI.getOperand(2)});
4366 MI.eraseFromParent();
4369 case TargetOpcode::G_SADDO:
4370 case TargetOpcode::G_SSUBO:
4372 case TargetOpcode::G_UMULH:
4373 case TargetOpcode::G_SMULH:
4375 case TargetOpcode::G_SMULO:
4376 case TargetOpcode::G_UMULO: {
4379 auto [Res, Overflow,
LHS,
RHS] =
MI.getFirst4Regs();
4382 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
4383 ? TargetOpcode::G_SMULH
4384 : TargetOpcode::G_UMULH;
4388 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
4389 MI.removeOperand(1);
4400 if (Opcode == TargetOpcode::G_SMULH) {
4409 case TargetOpcode::G_FNEG: {
4410 auto [Res, SubByReg] =
MI.getFirst2Regs();
4416 MI.eraseFromParent();
4419 case TargetOpcode::G_FSUB:
4420 case TargetOpcode::G_STRICT_FSUB: {
4421 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
4427 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4432 MI.eraseFromParent();
4435 case TargetOpcode::G_FMAD:
4437 case TargetOpcode::G_FFLOOR:
4439 case TargetOpcode::G_LROUND:
4440 case TargetOpcode::G_LLROUND: {
4447 MI.eraseFromParent();
4450 case TargetOpcode::G_INTRINSIC_ROUND:
4452 case TargetOpcode::G_FRINT: {
4455 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4458 case TargetOpcode::G_INTRINSIC_LRINT:
4459 case TargetOpcode::G_INTRINSIC_LLRINT: {
4466 MI.eraseFromParent();
4469 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4470 auto [OldValRes, SuccessRes,
Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
4473 **
MI.memoperands_begin());
4476 MI.eraseFromParent();
4479 case TargetOpcode::G_LOAD:
4480 case TargetOpcode::G_SEXTLOAD:
4481 case TargetOpcode::G_ZEXTLOAD:
4483 case TargetOpcode::G_STORE:
4485 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4486 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4487 case TargetOpcode::G_CTLZ:
4488 case TargetOpcode::G_CTTZ:
4489 case TargetOpcode::G_CTPOP:
4492 auto [Res, CarryOut,
LHS,
RHS] =
MI.getFirst4Regs();
4501 MI.eraseFromParent();
4505 auto [Res, CarryOut,
LHS,
RHS, CarryIn] =
MI.getFirst5Regs();
4531 MI.eraseFromParent();
4535 auto [Res, BorrowOut,
LHS,
RHS] =
MI.getFirst4Regs();
4540 MI.eraseFromParent();
4544 auto [Res, BorrowOut,
LHS,
RHS, BorrowIn] =
MI.getFirst5Regs();
4566 MI.eraseFromParent();
4597 case G_MERGE_VALUES:
4599 case G_UNMERGE_VALUES:
4601 case TargetOpcode::G_SEXT_INREG: {
4602 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4603 int64_t SizeInBits =
MI.getOperand(2).getImm();
4605 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4612 MI.eraseFromParent();
4615 case G_EXTRACT_VECTOR_ELT:
4616 case G_INSERT_VECTOR_ELT:
4618 case G_SHUFFLE_VECTOR:
4620 case G_VECTOR_COMPRESS:
4622 case G_DYN_STACKALLOC:
4626 case G_STACKRESTORE:
4636 case G_READ_REGISTER:
4637 case G_WRITE_REGISTER:
4684 case G_MEMCPY_INLINE:
4685 return lowerMemcpyInline(
MI);
4716 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4726 Align StackTypeAlign =
4739 LLT IdxTy =
B.getMRI()->getType(IdxReg);
4751 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
4754 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
4765 "Converting bits to bytes lost precision");
4772 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
4774 if (IdxTy != MRI.
getType(Index))
4789 std::initializer_list<unsigned> NonVecOpIndices) {
4790 if (
MI.getNumMemOperands() != 0)
4793 LLT VecTy =
MRI.getType(
MI.getReg(0));
4798 for (
unsigned OpIdx = 1; OpIdx <
MI.getNumOperands(); ++OpIdx) {
4831 int NumParts, NumLeftover;
4832 std::tie(NumParts, NumLeftover) =
4835 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
4836 for (
int i = 0; i < NumParts; ++i) {
4841 assert(NumLeftover == 1 &&
"expected exactly one leftover");
4850 for (
unsigned i = 0; i <
N; ++i) {
4853 else if (
Op.isImm())
4855 else if (
Op.isPredicate())
4877 std::initializer_list<unsigned> NonVecOpIndices) {
4879 "Non-compatible opcode or not specified non-vector operands");
4882 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
4883 unsigned NumDefs =
MI.getNumDefs();
4891 for (
unsigned i = 0; i < NumDefs; ++i) {
4900 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
4901 ++UseIdx, ++UseNo) {
4904 MI.getOperand(UseIdx));
4909 for (
auto Reg : SplitPieces)
4914 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4918 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4920 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4921 Defs.
push_back(OutputOpsPieces[DstNo][i]);
4924 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4925 Uses.push_back(InputOpsPieces[InputNo][i]);
4928 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4929 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
4934 for (
unsigned i = 0; i < NumDefs; ++i)
4935 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
4937 for (
unsigned i = 0; i < NumDefs; ++i)
4941 MI.eraseFromParent();
4950 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
4951 unsigned NumDefs =
MI.getNumDefs();
4960 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
4961 UseIdx += 2, ++UseNo) {
4969 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4971 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4977 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
4978 Phi.addUse(InputOpsPieces[j][i]);
4979 Phi.add(
MI.getOperand(1 + j * 2 + 1));
4989 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
4994 MI.eraseFromParent();
5002 const int NumDst =
MI.getNumOperands() - 1;
5003 const Register SrcReg =
MI.getOperand(NumDst).getReg();
5007 if (TypeIdx != 1 || NarrowTy == DstTy)
5033 const int PartsPerUnmerge = NumDst / NumUnmerge;
5035 for (
int I = 0;
I != NumUnmerge; ++
I) {
5038 for (
int J = 0; J != PartsPerUnmerge; ++J)
5039 MIB.
addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
5040 MIB.
addUse(Unmerge.getReg(
I));
5043 MI.eraseFromParent();
5050 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5054 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5056 if (NarrowTy == SrcTy)
5066 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5080 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
5082 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5088 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5089 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
5090 ++i,
Offset += NumNarrowTyElts) {
5097 MI.eraseFromParent();
5101 assert(TypeIdx == 0 &&
"Bad type index");
5117 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
5120 for (
unsigned i = 0; i < NumParts; ++i) {
5122 for (
unsigned j = 0; j < NumElts; ++j)
5123 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
5129 MI.eraseFromParent();
5137 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
5139 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5141 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
5143 InsertVal =
MI.getOperand(2).getReg();
5158 IdxVal = MaybeCst->Value.getSExtValue();
5162 MI.eraseFromParent();
5167 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5170 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5171 TargetOpcode::G_ANYEXT);
5176 int64_t PartIdx = IdxVal / NewNumElts;
5185 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5186 VecParts[PartIdx] = InsertPart.getReg(0);
5190 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5195 MI.eraseFromParent();
5219 bool IsLoad = isa<GLoad>(LdStMI);
5231 int NumLeftover = -1;
5237 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5239 NumParts = NarrowRegs.
size();
5240 NumLeftover = NarrowLeftoverRegs.
size();
5257 auto MMO = LdStMI.
getMMO();
5259 unsigned NumParts,
unsigned Offset) ->
unsigned {
5262 for (
unsigned Idx = 0, E = NumParts;
Idx != E &&
Offset < TotalSize;
5264 unsigned ByteOffset =
Offset / 8;
5274 ValRegs.push_back(Dst);
5286 unsigned HandledOffset =
5287 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
5291 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5294 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5295 LeftoverTy, NarrowLeftoverRegs);
5305 using namespace TargetOpcode;
5309 switch (
MI.getOpcode()) {
5310 case G_IMPLICIT_DEF:
5326 case G_FCANONICALIZE:
5343 case G_INTRINSIC_LRINT:
5344 case G_INTRINSIC_LLRINT:
5345 case G_INTRINSIC_ROUND:
5346 case G_INTRINSIC_ROUNDEVEN:
5349 case G_INTRINSIC_TRUNC:
5376 case G_FMINNUM_IEEE:
5377 case G_FMAXNUM_IEEE:
5397 case G_CTLZ_ZERO_UNDEF:
5399 case G_CTTZ_ZERO_UNDEF:
5415 case G_ADDRSPACE_CAST:
5428 case G_STRICT_FLDEXP:
5442 case G_UNMERGE_VALUES:
5444 case G_BUILD_VECTOR:
5445 assert(TypeIdx == 0 &&
"not a vector type index");
5447 case G_CONCAT_VECTORS:
5451 case G_EXTRACT_VECTOR_ELT:
5452 case G_INSERT_VECTOR_ELT:
5461 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5462 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5464 case G_SHUFFLE_VECTOR:
5470 case G_INTRINSIC_FPTRUNC_ROUND:
5480 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
5481 "Not a bitcast operation");
5486 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5488 unsigned NewElemCount =
5494 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5499 for (
unsigned i = 0; i < SrcVRegs.
size(); i++)
5504 MI.eraseFromParent();
5510 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5514 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5515 MI.getFirst3RegLLTs();
5518 if (DstTy != Src1Ty)
5520 if (DstTy != Src2Ty)
5535 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5551 unsigned InputUsed[2] = {-1U, -1U};
5552 unsigned FirstMaskIdx =
High * NewElts;
5553 bool UseBuildVector =
false;
5554 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5556 int Idx = Mask[FirstMaskIdx + MaskOffset];
5561 if (Input >= std::size(Inputs)) {
5568 Idx -= Input * NewElts;
5572 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5573 if (InputUsed[OpNo] == Input) {
5576 }
else if (InputUsed[OpNo] == -1U) {
5578 InputUsed[OpNo] = Input;
5583 if (OpNo >= std::size(InputUsed)) {
5586 UseBuildVector =
true;
5594 if (UseBuildVector) {
5599 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5601 int Idx = Mask[FirstMaskIdx + MaskOffset];
5606 if (Input >= std::size(Inputs)) {
5613 Idx -= Input * NewElts;
5617 .buildExtractVectorElement(
5618 EltTy, Inputs[Input],
5625 }
else if (InputUsed[0] == -1U) {
5629 Register Op0 = Inputs[InputUsed[0]];
5633 : Inputs[InputUsed[1]];
5642 MI.eraseFromParent();
5648 auto &RdxMI = cast<GVecReduce>(
MI);
5655 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5661 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5664 const unsigned NumParts =
5670 if (DstTy != NarrowTy)
5676 unsigned NumPartsLeft = NumParts;
5677 while (NumPartsLeft > 1) {
5678 for (
unsigned Idx = 0;
Idx < NumPartsLeft - 1;
Idx += 2) {
5681 .buildInstr(ScalarOpc, {NarrowTy},
5682 {SplitSrcs[
Idx], SplitSrcs[
Idx + 1]})
5685 SplitSrcs = PartialResults;
5686 PartialResults.
clear();
5687 NumPartsLeft = SplitSrcs.
size();
5691 MI.eraseFromParent();
5696 for (
unsigned Idx = 1;
Idx < NumParts; ++
Idx)
5700 MI.eraseFromParent();
5704 for (
unsigned Part = 0; Part < NumParts; ++Part) {
5714 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5717 Register Acc = PartialReductions[0];
5718 for (
unsigned Part = 1; Part < NumParts; ++Part) {
5719 if (Part == NumParts - 1) {
5721 {Acc, PartialReductions[Part]});
5724 .
buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5728 MI.eraseFromParent();
5734 unsigned int TypeIdx,
5736 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5737 MI.getFirst3RegLLTs();
5738 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5742 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5743 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5744 "Unexpected vecreduce opcode");
5745 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5746 ? TargetOpcode::G_FADD
5747 : TargetOpcode::G_FMUL;
5753 for (
unsigned i = 0; i < NumParts; i++)
5758 MI.eraseFromParent();
5765 unsigned ScalarOpc) {
5773 while (SplitSrcs.
size() > 1) {
5775 for (
unsigned Idx = 0;
Idx < SplitSrcs.
size()-1;
Idx += 2) {
5783 SplitSrcs = std::move(PartialRdxs);
5787 MI.getOperand(1).setReg(SplitSrcs[0]);
5794 const LLT HalfTy,
const LLT AmtTy) {
5802 MI.eraseFromParent();
5808 unsigned VTBits = 2 * NVTBits;
5811 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
5812 if (Amt.
ugt(VTBits)) {
5814 }
else if (Amt.
ugt(NVTBits)) {
5818 }
else if (Amt == NVTBits) {
5829 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
5830 if (Amt.
ugt(VTBits)) {
5832 }
else if (Amt.
ugt(NVTBits)) {
5836 }
else if (Amt == NVTBits) {
5850 if (Amt.
ugt(VTBits)) {
5853 }
else if (Amt.
ugt(NVTBits)) {
5858 }
else if (Amt == NVTBits) {
5875 MI.eraseFromParent();
5899 if (DstEltSize % 2 != 0)
5905 const unsigned NewBitSize = DstEltSize / 2;
5931 switch (
MI.getOpcode()) {
5932 case TargetOpcode::G_SHL: {
5948 ResultRegs[0] =
Lo.getReg(0);
5949 ResultRegs[1] =
Hi.getReg(0);
5952 case TargetOpcode::G_LSHR:
5953 case TargetOpcode::G_ASHR: {
5963 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
5977 ResultRegs[0] =
Lo.getReg(0);
5978 ResultRegs[1] =
Hi.getReg(0);
5986 MI.eraseFromParent();
5993 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
5996 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6011 assert(Ty.
isScalar() &&
"Expected scalar type to make neutral element for");
6016 "getNeutralElementForVecReduce called with invalid opcode!");
6017 case TargetOpcode::G_VECREDUCE_ADD:
6018 case TargetOpcode::G_VECREDUCE_OR:
6019 case TargetOpcode::G_VECREDUCE_XOR:
6020 case TargetOpcode::G_VECREDUCE_UMAX:
6022 case TargetOpcode::G_VECREDUCE_MUL:
6024 case TargetOpcode::G_VECREDUCE_AND:
6025 case TargetOpcode::G_VECREDUCE_UMIN:
6028 case TargetOpcode::G_VECREDUCE_SMAX:
6031 case TargetOpcode::G_VECREDUCE_SMIN:
6034 case TargetOpcode::G_VECREDUCE_FADD:
6036 case TargetOpcode::G_VECREDUCE_FMUL:
6038 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6039 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6040 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
6041 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6049 unsigned Opc =
MI.getOpcode();
6051 case TargetOpcode::G_IMPLICIT_DEF:
6052 case TargetOpcode::G_LOAD: {
6060 case TargetOpcode::G_STORE:
6067 case TargetOpcode::G_AND:
6068 case TargetOpcode::G_OR:
6069 case TargetOpcode::G_XOR:
6070 case TargetOpcode::G_ADD:
6071 case TargetOpcode::G_SUB:
6072 case TargetOpcode::G_MUL:
6073 case TargetOpcode::G_FADD:
6074 case TargetOpcode::G_FSUB:
6075 case TargetOpcode::G_FMUL:
6076 case TargetOpcode::G_FDIV:
6077 case TargetOpcode::G_FCOPYSIGN:
6078 case TargetOpcode::G_UADDSAT:
6079 case TargetOpcode::G_USUBSAT:
6080 case TargetOpcode::G_SADDSAT:
6081 case TargetOpcode::G_SSUBSAT:
6082 case TargetOpcode::G_SMIN:
6083 case TargetOpcode::G_SMAX:
6084 case TargetOpcode::G_UMIN:
6085 case TargetOpcode::G_UMAX:
6086 case TargetOpcode::G_FMINNUM:
6087 case TargetOpcode::G_FMAXNUM:
6088 case TargetOpcode::G_FMINNUM_IEEE:
6089 case TargetOpcode::G_FMAXNUM_IEEE:
6090 case TargetOpcode::G_FMINIMUM:
6091 case TargetOpcode::G_FMAXIMUM:
6092 case TargetOpcode::G_STRICT_FADD:
6093 case TargetOpcode::G_STRICT_FSUB:
6094 case TargetOpcode::G_STRICT_FMUL:
6095 case TargetOpcode::G_SHL:
6096 case TargetOpcode::G_ASHR:
6097 case TargetOpcode::G_LSHR: {
6105 case TargetOpcode::G_FMA:
6106 case TargetOpcode::G_STRICT_FMA:
6107 case TargetOpcode::G_FSHR:
6108 case TargetOpcode::G_FSHL: {
6117 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6118 case TargetOpcode::G_EXTRACT:
6125 case TargetOpcode::G_INSERT:
6126 case TargetOpcode::G_INSERT_VECTOR_ELT:
6127 case TargetOpcode::G_FREEZE:
6128 case TargetOpcode::G_FNEG:
6129 case TargetOpcode::G_FABS:
6130 case TargetOpcode::G_FSQRT:
6131 case TargetOpcode::G_FCEIL:
6132 case TargetOpcode::G_FFLOOR:
6133 case TargetOpcode::G_FNEARBYINT:
6134 case TargetOpcode::G_FRINT:
6135 case TargetOpcode::G_INTRINSIC_ROUND:
6136 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6137 case TargetOpcode::G_INTRINSIC_TRUNC:
6138 case TargetOpcode::G_BSWAP:
6139 case TargetOpcode::G_FCANONICALIZE:
6140 case TargetOpcode::G_SEXT_INREG:
6141 case TargetOpcode::G_ABS:
6149 case TargetOpcode::G_SELECT: {
6150 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
6152 if (!CondTy.isScalar() ||
6160 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6165 if (CondTy.isVector())
6175 case TargetOpcode::G_UNMERGE_VALUES:
6177 case TargetOpcode::G_PHI:
6179 case TargetOpcode::G_SHUFFLE_VECTOR:
6181 case TargetOpcode::G_BUILD_VECTOR: {
6183 for (
auto Op :
MI.uses()) {
6193 MI.eraseFromParent();
6196 case TargetOpcode::G_SEXT:
6197 case TargetOpcode::G_ZEXT:
6198 case TargetOpcode::G_ANYEXT:
6199 case TargetOpcode::G_TRUNC:
6200 case TargetOpcode::G_FPTRUNC:
6201 case TargetOpcode::G_FPEXT:
6202 case TargetOpcode::G_FPTOSI:
6203 case TargetOpcode::G_FPTOUI:
6204 case TargetOpcode::G_FPTOSI_SAT:
6205 case TargetOpcode::G_FPTOUI_SAT:
6206 case TargetOpcode::G_SITOFP:
6207 case TargetOpcode::G_UITOFP: {
6227 case TargetOpcode::G_ICMP:
6228 case TargetOpcode::G_FCMP: {
6242 case TargetOpcode::G_BITCAST: {
6263 case TargetOpcode::G_VECREDUCE_FADD:
6264 case TargetOpcode::G_VECREDUCE_FMUL:
6265 case TargetOpcode::G_VECREDUCE_ADD:
6266 case TargetOpcode::G_VECREDUCE_MUL:
6267 case TargetOpcode::G_VECREDUCE_AND:
6268 case TargetOpcode::G_VECREDUCE_OR:
6269 case TargetOpcode::G_VECREDUCE_XOR:
6270 case TargetOpcode::G_VECREDUCE_SMAX:
6271 case TargetOpcode::G_VECREDUCE_SMIN:
6272 case TargetOpcode::G_VECREDUCE_UMAX:
6273 case TargetOpcode::G_VECREDUCE_UMIN: {
6277 auto NeutralElement = getNeutralElementForVecReduce(
6285 NeutralElement,
Idx);
6289 MO.
setReg(NewVec.getReg(0));
6301 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6303 unsigned MaskNumElts = Mask.size();
6307 if (MaskNumElts == SrcNumElts)
6310 if (MaskNumElts < SrcNumElts) {
6319 MI.getOperand(1).getReg(),
6320 MI.getOperand(2).getReg(), NewMask);
6321 MI.eraseFromParent();
6326 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
6327 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6335 MOps1[0] =
MI.getOperand(1).getReg();
6336 MOps2[0] =
MI.getOperand(2).getReg();
6343 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
6345 if (
Idx >=
static_cast<int>(SrcNumElts))
6346 Idx += PaddedMaskNumElts - SrcNumElts;
6351 if (MaskNumElts != PaddedMaskNumElts) {
6356 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
6366 MI.eraseFromParent();
6372 unsigned int TypeIdx,
LLT MoreTy) {
6373 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
6375 unsigned NumElts = DstTy.getNumElements();
6378 if (DstTy.isVector() && Src1Ty.isVector() &&
6379 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6387 if (DstTy != Src1Ty || DstTy != Src2Ty)
6395 for (
unsigned I = 0;
I != NumElts; ++
I) {
6397 if (
Idx <
static_cast<int>(NumElts))
6400 NewMask[
I] =
Idx - NumElts + WidenNumElts;
6405 MI.getOperand(1).getReg(),
6406 MI.getOperand(2).getReg(), NewMask);
6407 MI.eraseFromParent();
6416 unsigned SrcParts = Src1Regs.
size();
6417 unsigned DstParts = DstRegs.
size();
6419 unsigned DstIdx = 0;
6421 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6422 DstRegs[DstIdx] = FactorSum;
6424 unsigned CarrySumPrevDstIdx;
6427 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6429 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6430 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6432 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6436 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6437 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6439 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6449 if (DstIdx != DstParts - 1) {
6451 B.buildUAddo(NarrowTy,
LLT::scalar(1), Factors[0], Factors[1]);
6452 FactorSum = Uaddo.
getReg(0);
6453 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).
getReg(0);
6454 for (
unsigned i = 2; i < Factors.
size(); ++i) {
6456 B.buildUAddo(NarrowTy,
LLT::scalar(1), FactorSum, Factors[i]);
6457 FactorSum = Uaddo.
getReg(0);
6459 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6463 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6464 for (
unsigned i = 2; i < Factors.
size(); ++i)
6465 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6468 CarrySumPrevDstIdx = CarrySum;
6469 DstRegs[DstIdx] = FactorSum;
6486 unsigned Opcode =
MI.getOpcode();
6487 unsigned OpO, OpE, OpF;
6489 case TargetOpcode::G_SADDO:
6490 case TargetOpcode::G_SADDE:
6491 case TargetOpcode::G_UADDO:
6492 case TargetOpcode::G_UADDE:
6493 case TargetOpcode::G_ADD:
6494 OpO = TargetOpcode::G_UADDO;
6495 OpE = TargetOpcode::G_UADDE;
6496 OpF = TargetOpcode::G_UADDE;
6497 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
6498 OpF = TargetOpcode::G_SADDE;
6500 case TargetOpcode::G_SSUBO:
6501 case TargetOpcode::G_SSUBE:
6502 case TargetOpcode::G_USUBO:
6503 case TargetOpcode::G_USUBE:
6504 case TargetOpcode::G_SUB:
6505 OpO = TargetOpcode::G_USUBO;
6506 OpE = TargetOpcode::G_USUBE;
6507 OpF = TargetOpcode::G_USUBE;
6508 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
6509 OpF = TargetOpcode::G_SSUBE;
6516 unsigned NumDefs =
MI.getNumExplicitDefs();
6517 Register Src1 =
MI.getOperand(NumDefs).getReg();
6518 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
6521 CarryDst =
MI.getOperand(1).getReg();
6522 if (
MI.getNumOperands() == NumDefs + 3)
6523 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
6526 LLT LeftoverTy, DummyTy;
6528 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
6533 int NarrowParts = Src1Regs.
size();
6534 Src1Regs.
append(Src1Left);
6535 Src2Regs.
append(Src2Left);
6538 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
6543 if (i == e - 1 && CarryDst)
6544 CarryOut = CarryDst;
6550 {Src1Regs[i], Src2Regs[i]});
6551 }
else if (i == e - 1) {
6553 {Src1Regs[i], Src2Regs[i], CarryIn});
6556 {Src1Regs[i], Src2Regs[i], CarryIn});
6562 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
6563 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
6564 ArrayRef(DstRegs).drop_front(NarrowParts));
6566 MI.eraseFromParent();
6572 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
6580 if (
Size % NarrowSize != 0)
6583 unsigned NumParts =
Size / NarrowSize;
6584 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
6585 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
6591 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6596 MI.eraseFromParent();
6606 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
6620 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6636 if (SizeOp1 % NarrowSize != 0)
6638 int NumParts = SizeOp1 / NarrowSize;
6642 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6646 uint64_t OpStart =
MI.getOperand(2).getImm();
6648 for (
int i = 0; i < NumParts; ++i) {
6649 unsigned SrcStart = i * NarrowSize;
6651 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6654 }
else if (SrcStart == OpStart && NarrowTy == MRI.
getType(OpReg)) {
6662 int64_t ExtractOffset;
6664 if (OpStart < SrcStart) {
6666 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6668 ExtractOffset = OpStart - SrcStart;
6669 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6673 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6685 else if (DstRegs.
size() > 1)
6689 MI.eraseFromParent();
6704 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6707 SrcRegs.
append(LeftoverRegs);
6711 uint64_t OpStart =
MI.getOperand(3).getImm();
6713 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
6714 unsigned DstStart =
I * NarrowSize;
6716 if (DstStart == OpStart && NarrowTy == MRI.
getType(OpReg)) {
6724 if (MRI.
getType(SrcRegs[
I]) == LeftoverTy) {
6730 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6738 int64_t ExtractOffset, InsertOffset;
6740 if (OpStart < DstStart) {
6742 ExtractOffset = DstStart - OpStart;
6743 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6745 InsertOffset = OpStart - DstStart;
6748 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6752 if (ExtractOffset != 0 || SegSize != OpSize) {
6772 MI.eraseFromParent();
6782 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
6788 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6789 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
6793 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6794 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
6797 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
6799 {Src0Regs[I], Src1Regs[I]});
6803 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
6806 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6807 DstLeftoverRegs.
push_back(Inst.getReg(0));
6810 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6811 LeftoverTy, DstLeftoverRegs);
6813 MI.eraseFromParent();
6823 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
6830 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6831 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
6832 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6834 MI.eraseFromParent();
6844 Register CondReg =
MI.getOperand(1).getReg();
6856 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6857 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
6861 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6862 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
6865 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
6867 CondReg, Src1Regs[
I], Src2Regs[
I]);
6871 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
6873 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
6877 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6878 LeftoverTy, DstLeftoverRegs);
6880 MI.eraseFromParent();
6890 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6894 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6897 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
6899 auto C_0 =
B.buildConstant(NarrowTy, 0);
6901 UnmergeSrc.getReg(1), C_0);
6902 auto LoCTLZ = IsUndef ?
6903 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6904 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6905 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
6906 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6907 auto HiCTLZ =
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6908 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6910 MI.eraseFromParent();
6923 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6927 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6930 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
6932 auto C_0 =
B.buildConstant(NarrowTy, 0);
6934 UnmergeSrc.getReg(0), C_0);
6935 auto HiCTTZ = IsUndef ?
6936 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6937 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6938 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
6939 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6940 auto LoCTTZ =
B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6941 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6943 MI.eraseFromParent();
6956 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6966 MI.eraseFromParent();
6986 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
6987 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
6988 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
6989 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
6991 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
6993 MI.getOperand(2).setReg(Trunc.getReg(0));
7000 unsigned Opc =
MI.getOpcode();
7009 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7012 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
7016 case TargetOpcode::G_CTLZ: {
7017 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7020 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7028 MI.eraseFromParent();
7044 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7048 Op = MIBOp.getReg(0);
7053 MI.eraseFromParent();
7056 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7059 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
7063 case TargetOpcode::G_CTTZ: {
7064 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7067 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7076 MI.eraseFromParent();
7087 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7088 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7092 MI.eraseFromParent();
7096 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
7097 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7101 case TargetOpcode::G_CTPOP: {
7112 auto C_1 =
B.buildConstant(Ty, 1);
7113 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
7115 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
7116 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7117 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
7121 auto C_2 =
B.buildConstant(Ty, 2);
7122 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
7124 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
7125 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7126 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7127 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7134 auto C_4 =
B.buildConstant(Ty, 4);
7135 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
7136 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
7138 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
7139 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7141 assert(
Size<=128 &&
"Scalar size is too large for CTPOP lower algorithm");
7147 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
7149 auto IsMulSupported = [
this](
const LLT Ty) {
7150 auto Action = LI.
getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7153 if (IsMulSupported(Ty)) {
7154 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
7155 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7157 auto ResTmp = B8Count;
7158 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
7159 auto ShiftC =
B.buildConstant(Ty, Shift);
7160 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
7161 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
7163 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7165 MI.eraseFromParent();
7178 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
C);
7186 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7195 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7196 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7219 MI.eraseFromParent();
7225 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7230 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7274 MI.eraseFromParent();
7288 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7289 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7293 return lowerFunnelShiftAsShifts(
MI);
7297 if (Result == UnableToLegalize)
7298 return lowerFunnelShiftAsShifts(
MI);
7303 auto [Dst, Src] =
MI.getFirst2Regs();
7317 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7331 {UnmergeSrc.getReg(0)});
7333 {UnmergeSrc.getReg(1)});
7338 MI.eraseFromParent();
7355 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
7359 LLT DstTy =
MRI.getType(DstReg);
7360 LLT SrcTy =
MRI.getType(SrcReg);
7380 for (
unsigned I = 0;
I < SplitSrcs.
size(); ++
I) {
7394 MI.eraseFromParent();
7403 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
7405 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
7406 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7409 MI.eraseFromParent();
7414 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
7416 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7417 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
7422 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7425 return lowerRotateWithReverseRotate(
MI);
7428 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7429 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7430 bool IsFShLegal =
false;
7431 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7432 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7436 MI.eraseFromParent();
7441 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7444 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7449 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7450 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
7451 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
7457 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
7458 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7460 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7466 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
7467 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
7469 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7471 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7475 MIRBuilder.
buildOr(Dst, ShVal, RevShiftVal);
7476 MI.eraseFromParent();
7484 auto [Dst, Src] =
MI.getFirst2Regs();
7534 MI.eraseFromParent();
7542 auto [Dst, Src] =
MI.getFirst2Regs();
7569 MI.eraseFromParent();
7577 auto [Dst, Src] =
MI.getFirst2Regs();
7594 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
7606 MI.eraseFromParent();
7611 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
7617 MI.eraseFromParent();
7638 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
7648 MI.eraseFromParent();
7673 MI.eraseFromParent();
7681 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
7685 if (SrcTy !=
S64 && SrcTy !=
S32)
7687 if (DstTy !=
S32 && DstTy !=
S64)
7716 MI.eraseFromParent();
7721 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
7780 MI.eraseFromParent();
7786 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
7788 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
7789 unsigned SatWidth = DstTy.getScalarSizeInBits();
7793 APInt MinInt, MaxInt;
7816 if (AreExactFloatBounds) {
7833 MI.eraseFromParent();
7840 DstTy.changeElementSize(1), Src, Src);
7843 MI.eraseFromParent();
7870 MI.eraseFromParent();
7878 DstTy.changeElementSize(1), Src, Src);
7880 MI.eraseFromParent();
7890 auto [Dst, Src] =
MI.getFirst2Regs();
7898 unsigned Flags =
MI.getFlags();
7901 MI.eraseFromParent();
7905 const unsigned ExpMask = 0x7ff;
7906 const unsigned ExpBiasf64 = 1023;
7907 const unsigned ExpBiasf16 = 15;
7996 MI.eraseFromParent();
8002 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
8013 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8018 MI.eraseFromParent();
8024 case TargetOpcode::G_SMIN:
8026 case TargetOpcode::G_SMAX:
8028 case TargetOpcode::G_UMIN:
8030 case TargetOpcode::G_UMAX:
8038 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8046 MI.eraseFromParent();
8086 unsigned BoolExtOp =
8093 MI.eraseFromParent();
8099 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
8100 const int Src0Size = Src0Ty.getScalarSizeInBits();
8101 const int Src1Size = Src1Ty.getScalarSizeInBits();
8111 if (Src0Ty == Src1Ty) {
8113 }
else if (Src0Size > Src1Size) {
8128 unsigned Flags =
MI.getFlags();
8135 MI.eraseFromParent();
8141 unsigned NewOp =
MI.getOpcode() == TargetOpcode::G_FMINNUM ?
8142 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
8144 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8164 MI.eraseFromParent();
8172 unsigned Flags =
MI.getFlags();
8177 MI.eraseFromParent();
8183 auto [DstReg,
X] =
MI.getFirst2Regs();
8184 const unsigned Flags =
MI.getFlags();
8211 MI.eraseFromParent();
8216 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
8217 unsigned Flags =
MI.getFlags();
8229 SrcReg, Zero, Flags);
8231 SrcReg, Trunc, Flags);
8236 MI.eraseFromParent();
8242 const unsigned NumOps =
MI.getNumOperands();
8243 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
8244 unsigned PartSize = Src0Ty.getSizeInBits();
8249 for (
unsigned I = 2;
I != NumOps; ++
I) {
8250 const unsigned Offset = (
I - 1) * PartSize;
8255 Register NextResult =
I + 1 == NumOps && WideTy == DstTy ? DstReg :
8261 ResultReg = NextResult;
8264 if (DstTy.isPointer()) {
8266 DstTy.getAddressSpace())) {
8274 MI.eraseFromParent();
8280 const unsigned NumDst =
MI.getNumOperands() - 1;
8281 Register SrcReg =
MI.getOperand(NumDst).getReg();
8282 Register Dst0Reg =
MI.getOperand(0).getReg();
8297 unsigned Offset = DstSize;
8298 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
8304 MI.eraseFromParent();
8323 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8324 InsertVal =
MI.getOperand(2).getReg();
8338 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
8344 MI.eraseFromParent();
8349 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
8367 int64_t
Offset = IdxVal * EltBytes;
8386 MI.eraseFromParent();
8392 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8393 MI.getFirst3RegLLTs();
8401 for (
int Idx : Mask) {
8403 if (!Undef.isValid())
8409 if (Src0Ty.isScalar()) {
8412 int NumElts = Src0Ty.getNumElements();
8413 Register SrcVec =
Idx < NumElts ? Src0Reg : Src1Reg;
8414 int ExtractIdx =
Idx < NumElts ?
Idx :
Idx - NumElts;
8421 if (DstTy.isScalar())
8425 MI.eraseFromParent();
8431 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
8432 MI.getFirst4RegLLTs();
8434 if (VecTy.isScalableVector())
8459 std::optional<APInt> PassthruSplatVal =
8462 if (PassthruSplatVal.has_value()) {
8465 }
else if (HasPassthru) {
8477 unsigned NumElmts = VecTy.getNumElements();
8478 for (
unsigned I = 0;
I < NumElmts; ++
I) {
8493 if (HasPassthru &&
I == NumElmts - 1) {
8499 {OutPos, EndOfVector});
8512 MI.eraseFromParent();
8529 if (Alignment >
Align(1)) {
8541 const auto &MF = *
MI.getMF();
8542 const auto &TFI = *MF.getSubtarget().getFrameLowering();
8547 Register AllocSize =
MI.getOperand(1).getReg();
8558 MI.eraseFromParent();
8569 MI.eraseFromParent();
8580 MI.eraseFromParent();
8586 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8587 unsigned Offset =
MI.getOperand(2).getImm();
8592 unsigned DstSize = DstTy.getSizeInBits();
8594 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
8601 for (
unsigned Idx =
Offset / SrcEltSize;
8605 if (SubVectorElts.
size() == 1)
8610 MI.eraseFromParent();
8615 if (DstTy.isScalar() &&
8618 LLT SrcIntTy = SrcTy;
8632 MI.eraseFromParent();
8640 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
8652 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
8665 for (
unsigned i = 0;
Idx < (
Offset + InsertSize) / EltSize;
8667 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
8680 MI.eraseFromParent();
8694 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
8698 LLT IntDstTy = DstTy;
8724 MI.eraseFromParent();
8730 auto [Dst0, Dst0Ty, Dst1, Dst1Ty,
LHS, LHSTy,
RHS, RHSTy] =
8731 MI.getFirst4RegLLTs();
8732 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
8735 LLT BoolTy = Dst1Ty;
8754 auto ResultLowerThanLHS =
8762 MI.eraseFromParent();
8769 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
8774 switch (
MI.getOpcode()) {
8777 case TargetOpcode::G_UADDSAT:
8780 BaseOp = TargetOpcode::G_ADD;
8782 case TargetOpcode::G_SADDSAT:
8785 BaseOp = TargetOpcode::G_ADD;
8787 case TargetOpcode::G_USUBSAT:
8790 BaseOp = TargetOpcode::G_SUB;
8792 case TargetOpcode::G_SSUBSAT:
8795 BaseOp = TargetOpcode::G_SUB;
8838 MI.eraseFromParent();
8844 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
8849 unsigned OverflowOp;
8850 switch (
MI.getOpcode()) {
8853 case TargetOpcode::G_UADDSAT:
8856 OverflowOp = TargetOpcode::G_UADDO;
8858 case TargetOpcode::G_SADDSAT:
8861 OverflowOp = TargetOpcode::G_SADDO;
8863 case TargetOpcode::G_USUBSAT:
8866 OverflowOp = TargetOpcode::G_USUBO;
8868 case TargetOpcode::G_SSUBSAT:
8871 OverflowOp = TargetOpcode::G_SSUBO;
8877 Register Tmp = OverflowRes.getReg(0);
8878 Register Ov = OverflowRes.getReg(1);
8904 MI.eraseFromParent();
8910 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8911 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8912 "Expected shlsat opcode!");
8913 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
8914 auto [Res,
LHS,
RHS] =
MI.getFirst3Regs();
8936 MI.eraseFromParent();
8941 auto [Dst, Src] =
MI.getFirst2Regs();
8944 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8953 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
8955 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8967 Res.getInstr()->getOperand(0).setReg(Dst);
8969 MI.eraseFromParent();
8976 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
8979 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8980 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8981 return B.buildOr(Dst,
LHS,
RHS);
8986 auto [Dst, Src] =
MI.getFirst2Regs();
9014 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
9034 MI.eraseFromParent();
9042 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9043 int NameOpIdx = IsRead ? 1 : 0;
9044 int ValRegIndex = IsRead ? 0 : 1;
9046 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
9048 const MDString *RegStr = cast<MDString>(
9049 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9060 MI.eraseFromParent();
9066 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
9067 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9076 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9082 MI.eraseFromParent();
9088 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
9093 MI.eraseFromParent();
9098 MI.eraseFromParent();
9117 APInt ExpMask = Inf;
9135 LLT DstTyCopy = DstTy;
9150 Mask &= ~fcPosFinite;
9157 Mask &= ~fcNegFinite;
9168 Mask &= ~PartialCheck;
9177 else if (PartialCheck ==
fcZero)
9196 appendToRes(SubnormalRes);
9203 else if (PartialCheck ==
fcInf)
9216 if (PartialCheck ==
fcNan) {
9220 }
else if (PartialCheck ==
fcQNan) {
9230 Abs, InfWithQnanBitC);
9238 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
9241 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9252 appendToRes(NormalRes);
9256 MI.eraseFromParent();
9262 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9263 MI.getFirst4RegLLTs();
9265 bool IsEltPtr = DstTy.isPointerOrPointerVector();
9274 if (MaskTy.isScalar()) {
9288 if (DstTy.isVector()) {
9291 MaskReg = ShufSplat.
getReg(0);
9296 }
else if (!DstTy.isVector()) {
9301 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9314 MI.eraseFromParent();
9320 unsigned Opcode =
MI.getOpcode();
9323 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9324 : TargetOpcode::G_UDIV,
9325 {
MI.getOperand(0).
getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
9327 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9328 : TargetOpcode::G_UREM,
9329 {
MI.getOperand(1).
getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
9330 MI.eraseFromParent();
9347 MI.eraseFromParent();
9362 MI.eraseFromParent();
9369 Register DestReg =
MI.getOperand(0).getReg();
9375 MI.eraseFromParent();
9391 MI.eraseFromParent();
9418 Register ListPtr =
MI.getOperand(1).getReg();
9428 const Align A(
MI.getOperand(2).getImm());
9435 VAList = AndDst.
getReg(0);
9453 Align EltAlignment =
DL.getABITypeAlign(Ty);
9458 MI.eraseFromParent();
9473 unsigned Limit,
const MemOp &
Op,
9474 unsigned DstAS,
unsigned SrcAS,
9477 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
9487 if (
Op.isFixedDstAlign())
9495 unsigned NumMemOps = 0;
9499 while (TySize >
Size) {
9508 assert(NewTySize > 0 &&
"Could not find appropriate type");
9515 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
9517 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
9527 if (++NumMemOps > Limit)
9530 MemOps.push_back(Ty);
9542 if (!Ty.
isVector() && ValVRegAndVal) {
9543 APInt Scalar = ValVRegAndVal->Value.trunc(8);
9551 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
9574 auto &MF = *
MI.getParent()->getParent();
9575 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9576 auto &
DL = MF.getDataLayout();
9579 assert(KnownLen != 0 &&
"Have a zero length memset length!");
9581 bool DstAlignCanChange =
false;
9587 DstAlignCanChange =
true;
9590 std::vector<LLT> MemOps;
9592 const auto &DstMMO = **
MI.memoperands_begin();
9596 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
9604 MF.getFunction().getAttributes(), TLI))
9607 if (DstAlignCanChange) {
9610 Align NewAlign =
DL.getABITypeAlign(IRTy);
9611 if (NewAlign > Alignment) {
9612 Alignment = NewAlign;
9622 LLT LargestTy = MemOps[0];
9623 for (
unsigned i = 1; i < MemOps.size(); i++)
9625 LargestTy = MemOps[i];
9638 unsigned DstOff = 0;
9639 unsigned Size = KnownLen;
9640 for (
unsigned I = 0;
I < MemOps.size();
I++) {
9643 if (TySize >
Size) {
9646 assert(
I == MemOps.size() - 1 &&
I != 0);
9647 DstOff -= TySize -
Size;
9658 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
9665 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
9671 Ptr = MIB.buildPtrAdd(PtrTy, Dst,
Offset).getReg(0);
9674 MIB.buildStore(
Value,
Ptr, *StoreMMO);
9679 MI.eraseFromParent();
9685 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9687 auto [Dst, Src, Len] =
MI.getFirst3Regs();
9689 const auto *MMOIt =
MI.memoperands_begin();
9691 bool IsVolatile =
MemOp->isVolatile();
9697 "inline memcpy with dynamic size is not yet supported");
9698 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9699 if (KnownLen == 0) {
9700 MI.eraseFromParent();
9704 const auto &DstMMO = **
MI.memoperands_begin();
9705 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
9706 Align DstAlign = DstMMO.getBaseAlign();
9707 Align SrcAlign = SrcMMO.getBaseAlign();
9709 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9716 Align SrcAlign,
bool IsVolatile) {
9717 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9718 return lowerMemcpy(
MI, Dst, Src, KnownLen,
9719 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
9726 Align SrcAlign,
bool IsVolatile) {
9727 auto &MF = *
MI.getParent()->getParent();
9728 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9729 auto &
DL = MF.getDataLayout();
9732 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
9734 bool DstAlignCanChange =
false;
9736 Align Alignment = std::min(DstAlign, SrcAlign);
9740 DstAlignCanChange =
true;
9746 std::vector<LLT> MemOps;
9748 const auto &DstMMO = **
MI.memoperands_begin();
9749 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
9755 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9758 MF.getFunction().getAttributes(), TLI))
9761 if (DstAlignCanChange) {
9764 Align NewAlign =
DL.getABITypeAlign(IRTy);
9769 if (!
TRI->hasStackRealignment(MF))
9771 NewAlign = std::min(NewAlign, *StackAlign);
9773 if (NewAlign > Alignment) {
9774 Alignment = NewAlign;
9782 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
9790 unsigned CurrOffset = 0;
9791 unsigned Size = KnownLen;
9792 for (
auto CopyTy : MemOps) {
9795 if (CopyTy.getSizeInBytes() >
Size)
9796 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
9800 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9802 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9807 if (CurrOffset != 0) {
9811 LoadPtr = MIB.buildPtrAdd(SrcTy, Src,
Offset).getReg(0);
9813 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
9817 if (CurrOffset != 0) {
9819 StorePtr = MIB.buildPtrAdd(DstTy, Dst,
Offset).getReg(0);
9821 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
9822 CurrOffset += CopyTy.getSizeInBytes();
9823 Size -= CopyTy.getSizeInBytes();
9826 MI.eraseFromParent();
9834 auto &MF = *
MI.getParent()->getParent();
9835 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9836 auto &
DL = MF.getDataLayout();
9839 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
9841 bool DstAlignCanChange =
false;
9844 Align Alignment = std::min(DstAlign, SrcAlign);
9848 DstAlignCanChange =
true;
9851 std::vector<LLT> MemOps;
9853 const auto &DstMMO = **
MI.memoperands_begin();
9854 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
9863 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9866 MF.getFunction().getAttributes(), TLI))
9869 if (DstAlignCanChange) {
9872 Align NewAlign =
DL.getABITypeAlign(IRTy);
9877 if (!
TRI->hasStackRealignment(MF))
9879 NewAlign = std::min(NewAlign, *StackAlign);
9881 if (NewAlign > Alignment) {
9882 Alignment = NewAlign;
9890 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
9896 unsigned CurrOffset = 0;
9898 for (
auto CopyTy : MemOps) {
9901 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9905 if (CurrOffset != 0) {
9909 LoadPtr = MIB.buildPtrAdd(SrcTy, Src,
Offset).getReg(0);
9911 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
9912 CurrOffset += CopyTy.getSizeInBytes();
9916 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
9917 LLT CopyTy = MemOps[
I];
9920 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.
getSizeInBytes());
9923 if (CurrOffset != 0) {
9927 StorePtr = MIB.buildPtrAdd(DstTy, Dst,
Offset).getReg(0);
9929 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
9932 MI.eraseFromParent();
9938 const unsigned Opc =
MI.getOpcode();
9941 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9942 Opc == TargetOpcode::G_MEMSET) &&
9943 "Expected memcpy like instruction");
9945 auto MMOIt =
MI.memoperands_begin();
9950 auto [Dst, Src, Len] =
MI.getFirst3Regs();
9952 if (Opc != TargetOpcode::G_MEMSET) {
9953 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
9955 SrcAlign =
MemOp->getBaseAlign();
9962 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9964 if (KnownLen == 0) {
9965 MI.eraseFromParent();
9969 bool IsVolatile =
MemOp->isVolatile();
9970 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9971 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9978 if (MaxLen && KnownLen > MaxLen)
9981 if (Opc == TargetOpcode::G_MEMCPY) {
9982 auto &MF = *
MI.getParent()->getParent();
9983 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9986 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9989 if (Opc == TargetOpcode::G_MEMMOVE)
9990 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9991 if (Opc == TargetOpcode::G_MEMSET)
9992 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
unsigned const MachineRegisterInfo * MRI
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
bool hasAttributes() const
Return true if the builder has IR-level attributes.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Type * getReturnType() const
Returns the type of the ret val.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a threeway compare.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult lowerFAbs(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
StringRef getString() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildFPTOUI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOUI Src0.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isReturn(QueryType Type=AnyInBundle) const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save and restore.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLT getLLTForMVT(MVT Ty)
Get a rough equivalent of an LLT for a given MVT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
OutputIt copy(R &&Range, OutputIt Out)
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)