LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
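///
/// For example, breaking an s70 OrigTy into s32 NarrowTy pieces yields
/// {2, 1} with LeftoverTy set to s6, while an s96 OrigTy yields {3, 0}.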
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
83
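/// Map a scalar LLT to the IR floating-point type with the same bit width
/// (s16 -> half, s32 -> float, s64 -> double, s80 -> x86_fp80,
/// s128 -> fp128); returns nullptr if there is no match.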
84static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
105LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113 GISelChangeObserver &Observer,
114 MachineIRBuilder &B, GISelValueTracking *VT)
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
117
118LegalizerHelper::LegalizeResult
119LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
120 LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
122
123 MIRBuilder.setInstrAndDebugLoc(MI);
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
156 : UnableToLegalize;
157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
183 // Merge sub-vectors with different numbers of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs);
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
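/// Unmerge the vector in \p Reg into its scalar elements and append them to
/// \p Elts.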
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 0> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 32> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
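/// Compute the greatest common divisor type of \p DstTy, \p NarrowTy and the
/// type of \p SrcReg, split \p SrcReg into pieces of that type, and return it.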
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
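/// Widen the GCDTy-sized pieces in \p VRegs up to the least common multiple of
/// \p DstTy and \p NarrowTy by merging them into NarrowTy-sized parts. Missing
/// high parts are padded according to \p PadStrategy: G_ZEXT pads with zero,
/// G_ANYEXT with undef, and G_SEXT with copies of the sign bit of the last
/// piece. On return \p VRegs holds the NarrowTy parts and the LCM type is
/// returned.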
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
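/// Merge \p RemergeRegs into a single value of the wider type \p LCMTy, then
/// truncate (scalar case) or unmerge (vector case) the result down to the type
/// of \p DstReg.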
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
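/// Return the RTLIB libcall that implements \p Opcode for a scalar of \p Size
/// bits; for example, G_FSIN with Size == 64 maps to RTLIB::SIN_F64.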
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_LROUND:
415 RTLIBCASE(LROUND_F);
416 case TargetOpcode::G_LLROUND:
417 RTLIBCASE(LLROUND_F);
418 case TargetOpcode::G_MUL:
419 RTLIBCASE_INT(MUL_I);
420 case TargetOpcode::G_SDIV:
421 RTLIBCASE_INT(SDIV_I);
422 case TargetOpcode::G_UDIV:
423 RTLIBCASE_INT(UDIV_I);
424 case TargetOpcode::G_SREM:
425 RTLIBCASE_INT(SREM_I);
426 case TargetOpcode::G_UREM:
427 RTLIBCASE_INT(UREM_I);
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
429 RTLIBCASE_INT(CTLZ_I);
430 case TargetOpcode::G_FADD:
431 RTLIBCASE(ADD_F);
432 case TargetOpcode::G_FSUB:
433 RTLIBCASE(SUB_F);
434 case TargetOpcode::G_FMUL:
435 RTLIBCASE(MUL_F);
436 case TargetOpcode::G_FDIV:
437 RTLIBCASE(DIV_F);
438 case TargetOpcode::G_FEXP:
439 RTLIBCASE(EXP_F);
440 case TargetOpcode::G_FEXP2:
441 RTLIBCASE(EXP2_F);
442 case TargetOpcode::G_FEXP10:
443 RTLIBCASE(EXP10_F);
444 case TargetOpcode::G_FREM:
445 RTLIBCASE(REM_F);
446 case TargetOpcode::G_FPOW:
447 RTLIBCASE(POW_F);
448 case TargetOpcode::G_FPOWI:
449 RTLIBCASE(POWI_F);
450 case TargetOpcode::G_FMA:
451 RTLIBCASE(FMA_F);
452 case TargetOpcode::G_FSIN:
453 RTLIBCASE(SIN_F);
454 case TargetOpcode::G_FCOS:
455 RTLIBCASE(COS_F);
456 case TargetOpcode::G_FTAN:
457 RTLIBCASE(TAN_F);
458 case TargetOpcode::G_FASIN:
459 RTLIBCASE(ASIN_F);
460 case TargetOpcode::G_FACOS:
461 RTLIBCASE(ACOS_F);
462 case TargetOpcode::G_FATAN:
463 RTLIBCASE(ATAN_F);
464 case TargetOpcode::G_FATAN2:
465 RTLIBCASE(ATAN2_F);
466 case TargetOpcode::G_FSINH:
467 RTLIBCASE(SINH_F);
468 case TargetOpcode::G_FCOSH:
469 RTLIBCASE(COSH_F);
470 case TargetOpcode::G_FTANH:
471 RTLIBCASE(TANH_F);
472 case TargetOpcode::G_FSINCOS:
473 RTLIBCASE(SINCOS_F);
474 case TargetOpcode::G_FMODF:
475 RTLIBCASE(MODF_F);
476 case TargetOpcode::G_FLOG10:
477 RTLIBCASE(LOG10_F);
478 case TargetOpcode::G_FLOG:
479 RTLIBCASE(LOG_F);
480 case TargetOpcode::G_FLOG2:
481 RTLIBCASE(LOG2_F);
482 case TargetOpcode::G_FLDEXP:
483 RTLIBCASE(LDEXP_F);
484 case TargetOpcode::G_FCEIL:
485 RTLIBCASE(CEIL_F);
486 case TargetOpcode::G_FFLOOR:
487 RTLIBCASE(FLOOR_F);
488 case TargetOpcode::G_FMINNUM:
489 RTLIBCASE(FMIN_F);
490 case TargetOpcode::G_FMAXNUM:
491 RTLIBCASE(FMAX_F);
492 case TargetOpcode::G_FMINIMUMNUM:
493 RTLIBCASE(FMINIMUM_NUM_F);
494 case TargetOpcode::G_FMAXIMUMNUM:
495 RTLIBCASE(FMAXIMUM_NUM_F);
496 case TargetOpcode::G_FSQRT:
497 RTLIBCASE(SQRT_F);
498 case TargetOpcode::G_FRINT:
499 RTLIBCASE(RINT_F);
500 case TargetOpcode::G_FNEARBYINT:
501 RTLIBCASE(NEARBYINT_F);
502 case TargetOpcode::G_INTRINSIC_TRUNC:
503 RTLIBCASE(TRUNC_F);
504 case TargetOpcode::G_INTRINSIC_ROUND:
505 RTLIBCASE(ROUND_F);
506 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
507 RTLIBCASE(ROUNDEVEN_F);
508 case TargetOpcode::G_INTRINSIC_LRINT:
509 RTLIBCASE(LRINT_F);
510 case TargetOpcode::G_INTRINSIC_LLRINT:
511 RTLIBCASE(LLRINT_F);
512 }
513 llvm_unreachable("Unknown libcall function");
514#undef RTLIBCASE_INT
515#undef RTLIBCASE
516}
517
518/// True if an instruction is in tail position in its caller. Intended for
519/// legalizing libcalls as tail calls when possible.
520static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
521 MachineInstr &MI,
522 const TargetInstrInfo &TII,
523 MachineRegisterInfo &MRI) {
524 MachineBasicBlock &MBB = *MI.getParent();
525 const Function &F = MBB.getParent()->getFunction();
526
527 // Conservatively require the attributes of the call to match those of
528 // the return. Ignore NoAlias and NonNull because they don't affect the
529 // call sequence.
530 AttributeList CallerAttrs = F.getAttributes();
531 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
532 .removeAttribute(Attribute::NoAlias)
533 .removeAttribute(Attribute::NonNull)
534 .hasAttributes())
535 return false;
536
537 // It's not safe to eliminate the sign / zero extension of the return value.
538 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
539 CallerAttrs.hasRetAttr(Attribute::SExt))
540 return false;
541
542 // Only tail call if the following instruction is a standard return or if we
543 // have a `thisreturn` callee, and a sequence like:
544 //
545 // G_MEMCPY %0, %1, %2
546 // $x0 = COPY %0
547 // RET_ReallyLR implicit $x0
548 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
549 if (Next != MBB.instr_end() && Next->isCopy()) {
550 if (MI.getOpcode() == TargetOpcode::G_BZERO)
551 return false;
552
553 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
554 // memcpy/etc routines return the same parameter. For others it will be the
555 // returned value.
556 Register VReg = MI.getOperand(0).getReg();
557 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
558 return false;
559
560 Register PReg = Next->getOperand(0).getReg();
561 if (!PReg.isPhysical())
562 return false;
563
564 auto Ret = next_nodbg(Next, MBB.instr_end());
565 if (Ret == MBB.instr_end() || !Ret->isReturn())
566 return false;
567
568 if (Ret->getNumImplicitOperands() != 1)
569 return false;
570
571 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
572 return false;
573
574 // Skip over the COPY that we just validated.
575 Next = Ret;
576 }
577
578 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
579 return false;
580
581 return true;
582}
583
584LegalizerHelper::LegalizeResult
585llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
586 const CallLowering::ArgInfo &Result,
587 ArrayRef<CallLowering::ArgInfo> Args,
588 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
589 MachineInstr *MI) {
590 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
591
592 CallLowering::CallLoweringInfo Info;
593 Info.CallConv = CC;
594 Info.Callee = MachineOperand::CreateES(Name);
595 Info.OrigRet = Result;
596 if (MI)
597 Info.IsTailCall =
598 (Result.Ty->isVoidTy() ||
599 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
600 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
601 *MIRBuilder.getMRI());
602
603 llvm::append_range(Info.OrigArgs, Args);
604 if (!CLI.lowerCall(MIRBuilder, Info))
605 return LegalizerHelper::UnableToLegalize;
606
607 if (MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
609
610 // Check debug locations before removing the return.
611 LocObserver.checkpoint(true);
612
613 // We must have a return following the call (or debug insts) to get past
614 // isLibCallInTailPosition.
615 do {
616 MachineInstr *Next = MI->getNextNode();
617 assert(Next &&
618 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
620 // We lowered a tail call, so the call is now the return from the block.
621 // Delete the old return.
622 Next->eraseFromParent();
623 } while (MI->getNextNode());
624
625 // We expect to lose the debug location from the return.
626 LocObserver.checkpoint(false);
627 }
628 return LegalizerHelper::Legalized;
629}
630
631LegalizerHelper::LegalizeResult
632llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
633 const CallLowering::ArgInfo &Result,
634 ArrayRef<CallLowering::ArgInfo> Args,
635 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
636 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
637 const char *Name = TLI.getLibcallName(Libcall);
638 if (!Name)
639 return LegalizerHelper::UnableToLegalize;
640 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
641 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
642}
643
644// Useful for libcalls where all operands have the same type.
645static LegalizerHelper::LegalizeResult
646simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
647 Type *OpType, LostDebugLocObserver &LocObserver) {
648 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
649
650 // FIXME: What does the original arg index mean here?
651 SmallVector<CallLowering::ArgInfo, 3> Args;
652 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
653 Args.push_back({MO.getReg(), OpType, 0});
654 return createLibcall(MIRBuilder, Libcall,
655 {MI.getOperand(0).getReg(), OpType, 0}, Args,
656 LocObserver, &MI);
657}
658
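/// Lower G_FSINCOS via a sincos libcall: the call writes both results through
/// stack temporaries, which are then loaded into the two destination
/// registers.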
659LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
660 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
661 LostDebugLocObserver &LocObserver) {
662 MachineFunction &MF = *MI.getMF();
663 MachineRegisterInfo &MRI = MF.getRegInfo();
664
665 Register DstSin = MI.getOperand(0).getReg();
666 Register DstCos = MI.getOperand(1).getReg();
667 Register Src = MI.getOperand(2).getReg();
668 LLT DstTy = MRI.getType(DstSin);
669
670 int MemSize = DstTy.getSizeInBytes();
671 Align Alignment = getStackTemporaryAlignment(DstTy);
672 const DataLayout &DL = MIRBuilder.getDataLayout();
673 unsigned AddrSpace = DL.getAllocaAddrSpace();
674 MachinePointerInfo PtrInfo;
675
676 Register StackPtrSin =
677 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
678 .getReg(0);
679 Register StackPtrCos =
680 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
681 .getReg(0);
682
683 auto &Ctx = MF.getFunction().getContext();
684 auto LibcallResult =
685 createLibcall(MIRBuilder, getRTLibDesc(MI.getOpcode(), Size),
686 {{0}, Type::getVoidTy(Ctx), 0},
687 {{Src, OpType, 0},
688 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
689 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
690 LocObserver, &MI);
691
692 if (LibcallResult != LegalizeResult::Legalized)
693 return LegalizerHelper::UnableToLegalize;
694
695 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
696 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
697 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
698 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
699
700 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
701 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
702 MI.eraseFromParent();
703
704 return LegalizerHelper::Legalized;
705}
706
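/// Lower G_FMODF via a modf libcall: the fractional part is the call's return
/// value, while the integral part is written through a stack temporary and
/// then loaded into the second destination register.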
707LegalizerHelper::LegalizeResult
708LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
709 unsigned Size, Type *OpType,
710 LostDebugLocObserver &LocObserver) {
711 MachineFunction &MF = MIRBuilder.getMF();
712 MachineRegisterInfo &MRI = MF.getRegInfo();
713
714 Register DstFrac = MI.getOperand(0).getReg();
715 Register DstInt = MI.getOperand(1).getReg();
716 Register Src = MI.getOperand(2).getReg();
717 LLT DstTy = MRI.getType(DstFrac);
718
719 int MemSize = DstTy.getSizeInBytes();
720 Align Alignment = getStackTemporaryAlignment(DstTy);
721 const DataLayout &DL = MIRBuilder.getDataLayout();
722 unsigned AddrSpace = DL.getAllocaAddrSpace();
723 MachinePointerInfo PtrInfo;
724
725 Register StackPtrInt =
726 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
727 .getReg(0);
728
729 auto &Ctx = MF.getFunction().getContext();
730 auto LibcallResult = createLibcall(
731 MIRBuilder, getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
732 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
733 LocObserver, &MI);
734
735 if (LibcallResult != LegalizeResult::Legalized)
736 return LegalizerHelper::UnableToLegalize;
737
738 MachineMemOperand *LoadMMOInt = MF.getMachineMemOperand(
739 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
740
741 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
742 MI.eraseFromParent();
743
744 return LegalizerHelper::Legalized;
745}
746
747LegalizerHelper::LegalizeResult
748llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
749 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
750 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
751
752 SmallVector<CallLowering::ArgInfo, 3> Args;
753 // Add all the args, except for the last which is an imm denoting 'tail'.
754 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
755 Register Reg = MI.getOperand(i).getReg();
756
757 // Need to derive an IR type for call lowering.
758 LLT OpLLT = MRI.getType(Reg);
759 Type *OpTy = nullptr;
760 if (OpLLT.isPointer())
761 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
762 else
763 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
764 Args.push_back({Reg, OpTy, 0});
765 }
766
767 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
768 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
769 RTLIB::Libcall RTLibcall;
770 unsigned Opc = MI.getOpcode();
771 const char *Name;
772 switch (Opc) {
773 case TargetOpcode::G_BZERO:
774 RTLibcall = RTLIB::BZERO;
775 Name = TLI.getLibcallName(RTLibcall);
776 break;
777 case TargetOpcode::G_MEMCPY:
778 RTLibcall = RTLIB::MEMCPY;
779 Name = TLI.getMemcpyName();
780 Args[0].Flags[0].setReturned();
781 break;
782 case TargetOpcode::G_MEMMOVE:
783 RTLibcall = RTLIB::MEMMOVE;
784 Name = TLI.getLibcallName(RTLibcall);
785 Args[0].Flags[0].setReturned();
786 break;
787 case TargetOpcode::G_MEMSET:
788 RTLibcall = RTLIB::MEMSET;
789 Name = TLI.getLibcallName(RTLibcall);
790 Args[0].Flags[0].setReturned();
791 break;
792 default:
793 llvm_unreachable("unsupported opcode");
794 }
795
796 // Unsupported libcall on the target.
797 if (!Name) {
798 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
799 << MIRBuilder.getTII().getName(Opc) << "\n");
800 return LegalizerHelper::UnableToLegalize;
801 }
802
803 CallLowering::CallLoweringInfo Info;
804 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
805 Info.Callee = MachineOperand::CreateES(Name);
806 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
807 Info.IsTailCall =
808 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
809 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
810
811 llvm::append_range(Info.OrigArgs, Args);
812 if (!CLI.lowerCall(MIRBuilder, Info))
813 return LegalizerHelper::UnableToLegalize;
814
815 if (Info.LoweredTailCall) {
816 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
817
818 // Check debug locations before removing the return.
819 LocObserver.checkpoint(true);
820
821 // We must have a return following the call (or debug insts) to get past
822 // isLibCallInTailPosition.
823 do {
824 MachineInstr *Next = MI.getNextNode();
825 assert(Next &&
826 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
827 "Expected instr following MI to be return or debug inst?");
828 // We lowered a tail call, so the call is now the return from the block.
829 // Delete the old return.
830 Next->eraseFromParent();
831 } while (MI.getNextNode());
832
833 // We expect to lose the debug location from the return.
834 LocObserver.checkpoint(false);
835 }
836
837 return LegalizerHelper::Legalized;
838}
839
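/// Map an atomic opcode, together with the memory ordering and access size of
/// its MMO, to the matching RTLIB::OUTLINE_ATOMIC_* helper (e.g. on AArch64
/// these lower to the __aarch64_cas/__aarch64_swp/__aarch64_ld* routines);
/// returns UNKNOWN_LIBCALL for vector types and unhandled opcodes.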
840static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
841 unsigned Opc = MI.getOpcode();
842 auto &AtomicMI = cast<GMemOperation>(MI);
843 auto &MMO = AtomicMI.getMMO();
844 auto Ordering = MMO.getMergedOrdering();
845 LLT MemType = MMO.getMemoryType();
846 uint64_t MemSize = MemType.getSizeInBytes();
847 if (MemType.isVector())
848 return RTLIB::UNKNOWN_LIBCALL;
849
850#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
851#define LCALL5(A) \
852 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
853 switch (Opc) {
854 case TargetOpcode::G_ATOMIC_CMPXCHG:
855 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
856 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
857 return getOutlineAtomicHelper(LC, Ordering, MemSize);
858 }
859 case TargetOpcode::G_ATOMICRMW_XCHG: {
860 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
861 return getOutlineAtomicHelper(LC, Ordering, MemSize);
862 }
863 case TargetOpcode::G_ATOMICRMW_ADD:
864 case TargetOpcode::G_ATOMICRMW_SUB: {
865 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
866 return getOutlineAtomicHelper(LC, Ordering, MemSize);
867 }
868 case TargetOpcode::G_ATOMICRMW_AND: {
869 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
870 return getOutlineAtomicHelper(LC, Ordering, MemSize);
871 }
872 case TargetOpcode::G_ATOMICRMW_OR: {
873 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
874 return getOutlineAtomicHelper(LC, Ordering, MemSize);
875 }
876 case TargetOpcode::G_ATOMICRMW_XOR: {
877 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
878 return getOutlineAtomicHelper(LC, Ordering, MemSize);
879 }
880 default:
881 return RTLIB::UNKNOWN_LIBCALL;
882 }
883#undef LCALLS
884#undef LCALL5
885}
886
887static LegalizerHelper::LegalizeResult
888createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
889 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
890
891 Type *RetTy;
892 SmallVector<Register> RetRegs;
893 SmallVector<CallLowering::ArgInfo, 3> Args;
894 unsigned Opc = MI.getOpcode();
895 switch (Opc) {
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 Register Success;
899 LLT SuccessLLT;
900 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
901 MI.getFirst4RegLLTs();
902 RetRegs.push_back(Ret);
903 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
904 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
905 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
906 NewLLT) = MI.getFirst5RegLLTs();
907 RetRegs.push_back(Success);
908 RetTy = StructType::get(
909 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
910 }
911 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
912 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
913 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
914 break;
915 }
916 case TargetOpcode::G_ATOMICRMW_XCHG:
917 case TargetOpcode::G_ATOMICRMW_ADD:
918 case TargetOpcode::G_ATOMICRMW_SUB:
919 case TargetOpcode::G_ATOMICRMW_AND:
920 case TargetOpcode::G_ATOMICRMW_OR:
921 case TargetOpcode::G_ATOMICRMW_XOR: {
922 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
923 RetRegs.push_back(Ret);
924 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
925 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
926 Val =
927 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
928 .getReg(0);
929 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
930 Val =
931 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
932 .getReg(0);
933 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
934 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
935 break;
936 }
937 default:
938 llvm_unreachable("unsupported opcode");
939 }
940
941 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
942 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
943 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
944 const char *Name = TLI.getLibcallName(RTLibcall);
945
946 // Unsupported libcall on the target.
947 if (!Name) {
948 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
949 << MIRBuilder.getTII().getName(Opc) << "\n");
950 return LegalizerHelper::UnableToLegalize;
951 }
952
953 CallLowering::CallLoweringInfo Info;
954 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
955 Info.Callee = MachineOperand::CreateES(Name);
956 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
957
958 llvm::append_range(Info.OrigArgs, Args);
959 if (!CLI.lowerCall(MIRBuilder, Info))
960 return LegalizerHelper::UnableToLegalize;
961
962 return LegalizerHelper::Legalized;
963}
964
965static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
966 Type *FromType) {
967 auto ToMVT = MVT::getVT(ToType);
968 auto FromMVT = MVT::getVT(FromType);
969
970 switch (Opcode) {
971 case TargetOpcode::G_FPEXT:
972 return RTLIB::getFPEXT(FromMVT, ToMVT);
973 case TargetOpcode::G_FPTRUNC:
974 return RTLIB::getFPROUND(FromMVT, ToMVT);
975 case TargetOpcode::G_FPTOSI:
976 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
977 case TargetOpcode::G_FPTOUI:
978 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
979 case TargetOpcode::G_SITOFP:
980 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
981 case TargetOpcode::G_UITOFP:
982 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
983 }
984 llvm_unreachable("Unsupported libcall function");
985}
986
987static LegalizerHelper::LegalizeResult
988conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
989 Type *FromType, LostDebugLocObserver &LocObserver,
990 const TargetLowering &TLI, bool IsSigned = false) {
991 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
992 if (FromType->isIntegerTy()) {
993 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
994 Arg.Flags[0].setSExt();
995 else
996 Arg.Flags[0].setZExt();
997 }
998
999 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
1000 return createLibcall(MIRBuilder, Libcall,
1001 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
1002 &MI);
1003}
1004
1005static RTLIB::Libcall
1006getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
1007 RTLIB::Libcall RTLibcall;
1008 switch (MI.getOpcode()) {
1009 case TargetOpcode::G_GET_FPENV:
1010 RTLibcall = RTLIB::FEGETENV;
1011 break;
1012 case TargetOpcode::G_SET_FPENV:
1013 case TargetOpcode::G_RESET_FPENV:
1014 RTLibcall = RTLIB::FESETENV;
1015 break;
1016 case TargetOpcode::G_GET_FPMODE:
1017 RTLibcall = RTLIB::FEGETMODE;
1018 break;
1019 case TargetOpcode::G_SET_FPMODE:
1020 case TargetOpcode::G_RESET_FPMODE:
1021 RTLibcall = RTLIB::FESETMODE;
1022 break;
1023 default:
1024 llvm_unreachable("Unexpected opcode");
1025 }
1026 return RTLibcall;
1027}
1028
1029// Some library functions that read FP state (fegetmode, fegetenv) write the
1030// state into a region in memory. IR intrinsics that do the same operations
1031// (get_fpmode, get_fpenv) return the state as an integer value. To implement these
1032// intrinsics via the library functions, we need to use a temporary variable,
1033// for example:
1034//
1035// %0:_(s32) = G_GET_FPMODE
1036//
1037// is transformed to:
1038//
1039// %1:_(p0) = G_FRAME_INDEX %stack.0
1040// BL &fegetmode
1041// %0:_(s32) = G_LOAD %1
1042//
1043LegalizerHelper::LegalizeResult
1044LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
1045 MachineInstr &MI,
1046 LostDebugLocObserver &LocObserver) {
1047 const DataLayout &DL = MIRBuilder.getDataLayout();
1048 auto &MF = MIRBuilder.getMF();
1049 auto &MRI = *MIRBuilder.getMRI();
1050 auto &Ctx = MF.getFunction().getContext();
1051
1052 // Create a temporary where the library function will put the read state.
1053 Register Dst = MI.getOperand(0).getReg();
1054 LLT StateTy = MRI.getType(Dst);
1055 TypeSize StateSize = StateTy.getSizeInBytes();
1056 Align TempAlign = getStackTemporaryAlignment(StateTy);
1057 MachinePointerInfo TempPtrInfo;
1058 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1059
1060 // Create a call to library function, with the temporary as an argument.
1061 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1062 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1063 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1064 auto Res =
1065 createLibcall(MIRBuilder, RTLibcall,
1066 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1067 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1068 LocObserver, nullptr);
1069 if (Res != LegalizerHelper::Legalized)
1070 return Res;
1071
1072 // Create a load from the temporary.
1073 MachineMemOperand *MMO = MF.getMachineMemOperand(
1074 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1075 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1076
1077 return LegalizerHelper::Legalized;
1078}
1079
1080// Similar to `createGetStateLibcall`, this function calls a library function
1081// using transient space on the stack. In this case the library function reads
1082// the content of the memory region.
1083LegalizerHelper::LegalizeResult
1084LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1085 MachineInstr &MI,
1086 LostDebugLocObserver &LocObserver) {
1087 const DataLayout &DL = MIRBuilder.getDataLayout();
1088 auto &MF = MIRBuilder.getMF();
1089 auto &MRI = *MIRBuilder.getMRI();
1090 auto &Ctx = MF.getFunction().getContext();
1091
1092 // Create a temporary where the library function will get the new state.
1093 Register Src = MI.getOperand(0).getReg();
1094 LLT StateTy = MRI.getType(Src);
1095 TypeSize StateSize = StateTy.getSizeInBytes();
1096 Align TempAlign = getStackTemporaryAlignment(StateTy);
1097 MachinePointerInfo TempPtrInfo;
1098 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1099
1100 // Put the new state into the temporary.
1101 MachineMemOperand *MMO = MF.getMachineMemOperand(
1102 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1103 MIRBuilder.buildStore(Src, Temp, *MMO);
1104
1105 // Create a call to library function, with the temporary as an argument.
1106 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1107 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1108 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1109 return createLibcall(MIRBuilder, RTLibcall,
1110 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1111 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1112 LocObserver, nullptr);
1113}
1114
1115/// Returns the corresponding libcall for the given Pred and
1116/// the ICMP predicate that should be generated to compare with #0
1117/// after the libcall.
1118static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1119getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1120#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1121 do { \
1122 switch (Size) { \
1123 case 32: \
1124 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1125 case 64: \
1126 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1127 case 128: \
1128 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1129 default: \
1130 llvm_unreachable("unexpected size"); \
1131 } \
1132 } while (0)
1133
1134 switch (Pred) {
1135 case CmpInst::FCMP_OEQ:
1136 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1137 case CmpInst::FCMP_UNE:
1138 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1139 case CmpInst::FCMP_OGE:
1140 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1141 case CmpInst::FCMP_OLT:
1142 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1143 case CmpInst::FCMP_OLE:
1144 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1145 case CmpInst::FCMP_OGT:
1146 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1147 case CmpInst::FCMP_UNO:
1148 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1149 default:
1150 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1151 }
1152}
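
// For example, getFCMPLibcallDesc(CmpInst::FCMP_OEQ, 64) yields
// {RTLIB::OEQ_F64, CmpInst::ICMP_EQ}: on most targets the __eqdf2 soft-float
// routine returns 0 when its operands are ordered and equal, so the libcall
// result is then compared ICMP_EQ against zero.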
1153
1154LegalizerHelper::LegalizeResult
1155LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1156 MachineInstr &MI,
1157 LostDebugLocObserver &LocObserver) {
1158 auto &MF = MIRBuilder.getMF();
1159 auto &Ctx = MF.getFunction().getContext();
1160 const GFCmp *Cmp = cast<GFCmp>(&MI);
1161
1162 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1163 unsigned Size = OpLLT.getSizeInBits();
1164 if ((Size != 32 && Size != 64 && Size != 128) ||
1165 OpLLT != MRI.getType(Cmp->getRHSReg()))
1166 return UnableToLegalize;
1167
1168 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1169
1170 // DstReg type is s32
1171 const Register DstReg = Cmp->getReg(0);
1172 LLT DstTy = MRI.getType(DstReg);
1173 const auto Cond = Cmp->getCond();
1174
1175 // Reference:
1176 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1177 // Generates a libcall followed by ICMP.
1178 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1179 const CmpInst::Predicate ICmpPred,
1180 const DstOp &Res) -> Register {
1181 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1182 constexpr LLT TempLLT = LLT::scalar(32);
1183 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1184 // Generate libcall, holding result in Temp
1185 const auto Status = createLibcall(
1186 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1187 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1188 LocObserver, &MI);
1189 if (!Status)
1190 return {};
1191
1192 // Compare temp with #0 to get the final result.
1193 return MIRBuilder
1194 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1195 .getReg(0);
1196 };
1197
1198 // Simple case if we have a direct mapping from predicate to libcall
1199 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1200 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1201 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1202 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1203 return Legalized;
1204 }
1205 return UnableToLegalize;
1206 }
1207
1208 // No direct mapping found; generate it as a combination of libcalls.
1209
1210 switch (Cond) {
1211 case CmpInst::FCMP_UEQ: {
1212 // FCMP_UEQ: unordered or equal
1213 // Convert into (FCMP_OEQ || FCMP_UNO).
1214
1215 const auto [OeqLibcall, OeqPred] =
1216 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1217 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1218
1219 const auto [UnoLibcall, UnoPred] =
1220 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1221 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1222 if (Oeq && Uno)
1223 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1224 else
1225 return UnableToLegalize;
1226
1227 break;
1228 }
1229 case CmpInst::FCMP_ONE: {
1230 // FCMP_ONE: ordered and operands are unequal
1231 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1232
1233 // We invert the predicate instead of generating a NOT
1234 // to save one instruction.
1235 // On AArch64, isel can even select two cmps into a single ccmp.
1236 const auto [OeqLibcall, OeqPred] =
1237 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1238 const auto NotOeq =
1239 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1240
1241 const auto [UnoLibcall, UnoPred] =
1242 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1243 const auto NotUno =
1244 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1245
1246 if (NotOeq && NotUno)
1247 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1248 else
1249 return UnableToLegalize;
1250
1251 break;
1252 }
1253 case CmpInst::FCMP_ULT:
1254 case CmpInst::FCMP_UGE:
1255 case CmpInst::FCMP_UGT:
1256 case CmpInst::FCMP_ULE:
1257 case CmpInst::FCMP_ORD: {
1258 // Convert into: !(inverse(Pred))
1259 // E.g. FCMP_ULT becomes !FCMP_OGE
1260 // This is equivalent to the following, but saves some instructions.
1261 // MIRBuilder.buildNot(
1262 // PredTy,
1263 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1264 // Op1, Op2));
1265 const auto [InversedLibcall, InversedPred] =
1266 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1267 if (!BuildLibcall(InversedLibcall,
1268 CmpInst::getInversePredicate(InversedPred), DstReg))
1269 return UnableToLegalize;
1270 break;
1271 }
1272 default:
1273 return UnableToLegalize;
1274 }
1275
1276 return Legalized;
1277}
1278
1279// The function is used to legalize operations that set the default environment
1280// state. In the C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1281// On most targets supported in glibc, FE_DFL_MODE is defined as
1282// `((const femode_t *) -1)`. That assumption is used here. If for some target
1283// it is not true, the target must provide custom lowering.
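// For example, on a typical 64-bit target G_RESET_FPMODE is lowered roughly
// to:
//
// %0:_(s64) = G_CONSTANT i64 -1
// %1:_(p0) = G_INTTOPTR %0
// BL &fesetmode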
1284LegalizerHelper::LegalizeResult
1285LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1286 MachineInstr &MI,
1287 LostDebugLocObserver &LocObserver) {
1288 const DataLayout &DL = MIRBuilder.getDataLayout();
1289 auto &MF = MIRBuilder.getMF();
1290 auto &Ctx = MF.getFunction().getContext();
1291
1292 // Create an argument for the library function.
1293 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1294 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1295 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1296 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1297 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1298 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1299 MIRBuilder.buildIntToPtr(Dest, DefValue);
1300
1301 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1302 return createLibcall(MIRBuilder, RTLibcall,
1303 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1304 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1305 LocObserver, &MI);
1306}
1307
1308LegalizerHelper::LegalizeResult
1309LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1310 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1311
1312 switch (MI.getOpcode()) {
1313 default:
1314 return UnableToLegalize;
1315 case TargetOpcode::G_MUL:
1316 case TargetOpcode::G_SDIV:
1317 case TargetOpcode::G_UDIV:
1318 case TargetOpcode::G_SREM:
1319 case TargetOpcode::G_UREM:
1320 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1321 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1322 unsigned Size = LLTy.getSizeInBits();
1323 Type *HLTy = IntegerType::get(Ctx, Size);
1324 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1325 if (Status != Legalized)
1326 return Status;
1327 break;
1328 }
1329 case TargetOpcode::G_FADD:
1330 case TargetOpcode::G_FSUB:
1331 case TargetOpcode::G_FMUL:
1332 case TargetOpcode::G_FDIV:
1333 case TargetOpcode::G_FMA:
1334 case TargetOpcode::G_FPOW:
1335 case TargetOpcode::G_FREM:
1336 case TargetOpcode::G_FCOS:
1337 case TargetOpcode::G_FSIN:
1338 case TargetOpcode::G_FTAN:
1339 case TargetOpcode::G_FACOS:
1340 case TargetOpcode::G_FASIN:
1341 case TargetOpcode::G_FATAN:
1342 case TargetOpcode::G_FATAN2:
1343 case TargetOpcode::G_FCOSH:
1344 case TargetOpcode::G_FSINH:
1345 case TargetOpcode::G_FTANH:
1346 case TargetOpcode::G_FLOG10:
1347 case TargetOpcode::G_FLOG:
1348 case TargetOpcode::G_FLOG2:
1349 case TargetOpcode::G_FEXP:
1350 case TargetOpcode::G_FEXP2:
1351 case TargetOpcode::G_FEXP10:
1352 case TargetOpcode::G_FCEIL:
1353 case TargetOpcode::G_FFLOOR:
1354 case TargetOpcode::G_FMINNUM:
1355 case TargetOpcode::G_FMAXNUM:
1356 case TargetOpcode::G_FMINIMUMNUM:
1357 case TargetOpcode::G_FMAXIMUMNUM:
1358 case TargetOpcode::G_FSQRT:
1359 case TargetOpcode::G_FRINT:
1360 case TargetOpcode::G_FNEARBYINT:
1361 case TargetOpcode::G_INTRINSIC_TRUNC:
1362 case TargetOpcode::G_INTRINSIC_ROUND:
1363 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1364 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1365 unsigned Size = LLTy.getSizeInBits();
1366 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1367 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1368 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1369 return UnableToLegalize;
1370 }
1371 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1372 if (Status != Legalized)
1373 return Status;
1374 break;
1375 }
1376 case TargetOpcode::G_FSINCOS: {
1377 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1378 unsigned Size = LLTy.getSizeInBits();
1379 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1380 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1381 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1382 return UnableToLegalize;
1383 }
1384 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1385 }
1386 case TargetOpcode::G_FMODF: {
1387 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1388 unsigned Size = LLTy.getSizeInBits();
1389 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1390 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1391 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1392 return UnableToLegalize;
1393 }
1394 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1395 }
1396 case TargetOpcode::G_LROUND:
1397 case TargetOpcode::G_LLROUND:
1398 case TargetOpcode::G_INTRINSIC_LRINT:
1399 case TargetOpcode::G_INTRINSIC_LLRINT: {
1400 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1401 unsigned Size = LLTy.getSizeInBits();
1402 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1403 Type *ITy = IntegerType::get(
1404 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1405 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1406 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1407 return UnableToLegalize;
1408 }
1409 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1410 LegalizeResult Status =
1411 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1412 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1413 if (Status != Legalized)
1414 return Status;
1415 MI.eraseFromParent();
1416 return Legalized;
1417 }
1418 case TargetOpcode::G_FPOWI:
1419 case TargetOpcode::G_FLDEXP: {
1420 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1421 unsigned Size = LLTy.getSizeInBits();
1422 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1423 Type *ITy = IntegerType::get(
1424 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1425 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1426 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1427 return UnableToLegalize;
1428 }
1429 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1430 SmallVector<CallLowering::ArgInfo, 2> Args = {
1431 {MI.getOperand(1).getReg(), HLTy, 0},
1432 {MI.getOperand(2).getReg(), ITy, 1}};
1433 Args[1].Flags[0].setSExt();
1434 LegalizeResult Status =
1435 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1436 Args, LocObserver, &MI);
1437 if (Status != Legalized)
1438 return Status;
1439 break;
1440 }
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1443 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1444 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1445 if (!FromTy || !ToTy)
1446 return UnableToLegalize;
1447 LegalizeResult Status =
1448 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1449 if (Status != Legalized)
1450 return Status;
1451 break;
1452 }
1453 case TargetOpcode::G_FCMP: {
1454 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1455 if (Status != Legalized)
1456 return Status;
1457 MI.eraseFromParent();
1458 return Status;
1459 }
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1462 // FIXME: Support other types
1463 Type *FromTy =
1464 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1465 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1467 return UnableToLegalize;
1468 LegalizeResult Status = conversionLibcall(
1469 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1470 if (Status != Legalized)
1471 return Status;
1472 break;
1473 }
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1477 Type *ToTy =
1478 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 return UnableToLegalize;
1481 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1482 LegalizeResult Status =
1483 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1484 LocObserver, TLI, IsSigned);
1485 if (Status != Legalized)
1486 return Status;
1487 break;
1488 }
1489 case TargetOpcode::G_ATOMICRMW_XCHG:
1490 case TargetOpcode::G_ATOMICRMW_ADD:
1491 case TargetOpcode::G_ATOMICRMW_SUB:
1492 case TargetOpcode::G_ATOMICRMW_AND:
1493 case TargetOpcode::G_ATOMICRMW_OR:
1494 case TargetOpcode::G_ATOMICRMW_XOR:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG:
1496 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1497 auto Status = createAtomicLibcall(MIRBuilder, MI);
1498 if (Status != Legalized)
1499 return Status;
1500 break;
1501 }
1502 case TargetOpcode::G_BZERO:
1503 case TargetOpcode::G_MEMCPY:
1504 case TargetOpcode::G_MEMMOVE:
1505 case TargetOpcode::G_MEMSET: {
1506 LegalizeResult Result =
1507 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1508 if (Result != Legalized)
1509 return Result;
1510 MI.eraseFromParent();
1511 return Result;
1512 }
1513 case TargetOpcode::G_GET_FPENV:
1514 case TargetOpcode::G_GET_FPMODE: {
1515 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1516 if (Result != Legalized)
1517 return Result;
1518 break;
1519 }
1520 case TargetOpcode::G_SET_FPENV:
1521 case TargetOpcode::G_SET_FPMODE: {
1522 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1523 if (Result != Legalized)
1524 return Result;
1525 break;
1526 }
1527 case TargetOpcode::G_RESET_FPENV:
1528 case TargetOpcode::G_RESET_FPMODE: {
1529 LegalizeResult Result =
1530 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1531 if (Result != Legalized)
1532 return Result;
1533 break;
1534 }
1535 }
1536
1537 MI.eraseFromParent();
1538 return Legalized;
1539}
1540
1541LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1542 unsigned TypeIdx,
1543 LLT NarrowTy) {
1544 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1545 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1546
1547 switch (MI.getOpcode()) {
1548 default:
1549 return UnableToLegalize;
1550 case TargetOpcode::G_IMPLICIT_DEF: {
1551 Register DstReg = MI.getOperand(0).getReg();
1552 LLT DstTy = MRI.getType(DstReg);
1553
1554 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1555 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1556 // FIXME: Although this would also be legal for the general case, it causes
1557 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1558 // combines not being hit). This seems to be a problem related to the
1559 // artifact combiner.
1560 if (SizeOp0 % NarrowSize != 0) {
1561 LLT ImplicitTy = NarrowTy;
1562 if (DstTy.isVector())
1563 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1564
1565 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1566 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1567
1568 MI.eraseFromParent();
1569 return Legalized;
1570 }
1571
1572 int NumParts = SizeOp0 / NarrowSize;
1573
1574 SmallVector<Register, 2> DstRegs;
1575 for (int i = 0; i < NumParts; ++i)
1576 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1577
1578 if (DstTy.isVector())
1579 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1580 else
1581 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1582 MI.eraseFromParent();
1583 return Legalized;
1584 }
1585 case TargetOpcode::G_CONSTANT: {
1586 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1587 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1588 unsigned TotalSize = Ty.getSizeInBits();
1589 unsigned NarrowSize = NarrowTy.getSizeInBits();
1590 int NumParts = TotalSize / NarrowSize;
1591
1592 SmallVector<Register, 4> PartRegs;
1593 for (int I = 0; I != NumParts; ++I) {
1594 unsigned Offset = I * NarrowSize;
1595 auto K = MIRBuilder.buildConstant(NarrowTy,
1596 Val.lshr(Offset).trunc(NarrowSize));
1597 PartRegs.push_back(K.getReg(0));
1598 }
1599
1600 LLT LeftoverTy;
1601 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1602 SmallVector<Register, 1> LeftoverRegs;
1603 if (LeftoverBits != 0) {
1604 LeftoverTy = LLT::scalar(LeftoverBits);
1605 auto K = MIRBuilder.buildConstant(
1606 LeftoverTy,
1607 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1608 LeftoverRegs.push_back(K.getReg(0));
1609 }
1610
1611 insertParts(MI.getOperand(0).getReg(),
1612 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1613
1614 MI.eraseFromParent();
1615 return Legalized;
1616 }
1617 case TargetOpcode::G_SEXT:
1618 case TargetOpcode::G_ZEXT:
1619 case TargetOpcode::G_ANYEXT:
1620 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1621 case TargetOpcode::G_TRUNC: {
1622 if (TypeIdx != 1)
1623 return UnableToLegalize;
1624
1625 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1626 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1627 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1628 return UnableToLegalize;
1629 }
1630
1631 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1632 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1633 MI.eraseFromParent();
1634 return Legalized;
1635 }
1636 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1637 case TargetOpcode::G_FREEZE: {
1638 if (TypeIdx != 0)
1639 return UnableToLegalize;
1640
1641 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1642 // Should widen scalar first
1643 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1644 return UnableToLegalize;
1645
1646 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1647 SmallVector<Register, 8> Parts;
1648 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1649 Parts.push_back(
1650 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1651 .getReg(0));
1652 }
1653
1654 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1655 MI.eraseFromParent();
1656 return Legalized;
1657 }
1658 case TargetOpcode::G_ADD:
1659 case TargetOpcode::G_SUB:
1660 case TargetOpcode::G_SADDO:
1661 case TargetOpcode::G_SSUBO:
1662 case TargetOpcode::G_SADDE:
1663 case TargetOpcode::G_SSUBE:
1664 case TargetOpcode::G_UADDO:
1665 case TargetOpcode::G_USUBO:
1666 case TargetOpcode::G_UADDE:
1667 case TargetOpcode::G_USUBE:
1668 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1669 case TargetOpcode::G_MUL:
1670 case TargetOpcode::G_UMULH:
1671 return narrowScalarMul(MI, NarrowTy);
1672 case TargetOpcode::G_EXTRACT:
1673 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1674 case TargetOpcode::G_INSERT:
1675 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1676 case TargetOpcode::G_LOAD: {
1677 auto &LoadMI = cast<GLoad>(MI);
1678 Register DstReg = LoadMI.getDstReg();
1679 LLT DstTy = MRI.getType(DstReg);
1680 if (DstTy.isVector())
1681 return UnableToLegalize;
1682
1683 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1684 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1685 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1686 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1687 LoadMI.eraseFromParent();
1688 return Legalized;
1689 }
1690
1691 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1692 }
1693 case TargetOpcode::G_ZEXTLOAD:
1694 case TargetOpcode::G_SEXTLOAD: {
1695 auto &LoadMI = cast<GExtLoad>(MI);
1696 Register DstReg = LoadMI.getDstReg();
1697 Register PtrReg = LoadMI.getPointerReg();
1698
1699 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1700 auto &MMO = LoadMI.getMMO();
1701 unsigned MemSize = MMO.getSizeInBits().getValue();
1702
1703 if (MemSize == NarrowSize) {
1704 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1705 } else if (MemSize < NarrowSize) {
1706 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1707 } else if (MemSize > NarrowSize) {
1708 // FIXME: Need to split the load.
1709 return UnableToLegalize;
1710 }
1711
1712 if (isa<GZExtLoad>(LoadMI))
1713 MIRBuilder.buildZExt(DstReg, TmpReg);
1714 else
1715 MIRBuilder.buildSExt(DstReg, TmpReg);
1716
1717 LoadMI.eraseFromParent();
1718 return Legalized;
1719 }
1720 case TargetOpcode::G_STORE: {
1721 auto &StoreMI = cast<GStore>(MI);
1722
1723 Register SrcReg = StoreMI.getValueReg();
1724 LLT SrcTy = MRI.getType(SrcReg);
1725 if (SrcTy.isVector())
1726 return UnableToLegalize;
1727
1728 int NumParts = SizeOp0 / NarrowSize;
1729 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1730 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1731 if (SrcTy.isVector() && LeftoverBits != 0)
1732 return UnableToLegalize;
1733
1734 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1735 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1736 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1737 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1738 StoreMI.eraseFromParent();
1739 return Legalized;
1740 }
1741
1742 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1743 }
1744 case TargetOpcode::G_SELECT:
1745 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1746 case TargetOpcode::G_AND:
1747 case TargetOpcode::G_OR:
1748 case TargetOpcode::G_XOR: {
1749 // Legalize bitwise operation:
1750 // A = BinOp<Ty> B, C
1751 // into:
1752 // B1, ..., BN = G_UNMERGE_VALUES B
1753 // C1, ..., CN = G_UNMERGE_VALUES C
1754 // A1 = BinOp<Ty/N> B1, C1
1755 // ...
1756 // AN = BinOp<Ty/N> BN, CN
1757 // A = G_MERGE_VALUES A1, ..., AN
1758 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1759 }
1760 case TargetOpcode::G_SHL:
1761 case TargetOpcode::G_LSHR:
1762 case TargetOpcode::G_ASHR:
1763 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1764 case TargetOpcode::G_CTLZ:
1765 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1766 case TargetOpcode::G_CTTZ:
1767 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1768 case TargetOpcode::G_CTPOP:
1769 if (TypeIdx == 1)
1770 switch (MI.getOpcode()) {
1771 case TargetOpcode::G_CTLZ:
1772 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1773 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1774 case TargetOpcode::G_CTTZ:
1775 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1776 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1777 case TargetOpcode::G_CTPOP:
1778 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1779 default:
1780 return UnableToLegalize;
1781 }
1782
1783 Observer.changingInstr(MI);
1784 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1785 Observer.changedInstr(MI);
1786 return Legalized;
1787 case TargetOpcode::G_INTTOPTR:
1788 if (TypeIdx != 1)
1789 return UnableToLegalize;
1790
1791 Observer.changingInstr(MI);
1792 narrowScalarSrc(MI, NarrowTy, 1);
1793 Observer.changedInstr(MI);
1794 return Legalized;
1795 case TargetOpcode::G_PTRTOINT:
1796 if (TypeIdx != 0)
1797 return UnableToLegalize;
1798
1799 Observer.changingInstr(MI);
1800 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1801 Observer.changedInstr(MI);
1802 return Legalized;
1803 case TargetOpcode::G_PHI: {
1804 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1805 // NarrowSize.
1806 if (SizeOp0 % NarrowSize != 0)
1807 return UnableToLegalize;
1808
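// Split each incoming value into NarrowTy pieces in its predecessor block,
// build one narrow G_PHI per piece, and re-merge the piece PHIs into the
// original wide destination at the top of this block.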
1809 unsigned NumParts = SizeOp0 / NarrowSize;
1810 SmallVector<Register, 2> DstRegs(NumParts);
1811 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1812 Observer.changingInstr(MI);
1813 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1814 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1815 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1816 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1817 SrcRegs[i / 2], MIRBuilder, MRI);
1818 }
1819 MachineBasicBlock &MBB = *MI.getParent();
1820 MIRBuilder.setInsertPt(MBB, MI);
1821 for (unsigned i = 0; i < NumParts; ++i) {
1822 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1823 MachineInstrBuilder MIB =
1824 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1825 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1826 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1827 }
1828 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1829 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1830 Observer.changedInstr(MI);
1831 MI.eraseFromParent();
1832 return Legalized;
1833 }
1834 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1835 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1836 if (TypeIdx != 2)
1837 return UnableToLegalize;
1838
1839 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1840 Observer.changingInstr(MI);
1841 narrowScalarSrc(MI, NarrowTy, OpIdx);
1842 Observer.changedInstr(MI);
1843 return Legalized;
1844 }
1845 case TargetOpcode::G_ICMP: {
1846 Register LHS = MI.getOperand(2).getReg();
1847 LLT SrcTy = MRI.getType(LHS);
1848 CmpInst::Predicate Pred =
1849 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1850
1851 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1852 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1853 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1854 LHSLeftoverRegs, MIRBuilder, MRI))
1855 return UnableToLegalize;
1856
1857 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1858 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1859 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1860 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1861 return UnableToLegalize;
1862
1863 // We now have the LHS and RHS of the compare split into narrow-type
1864 // registers, plus potentially some leftover type.
1865 Register Dst = MI.getOperand(0).getReg();
1866 LLT ResTy = MRI.getType(Dst);
1867 if (ICmpInst::isEquality(Pred)) {
1868 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1869 // them together. For each equal part, the result should be all 0s. For
1870 // each non-equal part, we'll get at least one 1.
1871 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1872 SmallVector<Register, 4> Xors;
1873 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1874 auto LHS = std::get<0>(LHSAndRHS);
1875 auto RHS = std::get<1>(LHSAndRHS);
1876 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1877 Xors.push_back(Xor);
1878 }
1879
1880 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1881 // to the desired narrow type so that we can OR them together later.
1882 SmallVector<Register, 4> WidenedXors;
1883 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1884 auto LHS = std::get<0>(LHSAndRHS);
1885 auto RHS = std::get<1>(LHSAndRHS);
1886 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1887 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1888 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1889 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1890 llvm::append_range(Xors, WidenedXors);
1891 }
1892
1893 // Now, for each part we broke up, we know if they are equal/not equal
1894 // based off the G_XOR. We can OR these all together and compare against
1895 // 0 to get the result.
1896 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1897 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1898 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1899 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1900 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1901 } else {
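// Non-equality compare: walk the pieces from least to most significant.
// For each piece, if the LHS and RHS pieces are equal, keep the result
// computed for the lower pieces (via G_SELECT); otherwise the current piece
// decides. Every piece except the most significant one is compared with the
// unsigned form of the predicate; the final piece uses the original
// predicate and produces the overall result.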
1902 Register CmpIn;
1903 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1904 Register CmpOut;
1905 CmpInst::Predicate PartPred;
1906
1907 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1908 PartPred = Pred;
1909 CmpOut = Dst;
1910 } else {
1911 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1912 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1913 }
1914
1915 if (!CmpIn) {
1916 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1917 RHSPartRegs[I]);
1918 } else {
1919 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1920 RHSPartRegs[I]);
1921 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1922 LHSPartRegs[I], RHSPartRegs[I]);
1923 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1924 }
1925
1926 CmpIn = CmpOut;
1927 }
1928
1929 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1930 Register CmpOut;
1931 CmpInst::Predicate PartPred;
1932
1933 if (I == E - 1) {
1934 PartPred = Pred;
1935 CmpOut = Dst;
1936 } else {
1937 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1938 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 }
1940
1941 if (!CmpIn) {
1942 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1943 RHSLeftoverRegs[I]);
1944 } else {
1945 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1946 RHSLeftoverRegs[I]);
1947 auto CmpEq =
1948 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1949 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1950 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1951 }
1952
1953 CmpIn = CmpOut;
1954 }
1955 }
1956 MI.eraseFromParent();
1957 return Legalized;
1958 }
1959 case TargetOpcode::G_FCMP:
1960 if (TypeIdx != 0)
1961 return UnableToLegalize;
1962
1963 Observer.changingInstr(MI);
1964 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1965 Observer.changedInstr(MI);
1966 return Legalized;
1967
1968 case TargetOpcode::G_SEXT_INREG: {
1969 if (TypeIdx != 0)
1970 return UnableToLegalize;
1971
1972 int64_t SizeInBits = MI.getOperand(2).getImm();
1973
1974 // So long as the new type has more bits than the bits we're extending we
1975 // don't need to break it apart.
1976 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1977 Observer.changingInstr(MI);
1978 // We don't lose any non-extension bits by truncating the src and
1979 // sign-extending the dst.
1980 MachineOperand &MO1 = MI.getOperand(1);
1981 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1982 MO1.setReg(TruncMIB.getReg(0));
1983
1984 MachineOperand &MO2 = MI.getOperand(0);
1985 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1986 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1987 MIRBuilder.buildSExt(MO2, DstExt);
1988 MO2.setReg(DstExt);
1989 Observer.changedInstr(MI);
1990 return Legalized;
1991 }
1992
1993 // Break it apart. Components below the extension point are unmodified. The
1994 // component containing the extension point becomes a narrower SEXT_INREG.
1995 // Components above it are ashr'd from the component containing the
1996 // extension point.
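// e.g. narrowing %d:_(s64) = G_SEXT_INREG %s:_(s64), 24 with NarrowTy = s32:
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %s:_(s64)
//   %p:_(s32) = G_SEXT_INREG %lo:_(s32), 24 ; contains the extension point
//   %f:_(s32) = G_ASHR %p:_(s32), 31 ; copies of the sign bit
//   %d:_(s64) = G_MERGE_VALUES %p:_(s32), %f:_(s32)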
1997 if (SizeOp0 % NarrowSize != 0)
1998 return UnableToLegalize;
1999 int NumParts = SizeOp0 / NarrowSize;
2000
2001 // List the registers where the destination will be scattered.
2002 SmallVector<Register, 2> DstRegs;
2003 // List the registers where the source will be split.
2004 SmallVector<Register, 2> SrcRegs;
2005
2006 // Create all the temporary registers.
2007 for (int i = 0; i < NumParts; ++i) {
2008 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2009
2010 SrcRegs.push_back(SrcReg);
2011 }
2012
2013 // Explode the big arguments into smaller chunks.
2014 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2015
2016 Register AshrCstReg =
2017 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2018 .getReg(0);
2019 Register FullExtensionReg;
2020 Register PartialExtensionReg;
2021
2022 // Do the operation on each small part.
2023 for (int i = 0; i < NumParts; ++i) {
2024 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2025 DstRegs.push_back(SrcRegs[i]);
2026 PartialExtensionReg = DstRegs.back();
2027 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2028 assert(PartialExtensionReg &&
2029 "Expected to visit partial extension before full");
2030 if (FullExtensionReg) {
2031 DstRegs.push_back(FullExtensionReg);
2032 continue;
2033 }
2034 DstRegs.push_back(
2035 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2036 .getReg(0));
2037 FullExtensionReg = DstRegs.back();
2038 } else {
2039 DstRegs.push_back(
2040 MIRBuilder
2041 .buildInstr(
2042 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2043 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2044 .getReg(0));
2045 PartialExtensionReg = DstRegs.back();
2046 }
2047 }
2048
2049 // Gather the destination registers into the final destination.
2050 Register DstReg = MI.getOperand(0).getReg();
2051 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2052 MI.eraseFromParent();
2053 return Legalized;
2054 }
2055 case TargetOpcode::G_BSWAP:
2056 case TargetOpcode::G_BITREVERSE: {
2057 if (SizeOp0 % NarrowSize != 0)
2058 return UnableToLegalize;
2059
2060 Observer.changingInstr(MI);
2061 SmallVector<Register, 2> SrcRegs, DstRegs;
2062 unsigned NumParts = SizeOp0 / NarrowSize;
2063 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2064 MIRBuilder, MRI);
2065
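// Apply the operation to each narrow piece and reassemble the pieces in
// reverse order, e.g. for G_BSWAP with NarrowTy = s32:
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %src:_(s64)
//   %a:_(s32) = G_BSWAP %hi
//   %b:_(s32) = G_BSWAP %lo
//   %dst:_(s64) = G_MERGE_VALUES %a:_(s32), %b:_(s32)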
2066 for (unsigned i = 0; i < NumParts; ++i) {
2067 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2068 {SrcRegs[NumParts - 1 - i]});
2069 DstRegs.push_back(DstPart.getReg(0));
2070 }
2071
2072 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2073
2074 Observer.changedInstr(MI);
2075 MI.eraseFromParent();
2076 return Legalized;
2077 }
2078 case TargetOpcode::G_PTR_ADD:
2079 case TargetOpcode::G_PTRMASK: {
2080 if (TypeIdx != 1)
2081 return UnableToLegalize;
2082 Observer.changingInstr(MI);
2083 narrowScalarSrc(MI, NarrowTy, 2);
2084 Observer.changedInstr(MI);
2085 return Legalized;
2086 }
2087 case TargetOpcode::G_FPTOUI:
2088 case TargetOpcode::G_FPTOSI:
2089 case TargetOpcode::G_FPTOUI_SAT:
2090 case TargetOpcode::G_FPTOSI_SAT:
2091 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2092 case TargetOpcode::G_FPEXT:
2093 if (TypeIdx != 0)
2094 return UnableToLegalize;
2095 Observer.changingInstr(MI);
2096 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2097 Observer.changedInstr(MI);
2098 return Legalized;
2099 case TargetOpcode::G_FLDEXP:
2100 case TargetOpcode::G_STRICT_FLDEXP:
2101 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2102 case TargetOpcode::G_VSCALE: {
2103 Register Dst = MI.getOperand(0).getReg();
2104 LLT Ty = MRI.getType(Dst);
2105
2106 // Assume VSCALE(1) fits into a legal integer
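// i.e. Dst = G_MUL (G_ZEXT (G_VSCALE NarrowTy, 1)), C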
2107 const APInt One(NarrowTy.getSizeInBits(), 1);
2108 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2109 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2110 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2111 MIRBuilder.buildMul(Dst, ZExt, C);
2112
2113 MI.eraseFromParent();
2114 return Legalized;
2115 }
2116 }
2117}
2118
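/// Coerce \p Val to a scalar carrying the same bits: scalars are returned
/// unchanged, pointers are converted with G_PTRTOINT, and vectors are bitcast
/// to a scalar of the same total width (pointer vectors go through
/// G_PTRTOINT first). Returns an invalid Register for pointers in
/// non-integral address spaces.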
2119 Register LegalizerHelper::coerceToScalar(Register Val) {
2120 LLT Ty = MRI.getType(Val);
2121 if (Ty.isScalar())
2122 return Val;
2123
2124 const DataLayout &DL = MIRBuilder.getDataLayout();
2125 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2126 if (Ty.isPointer()) {
2127 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2128 return Register();
2129 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2130 }
2131
2132 Register NewVal = Val;
2133
2134 assert(Ty.isVector());
2135 if (Ty.isPointerVector())
2136 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2137 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2138}
2139
2140 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2141 unsigned OpIdx, unsigned ExtOpcode) {
2142 MachineOperand &MO = MI.getOperand(OpIdx);
2143 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2144 MO.setReg(ExtB.getReg(0));
2145}
2146
2147 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2148 unsigned OpIdx) {
2149 MachineOperand &MO = MI.getOperand(OpIdx);
2150 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2151 MO.setReg(ExtB.getReg(0));
2152}
2153
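/// Widen def operand \p OpIdx of \p MI: rewrite the operand to a fresh
/// virtual register of type \p WideTy and insert a \p TruncOpcode after
/// \p MI that narrows the wide result back into the original destination.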
2154 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2155 unsigned OpIdx, unsigned TruncOpcode) {
2156 MachineOperand &MO = MI.getOperand(OpIdx);
2157 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2158 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2159 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2160 MO.setReg(DstExt);
2161}
2162
2163 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2164 unsigned OpIdx, unsigned ExtOpcode) {
2165 MachineOperand &MO = MI.getOperand(OpIdx);
2166 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2167 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2168 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2169 MO.setReg(DstTrunc);
2170}
2171
2172 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2173 unsigned OpIdx) {
2174 MachineOperand &MO = MI.getOperand(OpIdx);
2175 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2176 Register Dst = MO.getReg();
2177 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2178 MO.setReg(DstExt);
2179 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2180}
2181
2182 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2183 unsigned OpIdx) {
2184 MachineOperand &MO = MI.getOperand(OpIdx);
2185 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2186}
2187
2188 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2189 MachineOperand &Op = MI.getOperand(OpIdx);
2190 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2191}
2192
2193 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2194 MachineOperand &MO = MI.getOperand(OpIdx);
2195 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2196 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2197 MIRBuilder.buildBitcast(MO, CastDst);
2198 MO.setReg(CastDst);
2199}
2200
2201 LegalizerHelper::LegalizeResult
2202 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2203 LLT WideTy) {
2204 if (TypeIdx != 1)
2205 return UnableToLegalize;
2206
2207 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2208 if (DstTy.isVector())
2209 return UnableToLegalize;
2210
2211 LLT SrcTy = MRI.getType(Src1Reg);
2212 const int DstSize = DstTy.getSizeInBits();
2213 const int SrcSize = SrcTy.getSizeInBits();
2214 const int WideSize = WideTy.getSizeInBits();
2215 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2216
2217 unsigned NumOps = MI.getNumOperands();
2218 unsigned NumSrc = MI.getNumOperands() - 1;
2219 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2220
2221 if (WideSize >= DstSize) {
2222 // Directly pack the bits in the target type.
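// e.g. widening %d:_(s24) = G_MERGE_VALUES %a:_(s8), %b:_(s8), %c:_(s8) to
// s32: zero-extend each source to s32, shift it left to its bit offset, OR
// the pieces together, and truncate (or G_INTTOPTR) the accumulated value
// into the destination.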
2223 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2224
2225 for (unsigned I = 2; I != NumOps; ++I) {
2226 const unsigned Offset = (I - 1) * PartSize;
2227
2228 Register SrcReg = MI.getOperand(I).getReg();
2229 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2230
2231 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2232
2233 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2234 MRI.createGenericVirtualRegister(WideTy);
2235
2236 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2237 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2238 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2239 ResultReg = NextResult;
2240 }
2241
2242 if (WideSize > DstSize)
2243 MIRBuilder.buildTrunc(DstReg, ResultReg);
2244 else if (DstTy.isPointer())
2245 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2246
2247 MI.eraseFromParent();
2248 return Legalized;
2249 }
2250
2251 // Unmerge the original values to the GCD type, and recombine to the next
2252 // multiple greater than the original type.
2253 //
2254 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2255 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2256 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2257 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2258 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2259 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2260 // %12:_(s12) = G_MERGE_VALUES %10, %11
2261 //
2262 // Padding with undef if necessary:
2263 //
2264 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2265 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2266 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2267 // %7:_(s2) = G_IMPLICIT_DEF
2268 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2269 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2270 // %10:_(s12) = G_MERGE_VALUES %8, %9
2271
2272 const int GCD = std::gcd(SrcSize, WideSize);
2273 LLT GCDTy = LLT::scalar(GCD);
2274
2275 SmallVector<Register, 8> NewMergeRegs;
2276 SmallVector<Register, 8> Unmerges;
2277 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2278
2279 // Decompose the original operands if they don't evenly divide.
2280 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2281 Register SrcReg = MO.getReg();
2282 if (GCD == SrcSize) {
2283 Unmerges.push_back(SrcReg);
2284 } else {
2285 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2286 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2287 Unmerges.push_back(Unmerge.getReg(J));
2288 }
2289 }
2290
2291 // Pad with undef to the next size that is a multiple of the requested size.
2292 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2293 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2294 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2295 Unmerges.push_back(UndefReg);
2296 }
2297
2298 const int PartsPerGCD = WideSize / GCD;
2299
2300 // Build merges of each piece.
2301 ArrayRef<Register> Slicer(Unmerges);
2302 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2303 auto Merge =
2304 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2305 NewMergeRegs.push_back(Merge.getReg(0));
2306 }
2307
2308 // A truncate may be necessary if the requested type doesn't evenly divide the
2309 // original result type.
2310 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2311 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2312 } else {
2313 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2314 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2315 }
2316
2317 MI.eraseFromParent();
2318 return Legalized;
2319}
2320
2321 LegalizerHelper::LegalizeResult
2322 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2323 LLT WideTy) {
2324 if (TypeIdx != 0)
2325 return UnableToLegalize;
2326
2327 int NumDst = MI.getNumOperands() - 1;
2328 Register SrcReg = MI.getOperand(NumDst).getReg();
2329 LLT SrcTy = MRI.getType(SrcReg);
2330 if (SrcTy.isVector())
2331 return UnableToLegalize;
2332
2333 Register Dst0Reg = MI.getOperand(0).getReg();
2334 LLT DstTy = MRI.getType(Dst0Reg);
2335 if (!DstTy.isScalar())
2336 return UnableToLegalize;
2337
2338 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2339 if (SrcTy.isPointer()) {
2340 const DataLayout &DL = MIRBuilder.getDataLayout();
2341 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2342 LLVM_DEBUG(
2343 dbgs() << "Not casting non-integral address space integer\n");
2344 return UnableToLegalize;
2345 }
2346
2347 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2348 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2349 }
2350
2351 // Widen SrcTy to WideTy. This does not affect the result, but since the
2352 // user requested this size, the target probably handles it better than
2353 // SrcTy, and it should reduce the total number of legalization artifacts.
2354 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2355 SrcTy = WideTy;
2356 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2357 }
2358
2359 // There's no unmerge type to target. Directly extract the bits from the
2360 // source type.
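// e.g. %a:_(s8), %b:_(s8) = G_UNMERGE_VALUES %x:_(s16) widened to s32:
//   %w:_(s32) = G_ANYEXT %x:_(s16)
//   %a:_(s8) = G_TRUNC %w:_(s32)
//   %sh:_(s32) = G_LSHR %w:_(s32), 8
//   %b:_(s8) = G_TRUNC %sh:_(s32)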
2361 unsigned DstSize = DstTy.getSizeInBits();
2362
2363 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2364 for (int I = 1; I != NumDst; ++I) {
2365 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2366 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2367 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2368 }
2369
2370 MI.eraseFromParent();
2371 return Legalized;
2372 }
2373
2374 // Extend the source to a wider type.
2375 LLT LCMTy = getLCMType(SrcTy, WideTy);
2376
2377 Register WideSrc = SrcReg;
2378 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2379 // TODO: If this is an integral address space, cast to integer and anyext.
2380 if (SrcTy.isPointer()) {
2381 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2382 return UnableToLegalize;
2383 }
2384
2385 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2386 }
2387
2388 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2389
2390 // Create a sequence of unmerges and merges to the original results. Since we
2391 // may have widened the source, we will need to pad the results with dead defs
2392 // to cover the source register.
2393 // e.g. widen s48 to s64:
2394 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2395 //
2396 // =>
2397 // %4:_(s192) = G_ANYEXT %0:_(s96)
2398 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2399 // ; unpack to GCD type, with extra dead defs
2400 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2401 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2402 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2403 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2404 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2405 const LLT GCDTy = getGCDType(WideTy, DstTy);
2406 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2407 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2408
2409 // Directly unmerge to the destination without going through a GCD type
2410 // if possible
2411 if (PartsPerRemerge == 1) {
2412 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2413
2414 for (int I = 0; I != NumUnmerge; ++I) {
2415 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2416
2417 for (int J = 0; J != PartsPerUnmerge; ++J) {
2418 int Idx = I * PartsPerUnmerge + J;
2419 if (Idx < NumDst)
2420 MIB.addDef(MI.getOperand(Idx).getReg());
2421 else {
2422 // Create dead def for excess components.
2423 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2424 }
2425 }
2426
2427 MIB.addUse(Unmerge.getReg(I));
2428 }
2429 } else {
2430 SmallVector<Register, 16> Parts;
2431 for (int J = 0; J != NumUnmerge; ++J)
2432 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2433
2434 SmallVector<Register, 8> RemergeParts;
2435 for (int I = 0; I != NumDst; ++I) {
2436 for (int J = 0; J < PartsPerRemerge; ++J) {
2437 const int Idx = I * PartsPerRemerge + J;
2438 RemergeParts.emplace_back(Parts[Idx]);
2439 }
2440
2441 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2442 RemergeParts.clear();
2443 }
2444 }
2445
2446 MI.eraseFromParent();
2447 return Legalized;
2448}
2449
2450 LegalizerHelper::LegalizeResult
2451 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2452 LLT WideTy) {
2453 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2454 unsigned Offset = MI.getOperand(2).getImm();
2455
2456 if (TypeIdx == 0) {
2457 if (SrcTy.isVector() || DstTy.isVector())
2458 return UnableToLegalize;
2459
2460 SrcOp Src(SrcReg);
2461 if (SrcTy.isPointer()) {
2462 // Extracts from pointers can be handled only if they are really just
2463 // simple integers.
2464 const DataLayout &DL = MIRBuilder.getDataLayout();
2465 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2466 return UnableToLegalize;
2467
2468 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2469 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2470 SrcTy = SrcAsIntTy;
2471 }
2472
2473 if (DstTy.isPointer())
2474 return UnableToLegalize;
2475
2476 if (Offset == 0) {
2477 // Avoid a shift in the degenerate case.
2478 MIRBuilder.buildTrunc(DstReg,
2479 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2480 MI.eraseFromParent();
2481 return Legalized;
2482 }
2483
2484 // Do a shift in the source type.
2485 LLT ShiftTy = SrcTy;
2486 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2487 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2488 ShiftTy = WideTy;
2489 }
2490
2491 auto LShr = MIRBuilder.buildLShr(
2492 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2493 MIRBuilder.buildTrunc(DstReg, LShr);
2494 MI.eraseFromParent();
2495 return Legalized;
2496 }
2497
2498 if (SrcTy.isScalar()) {
2499 Observer.changingInstr(MI);
2500 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2501 Observer.changedInstr(MI);
2502 return Legalized;
2503 }
2504
2505 if (!SrcTy.isVector())
2506 return UnableToLegalize;
2507
2508 if (DstTy != SrcTy.getElementType())
2509 return UnableToLegalize;
2510
2511 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2512 return UnableToLegalize;
2513
2514 Observer.changingInstr(MI);
2515 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2516
2517 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2518 Offset);
2519 widenScalarDst(MI, WideTy.getScalarType(), 0);
2520 Observer.changedInstr(MI);
2521 return Legalized;
2522}
2523
2524 LegalizerHelper::LegalizeResult
2525 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2526 LLT WideTy) {
2527 if (TypeIdx != 0 || WideTy.isVector())
2528 return UnableToLegalize;
2529 Observer.changingInstr(MI);
2530 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2531 widenScalarDst(MI, WideTy);
2532 Observer.changedInstr(MI);
2533 return Legalized;
2534}
2535
2536 LegalizerHelper::LegalizeResult
2537 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2538 LLT WideTy) {
2539 unsigned Opcode;
2540 unsigned ExtOpcode;
2541 std::optional<Register> CarryIn;
2542 switch (MI.getOpcode()) {
2543 default:
2544 llvm_unreachable("Unexpected opcode!");
2545 case TargetOpcode::G_SADDO:
2546 Opcode = TargetOpcode::G_ADD;
2547 ExtOpcode = TargetOpcode::G_SEXT;
2548 break;
2549 case TargetOpcode::G_SSUBO:
2550 Opcode = TargetOpcode::G_SUB;
2551 ExtOpcode = TargetOpcode::G_SEXT;
2552 break;
2553 case TargetOpcode::G_UADDO:
2554 Opcode = TargetOpcode::G_ADD;
2555 ExtOpcode = TargetOpcode::G_ZEXT;
2556 break;
2557 case TargetOpcode::G_USUBO:
2558 Opcode = TargetOpcode::G_SUB;
2559 ExtOpcode = TargetOpcode::G_ZEXT;
2560 break;
2561 case TargetOpcode::G_SADDE:
2562 Opcode = TargetOpcode::G_UADDE;
2563 ExtOpcode = TargetOpcode::G_SEXT;
2564 CarryIn = MI.getOperand(4).getReg();
2565 break;
2566 case TargetOpcode::G_SSUBE:
2567 Opcode = TargetOpcode::G_USUBE;
2568 ExtOpcode = TargetOpcode::G_SEXT;
2569 CarryIn = MI.getOperand(4).getReg();
2570 break;
2571 case TargetOpcode::G_UADDE:
2572 Opcode = TargetOpcode::G_UADDE;
2573 ExtOpcode = TargetOpcode::G_ZEXT;
2574 CarryIn = MI.getOperand(4).getReg();
2575 break;
2576 case TargetOpcode::G_USUBE:
2577 Opcode = TargetOpcode::G_USUBE;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2579 CarryIn = MI.getOperand(4).getReg();
2580 break;
2581 }
2582
2583 if (TypeIdx == 1) {
2584 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2585
2586 Observer.changingInstr(MI);
2587 if (CarryIn)
2588 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2589 widenScalarDst(MI, WideTy, 1);
2590
2591 Observer.changedInstr(MI);
2592 return Legalized;
2593 }
2594
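// Widen the operation itself: extend both operands to WideTy (sign- or
// zero-extended to match the signedness of the overflow check), do the
// add/sub there, and report overflow if truncating the wide result to the
// original width and re-extending it does not reproduce the wide result.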
2595 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2596 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2597 // Do the arithmetic in the larger type.
2598 Register NewOp;
2599 if (CarryIn) {
2600 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2601 NewOp = MIRBuilder
2602 .buildInstr(Opcode, {WideTy, CarryOutTy},
2603 {LHSExt, RHSExt, *CarryIn})
2604 .getReg(0);
2605 } else {
2606 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2607 }
2608 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2609 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2610 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2611 // There is no overflow if the ExtOp is the same as NewOp.
2612 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2613 // Now trunc the NewOp to the original result.
2614 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2615 MI.eraseFromParent();
2616 return Legalized;
2617}
2618
2619 LegalizerHelper::LegalizeResult
2620 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2621 LLT WideTy) {
2622 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2623 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2624 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2625 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2626 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2627 // We can convert this to:
2628 // 1. Any extend iN to iM
2629 // 2. SHL by M-N
2630 // 3. [US][ADD|SUB|SHL]SAT
2631 // 4. L/ASHR by M-N
2632 //
2633 // It may be more efficient to lower this to a min and a max operation in
2634 // the higher precision arithmetic if the promoted operation isn't legal,
2635 // but this decision is up to the target's lowering request.
2636 Register DstReg = MI.getOperand(0).getReg();
2637
2638 unsigned NewBits = WideTy.getScalarSizeInBits();
2639 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2640
2641 // For the shift variants, the RHS is a shift amount: it must be zero-extended
2642 // to preserve its value, and must not itself be shifted left.
2643 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2644 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2645 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2646 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2647 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2648 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2649
2650 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2651 {ShiftL, ShiftR}, MI.getFlags());
2652
2653 // Use a shift that will preserve the number of sign bits when the trunc is
2654 // folded away.
2655 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2656 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2657
2658 MIRBuilder.buildTrunc(DstReg, Result);
2659 MI.eraseFromParent();
2660 return Legalized;
2661}
2662
2663 LegalizerHelper::LegalizeResult
2664 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2665 LLT WideTy) {
2666 if (TypeIdx == 1) {
2667 Observer.changingInstr(MI);
2668 widenScalarDst(MI, WideTy, 1);
2669 Observer.changedInstr(MI);
2670 return Legalized;
2671 }
2672
2673 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2674 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2675 LLT SrcTy = MRI.getType(LHS);
2676 LLT OverflowTy = MRI.getType(OriginalOverflow);
2677 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2678
2679 // To determine if the result overflowed in the larger type, we extend the
2680 // input to the larger type, do the multiply (checking if it overflows),
2681 // then also check the high bits of the result to see if overflow happened
2682 // there.
2683 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2684 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2685 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2686
2687 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2688 // so we don't need to check the overflow result of larger type Mulo.
2689 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2690
2691 unsigned MulOpc =
2692 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2693
2694 MachineInstrBuilder Mulo;
2695 if (WideMulCanOverflow)
2696 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2697 {LeftOperand, RightOperand});
2698 else
2699 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2700
2701 auto Mul = Mulo->getOperand(0);
2702 MIRBuilder.buildTrunc(Result, Mul);
2703
2704 MachineInstrBuilder ExtResult;
2705 // Overflow occurred if it occurred in the larger type, or if the high part
2706 // of the result does not zero/sign-extend the low part. Check this second
2707 // possibility first.
2708 if (IsSigned) {
2709 // For signed, overflow occurred when the high part does not sign-extend
2710 // the low part.
2711 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2712 } else {
2713 // Unsigned overflow occurred when the high part does not zero-extend the
2714 // low part.
2715 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2716 }
2717
2718 if (WideMulCanOverflow) {
2719 auto Overflow =
2720 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2721 // Finally check if the multiplication in the larger type itself overflowed.
2722 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2723 } else {
2724 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2725 }
2726 MI.eraseFromParent();
2727 return Legalized;
2728}
2729
2730 LegalizerHelper::LegalizeResult
2731 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2732 unsigned Opcode = MI.getOpcode();
2733 switch (Opcode) {
2734 default:
2735 return UnableToLegalize;
2736 case TargetOpcode::G_ATOMICRMW_XCHG:
2737 case TargetOpcode::G_ATOMICRMW_ADD:
2738 case TargetOpcode::G_ATOMICRMW_SUB:
2739 case TargetOpcode::G_ATOMICRMW_AND:
2740 case TargetOpcode::G_ATOMICRMW_OR:
2741 case TargetOpcode::G_ATOMICRMW_XOR:
2742 case TargetOpcode::G_ATOMICRMW_MIN:
2743 case TargetOpcode::G_ATOMICRMW_MAX:
2744 case TargetOpcode::G_ATOMICRMW_UMIN:
2745 case TargetOpcode::G_ATOMICRMW_UMAX:
2746 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2747 Observer.changingInstr(MI);
2748 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2749 widenScalarDst(MI, WideTy, 0);
2750 Observer.changedInstr(MI);
2751 return Legalized;
2752 case TargetOpcode::G_ATOMIC_CMPXCHG:
2753 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2754 Observer.changingInstr(MI);
2755 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2756 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2757 widenScalarDst(MI, WideTy, 0);
2758 Observer.changedInstr(MI);
2759 return Legalized;
2760 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2761 if (TypeIdx == 0) {
2762 Observer.changingInstr(MI);
2763 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2764 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2765 widenScalarDst(MI, WideTy, 0);
2766 Observer.changedInstr(MI);
2767 return Legalized;
2768 }
2769 assert(TypeIdx == 1 &&
2770 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2771 Observer.changingInstr(MI);
2772 widenScalarDst(MI, WideTy, 1);
2773 Observer.changedInstr(MI);
2774 return Legalized;
2775 case TargetOpcode::G_EXTRACT:
2776 return widenScalarExtract(MI, TypeIdx, WideTy);
2777 case TargetOpcode::G_INSERT:
2778 return widenScalarInsert(MI, TypeIdx, WideTy);
2779 case TargetOpcode::G_MERGE_VALUES:
2780 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2781 case TargetOpcode::G_UNMERGE_VALUES:
2782 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2783 case TargetOpcode::G_SADDO:
2784 case TargetOpcode::G_SSUBO:
2785 case TargetOpcode::G_UADDO:
2786 case TargetOpcode::G_USUBO:
2787 case TargetOpcode::G_SADDE:
2788 case TargetOpcode::G_SSUBE:
2789 case TargetOpcode::G_UADDE:
2790 case TargetOpcode::G_USUBE:
2791 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_UMULO:
2793 case TargetOpcode::G_SMULO:
2794 return widenScalarMulo(MI, TypeIdx, WideTy);
2795 case TargetOpcode::G_SADDSAT:
2796 case TargetOpcode::G_SSUBSAT:
2797 case TargetOpcode::G_SSHLSAT:
2798 case TargetOpcode::G_UADDSAT:
2799 case TargetOpcode::G_USUBSAT:
2800 case TargetOpcode::G_USHLSAT:
2801 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_CTTZ:
2803 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2804 case TargetOpcode::G_CTLZ:
2805 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2806 case TargetOpcode::G_CTPOP: {
2807 if (TypeIdx == 0) {
2808 Observer.changingInstr(MI);
2809 widenScalarDst(MI, WideTy, 0);
2810 Observer.changedInstr(MI);
2811 return Legalized;
2812 }
2813
2814 Register SrcReg = MI.getOperand(1).getReg();
2815
2816 // First extend the input.
2817 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2818 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2819 ? TargetOpcode::G_ANYEXT
2820 : TargetOpcode::G_ZEXT;
2821 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2822 LLT CurTy = MRI.getType(SrcReg);
2823 unsigned NewOpc = Opcode;
2824 if (NewOpc == TargetOpcode::G_CTTZ) {
2825 // The count is the same in the larger type except if the original
2826 // value was zero. This can be handled by setting the bit just off
2827 // the top of the original type.
2828 auto TopBit =
2829 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2830 MIBSrc = MIRBuilder.buildOr(
2831 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2832 // Now we know the operand is non-zero, use the more relaxed opcode.
2833 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2834 }
2835
2836 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2837
2838 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2839 // An optimization where the result is the CTLZ after shifting left by the
2840 // width difference between WideTy and CurTy, that is,
2841 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2842 // Result = ctlz MIBSrc
2843 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2844 MIRBuilder.buildConstant(WideTy, SizeDiff));
2845 }
2846
2847 // Perform the operation at the larger size.
2848 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2849 // This is already the correct result for CTPOP and CTTZs
2850 if (Opcode == TargetOpcode::G_CTLZ) {
2851 // The correct result is NewOp - (difference in width between WideTy and CurTy).
2852 MIBNewOp = MIRBuilder.buildSub(
2853 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2854 }
2855
2856 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2857 MI.eraseFromParent();
2858 return Legalized;
2859 }
2860 case TargetOpcode::G_BSWAP: {
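// Any-extend the source and byte-swap it in WideTy; the original bytes land
// in the high part of the wide result, so shift them back down by the extra
// bit width before truncating to the original type.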
2861 Observer.changingInstr(MI);
2862 Register DstReg = MI.getOperand(0).getReg();
2863
2864 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2865 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2866 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2867 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2868
2869 MI.getOperand(0).setReg(DstExt);
2870
2871 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2872
2873 LLT Ty = MRI.getType(DstReg);
2874 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2875 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2876 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2877
2878 MIRBuilder.buildTrunc(DstReg, ShrReg);
2879 Observer.changedInstr(MI);
2880 return Legalized;
2881 }
2882 case TargetOpcode::G_BITREVERSE: {
2883 Observer.changingInstr(MI);
2884
2885 Register DstReg = MI.getOperand(0).getReg();
2886 LLT Ty = MRI.getType(DstReg);
2887 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2888
2889 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2890 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2891 MI.getOperand(0).setReg(DstExt);
2892 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2893
2894 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2895 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2896 MIRBuilder.buildTrunc(DstReg, Shift);
2897 Observer.changedInstr(MI);
2898 return Legalized;
2899 }
2900 case TargetOpcode::G_FREEZE:
2901 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2902 Observer.changingInstr(MI);
2903 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2904 widenScalarDst(MI, WideTy);
2905 Observer.changedInstr(MI);
2906 return Legalized;
2907
2908 case TargetOpcode::G_ABS:
2909 Observer.changingInstr(MI);
2910 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2911 widenScalarDst(MI, WideTy);
2912 Observer.changedInstr(MI);
2913 return Legalized;
2914
2915 case TargetOpcode::G_ADD:
2916 case TargetOpcode::G_AND:
2917 case TargetOpcode::G_MUL:
2918 case TargetOpcode::G_OR:
2919 case TargetOpcode::G_XOR:
2920 case TargetOpcode::G_SUB:
2921 case TargetOpcode::G_SHUFFLE_VECTOR:
2922 // Perform operation at larger width (any extension is fine here, high bits
2923 // don't affect the result) and then truncate the result back to the
2924 // original type.
2925 Observer.changingInstr(MI);
2926 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2927 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2928 widenScalarDst(MI, WideTy);
2929 Observer.changedInstr(MI);
2930 return Legalized;
2931
2932 case TargetOpcode::G_SBFX:
2933 case TargetOpcode::G_UBFX:
2934 Observer.changingInstr(MI);
2935
2936 if (TypeIdx == 0) {
2937 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2938 widenScalarDst(MI, WideTy);
2939 } else {
2940 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2941 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2942 }
2943
2944 Observer.changedInstr(MI);
2945 return Legalized;
2946
2947 case TargetOpcode::G_SHL:
2948 Observer.changingInstr(MI);
2949
2950 if (TypeIdx == 0) {
2951 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2952 widenScalarDst(MI, WideTy);
2953 } else {
2954 assert(TypeIdx == 1);
2955 // The "number of bits to shift" operand must preserve its value as an
2956 // unsigned integer:
2957 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2958 }
2959
2960 Observer.changedInstr(MI);
2961 return Legalized;
2962
2963 case TargetOpcode::G_ROTR:
2964 case TargetOpcode::G_ROTL:
2965 if (TypeIdx != 1)
2966 return UnableToLegalize;
2967
2968 Observer.changingInstr(MI);
2969 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2970 Observer.changedInstr(MI);
2971 return Legalized;
2972
2973 case TargetOpcode::G_SDIV:
2974 case TargetOpcode::G_SREM:
2975 case TargetOpcode::G_SMIN:
2976 case TargetOpcode::G_SMAX:
2977 case TargetOpcode::G_ABDS:
2978 Observer.changingInstr(MI);
2979 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2980 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2981 widenScalarDst(MI, WideTy);
2982 Observer.changedInstr(MI);
2983 return Legalized;
2984
2985 case TargetOpcode::G_SDIVREM:
2986 Observer.changingInstr(MI);
2987 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2988 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2989 widenScalarDst(MI, WideTy);
2990 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2991 widenScalarDst(MI, WideTy, 1);
2992 Observer.changedInstr(MI);
2993 return Legalized;
2994
2995 case TargetOpcode::G_ASHR:
2996 case TargetOpcode::G_LSHR:
2997 Observer.changingInstr(MI);
2998
2999 if (TypeIdx == 0) {
3000 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3001 : TargetOpcode::G_ZEXT;
3002
3003 widenScalarSrc(MI, WideTy, 1, CvtOp);
3004 widenScalarDst(MI, WideTy);
3005 } else {
3006 assert(TypeIdx == 1);
3007 // The "number of bits to shift" operand must preserve its value as an
3008 // unsigned integer:
3009 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3010 }
3011
3012 Observer.changedInstr(MI);
3013 return Legalized;
3014 case TargetOpcode::G_UDIV:
3015 case TargetOpcode::G_UREM:
3016 case TargetOpcode::G_ABDU:
3017 Observer.changingInstr(MI);
3018 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3019 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3020 widenScalarDst(MI, WideTy);
3021 Observer.changedInstr(MI);
3022 return Legalized;
3023 case TargetOpcode::G_UDIVREM:
3024 Observer.changingInstr(MI);
3025 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3026 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3027 widenScalarDst(MI, WideTy);
3028 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3029 widenScalarDst(MI, WideTy, 1);
3030 Observer.changedInstr(MI);
3031 return Legalized;
3032 case TargetOpcode::G_UMIN:
3033 case TargetOpcode::G_UMAX: {
3034 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3035
3036 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3037 unsigned ExtOpc =
3038 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3039 getApproximateEVTForLLT(WideTy, Ctx))
3040 ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3042
3043 Observer.changingInstr(MI);
3044 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3045 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3046 widenScalarDst(MI, WideTy);
3047 Observer.changedInstr(MI);
3048 return Legalized;
3049 }
3050
3051 case TargetOpcode::G_SELECT:
3052 Observer.changingInstr(MI);
3053 if (TypeIdx == 0) {
3054 // Perform operation at larger width (any extension is fine here, high
3055 // bits don't affect the result) and then truncate the result back to the
3056 // original type.
3057 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3058 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3059 widenScalarDst(MI, WideTy);
3060 } else {
3061 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3062 // Explicit extension is required here since high bits affect the result.
3063 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3064 }
3065 Observer.changedInstr(MI);
3066 return Legalized;
3067
3068 case TargetOpcode::G_FPTOSI:
3069 case TargetOpcode::G_FPTOUI:
3070 case TargetOpcode::G_INTRINSIC_LRINT:
3071 case TargetOpcode::G_INTRINSIC_LLRINT:
3072 case TargetOpcode::G_IS_FPCLASS:
3073 Observer.changingInstr(MI);
3074
3075 if (TypeIdx == 0)
3076 widenScalarDst(MI, WideTy);
3077 else
3078 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3079
3080 Observer.changedInstr(MI);
3081 return Legalized;
3082 case TargetOpcode::G_SITOFP:
3083 Observer.changingInstr(MI);
3084
3085 if (TypeIdx == 0)
3086 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3087 else
3088 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3089
3090 Observer.changedInstr(MI);
3091 return Legalized;
3092 case TargetOpcode::G_UITOFP:
3093 Observer.changingInstr(MI);
3094
3095 if (TypeIdx == 0)
3096 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3097 else
3098 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3099
3100 Observer.changedInstr(MI);
3101 return Legalized;
3102 case TargetOpcode::G_FPTOSI_SAT:
3103 case TargetOpcode::G_FPTOUI_SAT:
3104 Observer.changingInstr(MI);
3105
3106 if (TypeIdx == 0) {
3107 Register OldDst = MI.getOperand(0).getReg();
3108 LLT Ty = MRI.getType(OldDst);
3109 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3110 Register NewDst;
3111 MI.getOperand(0).setReg(ExtReg);
3112 uint64_t ShortBits = Ty.getScalarSizeInBits();
3113 uint64_t WideBits = WideTy.getScalarSizeInBits();
3114 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3115 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3116 // z = i16 fptosi_sat(a)
3117 // ->
3118 // x = i32 fptosi_sat(a)
3119 // y = smin(x, 32767)
3120 // z = smax(y, -32768)
3121 auto MaxVal = MIRBuilder.buildConstant(
3122 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3123 auto MinVal = MIRBuilder.buildConstant(
3124 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3125 Register MidReg =
3126 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3127 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3128 } else {
3129 // z = i16 fptoui_sat(a)
3130 // ->
3131 // x = i32 fptoui_sat(a)
3132 // y = smin(x, 65535)
3133 auto MaxVal = MIRBuilder.buildConstant(
3134 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3135 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3136 }
3137 MIRBuilder.buildTrunc(OldDst, NewDst);
3138 } else
3139 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3140
3141 Observer.changedInstr(MI);
3142 return Legalized;
3143 case TargetOpcode::G_LOAD:
3144 case TargetOpcode::G_SEXTLOAD:
3145 case TargetOpcode::G_ZEXTLOAD:
3146 Observer.changingInstr(MI);
3147 widenScalarDst(MI, WideTy);
3148 Observer.changedInstr(MI);
3149 return Legalized;
3150
3151 case TargetOpcode::G_STORE: {
3152 if (TypeIdx != 0)
3153 return UnableToLegalize;
3154
3155 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3156 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3157 if (!Ty.isScalar()) {
3158 // We need to widen the vector element type.
3159 Observer.changingInstr(MI);
3160 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3161 // We also need to adjust the MMO to turn this into a truncating store.
3162 MachineMemOperand &MMO = **MI.memoperands_begin();
3163 MachineFunction &MF = MIRBuilder.getMF();
3164 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3165 MI.setMemRefs(MF, {NewMMO});
3166 Observer.changedInstr(MI);
3167 return Legalized;
3168 }
3169
3170 Observer.changingInstr(MI);
3171
3172 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3173 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3174 widenScalarSrc(MI, WideTy, 0, ExtType);
3175
3176 Observer.changedInstr(MI);
3177 return Legalized;
3178 }
3179 case TargetOpcode::G_CONSTANT: {
3180 MachineOperand &SrcMO = MI.getOperand(1);
3181 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3182 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3183 MRI.getType(MI.getOperand(0).getReg()));
3184 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3185 ExtOpc == TargetOpcode::G_ANYEXT) &&
3186 "Illegal Extend");
3187 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3188 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3189 ? SrcVal.sext(WideTy.getSizeInBits())
3190 : SrcVal.zext(WideTy.getSizeInBits());
3191 Observer.changingInstr(MI);
3192 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3193
3194 widenScalarDst(MI, WideTy);
3195 Observer.changedInstr(MI);
3196 return Legalized;
3197 }
3198 case TargetOpcode::G_FCONSTANT: {
3199 // To avoid changing the bits of the constant due to extension to a larger
3200 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3201 MachineOperand &SrcMO = MI.getOperand(1);
3202 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3203 MIRBuilder.setInstrAndDebugLoc(MI);
3204 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3205 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3206 MI.eraseFromParent();
3207 return Legalized;
3208 }
3209 case TargetOpcode::G_IMPLICIT_DEF: {
3210 Observer.changingInstr(MI);
3211 widenScalarDst(MI, WideTy);
3212 Observer.changedInstr(MI);
3213 return Legalized;
3214 }
3215 case TargetOpcode::G_BRCOND:
3216 Observer.changingInstr(MI);
3217 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3218 Observer.changedInstr(MI);
3219 return Legalized;
3220
3221 case TargetOpcode::G_FCMP:
3222 Observer.changingInstr(MI);
3223 if (TypeIdx == 0)
3224 widenScalarDst(MI, WideTy);
3225 else {
3226 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3227 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3228 }
3229 Observer.changedInstr(MI);
3230 return Legalized;
3231
3232 case TargetOpcode::G_ICMP:
3233 Observer.changingInstr(MI);
3234 if (TypeIdx == 0)
3235 widenScalarDst(MI, WideTy);
3236 else {
3237 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3238 CmpInst::Predicate Pred =
3239 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3240
3241 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3242 unsigned ExtOpcode =
3243 (CmpInst::isSigned(Pred) ||
3244 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3245 getApproximateEVTForLLT(WideTy, Ctx)))
3246 ? TargetOpcode::G_SEXT
3247 : TargetOpcode::G_ZEXT;
3248 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3249 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3250 }
3251 Observer.changedInstr(MI);
3252 return Legalized;
3253
3254 case TargetOpcode::G_PTR_ADD:
3255 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3256 Observer.changingInstr(MI);
3257 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3258 Observer.changedInstr(MI);
3259 return Legalized;
3260
3261 case TargetOpcode::G_PHI: {
3262 assert(TypeIdx == 0 && "Expecting only Idx 0");
3263
3264 Observer.changingInstr(MI);
3265 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3266 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3267 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3268 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3269 }
3270
3271 MachineBasicBlock &MBB = *MI.getParent();
3272 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3273 widenScalarDst(MI, WideTy);
3274 Observer.changedInstr(MI);
3275 return Legalized;
3276 }
3277 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3278 if (TypeIdx == 0) {
3279 Register VecReg = MI.getOperand(1).getReg();
3280 LLT VecTy = MRI.getType(VecReg);
3281 Observer.changingInstr(MI);
3282
3283 widenScalarSrc(
3284 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3285 TargetOpcode::G_ANYEXT);
3286
3287 widenScalarDst(MI, WideTy, 0);
3288 Observer.changedInstr(MI);
3289 return Legalized;
3290 }
3291
3292 if (TypeIdx != 2)
3293 return UnableToLegalize;
3294 Observer.changingInstr(MI);
3295 // TODO: Probably should be zext
3296 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3297 Observer.changedInstr(MI);
3298 return Legalized;
3299 }
3300 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3301 if (TypeIdx == 0) {
3302 Observer.changingInstr(MI);
3303 const LLT WideEltTy = WideTy.getElementType();
3304
3305 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3306 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3307 widenScalarDst(MI, WideTy, 0);
3308 Observer.changedInstr(MI);
3309 return Legalized;
3310 }
3311
3312 if (TypeIdx == 1) {
3313 Observer.changingInstr(MI);
3314
3315 Register VecReg = MI.getOperand(1).getReg();
3316 LLT VecTy = MRI.getType(VecReg);
3317 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3318
3319 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3320 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3321 widenScalarDst(MI, WideVecTy, 0);
3322 Observer.changedInstr(MI);
3323 return Legalized;
3324 }
3325
3326 if (TypeIdx == 2) {
3327 Observer.changingInstr(MI);
3328 // TODO: Probably should be zext
3329 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3330 Observer.changedInstr(MI);
3331 return Legalized;
3332 }
3333
3334 return UnableToLegalize;
3335 }
3336 case TargetOpcode::G_FADD:
3337 case TargetOpcode::G_FMUL:
3338 case TargetOpcode::G_FSUB:
3339 case TargetOpcode::G_FMA:
3340 case TargetOpcode::G_FMAD:
3341 case TargetOpcode::G_FNEG:
3342 case TargetOpcode::G_FABS:
3343 case TargetOpcode::G_FCANONICALIZE:
3344 case TargetOpcode::G_FMINNUM:
3345 case TargetOpcode::G_FMAXNUM:
3346 case TargetOpcode::G_FMINNUM_IEEE:
3347 case TargetOpcode::G_FMAXNUM_IEEE:
3348 case TargetOpcode::G_FMINIMUM:
3349 case TargetOpcode::G_FMAXIMUM:
3350 case TargetOpcode::G_FMINIMUMNUM:
3351 case TargetOpcode::G_FMAXIMUMNUM:
3352 case TargetOpcode::G_FDIV:
3353 case TargetOpcode::G_FREM:
3354 case TargetOpcode::G_FCEIL:
3355 case TargetOpcode::G_FFLOOR:
3356 case TargetOpcode::G_FCOS:
3357 case TargetOpcode::G_FSIN:
3358 case TargetOpcode::G_FTAN:
3359 case TargetOpcode::G_FACOS:
3360 case TargetOpcode::G_FASIN:
3361 case TargetOpcode::G_FATAN:
3362 case TargetOpcode::G_FATAN2:
3363 case TargetOpcode::G_FCOSH:
3364 case TargetOpcode::G_FSINH:
3365 case TargetOpcode::G_FTANH:
3366 case TargetOpcode::G_FLOG10:
3367 case TargetOpcode::G_FLOG:
3368 case TargetOpcode::G_FLOG2:
3369 case TargetOpcode::G_FRINT:
3370 case TargetOpcode::G_FNEARBYINT:
3371 case TargetOpcode::G_FSQRT:
3372 case TargetOpcode::G_FEXP:
3373 case TargetOpcode::G_FEXP2:
3374 case TargetOpcode::G_FEXP10:
3375 case TargetOpcode::G_FPOW:
3376 case TargetOpcode::G_INTRINSIC_TRUNC:
3377 case TargetOpcode::G_INTRINSIC_ROUND:
3378 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3379 assert(TypeIdx == 0);
3380 Observer.changingInstr(MI);
3381
3382 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3383 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3384
3385 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3386 Observer.changedInstr(MI);
3387 return Legalized;
3388 case TargetOpcode::G_FMODF: {
3389 Observer.changingInstr(MI);
3390 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3391
3392 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
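// widenScalarDst advances the insert point before emitting its G_FPTRUNC;
// step back one so the truncate of result 0 is also placed immediately after
// the widened instruction.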
3393 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3394 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3395 Observer.changedInstr(MI);
3396 return Legalized;
3397 }
3398 case TargetOpcode::G_FPOWI:
3399 case TargetOpcode::G_FLDEXP:
3400 case TargetOpcode::G_STRICT_FLDEXP: {
3401 if (TypeIdx == 0) {
3402 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3403 return UnableToLegalize;
3404
3405 Observer.changingInstr(MI);
3406 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3407 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3408 Observer.changedInstr(MI);
3409 return Legalized;
3410 }
3411
3412 if (TypeIdx == 1) {
3413 // For some reason SelectionDAG tries to promote to a libcall without
3414 // actually changing the integer type for promotion.
3415 Observer.changingInstr(MI);
3416 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3417 Observer.changedInstr(MI);
3418 return Legalized;
3419 }
3420
3421 return UnableToLegalize;
3422 }
3423 case TargetOpcode::G_FFREXP: {
3424 Observer.changingInstr(MI);
3425
3426 if (TypeIdx == 0) {
3427 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3428 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3429 } else {
3430 widenScalarDst(MI, WideTy, 1);
3431 }
3432
3433 Observer.changedInstr(MI);
3434 return Legalized;
3435 }
3436 case TargetOpcode::G_INTTOPTR:
3437 if (TypeIdx != 1)
3438 return UnableToLegalize;
3439
3440 Observer.changingInstr(MI);
3441 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3442 Observer.changedInstr(MI);
3443 return Legalized;
3444 case TargetOpcode::G_PTRTOINT:
3445 if (TypeIdx != 0)
3446 return UnableToLegalize;
3447
3448 Observer.changingInstr(MI);
3449 widenScalarDst(MI, WideTy, 0);
3450 Observer.changedInstr(MI);
3451 return Legalized;
3452 case TargetOpcode::G_BUILD_VECTOR: {
3453 Observer.changingInstr(MI);
3454
3455 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3456 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3457 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3458
3459 // Avoid changing the result vector type if the source element type was
3460 // requested.
3461 if (TypeIdx == 1) {
3462 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3463 } else {
3464 widenScalarDst(MI, WideTy, 0);
3465 }
3466
3467 Observer.changedInstr(MI);
3468 return Legalized;
3469 }
3470 case TargetOpcode::G_SEXT_INREG:
3471 if (TypeIdx != 0)
3472 return UnableToLegalize;
3473
3474 Observer.changingInstr(MI);
3475 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3476 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3477 Observer.changedInstr(MI);
3478 return Legalized;
3479 case TargetOpcode::G_PTRMASK: {
3480 if (TypeIdx != 1)
3481 return UnableToLegalize;
3482 Observer.changingInstr(MI);
3483 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3484 Observer.changedInstr(MI);
3485 return Legalized;
3486 }
3487 case TargetOpcode::G_VECREDUCE_ADD: {
3488 if (TypeIdx != 1)
3489 return UnableToLegalize;
3490 Observer.changingInstr(MI);
3491 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3492 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3493 Observer.changedInstr(MI);
3494 return Legalized;
3495 }
3496 case TargetOpcode::G_VECREDUCE_FADD:
3497 case TargetOpcode::G_VECREDUCE_FMUL:
3498 case TargetOpcode::G_VECREDUCE_FMIN:
3499 case TargetOpcode::G_VECREDUCE_FMAX:
3500 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3501 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3502 if (TypeIdx != 0)
3503 return UnableToLegalize;
3504 Observer.changingInstr(MI);
3505 Register VecReg = MI.getOperand(1).getReg();
3506 LLT VecTy = MRI.getType(VecReg);
3507 LLT WideVecTy = VecTy.isVector()
3508 ? LLT::vector(VecTy.getElementCount(), WideTy)
3509 : WideTy;
3510 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3511 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3512 Observer.changedInstr(MI);
3513 return Legalized;
3514 }
3515 case TargetOpcode::G_VSCALE: {
3516 MachineOperand &SrcMO = MI.getOperand(1);
3517 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3518 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3519 // The CImm is always a signed value
3520 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3521 Observer.changingInstr(MI);
3522 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3523 widenScalarDst(MI, WideTy);
3524 Observer.changedInstr(MI);
3525 return Legalized;
3526 }
3527 case TargetOpcode::G_SPLAT_VECTOR: {
3528 if (TypeIdx != 1)
3529 return UnableToLegalize;
3530
3531 Observer.changingInstr(MI);
3532 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3533 Observer.changedInstr(MI);
3534 return Legalized;
3535 }
3536 case TargetOpcode::G_INSERT_SUBVECTOR: {
3537 if (TypeIdx != 0)
3538 return UnableToLegalize;
3539
3540 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3541 Register BigVec = IS.getBigVec();
3542 Register SubVec = IS.getSubVec();
3543
3544 LLT SubVecTy = MRI.getType(SubVec);
3545 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3546
3547 // Widen the G_INSERT_SUBVECTOR
3548 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3549 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3550 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3551 IS.getIndexImm());
3552
3553 // Truncate back down
3554 auto SplatZero = MIRBuilder.buildSplatVector(
3555 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3556 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3557 SplatZero);
3558
3559 MI.eraseFromParent();
3560
3561 return Legalized;
3562 }
3563 }
3564}
3565
3566 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3567 MachineIRBuilder &B, Register Src, LLT Ty) {
3568 auto Unmerge = B.buildUnmerge(Ty, Src);
3569 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3570 Pieces.push_back(Unmerge.getReg(I));
3571}
3572
3573static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3574 MachineIRBuilder &MIRBuilder) {
3575 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3576 MachineFunction &MF = MIRBuilder.getMF();
3577 const DataLayout &DL = MIRBuilder.getDataLayout();
3578 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3579 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3580 LLT DstLLT = MRI.getType(DstReg);
3581
3582 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3583
3584 auto Addr = MIRBuilder.buildConstantPool(
3585 AddrPtrTy,
3586 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3587
3588 MachineMemOperand *MMO =
3589 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3590 MachineMemOperand::MOLoad, DstLLT, Alignment);
3591
3592 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3593}
3594
3595 LegalizerHelper::LegalizeResult
3596 LegalizerHelper::lowerConstant(MachineInstr &MI) {
3597 const MachineOperand &ConstOperand = MI.getOperand(1);
3598 const Constant *ConstantVal = ConstOperand.getCImm();
3599
3600 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3601 MI.eraseFromParent();
3602
3603 return Legalized;
3604}
3605
3606 LegalizerHelper::LegalizeResult
3607 LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3608 const MachineOperand &ConstOperand = MI.getOperand(1);
3609 const Constant *ConstantVal = ConstOperand.getFPImm();
3610
3611 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3612 MI.eraseFromParent();
3613
3614 return Legalized;
3615}
3616
3617 LegalizerHelper::LegalizeResult
3618 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3619 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3620 if (SrcTy.isVector()) {
3621 LLT SrcEltTy = SrcTy.getElementType();
3622 SmallVector<Register, 8> SrcRegs;
3623
3624 if (DstTy.isVector()) {
3625 int NumDstElt = DstTy.getNumElements();
3626 int NumSrcElt = SrcTy.getNumElements();
3627
3628 LLT DstEltTy = DstTy.getElementType();
3629 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3630 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3631
3632 // If there's an element size mismatch, insert intermediate casts to match
3633 // the result element type.
3634 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3635 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3636 //
3637 // =>
3638 //
3639 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3640 // %4:_(<2 x s8>) = G_BITCAST %2
3641 // %5:_(<2 x s8>) = G_BITCAST %3
3642 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3643 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3644 SrcPartTy = SrcEltTy;
3645 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3646 //
3647 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3648 //
3649 // =>
3650 //
3651 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3652 // %4:_(s16) = G_BITCAST %2
3653 // %5:_(s16) = G_BITCAST %3
3654 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3655 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3656 DstCastTy = DstEltTy;
3657 }
3658
3659 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3660 for (Register &SrcReg : SrcRegs)
3661 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3662 } else
3663 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3664
3665 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3666 MI.eraseFromParent();
3667 return Legalized;
3668 }
3669
3670 if (DstTy.isVector()) {
3671 SmallVector<Register, 8> SrcRegs;
3672 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3673 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3674 MI.eraseFromParent();
3675 return Legalized;
3676 }
3677
3678 return UnableToLegalize;
3679}
3680
3681/// Figure out the bit offset into a register when coercing a vector index for
3682 /// the wide element type. This is only for the case when promoting a vector
3683 /// to one with larger elements.
3684 ///
3685 ///
3686/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3687/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
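/// For example (illustrative values): indexing s8 elements within s32 lanes
/// gives DstEltSize / SrcEltSize == 4, so %offset_idx = %idx & 3 and
/// %offset_bits = %offset_idx << 3, i.e. the bit position of the s8 element
/// within its s32 lane.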
3688 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3689 Register Idx,
3690 unsigned NewEltSize,
3691 unsigned OldEltSize) {
3692 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3693 LLT IdxTy = B.getMRI()->getType(Idx);
3694
3695 // Now figure out the amount we need to shift to get the target bits.
3696 auto OffsetMask = B.buildConstant(
3697 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3698 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3699 return B.buildShl(IdxTy, OffsetIdx,
3700 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3701}
3702
3703/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3704/// is casting to a vector with a smaller element size, perform multiple element
3705/// extracts and merge the results. If this is coercing to a vector with larger
3706/// elements, index the bitcasted vector and extract the target element with bit
3707/// operations. This is intended to force the indexing in the native register
3708/// size for architectures that can dynamically index the register file.
3709 LegalizerHelper::LegalizeResult
3710 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3711 LLT CastTy) {
3712 if (TypeIdx != 1)
3713 return UnableToLegalize;
3714
3715 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3716
3717 LLT SrcEltTy = SrcVecTy.getElementType();
3718 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3719 unsigned OldNumElts = SrcVecTy.getNumElements();
3720
3721 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3722 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3723
3724 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3725 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3726 if (NewNumElts > OldNumElts) {
3727 // Decreasing the vector element size
3728 //
3729 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3730 // =>
3731 // v4i32:castx = bitcast x:v2i64
3732 //
3733 // i64 = bitcast
3734 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3735 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3736 //
3737 if (NewNumElts % OldNumElts != 0)
3738 return UnableToLegalize;
3739
3740 // Type of the intermediate result vector.
3741 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3742 LLT MidTy =
3743 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3744
3745 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3746
3747 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3748 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3749
3750 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3751 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3752 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3753 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3754 NewOps[I] = Elt.getReg(0);
3755 }
3756
3757 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3758 MIRBuilder.buildBitcast(Dst, NewVec);
3759 MI.eraseFromParent();
3760 return Legalized;
3761 }
3762
3763 if (NewNumElts < OldNumElts) {
3764 if (NewEltSize % OldEltSize != 0)
3765 return UnableToLegalize;
3766
3767 // This only depends on powers of 2 because we use bit tricks to figure out
3768 // the bit offset we need to shift to get the target element. A general
3769 // expansion could emit division/multiply.
3770 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3771 return UnableToLegalize;
3772
3773 // Increasing the vector element size.
3774 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3775 //
3776 // =>
3777 //
3778 // %cast = G_BITCAST %vec
3779 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3780 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3781 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3782 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3783 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3784 // %elt = G_TRUNC %elt_bits
3785
3786 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3787 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3788
3789 // Divide to get the index in the wider element type.
3790 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3791
3792 Register WideElt = CastVec;
3793 if (CastTy.isVector()) {
3794 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3795 ScaledIdx).getReg(0);
3796 }
3797
3798 // Compute the bit offset into the register of the target element.
3799 Register OffsetBits = getBitcastWiderVectorElementOffset(
3800 MIRBuilder, Idx, NewEltSize, OldEltSize);
3801
3802 // Shift the wide element to get the target element.
3803 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3804 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3805 MI.eraseFromParent();
3806 return Legalized;
3807 }
3808
3809 return UnableToLegalize;
3810}
3811
3812 /// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits, while
3813 /// preserving the other bits in \p TargetReg.
3814 ///
3815 /// (ZExt(InsertReg) << OffsetBits) | (TargetReg & ~(LowBitsMask(InsertReg.size()) << OffsetBits))
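/// For example (illustrative values): inserting an s8 value at OffsetBits = 16
/// into an s32 target builds the mask 0xFF << 16, clears those bits in
/// TargetReg, and ORs in (ZExt(InsertReg) << 16).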
3816 static Register buildBitFieldInsert(MachineIRBuilder &B,
3817 Register TargetReg, Register InsertReg,
3818 Register OffsetBits) {
3819 LLT TargetTy = B.getMRI()->getType(TargetReg);
3820 LLT InsertTy = B.getMRI()->getType(InsertReg);
3821 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3822 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3823
3824 // Produce a bitmask of the value to insert
3825 auto EltMask = B.buildConstant(
3826 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3827 InsertTy.getSizeInBits()));
3828 // Shift it into position
3829 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3830 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3831
3832 // Clear out the bits in the wide element
3833 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3834
3835 // The value to insert has all zeros already, so stick it into the masked
3836 // wide element.
3837 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3838}
3839
3840/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3841/// is increasing the element size, perform the indexing in the target element
3842/// type, and use bit operations to insert at the element position. This is
3843/// intended for architectures that can dynamically index the register file and
3844/// want to force indexing in the native register size.
3845 LegalizerHelper::LegalizeResult
3846 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3847 LLT CastTy) {
3848 if (TypeIdx != 0)
3849 return UnableToLegalize;
3850
3851 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3852 MI.getFirst4RegLLTs();
3853 LLT VecTy = DstTy;
3854
3855 LLT VecEltTy = VecTy.getElementType();
3856 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3857 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3858 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3859
3860 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3861 unsigned OldNumElts = VecTy.getNumElements();
3862
3863 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3864 if (NewNumElts < OldNumElts) {
3865 if (NewEltSize % OldEltSize != 0)
3866 return UnableToLegalize;
3867
3868 // This only depends on powers of 2 because we use bit tricks to figure out
3869 // the bit offset we need to shift to get the target element. A general
3870 // expansion could emit division/multiply.
3871 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3872 return UnableToLegalize;
3873
3874 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3875 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3876
3877 // Divide to get the index in the wider element type.
3878 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3879
3880 Register ExtractedElt = CastVec;
3881 if (CastTy.isVector()) {
3882 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3883 ScaledIdx).getReg(0);
3884 }
3885
3886 // Compute the bit offset into the register of the target element.
3887 Register OffsetBits = getBitcastWiderVectorElementOffset(
3888 MIRBuilder, Idx, NewEltSize, OldEltSize);
3889
3890 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3891 Val, OffsetBits);
3892 if (CastTy.isVector()) {
3893 InsertedElt = MIRBuilder.buildInsertVectorElement(
3894 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3895 }
3896
3897 MIRBuilder.buildBitcast(Dst, InsertedElt);
3898 MI.eraseFromParent();
3899 return Legalized;
3900 }
3901
3902 return UnableToLegalize;
3903}
3904
3905// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3906 // those whose operands are smaller than a legal type.
3907//
3908// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3909//
3910// ===>
3911//
3912// s32 = G_BITCAST <4 x s8>
3913// s32 = G_BITCAST <4 x s8>
3914// s32 = G_BITCAST <4 x s8>
3915// s32 = G_BITCAST <4 x s8>
3916// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3917// <16 x s8> = G_BITCAST <4 x s32>
3918 LegalizerHelper::LegalizeResult
3919 LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3920 LLT CastTy) {
3921 // Only G_CONCAT_VECTORS is handled here.
3922 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3923 if (!ConcatMI) {
3924 return UnableToLegalize;
3925 }
3926
3927 // Determine the scalar type each source operand will be bitcast to.
3928 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3929 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3930
3931 // Check if the build vector is Legal
3932 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3933 return UnableToLegalize;
3934 }
3935
3936 // Bitcast the sources
3937 SmallVector<Register> BitcastRegs;
3938 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3939 BitcastRegs.push_back(
3940 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3941 .getReg(0));
3942 }
3943
3944 // Build the scalar values into a vector
3945 Register BuildReg =
3946 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3947 MIRBuilder.buildBitcast(DstReg, BuildReg);
3948
3949 MI.eraseFromParent();
3950 return Legalized;
3951}
3952
3953 // This bitcasts a shuffle vector to a different type, currently required to
3954 // have the same element size. Mostly used to legalize ptr vectors, where
3955 // ptrtoint/inttoptr will be used instead.
3956 //
3957 // <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3958 // ===>
3959 // <4 x s64> = G_PTRTOINT <4 x p0>
3960 // <4 x s64> = G_PTRTOINT <4 x p0>
3961 // <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3962 // <16 x p0> = G_INTTOPTR <16 x s64>
3963 LegalizerHelper::LegalizeResult
3964 LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3965 LLT CastTy) {
3966 auto ShuffleMI = cast<GShuffleVector>(&MI);
3967 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3968 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3969
3970 // We currently only handle vectors of the same size.
3971 if (TypeIdx != 0 ||
3972 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3973 CastTy.getElementCount() != DstTy.getElementCount())
3974 return UnableToLegalize;
3975
3976 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3977
3978 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3979 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3980 auto Shuf =
3981 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3982 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3983
3984 MI.eraseFromParent();
3985 return Legalized;
3986}
3987
3988/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3989///
3990/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3991///
3992/// ===>
3993///
3994/// <vscale x 2 x i1> = G_BITCAST <vscale x 16 x i1>
3995/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i1>, N / 8
3996/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
3997 LegalizerHelper::LegalizeResult
3998 LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
3999 LLT CastTy) {
4000 auto ES = cast<GExtractSubvector>(&MI);
4001
4002 if (!CastTy.isVector())
4003 return UnableToLegalize;
4004
4005 if (TypeIdx != 0)
4006 return UnableToLegalize;
4007
4008 Register Dst = ES->getReg(0);
4009 Register Src = ES->getSrcVec();
4010 uint64_t Idx = ES->getIndexImm();
4011
4012 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4013
4014 LLT DstTy = MRI.getType(Dst);
4015 LLT SrcTy = MRI.getType(Src);
4016 ElementCount DstTyEC = DstTy.getElementCount();
4017 ElementCount SrcTyEC = SrcTy.getElementCount();
4018 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4019 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4020
4021 if (DstTy == CastTy)
4022 return Legalized;
4023
4024 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4025 return UnableToLegalize;
4026
4027 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4028 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4029 if (CastEltSize < DstEltSize)
4030 return UnableToLegalize;
4031
4032 auto AdjustAmt = CastEltSize / DstEltSize;
4033 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4034 SrcTyMinElts % AdjustAmt != 0)
4035 return UnableToLegalize;
4036
4037 Idx /= AdjustAmt;
4038 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4039 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4040 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4041 MIRBuilder.buildBitcast(Dst, PromotedES);
4042
4043 ES->eraseFromParent();
4044 return Legalized;
4045}
4046
4047/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4048///
4049/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4050/// <vscale x 8 x i1>,
4051/// N
4052///
4053/// ===>
4054///
4055/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4056/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4057/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4058/// <vscale x 1 x i8>, N / 8
4059/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
4060 LegalizerHelper::LegalizeResult
4061 LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
4062 LLT CastTy) {
4063 auto ES = cast<GInsertSubvector>(&MI);
4064
4065 if (!CastTy.isVector())
4066 return UnableToLegalize;
4067
4068 if (TypeIdx != 0)
4069 return UnableToLegalize;
4070
4071 Register Dst = ES->getReg(0);
4072 Register BigVec = ES->getBigVec();
4073 Register SubVec = ES->getSubVec();
4074 uint64_t Idx = ES->getIndexImm();
4075
4076 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4077
4078 LLT DstTy = MRI.getType(Dst);
4079 LLT BigVecTy = MRI.getType(BigVec);
4080 LLT SubVecTy = MRI.getType(SubVec);
4081
4082 if (DstTy == CastTy)
4083 return Legalized;
4084
4085 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4086 return UnableToLegalize;
4087
4088 ElementCount DstTyEC = DstTy.getElementCount();
4089 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4090 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4091 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4092 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4093 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4094
4095 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4096 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4097 if (CastEltSize < DstEltSize)
4098 return UnableToLegalize;
4099
4100 auto AdjustAmt = CastEltSize / DstEltSize;
4101 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4102 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4103 return UnableToLegalize;
4104
4105 Idx /= AdjustAmt;
4106 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4107 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4108 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4109 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4110 auto PromotedIS =
4111 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4112 MIRBuilder.buildBitcast(Dst, PromotedIS);
4113
4114 ES->eraseFromParent();
4115 return Legalized;
4116}
4117
4118 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4119 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4120 Register DstReg = LoadMI.getDstReg();
4121 Register PtrReg = LoadMI.getPointerReg();
4122 LLT DstTy = MRI.getType(DstReg);
4123 MachineMemOperand &MMO = LoadMI.getMMO();
4124 LLT MemTy = MMO.getMemoryType();
4125 MachineFunction &MF = MIRBuilder.getMF();
4126
4127 unsigned MemSizeInBits = MemTy.getSizeInBits();
4128 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4129
4130 if (MemSizeInBits != MemStoreSizeInBits) {
4131 if (MemTy.isVector())
4132 return UnableToLegalize;
4133
4134 // Promote to a byte-sized load if not loading an integral number of
4135 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4136 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4137 MachineMemOperand *NewMMO =
4138 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4139
4140 Register LoadReg = DstReg;
4141 LLT LoadTy = DstTy;
4142
4143 // If this wasn't already an extending load, we need to widen the result
4144 // register to avoid creating a load with a narrower result than the source.
4145 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4146 LoadTy = WideMemTy;
4147 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4148 }
4149
4150 if (isa<GSExtLoad>(LoadMI)) {
4151 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4152 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4153 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4154 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4155 // The extra bits are guaranteed to be zero, since we stored them that
4156 // way. A zext load from Wide thus automatically gives zext from MemVT.
4157 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4158 } else {
4159 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4160 }
4161
4162 if (DstTy != LoadTy)
4163 MIRBuilder.buildTrunc(DstReg, LoadReg);
4164
4165 LoadMI.eraseFromParent();
4166 return Legalized;
4167 }
4168
4169 // Big endian lowering not implemented.
4170 if (MIRBuilder.getDataLayout().isBigEndian())
4171 return UnableToLegalize;
4172
4173 // This load needs splitting into power of 2 sized loads.
4174 //
4175 // Our strategy here is to generate anyextending loads for the smaller
4176 // types up to the next power-of-2 result type, and then combine the two larger
4177 // result values together, before truncating back down to the non-pow-2
4178 // type.
4179 // E.g. v1 = i24 load =>
4180 // v2 = i32 zextload (2 byte)
4181 // v3 = i32 load (1 byte)
4182 // v4 = i32 shl v3, 16
4183 // v5 = i32 or v4, v2
4184 // v1 = i24 trunc v5
4185 // By doing this we generate the correct truncate which should get
4186 // combined away as an artifact with a matching extend.
4187
4188 uint64_t LargeSplitSize, SmallSplitSize;
4189
4190 if (!isPowerOf2_32(MemSizeInBits)) {
4191 // This load needs splitting into power of 2 sized loads.
4192 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4193 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4194 } else {
4195 // This is already a power of 2, but we still need to split this in half.
4196 //
4197 // Assume we're being asked to decompose an unaligned load.
4198 // TODO: If this requires multiple splits, handle them all at once.
4199 auto &Ctx = MF.getFunction().getContext();
4200 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4201 return UnableToLegalize;
4202
4203 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4204 }
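// For example, an s48 extending load yields LargeSplitSize = 32 and
// SmallSplitSize = 16, while an unaligned s64 load is split into two s32 halves.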
4205
4206 if (MemTy.isVector()) {
4207 // TODO: Handle vector extloads
4208 if (MemTy != DstTy)
4209 return UnableToLegalize;
4210
4211 Align Alignment = LoadMI.getAlign();
4212 // Given an alignment larger than the size of the memory, we can increase
4213 // the size of the load without needing to scalarize it.
4214 if (Alignment.value() * 8 > MemSizeInBits &&
4217 DstTy.getElementType());
4218 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4219 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4220 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4221 NewLoad.getReg(0));
4222 LoadMI.eraseFromParent();
4223 return Legalized;
4224 }
4225
4226 // TODO: We can do better than scalarizing the vector and at least split it
4227 // in half.
4228 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4229 }
4230
4231 MachineMemOperand *LargeMMO =
4232 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4233 MachineMemOperand *SmallMMO =
4234 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4235
4236 LLT PtrTy = MRI.getType(PtrReg);
4237 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4238 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4239 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4240 PtrReg, *LargeMMO);
4241
4242 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4243 LargeSplitSize / 8);
4244 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4245 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4246 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4247 SmallPtr, *SmallMMO);
4248
4249 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4250 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4251
4252 if (AnyExtTy == DstTy)
4253 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4254 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4255 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4256 MIRBuilder.buildTrunc(DstReg, {Or});
4257 } else {
4258 assert(DstTy.isPointer() && "expected pointer");
4259 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4260
4261 // FIXME: We currently consider this to be illegal for non-integral address
4262 // spaces, but we still need a way to reinterpret the bits.
4263 MIRBuilder.buildIntToPtr(DstReg, Or);
4264 }
4265
4266 LoadMI.eraseFromParent();
4267 return Legalized;
4268}
4269
4270 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4271 // Lower a non-power of 2 store into multiple pow-2 stores.
4272 // E.g. split an i24 store into an i16 store + i8 store.
4273 // We do this by first extending the stored value to the next largest power
4274 // of 2 type, and then using truncating stores to store the components.
4275 // As with G_LOAD, this generates an extend that can be combined away as an
4276 // artifact instead of leaving behind extracts.
4277 Register SrcReg = StoreMI.getValueReg();
4278 Register PtrReg = StoreMI.getPointerReg();
4279 LLT SrcTy = MRI.getType(SrcReg);
4280 MachineFunction &MF = MIRBuilder.getMF();
4281 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4282 LLT MemTy = MMO.getMemoryType();
4283
4284 unsigned StoreWidth = MemTy.getSizeInBits();
4285 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4286
4287 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4288 // Promote to a byte-sized store with upper bits zero if not
4289 // storing an integral number of bytes. For example, promote
4290 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4291 LLT WideTy = LLT::scalar(StoreSizeInBits);
4292
4293 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4294 // Avoid creating a store whose source is narrower than the memory type.
4295 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4296 SrcTy = WideTy;
4297 }
4298
4299 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4300
4301 MachineMemOperand *NewMMO =
4302 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4303 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4304 StoreMI.eraseFromParent();
4305 return Legalized;
4306 }
4307
4308 if (MemTy.isVector()) {
4309 if (MemTy != SrcTy)
4310 return scalarizeVectorBooleanStore(StoreMI);
4311
4312 // TODO: We can do better than scalarizing the vector and at least split it
4313 // in half.
4314 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4315 }
4316
4317 unsigned MemSizeInBits = MemTy.getSizeInBits();
4318 uint64_t LargeSplitSize, SmallSplitSize;
4319
4320 if (!isPowerOf2_32(MemSizeInBits)) {
4321 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4322 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4323 } else {
4324 auto &Ctx = MF.getFunction().getContext();
4325 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4326 return UnableToLegalize; // Don't know what we're being asked to do.
4327
4328 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4329 }
4330
4331 // Extend to the next pow-2. If this store was itself the result of lowering,
4332 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4333 // that's wider than the stored size.
4334 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4335 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4336
4337 if (SrcTy.isPointer()) {
4338 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4339 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4340 }
4341
4342 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4343
4344 // Obtain the smaller value by shifting away the larger value.
4345 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4346 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4347
4348 // Generate the PtrAdd and truncating stores.
4349 LLT PtrTy = MRI.getType(PtrReg);
4350 auto OffsetCst = MIRBuilder.buildConstant(
4351 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4352 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4353
4354 MachineMemOperand *LargeMMO =
4355 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4356 MachineMemOperand *SmallMMO =
4357 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4358 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4359 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4360 StoreMI.eraseFromParent();
4361 return Legalized;
4362}
4363
4364 LegalizerHelper::LegalizeResult
4365 LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4366 Register SrcReg = StoreMI.getValueReg();
4367 Register PtrReg = StoreMI.getPointerReg();
4368 LLT SrcTy = MRI.getType(SrcReg);
4369 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4370 LLT MemTy = MMO.getMemoryType();
4371 LLT MemScalarTy = MemTy.getElementType();
4372 MachineFunction &MF = MIRBuilder.getMF();
4373
4374 assert(SrcTy.isVector() && "Expect a vector store type");
4375
4376 if (!MemScalarTy.isByteSized()) {
4377 // We need to build an integer scalar of the vector bit pattern.
4378 // It's not legal for us to add padding when storing a vector.
4379 unsigned NumBits = MemTy.getSizeInBits();
4380 LLT IntTy = LLT::scalar(NumBits);
4381 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4382 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4383
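// Pack the vector into one integer: each element is truncated to the memory
// scalar type, zero-extended, shifted to its bit position (reversed on
// big-endian targets), and ORed into the accumulator. For example, a <4 x s1>
// store becomes a single s4 store.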
4384 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4385 auto Elt = MIRBuilder.buildExtractVectorElement(
4386 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4387 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4388 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4389 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4390 ? (MemTy.getNumElements() - 1) - I
4391 : I;
4392 auto ShiftAmt = MIRBuilder.buildConstant(
4393 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4394 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4395 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4396 }
4397 auto PtrInfo = MMO.getPointerInfo();
4398 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4399 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4400 StoreMI.eraseFromParent();
4401 return Legalized;
4402 }
4403
4404 // TODO: implement simple scalarization.
4405 return UnableToLegalize;
4406}
4407
4408 LegalizerHelper::LegalizeResult
4409 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4410 switch (MI.getOpcode()) {
4411 case TargetOpcode::G_LOAD: {
4412 if (TypeIdx != 0)
4413 return UnableToLegalize;
4414 MachineMemOperand &MMO = **MI.memoperands_begin();
4415
4416 // Not sure how to interpret a bitcast of an extending load.
4417 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4418 return UnableToLegalize;
4419
4420 Observer.changingInstr(MI);
4421 bitcastDst(MI, CastTy, 0);
4422 MMO.setType(CastTy);
4423 // The range metadata is no longer valid when reinterpreted as a different
4424 // type.
4425 MMO.clearRanges();
4426 Observer.changedInstr(MI);
4427 return Legalized;
4428 }
4429 case TargetOpcode::G_STORE: {
4430 if (TypeIdx != 0)
4431 return UnableToLegalize;
4432
4433 MachineMemOperand &MMO = **MI.memoperands_begin();
4434
4435 // Not sure how to interpret a bitcast of a truncating store.
4436 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4437 return UnableToLegalize;
4438
4439 Observer.changingInstr(MI);
4440 bitcastSrc(MI, CastTy, 0);
4441 MMO.setType(CastTy);
4442 Observer.changedInstr(MI);
4443 return Legalized;
4444 }
4445 case TargetOpcode::G_SELECT: {
4446 if (TypeIdx != 0)
4447 return UnableToLegalize;
4448
4449 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4450 LLVM_DEBUG(
4451 dbgs() << "bitcast action not implemented for vector select\n");
4452 return UnableToLegalize;
4453 }
4454
4455 Observer.changingInstr(MI);
4456 bitcastSrc(MI, CastTy, 2);
4457 bitcastSrc(MI, CastTy, 3);
4458 bitcastDst(MI, CastTy, 0);
4459 Observer.changedInstr(MI);
4460 return Legalized;
4461 }
4462 case TargetOpcode::G_AND:
4463 case TargetOpcode::G_OR:
4464 case TargetOpcode::G_XOR: {
4465 Observer.changingInstr(MI);
4466 bitcastSrc(MI, CastTy, 1);
4467 bitcastSrc(MI, CastTy, 2);
4468 bitcastDst(MI, CastTy, 0);
4469 Observer.changedInstr(MI);
4470 return Legalized;
4471 }
4472 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4473 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4474 case TargetOpcode::G_INSERT_VECTOR_ELT:
4475 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4476 case TargetOpcode::G_CONCAT_VECTORS:
4477 return bitcastConcatVector(MI, TypeIdx, CastTy);
4478 case TargetOpcode::G_SHUFFLE_VECTOR:
4479 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4480 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4481 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4482 case TargetOpcode::G_INSERT_SUBVECTOR:
4483 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4484 default:
4485 return UnableToLegalize;
4486 }
4487}
4488
4489// Legalize an instruction by changing the opcode in place.
4490void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4491 Observer.changingInstr(MI);
4492 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4493 Observer.changedInstr(MI);
4494}
4495
4496 LegalizerHelper::LegalizeResult
4497 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4498 using namespace TargetOpcode;
4499
4500 switch(MI.getOpcode()) {
4501 default:
4502 return UnableToLegalize;
4503 case TargetOpcode::G_FCONSTANT:
4504 return lowerFConstant(MI);
4505 case TargetOpcode::G_BITCAST:
4506 return lowerBitcast(MI);
4507 case TargetOpcode::G_SREM:
4508 case TargetOpcode::G_UREM: {
4509 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4510 auto Quot =
4511 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4512 {MI.getOperand(1), MI.getOperand(2)});
4513
4514 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4515 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4516 MI.eraseFromParent();
4517 return Legalized;
4518 }
4519 case TargetOpcode::G_SADDO:
4520 case TargetOpcode::G_SSUBO:
4521 return lowerSADDO_SSUBO(MI);
4522 case TargetOpcode::G_SADDE:
4523 return lowerSADDE(MI);
4524 case TargetOpcode::G_SSUBE:
4525 return lowerSSUBE(MI);
4526 case TargetOpcode::G_UMULH:
4527 case TargetOpcode::G_SMULH:
4528 return lowerSMULH_UMULH(MI);
4529 case TargetOpcode::G_SMULO:
4530 case TargetOpcode::G_UMULO: {
4531 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4532 // result.
4533 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4534 LLT Ty = MRI.getType(Res);
4535
4536 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4537 ? TargetOpcode::G_SMULH
4538 : TargetOpcode::G_UMULH;
4539
4540 Observer.changingInstr(MI);
4541 const auto &TII = MIRBuilder.getTII();
4542 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4543 MI.removeOperand(1);
4544 Observer.changedInstr(MI);
4545
4546 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4547 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4548
4549 // Move insert point forward so we can use the Res register if needed.
4550 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4551
4552 // For *signed* multiply, overflow is detected by checking:
4553 // (hi != (lo >> bitwidth-1))
4554 if (Opcode == TargetOpcode::G_SMULH) {
4555 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4556 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4557 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4558 } else {
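// For unsigned multiply, overflow occurred iff the high half is nonzero.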
4559 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4560 }
4561 return Legalized;
4562 }
4563 case TargetOpcode::G_FNEG: {
4564 auto [Res, SubByReg] = MI.getFirst2Regs();
4565 LLT Ty = MRI.getType(Res);
4566
4567 auto SignMask = MIRBuilder.buildConstant(
4568 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4569 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4570 MI.eraseFromParent();
4571 return Legalized;
4572 }
4573 case TargetOpcode::G_FSUB:
4574 case TargetOpcode::G_STRICT_FSUB: {
4575 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4576 LLT Ty = MRI.getType(Res);
4577
4578 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4579 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4580
4581 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4582 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4583 else
4584 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4585
4586 MI.eraseFromParent();
4587 return Legalized;
4588 }
4589 case TargetOpcode::G_FMAD:
4590 return lowerFMad(MI);
4591 case TargetOpcode::G_FFLOOR:
4592 return lowerFFloor(MI);
4593 case TargetOpcode::G_LROUND:
4594 case TargetOpcode::G_LLROUND: {
4595 Register DstReg = MI.getOperand(0).getReg();
4596 Register SrcReg = MI.getOperand(1).getReg();
4597 LLT SrcTy = MRI.getType(SrcReg);
4598 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4599 {SrcReg});
4600 MIRBuilder.buildFPTOSI(DstReg, Round);
4601 MI.eraseFromParent();
4602 return Legalized;
4603 }
4604 case TargetOpcode::G_INTRINSIC_ROUND:
4605 return lowerIntrinsicRound(MI);
4606 case TargetOpcode::G_FRINT: {
4607 // Since round even is the assumed rounding mode for unconstrained FP
4608 // operations, rint and roundeven are the same operation.
4609 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4610 return Legalized;
4611 }
4612 case TargetOpcode::G_INTRINSIC_LRINT:
4613 case TargetOpcode::G_INTRINSIC_LLRINT: {
4614 Register DstReg = MI.getOperand(0).getReg();
4615 Register SrcReg = MI.getOperand(1).getReg();
4616 LLT SrcTy = MRI.getType(SrcReg);
4617 auto Round =
4618 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4619 MIRBuilder.buildFPTOSI(DstReg, Round);
4620 MI.eraseFromParent();
4621 return Legalized;
4622 }
4623 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4624 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4625 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4626 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4627 **MI.memoperands_begin());
4628 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4629 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4630 MI.eraseFromParent();
4631 return Legalized;
4632 }
4633 case TargetOpcode::G_LOAD:
4634 case TargetOpcode::G_SEXTLOAD:
4635 case TargetOpcode::G_ZEXTLOAD:
4636 return lowerLoad(cast<GAnyLoad>(MI));
4637 case TargetOpcode::G_STORE:
4638 return lowerStore(cast<GStore>(MI));
4639 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4640 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4641 case TargetOpcode::G_CTLZ:
4642 case TargetOpcode::G_CTTZ:
4643 case TargetOpcode::G_CTPOP:
4644 return lowerBitCount(MI);
4645 case G_UADDO: {
4646 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4647
4648 Register NewRes = MRI.cloneVirtualRegister(Res);
4649
4650 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4651 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4652
4653 MIRBuilder.buildCopy(Res, NewRes);
4654
4655 MI.eraseFromParent();
4656 return Legalized;
4657 }
4658 case G_UADDE: {
4659 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4660 const LLT CondTy = MRI.getType(CarryOut);
4661 const LLT Ty = MRI.getType(Res);
4662
4663 Register NewRes = MRI.cloneVirtualRegister(Res);
4664
4665 // Initial add of the two operands.
4666 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4667
4668 // Initial check for carry.
4669 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4670
4671 // Add the sum and the carry.
4672 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4673 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4674
4675 // Second check for carry. We can only carry if the initial sum is all 1s
4676 // and the carry is set, resulting in a new sum of 0.
4677 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4678 auto ResEqZero =
4679 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4680 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4681 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4682
4683 MIRBuilder.buildCopy(Res, NewRes);
4684
4685 MI.eraseFromParent();
4686 return Legalized;
4687 }
4688 case G_USUBO: {
4689 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4690
4691 MIRBuilder.buildSub(Res, LHS, RHS);
4692 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4693
4694 MI.eraseFromParent();
4695 return Legalized;
4696 }
4697 case G_USUBE: {
4698 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4699 const LLT CondTy = MRI.getType(BorrowOut);
4700 const LLT Ty = MRI.getType(Res);
4701
4702 // Initial subtract of the two operands.
4703 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4704
4705 // Initial check for borrow.
4706 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4707
4708 // Subtract the borrow from the first subtract.
4709 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4710 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4711
4712 // Second check for borrow. We can only borrow if the initial difference is
4713 // 0 and the borrow is set, resulting in a new difference of all 1s.
4714 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4715 auto TmpResEqZero =
4716 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4717 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4718 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4719
4720 MI.eraseFromParent();
4721 return Legalized;
4722 }
4723 case G_UITOFP:
4724 return lowerUITOFP(MI);
4725 case G_SITOFP:
4726 return lowerSITOFP(MI);
4727 case G_FPTOUI:
4728 return lowerFPTOUI(MI);
4729 case G_FPTOSI:
4730 return lowerFPTOSI(MI);
4731 case G_FPTOUI_SAT:
4732 case G_FPTOSI_SAT:
4733 return lowerFPTOINT_SAT(MI);
4734 case G_FPTRUNC:
4735 return lowerFPTRUNC(MI);
4736 case G_FPOWI:
4737 return lowerFPOWI(MI);
4738 case G_SMIN:
4739 case G_SMAX:
4740 case G_UMIN:
4741 case G_UMAX:
4742 return lowerMinMax(MI);
4743 case G_SCMP:
4744 case G_UCMP:
4745 return lowerThreewayCompare(MI);
4746 case G_FCOPYSIGN:
4747 return lowerFCopySign(MI);
4748 case G_FMINNUM:
4749 case G_FMAXNUM:
4750 case G_FMINIMUMNUM:
4751 case G_FMAXIMUMNUM:
4752 return lowerFMinNumMaxNum(MI);
4753 case G_MERGE_VALUES:
4754 return lowerMergeValues(MI);
4755 case G_UNMERGE_VALUES:
4756 return lowerUnmergeValues(MI);
4757 case TargetOpcode::G_SEXT_INREG: {
4758 assert(MI.getOperand(2).isImm() && "Expected immediate");
4759 int64_t SizeInBits = MI.getOperand(2).getImm();
4760
4761 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4762 LLT DstTy = MRI.getType(DstReg);
4763 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4764
4765 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4766 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4767 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4768 MI.eraseFromParent();
4769 return Legalized;
4770 }
4771 case G_EXTRACT_VECTOR_ELT:
4772 case G_INSERT_VECTOR_ELT:
4773 return lowerExtractInsertVectorElt(MI);
4774 case G_SHUFFLE_VECTOR:
4775 return lowerShuffleVector(MI);
4776 case G_VECTOR_COMPRESS:
4777 return lowerVECTOR_COMPRESS(MI);
4778 case G_DYN_STACKALLOC:
4779 return lowerDynStackAlloc(MI);
4780 case G_STACKSAVE:
4781 return lowerStackSave(MI);
4782 case G_STACKRESTORE:
4783 return lowerStackRestore(MI);
4784 case G_EXTRACT:
4785 return lowerExtract(MI);
4786 case G_INSERT:
4787 return lowerInsert(MI);
4788 case G_BSWAP:
4789 return lowerBswap(MI);
4790 case G_BITREVERSE:
4791 return lowerBitreverse(MI);
4792 case G_READ_REGISTER:
4793 case G_WRITE_REGISTER:
4794 return lowerReadWriteRegister(MI);
4795 case G_UADDSAT:
4796 case G_USUBSAT: {
4797 // Try to make a reasonable guess about which lowering strategy to use. The
4798 // target can override this by using custom lowering and calling the
4799 // implementation functions directly.
4800 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4801 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4802 return lowerAddSubSatToMinMax(MI);
4803 return lowerAddSubSatToAddoSubo(MI);
4804 }
4805 case G_SADDSAT:
4806 case G_SSUBSAT: {
4807 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4808
4809 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4810 // since it's a shorter expansion. However, we would need to figure out the
4811 // preferred boolean type for the carry out for the query.
4812 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4813 return lowerAddSubSatToMinMax(MI);
4814 return lowerAddSubSatToAddoSubo(MI);
4815 }
4816 case G_SSHLSAT:
4817 case G_USHLSAT:
4818 return lowerShlSat(MI);
4819 case G_ABS:
4820 return lowerAbsToAddXor(MI);
4821 case G_ABDS:
4822 case G_ABDU: {
4823 bool IsSigned = MI.getOpcode() == G_ABDS;
4824 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4825 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4826 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4827 return lowerAbsDiffToMinMax(MI);
4828 }
4829 return lowerAbsDiffToSelect(MI);
4830 }
4831 case G_FABS:
4832 return lowerFAbs(MI);
4833 case G_SELECT:
4834 return lowerSelect(MI);
4835 case G_IS_FPCLASS:
4836 return lowerISFPCLASS(MI);
4837 case G_SDIVREM:
4838 case G_UDIVREM:
4839 return lowerDIVREM(MI);
4840 case G_FSHL:
4841 case G_FSHR:
4842 return lowerFunnelShift(MI);
4843 case G_ROTL:
4844 case G_ROTR:
4845 return lowerRotate(MI);
4846 case G_MEMSET:
4847 case G_MEMCPY:
4848 case G_MEMMOVE:
4849 return lowerMemCpyFamily(MI);
4850 case G_MEMCPY_INLINE:
4851 return lowerMemcpyInline(MI);
4852 case G_ZEXT:
4853 case G_SEXT:
4854 case G_ANYEXT:
4855 return lowerEXT(MI);
4856 case G_TRUNC:
4857 return lowerTRUNC(MI);
4858 GISEL_VECREDUCE_CASES_NONSEQ
4859 return lowerVectorReduction(MI);
4860 case G_VAARG:
4861 return lowerVAArg(MI);
4862 case G_ATOMICRMW_SUB: {
4863 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4864 const LLT ValTy = MRI.getType(Val);
4865 MachineMemOperand *MMO = *MI.memoperands_begin();
4866
4867 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4868 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4869 MI.eraseFromParent();
4870 return Legalized;
4871 }
4872 }
4873}
4874
4875 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4876 Align MinAlign) const {
4877 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4878 // datalayout for the preferred alignment. Also there should be a target hook
4879 // for this to allow targets to reduce the alignment and ignore the
4880 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4881 // the type.
4882 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4883}
4884
4885 MachineInstrBuilder
4886 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4887 MachinePointerInfo &PtrInfo) {
4888 MachineFunction &MF = MIRBuilder.getMF();
4889 const DataLayout &DL = MIRBuilder.getDataLayout();
4890 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4891
4892 unsigned AddrSpace = DL.getAllocaAddrSpace();
4893 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4894
4895 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4896 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4897}
4898
4899 MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4900 const SrcOp &Val) {
4901 LLT SrcTy = Val.getLLTTy(MRI);
4902 Align StackTypeAlign =
4903 std::max(getStackTemporaryAlignment(SrcTy),
4904 getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4905 MachinePointerInfo PtrInfo;
4906 auto StackTemp =
4907 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4908
4909 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4910 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4911}
4912
4913 static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4914 LLT VecTy) {
4915 LLT IdxTy = B.getMRI()->getType(IdxReg);
4916 unsigned NElts = VecTy.getNumElements();
4917
4918 int64_t IdxVal;
4919 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4920 if (IdxVal < VecTy.getNumElements())
4921 return IdxReg;
4922 // If a constant index would be out of bounds, clamp it as well.
4923 }
4924
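// Clamp a possibly out-of-range index: for a power-of-2 length use a mask
// (e.g. Idx & 3 for a 4-element vector), otherwise use umin(Idx, NElts - 1).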
4925 if (isPowerOf2_32(NElts)) {
4926 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4927 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4928 }
4929
4930 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4931 .getReg(0);
4932}
4933
4934 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4935 Register Index) {
4936 LLT EltTy = VecTy.getElementType();
4937
4938 // Calculate the element offset and add it to the pointer.
4939 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4940 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4941 "Converting bits to bytes lost precision");
4942
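 // e.g. for a vector of s32 elements, the clamped index is multiplied by
 // EltSize = 4 and added to the base pointer with G_PTR_ADD.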
4943 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4944
4945 // Convert index to the correct size for the address space.
4946 const DataLayout &DL = MIRBuilder.getDataLayout();
4947 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4948 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4949 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4950 if (IdxTy != MRI.getType(Index))
4951 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4952
4953 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4954 MIRBuilder.buildConstant(IdxTy, EltSize));
4955
4956 LLT PtrTy = MRI.getType(VecPtr);
4957 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4958}
4959
4960#ifndef NDEBUG
4961/// Check that all vector operands have the same number of elements. Other
4962/// operands should be listed in \p NonVecOpIndices.
4963static bool hasSameNumEltsOnAllVectorOperands(
4964    GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4965 std::initializer_list<unsigned> NonVecOpIndices) {
4966 if (MI.getNumMemOperands() != 0)
4967 return false;
4968
4969 LLT VecTy = MRI.getType(MI.getReg(0));
4970 if (!VecTy.isVector())
4971 return false;
4972 unsigned NumElts = VecTy.getNumElements();
4973
4974 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4975 MachineOperand &Op = MI.getOperand(OpIdx);
4976 if (!Op.isReg()) {
4977 if (!is_contained(NonVecOpIndices, OpIdx))
4978 return false;
4979 continue;
4980 }
4981
4982 LLT Ty = MRI.getType(Op.getReg());
4983 if (!Ty.isVector()) {
4984 if (!is_contained(NonVecOpIndices, OpIdx))
4985 return false;
4986 continue;
4987 }
4988
4989 if (Ty.getNumElements() != NumElts)
4990 return false;
4991 }
4992
4993 return true;
4994}
4995#endif
4996
4997/// Fill \p DstOps with DstOps that have the same number of elements combined
4998/// as \p Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4999/// vectors with \p NumElts elements. When Ty.getNumElements() is not a
5000/// multiple of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
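/// For example, with \p Ty = <7 x s32> and \p NumElts = 2 this produces three
/// <2 x s32> DstOps followed by a single s32 leftover DstOp.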
5001static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5002 unsigned NumElts) {
5003 LLT LeftoverTy;
5004 assert(Ty.isVector() && "Expected vector type");
5005 LLT EltTy = Ty.getElementType();
5006 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
5007 int NumParts, NumLeftover;
5008 std::tie(NumParts, NumLeftover) =
5009 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5010
5011 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5012 for (int i = 0; i < NumParts; ++i) {
5013 DstOps.push_back(NarrowTy);
5014 }
5015
5016 if (LeftoverTy.isValid()) {
5017 assert(NumLeftover == 1 && "expected exactly one leftover");
5018 DstOps.push_back(LeftoverTy);
5019 }
5020}
5021
5022/// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
5023/// made from \p Op depending on the operand type.
5024static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
5025 MachineOperand &Op) {
5026 for (unsigned i = 0; i < N; ++i) {
5027 if (Op.isReg())
5028 Ops.push_back(Op.getReg());
5029 else if (Op.isImm())
5030 Ops.push_back(Op.getImm());
5031 else if (Op.isPredicate())
5032 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5033 else
5034 llvm_unreachable("Unsupported type");
5035 }
5036}
5037
5038// Handle splitting vector operations which need to have the same number of
5039// elements in each type index, but each type index may have a different element
5040// type.
5041//
5042// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5043// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5044// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5045//
5046// Also handles some irregular breakdown cases, e.g.
5047// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5048// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5049// s64 = G_SHL s64, s32
5050LegalizerHelper::LegalizeResult
5051LegalizerHelper::fewerElementsVectorMultiEltType(
5052 GenericMachineInstr &MI, unsigned NumElts,
5053 std::initializer_list<unsigned> NonVecOpIndices) {
5054 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5055 "Non-compatible opcode or not specified non-vector operands");
5056 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5057
5058 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5059 unsigned NumDefs = MI.getNumDefs();
5060
5061 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5062 // Build instructions with DstOps so that an instruction found by CSE can be
5063 // reused directly; CSE copies the found instruction into the given vreg when building with a vreg destination.
5064 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5065 // Output registers will be taken from created instructions.
5066 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5067 for (unsigned i = 0; i < NumDefs; ++i) {
5068 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5069 }
5070
5071 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5072 // Operands listed in NonVecOpIndices will be used as is without splitting;
5073 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5074 // scalar condition (op 1), immediate in sext_inreg (op 2).
5075 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5076 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5077 ++UseIdx, ++UseNo) {
5078 if (is_contained(NonVecOpIndices, UseIdx)) {
5079 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5080 MI.getOperand(UseIdx));
5081 } else {
5082 SmallVector<Register, 8> SplitPieces;
5083 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5084 MRI);
5085 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5086 }
5087 }
5088
5089 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5090
5091 // Take i-th piece of each input operand split and build sub-vector/scalar
5092 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5093 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5094 SmallVector<DstOp, 2> Defs;
5095 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5096 Defs.push_back(OutputOpsPieces[DstNo][i]);
5097
5099 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5100 Uses.push_back(InputOpsPieces[InputNo][i]);
5101
5102 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5103 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5104 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5105 }
5106
5107 // Merge small outputs into MI's output for each def operand.
5108 if (NumLeftovers) {
5109 for (unsigned i = 0; i < NumDefs; ++i)
5110 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5111 } else {
5112 for (unsigned i = 0; i < NumDefs; ++i)
5113 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5114 }
5115
5116 MI.eraseFromParent();
5117 return Legalized;
5118}
5119
5120LegalizerHelper::LegalizeResult
5121LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5122 unsigned NumElts) {
5123 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5124
5125 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5126 unsigned NumDefs = MI.getNumDefs();
5127
5128 SmallVector<DstOp, 8> OutputOpsPieces;
5129 SmallVector<Register, 8> OutputRegs;
5130 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5131
5132 // Instructions that perform the register split will be inserted in the basic
5133 // block where the register is defined (that basic block is given by the next operand).
5134 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5135 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5136 UseIdx += 2, ++UseNo) {
5137 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5138 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5139 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5140 MIRBuilder, MRI);
5141 }
5142
5143 // Build PHIs with fewer elements.
5144 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5145 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5146 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5147 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5148 Phi.addDef(
5149 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5150 OutputRegs.push_back(Phi.getReg(0));
5151
5152 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5153 Phi.addUse(InputOpsPieces[j][i]);
5154 Phi.add(MI.getOperand(1 + j * 2 + 1));
5155 }
5156 }
5157
5158 // Set the insert point after the existing PHIs
5159 MachineBasicBlock &MBB = *MI.getParent();
5160 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5161
5162 // Merge small outputs into MI's def.
5163 if (NumLeftovers) {
5164 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5165 } else {
5166 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5167 }
5168
5169 MI.eraseFromParent();
5170 return Legalized;
5171}
5172
5173LegalizerHelper::LegalizeResult
5174LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5175 unsigned TypeIdx,
5176 LLT NarrowTy) {
5177 const int NumDst = MI.getNumOperands() - 1;
5178 const Register SrcReg = MI.getOperand(NumDst).getReg();
5179 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5180 LLT SrcTy = MRI.getType(SrcReg);
5181
5182 if (TypeIdx != 1 || NarrowTy == DstTy)
5183 return UnableToLegalize;
5184
5185 // Requires compatible types. Otherwise SrcReg should have been defined by a
5186 // merge-like instruction that would get artifact combined. Most likely the
5187 // instruction that defines SrcReg has to perform more/fewer-elements
5188 // legalization compatible with NarrowTy.
5189 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5190 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5191
5192 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5193 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5194 return UnableToLegalize;
5195
5196 // This is most likely DstTy (smaller than register size) packed in SrcTy
5197 // (larger than register size), and since the unmerge was not combined it will
5198 // be lowered to bit-sequence extracts from a register. Unpack SrcTy into
5199 // NarrowTy (register size) pieces first, then unpack each NarrowTy piece to DstTy.
5200
5201 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5202 //
5203 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5204 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5205 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5206 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5207 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5208 const int PartsPerUnmerge = NumDst / NumUnmerge;
5209
5210 for (int I = 0; I != NumUnmerge; ++I) {
5211 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5212
5213 for (int J = 0; J != PartsPerUnmerge; ++J)
5214 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5215 MIB.addUse(Unmerge.getReg(I));
5216 }
5217
5218 MI.eraseFromParent();
5219 return Legalized;
5220}
5221
5222LegalizerHelper::LegalizeResult
5223LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5224 LLT NarrowTy) {
5225 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5226 // Requires compatible types. Otherwise the user of DstReg did not perform the
5227 // unmerge that should have been artifact combined. Most likely the instruction
5228 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5229 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5230 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5231 if (NarrowTy == SrcTy)
5232 return UnableToLegalize;
5233
5234 // This attempts to lower part of an LCMTy merge/unmerge sequence. Its intended
5235 // use is for old MIR tests. Since the changes to more/fewer-elements
5236 // legalization, it should no longer be possible to generate MIR like this when
5237 // starting from LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5238 if (TypeIdx == 1) {
5239 assert(SrcTy.isVector() && "Expected vector types");
5240 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5241 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5242 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5243 return UnableToLegalize;
5244 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5245 //
5246 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5247 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5248 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5249 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5250 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5251 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5252
5253 SmallVector<Register, 8> Elts;
5254 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5255 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5256 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5257 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5258 Elts.push_back(Unmerge.getReg(j));
5259 }
5260
5261 SmallVector<Register, 8> NarrowTyElts;
5262 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5263 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5264 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5265 ++i, Offset += NumNarrowTyElts) {
5266 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5267 NarrowTyElts.push_back(
5268 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5269 }
5270
5271 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5272 MI.eraseFromParent();
5273 return Legalized;
5274 }
5275
5276 assert(TypeIdx == 0 && "Bad type index");
5277 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5278 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5279 return UnableToLegalize;
5280
5281 // This is most likely SrcTy (smaller than register size) packed in DstTy
5282 // (larger than register size), and since the merge was not combined it will be
5283 // lowered to bit-sequence packing into a register. Merge SrcTy into NarrowTy
5284 // (register size) pieces first, then merge each NarrowTy piece into DstTy.
5285
5286 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5287 //
5288 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5289 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5290 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5291 SmallVector<Register, 8> NarrowTyElts;
5292 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5293 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5294 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5295 for (unsigned i = 0; i < NumParts; ++i) {
5296 SmallVector<Register, 8> Sources;
5297 for (unsigned j = 0; j < NumElts; ++j)
5298 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5299 NarrowTyElts.push_back(
5300 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5301 }
5302
5303 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5304 MI.eraseFromParent();
5305 return Legalized;
5306}
5307
5308LegalizerHelper::LegalizeResult
5309LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5310 unsigned TypeIdx,
5311 LLT NarrowVecTy) {
5312 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5313 Register InsertVal;
5314 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5315
5316 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5317 if (IsInsert)
5318 InsertVal = MI.getOperand(2).getReg();
5319
5320 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5321 LLT VecTy = MRI.getType(SrcVec);
5322
5323 // If the index is a constant, we can really break this down as you would
5324 // expect, and index into the target size pieces.
5325 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5326 if (MaybeCst) {
5327 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5328 // Avoid out of bounds indexing the pieces.
5329 if (IdxVal >= VecTy.getNumElements()) {
5330 MIRBuilder.buildUndef(DstReg);
5331 MI.eraseFromParent();
5332 return Legalized;
5333 }
5334
5335 if (!NarrowVecTy.isVector()) {
5336 SmallVector<Register, 8> SplitPieces;
5337 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5338 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5339 if (IsInsert) {
5340 SplitPieces[IdxVal] = InsertVal;
5341 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5342 } else {
5343 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5344 }
5345 } else {
5346 SmallVector<Register, 8> VecParts;
5347 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5348
5349 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5350 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5351 TargetOpcode::G_ANYEXT);
5352
5353 unsigned NewNumElts = NarrowVecTy.getNumElements();
5354
5355 LLT IdxTy = MRI.getType(Idx);
5356 int64_t PartIdx = IdxVal / NewNumElts;
5357 auto NewIdx =
5358 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5359
5360 if (IsInsert) {
5361 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5362
5363 // Use the adjusted index to insert into one of the subvectors.
5364 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5365 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5366 VecParts[PartIdx] = InsertPart.getReg(0);
5367
5368 // Recombine the inserted subvector with the others to reform the result
5369 // vector.
5370 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5371 } else {
5372 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5373 }
5374 }
5375
5376 MI.eraseFromParent();
5377 return Legalized;
5378 }
5379
5380 // With a variable index, we can't perform the operation in a smaller type, so
5381 // we're forced to expand this.
5382 //
5383 // TODO: We could emit a chain of compare/select to figure out which piece to
5384 // index.
5385 return lowerExtractInsertVectorElt(MI);
5386}
5387
5388LegalizerHelper::LegalizeResult
5389LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5390 LLT NarrowTy) {
5391 // FIXME: Don't know how to handle secondary types yet.
5392 if (TypeIdx != 0)
5393 return UnableToLegalize;
5394
5395 if (!NarrowTy.isByteSized()) {
5396 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5397 return UnableToLegalize;
5398 }
5399
5400 // This implementation doesn't work for atomics. Give up instead of doing
5401 // something invalid.
5402 if (LdStMI.isAtomic())
5403 return UnableToLegalize;
5404
5405 bool IsLoad = isa<GLoad>(LdStMI);
5406 Register ValReg = LdStMI.getReg(0);
5407 Register AddrReg = LdStMI.getPointerReg();
5408 LLT ValTy = MRI.getType(ValReg);
5409
5410 // FIXME: Do we need a distinct NarrowMemory legalize action?
5411 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5412 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5413 return UnableToLegalize;
5414 }
5415
5416 int NumParts = -1;
5417 int NumLeftover = -1;
5418 LLT LeftoverTy;
5419 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5420 if (IsLoad) {
5421 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5422 } else {
5423 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5424 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5425 NumParts = NarrowRegs.size();
5426 NumLeftover = NarrowLeftoverRegs.size();
5427 }
5428 }
5429
5430 if (NumParts == -1)
5431 return UnableToLegalize;
5432
5433 LLT PtrTy = MRI.getType(AddrReg);
5434 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5435
5436 unsigned TotalSize = ValTy.getSizeInBits();
5437
5438 // Split the load/store into PartTy-sized pieces starting at Offset. If this
5439 // is a load, return the new registers in ValRegs. For a store, each element
5440 // of ValRegs should be PartTy. Returns the next offset that needs to be
5441 // handled.
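 // e.g. an s96 value with NarrowTy = s32 is split into three s32 pieces at
 // byte offsets 0, 4 and 8 (visited from the highest offset first on
 // big-endian targets).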
5442 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5443 auto MMO = LdStMI.getMMO();
5444 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5445 unsigned NumParts, unsigned Offset) -> unsigned {
5446 MachineFunction &MF = MIRBuilder.getMF();
5447 unsigned PartSize = PartTy.getSizeInBits();
5448 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5449 ++Idx) {
5450 unsigned ByteOffset = Offset / 8;
5451 Register NewAddrReg;
5452
5453 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5454 ByteOffset);
5455
5456 MachineMemOperand *NewMMO =
5457 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5458
5459 if (IsLoad) {
5460 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5461 ValRegs.push_back(Dst);
5462 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5463 } else {
5464 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5465 }
5466 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5467 }
5468
5469 return Offset;
5470 };
5471
5472 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5473 unsigned HandledOffset =
5474 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5475
5476 // Handle the rest of the register if this isn't an even type breakdown.
5477 if (LeftoverTy.isValid())
5478 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5479
5480 if (IsLoad) {
5481 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5482 LeftoverTy, NarrowLeftoverRegs);
5483 }
5484
5485 LdStMI.eraseFromParent();
5486 return Legalized;
5487}
5488
5489LegalizerHelper::LegalizeResult
5490LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5491 LLT NarrowTy) {
5492 using namespace TargetOpcode;
5493 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5494 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5495
5496 switch (MI.getOpcode()) {
5497 case G_IMPLICIT_DEF:
5498 case G_TRUNC:
5499 case G_AND:
5500 case G_OR:
5501 case G_XOR:
5502 case G_ADD:
5503 case G_SUB:
5504 case G_MUL:
5505 case G_PTR_ADD:
5506 case G_SMULH:
5507 case G_UMULH:
5508 case G_FADD:
5509 case G_FMUL:
5510 case G_FSUB:
5511 case G_FNEG:
5512 case G_FABS:
5513 case G_FCANONICALIZE:
5514 case G_FDIV:
5515 case G_FREM:
5516 case G_FMA:
5517 case G_FMAD:
5518 case G_FPOW:
5519 case G_FEXP:
5520 case G_FEXP2:
5521 case G_FEXP10:
5522 case G_FLOG:
5523 case G_FLOG2:
5524 case G_FLOG10:
5525 case G_FLDEXP:
5526 case G_FNEARBYINT:
5527 case G_FCEIL:
5528 case G_FFLOOR:
5529 case G_FRINT:
5530 case G_INTRINSIC_LRINT:
5531 case G_INTRINSIC_LLRINT:
5532 case G_INTRINSIC_ROUND:
5533 case G_INTRINSIC_ROUNDEVEN:
5534 case G_LROUND:
5535 case G_LLROUND:
5536 case G_INTRINSIC_TRUNC:
5537 case G_FMODF:
5538 case G_FCOS:
5539 case G_FSIN:
5540 case G_FTAN:
5541 case G_FACOS:
5542 case G_FASIN:
5543 case G_FATAN:
5544 case G_FATAN2:
5545 case G_FCOSH:
5546 case G_FSINH:
5547 case G_FTANH:
5548 case G_FSQRT:
5549 case G_BSWAP:
5550 case G_BITREVERSE:
5551 case G_SDIV:
5552 case G_UDIV:
5553 case G_SREM:
5554 case G_UREM:
5555 case G_SDIVREM:
5556 case G_UDIVREM:
5557 case G_SMIN:
5558 case G_SMAX:
5559 case G_UMIN:
5560 case G_UMAX:
5561 case G_ABS:
5562 case G_FMINNUM:
5563 case G_FMAXNUM:
5564 case G_FMINNUM_IEEE:
5565 case G_FMAXNUM_IEEE:
5566 case G_FMINIMUM:
5567 case G_FMAXIMUM:
5568 case G_FMINIMUMNUM:
5569 case G_FMAXIMUMNUM:
5570 case G_FSHL:
5571 case G_FSHR:
5572 case G_ROTL:
5573 case G_ROTR:
5574 case G_FREEZE:
5575 case G_SADDSAT:
5576 case G_SSUBSAT:
5577 case G_UADDSAT:
5578 case G_USUBSAT:
5579 case G_UMULO:
5580 case G_SMULO:
5581 case G_SHL:
5582 case G_LSHR:
5583 case G_ASHR:
5584 case G_SSHLSAT:
5585 case G_USHLSAT:
5586 case G_CTLZ:
5587 case G_CTLZ_ZERO_UNDEF:
5588 case G_CTTZ:
5589 case G_CTTZ_ZERO_UNDEF:
5590 case G_CTPOP:
5591 case G_FCOPYSIGN:
5592 case G_ZEXT:
5593 case G_SEXT:
5594 case G_ANYEXT:
5595 case G_FPEXT:
5596 case G_FPTRUNC:
5597 case G_SITOFP:
5598 case G_UITOFP:
5599 case G_FPTOSI:
5600 case G_FPTOUI:
5601 case G_FPTOSI_SAT:
5602 case G_FPTOUI_SAT:
5603 case G_INTTOPTR:
5604 case G_PTRTOINT:
5605 case G_ADDRSPACE_CAST:
5606 case G_UADDO:
5607 case G_USUBO:
5608 case G_UADDE:
5609 case G_USUBE:
5610 case G_SADDO:
5611 case G_SSUBO:
5612 case G_SADDE:
5613 case G_SSUBE:
5614 case G_STRICT_FADD:
5615 case G_STRICT_FSUB:
5616 case G_STRICT_FMUL:
5617 case G_STRICT_FMA:
5618 case G_STRICT_FLDEXP:
5619 case G_FFREXP:
5620 return fewerElementsVectorMultiEltType(GMI, NumElts);
5621 case G_ICMP:
5622 case G_FCMP:
5623 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5624 case G_IS_FPCLASS:
5625 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5626 case G_SELECT:
5627 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5628 return fewerElementsVectorMultiEltType(GMI, NumElts);
5629 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5630 case G_PHI:
5631 return fewerElementsVectorPhi(GMI, NumElts);
5632 case G_UNMERGE_VALUES:
5633 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5634 case G_BUILD_VECTOR:
5635 assert(TypeIdx == 0 && "not a vector type index");
5636 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5637 case G_CONCAT_VECTORS:
5638 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5639 return UnableToLegalize;
5640 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5641 case G_EXTRACT_VECTOR_ELT:
5642 case G_INSERT_VECTOR_ELT:
5643 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5644 case G_LOAD:
5645 case G_STORE:
5646 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5647 case G_SEXT_INREG:
5648 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5649 GET_VECREDUCE_CASES_NONSEQ
5650 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5651 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5652 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5653 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5654 case G_SHUFFLE_VECTOR:
5655 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5656 case G_FPOWI:
5657 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5658 case G_BITCAST:
5659 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5660 case G_INTRINSIC_FPTRUNC_ROUND:
5661 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5662 default:
5663 return UnableToLegalize;
5664 }
5665}
5666
5667LegalizerHelper::LegalizeResult
5668LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5669 LLT NarrowTy) {
5670 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5671 "Not a bitcast operation");
5672
5673 if (TypeIdx != 0)
5674 return UnableToLegalize;
5675
5676 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5677
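 // The destination is rebuilt from NarrowTy-sized bitcasts of matching pieces
 // of the source: split SrcReg into pieces of NarrowTy's bit width, bitcast
 // each piece, and merge the results back into DstReg.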
5678 unsigned NewElemCount =
5679 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5680 SmallVector<Register> SrcVRegs, BitcastVRegs;
5681 if (NewElemCount == 1) {
5682 LLT SrcNarrowTy = SrcTy.getElementType();
5683
5684 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5685 getUnmergeResults(SrcVRegs, *Unmerge);
5686 } else {
5687 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5688
5689 // Split the Src and Dst Reg into smaller registers
5690 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5691 return UnableToLegalize;
5692 }
5693
5694 // Build new smaller bitcast instructions
5695 // Not supporting Leftover types for now but will have to
5696 for (Register Reg : SrcVRegs)
5697 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5698
5699 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5700 MI.eraseFromParent();
5701 return Legalized;
5702}
5703
5704LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5705 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5706 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5707 if (TypeIdx != 0)
5708 return UnableToLegalize;
5709
5710 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5711 MI.getFirst3RegLLTs();
5712 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5713 // The shuffle should be canonicalized by now.
5714 if (DstTy != Src1Ty)
5715 return UnableToLegalize;
5716 if (DstTy != Src2Ty)
5717 return UnableToLegalize;
5718
5719 if (!isPowerOf2_32(DstTy.getNumElements()))
5720 return UnableToLegalize;
5721
5722 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5723 // Further legalization attempts will be needed to split it further.
5724 NarrowTy =
5725 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5726 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5727
5728 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5729 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5730 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5731 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5732 SplitSrc2Regs[1]};
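 // e.g. for an <8 x s32> shuffle split into <4 x s32> halves, original mask
 // indices 0-7 refer to Inputs[0]/Inputs[1] (the halves of the first source)
 // and 8-15 refer to Inputs[2]/Inputs[3] (the halves of the second source).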
5733
5734 Register Hi, Lo;
5735
5736 // If Lo or Hi uses elements from at most two of the four input vectors, then
5737 // express it as a vector shuffle of those two inputs. Otherwise extract the
5738 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5739 SmallVector<int, 16> Ops;
5740 for (unsigned High = 0; High < 2; ++High) {
5741 Register &Output = High ? Hi : Lo;
5742
5743 // Build a shuffle mask for the output, discovering on the fly which
5744 // input vectors to use as shuffle operands (recorded in InputUsed).
5745 // If building a suitable shuffle vector proves too hard, then bail
5746 // out with useBuildVector set.
5747 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5748 unsigned FirstMaskIdx = High * NewElts;
5749 bool UseBuildVector = false;
5750 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5751 // The mask element. This indexes into the input.
5752 int Idx = Mask[FirstMaskIdx + MaskOffset];
5753
5754 // The input vector this mask element indexes into.
5755 unsigned Input = (unsigned)Idx / NewElts;
5756
5757 if (Input >= std::size(Inputs)) {
5758 // The mask element does not index into any input vector.
5759 Ops.push_back(-1);
5760 continue;
5761 }
5762
5763 // Turn the index into an offset from the start of the input vector.
5764 Idx -= Input * NewElts;
5765
5766 // Find or create a shuffle vector operand to hold this input.
5767 unsigned OpNo;
5768 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5769 if (InputUsed[OpNo] == Input) {
5770 // This input vector is already an operand.
5771 break;
5772 } else if (InputUsed[OpNo] == -1U) {
5773 // Create a new operand for this input vector.
5774 InputUsed[OpNo] = Input;
5775 break;
5776 }
5777 }
5778
5779 if (OpNo >= std::size(InputUsed)) {
5780 // More than two input vectors used! Give up on trying to create a
5781 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5782 UseBuildVector = true;
5783 break;
5784 }
5785
5786 // Add the mask index for the new shuffle vector.
5787 Ops.push_back(Idx + OpNo * NewElts);
5788 }
5789
5790 if (UseBuildVector) {
5791 LLT EltTy = NarrowTy.getElementType();
5792 SmallVector<Register, 16> SVOps;
5793
5794 // Extract the input elements by hand.
5795 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5796 // The mask element. This indexes into the input.
5797 int Idx = Mask[FirstMaskIdx + MaskOffset];
5798
5799 // The input vector this mask element indexes into.
5800 unsigned Input = (unsigned)Idx / NewElts;
5801
5802 if (Input >= std::size(Inputs)) {
5803 // The mask element is "undef" or indexes off the end of the input.
5804 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5805 continue;
5806 }
5807
5808 // Turn the index into an offset from the start of the input vector.
5809 Idx -= Input * NewElts;
5810
5811 // Extract the vector element by hand.
5812 SVOps.push_back(MIRBuilder
5813 .buildExtractVectorElement(
5814 EltTy, Inputs[Input],
5815 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5816 .getReg(0));
5817 }
5818
5819 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5820 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5821 } else if (InputUsed[0] == -1U) {
5822 // No input vectors were used! The result is undefined.
5823 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5824 } else {
5825 Register Op0 = Inputs[InputUsed[0]];
5826 // If only one input was used, use an undefined vector for the other.
5827 Register Op1 = InputUsed[1] == -1U
5828 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5829 : Inputs[InputUsed[1]];
5830 // At least one input vector was used. Create a new shuffle vector.
5831 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5832 }
5833
5834 Ops.clear();
5835 }
5836
5837 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5838 MI.eraseFromParent();
5839 return Legalized;
5840}
5841
5842LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5843 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5844 auto &RdxMI = cast<GVecReduce>(MI);
5845
5846 if (TypeIdx != 1)
5847 return UnableToLegalize;
5848
5849 // The semantics of the normal non-sequential reductions allow us to freely
5850 // re-associate the operation.
5851 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5852
5853 if (NarrowTy.isVector() &&
5854 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5855 return UnableToLegalize;
5856
5857 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5858 SmallVector<Register> SplitSrcs;
5859 // If NarrowTy is a scalar then we're being asked to scalarize.
5860 const unsigned NumParts =
5861 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5862 : SrcTy.getNumElements();
5863
5864 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5865 if (NarrowTy.isScalar()) {
5866 if (DstTy != NarrowTy)
5867 return UnableToLegalize; // FIXME: handle implicit extensions.
5868
5869 if (isPowerOf2_32(NumParts)) {
5870 // Generate a tree of scalar operations to reduce the critical path.
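 // e.g. 8 scalar pieces are combined 8 -> 4 -> 2 -> 1, giving three levels of
 // independent operations instead of a 7-operation sequential chain.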
5871 SmallVector<Register> PartialResults;
5872 unsigned NumPartsLeft = NumParts;
5873 while (NumPartsLeft > 1) {
5874 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5875 PartialResults.emplace_back(
5876 MIRBuilder
5877 .buildInstr(ScalarOpc, {NarrowTy},
5878 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5879 .getReg(0));
5880 }
5881 SplitSrcs = PartialResults;
5882 PartialResults.clear();
5883 NumPartsLeft = SplitSrcs.size();
5884 }
5885 assert(SplitSrcs.size() == 1);
5886 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5887 MI.eraseFromParent();
5888 return Legalized;
5889 }
5890 // If we can't generate a tree, then just do sequential operations.
5891 Register Acc = SplitSrcs[0];
5892 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5893 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5894 .getReg(0);
5895 MIRBuilder.buildCopy(DstReg, Acc);
5896 MI.eraseFromParent();
5897 return Legalized;
5898 }
5899 SmallVector<Register> PartialReductions;
5900 for (unsigned Part = 0; Part < NumParts; ++Part) {
5901 PartialReductions.push_back(
5902 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5903 .getReg(0));
5904 }
5905
5906 // If the types involved are powers of 2, we can generate intermediate vector
5907 // ops, before generating a final reduction operation.
5908 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5909 isPowerOf2_32(NarrowTy.getNumElements())) {
5910 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5911 }
5912
5913 Register Acc = PartialReductions[0];
5914 for (unsigned Part = 1; Part < NumParts; ++Part) {
5915 if (Part == NumParts - 1) {
5916 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5917 {Acc, PartialReductions[Part]});
5918 } else {
5919 Acc = MIRBuilder
5920 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5921 .getReg(0);
5922 }
5923 }
5924 MI.eraseFromParent();
5925 return Legalized;
5926}
5927
5928LegalizerHelper::LegalizeResult
5929LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5930 unsigned int TypeIdx,
5931 LLT NarrowTy) {
5932 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5933 MI.getFirst3RegLLTs();
5934 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5935 DstTy != NarrowTy)
5936 return UnableToLegalize;
5937
5938 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5939 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5940 "Unexpected vecreduce opcode");
5941 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5942 ? TargetOpcode::G_FADD
5943 : TargetOpcode::G_FMUL;
5944
5945 SmallVector<Register> SplitSrcs;
5946 unsigned NumParts = SrcTy.getNumElements();
5947 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5948 Register Acc = ScalarReg;
5949 for (unsigned i = 0; i < NumParts; i++)
5950 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5951 .getReg(0);
5952
5953 MIRBuilder.buildCopy(DstReg, Acc);
5954 MI.eraseFromParent();
5955 return Legalized;
5956}
5957
5958LegalizerHelper::LegalizeResult
5959LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5960 LLT SrcTy, LLT NarrowTy,
5961 unsigned ScalarOpc) {
5962 SmallVector<Register> SplitSrcs;
5963 // Split the sources into NarrowTy size pieces.
5964 extractParts(SrcReg, NarrowTy,
5965 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5966 MIRBuilder, MRI);
5967 // We're going to do a tree reduction using vector operations until we have
5968 // one NarrowTy size value left.
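 // e.g. splitting <16 x s32> into <4 x s32> pieces gives four vectors that
 // are combined 4 -> 2 -> 1; the original reduction opcode then runs on the
 // single remaining <4 x s32> value.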
5969 while (SplitSrcs.size() > 1) {
5970 SmallVector<Register> PartialRdxs;
5971 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5972 Register LHS = SplitSrcs[Idx];
5973 Register RHS = SplitSrcs[Idx + 1];
5974 // Create the intermediate vector op.
5975 Register Res =
5976 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5977 PartialRdxs.push_back(Res);
5978 }
5979 SplitSrcs = std::move(PartialRdxs);
5980 }
5981 // Finally generate the requested NarrowTy based reduction.
5982 Observer.changingInstr(MI);
5983 MI.getOperand(1).setReg(SplitSrcs[0]);
5984 Observer.changedInstr(MI);
5985 return Legalized;
5986}
5987
5988LegalizerHelper::LegalizeResult
5989LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5990 const LLT HalfTy, const LLT AmtTy) {
5991
5992 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5993 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5994 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5995
5996 if (Amt.isZero()) {
5997 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5998 MI.eraseFromParent();
5999 return Legalized;
6000 }
6001
6002 LLT NVT = HalfTy;
6003 unsigned NVTBits = HalfTy.getSizeInBits();
6004 unsigned VTBits = 2 * NVTBits;
6005
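 // e.g. for an s64 G_SHL by 40 split into s32 halves, 40 > 32, so Lo becomes
 // zero and Hi becomes InL shifted left by 8 (Amt - NVTBits).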
6006 SrcOp Lo(Register(0)), Hi(Register(0));
6007 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6008 if (Amt.ugt(VTBits)) {
6009 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6010 } else if (Amt.ugt(NVTBits)) {
6011 Lo = MIRBuilder.buildConstant(NVT, 0);
6012 Hi = MIRBuilder.buildShl(NVT, InL,
6013 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6014 } else if (Amt == NVTBits) {
6015 Lo = MIRBuilder.buildConstant(NVT, 0);
6016 Hi = InL;
6017 } else {
6018 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
6019 auto OrLHS =
6020 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6021 auto OrRHS = MIRBuilder.buildLShr(
6022 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6023 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6024 }
6025 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6026 if (Amt.ugt(VTBits)) {
6027 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6028 } else if (Amt.ugt(NVTBits)) {
6029 Lo = MIRBuilder.buildLShr(NVT, InH,
6030 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6031 Hi = MIRBuilder.buildConstant(NVT, 0);
6032 } else if (Amt == NVTBits) {
6033 Lo = InH;
6034 Hi = MIRBuilder.buildConstant(NVT, 0);
6035 } else {
6036 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6037
6038 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6039 auto OrRHS = MIRBuilder.buildShl(
6040 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6041
6042 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6043 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6044 }
6045 } else {
6046 if (Amt.ugt(VTBits)) {
6047 Hi = Lo = MIRBuilder.buildAShr(
6048 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6049 } else if (Amt.ugt(NVTBits)) {
6050 Lo = MIRBuilder.buildAShr(NVT, InH,
6051 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6052 Hi = MIRBuilder.buildAShr(NVT, InH,
6053 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6054 } else if (Amt == NVTBits) {
6055 Lo = InH;
6056 Hi = MIRBuilder.buildAShr(NVT, InH,
6057 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6058 } else {
6059 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6060
6061 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6062 auto OrRHS = MIRBuilder.buildShl(
6063 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6064
6065 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6066 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6067 }
6068 }
6069
6070 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6071 MI.eraseFromParent();
6072
6073 return Legalized;
6074}
6075
6076LegalizerHelper::LegalizeResult
6077LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6078 LLT RequestedTy) {
6079 if (TypeIdx == 1) {
6080 Observer.changingInstr(MI);
6081 narrowScalarSrc(MI, RequestedTy, 2);
6082 Observer.changedInstr(MI);
6083 return Legalized;
6084 }
6085
6086 Register DstReg = MI.getOperand(0).getReg();
6087 LLT DstTy = MRI.getType(DstReg);
6088 if (DstTy.isVector())
6089 return UnableToLegalize;
6090
6091 Register Amt = MI.getOperand(2).getReg();
6092 LLT ShiftAmtTy = MRI.getType(Amt);
6093 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6094 if (DstEltSize % 2 != 0)
6095 return UnableToLegalize;
6096
6097 // Check if we should use multi-way splitting instead of recursive binary
6098 // splitting.
6099 //
6100 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6101 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6102 // and dependency chains created by the usual binary splitting approach
6103 // (128->64->32).
6104 //
6105 // The >= 8 parts threshold ensures we only use this optimization when binary
6106 // splitting would require multiple recursive passes, avoiding overhead for
6107 // simple 2-way splits where the binary approach is sufficient.
6108 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6109 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6110 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6111 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6112 // steps).
6113 if (NumParts >= 8)
6114 return narrowScalarShiftMultiway(MI, RequestedTy);
6115 }
6116
6117 // Fall back to binary splitting:
6118 // Ignore the input type. We can only go to exactly half the size of the
6119 // input. If that isn't small enough, the resulting pieces will be further
6120 // legalized.
6121 const unsigned NewBitSize = DstEltSize / 2;
6122 const LLT HalfTy = LLT::scalar(NewBitSize);
6123 const LLT CondTy = LLT::scalar(1);
6124
6125 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6126 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6127 ShiftAmtTy);
6128 }
6129
6130 // TODO: Expand with known bits.
6131
6132 // Handle the fully general expansion by an unknown amount.
6133 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6134
6135 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6136 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6137 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6138
6139 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6140 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6141
6142 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6143 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6144 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6145
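 // Three cases are folded into the selects below: Amt == 0 (pass the input
 // half through), Amt < NewBitSize ("short": bits cross between the halves),
 // and Amt >= NewBitSize ("long": one half comes entirely from the other).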
6146 Register ResultRegs[2];
6147 switch (MI.getOpcode()) {
6148 case TargetOpcode::G_SHL: {
6149 // Short: ShAmt < NewBitSize
6150 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6151
6152 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6153 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6154 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6155
6156 // Long: ShAmt >= NewBitSize
6157 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6158 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6159
6160 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6161 auto Hi = MIRBuilder.buildSelect(
6162 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6163
6164 ResultRegs[0] = Lo.getReg(0);
6165 ResultRegs[1] = Hi.getReg(0);
6166 break;
6167 }
6168 case TargetOpcode::G_LSHR:
6169 case TargetOpcode::G_ASHR: {
6170 // Short: ShAmt < NewBitSize
6171 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6172
6173 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6174 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6175 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6176
6177 // Long: ShAmt >= NewBitSize
6178 MachineInstrBuilder HiL;
6179 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6180 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6181 } else {
6182 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6183 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6184 }
6185 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6186 {InH, AmtExcess}); // Lo from Hi part.
6187
6188 auto Lo = MIRBuilder.buildSelect(
6189 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6190
6191 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6192
6193 ResultRegs[0] = Lo.getReg(0);
6194 ResultRegs[1] = Hi.getReg(0);
6195 break;
6196 }
6197 default:
6198 llvm_unreachable("not a shift");
6199 }
6200
6201 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6202 MI.eraseFromParent();
6203 return Legalized;
6204}
6205
6206Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6207 unsigned PartIdx,
6208 unsigned NumParts,
6209 ArrayRef<Register> SrcParts,
6210 const ShiftParams &Params,
6211 LLT TargetTy, LLT ShiftAmtTy) {
6212 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6213 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6214 assert(WordShiftConst && BitShiftConst && "Expected constants");
6215
6216 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6217 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6218 const bool NeedsInterWordShift = ShiftBits != 0;
6219
6220 switch (Opcode) {
6221 case TargetOpcode::G_SHL: {
6222 // Data moves from lower indices to higher indices
6223 // If this part would come from a source beyond our range, it's zero
6224 if (PartIdx < ShiftWords)
6225 return Params.Zero;
6226
6227 unsigned SrcIdx = PartIdx - ShiftWords;
6228 if (!NeedsInterWordShift)
6229 return SrcParts[SrcIdx];
6230
6231 // Combine shifted main part with carry from previous part
6232 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6233 if (SrcIdx > 0) {
6234 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6235 Params.InvBitShift);
6236 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6237 }
6238 return Hi.getReg(0);
6239 }
6240
6241 case TargetOpcode::G_LSHR: {
6242 unsigned SrcIdx = PartIdx + ShiftWords;
6243 if (SrcIdx >= NumParts)
6244 return Params.Zero;
6245 if (!NeedsInterWordShift)
6246 return SrcParts[SrcIdx];
6247
6248 // Combine shifted main part with carry from next part
6249 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6250 if (SrcIdx + 1 < NumParts) {
6251 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6252 Params.InvBitShift);
6253 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6254 }
6255 return Lo.getReg(0);
6256 }
6257
6258 case TargetOpcode::G_ASHR: {
6259 // Like LSHR but preserves sign bit
6260 unsigned SrcIdx = PartIdx + ShiftWords;
6261 if (SrcIdx >= NumParts)
6262 return Params.SignBit;
6263 if (!NeedsInterWordShift)
6264 return SrcParts[SrcIdx];
6265
6266 // Only the original MSB part uses arithmetic shift to preserve sign. All
6267 // other parts use logical shift since they're just moving data bits.
6268 auto Lo =
6269 (SrcIdx == NumParts - 1)
6270 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6271 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6272 Register HiSrc =
6273 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6274 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6275 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6276 }
6277
6278 default:
6279 llvm_unreachable("not a shift");
6280 }
6281}
6282
6283Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6284 Register MainOperand,
6285 Register ShiftAmt,
6286 LLT TargetTy,
6287 Register CarryOperand) {
6288 // This helper generates a single output part for variable shifts by combining
6289 // the main operand (shifted by BitShift) with carry bits from an adjacent
6290 // part.
6291
6292 // For G_ASHR, individual parts don't have their own sign bit, only the
6293 // complete value does. So we use LSHR for the main operand shift in ASHR
6294 // context.
6295 unsigned MainOpcode =
6296 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6297
6298 // Perform the primary shift on the main operand
6299 Register MainShifted =
6300 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6301 .getReg(0);
6302
6303 // No carry operand available
6304 if (!CarryOperand.isValid())
6305 return MainShifted;
6306
6307 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6308 // so carry bits aren't needed.
6309 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6310 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6311 LLT BoolTy = LLT::scalar(1);
6312 auto IsZeroBitShift =
6313 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6314
6315 // Extract bits from the adjacent part that will "carry over" into this part.
6316 // The carry direction is opposite to the main shift direction, so we can
6317 // align the two shifted values before combining them with OR.
6318
6319 // Determine the carry shift opcode (opposite direction)
6320 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6321 : TargetOpcode::G_SHL;
6322
6323 // Calculate inverse shift amount: BitWidth - ShiftAmt
6324 auto TargetBitsConst =
6325 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6326 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6327
6328 // Shift the carry operand
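 // e.g. with 32-bit parts and BitShift = 18 in a G_SHL, the carry
 // contribution is CarryOperand >> 14, supplying the 18 low bits vacated by
 // the main shift.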
6329 Register CarryBits =
6330 MIRBuilder
6331 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6332 .getReg(0);
6333
6334 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6335 // TargetBits which would be poison for the individual carry shift operation).
6336 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6337 Register SafeCarryBits =
6338 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6339 .getReg(0);
6340
6341 // Combine the main shifted part with the carry bits
6342 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6343}
6344
6345LegalizerHelper::LegalizeResult
6346LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6347 const APInt &Amt,
6348 LLT TargetTy,
6349 LLT ShiftAmtTy) {
6350 // Any wide shift can be decomposed into WordShift + BitShift components.
6351 // When shift amount is known constant, directly compute the decomposition
6352 // values and generate constant registers.
6353 Register DstReg = MI.getOperand(0).getReg();
6354 Register SrcReg = MI.getOperand(1).getReg();
6355 LLT DstTy = MRI.getType(DstReg);
6356
6357 const unsigned DstBits = DstTy.getScalarSizeInBits();
6358 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6359 const unsigned NumParts = DstBits / TargetBits;
6360
6361 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6362
6363 // When the shift amount is known at compile time, we just calculate which
6364 // source parts contribute to each output part.
6365
6366 SmallVector<Register, 8> SrcParts;
6367 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6368
6369 if (Amt.isZero()) {
6370 // No shift needed, just copy
6371 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6372 MI.eraseFromParent();
6373 return Legalized;
6374 }
6375
6376 ShiftParams Params;
6377 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6378 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6379
6380 // Generate constants and values needed by all shift types
6381 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6382 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6383 Params.InvBitShift =
6384 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6385 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6386
6387 // For ASHR, we need the sign-extended value to fill shifted-out positions
6388 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6389 Params.SignBit =
6390 MIRBuilder
6391 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6392 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6393 .getReg(0);
6394
6395 SmallVector<Register, 8> DstParts(NumParts);
6396 for (unsigned I = 0; I < NumParts; ++I)
6397 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6398 Params, TargetTy, ShiftAmtTy);
6399
6400 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6401 MI.eraseFromParent();
6402 return Legalized;
6403}
6404
6405LegalizerHelper::LegalizeResult
6406LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6407 Register DstReg = MI.getOperand(0).getReg();
6408 Register SrcReg = MI.getOperand(1).getReg();
6409 Register AmtReg = MI.getOperand(2).getReg();
6410 LLT DstTy = MRI.getType(DstReg);
6411 LLT ShiftAmtTy = MRI.getType(AmtReg);
6412
6413 const unsigned DstBits = DstTy.getScalarSizeInBits();
6414 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6415 const unsigned NumParts = DstBits / TargetBits;
6416
6417 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6418 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6419
6420 // If the shift amount is known at compile time, we can use direct indexing
6421 // instead of generating select chains in the general case.
6422 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6423 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6424 ShiftAmtTy);
6425
6426 // For runtime-variable shift amounts, we must generate a more complex
6427 // sequence that handles all possible shift values using select chains.
6428
6429 // Split the input into target-sized pieces
6430 SmallVector<Register, 8> SrcParts;
6431 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6432
6433 // Shifting by zero should be a no-op.
6434 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6435 LLT BoolTy = LLT::scalar(1);
6436 auto IsZeroShift =
6437 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6438
6439 // Any wide shift can be decomposed into two components:
6440 // 1. WordShift: number of complete target-sized words to shift
6441 // 2. BitShift: number of bits to shift within each word
6442 //
6443 // Example: 128-bit >> 50 with 32-bit target:
6444 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6445 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6446 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6447 auto TargetBitsLog2Const =
6448 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6449 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6450
6451 Register WordShift =
6452 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6453 Register BitShift =
6454 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6455
6456 // Fill values:
6457 // - SHL/LSHR: fill with zeros
6458 // - ASHR: fill with sign-extended MSB
6459 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6460
6461 Register FillValue;
6462 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6463 auto TargetBitsMinusOneConst =
6464 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6465 FillValue = MIRBuilder
6466 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6467 TargetBitsMinusOneConst)
6468 .getReg(0);
6469 } else {
6470 FillValue = ZeroReg;
6471 }
6472
6473 SmallVector<Register, 8> DstParts(NumParts);
6474
6475 // For each output part, generate a select chain that chooses the correct
6476 // result based on the runtime WordShift value. This handles all possible
6477 // word shift amounts by pre-calculating what each would produce.
6478 for (unsigned I = 0; I < NumParts; ++I) {
6479 // Initialize with appropriate default value for this shift type
6480 Register InBoundsResult = FillValue;
6481
6482 // clang-format off
6483 // Build a branchless select chain by pre-computing results for all possible
6484 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6485 //
6486 // K=0: select(WordShift==0, result0, FillValue)
6487 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6488 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6489 // clang-format on
6490 for (unsigned K = 0; K < NumParts; ++K) {
6491 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6492 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6493 WordShift, WordShiftKConst);
6494
6495 // Calculate source indices for this word shift
6496 //
6497 // For 4-part 128-bit value with K=1 word shift:
6498 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6499 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6500 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6501 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6502 int MainSrcIdx;
6503 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6504
6505 switch (MI.getOpcode()) {
6506 case TargetOpcode::G_SHL:
6507 MainSrcIdx = (int)I - (int)K;
6508 CarrySrcIdx = MainSrcIdx - 1;
6509 break;
6510 case TargetOpcode::G_LSHR:
6511 case TargetOpcode::G_ASHR:
6512 MainSrcIdx = (int)I + (int)K;
6513 CarrySrcIdx = MainSrcIdx + 1;
6514 break;
6515 default:
6516 llvm_unreachable("Not a shift");
6517 }
6518
6519 // Check bounds and build the result for this word shift
6520 Register ResultForK;
6521 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6522 Register MainOp = SrcParts[MainSrcIdx];
6523 Register CarryOp;
6524
6525 // Determine carry operand with bounds checking
6526 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6527 CarryOp = SrcParts[CarrySrcIdx];
6528 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6529 CarrySrcIdx >= (int)NumParts)
6530 CarryOp = FillValue; // Use sign extension
6531
6532 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6533 TargetTy, CarryOp);
6534 } else {
6535 // Out of bounds - use fill value for this k
6536 ResultForK = FillValue;
6537 }
6538
6539 // Select this result if WordShift equals k
6540 InBoundsResult =
6541 MIRBuilder
6542 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6543 .getReg(0);
6544 }
6545
6546 // Handle zero-shift special case: if shift is 0, use original input
6547 DstParts[I] =
6548 MIRBuilder
6549 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6550 .getReg(0);
6551 }
6552
6553 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6554 MI.eraseFromParent();
6555 return Legalized;
6556}
6557
6558LegalizerHelper::LegalizeResult
6559LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6560 LLT MoreTy) {
6561 assert(TypeIdx == 0 && "Expecting only Idx 0");
6562
6563 Observer.changingInstr(MI);
6564 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6565 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6566 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6567 moreElementsVectorSrc(MI, MoreTy, I);
6568 }
6569
6570 MachineBasicBlock &MBB = *MI.getParent();
6571 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6572 moreElementsVectorDst(MI, MoreTy, 0);
6573 Observer.changedInstr(MI);
6574 return Legalized;
6575}
6576
6577MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6578 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6579 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6580
6581 switch (Opcode) {
6582 default:
6584 "getNeutralElementForVecReduce called with invalid opcode!");
6585 case TargetOpcode::G_VECREDUCE_ADD:
6586 case TargetOpcode::G_VECREDUCE_OR:
6587 case TargetOpcode::G_VECREDUCE_XOR:
6588 case TargetOpcode::G_VECREDUCE_UMAX:
6589 return MIRBuilder.buildConstant(Ty, 0);
6590 case TargetOpcode::G_VECREDUCE_MUL:
6591 return MIRBuilder.buildConstant(Ty, 1);
6592 case TargetOpcode::G_VECREDUCE_AND:
6593 case TargetOpcode::G_VECREDUCE_UMIN:
6594 return MIRBuilder.buildConstant(
6595 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6596 case TargetOpcode::G_VECREDUCE_SMAX:
6597 return MIRBuilder.buildConstant(
6598 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6599 case TargetOpcode::G_VECREDUCE_SMIN:
6600 return MIRBuilder.buildConstant(
6601 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6602 case TargetOpcode::G_VECREDUCE_FADD:
6603 return MIRBuilder.buildFConstant(Ty, -0.0);
6604 case TargetOpcode::G_VECREDUCE_FMUL:
6605 return MIRBuilder.buildFConstant(Ty, 1.0);
6606 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6607 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6608 assert(false && "getNeutralElementForVecReduce unimplemented for "
6609 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6610 }
6611 llvm_unreachable("switch expected to return!");
6612}
6613
6614LegalizerHelper::LegalizeResult
6615LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6616 LLT MoreTy) {
6617 unsigned Opc = MI.getOpcode();
6618 switch (Opc) {
6619 case TargetOpcode::G_IMPLICIT_DEF:
6620 case TargetOpcode::G_LOAD: {
6621 if (TypeIdx != 0)
6622 return UnableToLegalize;
6623 Observer.changingInstr(MI);
6624 moreElementsVectorDst(MI, MoreTy, 0);
6625 Observer.changedInstr(MI);
6626 return Legalized;
6627 }
6628 case TargetOpcode::G_STORE:
6629 if (TypeIdx != 0)
6630 return UnableToLegalize;
6631 Observer.changingInstr(MI);
6632 moreElementsVectorSrc(MI, MoreTy, 0);
6633 Observer.changedInstr(MI);
6634 return Legalized;
6635 case TargetOpcode::G_AND:
6636 case TargetOpcode::G_OR:
6637 case TargetOpcode::G_XOR:
6638 case TargetOpcode::G_ADD:
6639 case TargetOpcode::G_SUB:
6640 case TargetOpcode::G_MUL:
6641 case TargetOpcode::G_FADD:
6642 case TargetOpcode::G_FSUB:
6643 case TargetOpcode::G_FMUL:
6644 case TargetOpcode::G_FDIV:
6645 case TargetOpcode::G_FCOPYSIGN:
6646 case TargetOpcode::G_UADDSAT:
6647 case TargetOpcode::G_USUBSAT:
6648 case TargetOpcode::G_SADDSAT:
6649 case TargetOpcode::G_SSUBSAT:
6650 case TargetOpcode::G_SMIN:
6651 case TargetOpcode::G_SMAX:
6652 case TargetOpcode::G_UMIN:
6653 case TargetOpcode::G_UMAX:
6654 case TargetOpcode::G_FMINNUM:
6655 case TargetOpcode::G_FMAXNUM:
6656 case TargetOpcode::G_FMINNUM_IEEE:
6657 case TargetOpcode::G_FMAXNUM_IEEE:
6658 case TargetOpcode::G_FMINIMUM:
6659 case TargetOpcode::G_FMAXIMUM:
6660 case TargetOpcode::G_FMINIMUMNUM:
6661 case TargetOpcode::G_FMAXIMUMNUM:
6662 case TargetOpcode::G_STRICT_FADD:
6663 case TargetOpcode::G_STRICT_FSUB:
6664 case TargetOpcode::G_STRICT_FMUL:
6665 case TargetOpcode::G_SHL:
6666 case TargetOpcode::G_ASHR:
6667 case TargetOpcode::G_LSHR: {
6668 Observer.changingInstr(MI);
6669 moreElementsVectorSrc(MI, MoreTy, 1);
6670 moreElementsVectorSrc(MI, MoreTy, 2);
6671 moreElementsVectorDst(MI, MoreTy, 0);
6672 Observer.changedInstr(MI);
6673 return Legalized;
6674 }
6675 case TargetOpcode::G_FMA:
6676 case TargetOpcode::G_STRICT_FMA:
6677 case TargetOpcode::G_FSHR:
6678 case TargetOpcode::G_FSHL: {
6679 Observer.changingInstr(MI);
6680 moreElementsVectorSrc(MI, MoreTy, 1);
6681 moreElementsVectorSrc(MI, MoreTy, 2);
6682 moreElementsVectorSrc(MI, MoreTy, 3);
6683 moreElementsVectorDst(MI, MoreTy, 0);
6684 Observer.changedInstr(MI);
6685 return Legalized;
6686 }
6687 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6688 case TargetOpcode::G_EXTRACT:
6689 if (TypeIdx != 1)
6690 return UnableToLegalize;
6691 Observer.changingInstr(MI);
6692 moreElementsVectorSrc(MI, MoreTy, 1);
6693 Observer.changedInstr(MI);
6694 return Legalized;
6695 case TargetOpcode::G_INSERT:
6696 case TargetOpcode::G_INSERT_VECTOR_ELT:
6697 case TargetOpcode::G_FREEZE:
6698 case TargetOpcode::G_FNEG:
6699 case TargetOpcode::G_FABS:
6700 case TargetOpcode::G_FSQRT:
6701 case TargetOpcode::G_FCEIL:
6702 case TargetOpcode::G_FFLOOR:
6703 case TargetOpcode::G_FNEARBYINT:
6704 case TargetOpcode::G_FRINT:
6705 case TargetOpcode::G_INTRINSIC_ROUND:
6706 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6707 case TargetOpcode::G_INTRINSIC_TRUNC:
6708 case TargetOpcode::G_BITREVERSE:
6709 case TargetOpcode::G_BSWAP:
6710 case TargetOpcode::G_FCANONICALIZE:
6711 case TargetOpcode::G_SEXT_INREG:
6712 case TargetOpcode::G_ABS:
6713 case TargetOpcode::G_CTLZ:
6714 case TargetOpcode::G_CTPOP:
6715 if (TypeIdx != 0)
6716 return UnableToLegalize;
6717 Observer.changingInstr(MI);
6718 moreElementsVectorSrc(MI, MoreTy, 1);
6719 moreElementsVectorDst(MI, MoreTy, 0);
6720 Observer.changedInstr(MI);
6721 return Legalized;
6722 case TargetOpcode::G_SELECT: {
6723 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6724 if (TypeIdx == 1) {
6725 if (!CondTy.isScalar() ||
6726 DstTy.getElementCount() != MoreTy.getElementCount())
6727 return UnableToLegalize;
6728
6729 // This is turning a scalar select of vectors into a vector
6730 // select. Broadcast the select condition.
6731 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6732 Observer.changingInstr(MI);
6733 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6734 Observer.changedInstr(MI);
6735 return Legalized;
6736 }
6737
6738 if (CondTy.isVector())
6739 return UnableToLegalize;
6740
6741 Observer.changingInstr(MI);
6742 moreElementsVectorSrc(MI, MoreTy, 2);
6743 moreElementsVectorSrc(MI, MoreTy, 3);
6744 moreElementsVectorDst(MI, MoreTy, 0);
6745 Observer.changedInstr(MI);
6746 return Legalized;
6747 }
6748 case TargetOpcode::G_UNMERGE_VALUES:
6749 return UnableToLegalize;
6750 case TargetOpcode::G_PHI:
6751 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6752 case TargetOpcode::G_SHUFFLE_VECTOR:
6753 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6754 case TargetOpcode::G_BUILD_VECTOR: {
6755 SmallVector<SrcOp, 8> Elts;
6756 for (auto Op : MI.uses()) {
6757 Elts.push_back(Op.getReg());
6758 }
6759
6760 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6761 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6762 }
6763
6764 MIRBuilder.buildDeleteTrailingVectorElements(
6765 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6766 MI.eraseFromParent();
6767 return Legalized;
6768 }
6769 case TargetOpcode::G_SEXT:
6770 case TargetOpcode::G_ZEXT:
6771 case TargetOpcode::G_ANYEXT:
6772 case TargetOpcode::G_TRUNC:
6773 case TargetOpcode::G_FPTRUNC:
6774 case TargetOpcode::G_FPEXT:
6775 case TargetOpcode::G_FPTOSI:
6776 case TargetOpcode::G_FPTOUI:
6777 case TargetOpcode::G_FPTOSI_SAT:
6778 case TargetOpcode::G_FPTOUI_SAT:
6779 case TargetOpcode::G_SITOFP:
6780 case TargetOpcode::G_UITOFP: {
6781 Observer.changingInstr(MI);
6782 LLT SrcExtTy;
6783 LLT DstExtTy;
6784 if (TypeIdx == 0) {
6785 DstExtTy = MoreTy;
6786 SrcExtTy = LLT::fixed_vector(
6787 MoreTy.getNumElements(),
6788 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6789 } else {
6790 DstExtTy = LLT::fixed_vector(
6791 MoreTy.getNumElements(),
6792 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6793 SrcExtTy = MoreTy;
6794 }
6795 moreElementsVectorSrc(MI, SrcExtTy, 1);
6796 moreElementsVectorDst(MI, DstExtTy, 0);
6797 Observer.changedInstr(MI);
6798 return Legalized;
6799 }
6800 case TargetOpcode::G_ICMP:
6801 case TargetOpcode::G_FCMP: {
6802 if (TypeIdx != 1)
6803 return UnableToLegalize;
6804
6805 Observer.changingInstr(MI);
6806 moreElementsVectorSrc(MI, MoreTy, 2);
6807 moreElementsVectorSrc(MI, MoreTy, 3);
6808 LLT CondTy = LLT::fixed_vector(
6809 MoreTy.getNumElements(),
6810 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6811 moreElementsVectorDst(MI, CondTy, 0);
6812 Observer.changedInstr(MI);
6813 return Legalized;
6814 }
6815 case TargetOpcode::G_BITCAST: {
6816 if (TypeIdx != 0)
6817 return UnableToLegalize;
6818
6819 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6820 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6821
6822 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6823 if (coefficient % DstTy.getNumElements() != 0)
6824 return UnableToLegalize;
6825
6826 coefficient = coefficient / DstTy.getNumElements();
6827
6828 LLT NewTy = SrcTy.changeElementCount(
6829 ElementCount::get(coefficient, MoreTy.isScalable()));
6830 Observer.changingInstr(MI);
6831 moreElementsVectorSrc(MI, NewTy, 1);
6832 moreElementsVectorDst(MI, MoreTy, 0);
6833 Observer.changedInstr(MI);
6834 return Legalized;
6835 }
6836 case TargetOpcode::G_VECREDUCE_FADD:
6837 case TargetOpcode::G_VECREDUCE_FMUL:
6838 case TargetOpcode::G_VECREDUCE_ADD:
6839 case TargetOpcode::G_VECREDUCE_MUL:
6840 case TargetOpcode::G_VECREDUCE_AND:
6841 case TargetOpcode::G_VECREDUCE_OR:
6842 case TargetOpcode::G_VECREDUCE_XOR:
6843 case TargetOpcode::G_VECREDUCE_SMAX:
6844 case TargetOpcode::G_VECREDUCE_SMIN:
6845 case TargetOpcode::G_VECREDUCE_UMAX:
6846 case TargetOpcode::G_VECREDUCE_UMIN: {
6847 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6848 MachineOperand &MO = MI.getOperand(1);
6849 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6850 auto NeutralElement = getNeutralElementForVecReduce(
6851 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6852
6853 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6854 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6855 i != e; i++) {
6856 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6857 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6858 NeutralElement, Idx);
6859 }
6860
6861 Observer.changingInstr(MI);
6862 MO.setReg(NewVec.getReg(0));
6863 Observer.changedInstr(MI);
6864 return Legalized;
6865 }
6866
6867 default:
6868 return UnableToLegalize;
6869 }
6870}
6871
6872LegalizerHelper::LegalizeResult
6873LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6874 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6875 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6876 unsigned MaskNumElts = Mask.size();
6877 unsigned SrcNumElts = SrcTy.getNumElements();
6878 LLT DestEltTy = DstTy.getElementType();
6879
6880 if (MaskNumElts == SrcNumElts)
6881 return Legalized;
6882
6883 if (MaskNumElts < SrcNumElts) {
6884 // Extend mask to match new destination vector size with
6885 // undef values.
6886 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6887 llvm::copy(Mask, NewMask.begin());
6888
6889 moreElementsVectorDst(MI, SrcTy, 0);
6890 MIRBuilder.setInstrAndDebugLoc(MI);
6891 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6892 MI.getOperand(1).getReg(),
6893 MI.getOperand(2).getReg(), NewMask);
6894 MI.eraseFromParent();
6895
6896 return Legalized;
6897 }
6898
6899 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6900 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6901 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6902
6903 // Create new source vectors by concatenating the initial
6904 // source vectors with undefined vectors of the same size.
6905 auto Undef = MIRBuilder.buildUndef(SrcTy);
6906 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6907 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6908 MOps1[0] = MI.getOperand(1).getReg();
6909 MOps2[0] = MI.getOperand(2).getReg();
6910
6911 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6912 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6913
6914 // Readjust mask for new input vector length.
6915 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6916 for (unsigned I = 0; I != MaskNumElts; ++I) {
6917 int Idx = Mask[I];
6918 if (Idx >= static_cast<int>(SrcNumElts))
6919 Idx += PaddedMaskNumElts - SrcNumElts;
6920 MappedOps[I] = Idx;
6921 }
6922
6923 // If we got more elements than required, extract subvector.
6924 if (MaskNumElts != PaddedMaskNumElts) {
6925 auto Shuffle =
6926 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6927
6928 SmallVector<Register, 16> Elts(MaskNumElts);
6929 for (unsigned I = 0; I < MaskNumElts; ++I) {
6930 Elts[I] =
6931 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6932 .getReg(0);
6933 }
6934 MIRBuilder.buildBuildVector(DstReg, Elts);
6935 } else {
6936 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6937 }
6938
6939 MI.eraseFromParent();
6940 return Legalized;
6941}
6942
6943LegalizerHelper::LegalizeResult
6944LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6945 unsigned int TypeIdx, LLT MoreTy) {
6946 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6947 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6948 unsigned NumElts = DstTy.getNumElements();
6949 unsigned WidenNumElts = MoreTy.getNumElements();
6950
6951 if (DstTy.isVector() && Src1Ty.isVector() &&
6952 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6953 return equalizeVectorShuffleLengths(MI);
6954 }
6955
6956 if (TypeIdx != 0)
6957 return UnableToLegalize;
6958
6959 // Expect a canonicalized shuffle.
6960 if (DstTy != Src1Ty || DstTy != Src2Ty)
6961 return UnableToLegalize;
6962
6963 moreElementsVectorSrc(MI, MoreTy, 1);
6964 moreElementsVectorSrc(MI, MoreTy, 2);
6965
6966 // Adjust mask based on new input vector length.
6967 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6968 for (unsigned I = 0; I != NumElts; ++I) {
6969 int Idx = Mask[I];
6970 if (Idx < static_cast<int>(NumElts))
6971 NewMask[I] = Idx;
6972 else
6973 NewMask[I] = Idx - NumElts + WidenNumElts;
6974 }
6975 moreElementsVectorDst(MI, MoreTy, 0);
6976 MIRBuilder.setInstrAndDebugLoc(MI);
6977 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6978 MI.getOperand(1).getReg(),
6979 MI.getOperand(2).getReg(), NewMask);
6980 MI.eraseFromParent();
6981 return Legalized;
6982}
6983
6984void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6985 ArrayRef<Register> Src1Regs,
6986 ArrayRef<Register> Src2Regs,
6987 LLT NarrowTy) {
6988 MachineIRBuilder &B = MIRBuilder;
6989 unsigned SrcParts = Src1Regs.size();
6990 unsigned DstParts = DstRegs.size();
6991
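// Schoolbook multiplication: result limb DstIdx accumulates the low halves
// of all partial products Src1Regs[j] * Src2Regs[i] with i + j == DstIdx,
// the high halves of the products that contributed to limb DstIdx - 1, and
// the carries produced while summing the previous limb.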
6992 unsigned DstIdx = 0; // Low bits of the result.
6993 Register FactorSum =
6994 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6995 DstRegs[DstIdx] = FactorSum;
6996
6997 Register CarrySumPrevDstIdx;
6998 SmallVector<Register, 4> Factors;
6999
7000 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7001 // Collect low parts of muls for DstIdx.
7002 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7003 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7004 MachineInstrBuilder Mul =
7005 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7006 Factors.push_back(Mul.getReg(0));
7007 }
7008 // Collect high parts of muls from previous DstIdx.
7009 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7010 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7011 MachineInstrBuilder Umulh =
7012 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7013 Factors.push_back(Umulh.getReg(0));
7014 }
7015 // Add CarrySum from additions calculated for previous DstIdx.
7016 if (DstIdx != 1) {
7017 Factors.push_back(CarrySumPrevDstIdx);
7018 }
7019
7020 Register CarrySum;
7021 // Add all factors and accumulate all carries into CarrySum.
7022 if (DstIdx != DstParts - 1) {
7023 MachineInstrBuilder Uaddo =
7024 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7025 FactorSum = Uaddo.getReg(0);
7026 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7027 for (unsigned i = 2; i < Factors.size(); ++i) {
7028 MachineInstrBuilder Uaddo =
7029 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7030 FactorSum = Uaddo.getReg(0);
7031 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7032 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7033 }
7034 } else {
7035 // Since value for the next index is not calculated, neither is CarrySum.
7036 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7037 for (unsigned i = 2; i < Factors.size(); ++i)
7038 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7039 }
7040
7041 CarrySumPrevDstIdx = CarrySum;
7042 DstRegs[DstIdx] = FactorSum;
7043 Factors.clear();
7044 }
7045}
7046
7047LegalizerHelper::LegalizeResult
7048LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
7049 LLT NarrowTy) {
7050 if (TypeIdx != 0)
7051 return UnableToLegalize;
7052
7053 Register DstReg = MI.getOperand(0).getReg();
7054 LLT DstType = MRI.getType(DstReg);
7055 // FIXME: add support for vector types
7056 if (DstType.isVector())
7057 return UnableToLegalize;
7058
7059 unsigned Opcode = MI.getOpcode();
7060 unsigned OpO, OpE, OpF;
7061 switch (Opcode) {
7062 case TargetOpcode::G_SADDO:
7063 case TargetOpcode::G_SADDE:
7064 case TargetOpcode::G_UADDO:
7065 case TargetOpcode::G_UADDE:
7066 case TargetOpcode::G_ADD:
7067 OpO = TargetOpcode::G_UADDO;
7068 OpE = TargetOpcode::G_UADDE;
7069 OpF = TargetOpcode::G_UADDE;
7070 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7071 OpF = TargetOpcode::G_SADDE;
7072 break;
7073 case TargetOpcode::G_SSUBO:
7074 case TargetOpcode::G_SSUBE:
7075 case TargetOpcode::G_USUBO:
7076 case TargetOpcode::G_USUBE:
7077 case TargetOpcode::G_SUB:
7078 OpO = TargetOpcode::G_USUBO;
7079 OpE = TargetOpcode::G_USUBE;
7080 OpF = TargetOpcode::G_USUBE;
7081 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7082 OpF = TargetOpcode::G_SSUBE;
7083 break;
7084 default:
7085 llvm_unreachable("Unexpected add/sub opcode!");
7086 }
7087
7088 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7089 unsigned NumDefs = MI.getNumExplicitDefs();
7090 Register Src1 = MI.getOperand(NumDefs).getReg();
7091 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7092 Register CarryDst, CarryIn;
7093 if (NumDefs == 2)
7094 CarryDst = MI.getOperand(1).getReg();
7095 if (MI.getNumOperands() == NumDefs + 3)
7096 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7097
7098 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7099 LLT LeftoverTy, DummyTy;
7100 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7101 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7102 MIRBuilder, MRI);
7103 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7104 MRI);
7105
7106 int NarrowParts = Src1Regs.size();
7107 Src1Regs.append(Src1Left);
7108 Src2Regs.append(Src2Left);
7109 DstRegs.reserve(Src1Regs.size());
7110
7111 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7112 Register DstReg =
7113 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7114 Register CarryOut;
7115 // Forward the final carry-out to the destination register
7116 if (i == e - 1 && CarryDst)
7117 CarryOut = CarryDst;
7118 else
7119 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7120
7121 if (!CarryIn) {
7122 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7123 {Src1Regs[i], Src2Regs[i]});
7124 } else if (i == e - 1) {
7125 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7126 {Src1Regs[i], Src2Regs[i], CarryIn});
7127 } else {
7128 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7129 {Src1Regs[i], Src2Regs[i], CarryIn});
7130 }
7131
7132 DstRegs.push_back(DstReg);
7133 CarryIn = CarryOut;
7134 }
7135 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7136 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7137 ArrayRef(DstRegs).drop_front(NarrowParts));
7138
7139 MI.eraseFromParent();
7140 return Legalized;
7141}
7142
7143LegalizerHelper::LegalizeResult
7144LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7145 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7146
7147 LLT Ty = MRI.getType(DstReg);
7148 if (Ty.isVector())
7149 return UnableToLegalize;
7150
7151 unsigned Size = Ty.getSizeInBits();
7152 unsigned NarrowSize = NarrowTy.getSizeInBits();
7153 if (Size % NarrowSize != 0)
7154 return UnableToLegalize;
7155
7156 unsigned NumParts = Size / NarrowSize;
7157 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7158 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7159
7160 SmallVector<Register, 2> Src1Parts, Src2Parts;
7161 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7162 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7163 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7164 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7165
7166 // Take only high half of registers if this is high mul.
7167 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7168 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7169 MI.eraseFromParent();
7170 return Legalized;
7171}
7172
7173LegalizerHelper::LegalizeResult
7174LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7175 LLT NarrowTy) {
7176 if (TypeIdx != 0)
7177 return UnableToLegalize;
7178
7179 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7180
7181 Register Src = MI.getOperand(1).getReg();
7182 LLT SrcTy = MRI.getType(Src);
7183
7184 // If all finite floats fit into the narrowed integer type, we can just swap
7185 // out the result type. This is practically only useful for conversions from
7186 // half to at least 16-bits, so just handle the one case.
7187 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7188 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7189 return UnableToLegalize;
7190
7191 Observer.changingInstr(MI);
7192 narrowScalarDst(MI, NarrowTy, 0,
7193 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7194 Observer.changedInstr(MI);
7195 return Legalized;
7196}
7197
7198LegalizerHelper::LegalizeResult
7199LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7200 LLT NarrowTy) {
7201 if (TypeIdx != 1)
7202 return UnableToLegalize;
7203
7204 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7205
7206 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7207 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7208 // NarrowSize.
7209 if (SizeOp1 % NarrowSize != 0)
7210 return UnableToLegalize;
7211 int NumParts = SizeOp1 / NarrowSize;
7212
7213 SmallVector<Register, 2> SrcRegs, DstRegs;
7214 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7215 MIRBuilder, MRI);
7216
7217 Register OpReg = MI.getOperand(0).getReg();
7218 uint64_t OpStart = MI.getOperand(2).getImm();
7219 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7220 for (int i = 0; i < NumParts; ++i) {
7221 unsigned SrcStart = i * NarrowSize;
7222
7223 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7224 // No part of the extract uses this subregister, ignore it.
7225 continue;
7226 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7227 // The entire subregister is extracted, forward the value.
7228 DstRegs.push_back(SrcRegs[i]);
7229 continue;
7230 }
7231
7232 // OpSegStart is where this destination segment would start in OpReg if it
7233 // extended infinitely in both directions.
7234 int64_t ExtractOffset;
7235 uint64_t SegSize;
7236 if (OpStart < SrcStart) {
7237 ExtractOffset = 0;
7238 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7239 } else {
7240 ExtractOffset = OpStart - SrcStart;
7241 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7242 }
7243
7244 Register SegReg = SrcRegs[i];
7245 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7246 // A genuine extract is needed.
7247 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7248 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7249 }
7250
7251 DstRegs.push_back(SegReg);
7252 }
7253
7254 Register DstReg = MI.getOperand(0).getReg();
7255 if (MRI.getType(DstReg).isVector())
7256 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7257 else if (DstRegs.size() > 1)
7258 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7259 else
7260 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7261 MI.eraseFromParent();
7262 return Legalized;
7263}
7264
7265LegalizerHelper::LegalizeResult
7266LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7267 LLT NarrowTy) {
7268 // FIXME: Don't know how to handle secondary types yet.
7269 if (TypeIdx != 0)
7270 return UnableToLegalize;
7271
7272 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7273 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7274 LLT LeftoverTy;
7275 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7276 LeftoverRegs, MIRBuilder, MRI);
7277
7278 SrcRegs.append(LeftoverRegs);
7279
7280 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7281 Register OpReg = MI.getOperand(2).getReg();
7282 uint64_t OpStart = MI.getOperand(3).getImm();
7283 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7284 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7285 unsigned DstStart = I * NarrowSize;
7286
7287 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7288 // The entire subregister is defined by this insert, forward the new
7289 // value.
7290 DstRegs.push_back(OpReg);
7291 continue;
7292 }
7293
7294 Register SrcReg = SrcRegs[I];
7295 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7296 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7297 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7298 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7299 }
7300
7301 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7302 // No part of the insert affects this subregister, forward the original.
7303 DstRegs.push_back(SrcReg);
7304 continue;
7305 }
7306
7307 // OpSegStart is where this destination segment would start in OpReg if it
7308 // extended infinitely in both directions.
7309 int64_t ExtractOffset, InsertOffset;
7310 uint64_t SegSize;
7311 if (OpStart < DstStart) {
7312 InsertOffset = 0;
7313 ExtractOffset = DstStart - OpStart;
7314 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7315 } else {
7316 InsertOffset = OpStart - DstStart;
7317 ExtractOffset = 0;
7318 SegSize =
7319 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7320 }
7321
7322 Register SegReg = OpReg;
7323 if (ExtractOffset != 0 || SegSize != OpSize) {
7324 // A genuine extract is needed.
7325 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7326 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7327 }
7328
7329 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7330 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7331 DstRegs.push_back(DstReg);
7332 }
7333
7334 uint64_t WideSize = DstRegs.size() * NarrowSize;
7335 Register DstReg = MI.getOperand(0).getReg();
7336 if (WideSize > RegTy.getSizeInBits()) {
7337 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7338 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7339 MIRBuilder.buildTrunc(DstReg, MergeReg);
7340 } else
7341 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7342
7343 MI.eraseFromParent();
7344 return Legalized;
7345}
7346
7347LegalizerHelper::LegalizeResult
7348LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7349 LLT NarrowTy) {
7350 Register DstReg = MI.getOperand(0).getReg();
7351 LLT DstTy = MRI.getType(DstReg);
7352
7353 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7354
7355 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7356 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7357 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7358 LLT LeftoverTy;
7359 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7360 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7361 return UnableToLegalize;
7362
7363 LLT Unused;
7364 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7365 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7366 llvm_unreachable("inconsistent extractParts result");
7367
7368 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7369 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7370 {Src0Regs[I], Src1Regs[I]});
7371 DstRegs.push_back(Inst.getReg(0));
7372 }
7373
7374 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7375 auto Inst = MIRBuilder.buildInstr(
7376 MI.getOpcode(),
7377 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7378 DstLeftoverRegs.push_back(Inst.getReg(0));
7379 }
7380
7381 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7382 LeftoverTy, DstLeftoverRegs);
7383
7384 MI.eraseFromParent();
7385 return Legalized;
7386}
7387
7388LegalizerHelper::LegalizeResult
7389LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7390 LLT NarrowTy) {
7391 if (TypeIdx != 0)
7392 return UnableToLegalize;
7393
7394 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7395
7396 LLT DstTy = MRI.getType(DstReg);
7397 if (DstTy.isVector())
7398 return UnableToLegalize;
7399
7400 SmallVector<Register, 8> Parts;
7401 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7402 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7403 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7404
7405 MI.eraseFromParent();
7406 return Legalized;
7407}
7408
7409LegalizerHelper::LegalizeResult
7410LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7411 LLT NarrowTy) {
7412 if (TypeIdx != 0)
7413 return UnableToLegalize;
7414
7415 Register CondReg = MI.getOperand(1).getReg();
7416 LLT CondTy = MRI.getType(CondReg);
7417 if (CondTy.isVector()) // TODO: Handle vselect
7418 return UnableToLegalize;
7419
7420 Register DstReg = MI.getOperand(0).getReg();
7421 LLT DstTy = MRI.getType(DstReg);
7422
7423 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7424 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7425 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7426 LLT LeftoverTy;
7427 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7428 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7429 return UnableToLegalize;
7430
7431 LLT Unused;
7432 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7433 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7434 llvm_unreachable("inconsistent extractParts result");
7435
7436 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7437 auto Select = MIRBuilder.buildSelect(NarrowTy,
7438 CondReg, Src1Regs[I], Src2Regs[I]);
7439 DstRegs.push_back(Select.getReg(0));
7440 }
7441
7442 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7443 auto Select = MIRBuilder.buildSelect(
7444 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7445 DstLeftoverRegs.push_back(Select.getReg(0));
7446 }
7447
7448 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7449 LeftoverTy, DstLeftoverRegs);
7450
7451 MI.eraseFromParent();
7452 return Legalized;
7453}
7454
7455LegalizerHelper::LegalizeResult
7456LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7457 LLT NarrowTy) {
7458 if (TypeIdx != 1)
7459 return UnableToLegalize;
7460
7461 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7462 unsigned NarrowSize = NarrowTy.getSizeInBits();
7463
7464 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7465 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7466
7467 MachineIRBuilder &B = MIRBuilder;
7468 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7469 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
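// e.g. splitting a 64-bit source into 32-bit halves, with Hi = 0 and
// Lo = 0x0000FFFF: ctlz = 32 + ctlz(Lo) = 32 + 16 = 48.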
7470 auto C_0 = B.buildConstant(NarrowTy, 0);
7471 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7472 UnmergeSrc.getReg(1), C_0);
7473 auto LoCTLZ = IsUndef ?
7474 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7475 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7476 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7477 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7478 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7479 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7480
7481 MI.eraseFromParent();
7482 return Legalized;
7483 }
7484
7485 return UnableToLegalize;
7486}
7487
7488LegalizerHelper::LegalizeResult
7489LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7490 LLT NarrowTy) {
7491 if (TypeIdx != 1)
7492 return UnableToLegalize;
7493
7494 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7495 unsigned NarrowSize = NarrowTy.getSizeInBits();
7496
7497 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7498 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7499
7500 MachineIRBuilder &B = MIRBuilder;
7501 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7502 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
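// e.g. splitting a 64-bit source into 32-bit halves, with Lo = 0 and
// Hi = 0x00010000: cttz = cttz(Hi) + 32 = 16 + 32 = 48.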
7503 auto C_0 = B.buildConstant(NarrowTy, 0);
7504 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7505 UnmergeSrc.getReg(0), C_0);
7506 auto HiCTTZ = IsUndef ?
7507 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7508 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7509 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7510 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7511 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7512 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7513
7514 MI.eraseFromParent();
7515 return Legalized;
7516 }
7517
7518 return UnableToLegalize;
7519}
7520
7521LegalizerHelper::LegalizeResult
7522LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7523 LLT NarrowTy) {
7524 if (TypeIdx != 1)
7525 return UnableToLegalize;
7526
7527 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7528 unsigned NarrowSize = NarrowTy.getSizeInBits();
7529
7530 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7531 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7532
7533 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7534 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7535 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7536
7537 MI.eraseFromParent();
7538 return Legalized;
7539 }
7540
7541 return UnableToLegalize;
7542}
7543
7544LegalizerHelper::LegalizeResult
7545LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7546 LLT NarrowTy) {
7547 if (TypeIdx != 1)
7548 return UnableToLegalize;
7549
7550 MachineIRBuilder &B = MIRBuilder;
7551 Register ExpReg = MI.getOperand(2).getReg();
7552 LLT ExpTy = MRI.getType(ExpReg);
7553
7554 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7555
7556 // Clamp the exponent to the range of the target type.
7557 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7558 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7559 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7560 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7561
7562 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7563 Observer.changingInstr(MI);
7564 MI.getOperand(2).setReg(Trunc.getReg(0));
7565 Observer.changedInstr(MI);
7566 return Legalized;
7567}
7568
7569LegalizerHelper::LegalizeResult
7570LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7571 unsigned Opc = MI.getOpcode();
7572 const auto &TII = MIRBuilder.getTII();
7573 auto isSupported = [this](const LegalityQuery &Q) {
7574 auto QAction = LI.getAction(Q).Action;
7575 return QAction == Legal || QAction == Libcall || QAction == Custom;
7576 };
7577 switch (Opc) {
7578 default:
7579 return UnableToLegalize;
7580 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7581 // This trivially expands to CTLZ.
7582 Observer.changingInstr(MI);
7583 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7584 Observer.changedInstr(MI);
7585 return Legalized;
7586 }
7587 case TargetOpcode::G_CTLZ: {
7588 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7589 unsigned Len = SrcTy.getSizeInBits();
7590
7591 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7592 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7593 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7594 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7595 auto ICmp = MIRBuilder.buildICmp(
7596 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7597 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7598 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7599 MI.eraseFromParent();
7600 return Legalized;
7601 }
7602 // for now, we do this:
7603 // NewLen = NextPowerOf2(Len);
7604 // x = x | (x >> 1);
7605 // x = x | (x >> 2);
7606 // ...
7607 // x = x | (x >>16);
7608 // x = x | (x >>32); // for 64-bit input
7609 // Up to NewLen/2
7610 // return Len - popcount(x);
7611 //
7612 // Ref: "Hacker's Delight" by Henry Warren
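// e.g. for an 8-bit x = 0b00010000: the shift-or cascade smears the top set
// bit downward, giving 0b00011111; popcount(0b00011111) = 5 and
// Len - 5 = 8 - 5 = 3, which is indeed ctlz(x).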
7613 Register Op = SrcReg;
7614 unsigned NewLen = PowerOf2Ceil(Len);
7615 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7616 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7617 auto MIBOp = MIRBuilder.buildOr(
7618 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7619 Op = MIBOp.getReg(0);
7620 }
7621 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7622 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7623 MIBPop);
7624 MI.eraseFromParent();
7625 return Legalized;
7626 }
7627 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7628 // This trivially expands to CTTZ.
7629 Observer.changingInstr(MI);
7630 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7631 Observer.changedInstr(MI);
7632 return Legalized;
7633 }
7634 case TargetOpcode::G_CTTZ: {
7635 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7636
7637 unsigned Len = SrcTy.getSizeInBits();
7638 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7639 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7640 // zero.
7641 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7642 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7643 auto ICmp = MIRBuilder.buildICmp(
7644 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7645 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7646 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7647 MI.eraseFromParent();
7648 return Legalized;
7649 }
7650 // for now, we use: { return popcount(~x & (x - 1)); }
7651 // unless the target has ctlz but not ctpop, in which case we use:
7652 // { return 32 - nlz(~x & (x-1)); }
7653 // Ref: "Hacker's Delight" by Henry Warren
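// e.g. for an 8-bit x = 0b01101000: x - 1 = 0b01100111, ~x = 0b10010111,
// so ~x & (x - 1) = 0b00000111 and popcount gives 3, which is cttz(x).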
7654 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7655 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7656 auto MIBTmp = MIRBuilder.buildAnd(
7657 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7658 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7659 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7660 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7661 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7662 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7663 MI.eraseFromParent();
7664 return Legalized;
7665 }
7666 Observer.changingInstr(MI);
7667 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7668 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7669 Observer.changedInstr(MI);
7670 return Legalized;
7671 }
7672 case TargetOpcode::G_CTPOP: {
7673 Register SrcReg = MI.getOperand(1).getReg();
7674 LLT Ty = MRI.getType(SrcReg);
7675 unsigned Size = Ty.getSizeInBits();
7676 MachineIRBuilder &B = MIRBuilder;
7677
7678 // Count set bits in blocks of 2 bits. Default approach would be
7679 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7680 // We use following formula instead:
7681 // B2Count = val - { (val >> 1) & 0x55555555 }
7682 // since it gives the same result in blocks of 2 with one instruction fewer.
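// e.g. for the 8-bit value 0b11010110:
//   (val >> 1) & 0x55 = 0b01000001
//   B2Count = 0b11010110 - 0b01000001 = 0b10010101, i.e. per-2-bit counts
//   {2,1,1,1}, matching the set bits in blocks 11, 01, 01, 10.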
7683 auto C_1 = B.buildConstant(Ty, 1);
7684 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7685 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7686 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7687 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7688 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7689
7690 // To get the count in blocks of 4, add the values from adjacent blocks of 2.
7691 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7692 auto C_2 = B.buildConstant(Ty, 2);
7693 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7694 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7695 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7696 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7697 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7698 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7699
7700 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7701 // addition since count value sits in range {0,...,8} and 4 bits are enough
7702 // to hold such binary values. After addition high 4 bits still hold count
7703 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7704 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7705 auto C_4 = B.buildConstant(Ty, 4);
7706 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7707 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7708 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7709 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7710 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7711
7712 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7713 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7714 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7715 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7716
7717 // Shift count result from 8 high bits to low bits.
7718 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7719
7720 auto IsMulSupported = [this](const LLT Ty) {
7721 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7722 return Action == Legal || Action == WidenScalar || Action == Custom;
7723 };
7724 if (IsMulSupported(Ty)) {
7725 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7726 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7727 } else {
7728 auto ResTmp = B8Count;
7729 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7730 auto ShiftC = B.buildConstant(Ty, Shift);
7731 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7732 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7733 }
7734 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7735 }
7736 MI.eraseFromParent();
7737 return Legalized;
7738 }
7739 }
7740}
7741
7742// Check that (every element of) Reg is undef or not an exact multiple of BW.
7743static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7744 Register Reg, unsigned BW) {
7745 return matchUnaryPredicate(
7746 MRI, Reg,
7747 [=](const Constant *C) {
7748 // Null constant here means an undef.
7749 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7750 return !CI || CI->getValue().urem(BW) != 0;
7751 },
7752 /*AllowUndefs*/ true);
7753}
7754
7755LegalizerHelper::LegalizeResult
7756LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7757 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7758 LLT Ty = MRI.getType(Dst);
7759 LLT ShTy = MRI.getType(Z);
7760
7761 unsigned BW = Ty.getScalarSizeInBits();
7762
7763 if (!isPowerOf2_32(BW))
7764 return UnableToLegalize;
7765
7766 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7767 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7768
7769 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7770 // fshl X, Y, Z -> fshr X, Y, -Z
7771 // fshr X, Y, Z -> fshl X, Y, -Z
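// This holds because funnel shift amounts are taken modulo BW, and
// -Z == BW - (Z % BW) (mod BW) whenever Z % BW != 0.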
7772 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7773 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7774 } else {
7775 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7776 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7777 auto One = MIRBuilder.buildConstant(ShTy, 1);
7778 if (IsFSHL) {
7779 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7780 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7781 } else {
7782 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7783 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7784 }
7785
7786 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7787 }
7788
7789 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7790 MI.eraseFromParent();
7791 return Legalized;
7792}
7793
7794LegalizerHelper::LegalizeResult
7795LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7796 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7797 LLT Ty = MRI.getType(Dst);
7798 LLT ShTy = MRI.getType(Z);
7799
7800 const unsigned BW = Ty.getScalarSizeInBits();
7801 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7802
7803 Register ShX, ShY;
7804 Register ShAmt, InvShAmt;
7805
7806 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7807 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7808 // fshl: X << C | Y >> (BW - C)
7809 // fshr: X << (BW - C) | Y >> C
7810 // where C = Z % BW is not zero
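// e.g. with BW = 8 and C = 3:
//   fshl X, Y, 3 -> (X << 3) | (Y >> 5), the high 8 bits of (X:Y) << 3
//   fshr X, Y, 3 -> (X << 5) | (Y >> 3), the low 8 bits of (X:Y) >> 3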
7811 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7812 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7813 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7814 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7815 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7816 } else {
7817 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7818 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7819 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7820 if (isPowerOf2_32(BW)) {
7821 // Z % BW -> Z & (BW - 1)
7822 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7823 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7824 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7825 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7826 } else {
7827 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7828 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7829 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7830 }
7831
7832 auto One = MIRBuilder.buildConstant(ShTy, 1);
7833 if (IsFSHL) {
7834 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7835 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7836 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7837 } else {
7838 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7839 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7840 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7841 }
7842 }
7843
7844 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7845 MI.eraseFromParent();
7846 return Legalized;
7847}
7848
7849LegalizerHelper::LegalizeResult
7850LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7851 // These operations approximately do the following (while avoiding undefined
7852 // shifts by BW):
7853 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7854 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7855 Register Dst = MI.getOperand(0).getReg();
7856 LLT Ty = MRI.getType(Dst);
7857 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7858
7859 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7860 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7861
7862 // TODO: Use smarter heuristic that accounts for vector legalization.
7863 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7864 return lowerFunnelShiftAsShifts(MI);
7865
7866 // This only works for powers of 2, fallback to shifts if it fails.
7867 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7868 if (Result == UnableToLegalize)
7869 return lowerFunnelShiftAsShifts(MI);
7870 return Result;
7871}
7872
7873LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7874 auto [Dst, Src] = MI.getFirst2Regs();
7875 LLT DstTy = MRI.getType(Dst);
7876 LLT SrcTy = MRI.getType(Src);
7877
7878 uint32_t DstTySize = DstTy.getSizeInBits();
7879 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7880 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7881
7882 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7883 !isPowerOf2_32(SrcTyScalarSize))
7884 return UnableToLegalize;
7885
7886 // The step between extends is too large; split it by creating an intermediate
7887 // extend instruction.
7888 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7889 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7890 // If the destination type is illegal, split it into multiple statements
7891 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7892 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7893 // Unmerge the vector
7894 LLT EltTy = MidTy.changeElementCount(
7895 MidTy.getElementCount().divideCoefficientBy(2));
7896 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7897
7898 // ZExt the vectors
7899 LLT ZExtResTy = DstTy.changeElementCount(
7900 DstTy.getElementCount().divideCoefficientBy(2));
7901 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7902 {UnmergeSrc.getReg(0)});
7903 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7904 {UnmergeSrc.getReg(1)});
7905
7906 // Merge the ending vectors
7907 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7908
7909 MI.eraseFromParent();
7910 return Legalized;
7911 }
7912 return UnableToLegalize;
7913}
7914
7915LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7916 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7917 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7918 // Similar to how operand splitting is done in SelectionDAG, we can handle
7919 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7920 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7921 // %lo16(<4 x s16>) = G_TRUNC %inlo
7922 // %hi16(<4 x s16>) = G_TRUNC %inhi
7923 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7924 // %res(<8 x s8>) = G_TRUNC %in16
7925
7926 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7927
7928 Register DstReg = MI.getOperand(0).getReg();
7929 Register SrcReg = MI.getOperand(1).getReg();
7930 LLT DstTy = MRI.getType(DstReg);
7931 LLT SrcTy = MRI.getType(SrcReg);
7932
7933 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7934 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7935 isPowerOf2_32(SrcTy.getNumElements()) &&
7936 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7937 // Split input type.
7938 LLT SplitSrcTy = SrcTy.changeElementCount(
7939 SrcTy.getElementCount().divideCoefficientBy(2));
7940
7941 // First, split the source into two smaller vectors.
7942 SmallVector<Register, 2> SplitSrcs;
7943 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7944
7945 // Truncate the splits into intermediate narrower elements.
7946 LLT InterTy;
7947 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7948 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7949 else
7950 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7951 for (Register &Src : SplitSrcs)
7952 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7953
7954 // Combine the new truncates into one vector
7955 auto Merge = MIRBuilder.buildMergeLikeInstr(
7956 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7957
7958 // Truncate the new vector to the final result type
7959 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7960 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7961 else
7962 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7963
7964 MI.eraseFromParent();
7965
7966 return Legalized;
7967 }
7968 return UnableToLegalize;
7969}
7970
7971LegalizerHelper::LegalizeResult
7972LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
7973 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7974 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7975 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7976 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7977 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7978 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7979 MI.eraseFromParent();
7980 return Legalized;
7981}
7982
7983LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
7984 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7985
7986 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7987 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7988
7989 MIRBuilder.setInstrAndDebugLoc(MI);
7990
7991 // If a rotate in the other direction is supported, use it.
7992 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7993 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
7994 isPowerOf2_32(EltSizeInBits))
7995 return lowerRotateWithReverseRotate(MI);
7996
7997 // If a funnel shift is supported, use it.
7998 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7999 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8000 bool IsFShLegal = false;
8001 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8002 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8003 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8004 Register R3) {
8005 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8006 MI.eraseFromParent();
8007 return Legalized;
8008 };
8009 // If a funnel shift in the other direction is supported, use it.
8010 if (IsFShLegal) {
8011 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8012 } else if (isPowerOf2_32(EltSizeInBits)) {
8013 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8014 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8015 }
8016 }
8017
8018 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8019 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8020 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8021 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8022 Register ShVal;
8023 Register RevShiftVal;
8024 if (isPowerOf2_32(EltSizeInBits)) {
8025 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8026 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
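// e.g. for w = 8, c = 3: rotl x, 3 -> (x << 3) | (x >> 5), since
// -3 & 7 == 5; for c == 0 both shift amounts are 0 and the OR yields x.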
8027 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8028 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8029 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8030 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8031 RevShiftVal =
8032 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8033 } else {
8034 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8035 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8036 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8037 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8038 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8039 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8040 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8041 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8042 RevShiftVal =
8043 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8044 }
8045 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
8046 MI.eraseFromParent();
8047 return Legalized;
8048}
8049
8050// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8051// representation.
8052LegalizerHelper::LegalizeResult
8053LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
8054 auto [Dst, Src] = MI.getFirst2Regs();
8055 const LLT S64 = LLT::scalar(64);
8056 const LLT S32 = LLT::scalar(32);
8057 const LLT S1 = LLT::scalar(1);
8058
8059 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8060
8061 // unsigned cul2f(ulong u) {
8062 // uint lz = clz(u);
8063 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8064 // u = (u << lz) & 0x7fffffffffffffffUL;
8065 // ulong t = u & 0xffffffffffUL;
8066 // uint v = (e << 23) | (uint)(u >> 40);
8067 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8068 // return as_float(v + r);
8069 // }
8070
8071 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8072 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8073
8074 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8075
8076 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8077 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8078
8079 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8080 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8081
8082 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8083 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8084
8085 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8086
8087 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8088 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8089
8090 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8091 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8092 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8093
8094 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8095 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8096 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8097 auto One = MIRBuilder.buildConstant(S32, 1);
8098
8099 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8100 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8101 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8102 MIRBuilder.buildAdd(Dst, V, R);
8103
8104 MI.eraseFromParent();
8105 return Legalized;
8106}
8107
8108// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8109// operations and G_SITOFP
8112 auto [Dst, Src] = MI.getFirst2Regs();
8113 const LLT S64 = LLT::scalar(64);
8114 const LLT S32 = LLT::scalar(32);
8115 const LLT S1 = LLT::scalar(1);
8116
8117 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8118
8119 // For i64 values that do not exceed INT64_MAX we simply reuse SITOFP.
8120 // Otherwise, divide the i64 by 2, round the result by ORing in the lowest bit
8121 // saved before the division, convert to float with SITOFP, and multiply the
8122 // result by 2.
8123 auto One = MIRBuilder.buildConstant(S64, 1);
8124 auto Zero = MIRBuilder.buildConstant(S64, 0);
8125 // Result if Src < INT_MAX
8126 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8127 // Result if Src >= INT_MAX
8128 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8129 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8130 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8131 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8132 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8133 // Check whether the original value exceeds INT64_MAX by comparing it, as a
8134 // signed value, with zero, and pick one of the two conversions accordingly.
8135 auto IsLarge =
8136 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8137 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8138
8139 MI.eraseFromParent();
8140 return Legalized;
8141}
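// Editor's sketch of the expansion above in C (illustrative only; like the
// lowering it relies on a non-trapping int64 -> float conversion):
//
//   float u64_to_f32(uint64_t x) {
//     if (!(x >> 63))                            // fits in the signed range: plain SITOFP
//       return (float)(int64_t)x;
//     uint64_t halved = (x >> 1) | (x & 1);      // halve, keeping the dropped bit sticky
//     float f = (float)(int64_t)halved;          // halved now fits in the signed range
//     return f + f;                              // scale the result back up by 2
//   }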
8142
8143// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8144// IEEE double representation.
8147 auto [Dst, Src] = MI.getFirst2Regs();
8148 const LLT S64 = LLT::scalar(64);
8149 const LLT S32 = LLT::scalar(32);
8150
8151 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8152
8153 // We create the double value from two 32-bit parts whose exponents differ by
8154 // 32. Note that + and - are float operations that adjust the implicit leading
8155 // one; the bases 2^52 and 2^84 are for illustrative purposes.
8156 //
8157 // X = 2^52 * 1.0...LowBits
8158 // Y = 2^84 * 1.0...HighBits
8159 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8160 // = - 2^52 * 1.0...HighBits
8161 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
8162 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8163 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8164 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8165 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8166 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8167
8168 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8169 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8170 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8171 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8172 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8173 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8174 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8175
8176 MI.eraseFromParent();
8177 return Legalized;
8178}
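// Editor's sketch of the same trick in C++ (illustrative only; bit_cast stands
// for an exact reinterpretation of the bit pattern, e.g. std::bit_cast<double>):
//
//   double u64_to_f64(uint64_t x) {
//     uint64_t lo = (x & 0xffffffffULL) | 0x4330000000000000ULL; // 2^52 * 1.<low 32 bits>
//     uint64_t hi = (x >> 32)           | 0x4530000000000000ULL; // 2^84 * 1.<high 32 bits>
//     double scratch = bit_cast<double>(hi) - bit_cast<double>(0x4530000000100000ULL);
//     return scratch + bit_cast<double>(lo);  // single rounding of high*2^32 + low
//   }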
8179
8180/// i64->fp16 itofp can be lowered to i64->f64, f64->f32, f32->f16. We cannot
8181/// lower via fpround f64->f16 without the risk of double rounding, so we
8182/// manually perform the lowering here, where we know it is valid.
8185 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8186 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8187 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8188 : MIRBuilder.buildSITOFP(SrcTy, Src);
8189 LLT S32Ty = SrcTy.changeElementSize(32);
8190 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8191 MIRBuilder.buildFPTrunc(Dst, M2);
8192 MI.eraseFromParent();
8193 return Legalized;
8194}
8195
8197 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8198
8199 if (SrcTy == LLT::scalar(1)) {
8200 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8201 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8202 MIRBuilder.buildSelect(Dst, Src, True, False);
8203 MI.eraseFromParent();
8204 return Legalized;
8205 }
8206
8207 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8208 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8209
8210 if (SrcTy != LLT::scalar(64))
8211 return UnableToLegalize;
8212
8213 if (DstTy == LLT::scalar(32))
8214 // TODO: SelectionDAG has several alternative expansions to port which may
8215 // be more reasonable depending on the available instructions. We also need
8216 // a more advanced mechanism to choose an optimal version depending on
8217 // target features such as sitofp or CTLZ availability.
8219
8220 if (DstTy == LLT::scalar(64))
8222
8223 return UnableToLegalize;
8224}
8225
8227 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8228
8229 const LLT S64 = LLT::scalar(64);
8230 const LLT S32 = LLT::scalar(32);
8231 const LLT S1 = LLT::scalar(1);
8232
8233 if (SrcTy == S1) {
8234 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8235 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8236 MIRBuilder.buildSelect(Dst, Src, True, False);
8237 MI.eraseFromParent();
8238 return Legalized;
8239 }
8240
8241 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8242 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8243
8244 if (SrcTy != S64)
8245 return UnableToLegalize;
8246
8247 if (DstTy == S32) {
8248 // signed cl2f(long l) {
8249 // long s = l >> 63;
8250 // float r = cul2f((l + s) ^ s);
8251 // return s ? -r : r;
8252 // }
8253 Register L = Src;
8254 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8255 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8256
8257 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8258 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8259 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8260
8261 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8262 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8263 MIRBuilder.buildConstant(S64, 0));
8264 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8265 MI.eraseFromParent();
8266 return Legalized;
8267 }
8268
8269 return UnableToLegalize;
8270}
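// Editor's note on the trick above: s = l >> 63 is 0 for non-negative l and
// all-ones (-1) for negative l, so (l + s) ^ s is the two's-complement absolute
// value:
//   l >= 0:  (l + 0) ^ 0   == l
//   l <  0:  (l - 1) ^ ~0  == ~(l - 1) == -l
// The unsigned conversion is then applied to |l| and the sign is restored with
// the final select.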
8271
8273 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8274 const LLT S64 = LLT::scalar(64);
8275 const LLT S32 = LLT::scalar(32);
8276
8277 if (SrcTy != S64 && SrcTy != S32)
8278 return UnableToLegalize;
8279 if (DstTy != S32 && DstTy != S64)
8280 return UnableToLegalize;
8281
8282 // FPTOSI gives the same result as FPTOUI for values that fit in the signed
8283 // range. FPTOUI additionally has to handle fp values that convert to unsigned
8284 // integers >= 2^31 for an i32 destination or 2^63 for i64; call this 2^Exp.
8285
8286 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8287 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8288 : APFloat::IEEEdouble(),
8289 APInt::getZero(SrcTy.getSizeInBits()));
8290 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8291
8292 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8293
8294 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8295 // For fp values greater than or equal to the Threshold (2^Exp), we use FPTOSI
8296 // on (Value - 2^Exp) and add 2^Exp back by setting the highest bit of the result.
8297 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8298 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8299 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8300 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8301
8302 const LLT S1 = LLT::scalar(1);
8303
8304 MachineInstrBuilder FCMP =
8305 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8306 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8307
8308 MI.eraseFromParent();
8309 return Legalized;
8310}
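// Editor's sketch of this expansion for f64 -> u64 in C (illustrative only;
// like the lowering it assumes the out-of-range FPTOSI result is non-trapping
// and simply selected away):
//
//   uint64_t f64_to_u64(double x) {
//     const double Threshold = 9223372036854775808.0;          // 2^63
//     uint64_t direct  = (uint64_t)(int64_t)x;                 // valid when x < 2^63
//     uint64_t shifted = (uint64_t)(int64_t)(x - Threshold)    // valid when x >= 2^63
//                        ^ 0x8000000000000000ULL;              // add 2^63 back via the top bit
//     return !(x >= Threshold) ? direct : shifted;             // ULT: NaN also takes 'direct'
//   }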
8311
8313 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8314 const LLT S64 = LLT::scalar(64);
8315 const LLT S32 = LLT::scalar(32);
8316
8317 // FIXME: Only f32 to i64 conversions are supported.
8318 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8319 return UnableToLegalize;
8320
8321 // Expand f32 -> i64 conversion
8322 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8323 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8324
8325 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8326
8327 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8328 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8329
8330 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8331 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8332
8333 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8334 APInt::getSignMask(SrcEltBits));
8335 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8336 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8337 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8338 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8339
8340 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8341 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8342 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8343
8344 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8345 R = MIRBuilder.buildZExt(DstTy, R);
8346
8347 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8348 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8349 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8350 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8351
8352 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8353 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8354
8355 const LLT S1 = LLT::scalar(1);
8356 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8357 S1, Exponent, ExponentLoBit);
8358
8359 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8360
8361 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8362 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8363
8364 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8365
8366 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8367 S1, Exponent, ZeroSrcTy);
8368
8369 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8370 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8371
8372 MI.eraseFromParent();
8373 return Legalized;
8374}
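// Editor's sketch of the bit manipulation above for a single f32 -> i64
// conversion (illustrative only; bit_cast stands for reinterpreting the float's
// bits, and as in the lowering the result for out-of-range inputs is
// unspecified):
//
//   int64_t f32_to_i64(float f) {
//     uint32_t bits = bit_cast<uint32_t>(f);
//     int32_t  exp  = (int32_t)((bits & 0x7f800000u) >> 23) - 127; // unbiased exponent
//     int64_t  sign = (bits >> 31) ? -1 : 0;
//     int64_t  sig  = (bits & 0x007fffffu) | 0x00800000u;          // restore implicit leading 1
//     if (exp < 0)
//       return 0;                                                  // |f| < 1
//     int64_t mag = exp > 23 ? sig << (exp - 23) : sig >> (23 - exp);
//     return (mag ^ sign) - sign;                                  // negate if the sign bit was set
//   }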
8375
8378 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8379
8380 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8381 unsigned SatWidth = DstTy.getScalarSizeInBits();
8382
8383 // Determine minimum and maximum integer values and their corresponding
8384 // floating-point values.
8385 APInt MinInt, MaxInt;
8386 if (IsSigned) {
8387 MinInt = APInt::getSignedMinValue(SatWidth);
8388 MaxInt = APInt::getSignedMaxValue(SatWidth);
8389 } else {
8390 MinInt = APInt::getMinValue(SatWidth);
8391 MaxInt = APInt::getMaxValue(SatWidth);
8392 }
8393
8394 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8395 APFloat MinFloat(Semantics);
8396 APFloat MaxFloat(Semantics);
8397
8398 APFloat::opStatus MinStatus =
8399 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8400 APFloat::opStatus MaxStatus =
8401 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8402 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8403 !(MaxStatus & APFloat::opStatus::opInexact);
8404
8405 // If the integer bounds are exactly representable as floats, emit a
8406 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8407 // and selects.
8408 if (AreExactFloatBounds) {
8409 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8410 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8411 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8412 SrcTy.changeElementSize(1), Src, MaxC);
8413 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8414 // Clamp by MaxFloat from above. NaN cannot occur.
8415 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8416 auto MinP =
8417 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8418 MinC, MachineInstr::FmNoNans);
8419 auto Min =
8420 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8421 // Convert clamped value to integer. In the unsigned case we're done,
8422 // because we mapped NaN to MinFloat, which will cast to zero.
8423 if (!IsSigned) {
8424 MIRBuilder.buildFPTOUI(Dst, Min);
8425 MI.eraseFromParent();
8426 return Legalized;
8427 }
8428
8429 // Otherwise, select 0 if Src is NaN.
8430 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8431 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8432 DstTy.changeElementSize(1), Src, Src);
8433 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8434 FpToInt);
8435 MI.eraseFromParent();
8436 return Legalized;
8437 }
8438
8439 // Result of direct conversion. The assumption here is that the operation is
8440 // non-trapping and it's fine to apply it to an out-of-range value if we
8441 // select it away later.
8442 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8443 : MIRBuilder.buildFPTOUI(DstTy, Src);
8444
8445 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8446 // MinInt if Src is NaN.
8447 auto ULT =
8448 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8449 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8450 auto Max = MIRBuilder.buildSelect(
8451 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8452 // If Src OGT MaxFloat, select MaxInt.
8453 auto OGT =
8454 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8455 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8456
8457 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8458 // is already zero.
8459 if (!IsSigned) {
8460 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8461 Max);
8462 MI.eraseFromParent();
8463 return Legalized;
8464 }
8465
8466 // Otherwise, select 0 if Src is NaN.
8467 auto Min = MIRBuilder.buildSelect(
8468 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8469 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8470 DstTy.changeElementSize(1), Src, Src);
8471 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8472 MI.eraseFromParent();
8473 return Legalized;
8474}
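// Editor's sketch of the compare-and-select path for f32 -> i32 in C
// (illustrative only; the bounds are INT32_MIN/INT32_MAX rounded toward zero to
// float, and as in the lowering the raw conversion is assumed non-trapping):
//
//   int32_t f32_to_i32_sat(float x) {
//     const float MinF = -2147483648.0f;   // (float)INT32_MIN, exact
//     const float MaxF =  2147483520.0f;   // largest float <= INT32_MAX
//     int32_t r = (int32_t)x;              // out-of-range value is selected away below
//     r = !(x >= MinF) ? INT32_MIN : r;    // ULT MinF (includes NaN) -> INT32_MIN
//     r = (x > MaxF)   ? INT32_MAX : r;    // OGT MaxF -> INT32_MAX
//     return (x != x) ? 0 : r;             // signed case: NaN maps to 0
//   }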
8475
8476// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8479 const LLT S1 = LLT::scalar(1);
8480 const LLT S32 = LLT::scalar(32);
8481
8482 auto [Dst, Src] = MI.getFirst2Regs();
8483 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8484 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8485
8486 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8487 return UnableToLegalize;
8488
8489 if (MI.getFlag(MachineInstr::FmAfn)) {
8490 unsigned Flags = MI.getFlags();
8491 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8492 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8493 MI.eraseFromParent();
8494 return Legalized;
8495 }
8496
8497 const unsigned ExpMask = 0x7ff;
8498 const unsigned ExpBiasf64 = 1023;
8499 const unsigned ExpBiasf16 = 15;
8500
8501 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8502 Register U = Unmerge.getReg(0);
8503 Register UH = Unmerge.getReg(1);
8504
8505 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8506 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8507
8508 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8509 // add the f16 bias (15) to get the biased exponent for the f16 format.
8510 E = MIRBuilder.buildAdd(
8511 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8512
8513 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8514 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8515
8516 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8517 MIRBuilder.buildConstant(S32, 0x1ff));
8518 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8519
8520 auto Zero = MIRBuilder.buildConstant(S32, 0);
8521 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8522 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8523 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8524
8525 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8526 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8527 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8528 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8529
8530 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8531 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8532
8533 // N = M | (E << 12);
8534 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8535 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8536
8537 // B = clamp(1-E, 0, 13);
8538 auto One = MIRBuilder.buildConstant(S32, 1);
8539 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8540 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8541 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8542
8543 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8544 MIRBuilder.buildConstant(S32, 0x1000));
8545
8546 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8547 auto D0 = MIRBuilder.buildShl(S32, D, B);
8548
8549 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8550 D0, SigSetHigh);
8551 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8552 D = MIRBuilder.buildOr(S32, D, D1);
8553
8554 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8555 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8556
8557 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8558 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8559
8560 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8561 MIRBuilder.buildConstant(S32, 3));
8562 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8563
8564 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8565 MIRBuilder.buildConstant(S32, 5));
8566 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8567
8568 V1 = MIRBuilder.buildOr(S32, V0, V1);
8569 V = MIRBuilder.buildAdd(S32, V, V1);
8570
8571 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8572 E, MIRBuilder.buildConstant(S32, 30));
8573 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8574 MIRBuilder.buildConstant(S32, 0x7c00), V);
8575
8576 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8577 E, MIRBuilder.buildConstant(S32, 1039));
8578 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8579
8580 // Extract the sign bit.
8581 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8582 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8583
8584 // Insert the sign bit
8585 V = MIRBuilder.buildOr(S32, Sign, V);
8586
8587 MIRBuilder.buildTrunc(Dst, V);
8588 MI.eraseFromParent();
8589 return Legalized;
8590}
8591
8594 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8595 const LLT S64 = LLT::scalar(64);
8596 const LLT S16 = LLT::scalar(16);
8597
8598 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8600
8601 return UnableToLegalize;
8602}
8603
8605 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8606 LLT Ty = MRI.getType(Dst);
8607
8608 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8609 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8610 MI.eraseFromParent();
8611 return Legalized;
8612}
8613
8615 switch (Opc) {
8616 case TargetOpcode::G_SMIN:
8617 return CmpInst::ICMP_SLT;
8618 case TargetOpcode::G_SMAX:
8619 return CmpInst::ICMP_SGT;
8620 case TargetOpcode::G_UMIN:
8621 return CmpInst::ICMP_ULT;
8622 case TargetOpcode::G_UMAX:
8623 return CmpInst::ICMP_UGT;
8624 default:
8625 llvm_unreachable("not in integer min/max");
8626 }
8627}
8628
8630 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8631
8632 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8633 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8634
8635 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8636 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8637
8638 MI.eraseFromParent();
8639 return Legalized;
8640}
8641
8644 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8645
8646 Register Dst = Cmp->getReg(0);
8647 LLT DstTy = MRI.getType(Dst);
8648 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8649 LLT CmpTy = DstTy.changeElementSize(1);
8650
8651 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8654 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8657
8658 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8659 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8660 Cmp->getRHSReg());
8661 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8662 Cmp->getRHSReg());
8663
8664 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8665 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8666 if (TLI.preferSelectsOverBooleanArithmetic(
8667 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8669 auto One = MIRBuilder.buildConstant(DstTy, 1);
8670 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8671
8672 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8673 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8674 } else {
8676 std::swap(IsGT, IsLT);
8677 // Extend boolean results to DstTy, which is at least i2, before subtracting
8678 // them.
8679 unsigned BoolExtOp =
8680 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8681 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8682 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8683 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8684 }
8685
8686 MI.eraseFromParent();
8687 return Legalized;
8688}
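// Editor's note: the boolean-arithmetic branch amounts to the classic
// three-way comparison idiom (a sketch, assuming zero-or-one booleans; the
// select-based branch produces the same -1/0/1 result via nested selects):
//
//   int scmp(int a, int b) { return (a > b) - (a < b); }  // 1, 0 or -1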
8689
8692 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8693 const int Src0Size = Src0Ty.getScalarSizeInBits();
8694 const int Src1Size = Src1Ty.getScalarSizeInBits();
8695
8696 auto SignBitMask = MIRBuilder.buildConstant(
8697 Src0Ty, APInt::getSignMask(Src0Size));
8698
8699 auto NotSignBitMask = MIRBuilder.buildConstant(
8700 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8701
8702 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8703 Register And1;
8704 if (Src0Ty == Src1Ty) {
8705 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8706 } else if (Src0Size > Src1Size) {
8707 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8708 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8709 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8710 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8711 } else {
8712 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8713 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8714 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8715 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8716 }
8717
8718 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8719 // constants are a nan and -0.0, but the final result should preserve
8720 // everything.
8721 unsigned Flags = MI.getFlags();
8722
8723 // We masked the sign bit and the not-sign bit, so these are disjoint.
8724 Flags |= MachineInstr::Disjoint;
8725
8726 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8727
8728 MI.eraseFromParent();
8729 return Legalized;
8730}
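// Editor's sketch: for the same-type case this is the usual integer copysign
// idiom (illustrative only; bit_cast stands for reinterpreting the bits):
//
//   float fcopysign32(float mag, float sgn) {
//     uint32_t m = bit_cast<uint32_t>(mag) & 0x7fffffffu;  // drop the sign of the magnitude
//     uint32_t s = bit_cast<uint32_t>(sgn) & 0x80000000u;  // keep only the sign bit
//     return bit_cast<float>(m | s);                       // the two masks are disjoint
//   }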
8731
8734 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8735 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8736 // depend on fminnum/fmaxnum.
8737
8738 unsigned NewOp;
8739 switch (MI.getOpcode()) {
8740 case TargetOpcode::G_FMINNUM:
8741 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8742 break;
8743 case TargetOpcode::G_FMINIMUMNUM:
8744 NewOp = TargetOpcode::G_FMINNUM;
8745 break;
8746 case TargetOpcode::G_FMAXNUM:
8747 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8748 break;
8749 case TargetOpcode::G_FMAXIMUMNUM:
8750 NewOp = TargetOpcode::G_FMAXNUM;
8751 break;
8752 default:
8753 llvm_unreachable("unexpected min/max opcode");
8754 }
8755
8756 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8757 LLT Ty = MRI.getType(Dst);
8758
8759 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8760 // Insert canonicalizes if it's possible we need to quiet to get correct
8761 // sNaN behavior.
8762
8763 // Note this must be done here, and not as an optimization combine, since in
8764 // the absence of a dedicated quiet-sNaN instruction we are relying on the
8765 // omni-purpose G_FCANONICALIZE.
8766 if (!isKnownNeverSNaN(Src0, MRI))
8767 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8768
8769 if (!isKnownNeverSNaN(Src1, MRI))
8770 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8771 }
8772
8773 // If there are no nans, it's safe to simply replace this with the non-IEEE
8774 // version.
8775 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8776 MI.eraseFromParent();
8777 return Legalized;
8778}
8779
8781 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8782 Register DstReg = MI.getOperand(0).getReg();
8783 LLT Ty = MRI.getType(DstReg);
8784 unsigned Flags = MI.getFlags();
8785
8786 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8787 Flags);
8788 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8789 MI.eraseFromParent();
8790 return Legalized;
8791}
8792
8795 auto [DstReg, X] = MI.getFirst2Regs();
8796 const unsigned Flags = MI.getFlags();
8797 const LLT Ty = MRI.getType(DstReg);
8798 const LLT CondTy = Ty.changeElementSize(1);
8799
8800 // round(x) =>
8801 // t = trunc(x);
8802 // d = fabs(x - t);
8803 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8804 // return t + o;
8805
8806 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8807
8808 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8809 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8810
8811 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8812 auto Cmp =
8813 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8814
8815 // Could emit G_UITOFP instead
8816 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8817 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8818 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8819 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8820
8821 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8822
8823 MI.eraseFromParent();
8824 return Legalized;
8825}
8826
8828 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8829 unsigned Flags = MI.getFlags();
8830 LLT Ty = MRI.getType(DstReg);
8831 const LLT CondTy = Ty.changeElementSize(1);
8832
8833 // result = trunc(src);
8834 // if (src < 0.0 && src != result)
8835 // result += -1.0.
8836
8837 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8838 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8839
8840 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8841 SrcReg, Zero, Flags);
8842 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8843 SrcReg, Trunc, Flags);
8844 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8845 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8846
8847 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8848 MI.eraseFromParent();
8849 return Legalized;
8850}
8851
8854 const unsigned NumOps = MI.getNumOperands();
8855 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8856 unsigned PartSize = Src0Ty.getSizeInBits();
8857
8858 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8859 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8860
8861 for (unsigned I = 2; I != NumOps; ++I) {
8862 const unsigned Offset = (I - 1) * PartSize;
8863
8864 Register SrcReg = MI.getOperand(I).getReg();
8865 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8866
8867 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8868 MRI.createGenericVirtualRegister(WideTy);
8869
8870 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8871 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8872 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8873 ResultReg = NextResult;
8874 }
8875
8876 if (DstTy.isPointer()) {
8877 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8878 DstTy.getAddressSpace())) {
8879 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8880 return UnableToLegalize;
8881 }
8882
8883 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8884 }
8885
8886 MI.eraseFromParent();
8887 return Legalized;
8888}
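// Editor's sketch: merging two s32 parts into an s64 reduces to zext, shift and
// or (illustrative only):
//
//   uint64_t merge2(uint32_t part0, uint32_t part1) {
//     return (uint64_t)part0 | ((uint64_t)part1 << 32);  // part I lands at offset (I-1)*PartSize
//   }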
8889
8892 const unsigned NumDst = MI.getNumOperands() - 1;
8893 Register SrcReg = MI.getOperand(NumDst).getReg();
8894 Register Dst0Reg = MI.getOperand(0).getReg();
8895 LLT DstTy = MRI.getType(Dst0Reg);
8896 if (DstTy.isPointer())
8897 return UnableToLegalize; // TODO
8898
8899 SrcReg = coerceToScalar(SrcReg);
8900 if (!SrcReg)
8901 return UnableToLegalize;
8902
8903 // Expand scalarizing unmerge as bitcast to integer and shift.
8904 LLT IntTy = MRI.getType(SrcReg);
8905
8906 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8907
8908 const unsigned DstSize = DstTy.getSizeInBits();
8909 unsigned Offset = DstSize;
8910 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8911 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8912 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8913 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8914 }
8915
8916 MI.eraseFromParent();
8917 return Legalized;
8918}
8919
8920/// Lower a vector extract or insert by writing the vector to a stack temporary
8921/// and reloading the element or vector.
8922///
8923/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8924/// =>
8925/// %stack_temp = G_FRAME_INDEX
8926/// G_STORE %vec, %stack_temp
8927/// %idx = clamp(%idx, %vec.getNumElements())
8928/// %element_ptr = G_PTR_ADD %stack_temp, %idx
8929/// %dst = G_LOAD %element_ptr
8932 Register DstReg = MI.getOperand(0).getReg();
8933 Register SrcVec = MI.getOperand(1).getReg();
8934 Register InsertVal;
8935 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8936 InsertVal = MI.getOperand(2).getReg();
8937
8938 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
8939
8940 LLT VecTy = MRI.getType(SrcVec);
8941 LLT EltTy = VecTy.getElementType();
8942 unsigned NumElts = VecTy.getNumElements();
8943
8944 int64_t IdxVal;
8945 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
8947 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
8948
8949 if (InsertVal) {
8950 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
8951 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
8952 } else {
8953 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
8954 }
8955
8956 MI.eraseFromParent();
8957 return Legalized;
8958 }
8959
8960 if (!EltTy.isByteSized()) { // Not implemented.
8961 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8962 return UnableToLegalize;
8963 }
8964
8965 unsigned EltBytes = EltTy.getSizeInBytes();
8966 Align VecAlign = getStackTemporaryAlignment(VecTy);
8967 Align EltAlign;
8968
8969 MachinePointerInfo PtrInfo;
8970 auto StackTemp = createStackTemporary(
8971 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
8972 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
8973
8974 // Get the pointer to the element, and be sure not to hit undefined behavior
8975 // if the index is out of bounds.
8976 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
8977
8978 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
8979 int64_t Offset = IdxVal * EltBytes;
8980 PtrInfo = PtrInfo.getWithOffset(Offset);
8981 EltAlign = commonAlignment(VecAlign, Offset);
8982 } else {
8983 // We lose information with a variable offset.
8984 EltAlign = getStackTemporaryAlignment(EltTy);
8985 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
8986 }
8987
8988 if (InsertVal) {
8989 // Write the inserted element
8990 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
8991
8992 // Reload the whole vector.
8993 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
8994 } else {
8995 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
8996 }
8997
8998 MI.eraseFromParent();
8999 return Legalized;
9000}
9001
9004 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9005 MI.getFirst3RegLLTs();
9006 LLT IdxTy = LLT::scalar(32);
9007
9008 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9009 Register Undef;
9011 LLT EltTy = DstTy.getScalarType();
9012
9013 for (int Idx : Mask) {
9014 if (Idx < 0) {
9015 if (!Undef.isValid())
9016 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9017 BuildVec.push_back(Undef);
9018 continue;
9019 }
9020
9021 if (Src0Ty.isScalar()) {
9022 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
9023 } else {
9024 int NumElts = Src0Ty.getNumElements();
9025 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9026 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9027 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9028 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
9029 BuildVec.push_back(Extract.getReg(0));
9030 }
9031 }
9032
9033 if (DstTy.isVector())
9034 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9035 else
9036 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
9037 MI.eraseFromParent();
9038 return Legalized;
9039}
9040
9043 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9044 MI.getFirst4RegLLTs();
9045
9046 if (VecTy.isScalableVector())
9047 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9048
9049 Align VecAlign = getStackTemporaryAlignment(VecTy);
9050 MachinePointerInfo PtrInfo;
9051 Register StackPtr =
9052 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9053 PtrInfo)
9054 .getReg(0);
9055 MachinePointerInfo ValPtrInfo =
9057
9058 LLT IdxTy = LLT::scalar(32);
9059 LLT ValTy = VecTy.getElementType();
9060 Align ValAlign = getStackTemporaryAlignment(ValTy);
9061
9062 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9063
9064 bool HasPassthru =
9065 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9066
9067 if (HasPassthru)
9068 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9069
9070 Register LastWriteVal;
9071 std::optional<APInt> PassthruSplatVal =
9072 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9073
9074 if (PassthruSplatVal.has_value()) {
9075 LastWriteVal =
9076 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9077 } else if (HasPassthru) {
9078 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9079 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9080 {LLT::scalar(32)}, {Popcount});
9081
9082 Register LastElmtPtr =
9083 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9084 LastWriteVal =
9085 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9086 .getReg(0);
9087 }
9088
9089 unsigned NumElmts = VecTy.getNumElements();
9090 for (unsigned I = 0; I < NumElmts; ++I) {
9091 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9092 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9093 Register ElmtPtr =
9094 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9095 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9096
9097 LLT MaskITy = MaskTy.getElementType();
9098 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9099 if (MaskITy.getSizeInBits() > 1)
9100 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9101
9102 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9103 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9104
9105 if (HasPassthru && I == NumElmts - 1) {
9106 auto EndOfVector =
9107 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9108 auto AllLanesSelected = MIRBuilder.buildICmp(
9109 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9110 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9111 {OutPos, EndOfVector});
9112 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9113
9114 LastWriteVal =
9115 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9116 .getReg(0);
9117 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9118 }
9119 }
9120
9121 // TODO: Use StackPtr's FrameIndex alignment.
9122 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9123
9124 MI.eraseFromParent();
9125 return Legalized;
9126}
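// Editor's sketch: ignoring the passthru handling, the store loop above is the
// usual scalar compress idiom over a stack buffer (illustrative only):
//
//   unsigned compress(const int *vec, const bool *mask, int *out, unsigned n) {
//     unsigned pos = 0;
//     for (unsigned i = 0; i != n; ++i) {
//       out[pos] = vec[i];   // store every lane at the current output position
//       pos += mask[i];      // but only advance the position for selected lanes
//     }
//     return pos;            // number of selected elements
//   }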
9127
9129 Register AllocSize,
9130 Align Alignment,
9131 LLT PtrTy) {
9132 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9133
9134 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9135 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9136
9137 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9138 // have to generate an extra instruction to negate the alloc and then use
9139 // G_PTR_ADD to add the negative offset.
9140 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9141 if (Alignment > Align(1)) {
9142 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9143 AlignMask.negate();
9144 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9145 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9146 }
9147
9148 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9149}
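// Editor's sketch: in pointer-arithmetic terms this computes (assuming the
// stack grows down and Alignment is a power of two):
//
//   uintptr_t alloc_top(uintptr_t sp, uintptr_t size, uintptr_t align) {
//     return (sp - size) & ~(align - 1);  // move SP down by the size, then align down
//   }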
9150
9153 const auto &MF = *MI.getMF();
9154 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9155 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9156 return UnableToLegalize;
9157
9158 Register Dst = MI.getOperand(0).getReg();
9159 Register AllocSize = MI.getOperand(1).getReg();
9160 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9161
9162 LLT PtrTy = MRI.getType(Dst);
9163 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9164 Register SPTmp =
9165 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9166
9167 MIRBuilder.buildCopy(SPReg, SPTmp);
9168 MIRBuilder.buildCopy(Dst, SPTmp);
9169
9170 MI.eraseFromParent();
9171 return Legalized;
9172}
9173
9176 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9177 if (!StackPtr)
9178 return UnableToLegalize;
9179
9180 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9181 MI.eraseFromParent();
9182 return Legalized;
9183}
9184
9187 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9188 if (!StackPtr)
9189 return UnableToLegalize;
9190
9191 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9192 MI.eraseFromParent();
9193 return Legalized;
9194}
9195
9198 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9199 unsigned Offset = MI.getOperand(2).getImm();
9200
9201 // Extract sub-vector or one element
9202 if (SrcTy.isVector()) {
9203 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9204 unsigned DstSize = DstTy.getSizeInBits();
9205
9206 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9207 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9208 // Unmerge and allow access to each Src element for the artifact combiner.
9209 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9210
9211 // Take the element(s) we need to extract and copy them (merging if several).
9212 SmallVector<Register, 8> SubVectorElts;
9213 for (unsigned Idx = Offset / SrcEltSize;
9214 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9215 SubVectorElts.push_back(Unmerge.getReg(Idx));
9216 }
9217 if (SubVectorElts.size() == 1)
9218 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9219 else
9220 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9221
9222 MI.eraseFromParent();
9223 return Legalized;
9224 }
9225 }
9226
9227 if (DstTy.isScalar() &&
9228 (SrcTy.isScalar() ||
9229 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9230 LLT SrcIntTy = SrcTy;
9231 if (!SrcTy.isScalar()) {
9232 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9233 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9234 }
9235
9236 if (Offset == 0)
9237 MIRBuilder.buildTrunc(DstReg, SrcReg);
9238 else {
9239 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9240 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9241 MIRBuilder.buildTrunc(DstReg, Shr);
9242 }
9243
9244 MI.eraseFromParent();
9245 return Legalized;
9246 }
9247
9248 return UnableToLegalize;
9249}
9250
9252 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9253 uint64_t Offset = MI.getOperand(3).getImm();
9254
9255 LLT DstTy = MRI.getType(Src);
9256 LLT InsertTy = MRI.getType(InsertSrc);
9257
9258 // Insert sub-vector or one element
9259 if (DstTy.isVector() && !InsertTy.isPointer()) {
9260 LLT EltTy = DstTy.getElementType();
9261 unsigned EltSize = EltTy.getSizeInBits();
9262 unsigned InsertSize = InsertTy.getSizeInBits();
9263
9264 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9265 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9266 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9268 unsigned Idx = 0;
9269 // Elements from Src before the insert offset.
9270 for (; Idx < Offset / EltSize; ++Idx) {
9271 DstElts.push_back(UnmergeSrc.getReg(Idx));
9272 }
9273
9274 // Replace elements in Src with elements from InsertSrc
9275 if (InsertTy.getSizeInBits() > EltSize) {
9276 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9277 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9278 ++Idx, ++i) {
9279 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9280 }
9281 } else {
9282 DstElts.push_back(InsertSrc);
9283 ++Idx;
9284 }
9285
9286 // Remaining elements from Src after insert
9287 for (; Idx < DstTy.getNumElements(); ++Idx) {
9288 DstElts.push_back(UnmergeSrc.getReg(Idx));
9289 }
9290
9291 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9292 MI.eraseFromParent();
9293 return Legalized;
9294 }
9295 }
9296
9297 if (InsertTy.isVector() ||
9298 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9299 return UnableToLegalize;
9300
9301 const DataLayout &DL = MIRBuilder.getDataLayout();
9302 if ((DstTy.isPointer() &&
9303 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9304 (InsertTy.isPointer() &&
9305 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9306 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9307 return UnableToLegalize;
9308 }
9309
9310 LLT IntDstTy = DstTy;
9311
9312 if (!DstTy.isScalar()) {
9313 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9314 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9315 }
9316
9317 if (!InsertTy.isScalar()) {
9318 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9319 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9320 }
9321
9322 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9323 if (Offset != 0) {
9324 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9325 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9326 }
9327
9329 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9330
9331 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9332 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9333 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9334
9335 MIRBuilder.buildCast(Dst, Or);
9336 MI.eraseFromParent();
9337 return Legalized;
9338}
9339
9342 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9343 MI.getFirst4RegLLTs();
9344 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9345
9346 LLT Ty = Dst0Ty;
9347 LLT BoolTy = Dst1Ty;
9348
9349 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9350
9351 if (IsAdd)
9352 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9353 else
9354 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9355
9356 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9357
9358 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9359
9360 // For an addition, the result should be less than one of the operands (LHS)
9361 // if and only if the other operand (RHS) is negative, otherwise there will
9362 // be overflow.
9363 // For a subtraction, the result should be less than one of the operands
9364 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9365 // otherwise there will be overflow.
9366 auto ResultLowerThanLHS =
9367 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9368 auto ConditionRHS = MIRBuilder.buildICmp(
9369 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9370
9371 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9372
9373 MIRBuilder.buildCopy(Dst0, NewDst0);
9374 MI.eraseFromParent();
9375
9376 return Legalized;
9377}
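// Editor's sketch of the signed-overflow predicate above for the add case
// (illustrative only; the wrapping add mirrors G_ADD semantics):
//
//   bool saddo32(int32_t a, int32_t b, int32_t *res) {
//     int32_t r = (int32_t)((uint32_t)a + (uint32_t)b);  // wrapping addition
//     *res = r;
//     return (r < a) != (b < 0);  // overflow iff "result dropped below LHS"
//   }                             // and "RHS is negative" disagree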
9378
9380 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9381 const LLT Ty = MRI.getType(Res);
9382
9383 // sum = LHS + RHS + zext(CarryIn)
9384 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9385 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9386 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9387 MIRBuilder.buildCopy(Res, Sum);
9388
9389 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
9390 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9391 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9392 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9393
9394 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9395 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9396
9397 MI.eraseFromParent();
9398 return Legalized;
9399}
9400
9402 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9403 const LLT Ty = MRI.getType(Res);
9404
9405 // Diff = LHS - (RHS + zext(CarryIn))
9406 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9407 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9408 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9409 MIRBuilder.buildCopy(Res, Diff);
9410
9411 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9412 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9413 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9414 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9415 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9416 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9417
9418 MI.eraseFromParent();
9419 return Legalized;
9420}
9421
9424 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9425 LLT Ty = MRI.getType(Res);
9426 bool IsSigned;
9427 bool IsAdd;
9428 unsigned BaseOp;
9429 switch (MI.getOpcode()) {
9430 default:
9431 llvm_unreachable("unexpected addsat/subsat opcode");
9432 case TargetOpcode::G_UADDSAT:
9433 IsSigned = false;
9434 IsAdd = true;
9435 BaseOp = TargetOpcode::G_ADD;
9436 break;
9437 case TargetOpcode::G_SADDSAT:
9438 IsSigned = true;
9439 IsAdd = true;
9440 BaseOp = TargetOpcode::G_ADD;
9441 break;
9442 case TargetOpcode::G_USUBSAT:
9443 IsSigned = false;
9444 IsAdd = false;
9445 BaseOp = TargetOpcode::G_SUB;
9446 break;
9447 case TargetOpcode::G_SSUBSAT:
9448 IsSigned = true;
9449 IsAdd = false;
9450 BaseOp = TargetOpcode::G_SUB;
9451 break;
9452 }
9453
9454 if (IsSigned) {
9455 // sadd.sat(a, b) ->
9456 // hi = 0x7fffffff - smax(a, 0)
9457 // lo = 0x80000000 - smin(a, 0)
9458 // a + smin(smax(lo, b), hi)
9459 // ssub.sat(a, b) ->
9460 // lo = smax(a, -1) - 0x7fffffff
9461 // hi = smin(a, -1) - 0x80000000
9462 // a - smin(smax(lo, b), hi)
9463 // TODO: AMDGPU can use a "median of 3" instruction here:
9464 // a +/- med3(lo, b, hi)
9465 uint64_t NumBits = Ty.getScalarSizeInBits();
9466 auto MaxVal =
9467 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9468 auto MinVal =
9469 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9471 if (IsAdd) {
9472 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9473 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9474 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9475 } else {
9476 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9477 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9478 MaxVal);
9479 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9480 MinVal);
9481 }
9482 auto RHSClamped =
9483 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9484 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9485 } else {
9486 // uadd.sat(a, b) -> a + umin(~a, b)
9487 // usub.sat(a, b) -> a - umin(a, b)
9488 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9489 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9490 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9491 }
9492
9493 MI.eraseFromParent();
9494 return Legalized;
9495}
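// Editor's sketch: the unsigned cases reduce to the familiar saturating idioms
// (illustrative only):
//
//   uint32_t uaddsat(uint32_t a, uint32_t b) { return a + (b < ~a ? b : ~a); } // a + umin(~a, b)
//   uint32_t usubsat(uint32_t a, uint32_t b) { return a - (a < b ? a : b); }   // a - umin(a, b)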
9496
9499 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9500 LLT Ty = MRI.getType(Res);
9501 LLT BoolTy = Ty.changeElementSize(1);
9502 bool IsSigned;
9503 bool IsAdd;
9504 unsigned OverflowOp;
9505 switch (MI.getOpcode()) {
9506 default:
9507 llvm_unreachable("unexpected addsat/subsat opcode");
9508 case TargetOpcode::G_UADDSAT:
9509 IsSigned = false;
9510 IsAdd = true;
9511 OverflowOp = TargetOpcode::G_UADDO;
9512 break;
9513 case TargetOpcode::G_SADDSAT:
9514 IsSigned = true;
9515 IsAdd = true;
9516 OverflowOp = TargetOpcode::G_SADDO;
9517 break;
9518 case TargetOpcode::G_USUBSAT:
9519 IsSigned = false;
9520 IsAdd = false;
9521 OverflowOp = TargetOpcode::G_USUBO;
9522 break;
9523 case TargetOpcode::G_SSUBSAT:
9524 IsSigned = true;
9525 IsAdd = false;
9526 OverflowOp = TargetOpcode::G_SSUBO;
9527 break;
9528 }
9529
9530 auto OverflowRes =
9531 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9532 Register Tmp = OverflowRes.getReg(0);
9533 Register Ov = OverflowRes.getReg(1);
9534 MachineInstrBuilder Clamp;
9535 if (IsSigned) {
9536 // sadd.sat(a, b) ->
9537 // {tmp, ov} = saddo(a, b)
9538 // ov ? (tmp >>s 31) + 0x80000000 : tmp
9539 // ssub.sat(a, b) ->
9540 // {tmp, ov} = ssubo(a, b)
9541 // ov ? (tmp >>s 31) + 0x80000000 : tmp
9542 uint64_t NumBits = Ty.getScalarSizeInBits();
9543 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9544 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9545 auto MinVal =
9546 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9547 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9548 } else {
9549 // uadd.sat(a, b) ->
9550 // {tmp, ov} = uaddo(a, b)
9551 // ov ? 0xffffffff : tmp
9552 // usub.sat(a, b) ->
9553 // {tmp, ov} = usubo(a, b)
9554 // ov ? 0 : tmp
9555 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9556 }
9557 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9558
9559 MI.eraseFromParent();
9560 return Legalized;
9561}
9562
9565 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9566 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9567 "Expected shlsat opcode!");
9568 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9569 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9570 LLT Ty = MRI.getType(Res);
9571 LLT BoolTy = Ty.changeElementSize(1);
9572
9573 unsigned BW = Ty.getScalarSizeInBits();
9574 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9575 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9576 : MIRBuilder.buildLShr(Ty, Result, RHS);
9577
9578 MachineInstrBuilder SatVal;
9579 if (IsSigned) {
9580 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9581 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9582 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9583 MIRBuilder.buildConstant(Ty, 0));
9584 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9585 } else {
9586 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9587 }
9588 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9589 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9590
9591 MI.eraseFromParent();
9592 return Legalized;
9593}
9594
9596 auto [Dst, Src] = MI.getFirst2Regs();
9597 const LLT Ty = MRI.getType(Src);
9598 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9599 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9600
9601 // Swap most and least significant byte, set remaining bytes in Res to zero.
9602 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9603 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9604 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9605 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9606
9607 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9608 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9609 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9610 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9611 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9612 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9613 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9614 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9615 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9616 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9617 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9618 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9619 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9620 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9621 }
9622 Res.getInstr()->getOperand(0).setReg(Dst);
9623
9624 MI.eraseFromParent();
9625 return Legalized;
9626}
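// Editor's sketch: for the common 32-bit case the expansion above amounts to
// (illustrative only):
//
//   uint32_t bswap32(uint32_t x) {
//     return (x << 24) | (x >> 24) |        // swap the outermost bytes
//            ((x & 0x0000ff00u) << 8) |     // byte 1 -> byte 2
//            ((x >> 8) & 0x0000ff00u);      // byte 2 -> byte 1
//   }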
9627
9628//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9630 MachineInstrBuilder Src, const APInt &Mask) {
9631 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9632 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9633 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9634 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9635 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9636 return B.buildOr(Dst, LHS, RHS);
9637}
9638
9641 auto [Dst, Src] = MI.getFirst2Regs();
9642 const LLT SrcTy = MRI.getType(Src);
9643 unsigned Size = SrcTy.getScalarSizeInBits();
9644 unsigned VSize = SrcTy.getSizeInBits();
9645
9646 if (Size >= 8) {
9647 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9648 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9649 {LLT::fixed_vector(VSize / 8, 8),
9650 LLT::fixed_vector(VSize / 8, 8)}}))) {
9651 // If bitreverse is legal for i8 vector of the same size, then cast
9652 // to i8 vector type.
9653 // e.g. v4s32 -> v16s8
9654 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9655 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9656 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9657 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9658 MIRBuilder.buildBitcast(Dst, RBIT);
9659 } else {
9660 MachineInstrBuilder BSWAP =
9661 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9662
9663 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9664 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9665 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9666 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9667 APInt::getSplat(Size, APInt(8, 0xF0)));
9668
9669 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9670 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9671 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9672 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9673 APInt::getSplat(Size, APInt(8, 0xCC)));
9674
9675 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9676 // 6|7
9677 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9678 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
9679 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9680 }
9681 } else {
9682 // Expand bitreverse for types smaller than 8 bits.
9684 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9686 if (I < J) {
9687 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9688 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9689 } else {
9690 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9691 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9692 }
9693
9694 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9695 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9696 if (I == 0)
9697 Tmp = Tmp2;
9698 else
9699 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9700 }
9701 MIRBuilder.buildCopy(Dst, Tmp);
9702 }
9703
9704 MI.eraseFromParent();
9705 return Legalized;
9706}
9707
9710 MachineFunction &MF = MIRBuilder.getMF();
9711
9712 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9713 int NameOpIdx = IsRead ? 1 : 0;
9714 int ValRegIndex = IsRead ? 0 : 1;
9715
9716 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9717 const LLT Ty = MRI.getType(ValReg);
9718 const MDString *RegStr = cast<MDString>(
9719 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9720
9721 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9722 if (!PhysReg) {
9723 const Function &Fn = MF.getFunction();
9725 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9726 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9727 Fn, MI.getDebugLoc()));
9728 if (IsRead)
9729 MIRBuilder.buildUndef(ValReg);
9730
9731 MI.eraseFromParent();
9732 return Legalized;
9733 }
9734
9735 if (IsRead)
9736 MIRBuilder.buildCopy(ValReg, PhysReg);
9737 else
9738 MIRBuilder.buildCopy(PhysReg, ValReg);
9739
9740 MI.eraseFromParent();
9741 return Legalized;
9742}
9743
9744LegalizerHelper::LegalizeResult
9745LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9746 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9747 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9748 Register Result = MI.getOperand(0).getReg();
9749 LLT OrigTy = MRI.getType(Result);
9750 auto SizeInBits = OrigTy.getScalarSizeInBits();
9751 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9752
9753 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9754 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9755 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9756 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9757
9758 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9759 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9760 MIRBuilder.buildTrunc(Result, Shifted);
9761
9762 MI.eraseFromParent();
9763 return Legalized;
9764}
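// Editorial sketch (not part of LegalizerHelper.cpp): the widen-multiply-shift
// idea above for a 32-bit G_UMULH, done with ordinary host integers.
#include <cstdint>
#include <cstdio>

static uint32_t umulh32(uint32_t A, uint32_t B) {
  uint64_t Wide = uint64_t(A) * uint64_t(B); // zero-extend, multiply in 64 bits
  return uint32_t(Wide >> 32);               // the high half is the result
}

int main() {
  std::printf("%u\n", umulh32(0x80000000u, 4u)); // prints 2
  return 0;
}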
9765
9766LegalizerHelper::LegalizeResult
9767LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
9768 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9769 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9770
9771 if (Mask == fcNone) {
9772 MIRBuilder.buildConstant(DstReg, 0);
9773 MI.eraseFromParent();
9774 return Legalized;
9775 }
9776 if (Mask == fcAllFlags) {
9777 MIRBuilder.buildConstant(DstReg, 1);
9778 MI.eraseFromParent();
9779 return Legalized;
9780 }
9781
9782 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9783 // version
9784
9785 unsigned BitSize = SrcTy.getScalarSizeInBits();
9786 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9787
9788 LLT IntTy = LLT::scalar(BitSize);
9789 if (SrcTy.isVector())
9790 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9791 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9792
9793 // Various masks.
9794 APInt SignBit = APInt::getSignMask(BitSize);
9795 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9796 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9797 APInt ExpMask = Inf;
9798 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9799 APInt QNaNBitMask =
9800 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9801 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9802
9803 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9804 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9805 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9806 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9807 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9808
9809 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9810 auto Sign =
9811 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9812
9813 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9814 // Clang doesn't support capture of structured bindings:
9815 LLT DstTyCopy = DstTy;
9816 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9817 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9818 };
9819
9820 // Tests that involve more than one class should be processed first.
9821 if ((Mask & fcFinite) == fcFinite) {
9822 // finite(V) ==> abs(V) u< exp_mask
9823 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9824 ExpMaskC));
9825 Mask &= ~fcFinite;
9826 } else if ((Mask & fcFinite) == fcPosFinite) {
9827 // finite(V) && V > 0 ==> V u< exp_mask
9828 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9829 ExpMaskC));
9830 Mask &= ~fcPosFinite;
9831 } else if ((Mask & fcFinite) == fcNegFinite) {
9832 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9833 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9834 ExpMaskC);
9835 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9836 appendToRes(And);
9837 Mask &= ~fcNegFinite;
9838 }
9839
9840 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9841 // fcZero | fcSubnormal => test all exponent bits are 0
9842 // TODO: Handle sign bit specific cases
9843 // TODO: Handle inverted case
9844 if (PartialCheck == (fcZero | fcSubnormal)) {
9845 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9846 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9847 ExpBits, ZeroC));
9848 Mask &= ~PartialCheck;
9849 }
9850 }
9851
9852 // Check for individual classes.
9853 if (FPClassTest PartialCheck = Mask & fcZero) {
9854 if (PartialCheck == fcPosZero)
9855 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9856 AsInt, ZeroC));
9857 else if (PartialCheck == fcZero)
9858 appendToRes(
9859 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9860 else // fcNegZero
9861 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9862 AsInt, SignBitC));
9863 }
9864
9865 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9866 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9867 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9868 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9869 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9870 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9871 auto SubnormalRes =
9872 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9873 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9874 if (PartialCheck == fcNegSubnormal)
9875 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9876 appendToRes(SubnormalRes);
9877 }
9878
9879 if (FPClassTest PartialCheck = Mask & fcInf) {
9880 if (PartialCheck == fcPosInf)
9881 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9882 AsInt, InfC));
9883 else if (PartialCheck == fcInf)
9884 appendToRes(
9885 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9886 else { // fcNegInf
9887 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9888 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9889 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9890 AsInt, NegInfC));
9891 }
9892 }
9893
9894 if (FPClassTest PartialCheck = Mask & fcNan) {
9895 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9896 if (PartialCheck == fcNan) {
9897 // isnan(V) ==> abs(V) u> int(inf)
9898 appendToRes(
9899 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9900 } else if (PartialCheck == fcQNan) {
9901 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9902 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9903 InfWithQnanBitC));
9904 } else { // fcSNan
9905 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9906 // abs(V) u< (unsigned(Inf) | quiet_bit)
9907 auto IsNan =
9908 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9909 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9910 Abs, InfWithQnanBitC);
9911 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9912 }
9913 }
9914
9915 if (FPClassTest PartialCheck = Mask & fcNormal) {
9916 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9917 // (max_exp-1))
9918 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9919 auto ExpMinusOne = MIRBuilder.buildSub(
9920 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9921 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9922 auto NormalRes =
9923 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
9924 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9925 if (PartialCheck == fcNegNormal)
9926 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9927 else if (PartialCheck == fcPosNormal) {
9928 auto PosSign = MIRBuilder.buildXor(
9929 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
9930 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
9931 }
9932 appendToRes(NormalRes);
9933 }
9934
9935 MIRBuilder.buildCopy(DstReg, Res);
9936 MI.eraseFromParent();
9937 return Legalized;
9938}
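// Editorial sketch (not part of LegalizerHelper.cpp): the integer form of the
// isnan test built above, for IEEE binary32 on a host where float is 32 bits.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <limits>

static bool isNanViaBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  uint32_t Abs = Bits & 0x7FFFFFFFu; // clear the sign bit
  return Abs > 0x7F800000u;          // strictly above +inf's bit pattern => NaN
}

int main() {
  std::printf("%d %d\n", isNanViaBits(std::numeric_limits<float>::quiet_NaN()),
              isNanViaBits(1.0f)); // prints 1 0
  return 0;
}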
9939
9940LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
9941 // Implement G_SELECT in terms of XOR, AND, OR.
9942 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9943 MI.getFirst4RegLLTs();
9944
9945 bool IsEltPtr = DstTy.isPointerOrPointerVector();
9946 if (IsEltPtr) {
9947 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9948 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9949 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9950 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9951 DstTy = NewTy;
9952 }
9953
9954 if (MaskTy.isScalar()) {
9955 // Turn the scalar condition into a vector condition mask if needed.
9956
9957 Register MaskElt = MaskReg;
9958
9959 // The condition was potentially zero extended before, but we want a sign
9960 // extended boolean.
9961 if (MaskTy != LLT::scalar(1))
9962 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
9963
9964 // Continue the sign extension (or truncate) to match the data type.
9965 MaskElt =
9966 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
9967
9968 if (DstTy.isVector()) {
9969 // Generate a vector splat idiom.
9970 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9971 MaskReg = ShufSplat.getReg(0);
9972 } else {
9973 MaskReg = MaskElt;
9974 }
9975 MaskTy = DstTy;
9976 } else if (!DstTy.isVector()) {
9977 // Cannot handle the case that mask is a vector and dst is a scalar.
9978 return UnableToLegalize;
9979 }
9980
9981 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9982 return UnableToLegalize;
9983 }
9984
9985 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9986 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9987 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
9988 if (IsEltPtr) {
9989 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9990 MIRBuilder.buildIntToPtr(DstReg, Or);
9991 } else {
9992 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9993 }
9994 MI.eraseFromParent();
9995 return Legalized;
9996}
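// Editorial sketch (not part of LegalizerHelper.cpp): the AND/ANDN/OR select
// expansion above, with the condition already sign-extended to an all-ones or
// all-zeros lane mask.
#include <cstdint>
#include <cstdio>

static uint32_t selectByMask(uint32_t Mask, uint32_t A, uint32_t B) {
  return (A & Mask) | (B & ~Mask); // Mask must be all ones or all zeros
}

int main() {
  std::printf("%u %u\n", selectByMask(0xFFFFFFFFu, 1u, 2u),
              selectByMask(0u, 1u, 2u)); // prints 1 2
  return 0;
}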
9997
9998LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
9999 // Split DIVREM into individual instructions.
10000 unsigned Opcode = MI.getOpcode();
10001
10002 MIRBuilder.buildInstr(
10003 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10004 : TargetOpcode::G_UDIV,
10005 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10006 MIRBuilder.buildInstr(
10007 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10008 : TargetOpcode::G_UREM,
10009 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10010 MI.eraseFromParent();
10011 return Legalized;
10012}
10013
10014LegalizerHelper::LegalizeResult
10015LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
10016 // Expand %res = G_ABS %a into:
10017 // %v1 = G_ASHR %a, scalar_size-1
10018 // %v2 = G_ADD %a, %v1
10019 // %res = G_XOR %v2, %v1
10020 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10021 Register OpReg = MI.getOperand(1).getReg();
10022 auto ShiftAmt =
10023 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10024 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10025 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10026 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10027 MI.eraseFromParent();
10028 return Legalized;
10029}
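// Editorial sketch (not part of LegalizerHelper.cpp): the G_ASHR/G_ADD/G_XOR
// absolute-value identity above on a host int32_t (assuming the usual
// arithmetic right shift for negative signed values).
#include <cstdint>
#include <cstdio>

static int32_t absAddXor(int32_t X) {
  int32_t Sign = X >> 31;   // 0 for non-negative, -1 for negative
  return (X + Sign) ^ Sign; // flips and adjusts exactly when Sign is -1
}

int main() {
  std::printf("%d %d\n", absAddXor(-7), absAddXor(7)); // prints 7 7
  return 0;
}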
10030
10031LegalizerHelper::LegalizeResult
10032LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
10033 // Expand %res = G_ABS %a into:
10034 // %v1 = G_CONSTANT 0
10035 // %v2 = G_SUB %v1, %a
10036 // %res = G_SMAX %a, %v2
10037 Register SrcReg = MI.getOperand(1).getReg();
10038 LLT Ty = MRI.getType(SrcReg);
10039 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10040 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10041 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10042 MI.eraseFromParent();
10043 return Legalized;
10044}
10045
10046LegalizerHelper::LegalizeResult
10047LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
10048 Register SrcReg = MI.getOperand(1).getReg();
10049 Register DestReg = MI.getOperand(0).getReg();
10050 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10051 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10052 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10053 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10054 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10055 MI.eraseFromParent();
10056 return Legalized;
10057}
10058
10059LegalizerHelper::LegalizeResult
10060LegalizerHelper::lowerAbsDiffToSelect(MachineInstr &MI) {
10061 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10062 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10063 "Expected G_ABDS or G_ABDU instruction");
10064
10065 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10066 LLT Ty = MRI.getType(LHS);
10067
10068 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10069 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10070 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10071 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10072 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10073 ? CmpInst::ICMP_SGT
10074 : CmpInst::ICMP_UGT;
10075 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10076 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10077
10078 MI.eraseFromParent();
10079 return Legalized;
10080}
10081
10082LegalizerHelper::LegalizeResult
10083LegalizerHelper::lowerAbsDiffToMinMax(MachineInstr &MI) {
10084 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10085 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10086 "Expected G_ABDS or G_ABDU instruction");
10087
10088 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10089 LLT Ty = MRI.getType(LHS);
10090
10091 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10092 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10093 Register MaxReg, MinReg;
10094 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10095 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10096 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10097 } else {
10098 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10099 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10100 }
10101 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10102
10103 MI.eraseFromParent();
10104 return Legalized;
10105}
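// Editorial sketch (not part of LegalizerHelper.cpp): both G_ABDS expansions
// above on host integers; the same caveats about wrap-around on sub apply.
#include <algorithm>
#include <cstdint>
#include <cstdio>

static int32_t abdsViaSelect(int32_t A, int32_t B) {
  return A > B ? A - B : B - A; // select(sgt(a,b), sub(a,b), sub(b,a))
}

static int32_t abdsViaMinMax(int32_t A, int32_t B) {
  return std::max(A, B) - std::min(A, B); // sub(smax(a,b), smin(a,b))
}

int main() {
  std::printf("%d %d\n", abdsViaSelect(3, 10), abdsViaMinMax(3, 10)); // 7 7
  return 0;
}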
10106
10107LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
10108 Register SrcReg = MI.getOperand(1).getReg();
10109 Register DstReg = MI.getOperand(0).getReg();
10110
10111 LLT Ty = MRI.getType(DstReg);
10112
10113 // Reset sign bit
10114 MIRBuilder.buildAnd(
10115 DstReg, SrcReg,
10116 MIRBuilder.buildConstant(
10117 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10118
10119 MI.eraseFromParent();
10120 return Legalized;
10121}
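// Editorial sketch (not part of LegalizerHelper.cpp): the sign-bit masking
// above for IEEE binary32, assuming a host where float is 32 bits.
#include <cstdint>
#include <cstdio>
#include <cstring>

static float fabsViaBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  Bits &= 0x7FFFFFFFu; // keep everything except the sign bit
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  std::printf("%g\n", fabsViaBits(-2.5f)); // prints 2.5
  return 0;
}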
10122
10123LegalizerHelper::LegalizeResult
10124LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
10125 Register SrcReg = MI.getOperand(1).getReg();
10126 LLT SrcTy = MRI.getType(SrcReg);
10127 LLT DstTy = MRI.getType(SrcReg);
10128
10129 // The source could be a scalar if the IR type was <1 x sN>.
10130 if (SrcTy.isScalar()) {
10131 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10132 return UnableToLegalize; // FIXME: handle extension.
10133 // This can be just a plain copy.
10134 Observer.changingInstr(MI);
10135 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10136 Observer.changedInstr(MI);
10137 return Legalized;
10138 }
10139 return UnableToLegalize;
10140}
10141
10142LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
10143 MachineFunction &MF = *MI.getMF();
10144 const DataLayout &DL = MIRBuilder.getDataLayout();
10145 LLVMContext &Ctx = MF.getFunction().getContext();
10146 Register ListPtr = MI.getOperand(1).getReg();
10147 LLT PtrTy = MRI.getType(ListPtr);
10148
10149 // ListPtr is a pointer to the head of the list. Get the address
10150 // of the head of the list.
10151 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10152 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10153 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10154 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10155
10156 const Align A(MI.getOperand(2).getImm());
10157 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10158 if (A > TLI.getMinStackArgumentAlignment()) {
10159 Register AlignAmt =
10160 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10161 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10162 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10163 VAList = AndDst.getReg(0);
10164 }
10165
10166 // Increment the pointer, VAList, to the next vaarg.
10167 // The list should be bumped by the size of the element in the current head
10168 // of the list.
10169 Register Dst = MI.getOperand(0).getReg();
10170 LLT LLTTy = MRI.getType(Dst);
10171 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10172 auto IncAmt =
10173 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10174 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10175
10176 // Store the incremented VAList to the legalized pointer.
10177 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10178 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10179 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10180 // Load the actual argument out of the pointer VAList
10181 Align EltAlignment = DL.getABITypeAlign(Ty);
10182 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10183 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10184 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10185
10186 MI.eraseFromParent();
10187 return Legalized;
10188}
10189
10190static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10191 // On Darwin, -Os means optimize for size without hurting performance, so
10192 // only really optimize for size when -Oz (MinSize) is used.
10193 if (MF.getTarget().getTargetTriple().isOSDarwin())
10194 return MF.getFunction().hasMinSize();
10195 return MF.getFunction().hasOptSize();
10196}
10197
10198// Returns a list of types to use for memory op lowering in MemOps. A partial
10199// port of findOptimalMemOpLowering in TargetLowering.
10200static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10201 unsigned Limit, const MemOp &Op,
10202 unsigned DstAS, unsigned SrcAS,
10203 const AttributeList &FuncAttributes,
10204 const TargetLowering &TLI) {
10205 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10206 return false;
10207
10208 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10209
10210 if (Ty == LLT()) {
10211 // Use the largest scalar type whose alignment constraints are satisfied.
10212 // We only need to check DstAlign here as SrcAlign is always greater or
10213 // equal to DstAlign (or zero).
10214 Ty = LLT::scalar(64);
10215 if (Op.isFixedDstAlign())
10216 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10217 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10218 Ty = LLT::scalar(Ty.getSizeInBytes());
10219 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10220 // FIXME: check for the largest legal type we can load/store to.
10221 }
10222
10223 unsigned NumMemOps = 0;
10224 uint64_t Size = Op.size();
10225 while (Size) {
10226 unsigned TySize = Ty.getSizeInBytes();
10227 while (TySize > Size) {
10228 // For now, only use non-vector loads/stores for the left-over pieces.
10229 LLT NewTy = Ty;
10230 // FIXME: check for mem op safety and legality of the types. Not all of
10231 // SDAGisms map cleanly to GISel concepts.
10232 if (NewTy.isVector())
10233 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10234 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10235 unsigned NewTySize = NewTy.getSizeInBytes();
10236 assert(NewTySize > 0 && "Could not find appropriate type");
10237
10238 // If the new LLT cannot cover all of the remaining bits, then consider
10239 // issuing a (or a pair of) unaligned and overlapping load / store.
10240 unsigned Fast;
10241 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10242 MVT VT = getMVTForLLT(Ty);
10243 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10244 TLI.allowsMisalignedMemoryAccesses(
10245 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10246 MachineMemOperand::MONone, &Fast) &&
10247 Fast)
10248 TySize = Size;
10249 else {
10250 Ty = NewTy;
10251 TySize = NewTySize;
10252 }
10253 }
10254
10255 if (++NumMemOps > Limit)
10256 return false;
10257
10258 MemOps.push_back(Ty);
10259 Size -= TySize;
10260 }
10261
10262 return true;
10263}
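// Editorial sketch (not part of LegalizerHelper.cpp): a much-simplified view of
// the size breakdown above for the no-overlap case, splitting a byte count into
// power-of-two chunks no larger than the preferred access size (MaxChunk is
// assumed to be a power of two).
#include <cstdio>
#include <vector>

static std::vector<unsigned> breakDownBytes(unsigned Size, unsigned MaxChunk) {
  std::vector<unsigned> Chunks;
  unsigned Cur = MaxChunk;
  while (Size) {
    while (Cur > Size)
      Cur /= 2;            // fall back to the next smaller power of two
    Chunks.push_back(Cur);
    Size -= Cur;
  }
  return Chunks;
}

int main() {
  for (unsigned C : breakDownBytes(15, 8)) // prints 8 4 2 1
    std::printf("%u ", C);
  std::printf("\n");
  return 0;
}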
10264
10265// Get a vectorized representation of the memset value operand, GISel edition.
10266static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10267 MachineRegisterInfo &MRI = *MIB.getMRI();
10268 unsigned NumBits = Ty.getScalarSizeInBits();
10269 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10270 if (!Ty.isVector() && ValVRegAndVal) {
10271 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10272 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10273 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10274 }
10275
10276 // Extend the byte value to the larger type, and then multiply by a magic
10277 // value 0x010101... in order to replicate it across every byte.
10278 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10279 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10280 return MIB.buildConstant(Ty, 0).getReg(0);
10281 }
10282
10283 LLT ExtType = Ty.getScalarType();
10284 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10285 if (NumBits > 8) {
10286 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10287 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10288 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10289 }
10290
10291 // For vector types create a G_BUILD_VECTOR.
10292 if (Ty.isVector())
10293 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10294
10295 return Val;
10296}
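// Editorial sketch (not part of LegalizerHelper.cpp): the byte-splat trick used
// above; multiplying the zero-extended byte by 0x0101010101010101 replicates it
// across every byte of the wider value.
#include <cstdint>
#include <cstdio>

static uint64_t splatByte64(uint8_t B) {
  return uint64_t(B) * 0x0101010101010101ull;
}

int main() {
  std::printf("%016llx\n", (unsigned long long)splatByte64(0xAB));
  // prints abababababababab
  return 0;
}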
10297
10298LegalizerHelper::LegalizeResult
10299LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10300 uint64_t KnownLen, Align Alignment,
10301 bool IsVolatile) {
10302 auto &MF = *MI.getParent()->getParent();
10303 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10304 auto &DL = MF.getDataLayout();
10305 LLVMContext &C = MF.getFunction().getContext();
10306
10307 assert(KnownLen != 0 && "Have a zero length memset length!");
10308
10309 bool DstAlignCanChange = false;
10310 MachineFrameInfo &MFI = MF.getFrameInfo();
10311 bool OptSize = shouldLowerMemFuncForSize(MF);
10312
10313 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10314 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10315 DstAlignCanChange = true;
10316
10317 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10318 std::vector<LLT> MemOps;
10319
10320 const auto &DstMMO = **MI.memoperands_begin();
10321 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10322
10323 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10324 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10325
10326 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10327 MemOp::Set(KnownLen, DstAlignCanChange,
10328 Alignment,
10329 /*IsZeroMemset=*/IsZeroVal,
10330 /*IsVolatile=*/IsVolatile),
10331 DstPtrInfo.getAddrSpace(), ~0u,
10332 MF.getFunction().getAttributes(), TLI))
10333 return UnableToLegalize;
10334
10335 if (DstAlignCanChange) {
10336 // Get an estimate of the type from the LLT.
10337 Type *IRTy = getTypeForLLT(MemOps[0], C);
10338 Align NewAlign = DL.getABITypeAlign(IRTy);
10339 if (NewAlign > Alignment) {
10340 Alignment = NewAlign;
10341 unsigned FI = FIDef->getOperand(1).getIndex();
10342 // Give the stack frame object a larger alignment if needed.
10343 if (MFI.getObjectAlign(FI) < Alignment)
10344 MFI.setObjectAlignment(FI, Alignment);
10345 }
10346 }
10347
10348 MachineIRBuilder MIB(MI);
10349 // Find the largest store and generate the bit pattern for it.
10350 LLT LargestTy = MemOps[0];
10351 for (unsigned i = 1; i < MemOps.size(); i++)
10352 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10353 LargestTy = MemOps[i];
10354
10355 // The memset stored value is always defined as an s8, so in order to make it
10356 // work with larger store types we need to repeat the bit pattern across the
10357 // wider type.
10358 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10359
10360 if (!MemSetValue)
10361 return UnableToLegalize;
10362
10363 // Generate the stores. For each store type in the list, we generate the
10364 // matching store of that type to the destination address.
10365 LLT PtrTy = MRI.getType(Dst);
10366 unsigned DstOff = 0;
10367 unsigned Size = KnownLen;
10368 for (unsigned I = 0; I < MemOps.size(); I++) {
10369 LLT Ty = MemOps[I];
10370 unsigned TySize = Ty.getSizeInBytes();
10371 if (TySize > Size) {
10372 // Issuing an unaligned load / store pair that overlaps with the previous
10373 // pair. Adjust the offset accordingly.
10374 assert(I == MemOps.size() - 1 && I != 0);
10375 DstOff -= TySize - Size;
10376 }
10377
10378 // If this store is smaller than the largest store, see whether we can get
10379 // the smaller value for free with a truncate.
10380 Register Value = MemSetValue;
10381 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10382 MVT VT = getMVTForLLT(Ty);
10383 MVT LargestVT = getMVTForLLT(LargestTy);
10384 if (!LargestTy.isVector() && !Ty.isVector() &&
10385 TLI.isTruncateFree(LargestVT, VT))
10386 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10387 else
10388 Value = getMemsetValue(Val, Ty, MIB);
10389 if (!Value)
10390 return UnableToLegalize;
10391 }
10392
10393 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10394
10395 Register Ptr = Dst;
10396 if (DstOff != 0) {
10397 auto Offset =
10398 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10399 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10400 }
10401
10402 MIB.buildStore(Value, Ptr, *StoreMMO);
10403 DstOff += Ty.getSizeInBytes();
10404 Size -= TySize;
10405 }
10406
10407 MI.eraseFromParent();
10408 return Legalized;
10409}
10410
10411LegalizerHelper::LegalizeResult
10412LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10413 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10414
10415 auto [Dst, Src, Len] = MI.getFirst3Regs();
10416
10417 const auto *MMOIt = MI.memoperands_begin();
10418 const MachineMemOperand *MemOp = *MMOIt;
10419 bool IsVolatile = MemOp->isVolatile();
10420
10421 // See if this is a constant length copy
10422 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10423 // FIXME: support dynamically sized G_MEMCPY_INLINE
10424 assert(LenVRegAndVal &&
10425 "inline memcpy with dynamic size is not yet supported");
10426 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10427 if (KnownLen == 0) {
10428 MI.eraseFromParent();
10429 return Legalized;
10430 }
10431
10432 const auto &DstMMO = **MI.memoperands_begin();
10433 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10434 Align DstAlign = DstMMO.getBaseAlign();
10435 Align SrcAlign = SrcMMO.getBaseAlign();
10436
10437 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10438 IsVolatile);
10439}
10440
10441LegalizerHelper::LegalizeResult
10442LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10443 uint64_t KnownLen, Align DstAlign,
10444 Align SrcAlign, bool IsVolatile) {
10445 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10446 return lowerMemcpy(MI, Dst, Src, KnownLen,
10447 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10448 IsVolatile);
10449}
10450
10451LegalizerHelper::LegalizeResult
10452LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10453 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10454 Align SrcAlign, bool IsVolatile) {
10455 auto &MF = *MI.getParent()->getParent();
10456 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10457 auto &DL = MF.getDataLayout();
10458 LLVMContext &C = MF.getFunction().getContext();
10459
10460 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10461
10462 bool DstAlignCanChange = false;
10463 MachineFrameInfo &MFI = MF.getFrameInfo();
10464 Align Alignment = std::min(DstAlign, SrcAlign);
10465
10466 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10467 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10468 DstAlignCanChange = true;
10469
10470 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10471 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10472 // if the memcpy is in a tail call position.
10473
10474 std::vector<LLT> MemOps;
10475
10476 const auto &DstMMO = **MI.memoperands_begin();
10477 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10478 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10479 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10480
10481 if (!findGISelOptimalMemOpLowering(
10482 MemOps, Limit,
10483 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10484 IsVolatile),
10485 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10486 MF.getFunction().getAttributes(), TLI))
10487 return UnableToLegalize;
10488
10489 if (DstAlignCanChange) {
10490 // Get an estimate of the type from the LLT.
10491 Type *IRTy = getTypeForLLT(MemOps[0], C);
10492 Align NewAlign = DL.getABITypeAlign(IRTy);
10493
10494 // Don't promote to an alignment that would require dynamic stack
10495 // realignment.
10496 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10497 if (!TRI->hasStackRealignment(MF))
10498 if (MaybeAlign StackAlign = DL.getStackAlignment())
10499 NewAlign = std::min(NewAlign, *StackAlign);
10500
10501 if (NewAlign > Alignment) {
10502 Alignment = NewAlign;
10503 unsigned FI = FIDef->getOperand(1).getIndex();
10504 // Give the stack frame object a larger alignment if needed.
10505 if (MFI.getObjectAlign(FI) < Alignment)
10506 MFI.setObjectAlignment(FI, Alignment);
10507 }
10508 }
10509
10510 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10511
10512 MachineIRBuilder MIB(MI);
10513 // Now we need to emit a load/store pair for each of the types we've
10514 // collected. I.e. for each type, generate a load from the source pointer of
10515 // that type width, and then generate a corresponding store to the dest buffer
10516 // of that value loaded. This can result in a sequence of loads and stores of
10517 // mixed types, depending on what the target specifies as good types to use.
10518 unsigned CurrOffset = 0;
10519 unsigned Size = KnownLen;
10520 for (auto CopyTy : MemOps) {
10521 // Issuing an unaligned load / store pair that overlaps with the previous
10522 // pair. Adjust the offset accordingly.
10523 if (CopyTy.getSizeInBytes() > Size)
10524 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10525
10526 // Construct MMOs for the accesses.
10527 auto *LoadMMO =
10528 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10529 auto *StoreMMO =
10530 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10531
10532 // Create the load.
10533 Register LoadPtr = Src;
10534 Register Offset;
10535 if (CurrOffset != 0) {
10536 LLT SrcTy = MRI.getType(Src);
10537 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10538 .getReg(0);
10539 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10540 }
10541 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10542
10543 // Create the store.
10544 Register StorePtr = Dst;
10545 if (CurrOffset != 0) {
10546 LLT DstTy = MRI.getType(Dst);
10547 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10548 }
10549 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10550 CurrOffset += CopyTy.getSizeInBytes();
10551 Size -= CopyTy.getSizeInBytes();
10552 }
10553
10554 MI.eraseFromParent();
10555 return Legalized;
10556}
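// Editorial sketch (not part of LegalizerHelper.cpp): the offset adjustment for
// an over-wide final access above, in a much-simplified form where every access
// uses the same chunk size and overlap is allowed. Copying 15 bytes with 8-byte
// accesses yields offsets 0 and 7, the second access overlapping the first.
#include <cstdio>
#include <vector>

static std::vector<unsigned> overlapOffsets(unsigned Size, unsigned Chunk) {
  std::vector<unsigned> Offsets;
  unsigned Off = 0;
  while (Size) {
    unsigned Step = Chunk;
    if (Chunk > Size) {
      if (!Offsets.empty())
        Off -= Chunk - Size; // slide the final access back to overlap the previous one
      Step = Size;
    }
    Offsets.push_back(Off);
    Off += Chunk;
    Size -= Step;
  }
  return Offsets;
}

int main() {
  for (unsigned O : overlapOffsets(15, 8)) // prints 0 7
    std::printf("%u ", O);
  std::printf("\n");
  return 0;
}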
10557
10558LegalizerHelper::LegalizeResult
10559LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10560 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10561 bool IsVolatile) {
10562 auto &MF = *MI.getParent()->getParent();
10563 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10564 auto &DL = MF.getDataLayout();
10565 LLVMContext &C = MF.getFunction().getContext();
10566
10567 assert(KnownLen != 0 && "Have a zero length memmove length!");
10568
10569 bool DstAlignCanChange = false;
10570 MachineFrameInfo &MFI = MF.getFrameInfo();
10571 bool OptSize = shouldLowerMemFuncForSize(MF);
10572 Align Alignment = std::min(DstAlign, SrcAlign);
10573
10574 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10575 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10576 DstAlignCanChange = true;
10577
10578 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10579 std::vector<LLT> MemOps;
10580
10581 const auto &DstMMO = **MI.memoperands_begin();
10582 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10583 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10584 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10585
10586 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10587 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10588 // same thing here.
10589 if (!findGISelOptimalMemOpLowering(
10590 MemOps, Limit,
10591 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10592 /*IsVolatile*/ true),
10593 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10594 MF.getFunction().getAttributes(), TLI))
10595 return UnableToLegalize;
10596
10597 if (DstAlignCanChange) {
10598 // Get an estimate of the type from the LLT.
10599 Type *IRTy = getTypeForLLT(MemOps[0], C);
10600 Align NewAlign = DL.getABITypeAlign(IRTy);
10601
10602 // Don't promote to an alignment that would require dynamic stack
10603 // realignment.
10604 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10605 if (!TRI->hasStackRealignment(MF))
10606 if (MaybeAlign StackAlign = DL.getStackAlignment())
10607 NewAlign = std::min(NewAlign, *StackAlign);
10608
10609 if (NewAlign > Alignment) {
10610 Alignment = NewAlign;
10611 unsigned FI = FIDef->getOperand(1).getIndex();
10612 // Give the stack frame object a larger alignment if needed.
10613 if (MFI.getObjectAlign(FI) < Alignment)
10614 MFI.setObjectAlignment(FI, Alignment);
10615 }
10616 }
10617
10618 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10619
10620 MachineIRBuilder MIB(MI);
10621 // Memmove requires that we perform the loads first before issuing the stores.
10622 // Apart from that, this loop is pretty much doing the same thing as the
10623 // memcpy codegen function.
10624 unsigned CurrOffset = 0;
10625 SmallVector<Register, 16> LoadVals;
10626 for (auto CopyTy : MemOps) {
10627 // Construct MMO for the load.
10628 auto *LoadMMO =
10629 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10630
10631 // Create the load.
10632 Register LoadPtr = Src;
10633 if (CurrOffset != 0) {
10634 LLT SrcTy = MRI.getType(Src);
10635 auto Offset =
10636 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10637 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10638 }
10639 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10640 CurrOffset += CopyTy.getSizeInBytes();
10641 }
10642
10643 CurrOffset = 0;
10644 for (unsigned I = 0; I < MemOps.size(); ++I) {
10645 LLT CopyTy = MemOps[I];
10646 // Now store the values loaded.
10647 auto *StoreMMO =
10648 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10649
10650 Register StorePtr = Dst;
10651 if (CurrOffset != 0) {
10652 LLT DstTy = MRI.getType(Dst);
10653 auto Offset =
10654 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10655 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10656 }
10657 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10658 CurrOffset += CopyTy.getSizeInBytes();
10659 }
10660 MI.eraseFromParent();
10661 return Legalized;
10662}
10663
10664LegalizerHelper::LegalizeResult
10665LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
10666 const unsigned Opc = MI.getOpcode();
10667 // This combine is fairly complex so it's not written with a separate
10668 // matcher function.
10669 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10670 Opc == TargetOpcode::G_MEMSET) &&
10671 "Expected memcpy like instruction");
10672
10673 auto MMOIt = MI.memoperands_begin();
10674 const MachineMemOperand *MemOp = *MMOIt;
10675
10676 Align DstAlign = MemOp->getBaseAlign();
10677 Align SrcAlign;
10678 auto [Dst, Src, Len] = MI.getFirst3Regs();
10679
10680 if (Opc != TargetOpcode::G_MEMSET) {
10681 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10682 MemOp = *(++MMOIt);
10683 SrcAlign = MemOp->getBaseAlign();
10684 }
10685
10686 // See if this is a constant length copy
10687 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10688 if (!LenVRegAndVal)
10689 return UnableToLegalize;
10690 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10691
10692 if (KnownLen == 0) {
10693 MI.eraseFromParent();
10694 return Legalized;
10695 }
10696
10697 if (MaxLen && KnownLen > MaxLen)
10698 return UnableToLegalize;
10699
10700 bool IsVolatile = MemOp->isVolatile();
10701 if (Opc == TargetOpcode::G_MEMCPY) {
10702 auto &MF = *MI.getParent()->getParent();
10703 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10704 bool OptSize = shouldLowerMemFuncForSize(MF);
10705 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10706 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10707 IsVolatile);
10708 }
10709 if (Opc == TargetOpcode::G_MEMMOVE)
10710 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10711 if (Opc == TargetOpcode::G_MEMSET)
10712 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10713 return UnableToLegalize;
10714}
unsigned const MachineRegisterInfo * MRI
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
APInt bitcastToAPInt() const
Definition APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1098
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
void negate()
Negate this APInt in place.
Definition APInt.h:1468
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:681
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:692
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:686
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:689
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:685
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:687
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:694
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:691
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:688
bool isSigned() const
Definition InstrTypes.h:932
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791
const APFloat & getValueAPF() const
Definition Constants.h:320
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:208
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:310
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:316
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
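Hedged usage sketch for the stack-temporary helpers (Helper is a LegalizerHelper reference; all names are illustrative):
MachinePointerInfo PtrInfo;
LLT S64 = LLT::scalar(64);
Align SlotAlign = Helper.getStackTemporaryAlignment(S64);
auto SlotAddr =
    Helper.createStackTemporary(S64.getSizeInBytes(), SlotAlign, PtrInfo);
Register AddrReg = SlotAddr.getReg(0); // pointer to the new frame-index slot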
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
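A hedged sketch of how a caller, for example a target's custom legalization hook, might invoke this entry point (the function name narrowToS32 is illustrative only):
static LegalizerHelper::LegalizeResult narrowToS32(LegalizerHelper &Helper,
                                                   MachineInstr &MI) {
  // Re-express the result type (type index 0) in 32-bit pieces.
  return Helper.narrowScalar(MI, /*TypeIdx=*/0, LLT::scalar(32));
}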
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
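A hedged sketch of how a driver might act on these three results when calling legalizeInstrStep (Helper, MI and LocObserver are assumed to exist in the calling code):
switch (Helper.legalizeInstrStep(MI, LocObserver)) {
case LegalizerHelper::AlreadyLegal: // nothing needed to change
case LegalizerHelper::Legalized:    // MI was rewritten into legal operations
  break;
case LegalizerHelper::UnableToLegalize:
  report_fatal_error("unable to legalize instruction"); // a real driver reports a proper error
}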
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
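Inside the helper's own widenScalar implementation, the in-place pattern for a simple binary operation looks roughly like the sketch below (WideTy is the requested wider type; this is a simplified illustration, not a copy of the real code):
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, /*OpIdx=*/1, TargetOpcode::G_ANYEXT); // extend LHS
widenScalarSrc(MI, WideTy, /*OpIdx=*/2, TargetOpcode::G_ANYEXT); // extend RHS
widenScalarDst(MI, WideTy, /*OpIdx=*/0); // wide def, truncated back to the original type
Observer.changedInstr(MI);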
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:97
A single uniqued string.
Definition Metadata.h:721
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:618
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
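A small usage sketch for buildSplatBuildVector (B is a MachineIRBuilder; values are illustrative):
LLT S32 = LLT::scalar(32);
auto One = B.buildConstant(S32, 1);                                   // s32 constant 1
auto Splat = B.buildSplatBuildVector(LLT::fixed_vector(4, S32), One); // <4 x s32> of ones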
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
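Sketch of buildMergeLikeInstr reassembling a scalar from two halves (B is a MachineIRBuilder; with a scalar destination this emits G_MERGE_VALUES, low part first):
auto Lo = B.buildConstant(LLT::scalar(32), 0x5678);
auto Hi = B.buildConstant(LLT::scalar(32), 0x1234);
auto Merged =
    B.buildMergeLikeInstr(LLT::scalar(64), {Lo.getReg(0), Hi.getReg(0)});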
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
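And a hedged sketch of feeding buildLoad an explicitly created MachineMemOperand (B is a MachineIRBuilder and Addr a pointer-typed register, both assumed):
MachineFunction &MF = B.getMF();
auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getUnknownStack(MF),
                                    MachineMemOperand::MOLoad, LLT::scalar(32),
                                    Align(4));
auto Loaded = B.buildLoad(LLT::scalar(32), Addr, *MMO);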
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:414
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:611
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:283
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:253
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
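Illustrative use of the two matchers above (MI and MRI assumed in scope; m_ICst and mi_match live in the llvm::MIPatternMatch namespace):
APInt ShAmt;
if (mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(ShAmt))) {
  // Operand 2 is a G_CONSTANT; ShAmt now holds its value.
}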
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
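A hedged sketch of how these getters map value types to libcall enums (assuming the RTLIB declarations are in scope; the exact enum value is illustrative):
RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f32, MVT::i64);
// For this pairing LC should be FPTOSINT_F32_I64; UNKNOWN_LIBCALL signals
// that no runtime routine exists for the requested combination.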
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:831
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2033
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:651
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1657
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
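Typical use of getIConstantVRegVal to guard a constant-only path (MI and MRI assumed in scope):
if (std::optional<APInt> Cst =
        getIConstantVRegVal(MI.getOperand(2).getReg(), MRI)) {
  uint64_t Imm = Cst->getZExtValue(); // the literal operand value
  (void)Imm;
}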
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:232
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1565
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1622
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1152
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1189
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:368
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:754
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:506
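Sketch of extractParts splitting a wide register (Src, B and MRI assumed in scope; the pieces come back low half first):
SmallVector<Register, 2> Parts;
extractParts(Src, LLT::scalar(32), /*NumParts=*/2, Parts, B, MRI);
// Parts[0]/Parts[1] now hold the two 32-bit G_UNMERGE_VALUES results.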
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1815
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:241
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:352
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
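A minimal, self-contained sketch of the Alignment.h helpers referenced above:
#include "llvm/Support/Alignment.h"
#include <cassert>

void alignmentSketch() {
  llvm::Align A(8);
  assert(llvm::alignTo(13, A) == 16);                    // round 13 up to a multiple of 8
  assert(llvm::commonAlignment(A, 4) == llvm::Align(4)); // an offset of 4 caps the alignment at 4
  assert(llvm::Log2(A) == 3);
}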
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1277
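For plain scalars the two LLT helpers reduce to the LCM/GCD of the bit widths, as in this hedged sketch (header path assumed for a recent tree):
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include <cassert>

void lcmGcdSketch() {
  assert(llvm::getLCMType(llvm::LLT::scalar(32), llvm::LLT::scalar(64)) ==
         llvm::LLT::scalar(64));
  assert(llvm::getGCDType(llvm::LLT::scalar(32), llvm::LLT::scalar(64)) ==
         llvm::LLT::scalar(32));
}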
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:299
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:609
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
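Sketch contrasting the two power-of-two rounding helpers (PowerOf2Ceil is an identity on powers of two, NextPowerOf2 always moves strictly past its argument):
#include "llvm/Support/MathExtras.h"
#include <cassert>

void roundingSketch() {
  static_assert(llvm::NextPowerOf2(16) == 32, "strictly greater than 16");
  assert(llvm::PowerOf2Ceil(16) == 16 && llvm::PowerOf2Ceil(17) == 32);
}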
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)