1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
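/// Editorial example (not part of the upstream comment): for OrigTy = s88 and
/// NarrowTy = s32 this returns {2, 1} and sets \p LeftoverTy to s24; for
/// OrigTy = s64 and NarrowTy = s32 it returns {2, 0} and leaves \p LeftoverTy
/// invalid.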
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
83
84Type *llvm::getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
105LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113 GISelChangeObserver &Observer,
114 MachineIRBuilder &B, GISelValueTracking *VT)
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
117
118LegalizerHelper::LegalizeResult
119LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
120 LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
122
123 MIRBuilder.setInstrAndDebugLoc(MI);
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
156 : UnableToLegalize;
157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
183 // Merge sub-vectors with different numbers of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs);
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 32> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
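// Editorial note: a small worked example of the padding logic above, assuming
// a G_ANYEXT pad strategy. With DstTy = s64, NarrowTy = s32, GCDTy = s16 and
// three s16 source registers, LCMTy is s64, NumParts is 2 and NumSubParts is
// 2; the first s32 piece is merged from VRegs[0] and VRegs[1], the second from
// VRegs[2] plus an undef s16 pad, and VRegs is replaced by those two s32
// pieces before the caller remerges them into the s64 result.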
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_LROUND:
415 RTLIBCASE(LROUND_F);
416 case TargetOpcode::G_LLROUND:
417 RTLIBCASE(LLROUND_F);
418 case TargetOpcode::G_MUL:
419 RTLIBCASE_INT(MUL_I);
420 case TargetOpcode::G_SDIV:
421 RTLIBCASE_INT(SDIV_I);
422 case TargetOpcode::G_UDIV:
423 RTLIBCASE_INT(UDIV_I);
424 case TargetOpcode::G_SREM:
425 RTLIBCASE_INT(SREM_I);
426 case TargetOpcode::G_UREM:
427 RTLIBCASE_INT(UREM_I);
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
429 RTLIBCASE_INT(CTLZ_I);
430 case TargetOpcode::G_FADD:
431 RTLIBCASE(ADD_F);
432 case TargetOpcode::G_FSUB:
433 RTLIBCASE(SUB_F);
434 case TargetOpcode::G_FMUL:
435 RTLIBCASE(MUL_F);
436 case TargetOpcode::G_FDIV:
437 RTLIBCASE(DIV_F);
438 case TargetOpcode::G_FEXP:
439 RTLIBCASE(EXP_F);
440 case TargetOpcode::G_FEXP2:
441 RTLIBCASE(EXP2_F);
442 case TargetOpcode::G_FEXP10:
443 RTLIBCASE(EXP10_F);
444 case TargetOpcode::G_FREM:
445 RTLIBCASE(REM_F);
446 case TargetOpcode::G_FPOW:
447 RTLIBCASE(POW_F);
448 case TargetOpcode::G_FPOWI:
449 RTLIBCASE(POWI_F);
450 case TargetOpcode::G_FMA:
451 RTLIBCASE(FMA_F);
452 case TargetOpcode::G_FSIN:
453 RTLIBCASE(SIN_F);
454 case TargetOpcode::G_FCOS:
455 RTLIBCASE(COS_F);
456 case TargetOpcode::G_FTAN:
457 RTLIBCASE(TAN_F);
458 case TargetOpcode::G_FASIN:
459 RTLIBCASE(ASIN_F);
460 case TargetOpcode::G_FACOS:
461 RTLIBCASE(ACOS_F);
462 case TargetOpcode::G_FATAN:
463 RTLIBCASE(ATAN_F);
464 case TargetOpcode::G_FATAN2:
465 RTLIBCASE(ATAN2_F);
466 case TargetOpcode::G_FSINH:
467 RTLIBCASE(SINH_F);
468 case TargetOpcode::G_FCOSH:
469 RTLIBCASE(COSH_F);
470 case TargetOpcode::G_FTANH:
471 RTLIBCASE(TANH_F);
472 case TargetOpcode::G_FSINCOS:
473 RTLIBCASE(SINCOS_F);
474 case TargetOpcode::G_FLOG10:
475 RTLIBCASE(LOG10_F);
476 case TargetOpcode::G_FLOG:
477 RTLIBCASE(LOG_F);
478 case TargetOpcode::G_FLOG2:
479 RTLIBCASE(LOG2_F);
480 case TargetOpcode::G_FLDEXP:
481 RTLIBCASE(LDEXP_F);
482 case TargetOpcode::G_FCEIL:
483 RTLIBCASE(CEIL_F);
484 case TargetOpcode::G_FFLOOR:
485 RTLIBCASE(FLOOR_F);
486 case TargetOpcode::G_FMINNUM:
487 RTLIBCASE(FMIN_F);
488 case TargetOpcode::G_FMAXNUM:
489 RTLIBCASE(FMAX_F);
490 case TargetOpcode::G_FMINIMUMNUM:
491 RTLIBCASE(FMINIMUM_NUM_F);
492 case TargetOpcode::G_FMAXIMUMNUM:
493 RTLIBCASE(FMAXIMUM_NUM_F);
494 case TargetOpcode::G_FSQRT:
495 RTLIBCASE(SQRT_F);
496 case TargetOpcode::G_FRINT:
497 RTLIBCASE(RINT_F);
498 case TargetOpcode::G_FNEARBYINT:
499 RTLIBCASE(NEARBYINT_F);
500 case TargetOpcode::G_INTRINSIC_TRUNC:
501 RTLIBCASE(TRUNC_F);
502 case TargetOpcode::G_INTRINSIC_ROUND:
503 RTLIBCASE(ROUND_F);
504 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
505 RTLIBCASE(ROUNDEVEN_F);
506 case TargetOpcode::G_INTRINSIC_LRINT:
507 RTLIBCASE(LRINT_F);
508 case TargetOpcode::G_INTRINSIC_LLRINT:
509 RTLIBCASE(LLRINT_F);
510 }
511 llvm_unreachable("Unknown libcall function");
512#undef RTLIBCASE_INT
513#undef RTLIBCASE
514}
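// Editorial example: getRTLibDesc(TargetOpcode::G_FREM, 64) selects
// RTLIB::REM_F64 through the RTLIBCASE(REM_F) expansion above, which on most
// targets resolves to the "fmod" runtime routine.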
515
516/// True if an instruction is in tail position in its caller. Intended for
517/// legalizing libcalls as tail calls when possible.
518static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
519 MachineInstr &MI,
520 const TargetInstrInfo &TII,
521 MachineRegisterInfo &MRI) {
522 MachineBasicBlock &MBB = *MI.getParent();
523 const Function &F = MBB.getParent()->getFunction();
524
525 // Conservatively require the attributes of the call to match those of
526 // the return. Ignore NoAlias and NonNull because they don't affect the
527 // call sequence.
528 AttributeList CallerAttrs = F.getAttributes();
529 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
530 .removeAttribute(Attribute::NoAlias)
531 .removeAttribute(Attribute::NonNull)
532 .hasAttributes())
533 return false;
534
535 // It's not safe to eliminate the sign / zero extension of the return value.
536 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
537 CallerAttrs.hasRetAttr(Attribute::SExt))
538 return false;
539
540 // Only tail call if the following instruction is a standard return or if we
541 // have a `thisreturn` callee, and a sequence like:
542 //
543 // G_MEMCPY %0, %1, %2
544 // $x0 = COPY %0
545 // RET_ReallyLR implicit $x0
546 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
547 if (Next != MBB.instr_end() && Next->isCopy()) {
548 if (MI.getOpcode() == TargetOpcode::G_BZERO)
549 return false;
550
551 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
552 // memcpy/etc routines return the same parameter. For others it will be the
553 // returned value.
554 Register VReg = MI.getOperand(0).getReg();
555 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
556 return false;
557
558 Register PReg = Next->getOperand(0).getReg();
559 if (!PReg.isPhysical())
560 return false;
561
562 auto Ret = next_nodbg(Next, MBB.instr_end());
563 if (Ret == MBB.instr_end() || !Ret->isReturn())
564 return false;
565
566 if (Ret->getNumImplicitOperands() != 1)
567 return false;
568
569 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
570 return false;
571
572 // Skip over the COPY that we just validated.
573 Next = Ret;
574 }
575
576 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
577 return false;
578
579 return true;
580}
581
582LegalizerHelper::LegalizeResult
583llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
584 const CallLowering::ArgInfo &Result,
585 ArrayRef<CallLowering::ArgInfo> Args,
586 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
587 MachineInstr *MI) {
588 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
589
590 CallLowering::CallLoweringInfo Info;
591 Info.CallConv = CC;
592 Info.Callee = MachineOperand::CreateES(Name);
593 Info.OrigRet = Result;
594 if (MI)
595 Info.IsTailCall =
596 (Result.Ty->isVoidTy() ||
597 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
598 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
599 *MIRBuilder.getMRI());
600
601 llvm::append_range(Info.OrigArgs, Args);
602 if (!CLI.lowerCall(MIRBuilder, Info))
603 return LegalizerHelper::UnableToLegalize;
604
605 if (MI && Info.LoweredTailCall) {
606 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
607
608 // Check debug locations before removing the return.
609 LocObserver.checkpoint(true);
610
611 // We must have a return following the call (or debug insts) to get past
612 // isLibCallInTailPosition.
613 do {
614 MachineInstr *Next = MI->getNextNode();
615 assert(Next &&
616 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
617 "Expected instr following MI to be return or debug inst?");
618 // We lowered a tail call, so the call is now the return from the block.
619 // Delete the old return.
620 Next->eraseFromParent();
621 } while (MI->getNextNode());
622
623 // We expect to lose the debug location from the return.
624 LocObserver.checkpoint(false);
625 }
626 return LegalizerHelper::Legalized;
627}
628
629LegalizerHelper::LegalizeResult
630llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
631 const CallLowering::ArgInfo &Result,
632 ArrayRef<CallLowering::ArgInfo> Args,
633 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
634 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
635 const char *Name = TLI.getLibcallName(Libcall);
636 if (!Name)
637 return LegalizerHelper::UnableToLegalize;
638 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
639 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
640}
641
642// Useful for libcalls where all operands have the same type.
643static LegalizerHelper::LegalizeResult
644simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
645 Type *OpType, LostDebugLocObserver &LocObserver) {
646 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
647
648 // FIXME: What does the original arg index mean here?
649 SmallVector<CallLowering::ArgInfo, 3> Args;
650 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
651 Args.push_back({MO.getReg(), OpType, 0});
652 return createLibcall(MIRBuilder, Libcall,
653 {MI.getOperand(0).getReg(), OpType, 0}, Args,
654 LocObserver, &MI);
655}
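// Editorial note: for instance, legalizing a G_FDIV on s32 values through
// simpleLibcall() emits a call to the RTLIB::DIV_F32 routine, passing the two
// source registers as arguments and using the original destination register
// for the call's return value.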
656
657LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
658 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
659 LostDebugLocObserver &LocObserver) {
660 MachineFunction &MF = *MI.getMF();
661 MachineRegisterInfo &MRI = MF.getRegInfo();
662
663 Register DstSin = MI.getOperand(0).getReg();
664 Register DstCos = MI.getOperand(1).getReg();
665 Register Src = MI.getOperand(2).getReg();
666 LLT DstTy = MRI.getType(DstSin);
667
668 int MemSize = DstTy.getSizeInBytes();
669 Align Alignment = getStackTemporaryAlignment(DstTy);
670 const DataLayout &DL = MIRBuilder.getDataLayout();
671 unsigned AddrSpace = DL.getAllocaAddrSpace();
672 MachinePointerInfo PtrInfo;
673
674 Register StackPtrSin =
675 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
676 .getReg(0);
677 Register StackPtrCos =
678 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
679 .getReg(0);
680
681 auto &Ctx = MF.getFunction().getContext();
682 auto LibcallResult =
683 createLibcall(MIRBuilder, getRTLibDesc(MI.getOpcode(), Size),
684 {{0}, Type::getVoidTy(Ctx), 0},
685 {{Src, OpType, 0},
686 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
687 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
688 LocObserver, &MI);
689
690 if (LibcallResult != LegalizeResult::Legalized)
691 return UnableToLegalize;
692
693 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
694 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
695 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
696 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
697
698 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
699 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
700 MI.eraseFromParent();
701
702 return Legalized;
703}
704
705LegalizerHelper::LegalizeResult
706llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
707 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
708 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
709
710 SmallVector<CallLowering::ArgInfo, 3> Args;
711 // Add all the args, except for the last which is an imm denoting 'tail'.
712 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
713 Register Reg = MI.getOperand(i).getReg();
714
715 // Need to derive an IR type for call lowering.
716 LLT OpLLT = MRI.getType(Reg);
717 Type *OpTy = nullptr;
718 if (OpLLT.isPointer())
719 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
720 else
721 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
722 Args.push_back({Reg, OpTy, 0});
723 }
724
725 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
726 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
727 RTLIB::Libcall RTLibcall;
728 unsigned Opc = MI.getOpcode();
729 const char *Name;
730 switch (Opc) {
731 case TargetOpcode::G_BZERO:
732 RTLibcall = RTLIB::BZERO;
733 Name = TLI.getLibcallName(RTLibcall);
734 break;
735 case TargetOpcode::G_MEMCPY:
736 RTLibcall = RTLIB::MEMCPY;
737 Name = TLI.getMemcpyName();
738 Args[0].Flags[0].setReturned();
739 break;
740 case TargetOpcode::G_MEMMOVE:
741 RTLibcall = RTLIB::MEMMOVE;
742 Name = TLI.getLibcallName(RTLibcall);
743 Args[0].Flags[0].setReturned();
744 break;
745 case TargetOpcode::G_MEMSET:
746 RTLibcall = RTLIB::MEMSET;
747 Name = TLI.getLibcallName(RTLibcall);
748 Args[0].Flags[0].setReturned();
749 break;
750 default:
751 llvm_unreachable("unsupported opcode");
752 }
753
754 // Unsupported libcall on the target.
755 if (!Name) {
756 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
757 << MIRBuilder.getTII().getName(Opc) << "\n");
758 return LegalizerHelper::UnableToLegalize;
759 }
760
761 CallLowering::CallLoweringInfo Info;
762 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
763 Info.Callee = MachineOperand::CreateES(Name);
764 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
765 Info.IsTailCall =
766 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
767 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
768
769 llvm::append_range(Info.OrigArgs, Args);
770 if (!CLI.lowerCall(MIRBuilder, Info))
771 return LegalizerHelper::UnableToLegalize;
772
773 if (Info.LoweredTailCall) {
774 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
775
776 // Check debug locations before removing the return.
777 LocObserver.checkpoint(true);
778
779 // We must have a return following the call (or debug insts) to get past
780 // isLibCallInTailPosition.
781 do {
782 MachineInstr *Next = MI.getNextNode();
783 assert(Next &&
784 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
785 "Expected instr following MI to be return or debug inst?");
786 // We lowered a tail call, so the call is now the return from the block.
787 // Delete the old return.
788 Next->eraseFromParent();
789 } while (MI.getNextNode());
790
791 // We expect to lose the debug location from the return.
792 LocObserver.checkpoint(false);
793 }
794
795 return LegalizerHelper::Legalized;
796}
797
798static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
799 unsigned Opc = MI.getOpcode();
800 auto &AtomicMI = cast<GMemOperation>(MI);
801 auto &MMO = AtomicMI.getMMO();
802 auto Ordering = MMO.getMergedOrdering();
803 LLT MemType = MMO.getMemoryType();
804 uint64_t MemSize = MemType.getSizeInBytes();
805 if (MemType.isVector())
806 return RTLIB::UNKNOWN_LIBCALL;
807
808#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
809#define LCALL5(A) \
810 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
811 switch (Opc) {
812 case TargetOpcode::G_ATOMIC_CMPXCHG:
813 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
814 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
815 return getOutlineAtomicHelper(LC, Ordering, MemSize);
816 }
817 case TargetOpcode::G_ATOMICRMW_XCHG: {
818 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
819 return getOutlineAtomicHelper(LC, Ordering, MemSize);
820 }
821 case TargetOpcode::G_ATOMICRMW_ADD:
822 case TargetOpcode::G_ATOMICRMW_SUB: {
823 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
824 return getOutlineAtomicHelper(LC, Ordering, MemSize);
825 }
826 case TargetOpcode::G_ATOMICRMW_AND: {
827 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
828 return getOutlineAtomicHelper(LC, Ordering, MemSize);
829 }
830 case TargetOpcode::G_ATOMICRMW_OR: {
831 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
832 return getOutlineAtomicHelper(LC, Ordering, MemSize);
833 }
834 case TargetOpcode::G_ATOMICRMW_XOR: {
835 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
836 return getOutlineAtomicHelper(LC, Ordering, MemSize);
837 }
838 default:
839 return RTLIB::UNKNOWN_LIBCALL;
840 }
841#undef LCALLS
842#undef LCALL5
843}
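// Editorial example: a G_ATOMICRMW_ADD with a 4-byte memory operand and
// acquire ordering maps through the LDADD table above to
// RTLIB::OUTLINE_ATOMIC_LDADD4_ACQ, assuming getOutlineAtomicHelper indexes
// the table by the {1,2,4,8,16}-byte sizes and the four orderings laid out by
// the LCALLS/LCALL5 macros.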
844
845static LegalizerHelper::LegalizeResult
846createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
847 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
848
849 Type *RetTy;
850 SmallVector<Register> RetRegs;
851 SmallVector<CallLowering::ArgInfo, 3> Args;
852 unsigned Opc = MI.getOpcode();
853 switch (Opc) {
854 case TargetOpcode::G_ATOMIC_CMPXCHG:
855 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
856 Register Success;
857 LLT SuccessLLT;
858 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
859 MI.getFirst4RegLLTs();
860 RetRegs.push_back(Ret);
861 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
862 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
863 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
864 NewLLT) = MI.getFirst5RegLLTs();
865 RetRegs.push_back(Success);
866 RetTy = StructType::get(
867 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
868 }
869 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
870 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
871 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
872 break;
873 }
874 case TargetOpcode::G_ATOMICRMW_XCHG:
875 case TargetOpcode::G_ATOMICRMW_ADD:
876 case TargetOpcode::G_ATOMICRMW_SUB:
877 case TargetOpcode::G_ATOMICRMW_AND:
878 case TargetOpcode::G_ATOMICRMW_OR:
879 case TargetOpcode::G_ATOMICRMW_XOR: {
880 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
881 RetRegs.push_back(Ret);
882 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
883 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
884 Val =
885 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
886 .getReg(0);
887 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
888 Val =
889 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
890 .getReg(0);
891 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
892 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
893 break;
894 }
895 default:
896 llvm_unreachable("unsupported opcode");
897 }
898
899 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
900 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
901 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
902 const char *Name = TLI.getLibcallName(RTLibcall);
903
904 // Unsupported libcall on the target.
905 if (!Name) {
906 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
907 << MIRBuilder.getTII().getName(Opc) << "\n");
908 return LegalizerHelper::UnableToLegalize;
909 }
910
911 CallLowering::CallLoweringInfo Info;
912 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
913 Info.Callee = MachineOperand::CreateES(Name);
914 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
915
916 llvm::append_range(Info.OrigArgs, Args);
917 if (!CLI.lowerCall(MIRBuilder, Info))
918 return LegalizerHelper::UnableToLegalize;
919
920 return LegalizerHelper::Legalized;
921}
922
923static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
924 Type *FromType) {
925 auto ToMVT = MVT::getVT(ToType);
926 auto FromMVT = MVT::getVT(FromType);
927
928 switch (Opcode) {
929 case TargetOpcode::G_FPEXT:
930 return RTLIB::getFPEXT(FromMVT, ToMVT);
931 case TargetOpcode::G_FPTRUNC:
932 return RTLIB::getFPROUND(FromMVT, ToMVT);
933 case TargetOpcode::G_FPTOSI:
934 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
935 case TargetOpcode::G_FPTOUI:
936 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
937 case TargetOpcode::G_SITOFP:
938 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
939 case TargetOpcode::G_UITOFP:
940 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
941 }
942 llvm_unreachable("Unsupported libcall function");
943}
944
945static LegalizerHelper::LegalizeResult
946conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
947 Type *FromType, LostDebugLocObserver &LocObserver,
948 const TargetLowering &TLI, bool IsSigned = false) {
949 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
950 if (FromType->isIntegerTy()) {
951 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
952 Arg.Flags[0].setSExt();
953 else
954 Arg.Flags[0].setZExt();
955 }
956
957 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
958 return createLibcall(MIRBuilder, Libcall,
959 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
960 &MI);
961}
962
963static RTLIB::Libcall
964getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
965 RTLIB::Libcall RTLibcall;
966 switch (MI.getOpcode()) {
967 case TargetOpcode::G_GET_FPENV:
968 RTLibcall = RTLIB::FEGETENV;
969 break;
970 case TargetOpcode::G_SET_FPENV:
971 case TargetOpcode::G_RESET_FPENV:
972 RTLibcall = RTLIB::FESETENV;
973 break;
974 case TargetOpcode::G_GET_FPMODE:
975 RTLibcall = RTLIB::FEGETMODE;
976 break;
977 case TargetOpcode::G_SET_FPMODE:
978 case TargetOpcode::G_RESET_FPMODE:
979 RTLibcall = RTLIB::FESETMODE;
980 break;
981 default:
982 llvm_unreachable("Unexpected opcode");
983 }
984 return RTLibcall;
985}
986
987// Some library functions that read FP state (fegetmode, fegetenv) write the
988// state into a region in memory. IR intrinsics that do the same operations
989// (get_fpmode, get_fpenv) return the state as an integer value. To implement
990// these intrinsics via the library functions, we need to use a temporary
991// variable, for example:
992//
993// %0:_(s32) = G_GET_FPMODE
994//
995// is transformed to:
996//
997// %1:_(p0) = G_FRAME_INDEX %stack.0
998// BL &fegetmode
999// %0:_(s32) = G_LOAD % 1
1000//
1001LegalizerHelper::LegalizeResult
1002LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
1003 MachineInstr &MI,
1004 LostDebugLocObserver &LocObserver) {
1005 const DataLayout &DL = MIRBuilder.getDataLayout();
1006 auto &MF = MIRBuilder.getMF();
1007 auto &MRI = *MIRBuilder.getMRI();
1008 auto &Ctx = MF.getFunction().getContext();
1009
1010 // Create a temporary where the library function will put the state it reads.
1011 Register Dst = MI.getOperand(0).getReg();
1012 LLT StateTy = MRI.getType(Dst);
1013 TypeSize StateSize = StateTy.getSizeInBytes();
1014 Align TempAlign = getStackTemporaryAlignment(StateTy);
1015 MachinePointerInfo TempPtrInfo;
1016 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1017
1018 // Create a call to the library function, with the temporary as an argument.
1019 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1020 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1021 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1022 auto Res =
1023 createLibcall(MIRBuilder, RTLibcall,
1024 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1025 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1026 LocObserver, nullptr);
1027 if (Res != LegalizerHelper::Legalized)
1028 return Res;
1029
1030 // Create a load from the temporary.
1031 MachineMemOperand *MMO = MF.getMachineMemOperand(
1032 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1033 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1034
1035 return LegalizerHelper::Legalized;
1036}
1037
1038// Similar to `createGetStateLibcall`, this function calls a library function
1039// using transient space on the stack. In this case the library function reads
1040// the content of the memory region.
1041LegalizerHelper::LegalizeResult
1042LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1043 MachineInstr &MI,
1044 LostDebugLocObserver &LocObserver) {
1045 const DataLayout &DL = MIRBuilder.getDataLayout();
1046 auto &MF = MIRBuilder.getMF();
1047 auto &MRI = *MIRBuilder.getMRI();
1048 auto &Ctx = MF.getFunction().getContext();
1049
1050 // Create a temporary from which the library function will read the new state.
1051 Register Src = MI.getOperand(0).getReg();
1052 LLT StateTy = MRI.getType(Src);
1053 TypeSize StateSize = StateTy.getSizeInBytes();
1054 Align TempAlign = getStackTemporaryAlignment(StateTy);
1055 MachinePointerInfo TempPtrInfo;
1056 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1057
1058 // Put the new state into the temporary.
1059 MachineMemOperand *MMO = MF.getMachineMemOperand(
1060 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1061 MIRBuilder.buildStore(Src, Temp, *MMO);
1062
1063 // Create a call to the library function, with the temporary as an argument.
1064 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1065 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1066 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1067 return createLibcall(MIRBuilder, RTLibcall,
1068 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1069 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1070 LocObserver, nullptr);
1071}
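// Editorial note: by analogy with the G_GET_FPMODE example above, a
// G_SET_FPMODE %0:_(s32) becomes a store of %0 into a stack temporary followed
// by a call to &fesetmode with the temporary's address as the argument.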
1072
1073/// Returns the corresponding libcall for the given Pred and
1074/// the ICMP predicate that should be generated to compare with #0
1075/// after the libcall.
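/// Editorial example: for a 32-bit FCMP_OEQ this yields {RTLIB::OEQ_F32,
/// CmpInst::ICMP_EQ}, i.e. call the ordered-equal soft-float routine (__eqsf2
/// under the GCC naming referenced below) and test its i32 result for
/// equality with 0.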
1076static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1077getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1078#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1079 do { \
1080 switch (Size) { \
1081 case 32: \
1082 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1083 case 64: \
1084 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1085 case 128: \
1086 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1087 default: \
1088 llvm_unreachable("unexpected size"); \
1089 } \
1090 } while (0)
1091
1092 switch (Pred) {
1093 case CmpInst::FCMP_OEQ:
1094 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1095 case CmpInst::FCMP_UNE:
1096 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1097 case CmpInst::FCMP_OGE:
1098 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1099 case CmpInst::FCMP_OLT:
1100 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1101 case CmpInst::FCMP_OLE:
1102 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1103 case CmpInst::FCMP_OGT:
1104 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1105 case CmpInst::FCMP_UNO:
1106 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1107 default:
1108 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1109 }
1110}
1111
1113LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1115 LostDebugLocObserver &LocObserver) {
1116 auto &MF = MIRBuilder.getMF();
1117 auto &Ctx = MF.getFunction().getContext();
1118 const GFCmp *Cmp = cast<GFCmp>(&MI);
1119
1120 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1121 unsigned Size = OpLLT.getSizeInBits();
1122 if ((Size != 32 && Size != 64 && Size != 128) ||
1123 OpLLT != MRI.getType(Cmp->getRHSReg()))
1124 return UnableToLegalize;
1125
1126 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1127
1128 // DstReg type is s32
1129 const Register DstReg = Cmp->getReg(0);
1130 LLT DstTy = MRI.getType(DstReg);
1131 const auto Cond = Cmp->getCond();
1132
1133 // Reference:
1134 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1135 // Generates a libcall followed by ICMP.
1136 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1137 const CmpInst::Predicate ICmpPred,
1138 const DstOp &Res) -> Register {
1139 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1140 constexpr LLT TempLLT = LLT::scalar(32);
1141 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1142 // Generate libcall, holding result in Temp
1143 const auto Status = createLibcall(
1144 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1145 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1146 LocObserver, &MI);
1147 if (!Status)
1148 return {};
1149
1150 // Compare temp with #0 to get the final result.
1151 return MIRBuilder
1152 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1153 .getReg(0);
1154 };
1155
1156 // Simple case if we have a direct mapping from predicate to libcall
1157 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1158 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1159 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1160 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1161 return Legalized;
1162 }
1163 return UnableToLegalize;
1164 }
1165
1166 // No direct mapping found, should be generated as combination of libcalls.
1167
1168 switch (Cond) {
1169 case CmpInst::FCMP_UEQ: {
1170 // FCMP_UEQ: unordered or equal
1171 // Convert into (FCMP_OEQ || FCMP_UNO).
1172
1173 const auto [OeqLibcall, OeqPred] =
1174 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1175 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1176
1177 const auto [UnoLibcall, UnoPred] =
1178 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1179 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1180 if (Oeq && Uno)
1181 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1182 else
1183 return UnableToLegalize;
1184
1185 break;
1186 }
1187 case CmpInst::FCMP_ONE: {
1188 // FCMP_ONE: ordered and operands are unequal
1189 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1190
1191 // We invert the predicate instead of generating a NOT
1192 // to save one instruction.
1193 // On AArch64, isel can even select two cmps into a single ccmp.
1194 const auto [OeqLibcall, OeqPred] =
1195 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1196 const auto NotOeq =
1197 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1198
1199 const auto [UnoLibcall, UnoPred] =
1200 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1201 const auto NotUno =
1202 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1203
1204 if (NotOeq && NotUno)
1205 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1206 else
1207 return UnableToLegalize;
1208
1209 break;
1210 }
1211 case CmpInst::FCMP_ULT:
1212 case CmpInst::FCMP_UGE:
1213 case CmpInst::FCMP_UGT:
1214 case CmpInst::FCMP_ULE:
1215 case CmpInst::FCMP_ORD: {
1216 // Convert into: !(inverse(Pred))
1217 // E.g. FCMP_ULT becomes !FCMP_OGE
1218 // This is equivalent to the following, but saves some instructions.
1219 // MIRBuilder.buildNot(
1220 // PredTy,
1221 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1222 // Op1, Op2));
1223 const auto [InversedLibcall, InversedPred] =
1224 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1225 if (!BuildLibcall(InversedLibcall,
1226 CmpInst::getInversePredicate(InversedPred), DstReg))
1227 return UnableToLegalize;
1228 break;
1229 }
1230 default:
1231 return UnableToLegalize;
1232 }
1233
1234 return Legalized;
1235}
1236
1237// The function is used to legalize operations that set the default environment
1238// state. In the C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1239// On most targets supported in glibc FE_DFL_MODE is defined as
1240// `((const femode_t *) -1)`. That assumption is used here. If it does not hold
1241// for some target, the target must provide custom lowering.
1242LegalizerHelper::LegalizeResult
1243LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1244 MachineInstr &MI,
1245 LostDebugLocObserver &LocObserver) {
1246 const DataLayout &DL = MIRBuilder.getDataLayout();
1247 auto &MF = MIRBuilder.getMF();
1248 auto &Ctx = MF.getFunction().getContext();
1249
1250 // Create an argument for the library function.
1251 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1252 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1253 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1254 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1255 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1256 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1257 MIRBuilder.buildIntToPtr(Dest, DefValue);
1258
1259 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1260 return createLibcall(MIRBuilder, RTLibcall,
1261 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1262 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1263 LocObserver, &MI);
1264}
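// Editorial note: the net effect is that G_RESET_FPMODE, for example, is
// emitted roughly as a G_CONSTANT of all ones, a G_INTTOPTR of that constant
// into the default globals address space, and a call to &fesetmode taking the
// resulting pointer as its argument.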
1265
1266LegalizerHelper::LegalizeResult
1267LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1268 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1269
1270 switch (MI.getOpcode()) {
1271 default:
1272 return UnableToLegalize;
1273 case TargetOpcode::G_MUL:
1274 case TargetOpcode::G_SDIV:
1275 case TargetOpcode::G_UDIV:
1276 case TargetOpcode::G_SREM:
1277 case TargetOpcode::G_UREM:
1278 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1279 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1280 unsigned Size = LLTy.getSizeInBits();
1281 Type *HLTy = IntegerType::get(Ctx, Size);
1282 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1283 if (Status != Legalized)
1284 return Status;
1285 break;
1286 }
1287 case TargetOpcode::G_FADD:
1288 case TargetOpcode::G_FSUB:
1289 case TargetOpcode::G_FMUL:
1290 case TargetOpcode::G_FDIV:
1291 case TargetOpcode::G_FMA:
1292 case TargetOpcode::G_FPOW:
1293 case TargetOpcode::G_FREM:
1294 case TargetOpcode::G_FCOS:
1295 case TargetOpcode::G_FSIN:
1296 case TargetOpcode::G_FTAN:
1297 case TargetOpcode::G_FACOS:
1298 case TargetOpcode::G_FASIN:
1299 case TargetOpcode::G_FATAN:
1300 case TargetOpcode::G_FATAN2:
1301 case TargetOpcode::G_FCOSH:
1302 case TargetOpcode::G_FSINH:
1303 case TargetOpcode::G_FTANH:
1304 case TargetOpcode::G_FLOG10:
1305 case TargetOpcode::G_FLOG:
1306 case TargetOpcode::G_FLOG2:
1307 case TargetOpcode::G_FEXP:
1308 case TargetOpcode::G_FEXP2:
1309 case TargetOpcode::G_FEXP10:
1310 case TargetOpcode::G_FCEIL:
1311 case TargetOpcode::G_FFLOOR:
1312 case TargetOpcode::G_FMINNUM:
1313 case TargetOpcode::G_FMAXNUM:
1314 case TargetOpcode::G_FMINIMUMNUM:
1315 case TargetOpcode::G_FMAXIMUMNUM:
1316 case TargetOpcode::G_FSQRT:
1317 case TargetOpcode::G_FRINT:
1318 case TargetOpcode::G_FNEARBYINT:
1319 case TargetOpcode::G_INTRINSIC_TRUNC:
1320 case TargetOpcode::G_INTRINSIC_ROUND:
1321 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1322 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1323 unsigned Size = LLTy.getSizeInBits();
1324 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1325 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1326 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1327 return UnableToLegalize;
1328 }
1329 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1330 if (Status != Legalized)
1331 return Status;
1332 break;
1333 }
1334 case TargetOpcode::G_FSINCOS: {
1335 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1336 unsigned Size = LLTy.getSizeInBits();
1337 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1338 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1339 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1340 return UnableToLegalize;
1341 }
1342 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1343 }
1344 case TargetOpcode::G_LROUND:
1345 case TargetOpcode::G_LLROUND:
1346 case TargetOpcode::G_INTRINSIC_LRINT:
1347 case TargetOpcode::G_INTRINSIC_LLRINT: {
1348 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1349 unsigned Size = LLTy.getSizeInBits();
1350 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1351 Type *ITy = IntegerType::get(
1352 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1353 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1354 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1355 return UnableToLegalize;
1356 }
1357 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1358 LegalizeResult Status =
1359 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1360 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1361 if (Status != Legalized)
1362 return Status;
1363 MI.eraseFromParent();
1364 return Legalized;
1365 }
1366 case TargetOpcode::G_FPOWI:
1367 case TargetOpcode::G_FLDEXP: {
1368 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1369 unsigned Size = LLTy.getSizeInBits();
1370 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1371 Type *ITy = IntegerType::get(
1372 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1373 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1374 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1375 return UnableToLegalize;
1376 }
1377 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1378 SmallVector<CallLowering::ArgInfo, 2> Args = {
1379 {MI.getOperand(1).getReg(), HLTy, 0},
1380 {MI.getOperand(2).getReg(), ITy, 1}};
1381 Args[1].Flags[0].setSExt();
1382 LegalizeResult Status =
1383 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1384 Args, LocObserver, &MI);
1385 if (Status != Legalized)
1386 return Status;
1387 break;
1388 }
1389 case TargetOpcode::G_FPEXT:
1390 case TargetOpcode::G_FPTRUNC: {
1391 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1392 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1393 if (!FromTy || !ToTy)
1394 return UnableToLegalize;
1395 LegalizeResult Status =
1396 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1397 if (Status != Legalized)
1398 return Status;
1399 break;
1400 }
1401 case TargetOpcode::G_FCMP: {
1402 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1403 if (Status != Legalized)
1404 return Status;
1405 MI.eraseFromParent();
1406 return Status;
1407 }
1408 case TargetOpcode::G_FPTOSI:
1409 case TargetOpcode::G_FPTOUI: {
1410 // FIXME: Support other types
1411 Type *FromTy =
1412 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1413 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1414 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1415 return UnableToLegalize;
1416 LegalizeResult Status = conversionLibcall(
1417 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1418 if (Status != Legalized)
1419 return Status;
1420 break;
1421 }
1422 case TargetOpcode::G_SITOFP:
1423 case TargetOpcode::G_UITOFP: {
1424 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1425 Type *ToTy =
1426 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1427 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1428 return UnableToLegalize;
1429 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1430 LegalizeResult Status =
1431 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1432 LocObserver, TLI, IsSigned);
1433 if (Status != Legalized)
1434 return Status;
1435 break;
1436 }
1437 case TargetOpcode::G_ATOMICRMW_XCHG:
1438 case TargetOpcode::G_ATOMICRMW_ADD:
1439 case TargetOpcode::G_ATOMICRMW_SUB:
1440 case TargetOpcode::G_ATOMICRMW_AND:
1441 case TargetOpcode::G_ATOMICRMW_OR:
1442 case TargetOpcode::G_ATOMICRMW_XOR:
1443 case TargetOpcode::G_ATOMIC_CMPXCHG:
1444 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1445 auto Status = createAtomicLibcall(MIRBuilder, MI);
1446 if (Status != Legalized)
1447 return Status;
1448 break;
1449 }
1450 case TargetOpcode::G_BZERO:
1451 case TargetOpcode::G_MEMCPY:
1452 case TargetOpcode::G_MEMMOVE:
1453 case TargetOpcode::G_MEMSET: {
1454 LegalizeResult Result =
1455 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1456 if (Result != Legalized)
1457 return Result;
1458 MI.eraseFromParent();
1459 return Result;
1460 }
1461 case TargetOpcode::G_GET_FPENV:
1462 case TargetOpcode::G_GET_FPMODE: {
1463 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1464 if (Result != Legalized)
1465 return Result;
1466 break;
1467 }
1468 case TargetOpcode::G_SET_FPENV:
1469 case TargetOpcode::G_SET_FPMODE: {
1470 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1471 if (Result != Legalized)
1472 return Result;
1473 break;
1474 }
1475 case TargetOpcode::G_RESET_FPENV:
1476 case TargetOpcode::G_RESET_FPMODE: {
1477 LegalizeResult Result =
1478 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1479 if (Result != Legalized)
1480 return Result;
1481 break;
1482 }
1483 }
1484
1485 MI.eraseFromParent();
1486 return Legalized;
1487}
1488
1490 unsigned TypeIdx,
1491 LLT NarrowTy) {
1492 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1493 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1494
1495 switch (MI.getOpcode()) {
1496 default:
1497 return UnableToLegalize;
1498 case TargetOpcode::G_IMPLICIT_DEF: {
1499 Register DstReg = MI.getOperand(0).getReg();
1500 LLT DstTy = MRI.getType(DstReg);
1501
1502 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1503 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1504 // FIXME: Although this would also be legal for the general case, it causes
1505 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1506 // combines not being hit). This seems to be a problem related to the
1507 // artifact combiner.
1508 if (SizeOp0 % NarrowSize != 0) {
1509 LLT ImplicitTy = NarrowTy;
1510 if (DstTy.isVector())
1511 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1512
1513 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1514 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1515
1516 MI.eraseFromParent();
1517 return Legalized;
1518 }
1519
1520 int NumParts = SizeOp0 / NarrowSize;
1521
1522 SmallVector<Register, 2> DstRegs;
1523 for (int i = 0; i < NumParts; ++i)
1524 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1525
1526 if (DstTy.isVector())
1527 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1528 else
1529 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1530 MI.eraseFromParent();
1531 return Legalized;
1532 }
1533 case TargetOpcode::G_CONSTANT: {
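 // Editorial example: narrowing an s88 G_CONSTANT with NarrowTy = s32
 // produces two s32 pieces holding bits [0,32) and [32,64) plus an s24
 // leftover holding bits [64,88); insertParts() then merges them back into
 // the original s88 result.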
1534 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1535 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1536 unsigned TotalSize = Ty.getSizeInBits();
1537 unsigned NarrowSize = NarrowTy.getSizeInBits();
1538 int NumParts = TotalSize / NarrowSize;
1539
1540 SmallVector<Register, 4> PartRegs;
1541 for (int I = 0; I != NumParts; ++I) {
1542 unsigned Offset = I * NarrowSize;
1543 auto K = MIRBuilder.buildConstant(NarrowTy,
1544 Val.lshr(Offset).trunc(NarrowSize));
1545 PartRegs.push_back(K.getReg(0));
1546 }
1547
1548 LLT LeftoverTy;
1549 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1550 SmallVector<Register, 1> LeftoverRegs;
1551 if (LeftoverBits != 0) {
1552 LeftoverTy = LLT::scalar(LeftoverBits);
1553 auto K = MIRBuilder.buildConstant(
1554 LeftoverTy,
1555 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1556 LeftoverRegs.push_back(K.getReg(0));
1557 }
1558
1559 insertParts(MI.getOperand(0).getReg(),
1560 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1561
1562 MI.eraseFromParent();
1563 return Legalized;
1564 }
1565 case TargetOpcode::G_SEXT:
1566 case TargetOpcode::G_ZEXT:
1567 case TargetOpcode::G_ANYEXT:
1568 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1569 case TargetOpcode::G_TRUNC: {
1570 if (TypeIdx != 1)
1571 return UnableToLegalize;
1572
1573 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1574 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1575 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1576 return UnableToLegalize;
1577 }
1578
1579 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1580 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1581 MI.eraseFromParent();
1582 return Legalized;
1583 }
1584 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1585 case TargetOpcode::G_FREEZE: {
1586 if (TypeIdx != 0)
1587 return UnableToLegalize;
1588
1589 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1590 // Should widen scalar first
1591 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1592 return UnableToLegalize;
1593
1594 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1595 SmallVector<Register, 8> Parts;
1596 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1597 Parts.push_back(
1598 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1599 .getReg(0));
1600 }
1601
1602 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1603 MI.eraseFromParent();
1604 return Legalized;
1605 }
1606 case TargetOpcode::G_ADD:
1607 case TargetOpcode::G_SUB:
1608 case TargetOpcode::G_SADDO:
1609 case TargetOpcode::G_SSUBO:
1610 case TargetOpcode::G_SADDE:
1611 case TargetOpcode::G_SSUBE:
1612 case TargetOpcode::G_UADDO:
1613 case TargetOpcode::G_USUBO:
1614 case TargetOpcode::G_UADDE:
1615 case TargetOpcode::G_USUBE:
1616 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1617 case TargetOpcode::G_MUL:
1618 case TargetOpcode::G_UMULH:
1619 return narrowScalarMul(MI, NarrowTy);
1620 case TargetOpcode::G_EXTRACT:
1621 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1622 case TargetOpcode::G_INSERT:
1623 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1624 case TargetOpcode::G_LOAD: {
1625 auto &LoadMI = cast<GLoad>(MI);
1626 Register DstReg = LoadMI.getDstReg();
1627 LLT DstTy = MRI.getType(DstReg);
1628 if (DstTy.isVector())
1629 return UnableToLegalize;
1630
1631 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1632 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1633 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1634 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1635 LoadMI.eraseFromParent();
1636 return Legalized;
1637 }
1638
1639 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1640 }
1641 case TargetOpcode::G_ZEXTLOAD:
1642 case TargetOpcode::G_SEXTLOAD: {
1643 auto &LoadMI = cast<GExtLoad>(MI);
1644 Register DstReg = LoadMI.getDstReg();
1645 Register PtrReg = LoadMI.getPointerReg();
1646
1647 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1648 auto &MMO = LoadMI.getMMO();
1649 unsigned MemSize = MMO.getSizeInBits().getValue();
1650
1651 if (MemSize == NarrowSize) {
1652 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1653 } else if (MemSize < NarrowSize) {
1654 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1655 } else if (MemSize > NarrowSize) {
1656 // FIXME: Need to split the load.
1657 return UnableToLegalize;
1658 }
1659
1660 if (isa<GZExtLoad>(LoadMI))
1661 MIRBuilder.buildZExt(DstReg, TmpReg);
1662 else
1663 MIRBuilder.buildSExt(DstReg, TmpReg);
1664
1665 LoadMI.eraseFromParent();
1666 return Legalized;
1667 }
1668 case TargetOpcode::G_STORE: {
1669 auto &StoreMI = cast<GStore>(MI);
1670
1671 Register SrcReg = StoreMI.getValueReg();
1672 LLT SrcTy = MRI.getType(SrcReg);
1673 if (SrcTy.isVector())
1674 return UnableToLegalize;
1675
1676 int NumParts = SizeOp0 / NarrowSize;
1677 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1678 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1679 if (SrcTy.isVector() && LeftoverBits != 0)
1680 return UnableToLegalize;
1681
1682 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1683 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1684 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1685 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1686 StoreMI.eraseFromParent();
1687 return Legalized;
1688 }
1689
1690 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1691 }
1692 case TargetOpcode::G_SELECT:
1693 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1694 case TargetOpcode::G_AND:
1695 case TargetOpcode::G_OR:
1696 case TargetOpcode::G_XOR: {
1697 // Legalize bitwise operation:
1698 // A = BinOp<Ty> B, C
1699 // into:
1700 // B1, ..., BN = G_UNMERGE_VALUES B
1701 // C1, ..., CN = G_UNMERGE_VALUES C
1702 // A1 = BinOp<Ty/N> B1, C1
1703 // ...
1704 // AN = BinOp<Ty/N> BN, CN
1705 // A = G_MERGE_VALUES A1, ..., AN
1706 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1707 }
1708 case TargetOpcode::G_SHL:
1709 case TargetOpcode::G_LSHR:
1710 case TargetOpcode::G_ASHR:
1711 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1712 case TargetOpcode::G_CTLZ:
1713 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1714 case TargetOpcode::G_CTTZ:
1715 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1716 case TargetOpcode::G_CTPOP:
1717 if (TypeIdx == 1)
1718 switch (MI.getOpcode()) {
1719 case TargetOpcode::G_CTLZ:
1720 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1721 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1722 case TargetOpcode::G_CTTZ:
1723 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1724 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1725 case TargetOpcode::G_CTPOP:
1726 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1727 default:
1728 return UnableToLegalize;
1729 }
1730
1731 Observer.changingInstr(MI);
1732 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1733 Observer.changedInstr(MI);
1734 return Legalized;
1735 case TargetOpcode::G_INTTOPTR:
1736 if (TypeIdx != 1)
1737 return UnableToLegalize;
1738
1739 Observer.changingInstr(MI);
1740 narrowScalarSrc(MI, NarrowTy, 1);
1741 Observer.changedInstr(MI);
1742 return Legalized;
1743 case TargetOpcode::G_PTRTOINT:
1744 if (TypeIdx != 0)
1745 return UnableToLegalize;
1746
1747 Observer.changingInstr(MI);
1748 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1749 Observer.changedInstr(MI);
1750 return Legalized;
1751 case TargetOpcode::G_PHI: {
1752 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1753 // NarrowSize.
1754 if (SizeOp0 % NarrowSize != 0)
1755 return UnableToLegalize;
1756
1757 unsigned NumParts = SizeOp0 / NarrowSize;
1758 SmallVector<Register, 2> DstRegs(NumParts);
1759 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1760 Observer.changingInstr(MI);
1761 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1762 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1763 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1764 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1765 SrcRegs[i / 2], MIRBuilder, MRI);
1766 }
1767 MachineBasicBlock &MBB = *MI.getParent();
1768 MIRBuilder.setInsertPt(MBB, MI);
1769 for (unsigned i = 0; i < NumParts; ++i) {
1770 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1771 MachineInstrBuilder MIB =
1772 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1773 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1774 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1775 }
1776 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1777 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1778 Observer.changedInstr(MI);
1779 MI.eraseFromParent();
1780 return Legalized;
1781 }
1782 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1783 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1784 if (TypeIdx != 2)
1785 return UnableToLegalize;
1786
1787 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1788 Observer.changingInstr(MI);
1789 narrowScalarSrc(MI, NarrowTy, OpIdx);
1790 Observer.changedInstr(MI);
1791 return Legalized;
1792 }
1793 case TargetOpcode::G_ICMP: {
1794 Register LHS = MI.getOperand(2).getReg();
1795 LLT SrcTy = MRI.getType(LHS);
1796 CmpInst::Predicate Pred =
1797 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1798
1799 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1800 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1801 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1802 LHSLeftoverRegs, MIRBuilder, MRI))
1803 return UnableToLegalize;
1804
1805 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1806 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1807 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1808 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1809 return UnableToLegalize;
1810
1811 // We now have the LHS and RHS of the compare split into narrow-type
1812 // registers, plus potentially some leftover type.
1813 Register Dst = MI.getOperand(0).getReg();
1814 LLT ResTy = MRI.getType(Dst);
1815 if (ICmpInst::isEquality(Pred)) {
1816 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1817 // them together. For each equal part, the result should be all 0s. For
1818 // each non-equal part, we'll get at least one 1.
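// As a rough sketch (illustrative only, not taken from the source), an eq
// compare of two s64 values split into two s32 parts becomes:
//   %xl:_(s32), %xh:_(s32) = G_UNMERGE_VALUES %lhs:_(s64)
//   %yl:_(s32), %yh:_(s32) = G_UNMERGE_VALUES %rhs:_(s64)
//   %a:_(s32) = G_XOR %xl, %yl
//   %b:_(s32) = G_XOR %xh, %yh
//   %o:_(s32) = G_OR %a, %b
//   %z:_(s32) = G_CONSTANT i32 0
//   %d:_(s1) = G_ICMP intpred(eq), %o(s32), %z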
1819 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1820 SmallVector<Register, 4> Xors;
1821 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1822 auto LHS = std::get<0>(LHSAndRHS);
1823 auto RHS = std::get<1>(LHSAndRHS);
1824 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1825 Xors.push_back(Xor);
1826 }
1827
1828 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1829 // to the desired narrow type so that we can OR them together later.
1830 SmallVector<Register, 4> WidenedXors;
1831 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1832 auto LHS = std::get<0>(LHSAndRHS);
1833 auto RHS = std::get<1>(LHSAndRHS);
1834 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1835 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1836 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1837 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1838 llvm::append_range(Xors, WidenedXors);
1839 }
1840
1841 // Now, for each part we broke up, we know if they are equal/not equal
1842 // based off the G_XOR. We can OR these all together and compare against
1843 // 0 to get the result.
1844 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1845 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1846 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1847 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1848 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1849 } else {
1850 Register CmpIn;
1851 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1852 Register CmpOut;
1853 CmpInst::Predicate PartPred;
1854
1855 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1856 PartPred = Pred;
1857 CmpOut = Dst;
1858 } else {
1859 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1860 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1861 }
1862
1863 if (!CmpIn) {
1864 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1865 RHSPartRegs[I]);
1866 } else {
1867 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1868 RHSPartRegs[I]);
1869 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1870 LHSPartRegs[I], RHSPartRegs[I]);
1871 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1872 }
1873
1874 CmpIn = CmpOut;
1875 }
1876
1877 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1878 Register CmpOut;
1879 CmpInst::Predicate PartPred;
1880
1881 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1882 PartPred = Pred;
1883 CmpOut = Dst;
1884 } else {
1885 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1886 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1887 }
1888
1889 if (!CmpIn) {
1890 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1891 RHSLeftoverRegs[I]);
1892 } else {
1893 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1894 RHSLeftoverRegs[I]);
1895 auto CmpEq =
1896 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1897 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1898 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1899 }
1900
1901 CmpIn = CmpOut;
1902 }
1903 }
1904 MI.eraseFromParent();
1905 return Legalized;
1906 }
1907 case TargetOpcode::G_FCMP:
1908 if (TypeIdx != 0)
1909 return UnableToLegalize;
1910
1911 Observer.changingInstr(MI);
1912 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1913 Observer.changedInstr(MI);
1914 return Legalized;
1915
1916 case TargetOpcode::G_SEXT_INREG: {
1917 if (TypeIdx != 0)
1918 return UnableToLegalize;
1919
1920 int64_t SizeInBits = MI.getOperand(2).getImm();
1921
1922 // So long as the new type has more bits than the bits we're extending we
1923 // don't need to break it apart.
1924 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1925 Observer.changingInstr(MI);
1926 // We don't lose any non-extension bits by truncating the src and
1927 // sign-extending the dst.
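// A brief sketch of this easy path (illustrative only; registers arbitrary)
// for %d:_(s64) = G_SEXT_INREG %x, 8 with NarrowTy = s32:
//   %t:_(s32) = G_TRUNC %x:_(s64)
//   %i:_(s32) = G_SEXT_INREG %t, 8
//   %d:_(s64) = G_SEXT %i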
1928 MachineOperand &MO1 = MI.getOperand(1);
1929 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1930 MO1.setReg(TruncMIB.getReg(0));
1931
1932 MachineOperand &MO2 = MI.getOperand(0);
1933 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1934 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1935 MIRBuilder.buildSExt(MO2, DstExt);
1936 MO2.setReg(DstExt);
1937 Observer.changedInstr(MI);
1938 return Legalized;
1939 }
1940
1941 // Break it apart. Components below the extension point are unmodified. The
1942 // component containing the extension point becomes a narrower SEXT_INREG.
1943 // Components above it are ashr'd from the component containing the
1944 // extension point.
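// For instance (an illustrative sketch, not from the source), splitting
// %d:_(s96) = G_SEXT_INREG %x, 40 with NarrowTy = s32:
//   %p0:_(s32), %p1:_(s32), %p2:_(s32) = G_UNMERGE_VALUES %x:_(s96)
//   ; %p0 lies entirely below the extension point and is used unchanged
//   %q1:_(s32) = G_SEXT_INREG %p1, 8    ; 40 % 32 bits of the middle part
//   %q2:_(s32) = G_ASHR %q1, 31         ; parts above are copies of the sign bit
//   %d:_(s96) = G_MERGE_VALUES %p0, %q1, %q2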
1945 if (SizeOp0 % NarrowSize != 0)
1946 return UnableToLegalize;
1947 int NumParts = SizeOp0 / NarrowSize;
1948
1949 // List the registers where the destination will be scattered.
1950 SmallVector<Register, 2> DstRegs;
1951 // List the registers where the source will be split.
1952 SmallVector<Register, 2> SrcRegs;
1953
1954 // Create all the temporary registers.
1955 for (int i = 0; i < NumParts; ++i) {
1956 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1957
1958 SrcRegs.push_back(SrcReg);
1959 }
1960
1961 // Explode the big arguments into smaller chunks.
1962 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1963
1964 Register AshrCstReg =
1965 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1966 .getReg(0);
1967 Register FullExtensionReg;
1968 Register PartialExtensionReg;
1969
1970 // Do the operation on each small part.
1971 for (int i = 0; i < NumParts; ++i) {
1972 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1973 DstRegs.push_back(SrcRegs[i]);
1974 PartialExtensionReg = DstRegs.back();
1975 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1976 assert(PartialExtensionReg &&
1977 "Expected to visit partial extension before full");
1978 if (FullExtensionReg) {
1979 DstRegs.push_back(FullExtensionReg);
1980 continue;
1981 }
1982 DstRegs.push_back(
1983 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1984 .getReg(0));
1985 FullExtensionReg = DstRegs.back();
1986 } else {
1987 DstRegs.push_back(
1988 MIRBuilder
1989 .buildInstr(
1990 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1991 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1992 .getReg(0));
1993 PartialExtensionReg = DstRegs.back();
1994 }
1995 }
1996
1997 // Gather the destination registers into the final destination.
1998 Register DstReg = MI.getOperand(0).getReg();
1999 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2000 MI.eraseFromParent();
2001 return Legalized;
2002 }
2003 case TargetOpcode::G_BSWAP:
2004 case TargetOpcode::G_BITREVERSE: {
2005 if (SizeOp0 % NarrowSize != 0)
2006 return UnableToLegalize;
2007
2008 Observer.changingInstr(MI);
2009 SmallVector<Register, 2> SrcRegs, DstRegs;
2010 unsigned NumParts = SizeOp0 / NarrowSize;
2011 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2012 MIRBuilder, MRI);
2013
2014 for (unsigned i = 0; i < NumParts; ++i) {
2015 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2016 {SrcRegs[NumParts - 1 - i]});
2017 DstRegs.push_back(DstPart.getReg(0));
2018 }
2019
2020 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2021
2022 Observer.changedInstr(MI);
2023 MI.eraseFromParent();
2024 return Legalized;
2025 }
2026 case TargetOpcode::G_PTR_ADD:
2027 case TargetOpcode::G_PTRMASK: {
2028 if (TypeIdx != 1)
2029 return UnableToLegalize;
2030 Observer.changingInstr(MI);
2031 narrowScalarSrc(MI, NarrowTy, 2);
2032 Observer.changedInstr(MI);
2033 return Legalized;
2034 }
2035 case TargetOpcode::G_FPTOUI:
2036 case TargetOpcode::G_FPTOSI:
2037 case TargetOpcode::G_FPTOUI_SAT:
2038 case TargetOpcode::G_FPTOSI_SAT:
2039 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2040 case TargetOpcode::G_FPEXT:
2041 if (TypeIdx != 0)
2042 return UnableToLegalize;
2043 Observer.changingInstr(MI);
2044 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2045 Observer.changedInstr(MI);
2046 return Legalized;
2047 case TargetOpcode::G_FLDEXP:
2048 case TargetOpcode::G_STRICT_FLDEXP:
2049 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2050 case TargetOpcode::G_VSCALE: {
2051 Register Dst = MI.getOperand(0).getReg();
2052 LLT Ty = MRI.getType(Dst);
2053
2054 // Assume VSCALE(1) fits into a legal integer
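// A short illustrative sketch (not from the source; exact MIR syntax may
// differ): narrowing %d:_(s64) = G_VSCALE i64 4 with NarrowTy = s32 becomes
//   %v:_(s32) = G_VSCALE i32 1
//   %z:_(s64) = G_ZEXT %v
//   %c:_(s64) = G_CONSTANT i64 4
//   %d:_(s64) = G_MUL %z, %c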
2055 const APInt One(NarrowTy.getSizeInBits(), 1);
2056 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2057 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2058 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2059 MIRBuilder.buildMul(Dst, ZExt, C);
2060
2061 MI.eraseFromParent();
2062 return Legalized;
2063 }
2064 }
2065}
2066
2067 Register LegalizerHelper::coerceToScalar(Register Val) {
2068 LLT Ty = MRI.getType(Val);
2069 if (Ty.isScalar())
2070 return Val;
2071
2072 const DataLayout &DL = MIRBuilder.getDataLayout();
2073 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2074 if (Ty.isPointer()) {
2075 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2076 return Register();
2077 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2078 }
2079
2080 Register NewVal = Val;
2081
2082 assert(Ty.isVector());
2083 if (Ty.isPointerVector())
2084 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2085 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2086}
2087
2088 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2089 unsigned OpIdx, unsigned ExtOpcode) {
2090 MachineOperand &MO = MI.getOperand(OpIdx);
2091 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2092 MO.setReg(ExtB.getReg(0));
2093}
2094
2095 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2096 unsigned OpIdx) {
2097 MachineOperand &MO = MI.getOperand(OpIdx);
2098 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2099 MO.setReg(ExtB.getReg(0));
2100}
2101
2102 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2103 unsigned OpIdx, unsigned TruncOpcode) {
2104 MachineOperand &MO = MI.getOperand(OpIdx);
2105 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2106 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2107 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2108 MO.setReg(DstExt);
2109}
2110
2111 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2112 unsigned OpIdx, unsigned ExtOpcode) {
2113 MachineOperand &MO = MI.getOperand(OpIdx);
2114 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2115 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2116 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2117 MO.setReg(DstTrunc);
2118}
2119
2120 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2121 unsigned OpIdx) {
2122 MachineOperand &MO = MI.getOperand(OpIdx);
2123 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2124 Register Dst = MO.getReg();
2125 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2126 MO.setReg(DstExt);
2127 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2128}
2129
2130 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2131 unsigned OpIdx) {
2132 MachineOperand &MO = MI.getOperand(OpIdx);
2133 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2134}
2135
2136 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2137 MachineOperand &Op = MI.getOperand(OpIdx);
2138 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2139}
2140
2141 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2142 MachineOperand &MO = MI.getOperand(OpIdx);
2143 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2144 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2145 MIRBuilder.buildBitcast(MO, CastDst);
2146 MO.setReg(CastDst);
2147}
2148
2149 LegalizerHelper::LegalizeResult
2150 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2151 LLT WideTy) {
2152 if (TypeIdx != 1)
2153 return UnableToLegalize;
2154
2155 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2156 if (DstTy.isVector())
2157 return UnableToLegalize;
2158
2159 LLT SrcTy = MRI.getType(Src1Reg);
2160 const int DstSize = DstTy.getSizeInBits();
2161 const int SrcSize = SrcTy.getSizeInBits();
2162 const int WideSize = WideTy.getSizeInBits();
2163 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2164
2165 unsigned NumOps = MI.getNumOperands();
2166 unsigned NumSrc = MI.getNumOperands() - 1;
2167 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2168
2169 if (WideSize >= DstSize) {
2170 // Directly pack the bits in the target type.
2171 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2172
2173 for (unsigned I = 2; I != NumOps; ++I) {
2174 const unsigned Offset = (I - 1) * PartSize;
2175
2176 Register SrcReg = MI.getOperand(I).getReg();
2177 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2178
2179 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2180
2181 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2182 MRI.createGenericVirtualRegister(WideTy);
2183
2184 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2185 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2186 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2187 ResultReg = NextResult;
2188 }
2189
2190 if (WideSize > DstSize)
2191 MIRBuilder.buildTrunc(DstReg, ResultReg);
2192 else if (DstTy.isPointer())
2193 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2194
2195 MI.eraseFromParent();
2196 return Legalized;
2197 }
2198
2199 // Unmerge the original values to the GCD type, and recombine to the next
2200 // multiple greater than the original type.
2201 //
2202 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2203 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2204 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2205 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2206 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2207 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2208 // %12:_(s12) = G_MERGE_VALUES %10, %11
2209 //
2210 // Padding with undef if necessary:
2211 //
2212 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2213 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2214 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2215 // %7:_(s2) = G_IMPLICIT_DEF
2216 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2217 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2218 // %10:_(s12) = G_MERGE_VALUES %8, %9
2219
2220 const int GCD = std::gcd(SrcSize, WideSize);
2221 LLT GCDTy = LLT::scalar(GCD);
2222
2223 SmallVector<Register, 8> NewMergeRegs;
2224 SmallVector<Register, 8> Unmerges;
2225 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2226
2227 // Decompose the original operands if they don't evenly divide.
2228 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2229 Register SrcReg = MO.getReg();
2230 if (GCD == SrcSize) {
2231 Unmerges.push_back(SrcReg);
2232 } else {
2233 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2234 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2235 Unmerges.push_back(Unmerge.getReg(J));
2236 }
2237 }
2238
2239 // Pad with undef to the next size that is a multiple of the requested size.
2240 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2241 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2242 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2243 Unmerges.push_back(UndefReg);
2244 }
2245
2246 const int PartsPerGCD = WideSize / GCD;
2247
2248 // Build merges of each piece.
2249 ArrayRef<Register> Slicer(Unmerges);
2250 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2251 auto Merge =
2252 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2253 NewMergeRegs.push_back(Merge.getReg(0));
2254 }
2255
2256 // A truncate may be necessary if the requested type doesn't evenly divide the
2257 // original result type.
2258 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2259 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2260 } else {
2261 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2262 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2263 }
2264
2265 MI.eraseFromParent();
2266 return Legalized;
2267}
2268
2269 LegalizerHelper::LegalizeResult
2270 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2271 LLT WideTy) {
2272 if (TypeIdx != 0)
2273 return UnableToLegalize;
2274
2275 int NumDst = MI.getNumOperands() - 1;
2276 Register SrcReg = MI.getOperand(NumDst).getReg();
2277 LLT SrcTy = MRI.getType(SrcReg);
2278 if (SrcTy.isVector())
2279 return UnableToLegalize;
2280
2281 Register Dst0Reg = MI.getOperand(0).getReg();
2282 LLT DstTy = MRI.getType(Dst0Reg);
2283 if (!DstTy.isScalar())
2284 return UnableToLegalize;
2285
2286 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2287 if (SrcTy.isPointer()) {
2288 const DataLayout &DL = MIRBuilder.getDataLayout();
2289 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2290 LLVM_DEBUG(
2291 dbgs() << "Not casting non-integral address space integer\n");
2292 return UnableToLegalize;
2293 }
2294
2295 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2296 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2297 }
2298
2299 // Widen SrcTy to WideTy. This does not affect the result, but since the
2300 // user requested this size, it is probably better handled than SrcTy and
2301 // should reduce the total number of legalization artifacts.
2302 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2303 SrcTy = WideTy;
2304 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2305 }
2306
2307 // There's no unmerge type to target. Directly extract the bits from the
2308 // source type.
2309 unsigned DstSize = DstTy.getSizeInBits();
2310
2311 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2312 for (int I = 1; I != NumDst; ++I) {
2313 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2314 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2315 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2316 }
2317
2318 MI.eraseFromParent();
2319 return Legalized;
2320 }
2321
2322 // Extend the source to a wider type.
2323 LLT LCMTy = getLCMType(SrcTy, WideTy);
2324
2325 Register WideSrc = SrcReg;
2326 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2327 // TODO: If this is an integral address space, cast to integer and anyext.
2328 if (SrcTy.isPointer()) {
2329 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2330 return UnableToLegalize;
2331 }
2332
2333 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2334 }
2335
2336 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2337
2338 // Create a sequence of unmerges and merges to the original results. Since we
2339 // may have widened the source, we will need to pad the results with dead defs
2340 // to cover the source register.
2341 // e.g. widen s48 to s64:
2342 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2343 //
2344 // =>
2345 // %4:_(s192) = G_ANYEXT %0:_(s96)
2346 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2347 // ; unpack to GCD type, with extra dead defs
2348 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2349 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2350 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2351 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2352 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2353 const LLT GCDTy = getGCDType(WideTy, DstTy);
2354 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2355 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2356
2357 // Directly unmerge to the destination without going through a GCD type
2358 // if possible
2359 if (PartsPerRemerge == 1) {
2360 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2361
2362 for (int I = 0; I != NumUnmerge; ++I) {
2363 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2364
2365 for (int J = 0; J != PartsPerUnmerge; ++J) {
2366 int Idx = I * PartsPerUnmerge + J;
2367 if (Idx < NumDst)
2368 MIB.addDef(MI.getOperand(Idx).getReg());
2369 else {
2370 // Create dead def for excess components.
2371 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2372 }
2373 }
2374
2375 MIB.addUse(Unmerge.getReg(I));
2376 }
2377 } else {
2378 SmallVector<Register, 16> Parts;
2379 for (int J = 0; J != NumUnmerge; ++J)
2380 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2381
2382 SmallVector<Register, 8> RemergeParts;
2383 for (int I = 0; I != NumDst; ++I) {
2384 for (int J = 0; J < PartsPerRemerge; ++J) {
2385 const int Idx = I * PartsPerRemerge + J;
2386 RemergeParts.emplace_back(Parts[Idx]);
2387 }
2388
2389 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2390 RemergeParts.clear();
2391 }
2392 }
2393
2394 MI.eraseFromParent();
2395 return Legalized;
2396}
2397
2398 LegalizerHelper::LegalizeResult
2399 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2400 LLT WideTy) {
2401 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2402 unsigned Offset = MI.getOperand(2).getImm();
2403
2404 if (TypeIdx == 0) {
2405 if (SrcTy.isVector() || DstTy.isVector())
2406 return UnableToLegalize;
2407
2408 SrcOp Src(SrcReg);
2409 if (SrcTy.isPointer()) {
2410 // Extracts from pointers can be handled only if they are really just
2411 // simple integers.
2412 const DataLayout &DL = MIRBuilder.getDataLayout();
2413 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2414 return UnableToLegalize;
2415
2416 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2417 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2418 SrcTy = SrcAsIntTy;
2419 }
2420
2421 if (DstTy.isPointer())
2422 return UnableToLegalize;
2423
2424 if (Offset == 0) {
2425 // Avoid a shift in the degenerate case.
2426 MIRBuilder.buildTrunc(DstReg,
2427 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2428 MI.eraseFromParent();
2429 return Legalized;
2430 }
2431
2432 // Do a shift in the source type.
2433 LLT ShiftTy = SrcTy;
2434 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2435 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2436 ShiftTy = WideTy;
2437 }
2438
2439 auto LShr = MIRBuilder.buildLShr(
2440 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2441 MIRBuilder.buildTrunc(DstReg, LShr);
2442 MI.eraseFromParent();
2443 return Legalized;
2444 }
2445
2446 if (SrcTy.isScalar()) {
2447 Observer.changingInstr(MI);
2448 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2449 Observer.changedInstr(MI);
2450 return Legalized;
2451 }
2452
2453 if (!SrcTy.isVector())
2454 return UnableToLegalize;
2455
2456 if (DstTy != SrcTy.getElementType())
2457 return UnableToLegalize;
2458
2459 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2460 return UnableToLegalize;
2461
2462 Observer.changingInstr(MI);
2463 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2464
2465 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2466 Offset);
2467 widenScalarDst(MI, WideTy.getScalarType(), 0);
2468 Observer.changedInstr(MI);
2469 return Legalized;
2470}
2471
2472 LegalizerHelper::LegalizeResult
2473 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2474 LLT WideTy) {
2475 if (TypeIdx != 0 || WideTy.isVector())
2476 return UnableToLegalize;
2477 Observer.changingInstr(MI);
2478 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2479 widenScalarDst(MI, WideTy);
2480 Observer.changedInstr(MI);
2481 return Legalized;
2482}
2483
2484 LegalizerHelper::LegalizeResult
2485 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2486 LLT WideTy) {
2487 unsigned Opcode;
2488 unsigned ExtOpcode;
2489 std::optional<Register> CarryIn;
2490 switch (MI.getOpcode()) {
2491 default:
2492 llvm_unreachable("Unexpected opcode!");
2493 case TargetOpcode::G_SADDO:
2494 Opcode = TargetOpcode::G_ADD;
2495 ExtOpcode = TargetOpcode::G_SEXT;
2496 break;
2497 case TargetOpcode::G_SSUBO:
2498 Opcode = TargetOpcode::G_SUB;
2499 ExtOpcode = TargetOpcode::G_SEXT;
2500 break;
2501 case TargetOpcode::G_UADDO:
2502 Opcode = TargetOpcode::G_ADD;
2503 ExtOpcode = TargetOpcode::G_ZEXT;
2504 break;
2505 case TargetOpcode::G_USUBO:
2506 Opcode = TargetOpcode::G_SUB;
2507 ExtOpcode = TargetOpcode::G_ZEXT;
2508 break;
2509 case TargetOpcode::G_SADDE:
2510 Opcode = TargetOpcode::G_UADDE;
2511 ExtOpcode = TargetOpcode::G_SEXT;
2512 CarryIn = MI.getOperand(4).getReg();
2513 break;
2514 case TargetOpcode::G_SSUBE:
2515 Opcode = TargetOpcode::G_USUBE;
2516 ExtOpcode = TargetOpcode::G_SEXT;
2517 CarryIn = MI.getOperand(4).getReg();
2518 break;
2519 case TargetOpcode::G_UADDE:
2520 Opcode = TargetOpcode::G_UADDE;
2521 ExtOpcode = TargetOpcode::G_ZEXT;
2522 CarryIn = MI.getOperand(4).getReg();
2523 break;
2524 case TargetOpcode::G_USUBE:
2525 Opcode = TargetOpcode::G_USUBE;
2526 ExtOpcode = TargetOpcode::G_ZEXT;
2527 CarryIn = MI.getOperand(4).getReg();
2528 break;
2529 }
2530
2531 if (TypeIdx == 1) {
2532 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2533
2534 Observer.changingInstr(MI);
2535 if (CarryIn)
2536 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2537 widenScalarDst(MI, WideTy, 1);
2538
2539 Observer.changedInstr(MI);
2540 return Legalized;
2541 }
2542
2543 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2544 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2545 // Do the arithmetic in the larger type.
2546 Register NewOp;
2547 if (CarryIn) {
2548 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2549 NewOp = MIRBuilder
2550 .buildInstr(Opcode, {WideTy, CarryOutTy},
2551 {LHSExt, RHSExt, *CarryIn})
2552 .getReg(0);
2553 } else {
2554 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2555 }
2556 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2557 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2558 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2559 // There is no overflow if the ExtOp is the same as NewOp.
2560 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2561 // Now trunc the NewOp to the original result.
2562 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2563 MI.eraseFromParent();
2564 return Legalized;
2565}
2566
2567 LegalizerHelper::LegalizeResult
2568 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2569 LLT WideTy) {
2570 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2571 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2572 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2573 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2574 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2575 // We can convert this to:
2576 // 1. Any extend iN to iM
2577 // 2. SHL by M-N
2578 // 3. [US][ADD|SUB|SHL]SAT
2579 // 4. L/ASHR by M-N
2580 //
2581 // It may be more efficient to lower this to a min and a max operation in
2582 // the higher precision arithmetic if the promoted operation isn't legal,
2583 // but this decision is up to the target's lowering request.
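// A hedged sketch (illustrative only, not from the source) of steps 1-4 for
// %d:_(s8) = G_UADDSAT %a, %b widened to s32:
//   %a32:_(s32) = G_ANYEXT %a:_(s8)
//   %b32:_(s32) = G_ANYEXT %b:_(s8)
//   %as:_(s32) = G_SHL %a32, 24
//   %bs:_(s32) = G_SHL %b32, 24
//   %w:_(s32) = G_UADDSAT %as, %bs
//   %r:_(s32) = G_LSHR %w, 24
//   %d:_(s8) = G_TRUNC %r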
2584 Register DstReg = MI.getOperand(0).getReg();
2585
2586 unsigned NewBits = WideTy.getScalarSizeInBits();
2587 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2588
2589 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2590 // must not left shift the RHS to preserve the shift amount.
2591 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2592 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2593 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2594 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2595 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2596 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2597
2598 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2599 {ShiftL, ShiftR}, MI.getFlags());
2600
2601 // Use a shift that will preserve the number of sign bits when the trunc is
2602 // folded away.
2603 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2604 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2605
2606 MIRBuilder.buildTrunc(DstReg, Result);
2607 MI.eraseFromParent();
2608 return Legalized;
2609}
2610
2611 LegalizerHelper::LegalizeResult
2612 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2613 LLT WideTy) {
2614 if (TypeIdx == 1) {
2615 Observer.changingInstr(MI);
2616 widenScalarDst(MI, WideTy, 1);
2617 Observer.changedInstr(MI);
2618 return Legalized;
2619 }
2620
2621 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2622 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2623 LLT SrcTy = MRI.getType(LHS);
2624 LLT OverflowTy = MRI.getType(OriginalOverflow);
2625 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2626
2627 // To determine if the result overflowed in the larger type, we extend the
2628 // input to the larger type, do the multiply (checking if it overflows),
2629 // then also check the high bits of the result to see if overflow happened
2630 // there.
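// As an illustrative sketch (register names arbitrary, not from the source),
// a s16 G_UMULO widened to s32 cannot overflow in the wide type, so only the
// high half of the product needs checking:
//   %a32:_(s32) = G_ZEXT %a:_(s16)
//   %b32:_(s32) = G_ZEXT %b:_(s16)
//   %p:_(s32) = G_MUL %a32, %b32
//   %res:_(s16) = G_TRUNC %p
//   %m:_(s32) = G_AND %p, 65535          ; zero-extend-in-register of the low half
//   %ovf:_(s1) = G_ICMP intpred(ne), %p(s32), %m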
2631 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2632 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2633 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2634
2635 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2636 // so we don't need to check the overflow result of larger type Mulo.
2637 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2638
2639 unsigned MulOpc =
2640 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2641
2642 MachineInstrBuilder Mulo;
2643 if (WideMulCanOverflow)
2644 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2645 {LeftOperand, RightOperand});
2646 else
2647 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2648
2649 auto Mul = Mulo->getOperand(0);
2650 MIRBuilder.buildTrunc(Result, Mul);
2651
2652 MachineInstrBuilder ExtResult;
2653 // Overflow occurred if it occurred in the larger type, or if the high part
2654 // of the result does not zero/sign-extend the low part. Check this second
2655 // possibility first.
2656 if (IsSigned) {
2657 // For signed, overflow occurred when the high part does not sign-extend
2658 // the low part.
2659 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2660 } else {
2661 // Unsigned overflow occurred when the high part does not zero-extend the
2662 // low part.
2663 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2664 }
2665
2666 if (WideMulCanOverflow) {
2667 auto Overflow =
2668 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2669 // Finally check if the multiplication in the larger type itself overflowed.
2670 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2671 } else {
2672 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2673 }
2674 MI.eraseFromParent();
2675 return Legalized;
2676}
2677
2678 LegalizerHelper::LegalizeResult
2679 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2680 unsigned Opcode = MI.getOpcode();
2681 switch (Opcode) {
2682 default:
2683 return UnableToLegalize;
2684 case TargetOpcode::G_ATOMICRMW_XCHG:
2685 case TargetOpcode::G_ATOMICRMW_ADD:
2686 case TargetOpcode::G_ATOMICRMW_SUB:
2687 case TargetOpcode::G_ATOMICRMW_AND:
2688 case TargetOpcode::G_ATOMICRMW_OR:
2689 case TargetOpcode::G_ATOMICRMW_XOR:
2690 case TargetOpcode::G_ATOMICRMW_MIN:
2691 case TargetOpcode::G_ATOMICRMW_MAX:
2692 case TargetOpcode::G_ATOMICRMW_UMIN:
2693 case TargetOpcode::G_ATOMICRMW_UMAX:
2694 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2695 Observer.changingInstr(MI);
2696 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2697 widenScalarDst(MI, WideTy, 0);
2698 Observer.changedInstr(MI);
2699 return Legalized;
2700 case TargetOpcode::G_ATOMIC_CMPXCHG:
2701 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2702 Observer.changingInstr(MI);
2703 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2704 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2705 widenScalarDst(MI, WideTy, 0);
2706 Observer.changedInstr(MI);
2707 return Legalized;
2708 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2709 if (TypeIdx == 0) {
2710 Observer.changingInstr(MI);
2711 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2712 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2713 widenScalarDst(MI, WideTy, 0);
2714 Observer.changedInstr(MI);
2715 return Legalized;
2716 }
2717 assert(TypeIdx == 1 &&
2718 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2719 Observer.changingInstr(MI);
2720 widenScalarDst(MI, WideTy, 1);
2721 Observer.changedInstr(MI);
2722 return Legalized;
2723 case TargetOpcode::G_EXTRACT:
2724 return widenScalarExtract(MI, TypeIdx, WideTy);
2725 case TargetOpcode::G_INSERT:
2726 return widenScalarInsert(MI, TypeIdx, WideTy);
2727 case TargetOpcode::G_MERGE_VALUES:
2728 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2729 case TargetOpcode::G_UNMERGE_VALUES:
2730 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2731 case TargetOpcode::G_SADDO:
2732 case TargetOpcode::G_SSUBO:
2733 case TargetOpcode::G_UADDO:
2734 case TargetOpcode::G_USUBO:
2735 case TargetOpcode::G_SADDE:
2736 case TargetOpcode::G_SSUBE:
2737 case TargetOpcode::G_UADDE:
2738 case TargetOpcode::G_USUBE:
2739 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2740 case TargetOpcode::G_UMULO:
2741 case TargetOpcode::G_SMULO:
2742 return widenScalarMulo(MI, TypeIdx, WideTy);
2743 case TargetOpcode::G_SADDSAT:
2744 case TargetOpcode::G_SSUBSAT:
2745 case TargetOpcode::G_SSHLSAT:
2746 case TargetOpcode::G_UADDSAT:
2747 case TargetOpcode::G_USUBSAT:
2748 case TargetOpcode::G_USHLSAT:
2749 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2750 case TargetOpcode::G_CTTZ:
2751 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2752 case TargetOpcode::G_CTLZ:
2753 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2754 case TargetOpcode::G_CTPOP: {
2755 if (TypeIdx == 0) {
2756 Observer.changingInstr(MI);
2757 widenScalarDst(MI, WideTy, 0);
2758 Observer.changedInstr(MI);
2759 return Legalized;
2760 }
2761
2762 Register SrcReg = MI.getOperand(1).getReg();
2763
2764 // First extend the input.
2765 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2766 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2767 ? TargetOpcode::G_ANYEXT
2768 : TargetOpcode::G_ZEXT;
2769 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2770 LLT CurTy = MRI.getType(SrcReg);
2771 unsigned NewOpc = Opcode;
2772 if (NewOpc == TargetOpcode::G_CTTZ) {
2773 // The count is the same in the larger type except if the original
2774 // value was zero. This can be handled by setting the bit just off
2775 // the top of the original type.
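// For example (illustrative sketch, not from the source), a s8 G_CTTZ widened
// to s32:
//   %x:_(s32) = G_ANYEXT %a:_(s8)
//   %y:_(s32) = G_OR %x, 256             ; bit 8, just above the original type
//   %c:_(s32) = G_CTTZ_ZERO_UNDEF %y     ; yields 8 when %a was zero
//   %d:_(s8) = G_TRUNC %c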
2776 auto TopBit =
2777 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2778 MIBSrc = MIRBuilder.buildOr(
2779 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2780 // Now we know the operand is non-zero, use the more relaxed opcode.
2781 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2782 }
2783
2784 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2785
2786 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2787 // An optimization where the result is the CTLZ after the left shift by
2788 // (difference in size between WideTy and CurTy), that is,
2789 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2790 // Result = ctlz MIBSrc
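// E.g. (a rough sketch, not from the source), for a s8 source widened to s32:
//   %x:_(s32) = G_ZEXT %a:_(s8)
//   %s:_(s32) = G_SHL %x, 24
//   %c:_(s32) = G_CTLZ_ZERO_UNDEF %s     ; already the s8 leading-zero count
// so no later adjustment by SizeDiff is needed in this case.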
2791 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2792 MIRBuilder.buildConstant(WideTy, SizeDiff));
2793 }
2794
2795 // Perform the operation at the larger size.
2796 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2797 // This is already the correct result for CTPOP and CTTZs
2798 if (Opcode == TargetOpcode::G_CTLZ) {
2799 // The correct result is NewOp - (difference in size between WideTy and CurTy).
2800 MIBNewOp = MIRBuilder.buildSub(
2801 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2802 }
2803
2804 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2805 MI.eraseFromParent();
2806 return Legalized;
2807 }
2808 case TargetOpcode::G_BSWAP: {
2809 Observer.changingInstr(MI);
2810 Register DstReg = MI.getOperand(0).getReg();
2811
2812 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2813 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2814 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2815 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2816
2817 MI.getOperand(0).setReg(DstExt);
2818
2819 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2820
2821 LLT Ty = MRI.getType(DstReg);
2822 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2823 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2824 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2825
2826 MIRBuilder.buildTrunc(DstReg, ShrReg);
2827 Observer.changedInstr(MI);
2828 return Legalized;
2829 }
2830 case TargetOpcode::G_BITREVERSE: {
2831 Observer.changingInstr(MI);
2832
2833 Register DstReg = MI.getOperand(0).getReg();
2834 LLT Ty = MRI.getType(DstReg);
2835 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2836
2837 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2838 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2839 MI.getOperand(0).setReg(DstExt);
2840 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2841
2842 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2843 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2844 MIRBuilder.buildTrunc(DstReg, Shift);
2845 Observer.changedInstr(MI);
2846 return Legalized;
2847 }
2848 case TargetOpcode::G_FREEZE:
2849 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2850 Observer.changingInstr(MI);
2851 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2852 widenScalarDst(MI, WideTy);
2853 Observer.changedInstr(MI);
2854 return Legalized;
2855
2856 case TargetOpcode::G_ABS:
2857 Observer.changingInstr(MI);
2858 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2859 widenScalarDst(MI, WideTy);
2860 Observer.changedInstr(MI);
2861 return Legalized;
2862
2863 case TargetOpcode::G_ADD:
2864 case TargetOpcode::G_AND:
2865 case TargetOpcode::G_MUL:
2866 case TargetOpcode::G_OR:
2867 case TargetOpcode::G_XOR:
2868 case TargetOpcode::G_SUB:
2869 case TargetOpcode::G_SHUFFLE_VECTOR:
2870 // Perform operation at larger width (any extension is fine here, high bits
2871 // don't affect the result) and then truncate the result back to the
2872 // original type.
2873 Observer.changingInstr(MI);
2874 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2875 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2876 widenScalarDst(MI, WideTy);
2877 Observer.changedInstr(MI);
2878 return Legalized;
2879
2880 case TargetOpcode::G_SBFX:
2881 case TargetOpcode::G_UBFX:
2882 Observer.changingInstr(MI);
2883
2884 if (TypeIdx == 0) {
2885 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2886 widenScalarDst(MI, WideTy);
2887 } else {
2888 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2889 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2890 }
2891
2892 Observer.changedInstr(MI);
2893 return Legalized;
2894
2895 case TargetOpcode::G_SHL:
2896 Observer.changingInstr(MI);
2897
2898 if (TypeIdx == 0) {
2899 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2900 widenScalarDst(MI, WideTy);
2901 } else {
2902 assert(TypeIdx == 1);
2903 // The "number of bits to shift" operand must preserve its value as an
2904 // unsigned integer:
2905 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2906 }
2907
2908 Observer.changedInstr(MI);
2909 return Legalized;
2910
2911 case TargetOpcode::G_ROTR:
2912 case TargetOpcode::G_ROTL:
2913 if (TypeIdx != 1)
2914 return UnableToLegalize;
2915
2916 Observer.changingInstr(MI);
2917 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2918 Observer.changedInstr(MI);
2919 return Legalized;
2920
2921 case TargetOpcode::G_SDIV:
2922 case TargetOpcode::G_SREM:
2923 case TargetOpcode::G_SMIN:
2924 case TargetOpcode::G_SMAX:
2925 case TargetOpcode::G_ABDS:
2926 Observer.changingInstr(MI);
2927 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2928 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2929 widenScalarDst(MI, WideTy);
2930 Observer.changedInstr(MI);
2931 return Legalized;
2932
2933 case TargetOpcode::G_SDIVREM:
2934 Observer.changingInstr(MI);
2935 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2936 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2937 widenScalarDst(MI, WideTy);
2938 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2939 widenScalarDst(MI, WideTy, 1);
2940 Observer.changedInstr(MI);
2941 return Legalized;
2942
2943 case TargetOpcode::G_ASHR:
2944 case TargetOpcode::G_LSHR:
2945 Observer.changingInstr(MI);
2946
2947 if (TypeIdx == 0) {
2948 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2949 : TargetOpcode::G_ZEXT;
2950
2951 widenScalarSrc(MI, WideTy, 1, CvtOp);
2952 widenScalarDst(MI, WideTy);
2953 } else {
2954 assert(TypeIdx == 1);
2955 // The "number of bits to shift" operand must preserve its value as an
2956 // unsigned integer:
2957 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2958 }
2959
2960 Observer.changedInstr(MI);
2961 return Legalized;
2962 case TargetOpcode::G_UDIV:
2963 case TargetOpcode::G_UREM:
2964 case TargetOpcode::G_ABDU:
2965 Observer.changingInstr(MI);
2966 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2967 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2968 widenScalarDst(MI, WideTy);
2969 Observer.changedInstr(MI);
2970 return Legalized;
2971 case TargetOpcode::G_UDIVREM:
2972 Observer.changingInstr(MI);
2973 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2974 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2975 widenScalarDst(MI, WideTy);
2976 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2977 widenScalarDst(MI, WideTy, 1);
2978 Observer.changedInstr(MI);
2979 return Legalized;
2980 case TargetOpcode::G_UMIN:
2981 case TargetOpcode::G_UMAX: {
2982 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2983
2984 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
2985 unsigned ExtOpc =
2986 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
2987 getApproximateEVTForLLT(WideTy, Ctx))
2988 ? TargetOpcode::G_SEXT
2989 : TargetOpcode::G_ZEXT;
2990
2991 Observer.changingInstr(MI);
2992 widenScalarSrc(MI, WideTy, 1, ExtOpc);
2993 widenScalarSrc(MI, WideTy, 2, ExtOpc);
2994 widenScalarDst(MI, WideTy);
2995 Observer.changedInstr(MI);
2996 return Legalized;
2997 }
2998
2999 case TargetOpcode::G_SELECT:
3000 Observer.changingInstr(MI);
3001 if (TypeIdx == 0) {
3002 // Perform operation at larger width (any extension is fine here, high
3003 // bits don't affect the result) and then truncate the result back to the
3004 // original type.
3005 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3006 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3007 widenScalarDst(MI, WideTy);
3008 } else {
3009 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3010 // Explicit extension is required here since high bits affect the result.
3011 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3012 }
3013 Observer.changedInstr(MI);
3014 return Legalized;
3015
3016 case TargetOpcode::G_FPTOSI:
3017 case TargetOpcode::G_FPTOUI:
3018 case TargetOpcode::G_INTRINSIC_LRINT:
3019 case TargetOpcode::G_INTRINSIC_LLRINT:
3020 case TargetOpcode::G_IS_FPCLASS:
3021 Observer.changingInstr(MI);
3022
3023 if (TypeIdx == 0)
3024 widenScalarDst(MI, WideTy);
3025 else
3026 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3027
3028 Observer.changedInstr(MI);
3029 return Legalized;
3030 case TargetOpcode::G_SITOFP:
3031 Observer.changingInstr(MI);
3032
3033 if (TypeIdx == 0)
3034 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3035 else
3036 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3037
3038 Observer.changedInstr(MI);
3039 return Legalized;
3040 case TargetOpcode::G_UITOFP:
3041 Observer.changingInstr(MI);
3042
3043 if (TypeIdx == 0)
3044 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3045 else
3046 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3047
3048 Observer.changedInstr(MI);
3049 return Legalized;
3050 case TargetOpcode::G_FPTOSI_SAT:
3051 case TargetOpcode::G_FPTOUI_SAT:
3052 Observer.changingInstr(MI);
3053
3054 if (TypeIdx == 0) {
3055 Register OldDst = MI.getOperand(0).getReg();
3056 LLT Ty = MRI.getType(OldDst);
3057 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3058 Register NewDst;
3059 MI.getOperand(0).setReg(ExtReg);
3060 uint64_t ShortBits = Ty.getScalarSizeInBits();
3061 uint64_t WideBits = WideTy.getScalarSizeInBits();
3062 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3063 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3064 // z = i16 fptosi_sat(a)
3065 // ->
3066 // x = i32 fptosi_sat(a)
3067 // y = smin(x, 32767)
3068 // z = smax(y, -32768)
3069 auto MaxVal = MIRBuilder.buildConstant(
3070 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3071 auto MinVal = MIRBuilder.buildConstant(
3072 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3073 Register MidReg =
3074 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3075 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3076 } else {
3077 // z = i16 fptoui_sat(a)
3078 // ->
3079 // x = i32 fptoui_sat(a)
3080 // y = umin(x, 65535)
3081 auto MaxVal = MIRBuilder.buildConstant(
3082 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3083 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3084 }
3085 MIRBuilder.buildTrunc(OldDst, NewDst);
3086 } else
3087 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3088
3089 Observer.changedInstr(MI);
3090 return Legalized;
3091 case TargetOpcode::G_LOAD:
3092 case TargetOpcode::G_SEXTLOAD:
3093 case TargetOpcode::G_ZEXTLOAD:
3094 Observer.changingInstr(MI);
3095 widenScalarDst(MI, WideTy);
3096 Observer.changedInstr(MI);
3097 return Legalized;
3098
3099 case TargetOpcode::G_STORE: {
3100 if (TypeIdx != 0)
3101 return UnableToLegalize;
3102
3103 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3104 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3105 if (!Ty.isScalar()) {
3106 // We need to widen the vector element type.
3107 Observer.changingInstr(MI);
3108 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3109 // We also need to adjust the MMO to turn this into a truncating store.
3110 MachineMemOperand &MMO = **MI.memoperands_begin();
3111 MachineFunction &MF = MIRBuilder.getMF();
3112 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3113 MI.setMemRefs(MF, {NewMMO});
3114 Observer.changedInstr(MI);
3115 return Legalized;
3116 }
3117
3118 Observer.changingInstr(MI);
3119
3120 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3121 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3122 widenScalarSrc(MI, WideTy, 0, ExtType);
3123
3124 Observer.changedInstr(MI);
3125 return Legalized;
3126 }
3127 case TargetOpcode::G_CONSTANT: {
3128 MachineOperand &SrcMO = MI.getOperand(1);
3129 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3130 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3131 MRI.getType(MI.getOperand(0).getReg()));
3132 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3133 ExtOpc == TargetOpcode::G_ANYEXT) &&
3134 "Illegal Extend");
3135 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3136 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3137 ? SrcVal.sext(WideTy.getSizeInBits())
3138 : SrcVal.zext(WideTy.getSizeInBits());
3139 Observer.changingInstr(MI);
3140 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3141
3142 widenScalarDst(MI, WideTy);
3143 Observer.changedInstr(MI);
3144 return Legalized;
3145 }
3146 case TargetOpcode::G_FCONSTANT: {
3147 // To avoid changing the bits of the constant due to extension to a larger
3148 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
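// For instance (a rough illustrative sketch), widening a half constant to s32:
//   %0:_(s16) = G_FCONSTANT half 0xH3C00   ; 1.0
// is rebuilt, roughly, as an integer constant carrying the same bits:
//   %1:_(s32) = G_CONSTANT i32 15360       ; 0x3C00
//   %0:_(s16) = G_TRUNC %1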
3149 MachineOperand &SrcMO = MI.getOperand(1);
3150 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3151 MIRBuilder.setInstrAndDebugLoc(MI);
3152 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3153 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3154 MI.eraseFromParent();
3155 return Legalized;
3156 }
3157 case TargetOpcode::G_IMPLICIT_DEF: {
3158 Observer.changingInstr(MI);
3159 widenScalarDst(MI, WideTy);
3160 Observer.changedInstr(MI);
3161 return Legalized;
3162 }
3163 case TargetOpcode::G_BRCOND:
3164 Observer.changingInstr(MI);
3165 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3166 Observer.changedInstr(MI);
3167 return Legalized;
3168
3169 case TargetOpcode::G_FCMP:
3170 Observer.changingInstr(MI);
3171 if (TypeIdx == 0)
3172 widenScalarDst(MI, WideTy);
3173 else {
3174 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3175 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3176 }
3177 Observer.changedInstr(MI);
3178 return Legalized;
3179
3180 case TargetOpcode::G_ICMP:
3181 Observer.changingInstr(MI);
3182 if (TypeIdx == 0)
3183 widenScalarDst(MI, WideTy);
3184 else {
3185 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3186 CmpInst::Predicate Pred =
3187 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3188
3189 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3190 unsigned ExtOpcode =
3191 (CmpInst::isSigned(Pred) ||
3192 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3193 getApproximateEVTForLLT(WideTy, Ctx)))
3194 ? TargetOpcode::G_SEXT
3195 : TargetOpcode::G_ZEXT;
3196 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3197 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3198 }
3199 Observer.changedInstr(MI);
3200 return Legalized;
3201
3202 case TargetOpcode::G_PTR_ADD:
3203 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3204 Observer.changingInstr(MI);
3205 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3206 Observer.changedInstr(MI);
3207 return Legalized;
3208
3209 case TargetOpcode::G_PHI: {
3210 assert(TypeIdx == 0 && "Expecting only Idx 0");
3211
3212 Observer.changingInstr(MI);
3213 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3214 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3215 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3216 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3217 }
3218
3219 MachineBasicBlock &MBB = *MI.getParent();
3220 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3221 widenScalarDst(MI, WideTy);
3222 Observer.changedInstr(MI);
3223 return Legalized;
3224 }
3225 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3226 if (TypeIdx == 0) {
3227 Register VecReg = MI.getOperand(1).getReg();
3228 LLT VecTy = MRI.getType(VecReg);
3229 Observer.changingInstr(MI);
3230
3231 widenScalarSrc(
3232 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3233 TargetOpcode::G_ANYEXT);
3234
3235 widenScalarDst(MI, WideTy, 0);
3236 Observer.changedInstr(MI);
3237 return Legalized;
3238 }
3239
3240 if (TypeIdx != 2)
3241 return UnableToLegalize;
3242 Observer.changingInstr(MI);
3243 // TODO: Probably should be zext
3244 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3245 Observer.changedInstr(MI);
3246 return Legalized;
3247 }
3248 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3249 if (TypeIdx == 0) {
3250 Observer.changingInstr(MI);
3251 const LLT WideEltTy = WideTy.getElementType();
3252
3253 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3254 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3255 widenScalarDst(MI, WideTy, 0);
3256 Observer.changedInstr(MI);
3257 return Legalized;
3258 }
3259
3260 if (TypeIdx == 1) {
3261 Observer.changingInstr(MI);
3262
3263 Register VecReg = MI.getOperand(1).getReg();
3264 LLT VecTy = MRI.getType(VecReg);
3265 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3266
3267 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3268 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3269 widenScalarDst(MI, WideVecTy, 0);
3270 Observer.changedInstr(MI);
3271 return Legalized;
3272 }
3273
3274 if (TypeIdx == 2) {
3275 Observer.changingInstr(MI);
3276 // TODO: Probably should be zext
3277 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3278 Observer.changedInstr(MI);
3279 return Legalized;
3280 }
3281
3282 return UnableToLegalize;
3283 }
3284 case TargetOpcode::G_FADD:
3285 case TargetOpcode::G_FMUL:
3286 case TargetOpcode::G_FSUB:
3287 case TargetOpcode::G_FMA:
3288 case TargetOpcode::G_FMAD:
3289 case TargetOpcode::G_FNEG:
3290 case TargetOpcode::G_FABS:
3291 case TargetOpcode::G_FCANONICALIZE:
3292 case TargetOpcode::G_FMINNUM:
3293 case TargetOpcode::G_FMAXNUM:
3294 case TargetOpcode::G_FMINNUM_IEEE:
3295 case TargetOpcode::G_FMAXNUM_IEEE:
3296 case TargetOpcode::G_FMINIMUM:
3297 case TargetOpcode::G_FMAXIMUM:
3298 case TargetOpcode::G_FMINIMUMNUM:
3299 case TargetOpcode::G_FMAXIMUMNUM:
3300 case TargetOpcode::G_FDIV:
3301 case TargetOpcode::G_FREM:
3302 case TargetOpcode::G_FCEIL:
3303 case TargetOpcode::G_FFLOOR:
3304 case TargetOpcode::G_FCOS:
3305 case TargetOpcode::G_FSIN:
3306 case TargetOpcode::G_FTAN:
3307 case TargetOpcode::G_FACOS:
3308 case TargetOpcode::G_FASIN:
3309 case TargetOpcode::G_FATAN:
3310 case TargetOpcode::G_FATAN2:
3311 case TargetOpcode::G_FCOSH:
3312 case TargetOpcode::G_FSINH:
3313 case TargetOpcode::G_FTANH:
3314 case TargetOpcode::G_FLOG10:
3315 case TargetOpcode::G_FLOG:
3316 case TargetOpcode::G_FLOG2:
3317 case TargetOpcode::G_FRINT:
3318 case TargetOpcode::G_FNEARBYINT:
3319 case TargetOpcode::G_FSQRT:
3320 case TargetOpcode::G_FEXP:
3321 case TargetOpcode::G_FEXP2:
3322 case TargetOpcode::G_FEXP10:
3323 case TargetOpcode::G_FPOW:
3324 case TargetOpcode::G_INTRINSIC_TRUNC:
3325 case TargetOpcode::G_INTRINSIC_ROUND:
3326 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3327 assert(TypeIdx == 0);
3328 Observer.changingInstr(MI);
3329
3330 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3331 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3332
3333 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3334 Observer.changedInstr(MI);
3335 return Legalized;
3336 case TargetOpcode::G_FPOWI:
3337 case TargetOpcode::G_FLDEXP:
3338 case TargetOpcode::G_STRICT_FLDEXP: {
3339 if (TypeIdx == 0) {
3340 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3341 return UnableToLegalize;
3342
3343 Observer.changingInstr(MI);
3344 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3345 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3346 Observer.changedInstr(MI);
3347 return Legalized;
3348 }
3349
3350 if (TypeIdx == 1) {
3351 // For some reason SelectionDAG tries to promote to a libcall without
3352 // actually changing the integer type for promotion.
3353 Observer.changingInstr(MI);
3354 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3355 Observer.changedInstr(MI);
3356 return Legalized;
3357 }
3358
3359 return UnableToLegalize;
3360 }
3361 case TargetOpcode::G_FFREXP: {
3362 Observer.changingInstr(MI);
3363
3364 if (TypeIdx == 0) {
3365 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3366 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3367 } else {
3368 widenScalarDst(MI, WideTy, 1);
3369 }
3370
3371 Observer.changedInstr(MI);
3372 return Legalized;
3373 }
3374 case TargetOpcode::G_INTTOPTR:
3375 if (TypeIdx != 1)
3376 return UnableToLegalize;
3377
3378 Observer.changingInstr(MI);
3379 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3380 Observer.changedInstr(MI);
3381 return Legalized;
3382 case TargetOpcode::G_PTRTOINT:
3383 if (TypeIdx != 0)
3384 return UnableToLegalize;
3385
3386 Observer.changingInstr(MI);
3387 widenScalarDst(MI, WideTy, 0);
3388 Observer.changedInstr(MI);
3389 return Legalized;
3390 case TargetOpcode::G_BUILD_VECTOR: {
3391 Observer.changingInstr(MI);
3392
3393 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3394 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3395 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3396
3397 // Avoid changing the result vector type if the source element type was
3398 // requested.
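 // E.g. widening the source element type of
 //   %v:_(<4 x s8>) = G_BUILD_VECTOR %a:_(s8), %b:_(s8), %c:_(s8), %d:_(s8)
 // to s32 anyextends each source and rewrites the instruction as a
 // G_BUILD_VECTOR_TRUNC of the s32 values, leaving the <4 x s8> result as is.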
3399 if (TypeIdx == 1) {
3400 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3401 } else {
3402 widenScalarDst(MI, WideTy, 0);
3403 }
3404
3405 Observer.changedInstr(MI);
3406 return Legalized;
3407 }
3408 case TargetOpcode::G_SEXT_INREG:
3409 if (TypeIdx != 0)
3410 return UnableToLegalize;
3411
3412 Observer.changingInstr(MI);
3413 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3414 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3415 Observer.changedInstr(MI);
3416 return Legalized;
3417 case TargetOpcode::G_PTRMASK: {
3418 if (TypeIdx != 1)
3419 return UnableToLegalize;
3420 Observer.changingInstr(MI);
3421 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3422 Observer.changedInstr(MI);
3423 return Legalized;
3424 }
3425 case TargetOpcode::G_VECREDUCE_ADD: {
3426 if (TypeIdx != 1)
3427 return UnableToLegalize;
3428 Observer.changingInstr(MI);
3429 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3430 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3431 Observer.changedInstr(MI);
3432 return Legalized;
3433 }
3434 case TargetOpcode::G_VECREDUCE_FADD:
3435 case TargetOpcode::G_VECREDUCE_FMUL:
3436 case TargetOpcode::G_VECREDUCE_FMIN:
3437 case TargetOpcode::G_VECREDUCE_FMAX:
3438 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3439 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3440 if (TypeIdx != 0)
3441 return UnableToLegalize;
3442 Observer.changingInstr(MI);
3443 Register VecReg = MI.getOperand(1).getReg();
3444 LLT VecTy = MRI.getType(VecReg);
3445 LLT WideVecTy = VecTy.isVector()
3446 ? LLT::vector(VecTy.getElementCount(), WideTy)
3447 : WideTy;
3448 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3449 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3450 Observer.changedInstr(MI);
3451 return Legalized;
3452 }
3453 case TargetOpcode::G_VSCALE: {
3454 MachineOperand &SrcMO = MI.getOperand(1);
3455 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3456 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3457 // The CImm is always a signed value
3458 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3459 Observer.changingInstr(MI);
3460 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3461 widenScalarDst(MI, WideTy);
3462 Observer.changedInstr(MI);
3463 return Legalized;
3464 }
3465 case TargetOpcode::G_SPLAT_VECTOR: {
3466 if (TypeIdx != 1)
3467 return UnableToLegalize;
3468
3469 Observer.changingInstr(MI);
3470 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3471 Observer.changedInstr(MI);
3472 return Legalized;
3473 }
3474 case TargetOpcode::G_INSERT_SUBVECTOR: {
3475 if (TypeIdx != 0)
3476 return UnableToLegalize;
3477
3478 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3479 Register BigVec = IS.getBigVec();
3480 Register SubVec = IS.getSubVec();
3481
3482 LLT SubVecTy = MRI.getType(SubVec);
3483 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3484
3485 // Widen the G_INSERT_SUBVECTOR
3486 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3487 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3488 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3489 IS.getIndexImm());
3490
3491 // Truncate back down
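 // Since the inputs were zero extended, comparing the widened elements
 // against zero recovers the original element values for s1 element vectors
 // (the typical mask-vector case).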
3492 auto SplatZero = MIRBuilder.buildSplatVector(
3493 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3494 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3495 SplatZero);
3496
3497 MI.eraseFromParent();
3498
3499 return Legalized;
3500 }
3501 }
3502}
3503
3504 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3505 MachineIRBuilder &B, Register Src, LLT Ty) {
3506 auto Unmerge = B.buildUnmerge(Ty, Src);
3507 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3508 Pieces.push_back(Unmerge.getReg(I));
3509}
3510
3511static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3512 MachineIRBuilder &MIRBuilder) {
3513 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3514 MachineFunction &MF = MIRBuilder.getMF();
3515 const DataLayout &DL = MIRBuilder.getDataLayout();
3516 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3517 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3518 LLT DstLLT = MRI.getType(DstReg);
3519
3520 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3521
3522 auto Addr = MIRBuilder.buildConstantPool(
3523 AddrPtrTy,
3524 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3525
3526 MachineMemOperand *MMO =
3527 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3528 MachineMemOperand::MOLoad, DstLLT, Alignment);
3529
3530 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3531}
3532
3533 LegalizerHelper::LegalizeResult
3534 LegalizerHelper::lowerConstant(MachineInstr &MI) {
3535 const MachineOperand &ConstOperand = MI.getOperand(1);
3536 const Constant *ConstantVal = ConstOperand.getCImm();
3537
3538 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3539 MI.eraseFromParent();
3540
3541 return Legalized;
3542}
3543
3544 LegalizerHelper::LegalizeResult
3545 LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3546 const MachineOperand &ConstOperand = MI.getOperand(1);
3547 const Constant *ConstantVal = ConstOperand.getFPImm();
3548
3549 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3550 MI.eraseFromParent();
3551
3552 return Legalized;
3553}
3554
3555 LegalizerHelper::LegalizeResult
3556 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3557 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3558 if (SrcTy.isVector()) {
3559 LLT SrcEltTy = SrcTy.getElementType();
3560 SmallVector<Register, 8> SrcRegs;
3561
3562 if (DstTy.isVector()) {
3563 int NumDstElt = DstTy.getNumElements();
3564 int NumSrcElt = SrcTy.getNumElements();
3565
3566 LLT DstEltTy = DstTy.getElementType();
3567 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3568 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3569
3570 // If there's an element size mismatch, insert intermediate casts to match
3571 // the result element type.
3572 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3573 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3574 //
3575 // =>
3576 //
3577 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3578 // %4:_(<2 x s8>) = G_BITCAST %2
3579 // %5:_(<2 x s8>) = G_BITCAST %3
3580 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3581 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3582 SrcPartTy = SrcEltTy;
3583 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3584 //
3585 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3586 //
3587 // =>
3588 //
3589 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3590 // %4:_(s16) = G_BITCAST %2
3591 // %5:_(s16) = G_BITCAST %3
3592 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3593 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3594 DstCastTy = DstEltTy;
3595 }
3596
3597 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3598 for (Register &SrcReg : SrcRegs)
3599 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3600 } else
3601 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3602
3603 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3604 MI.eraseFromParent();
3605 return Legalized;
3606 }
3607
3608 if (DstTy.isVector()) {
3609 SmallVector<Register, 8> SrcRegs;
3610 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3611 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3612 MI.eraseFromParent();
3613 return Legalized;
3614 }
3615
3616 return UnableToLegalize;
3617}
3618
3619/// Figure out the bit offset into a register when coercing a vector index for
3620/// the wide element type. This is only for the case when promoting a vector to
3621/// one with larger elements.
3622///
3624/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3625/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3626 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3627 Register Idx,
3628 unsigned NewEltSize,
3629 unsigned OldEltSize) {
3630 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3631 LLT IdxTy = B.getMRI()->getType(Idx);
3632
3633 // Now figure out the amount we need to shift to get the target bits.
3634 auto OffsetMask = B.buildConstant(
3635 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3636 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3637 return B.buildShl(IdxTy, OffsetIdx,
3638 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3639}
3640
3641/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3642/// is casting to a vector with a smaller element size, perform multiple element
3643/// extracts and merge the results. If this is coercing to a vector with larger
3644/// elements, index the bitcasted vector and extract the target element with bit
3645/// operations. This is intended to force the indexing in the native register
3646/// size for architectures that can dynamically index the register file.
3647 LegalizerHelper::LegalizeResult
3648 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3649 LLT CastTy) {
3650 if (TypeIdx != 1)
3651 return UnableToLegalize;
3652
3653 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3654
3655 LLT SrcEltTy = SrcVecTy.getElementType();
3656 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3657 unsigned OldNumElts = SrcVecTy.getNumElements();
3658
3659 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3660 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3661
3662 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3663 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3664 if (NewNumElts > OldNumElts) {
3665 // Decreasing the vector element size
3666 //
3667 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3668 // =>
3669 // v4i32:castx = bitcast x:v2i64
3670 //
3671 // i64 = bitcast
3672 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3673 // (i32 (extract_vector_elt castx, (2 * y + 1))))
3674 //
3675 if (NewNumElts % OldNumElts != 0)
3676 return UnableToLegalize;
3677
3678 // Type of the intermediate result vector.
3679 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3680 LLT MidTy =
3681 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3682
3683 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3684
3685 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3686 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3687
3688 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3689 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3690 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3691 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3692 NewOps[I] = Elt.getReg(0);
3693 }
3694
3695 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3696 MIRBuilder.buildBitcast(Dst, NewVec);
3697 MI.eraseFromParent();
3698 return Legalized;
3699 }
3700
3701 if (NewNumElts < OldNumElts) {
3702 if (NewEltSize % OldEltSize != 0)
3703 return UnableToLegalize;
3704
3705 // This only depends on powers of 2 because we use bit tricks to figure out
3706 // the bit offset we need to shift to get the target element. A general
3707 // expansion could emit division/multiply.
3708 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3709 return UnableToLegalize;
3710
3711 // Increasing the vector element size.
3712 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3713 //
3714 // =>
3715 //
3716 // %cast = G_BITCAST %vec
3717 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3718 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3719 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3720 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3721 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3722 // %elt = G_TRUNC %elt_bits
3723
3724 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3725 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3726
3727 // Divide to get the index in the wider element type.
3728 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3729
3730 Register WideElt = CastVec;
3731 if (CastTy.isVector()) {
3732 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3733 ScaledIdx).getReg(0);
3734 }
3735
3736 // Compute the bit offset into the register of the target element.
3737 Register OffsetBits = getBitcastWiderVectorElementOffset(
3738 MIRBuilder, Idx, NewEltSize, OldEltSize);
3739
3740 // Shift the wide element to get the target element.
3741 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3742 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3743 MI.eraseFromParent();
3744 return Legalized;
3745 }
3746
3747 return UnableToLegalize;
3748}
3749
3750/// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
3751/// OffsetBits, while preserving the other bits in \p TargetReg.
3752///
3753/// (ZExt(InsertReg) << Offset) | (TargetReg & ~(LowBitsMask(InsertReg.size()) << Offset))
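/// E.g. TargetReg = s32 0xAABBCCDD, InsertReg = s8 0xEE, OffsetBits = 8
/// yields 0xAABBEEDD.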
3754 static Register buildBitFieldInsert(MachineIRBuilder &B,
3755 Register TargetReg, Register InsertReg,
3756 Register OffsetBits) {
3757 LLT TargetTy = B.getMRI()->getType(TargetReg);
3758 LLT InsertTy = B.getMRI()->getType(InsertReg);
3759 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3760 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3761
3762 // Produce a bitmask of the value to insert
3763 auto EltMask = B.buildConstant(
3764 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3765 InsertTy.getSizeInBits()));
3766 // Shift it into position
3767 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3768 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3769
3770 // Clear out the bits in the wide element
3771 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3772
3773 // The upper bits of the value to insert are already zero (from the zext),
3774 // so OR it into the masked wide element.
3775 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3776}
3777
3778/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3779/// is increasing the element size, perform the indexing in the target element
3780/// type, and use bit operations to insert at the element position. This is
3781/// intended for architectures that can dynamically index the register file and
3782/// want to force indexing in the native register size.
3783 LegalizerHelper::LegalizeResult
3784 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3785 LLT CastTy) {
3786 if (TypeIdx != 0)
3787 return UnableToLegalize;
3788
3789 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3790 MI.getFirst4RegLLTs();
3791 LLT VecTy = DstTy;
3792
3793 LLT VecEltTy = VecTy.getElementType();
3794 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3795 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3796 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3797
3798 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3799 unsigned OldNumElts = VecTy.getNumElements();
3800
3801 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3802 if (NewNumElts < OldNumElts) {
3803 if (NewEltSize % OldEltSize != 0)
3804 return UnableToLegalize;
3805
3806 // This only depends on powers of 2 because we use bit tricks to figure out
3807 // the bit offset we need to shift to get the target element. A general
3808 // expansion could emit division/multiply.
3809 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3810 return UnableToLegalize;
3811
3812 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3813 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3814
3815 // Divide to get the index in the wider element type.
3816 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3817
3818 Register ExtractedElt = CastVec;
3819 if (CastTy.isVector()) {
3820 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3821 ScaledIdx).getReg(0);
3822 }
3823
3824 // Compute the bit offset into the register of the target element.
3825 Register OffsetBits = getBitcastWiderVectorElementOffset(
3826 MIRBuilder, Idx, NewEltSize, OldEltSize);
3827
3828 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3829 Val, OffsetBits);
3830 if (CastTy.isVector()) {
3831 InsertedElt = MIRBuilder.buildInsertVectorElement(
3832 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3833 }
3834
3835 MIRBuilder.buildBitcast(Dst, InsertedElt);
3836 MI.eraseFromParent();
3837 return Legalized;
3838 }
3839
3840 return UnableToLegalize;
3841}
3842
3843// This attempts to handle G_CONCAT_VECTORS with illegal operands, in
3844// particular those whose operands are smaller than a legal vector type.
3845//
3846// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3847//
3848// ===>
3849//
3850// s32 = G_BITCAST <4 x s8>
3851// s32 = G_BITCAST <4 x s8>
3852// s32 = G_BITCAST <4 x s8>
3853// s32 = G_BITCAST <4 x s8>
3854// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3855// <16 x s8> = G_BITCAST <4 x s32>
3856 LegalizerHelper::LegalizeResult
3857 LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3858 LLT CastTy) {
3859 // Convert it to CONCAT instruction
3860 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3861 if (!ConcatMI) {
3862 return UnableToLegalize;
3863 }
3864
3865 // Check if bitcast is Legal
3866 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3867 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3868
3869 // Check if the build vector is Legal
3870 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3871 return UnableToLegalize;
3872 }
3873
3874 // Bitcast the sources
3875 SmallVector<Register> BitcastRegs;
3876 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3877 BitcastRegs.push_back(
3878 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3879 .getReg(0));
3880 }
3881
3882 // Build the scalar values into a vector
3883 Register BuildReg =
3884 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3885 MIRBuilder.buildBitcast(DstReg, BuildReg);
3886
3887 MI.eraseFromParent();
3888 return Legalized;
3889}
3890
3891// This bitcasts a shuffle vector to a different type currently of the same
3892// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3893// will be used instead.
3894//
3895// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3896// ===>
3897// <4 x s64> = G_PTRTOINT <4 x p0>
3898// <4 x s64> = G_PTRTOINT <4 x p0>
3899// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3900// <16 x p0> = G_INTTOPTR <16 x s64>
3900// <16 x p0> = G_INTTOPTR <16 x s64>
3901 LegalizerHelper::LegalizeResult
3902 LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3903 LLT CastTy) {
3904 auto ShuffleMI = cast<GShuffleVector>(&MI);
3905 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3906 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3907
3908 // We currently only handle vectors of the same size.
3909 if (TypeIdx != 0 ||
3910 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3911 CastTy.getElementCount() != DstTy.getElementCount())
3912 return UnableToLegalize;
3913
3914 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3915
3916 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3917 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3918 auto Shuf =
3919 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3920 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3921
3922 MI.eraseFromParent();
3923 return Legalized;
3924}
3925
3926/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3927///
3928/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3929///
3930/// ===>
3931///
3932/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3933/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
3934/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
3935 LegalizerHelper::LegalizeResult
3936 LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
3937 LLT CastTy) {
3938 auto ES = cast<GExtractSubvector>(&MI);
3939
3940 if (!CastTy.isVector())
3941 return UnableToLegalize;
3942
3943 if (TypeIdx != 0)
3944 return UnableToLegalize;
3945
3946 Register Dst = ES->getReg(0);
3947 Register Src = ES->getSrcVec();
3948 uint64_t Idx = ES->getIndexImm();
3949
3950 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3951
3952 LLT DstTy = MRI.getType(Dst);
3953 LLT SrcTy = MRI.getType(Src);
3954 ElementCount DstTyEC = DstTy.getElementCount();
3955 ElementCount SrcTyEC = SrcTy.getElementCount();
3956 auto DstTyMinElts = DstTyEC.getKnownMinValue();
3957 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
3958
3959 if (DstTy == CastTy)
3960 return Legalized;
3961
3962 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
3963 return UnableToLegalize;
3964
3965 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
3966 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
3967 if (CastEltSize < DstEltSize)
3968 return UnableToLegalize;
3969
3970 auto AdjustAmt = CastEltSize / DstEltSize;
3971 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3972 SrcTyMinElts % AdjustAmt != 0)
3973 return UnableToLegalize;
3974
3975 Idx /= AdjustAmt;
3976 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3977 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
3978 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
3979 MIRBuilder.buildBitcast(Dst, PromotedES);
3980
3981 ES->eraseFromParent();
3982 return Legalized;
3983}
3984
3985/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
3986///
3987/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
3988/// <vscale x 8 x i1>,
3989/// N
3990///
3991/// ===>
3992///
3993/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3994/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
3995/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
3996/// <vscale x 1 x i8>, N / 8
3997/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
3998 LegalizerHelper::LegalizeResult
3999 LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
4000 LLT CastTy) {
4001 auto ES = cast<GInsertSubvector>(&MI);
4002
4003 if (!CastTy.isVector())
4004 return UnableToLegalize;
4005
4006 if (TypeIdx != 0)
4007 return UnableToLegalize;
4008
4009 Register Dst = ES->getReg(0);
4010 Register BigVec = ES->getBigVec();
4011 Register SubVec = ES->getSubVec();
4012 uint64_t Idx = ES->getIndexImm();
4013
4014 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4015
4016 LLT DstTy = MRI.getType(Dst);
4017 LLT BigVecTy = MRI.getType(BigVec);
4018 LLT SubVecTy = MRI.getType(SubVec);
4019
4020 if (DstTy == CastTy)
4021 return Legalized;
4022
4023 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4024 return UnableToLegalize;
4025
4026 ElementCount DstTyEC = DstTy.getElementCount();
4027 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4028 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4029 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4030 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4031 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4032
4033 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4034 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4035 if (CastEltSize < DstEltSize)
4036 return UnableToLegalize;
4037
4038 auto AdjustAmt = CastEltSize / DstEltSize;
4039 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4040 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4041 return UnableToLegalize;
4042
4043 Idx /= AdjustAmt;
4044 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4045 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4046 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4047 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4048 auto PromotedIS =
4049 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4050 MIRBuilder.buildBitcast(Dst, PromotedIS);
4051
4052 ES->eraseFromParent();
4053 return Legalized;
4054}
4055
4056 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4057 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4058 Register DstReg = LoadMI.getDstReg();
4059 Register PtrReg = LoadMI.getPointerReg();
4060 LLT DstTy = MRI.getType(DstReg);
4061 MachineMemOperand &MMO = LoadMI.getMMO();
4062 LLT MemTy = MMO.getMemoryType();
4063 MachineFunction &MF = MIRBuilder.getMF();
4064
4065 unsigned MemSizeInBits = MemTy.getSizeInBits();
4066 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4067
4068 if (MemSizeInBits != MemStoreSizeInBits) {
4069 if (MemTy.isVector())
4070 return UnableToLegalize;
4071
4072 // Promote to a byte-sized load if not loading an integral number of
4073 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4074 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4075 MachineMemOperand *NewMMO =
4076 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4077
4078 Register LoadReg = DstReg;
4079 LLT LoadTy = DstTy;
4080
4081 // If this wasn't already an extending load, we need to widen the result
4082 // register to avoid creating a load with a narrower result than the source.
4083 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4084 LoadTy = WideMemTy;
4085 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4086 }
4087
4088 if (isa<GSExtLoad>(LoadMI)) {
4089 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4090 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4091 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4092 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4093 // The extra bits are guaranteed to be zero, since we stored them that
4094 // way. A zext load from Wide thus automatically gives zext from MemVT.
4095 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4096 } else {
4097 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4098 }
4099
4100 if (DstTy != LoadTy)
4101 MIRBuilder.buildTrunc(DstReg, LoadReg);
4102
4103 LoadMI.eraseFromParent();
4104 return Legalized;
4105 }
4106
4107 // Big endian lowering not implemented.
4108 if (MIRBuilder.getDataLayout().isBigEndian())
4109 return UnableToLegalize;
4110
4111 // This load needs splitting into power of 2 sized loads.
4112 //
4113 // Our strategy here is to generate anyextending loads for the smaller
4114 // types up to next power-2 result type, and then combine the two larger
4115 // result values together, before truncating back down to the non-pow-2
4116 // type.
4117 // E.g. v1 = i24 load =>
4118 // v2 = i32 zextload (2 byte)
4119 // v3 = i32 load (1 byte)
4120 // v4 = i32 shl v3, 16
4121 // v5 = i32 or v4, v2
4122 // v1 = i24 trunc v5
4123 // By doing this we generate the correct truncate which should get
4124 // combined away as an artifact with a matching extend.
4125
4126 uint64_t LargeSplitSize, SmallSplitSize;
4127
4128 if (!isPowerOf2_32(MemSizeInBits)) {
4129 // This load needs splitting into power of 2 sized loads.
4130 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4131 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4132 } else {
4133 // This is already a power of 2, but we still need to split this in half.
4134 //
4135 // Assume we're being asked to decompose an unaligned load.
4136 // TODO: If this requires multiple splits, handle them all at once.
4137 auto &Ctx = MF.getFunction().getContext();
4138 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4139 return UnableToLegalize;
4140
4141 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4142 }
4143
4144 if (MemTy.isVector()) {
4145 // TODO: Handle vector extloads
4146 if (MemTy != DstTy)
4147 return UnableToLegalize;
4148
4149 Align Alignment = LoadMI.getAlign();
4150 // Given an alignment larger than the size of the memory, we can increase
4151 // the size of the load without needing to scalarize it.
4152 if (Alignment.value() * 8 > MemSizeInBits &&
4153 isPowerOf2_32(DstTy.getNumElements())) {
4154 LLT MoreTy = LLT::fixed_vector(PowerOf2Ceil(DstTy.getNumElements()),
4155 DstTy.getElementType());
4156 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4157 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4158 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4159 NewLoad.getReg(0));
4160 LoadMI.eraseFromParent();
4161 return Legalized;
4162 }
4163
4164 // TODO: We can do better than scalarizing the vector and at least split it
4165 // in half.
4166 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4167 }
4168
4169 MachineMemOperand *LargeMMO =
4170 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4171 MachineMemOperand *SmallMMO =
4172 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4173
4174 LLT PtrTy = MRI.getType(PtrReg);
4175 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4176 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4177 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4178 PtrReg, *LargeMMO);
4179
4180 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4181 LargeSplitSize / 8);
4182 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4183 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4184 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4185 SmallPtr, *SmallMMO);
4186
4187 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4188 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4189
4190 if (AnyExtTy == DstTy)
4191 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4192 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4193 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4194 MIRBuilder.buildTrunc(DstReg, {Or});
4195 } else {
4196 assert(DstTy.isPointer() && "expected pointer");
4197 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4198
4199 // FIXME: We currently consider this to be illegal for non-integral address
4200 // spaces, but we still need a way to reinterpret the bits.
4201 MIRBuilder.buildIntToPtr(DstReg, Or);
4202 }
4203
4204 LoadMI.eraseFromParent();
4205 return Legalized;
4206}
4207
4209 // Lower a non-power of 2 store into multiple pow-2 stores.
4210 // E.g. split an i24 store into an i16 store + i8 store.
4211 // We do this by first extending the stored value to the next largest power
4212 // of 2 type, and then using truncating stores to store the components.
4213 // By doing this, as with G_LOAD, we generate an extend that can be
4214 // artifact-combined away instead of leaving behind extracts.
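 // E.g. an i24 store becomes (a sketch, assuming a little-endian layout):
 //   v2:_(s32) = G_ANYEXT v1
 //   G_STORE v2, ptr (store 2 bytes)
 //   v3:_(s32) = G_LSHR v2, 16
 //   G_STORE v3, ptr + 2 (store 1 byte)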
4215 Register SrcReg = StoreMI.getValueReg();
4216 Register PtrReg = StoreMI.getPointerReg();
4217 LLT SrcTy = MRI.getType(SrcReg);
4218 MachineFunction &MF = MIRBuilder.getMF();
4219 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4220 LLT MemTy = MMO.getMemoryType();
4221
4222 unsigned StoreWidth = MemTy.getSizeInBits();
4223 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4224
4225 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4226 // Promote to a byte-sized store with upper bits zero if not
4227 // storing an integral number of bytes. For example, promote
4228 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4229 LLT WideTy = LLT::scalar(StoreSizeInBits);
4230
4231 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4232 // Avoid creating a store with a narrower source than result.
4233 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4234 SrcTy = WideTy;
4235 }
4236
4237 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4238
4239 MachineMemOperand *NewMMO =
4240 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4241 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4242 StoreMI.eraseFromParent();
4243 return Legalized;
4244 }
4245
4246 if (MemTy.isVector()) {
4247 if (MemTy != SrcTy)
4248 return scalarizeVectorBooleanStore(StoreMI);
4249
4250 // TODO: We can do better than scalarizing the vector and at least split it
4251 // in half.
4252 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4253 }
4254
4255 unsigned MemSizeInBits = MemTy.getSizeInBits();
4256 uint64_t LargeSplitSize, SmallSplitSize;
4257
4258 if (!isPowerOf2_32(MemSizeInBits)) {
4259 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4260 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4261 } else {
4262 auto &Ctx = MF.getFunction().getContext();
4263 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4264 return UnableToLegalize; // Don't know what we're being asked to do.
4265
4266 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4267 }
4268
4269 // Extend to the next pow-2. If this store was itself the result of lowering,
4270 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4271 // that's wider than the stored size.
4272 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4273 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4274
4275 if (SrcTy.isPointer()) {
4276 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4277 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4278 }
4279
4280 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4281
4282 // Obtain the smaller value by shifting away the larger value.
4283 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4284 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4285
4286 // Generate the PtrAdd and truncating stores.
4287 LLT PtrTy = MRI.getType(PtrReg);
4288 auto OffsetCst = MIRBuilder.buildConstant(
4289 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4290 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4291
4292 MachineMemOperand *LargeMMO =
4293 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4294 MachineMemOperand *SmallMMO =
4295 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4296 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4297 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4298 StoreMI.eraseFromParent();
4299 return Legalized;
4300}
4301
4304 Register SrcReg = StoreMI.getValueReg();
4305 Register PtrReg = StoreMI.getPointerReg();
4306 LLT SrcTy = MRI.getType(SrcReg);
4307 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4308 LLT MemTy = MMO.getMemoryType();
4309 LLT MemScalarTy = MemTy.getElementType();
4310 MachineFunction &MF = MIRBuilder.getMF();
4311
4312 assert(SrcTy.isVector() && "Expect a vector store type");
4313
4314 if (!MemScalarTy.isByteSized()) {
4315 // We need to build an integer scalar of the vector bit pattern.
4316 // It's not legal for us to add padding when storing a vector.
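 // E.g. a <4 x s1> value stored as 4 bits is packed into an s4 scalar in
 // which bit I holds element I (the order is reversed on big-endian
 // targets) before being stored.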
4317 unsigned NumBits = MemTy.getSizeInBits();
4318 LLT IntTy = LLT::scalar(NumBits);
4319 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4320 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4321
4322 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4323 auto Elt = MIRBuilder.buildExtractVectorElement(
4324 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4325 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4326 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4327 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4328 ? (MemTy.getNumElements() - 1) - I
4329 : I;
4330 auto ShiftAmt = MIRBuilder.buildConstant(
4331 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4332 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4333 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4334 }
4335 auto PtrInfo = MMO.getPointerInfo();
4336 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4337 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4338 StoreMI.eraseFromParent();
4339 return Legalized;
4340 }
4341
4342 // TODO: implement simple scalarization.
4343 return UnableToLegalize;
4344}
4345
4347LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4348 switch (MI.getOpcode()) {
4349 case TargetOpcode::G_LOAD: {
4350 if (TypeIdx != 0)
4351 return UnableToLegalize;
4352 MachineMemOperand &MMO = **MI.memoperands_begin();
4353
4354 // Not sure how to interpret a bitcast of an extending load.
4355 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4356 return UnableToLegalize;
4357
4358 Observer.changingInstr(MI);
4359 bitcastDst(MI, CastTy, 0);
4360 MMO.setType(CastTy);
4361 // The range metadata is no longer valid when reinterpreted as a different
4362 // type.
4363 MMO.clearRanges();
4364 Observer.changedInstr(MI);
4365 return Legalized;
4366 }
4367 case TargetOpcode::G_STORE: {
4368 if (TypeIdx != 0)
4369 return UnableToLegalize;
4370
4371 MachineMemOperand &MMO = **MI.memoperands_begin();
4372
4373 // Not sure how to interpret a bitcast of a truncating store.
4374 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4375 return UnableToLegalize;
4376
4377 Observer.changingInstr(MI);
4378 bitcastSrc(MI, CastTy, 0);
4379 MMO.setType(CastTy);
4380 Observer.changedInstr(MI);
4381 return Legalized;
4382 }
4383 case TargetOpcode::G_SELECT: {
4384 if (TypeIdx != 0)
4385 return UnableToLegalize;
4386
4387 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4388 LLVM_DEBUG(
4389 dbgs() << "bitcast action not implemented for vector select\n");
4390 return UnableToLegalize;
4391 }
4392
4393 Observer.changingInstr(MI);
4394 bitcastSrc(MI, CastTy, 2);
4395 bitcastSrc(MI, CastTy, 3);
4396 bitcastDst(MI, CastTy, 0);
4397 Observer.changedInstr(MI);
4398 return Legalized;
4399 }
4400 case TargetOpcode::G_AND:
4401 case TargetOpcode::G_OR:
4402 case TargetOpcode::G_XOR: {
4403 Observer.changingInstr(MI);
4404 bitcastSrc(MI, CastTy, 1);
4405 bitcastSrc(MI, CastTy, 2);
4406 bitcastDst(MI, CastTy, 0);
4407 Observer.changedInstr(MI);
4408 return Legalized;
4409 }
4410 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4411 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4412 case TargetOpcode::G_INSERT_VECTOR_ELT:
4413 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4414 case TargetOpcode::G_CONCAT_VECTORS:
4415 return bitcastConcatVector(MI, TypeIdx, CastTy);
4416 case TargetOpcode::G_SHUFFLE_VECTOR:
4417 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4418 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4419 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4420 case TargetOpcode::G_INSERT_SUBVECTOR:
4421 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4422 default:
4423 return UnableToLegalize;
4424 }
4425}
4426
4427// Legalize an instruction by changing the opcode in place.
4428void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4429 Observer.changingInstr(MI);
4430 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4431 Observer.changedInstr(MI);
4432}
4433
4434 LegalizerHelper::LegalizeResult
4435 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4436 using namespace TargetOpcode;
4437
4438 switch(MI.getOpcode()) {
4439 default:
4440 return UnableToLegalize;
4441 case TargetOpcode::G_FCONSTANT:
4442 return lowerFConstant(MI);
4443 case TargetOpcode::G_BITCAST:
4444 return lowerBitcast(MI);
4445 case TargetOpcode::G_SREM:
4446 case TargetOpcode::G_UREM: {
4447 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4448 auto Quot =
4449 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4450 {MI.getOperand(1), MI.getOperand(2)});
4451
4452 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4453 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4454 MI.eraseFromParent();
4455 return Legalized;
4456 }
4457 case TargetOpcode::G_SADDO:
4458 case TargetOpcode::G_SSUBO:
4459 return lowerSADDO_SSUBO(MI);
4460 case TargetOpcode::G_SADDE:
4461 return lowerSADDE(MI);
4462 case TargetOpcode::G_SSUBE:
4463 return lowerSSUBE(MI);
4464 case TargetOpcode::G_UMULH:
4465 case TargetOpcode::G_SMULH:
4466 return lowerSMULH_UMULH(MI);
4467 case TargetOpcode::G_SMULO:
4468 case TargetOpcode::G_UMULO: {
4469 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4470 // result.
4471 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4472 LLT Ty = MRI.getType(Res);
4473
4474 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4475 ? TargetOpcode::G_SMULH
4476 : TargetOpcode::G_UMULH;
4477
4478 Observer.changingInstr(MI);
4479 const auto &TII = MIRBuilder.getTII();
4480 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4481 MI.removeOperand(1);
4482 Observer.changedInstr(MI);
4483
4484 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4485 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4486
4487 // Move insert point forward so we can use the Res register if needed.
4488 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4489
4490 // For *signed* multiply, overflow is detected by checking:
4491 // (hi != (lo >> bitwidth-1))
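 // E.g. for s8: 64 * 4 = 256 gives lo = 0x00, hi = 0x01, and (lo >> 7) = 0 !=
 // hi, so overflow is set; -1 * -1 = 1 gives lo = 0x01, hi = 0x00, and
 // (lo >> 7) = 0 == hi, so no overflow.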
4492 if (Opcode == TargetOpcode::G_SMULH) {
4493 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4494 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4495 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4496 } else {
4497 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4498 }
4499 return Legalized;
4500 }
4501 case TargetOpcode::G_FNEG: {
4502 auto [Res, SubByReg] = MI.getFirst2Regs();
4503 LLT Ty = MRI.getType(Res);
4504
4505 auto SignMask = MIRBuilder.buildConstant(
4506 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4507 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4508 MI.eraseFromParent();
4509 return Legalized;
4510 }
4511 case TargetOpcode::G_FSUB:
4512 case TargetOpcode::G_STRICT_FSUB: {
4513 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4514 LLT Ty = MRI.getType(Res);
4515
4516 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4517 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4518
4519 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4520 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4521 else
4522 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4523
4524 MI.eraseFromParent();
4525 return Legalized;
4526 }
4527 case TargetOpcode::G_FMAD:
4528 return lowerFMad(MI);
4529 case TargetOpcode::G_FFLOOR:
4530 return lowerFFloor(MI);
4531 case TargetOpcode::G_LROUND:
4532 case TargetOpcode::G_LLROUND: {
4533 Register DstReg = MI.getOperand(0).getReg();
4534 Register SrcReg = MI.getOperand(1).getReg();
4535 LLT SrcTy = MRI.getType(SrcReg);
4536 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4537 {SrcReg});
4538 MIRBuilder.buildFPTOSI(DstReg, Round);
4539 MI.eraseFromParent();
4540 return Legalized;
4541 }
4542 case TargetOpcode::G_INTRINSIC_ROUND:
4543 return lowerIntrinsicRound(MI);
4544 case TargetOpcode::G_FRINT: {
4545 // Since round even is the assumed rounding mode for unconstrained FP
4546 // operations, rint and roundeven are the same operation.
4547 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4548 return Legalized;
4549 }
4550 case TargetOpcode::G_INTRINSIC_LRINT:
4551 case TargetOpcode::G_INTRINSIC_LLRINT: {
4552 Register DstReg = MI.getOperand(0).getReg();
4553 Register SrcReg = MI.getOperand(1).getReg();
4554 LLT SrcTy = MRI.getType(SrcReg);
4555 auto Round =
4556 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4557 MIRBuilder.buildFPTOSI(DstReg, Round);
4558 MI.eraseFromParent();
4559 return Legalized;
4560 }
4561 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4562 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4563 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4564 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4565 **MI.memoperands_begin());
4566 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4567 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4568 MI.eraseFromParent();
4569 return Legalized;
4570 }
4571 case TargetOpcode::G_LOAD:
4572 case TargetOpcode::G_SEXTLOAD:
4573 case TargetOpcode::G_ZEXTLOAD:
4574 return lowerLoad(cast<GAnyLoad>(MI));
4575 case TargetOpcode::G_STORE:
4576 return lowerStore(cast<GStore>(MI));
4577 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4578 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4579 case TargetOpcode::G_CTLZ:
4580 case TargetOpcode::G_CTTZ:
4581 case TargetOpcode::G_CTPOP:
4582 return lowerBitCount(MI);
4583 case G_UADDO: {
4584 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4585
4586 Register NewRes = MRI.cloneVirtualRegister(Res);
4587
4588 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4589 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4590
4591 MIRBuilder.buildCopy(Res, NewRes);
4592
4593 MI.eraseFromParent();
4594 return Legalized;
4595 }
4596 case G_UADDE: {
4597 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4598 const LLT CondTy = MRI.getType(CarryOut);
4599 const LLT Ty = MRI.getType(Res);
4600
4601 Register NewRes = MRI.cloneVirtualRegister(Res);
4602
4603 // Initial add of the two operands.
4604 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4605
4606 // Initial check for carry.
4607 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4608
4609 // Add the sum and the carry.
4610 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4611 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4612
4613 // Second check for carry. We can only carry if the initial sum is all 1s
4614 // and the carry is set, resulting in a new sum of 0.
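 // E.g. for s8 with LHS = 0xFF, RHS = 0x00, CarryIn = 1: TmpRes = 0xFF with
 // no initial carry, NewRes = 0x00, and since NewRes == 0 and CarryIn is set,
 // the second check fires and CarryOut = 1.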
4615 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4616 auto ResEqZero =
4617 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4618 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4619 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4620
4621 MIRBuilder.buildCopy(Res, NewRes);
4622
4623 MI.eraseFromParent();
4624 return Legalized;
4625 }
4626 case G_USUBO: {
4627 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4628
4629 MIRBuilder.buildSub(Res, LHS, RHS);
4630 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4631
4632 MI.eraseFromParent();
4633 return Legalized;
4634 }
4635 case G_USUBE: {
4636 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4637 const LLT CondTy = MRI.getType(BorrowOut);
4638 const LLT Ty = MRI.getType(Res);
4639
4640 // Initial subtract of the two operands.
4641 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4642
4643 // Initial check for borrow.
4644 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4645
4646 // Subtract the borrow from the first subtract.
4647 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4648 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4649
4650 // Second check for borrow. We can only borrow if the initial difference is
4651 // 0 and the borrow is set, resulting in a new difference of all 1s.
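 // E.g. for s8 with LHS = 0x05, RHS = 0x05, BorrowIn = 1: TmpRes = 0x00 with
 // no initial borrow, Res = 0xFF, and since TmpRes == 0 and BorrowIn is set,
 // the second check fires and BorrowOut = 1.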
4652 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4653 auto TmpResEqZero =
4654 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4655 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4656 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4657
4658 MI.eraseFromParent();
4659 return Legalized;
4660 }
4661 case G_UITOFP:
4662 return lowerUITOFP(MI);
4663 case G_SITOFP:
4664 return lowerSITOFP(MI);
4665 case G_FPTOUI:
4666 return lowerFPTOUI(MI);
4667 case G_FPTOSI:
4668 return lowerFPTOSI(MI);
4669 case G_FPTOUI_SAT:
4670 case G_FPTOSI_SAT:
4671 return lowerFPTOINT_SAT(MI);
4672 case G_FPTRUNC:
4673 return lowerFPTRUNC(MI);
4674 case G_FPOWI:
4675 return lowerFPOWI(MI);
4676 case G_SMIN:
4677 case G_SMAX:
4678 case G_UMIN:
4679 case G_UMAX:
4680 return lowerMinMax(MI);
4681 case G_SCMP:
4682 case G_UCMP:
4683 return lowerThreewayCompare(MI);
4684 case G_FCOPYSIGN:
4685 return lowerFCopySign(MI);
4686 case G_FMINNUM:
4687 case G_FMAXNUM:
4688 case G_FMINIMUMNUM:
4689 case G_FMAXIMUMNUM:
4690 return lowerFMinNumMaxNum(MI);
4691 case G_MERGE_VALUES:
4692 return lowerMergeValues(MI);
4693 case G_UNMERGE_VALUES:
4694 return lowerUnmergeValues(MI);
4695 case TargetOpcode::G_SEXT_INREG: {
4696 assert(MI.getOperand(2).isImm() && "Expected immediate");
4697 int64_t SizeInBits = MI.getOperand(2).getImm();
4698
4699 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4700 LLT DstTy = MRI.getType(DstReg);
4701 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4702
4703 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4704 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4705 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4706 MI.eraseFromParent();
4707 return Legalized;
4708 }
4709 case G_EXTRACT_VECTOR_ELT:
4710 case G_INSERT_VECTOR_ELT:
4711 return lowerExtractInsertVectorElt(MI);
4712 case G_SHUFFLE_VECTOR:
4713 return lowerShuffleVector(MI);
4714 case G_VECTOR_COMPRESS:
4715 return lowerVECTOR_COMPRESS(MI);
4716 case G_DYN_STACKALLOC:
4717 return lowerDynStackAlloc(MI);
4718 case G_STACKSAVE:
4719 return lowerStackSave(MI);
4720 case G_STACKRESTORE:
4721 return lowerStackRestore(MI);
4722 case G_EXTRACT:
4723 return lowerExtract(MI);
4724 case G_INSERT:
4725 return lowerInsert(MI);
4726 case G_BSWAP:
4727 return lowerBswap(MI);
4728 case G_BITREVERSE:
4729 return lowerBitreverse(MI);
4730 case G_READ_REGISTER:
4731 case G_WRITE_REGISTER:
4732 return lowerReadWriteRegister(MI);
4733 case G_UADDSAT:
4734 case G_USUBSAT: {
4735 // Try to make a reasonable guess about which lowering strategy to use. The
4736 // target can override this with custom lowering and calling the
4737 // implementation functions.
4738 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4739 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4740 return lowerAddSubSatToMinMax(MI);
4741 return lowerAddSubSatToAddoSubo(MI);
4742 }
4743 case G_SADDSAT:
4744 case G_SSUBSAT: {
4745 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4746
4747 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4748 // since it's a shorter expansion. However, we would need to figure out the
4749 // preferred boolean type for the carry out for the query.
4750 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4751 return lowerAddSubSatToMinMax(MI);
4752 return lowerAddSubSatToAddoSubo(MI);
4753 }
4754 case G_SSHLSAT:
4755 case G_USHLSAT:
4756 return lowerShlSat(MI);
4757 case G_ABS:
4758 return lowerAbsToAddXor(MI);
4759 case G_ABDS:
4760 case G_ABDU: {
4761 bool IsSigned = MI.getOpcode() == G_ABDS;
4762 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4763 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4764 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4765 return lowerAbsDiffToMinMax(MI);
4766 }
4767 return lowerAbsDiffToSelect(MI);
4768 }
4769 case G_FABS:
4770 return lowerFAbs(MI);
4771 case G_SELECT:
4772 return lowerSelect(MI);
4773 case G_IS_FPCLASS:
4774 return lowerISFPCLASS(MI);
4775 case G_SDIVREM:
4776 case G_UDIVREM:
4777 return lowerDIVREM(MI);
4778 case G_FSHL:
4779 case G_FSHR:
4780 return lowerFunnelShift(MI);
4781 case G_ROTL:
4782 case G_ROTR:
4783 return lowerRotate(MI);
4784 case G_MEMSET:
4785 case G_MEMCPY:
4786 case G_MEMMOVE:
4787 return lowerMemCpyFamily(MI);
4788 case G_MEMCPY_INLINE:
4789 return lowerMemcpyInline(MI);
4790 case G_ZEXT:
4791 case G_SEXT:
4792 case G_ANYEXT:
4793 return lowerEXT(MI);
4794 case G_TRUNC:
4795 return lowerTRUNC(MI);
4796 GISEL_VECREDUCE_CASES_NONSEQ
4797 return lowerVectorReduction(MI);
4798 case G_VAARG:
4799 return lowerVAArg(MI);
4800 case G_ATOMICRMW_SUB: {
4801 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4802 const LLT ValTy = MRI.getType(Val);
4803 MachineMemOperand *MMO = *MI.memoperands_begin();
4804
4805 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4806 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4807 MI.eraseFromParent();
4808 return Legalized;
4809 }
4810 }
4811}
4812
4814 Align MinAlign) const {
4815 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4816 // datalayout for the preferred alignment. Also there should be a target hook
4817 // for this to allow targets to reduce the alignment and ignore the
4818 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4819 // the type.
4820 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4821}
4822
4825 MachinePointerInfo &PtrInfo) {
4826 MachineFunction &MF = MIRBuilder.getMF();
4827 const DataLayout &DL = MIRBuilder.getDataLayout();
4828 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4829
4830 unsigned AddrSpace = DL.getAllocaAddrSpace();
4831 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4832
4833 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4834 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4835}
4836
4838 const SrcOp &Val) {
4839 LLT SrcTy = Val.getLLTTy(MRI);
4840 Align StackTypeAlign =
4841 std::max(getStackTemporaryAlignment(SrcTy),
4843 MachinePointerInfo PtrInfo;
4844 auto StackTemp =
4845 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4846
4847 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4848 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4849}
4850
4852 LLT VecTy) {
4853 LLT IdxTy = B.getMRI()->getType(IdxReg);
4854 unsigned NElts = VecTy.getNumElements();
4855
4856 int64_t IdxVal;
4857 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4858 if (IdxVal < VecTy.getNumElements())
4859 return IdxReg;
4860 // If a constant index would be out of bounds, clamp it as well.
4861 }
4862
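 // E.g. an index into an 8-element vector is masked with 7, while an index
 // into a 6-element vector is clamped with umin(Idx, 5).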
4863 if (isPowerOf2_32(NElts)) {
4864 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4865 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4866 }
4867
4868 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4869 .getReg(0);
4870}
4871
4873 Register Index) {
4874 LLT EltTy = VecTy.getElementType();
4875
4876 // Calculate the element offset and add it to the pointer.
4877 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4878 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4879 "Converting bits to bytes lost precision");
4880
4881 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4882
4883 // Convert index to the correct size for the address space.
4884 const DataLayout &DL = MIRBuilder.getDataLayout();
4885 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4886 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4887 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4888 if (IdxTy != MRI.getType(Index))
4889 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4890
4891 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4892 MIRBuilder.buildConstant(IdxTy, EltSize));
4893
4894 LLT PtrTy = MRI.getType(VecPtr);
4895 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4896}
4897
4898#ifndef NDEBUG
4899/// Check that all vector operands have the same number of elements. Other
4900/// operands should be listed in \p NonVecOpIndices.
4903 std::initializer_list<unsigned> NonVecOpIndices) {
4904 if (MI.getNumMemOperands() != 0)
4905 return false;
4906
4907 LLT VecTy = MRI.getType(MI.getReg(0));
4908 if (!VecTy.isVector())
4909 return false;
4910 unsigned NumElts = VecTy.getNumElements();
4911
4912 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4913 MachineOperand &Op = MI.getOperand(OpIdx);
4914 if (!Op.isReg()) {
4915 if (!is_contained(NonVecOpIndices, OpIdx))
4916 return false;
4917 continue;
4918 }
4919
4920 LLT Ty = MRI.getType(Op.getReg());
4921 if (!Ty.isVector()) {
4922 if (!is_contained(NonVecOpIndices, OpIdx))
4923 return false;
4924 continue;
4925 }
4926
4927 if (Ty.getNumElements() != NumElts)
4928 return false;
4929 }
4930
4931 return true;
4932}
4933#endif
4934
4935/// Fill \p DstOps with DstOps that, combined, have the same number of elements
4936/// as \p Ty. These DstOps are either scalars (when \p NumElts = 1) or vectors
4937/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
4938/// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
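/// E.g. Ty = <7 x s32> with \p NumElts = 2 produces three <2 x s32> DstOps
/// plus one s32 leftover DstOp.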
4939static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4940 unsigned NumElts) {
4941 LLT LeftoverTy;
4942 assert(Ty.isVector() && "Expected vector type");
4943 LLT EltTy = Ty.getElementType();
4944 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4945 int NumParts, NumLeftover;
4946 std::tie(NumParts, NumLeftover) =
4947 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4948
4949 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4950 for (int i = 0; i < NumParts; ++i) {
4951 DstOps.push_back(NarrowTy);
4952 }
4953
4954 if (LeftoverTy.isValid()) {
4955 assert(NumLeftover == 1 && "expected exactly one leftover");
4956 DstOps.push_back(LeftoverTy);
4957 }
4958}
4959
4960 /// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
4961 /// made from \p Op, according to the operand's type.
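/// e.g. the predicate operand of a split G_ICMP is pushed once for each
/// sub-compare that will be built.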
4962 static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4963 MachineOperand &Op) {
4964 for (unsigned i = 0; i < N; ++i) {
4965 if (Op.isReg())
4966 Ops.push_back(Op.getReg());
4967 else if (Op.isImm())
4968 Ops.push_back(Op.getImm());
4969 else if (Op.isPredicate())
4970 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4971 else
4972 llvm_unreachable("Unsupported type");
4973 }
4974}
4975
4976// Handle splitting vector operations which need to have the same number of
4977// elements in each type index, but each type index may have a different element
4978// type.
4979//
4980// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4981// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4982// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4983//
4984 // Also handles some irregular breakdown cases, e.g.
4985 // <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4986// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4987// s64 = G_SHL s64, s32
4988 LegalizerHelper::LegalizeResult
4989 LegalizerHelper::fewerElementsVectorMultiEltType(
4990 GenericMachineInstr &MI, unsigned NumElts,
4991 std::initializer_list<unsigned> NonVecOpIndices) {
4992 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4993 "Non-compatible opcode or not specified non-vector operands");
4994 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4995
4996 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4997 unsigned NumDefs = MI.getNumDefs();
4998
4999 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5000 // Build instructions with DstOps so the instruction found by CSE can be used
5001 // directly; CSE copies the found instruction into the given vreg when building with a vreg destination.
5002 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5003 // Output registers will be taken from created instructions.
5004 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5005 for (unsigned i = 0; i < NumDefs; ++i) {
5006 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5007 }
5008
5009 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5010 // Operands listed in NonVecOpIndices will be used as is without splitting;
5011 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5012 // scalar condition (op 1), immediate in sext_inreg (op 2).
5013 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5014 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5015 ++UseIdx, ++UseNo) {
5016 if (is_contained(NonVecOpIndices, UseIdx)) {
5017 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5018 MI.getOperand(UseIdx));
5019 } else {
5020 SmallVector<Register, 8> SplitPieces;
5021 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5022 MRI);
5023 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5024 }
5025 }
5026
5027 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5028
5029 // Take i-th piece of each input operand split and build sub-vector/scalar
5030 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5031 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5032 SmallVector<DstOp, 8> Defs;
5033 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5034 Defs.push_back(OutputOpsPieces[DstNo][i]);
5035
5036 SmallVector<SrcOp, 8> Uses;
5037 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5038 Uses.push_back(InputOpsPieces[InputNo][i]);
5039
5040 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5041 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5042 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5043 }
5044
5045 // Merge small outputs into MI's output for each def operand.
5046 if (NumLeftovers) {
5047 for (unsigned i = 0; i < NumDefs; ++i)
5048 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5049 } else {
5050 for (unsigned i = 0; i < NumDefs; ++i)
5051 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5052 }
5053
5054 MI.eraseFromParent();
5055 return Legalized;
5056}
5057
5058 LegalizerHelper::LegalizeResult
5059 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5060 unsigned NumElts) {
5061 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5062
5063 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5064 unsigned NumDefs = MI.getNumDefs();
5065
5066 SmallVector<DstOp, 8> OutputOpsPieces;
5067 SmallVector<Register, 8> OutputRegs;
5068 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5069
5070 // Instructions that perform the register split will be inserted in the basic
5071 // block where the register is defined (the basic block is in the next operand).
5072 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5073 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5074 UseIdx += 2, ++UseNo) {
5075 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5076 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5077 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5078 MIRBuilder, MRI);
5079 }
5080
5081 // Build PHIs with fewer elements.
5082 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5083 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5084 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5085 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5086 Phi.addDef(
5087 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5088 OutputRegs.push_back(Phi.getReg(0));
5089
5090 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5091 Phi.addUse(InputOpsPieces[j][i]);
5092 Phi.add(MI.getOperand(1 + j * 2 + 1));
5093 }
5094 }
5095
5096 // Set the insert point after the existing PHIs
5097 MachineBasicBlock &MBB = *MI.getParent();
5098 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5099
5100 // Merge small outputs into MI's def.
5101 if (NumLeftovers) {
5102 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5103 } else {
5104 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5105 }
5106
5107 MI.eraseFromParent();
5108 return Legalized;
5109}
5110
5111 LegalizerHelper::LegalizeResult
5112 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5113 unsigned TypeIdx,
5114 LLT NarrowTy) {
5115 const int NumDst = MI.getNumOperands() - 1;
5116 const Register SrcReg = MI.getOperand(NumDst).getReg();
5117 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5118 LLT SrcTy = MRI.getType(SrcReg);
5119
5120 if (TypeIdx != 1 || NarrowTy == DstTy)
5121 return UnableToLegalize;
5122
5123 // Requires compatible types. Otherwise SrcReg should have been defined by a
5124 // merge-like instruction that would get artifact-combined. Most likely the
5125 // instruction that defines SrcReg has to perform more/fewer-elements
5126 // legalization compatible with NarrowTy.
5127 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5128 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5129
5130 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5131 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5132 return UnableToLegalize;
5133
5134 // This is most likely DstTy (smaller than register size) packed in SrcTy
5135 // (larger than register size), and since the unmerge was not combined it will
5136 // be lowered to bit-sequence extracts from a register. Unpack SrcTy into
5137 // NarrowTy (register size) pieces first, then unpack each NarrowTy piece to DstTy.
5138
5139 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5140 //
5141 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5142 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5143 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5144 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5145 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5146 const int PartsPerUnmerge = NumDst / NumUnmerge;
5147
5148 for (int I = 0; I != NumUnmerge; ++I) {
5149 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5150
5151 for (int J = 0; J != PartsPerUnmerge; ++J)
5152 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5153 MIB.addUse(Unmerge.getReg(I));
5154 }
5155
5156 MI.eraseFromParent();
5157 return Legalized;
5158}
5159
5160 LegalizerHelper::LegalizeResult
5161 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5162 LLT NarrowTy) {
5163 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5164 // Requires compatible types. Otherwise the user of DstReg did not perform the
5165 // unmerge that should have been artifact-combined. Most likely the instruction
5166 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5167 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5168 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5169 if (NarrowTy == SrcTy)
5170 return UnableToLegalize;
5171
5172 // This attempts to lower part of an LCMTy merge/unmerge sequence. Its intended
5173 // use is for old MIR tests. Since the changes to more/fewer-elements
5174 // legalization it should no longer be possible to generate MIR like this when
5175 // starting from LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5176 if (TypeIdx == 1) {
5177 assert(SrcTy.isVector() && "Expected vector types");
5178 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5179 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5180 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5181 return UnableToLegalize;
5182 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5183 //
5184 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5185 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5186 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5187 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5188 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5189 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5190
5191 SmallVector<Register, 8> Elts;
5192 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5193 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5194 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5195 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5196 Elts.push_back(Unmerge.getReg(j));
5197 }
5198
5199 SmallVector<Register, 8> NarrowTyElts;
5200 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5201 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5202 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5203 ++i, Offset += NumNarrowTyElts) {
5204 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5205 NarrowTyElts.push_back(
5206 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5207 }
5208
5209 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5210 MI.eraseFromParent();
5211 return Legalized;
5212 }
5213
5214 assert(TypeIdx == 0 && "Bad type index");
5215 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5216 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5217 return UnableToLegalize;
5218
5219 // This is most likely SrcTy (smaller than register size) packed in DstTy
5220 // (larger than register size), and since the merge was not combined it will be
5221 // lowered to bit-sequence packing into a register. Merge SrcTy into NarrowTy
5222 // (register size) pieces first, then merge each NarrowTy piece into DstTy.
5223
5224 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5225 //
5226 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5227 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5228 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5229 SmallVector<Register, 8> NarrowTyElts;
5230 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5231 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5232 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5233 for (unsigned i = 0; i < NumParts; ++i) {
5234 SmallVector<Register, 8> Sources;
5235 for (unsigned j = 0; j < NumElts; ++j)
5236 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5237 NarrowTyElts.push_back(
5238 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5239 }
5240
5241 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5242 MI.eraseFromParent();
5243 return Legalized;
5244}
5245
5246 LegalizerHelper::LegalizeResult
5247 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5248 unsigned TypeIdx,
5249 LLT NarrowVecTy) {
5250 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5251 Register InsertVal;
5252 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5253
5254 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5255 if (IsInsert)
5256 InsertVal = MI.getOperand(2).getReg();
5257
5258 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5259 LLT VecTy = MRI.getType(SrcVec);
5260
5261 // If the index is a constant, we can really break this down as you would
5262 // expect, and index into the target-sized pieces.
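// e.g. extracting element 5 from an <8 x s32> split into <4 x s32> pieces reads
// element 1 of the second piece.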
5263 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5264 if (MaybeCst) {
5265 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5266 // Avoid out of bounds indexing the pieces.
5267 if (IdxVal >= VecTy.getNumElements()) {
5268 MIRBuilder.buildUndef(DstReg);
5269 MI.eraseFromParent();
5270 return Legalized;
5271 }
5272
5273 if (!NarrowVecTy.isVector()) {
5274 SmallVector<Register, 8> SplitPieces;
5275 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5276 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5277 if (IsInsert) {
5278 SplitPieces[IdxVal] = InsertVal;
5279 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5280 } else {
5281 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5282 }
5283 } else {
5284 SmallVector<Register, 8> VecParts;
5285 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5286
5287 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5288 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5289 TargetOpcode::G_ANYEXT);
5290
5291 unsigned NewNumElts = NarrowVecTy.getNumElements();
5292
5293 LLT IdxTy = MRI.getType(Idx);
5294 int64_t PartIdx = IdxVal / NewNumElts;
5295 auto NewIdx =
5296 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5297
5298 if (IsInsert) {
5299 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5300
5301 // Use the adjusted index to insert into one of the subvectors.
5302 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5303 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5304 VecParts[PartIdx] = InsertPart.getReg(0);
5305
5306 // Recombine the inserted subvector with the others to reform the result
5307 // vector.
5308 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5309 } else {
5310 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5311 }
5312 }
5313
5314 MI.eraseFromParent();
5315 return Legalized;
5316 }
5317
5318 // With a variable index, we can't perform the operation in a smaller type, so
5319 // we're forced to expand this.
5320 //
5321 // TODO: We could emit a chain of compare/select to figure out which piece to
5322 // index.
5323 return lowerExtractInsertVectorElt(MI);
5324}
5325
5326 LegalizerHelper::LegalizeResult
5327 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5328 LLT NarrowTy) {
5329 // FIXME: Don't know how to handle secondary types yet.
5330 if (TypeIdx != 0)
5331 return UnableToLegalize;
5332
5333 if (!NarrowTy.isByteSized()) {
5334 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5335 return UnableToLegalize;
5336 }
5337
5338 // This implementation doesn't work for atomics. Give up instead of doing
5339 // something invalid.
5340 if (LdStMI.isAtomic())
5341 return UnableToLegalize;
5342
5343 bool IsLoad = isa<GLoad>(LdStMI);
5344 Register ValReg = LdStMI.getReg(0);
5345 Register AddrReg = LdStMI.getPointerReg();
5346 LLT ValTy = MRI.getType(ValReg);
5347
5348 // FIXME: Do we need a distinct NarrowMemory legalize action?
5349 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5350 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5351 return UnableToLegalize;
5352 }
5353
5354 int NumParts = -1;
5355 int NumLeftover = -1;
5356 LLT LeftoverTy;
5357 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5358 if (IsLoad) {
5359 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5360 } else {
5361 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5362 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5363 NumParts = NarrowRegs.size();
5364 NumLeftover = NarrowLeftoverRegs.size();
5365 }
5366 }
5367
5368 if (NumParts == -1)
5369 return UnableToLegalize;
5370
5371 LLT PtrTy = MRI.getType(AddrReg);
5372 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5373
5374 unsigned TotalSize = ValTy.getSizeInBits();
5375
5376 // Split the load/store into PartTy-sized pieces starting at Offset. If this
5377 // is a load, return the new registers in ValRegs. For a store, each element
5378 // of ValRegs should be PartTy. Returns the next offset that needs to be
5379 // handled.
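// e.g. a 96-bit value narrowed to s32 becomes three s32 accesses at byte
// offsets 0, 4 and 8 (visited in reverse order on big-endian targets).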
5380 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5381 auto MMO = LdStMI.getMMO();
5382 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5383 unsigned NumParts, unsigned Offset) -> unsigned {
5384 MachineFunction &MF = MIRBuilder.getMF();
5385 unsigned PartSize = PartTy.getSizeInBits();
5386 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5387 ++Idx) {
5388 unsigned ByteOffset = Offset / 8;
5389 Register NewAddrReg;
5390
5391 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5392 ByteOffset);
5393
5394 MachineMemOperand *NewMMO =
5395 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5396
5397 if (IsLoad) {
5398 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5399 ValRegs.push_back(Dst);
5400 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5401 } else {
5402 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5403 }
5404 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5405 }
5406
5407 return Offset;
5408 };
5409
5410 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5411 unsigned HandledOffset =
5412 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5413
5414 // Handle the rest of the register if this isn't an even type breakdown.
5415 if (LeftoverTy.isValid())
5416 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5417
5418 if (IsLoad) {
5419 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5420 LeftoverTy, NarrowLeftoverRegs);
5421 }
5422
5423 LdStMI.eraseFromParent();
5424 return Legalized;
5425}
5426
5427 LegalizerHelper::LegalizeResult
5428 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5429 LLT NarrowTy) {
5430 using namespace TargetOpcode;
5431 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5432 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5433
5434 switch (MI.getOpcode()) {
5435 case G_IMPLICIT_DEF:
5436 case G_TRUNC:
5437 case G_AND:
5438 case G_OR:
5439 case G_XOR:
5440 case G_ADD:
5441 case G_SUB:
5442 case G_MUL:
5443 case G_PTR_ADD:
5444 case G_SMULH:
5445 case G_UMULH:
5446 case G_FADD:
5447 case G_FMUL:
5448 case G_FSUB:
5449 case G_FNEG:
5450 case G_FABS:
5451 case G_FCANONICALIZE:
5452 case G_FDIV:
5453 case G_FREM:
5454 case G_FMA:
5455 case G_FMAD:
5456 case G_FPOW:
5457 case G_FEXP:
5458 case G_FEXP2:
5459 case G_FEXP10:
5460 case G_FLOG:
5461 case G_FLOG2:
5462 case G_FLOG10:
5463 case G_FLDEXP:
5464 case G_FNEARBYINT:
5465 case G_FCEIL:
5466 case G_FFLOOR:
5467 case G_FRINT:
5468 case G_INTRINSIC_LRINT:
5469 case G_INTRINSIC_LLRINT:
5470 case G_INTRINSIC_ROUND:
5471 case G_INTRINSIC_ROUNDEVEN:
5472 case G_LROUND:
5473 case G_LLROUND:
5474 case G_INTRINSIC_TRUNC:
5475 case G_FCOS:
5476 case G_FSIN:
5477 case G_FTAN:
5478 case G_FACOS:
5479 case G_FASIN:
5480 case G_FATAN:
5481 case G_FATAN2:
5482 case G_FCOSH:
5483 case G_FSINH:
5484 case G_FTANH:
5485 case G_FSQRT:
5486 case G_BSWAP:
5487 case G_BITREVERSE:
5488 case G_SDIV:
5489 case G_UDIV:
5490 case G_SREM:
5491 case G_UREM:
5492 case G_SDIVREM:
5493 case G_UDIVREM:
5494 case G_SMIN:
5495 case G_SMAX:
5496 case G_UMIN:
5497 case G_UMAX:
5498 case G_ABS:
5499 case G_FMINNUM:
5500 case G_FMAXNUM:
5501 case G_FMINNUM_IEEE:
5502 case G_FMAXNUM_IEEE:
5503 case G_FMINIMUM:
5504 case G_FMAXIMUM:
5505 case G_FMINIMUMNUM:
5506 case G_FMAXIMUMNUM:
5507 case G_FSHL:
5508 case G_FSHR:
5509 case G_ROTL:
5510 case G_ROTR:
5511 case G_FREEZE:
5512 case G_SADDSAT:
5513 case G_SSUBSAT:
5514 case G_UADDSAT:
5515 case G_USUBSAT:
5516 case G_UMULO:
5517 case G_SMULO:
5518 case G_SHL:
5519 case G_LSHR:
5520 case G_ASHR:
5521 case G_SSHLSAT:
5522 case G_USHLSAT:
5523 case G_CTLZ:
5524 case G_CTLZ_ZERO_UNDEF:
5525 case G_CTTZ:
5526 case G_CTTZ_ZERO_UNDEF:
5527 case G_CTPOP:
5528 case G_FCOPYSIGN:
5529 case G_ZEXT:
5530 case G_SEXT:
5531 case G_ANYEXT:
5532 case G_FPEXT:
5533 case G_FPTRUNC:
5534 case G_SITOFP:
5535 case G_UITOFP:
5536 case G_FPTOSI:
5537 case G_FPTOUI:
5538 case G_FPTOSI_SAT:
5539 case G_FPTOUI_SAT:
5540 case G_INTTOPTR:
5541 case G_PTRTOINT:
5542 case G_ADDRSPACE_CAST:
5543 case G_UADDO:
5544 case G_USUBO:
5545 case G_UADDE:
5546 case G_USUBE:
5547 case G_SADDO:
5548 case G_SSUBO:
5549 case G_SADDE:
5550 case G_SSUBE:
5551 case G_STRICT_FADD:
5552 case G_STRICT_FSUB:
5553 case G_STRICT_FMUL:
5554 case G_STRICT_FMA:
5555 case G_STRICT_FLDEXP:
5556 case G_FFREXP:
5557 return fewerElementsVectorMultiEltType(GMI, NumElts);
5558 case G_ICMP:
5559 case G_FCMP:
5560 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5561 case G_IS_FPCLASS:
5562 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5563 case G_SELECT:
5564 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5565 return fewerElementsVectorMultiEltType(GMI, NumElts);
5566 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5567 case G_PHI:
5568 return fewerElementsVectorPhi(GMI, NumElts);
5569 case G_UNMERGE_VALUES:
5570 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5571 case G_BUILD_VECTOR:
5572 assert(TypeIdx == 0 && "not a vector type index");
5573 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5574 case G_CONCAT_VECTORS:
5575 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5576 return UnableToLegalize;
5577 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5578 case G_EXTRACT_VECTOR_ELT:
5579 case G_INSERT_VECTOR_ELT:
5580 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5581 case G_LOAD:
5582 case G_STORE:
5583 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5584 case G_SEXT_INREG:
5585 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5586 GISEL_VECREDUCE_CASES_NONSEQ
5587 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5588 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5589 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5590 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5591 case G_SHUFFLE_VECTOR:
5592 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5593 case G_FPOWI:
5594 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5595 case G_BITCAST:
5596 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5597 case G_INTRINSIC_FPTRUNC_ROUND:
5598 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5599 default:
5600 return UnableToLegalize;
5601 }
5602}
5603
5604 LegalizerHelper::LegalizeResult
5605 LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5606 LLT NarrowTy) {
5607 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5608 "Not a bitcast operation");
5609
5610 if (TypeIdx != 0)
5611 return UnableToLegalize;
5612
5613 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5614
5615 unsigned NewElemCount =
5616 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5617 SmallVector<Register> SrcVRegs, BitcastVRegs;
5618 if (NewElemCount == 1) {
5619 LLT SrcNarrowTy = SrcTy.getElementType();
5620
5621 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5622 getUnmergeResults(SrcVRegs, *Unmerge);
5623 } else {
5624 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5625
5626 // Split the Src and Dst Reg into smaller registers
5627 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5628 return UnableToLegalize;
5629 }
5630
5631 // Build new smaller bitcast instructions
5632 // Leftover types are not supported for now, but will have to be eventually.
5633 for (Register Reg : SrcVRegs)
5634 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5635
5636 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5637 MI.eraseFromParent();
5638 return Legalized;
5639}
5640
5641 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5642 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5643 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5644 if (TypeIdx != 0)
5645 return UnableToLegalize;
5646
5647 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5648 MI.getFirst3RegLLTs();
5649 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5650 // The shuffle should be canonicalized by now.
5651 if (DstTy != Src1Ty)
5652 return UnableToLegalize;
5653 if (DstTy != Src2Ty)
5654 return UnableToLegalize;
5655
5656 if (!isPowerOf2_32(DstTy.getNumElements()))
5657 return UnableToLegalize;
5658
5659 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5660 // Further legalization attempts will be needed to split it further.
5661 NarrowTy =
5662 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5663 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5664
5665 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5666 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5667 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5668 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5669 SplitSrc2Regs[1]};
5670
5671 Register Hi, Lo;
5672
5673 // If Lo or Hi uses elements from at most two of the four input vectors, then
5674 // express it as a vector shuffle of those two inputs. Otherwise extract the
5675 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5676 SmallVector<int, 16> Ops;
5677 for (unsigned High = 0; High < 2; ++High) {
5678 Register &Output = High ? Hi : Lo;
5679
5680 // Build a shuffle mask for the output, discovering on the fly which
5681 // input vectors to use as shuffle operands (recorded in InputUsed).
5682 // If building a suitable shuffle vector proves too hard, then bail
5683 // out with useBuildVector set.
5684 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5685 unsigned FirstMaskIdx = High * NewElts;
5686 bool UseBuildVector = false;
5687 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5688 // The mask element. This indexes into the input.
5689 int Idx = Mask[FirstMaskIdx + MaskOffset];
5690
5691 // The input vector this mask element indexes into.
5692 unsigned Input = (unsigned)Idx / NewElts;
5693
5694 if (Input >= std::size(Inputs)) {
5695 // The mask element does not index into any input vector.
5696 Ops.push_back(-1);
5697 continue;
5698 }
5699
5700 // Turn the index into an offset from the start of the input vector.
5701 Idx -= Input * NewElts;
5702
5703 // Find or create a shuffle vector operand to hold this input.
5704 unsigned OpNo;
5705 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5706 if (InputUsed[OpNo] == Input) {
5707 // This input vector is already an operand.
5708 break;
5709 } else if (InputUsed[OpNo] == -1U) {
5710 // Create a new operand for this input vector.
5711 InputUsed[OpNo] = Input;
5712 break;
5713 }
5714 }
5715
5716 if (OpNo >= std::size(InputUsed)) {
5717 // More than two input vectors used! Give up on trying to create a
5718 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5719 UseBuildVector = true;
5720 break;
5721 }
5722
5723 // Add the mask index for the new shuffle vector.
5724 Ops.push_back(Idx + OpNo * NewElts);
5725 }
5726
5727 if (UseBuildVector) {
5728 LLT EltTy = NarrowTy.getElementType();
5729 SmallVector<Register, 16> SVOps;
5730
5731 // Extract the input elements by hand.
5732 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5733 // The mask element. This indexes into the input.
5734 int Idx = Mask[FirstMaskIdx + MaskOffset];
5735
5736 // The input vector this mask element indexes into.
5737 unsigned Input = (unsigned)Idx / NewElts;
5738
5739 if (Input >= std::size(Inputs)) {
5740 // The mask element is "undef" or indexes off the end of the input.
5741 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5742 continue;
5743 }
5744
5745 // Turn the index into an offset from the start of the input vector.
5746 Idx -= Input * NewElts;
5747
5748 // Extract the vector element by hand.
5749 SVOps.push_back(MIRBuilder
5750 .buildExtractVectorElement(
5751 EltTy, Inputs[Input],
5752 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5753 .getReg(0));
5754 }
5755
5756 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5757 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5758 } else if (InputUsed[0] == -1U) {
5759 // No input vectors were used! The result is undefined.
5760 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5761 } else {
5762 Register Op0 = Inputs[InputUsed[0]];
5763 // If only one input was used, use an undefined vector for the other.
5764 Register Op1 = InputUsed[1] == -1U
5765 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5766 : Inputs[InputUsed[1]];
5767 // At least one input vector was used. Create a new shuffle vector.
5768 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5769 }
5770
5771 Ops.clear();
5772 }
5773
5774 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5775 MI.eraseFromParent();
5776 return Legalized;
5777}
5778
5779 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5780 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5781 auto &RdxMI = cast<GVecReduce>(MI);
5782
5783 if (TypeIdx != 1)
5784 return UnableToLegalize;
5785
5786 // The semantics of the normal non-sequential reductions allow us to freely
5787 // re-associate the operation.
5788 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5789
5790 if (NarrowTy.isVector() &&
5791 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5792 return UnableToLegalize;
5793
5794 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5795 SmallVector<Register> SplitSrcs;
5796 // If NarrowTy is a scalar then we're being asked to scalarize.
5797 const unsigned NumParts =
5798 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5799 : SrcTy.getNumElements();
5800
5801 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5802 if (NarrowTy.isScalar()) {
5803 if (DstTy != NarrowTy)
5804 return UnableToLegalize; // FIXME: handle implicit extensions.
5805
5806 if (isPowerOf2_32(NumParts)) {
5807 // Generate a tree of scalar operations to reduce the critical path.
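// e.g. 8 scalar pieces are combined pairwise over three rounds: 8 -> 4 -> 2 -> 1.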
5808 SmallVector<Register> PartialResults;
5809 unsigned NumPartsLeft = NumParts;
5810 while (NumPartsLeft > 1) {
5811 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5812 PartialResults.emplace_back(
5813 MIRBuilder
5814 .buildInstr(ScalarOpc, {NarrowTy},
5815 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5816 .getReg(0));
5817 }
5818 SplitSrcs = PartialResults;
5819 PartialResults.clear();
5820 NumPartsLeft = SplitSrcs.size();
5821 }
5822 assert(SplitSrcs.size() == 1);
5823 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5824 MI.eraseFromParent();
5825 return Legalized;
5826 }
5827 // If we can't generate a tree, then just do sequential operations.
5828 Register Acc = SplitSrcs[0];
5829 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5830 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5831 .getReg(0);
5832 MIRBuilder.buildCopy(DstReg, Acc);
5833 MI.eraseFromParent();
5834 return Legalized;
5835 }
5836 SmallVector<Register> PartialReductions;
5837 for (unsigned Part = 0; Part < NumParts; ++Part) {
5838 PartialReductions.push_back(
5839 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5840 .getReg(0));
5841 }
5842
5843 // If the types involved are powers of 2, we can generate intermediate vector
5844 // ops, before generating a final reduction operation.
5845 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5846 isPowerOf2_32(NarrowTy.getNumElements())) {
5847 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5848 }
5849
5850 Register Acc = PartialReductions[0];
5851 for (unsigned Part = 1; Part < NumParts; ++Part) {
5852 if (Part == NumParts - 1) {
5853 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5854 {Acc, PartialReductions[Part]});
5855 } else {
5856 Acc = MIRBuilder
5857 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5858 .getReg(0);
5859 }
5860 }
5861 MI.eraseFromParent();
5862 return Legalized;
5863}
5864
5865 LegalizerHelper::LegalizeResult
5866 LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5867 unsigned int TypeIdx,
5868 LLT NarrowTy) {
5869 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5870 MI.getFirst3RegLLTs();
5871 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5872 DstTy != NarrowTy)
5873 return UnableToLegalize;
5874
5875 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5876 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5877 "Unexpected vecreduce opcode");
5878 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5879 ? TargetOpcode::G_FADD
5880 : TargetOpcode::G_FMUL;
5881
5882 SmallVector<Register> SplitSrcs;
5883 unsigned NumParts = SrcTy.getNumElements();
5884 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5885 Register Acc = ScalarReg;
5886 for (unsigned i = 0; i < NumParts; i++)
5887 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5888 .getReg(0);
5889
5890 MIRBuilder.buildCopy(DstReg, Acc);
5891 MI.eraseFromParent();
5892 return Legalized;
5893}
5894
5895 LegalizerHelper::LegalizeResult
5896 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5897 LLT SrcTy, LLT NarrowTy,
5898 unsigned ScalarOpc) {
5899 SmallVector<Register> SplitSrcs;
5900 // Split the sources into NarrowTy size pieces.
5901 extractParts(SrcReg, NarrowTy,
5902 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5903 MIRBuilder, MRI);
5904 // We're going to do a tree reduction using vector operations until we have
5905 // one NarrowTy size value left.
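// e.g. <16 x s32> reduced with NarrowTy = <4 x s32>: 4 sub-vectors -> 2 -> 1,
// after which the final G_VECREDUCE_* runs on a single <4 x s32>.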
5906 while (SplitSrcs.size() > 1) {
5907 SmallVector<Register> PartialRdxs;
5908 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5909 Register LHS = SplitSrcs[Idx];
5910 Register RHS = SplitSrcs[Idx + 1];
5911 // Create the intermediate vector op.
5912 Register Res =
5913 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5914 PartialRdxs.push_back(Res);
5915 }
5916 SplitSrcs = std::move(PartialRdxs);
5917 }
5918 // Finally generate the requested NarrowTy based reduction.
5919 Observer.changingInstr(MI);
5920 MI.getOperand(1).setReg(SplitSrcs[0]);
5921 Observer.changedInstr(MI);
5922 return Legalized;
5923}
5924
5925 LegalizerHelper::LegalizeResult
5926 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5927 const LLT HalfTy, const LLT AmtTy) {
5928
5929 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5930 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5931 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5932
5933 if (Amt.isZero()) {
5934 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5935 MI.eraseFromParent();
5936 return Legalized;
5937 }
5938
5939 LLT NVT = HalfTy;
5940 unsigned NVTBits = HalfTy.getSizeInBits();
5941 unsigned VTBits = 2 * NVTBits;
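// e.g. when narrowing an s64 shift, HalfTy is s32, so NVTBits = 32 and VTBits = 64.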
5942
5943 SrcOp Lo(Register(0)), Hi(Register(0));
5944 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5945 if (Amt.ugt(VTBits)) {
5946 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5947 } else if (Amt.ugt(NVTBits)) {
5948 Lo = MIRBuilder.buildConstant(NVT, 0);
5949 Hi = MIRBuilder.buildShl(NVT, InL,
5950 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5951 } else if (Amt == NVTBits) {
5952 Lo = MIRBuilder.buildConstant(NVT, 0);
5953 Hi = InL;
5954 } else {
5955 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5956 auto OrLHS =
5957 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5958 auto OrRHS = MIRBuilder.buildLShr(
5959 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5960 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5961 }
5962 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5963 if (Amt.ugt(VTBits)) {
5964 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5965 } else if (Amt.ugt(NVTBits)) {
5966 Lo = MIRBuilder.buildLShr(NVT, InH,
5967 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5968 Hi = MIRBuilder.buildConstant(NVT, 0);
5969 } else if (Amt == NVTBits) {
5970 Lo = InH;
5971 Hi = MIRBuilder.buildConstant(NVT, 0);
5972 } else {
5973 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5974
5975 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5976 auto OrRHS = MIRBuilder.buildShl(
5977 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5978
5979 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5980 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5981 }
5982 } else {
5983 if (Amt.ugt(VTBits)) {
5984 Hi = Lo = MIRBuilder.buildAShr(
5985 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5986 } else if (Amt.ugt(NVTBits)) {
5987 Lo = MIRBuilder.buildAShr(NVT, InH,
5988 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5989 Hi = MIRBuilder.buildAShr(NVT, InH,
5990 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5991 } else if (Amt == NVTBits) {
5992 Lo = InH;
5993 Hi = MIRBuilder.buildAShr(NVT, InH,
5994 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5995 } else {
5996 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5997
5998 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5999 auto OrRHS = MIRBuilder.buildShl(
6000 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6001
6002 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6003 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6004 }
6005 }
6006
6007 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6008 MI.eraseFromParent();
6009
6010 return Legalized;
6011}
6012
6013 LegalizerHelper::LegalizeResult
6014 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6015 LLT RequestedTy) {
6016 if (TypeIdx == 1) {
6017 Observer.changingInstr(MI);
6018 narrowScalarSrc(MI, RequestedTy, 2);
6019 Observer.changedInstr(MI);
6020 return Legalized;
6021 }
6022
6023 Register DstReg = MI.getOperand(0).getReg();
6024 LLT DstTy = MRI.getType(DstReg);
6025 if (DstTy.isVector())
6026 return UnableToLegalize;
6027
6028 Register Amt = MI.getOperand(2).getReg();
6029 LLT ShiftAmtTy = MRI.getType(Amt);
6030 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6031 if (DstEltSize % 2 != 0)
6032 return UnableToLegalize;
6033
6034 // Check if we should use multi-way splitting instead of recursive binary
6035 // splitting.
6036 //
6037 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6038 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6039 // and dependency chains created by the usual binary splitting approach
6040 // (128->64->32).
6041 //
6042 // The >= 8 parts threshold ensures we only use this optimization when binary
6043 // splitting would require multiple recursive passes, avoiding overhead for
6044 // simple 2-way splits where the binary approach is sufficient.
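// e.g. narrowing an s256 shift to s32 gives NumParts = 8 and takes the multiway
// path, while an s128 -> s64 split (2 parts) keeps the binary approach.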
6045 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6046 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6047 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6048 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6049 // steps).
6050 if (NumParts >= 8)
6051 return narrowScalarShiftMultiway(MI, RequestedTy);
6052 }
6053
6054 // Fall back to binary splitting:
6055 // Ignore the input type. We can only go to exactly half the size of the
6056 // input. If that isn't small enough, the resulting pieces will be further
6057 // legalized.
6058 const unsigned NewBitSize = DstEltSize / 2;
6059 const LLT HalfTy = LLT::scalar(NewBitSize);
6060 const LLT CondTy = LLT::scalar(1);
6061
6062 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6063 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6064 ShiftAmtTy);
6065 }
6066
6067 // TODO: Expand with known bits.
6068
6069 // Handle the fully general expansion by an unknown amount.
6070 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6071
6072 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6073 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6074 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6075
6076 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6077 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6078
6079 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6080 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6081 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6082
6083 Register ResultRegs[2];
6084 switch (MI.getOpcode()) {
6085 case TargetOpcode::G_SHL: {
6086 // Short: ShAmt < NewBitSize
6087 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6088
6089 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6090 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6091 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6092
6093 // Long: ShAmt >= NewBitSize
6094 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6095 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6096
6097 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6098 auto Hi = MIRBuilder.buildSelect(
6099 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6100
6101 ResultRegs[0] = Lo.getReg(0);
6102 ResultRegs[1] = Hi.getReg(0);
6103 break;
6104 }
6105 case TargetOpcode::G_LSHR:
6106 case TargetOpcode::G_ASHR: {
6107 // Short: ShAmt < NewBitSize
6108 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6109
6110 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6111 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6112 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6113
6114 // Long: ShAmt >= NewBitSize
6115 MachineInstrBuilder HiL;
6116 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6117 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6118 } else {
6119 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6120 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6121 }
6122 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6123 {InH, AmtExcess}); // Lo from Hi part.
6124
6125 auto Lo = MIRBuilder.buildSelect(
6126 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6127
6128 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6129
6130 ResultRegs[0] = Lo.getReg(0);
6131 ResultRegs[1] = Hi.getReg(0);
6132 break;
6133 }
6134 default:
6135 llvm_unreachable("not a shift");
6136 }
6137
6138 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6139 MI.eraseFromParent();
6140 return Legalized;
6141}
6142
6143 Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6144 unsigned PartIdx,
6145 unsigned NumParts,
6146 ArrayRef<Register> SrcParts,
6147 const ShiftParams &Params,
6148 LLT TargetTy, LLT ShiftAmtTy) {
6149 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6150 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6151 assert(WordShiftConst && BitShiftConst && "Expected constants");
6152
6153 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6154 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6155 const bool NeedsInterWordShift = ShiftBits != 0;
6156
6157 switch (Opcode) {
6158 case TargetOpcode::G_SHL: {
6159 // Data moves from lower indices to higher indices
6160 // If this part would come from a source beyond our range, it's zero
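// e.g. with 4 parts and ShiftWords = 1, result part 0 is zero and result part I
// takes its main bits from source part I - 1.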
6161 if (PartIdx < ShiftWords)
6162 return Params.Zero;
6163
6164 unsigned SrcIdx = PartIdx - ShiftWords;
6165 if (!NeedsInterWordShift)
6166 return SrcParts[SrcIdx];
6167
6168 // Combine shifted main part with carry from previous part
6169 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6170 if (SrcIdx > 0) {
6171 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6172 Params.InvBitShift);
6173 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6174 }
6175 return Hi.getReg(0);
6176 }
6177
6178 case TargetOpcode::G_LSHR: {
6179 unsigned SrcIdx = PartIdx + ShiftWords;
6180 if (SrcIdx >= NumParts)
6181 return Params.Zero;
6182 if (!NeedsInterWordShift)
6183 return SrcParts[SrcIdx];
6184
6185 // Combine shifted main part with carry from next part
6186 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6187 if (SrcIdx + 1 < NumParts) {
6188 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6189 Params.InvBitShift);
6190 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6191 }
6192 return Lo.getReg(0);
6193 }
6194
6195 case TargetOpcode::G_ASHR: {
6196 // Like LSHR but preserves sign bit
6197 unsigned SrcIdx = PartIdx + ShiftWords;
6198 if (SrcIdx >= NumParts)
6199 return Params.SignBit;
6200 if (!NeedsInterWordShift)
6201 return SrcParts[SrcIdx];
6202
6203 // Only the original MSB part uses arithmetic shift to preserve sign. All
6204 // other parts use logical shift since they're just moving data bits.
6205 auto Lo =
6206 (SrcIdx == NumParts - 1)
6207 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6208 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6209 Register HiSrc =
6210 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6211 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6212 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6213 }
6214
6215 default:
6216 llvm_unreachable("not a shift");
6217 }
6218}
6219
6220 Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6221 Register MainOperand,
6222 Register ShiftAmt,
6223 LLT TargetTy,
6224 Register CarryOperand) {
6225 // This helper generates a single output part for variable shifts by combining
6226 // the main operand (shifted by BitShift) with carry bits from an adjacent
6227 // part.
6228
6229 // For G_ASHR, individual parts don't have their own sign bit, only the
6230 // complete value does. So we use LSHR for the main operand shift in ASHR
6231 // context.
6232 unsigned MainOpcode =
6233 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6234
6235 // Perform the primary shift on the main operand
6236 Register MainShifted =
6237 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6238 .getReg(0);
6239
6240 // No carry operand available
6241 if (!CarryOperand.isValid())
6242 return MainShifted;
6243
6244 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6245 // so carry bits aren't needed.
6246 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6247 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6248 LLT BoolTy = LLT::scalar(1);
6249 auto IsZeroBitShift =
6250 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6251
6252 // Extract bits from the adjacent part that will "carry over" into this part.
6253 // The carry direction is opposite to the main shift direction, so we can
6254 // align the two shifted values before combining them with OR.
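// For example, for a G_SHL the carry bits are the high BitShift bits of the
// next-lower word, extracted with a G_LSHR by TargetBits - BitShift.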
6255
6256 // Determine the carry shift opcode (opposite direction)
6257 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6258 : TargetOpcode::G_SHL;
6259
6260 // Calculate inverse shift amount: BitWidth - ShiftAmt
6261 auto TargetBitsConst =
6262 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6263 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6264
6265 // Shift the carry operand
6266 Register CarryBits =
6268 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6269 .getReg(0);
6270
6271 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6272 // TargetBits which would be poison for the individual carry shift operation).
6273 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6274 Register SafeCarryBits =
6275 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6276 .getReg(0);
6277
6278 // Combine the main shifted part with the carry bits
6279 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6280}
6281
6282 LegalizerHelper::LegalizeResult
6283 LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6284 const APInt &Amt,
6285 LLT TargetTy,
6286 LLT ShiftAmtTy) {
6287 // Any wide shift can be decomposed into WordShift + BitShift components.
6288 // When shift amount is known constant, directly compute the decomposition
6289 // values and generate constant registers.
6290 Register DstReg = MI.getOperand(0).getReg();
6291 Register SrcReg = MI.getOperand(1).getReg();
6292 LLT DstTy = MRI.getType(DstReg);
6293
6294 const unsigned DstBits = DstTy.getScalarSizeInBits();
6295 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6296 const unsigned NumParts = DstBits / TargetBits;
6297
6298 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6299
6300 // When the shift amount is known at compile time, we just calculate which
6301 // source parts contribute to each output part.
6302
6303 SmallVector<Register, 8> SrcParts;
6304 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6305
6306 if (Amt.isZero()) {
6307 // No shift needed, just copy
6308 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6309 MI.eraseFromParent();
6310 return Legalized;
6311 }
6312
6313 ShiftParams Params;
6314 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6315 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
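// e.g. a 128-bit shift by 50 with 32-bit parts gives ShiftWords = 1 and
// ShiftBits = 18.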
6316
6317 // Generate constants and values needed by all shift types
6318 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6319 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6320 Params.InvBitShift =
6321 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6322 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6323
6324 // For ASHR, we need the sign-extended value to fill shifted-out positions
6325 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6326 Params.SignBit =
6328 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6329 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6330 .getReg(0);
6331
6332 SmallVector<Register, 8> DstParts(NumParts);
6333 for (unsigned I = 0; I < NumParts; ++I)
6334 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6335 Params, TargetTy, ShiftAmtTy);
6336
6337 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6338 MI.eraseFromParent();
6339 return Legalized;
6340}
6341
6342 LegalizerHelper::LegalizeResult
6343 LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6344 Register DstReg = MI.getOperand(0).getReg();
6345 Register SrcReg = MI.getOperand(1).getReg();
6346 Register AmtReg = MI.getOperand(2).getReg();
6347 LLT DstTy = MRI.getType(DstReg);
6348 LLT ShiftAmtTy = MRI.getType(AmtReg);
6349
6350 const unsigned DstBits = DstTy.getScalarSizeInBits();
6351 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6352 const unsigned NumParts = DstBits / TargetBits;
6353
6354 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6355 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6356
6357 // If the shift amount is known at compile time, we can use direct indexing
6358 // instead of generating select chains in the general case.
6359 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6360 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6361 ShiftAmtTy);
6362
6363 // For runtime-variable shift amounts, we must generate a more complex
6364 // sequence that handles all possible shift values using select chains.
6365
6366 // Split the input into target-sized pieces
6367 SmallVector<Register, 8> SrcParts;
6368 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6369
6370 // Shifting by zero should be a no-op.
6371 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6372 LLT BoolTy = LLT::scalar(1);
6373 auto IsZeroShift =
6374 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6375
6376 // Any wide shift can be decomposed into two components:
6377 // 1. WordShift: number of complete target-sized words to shift
6378 // 2. BitShift: number of bits to shift within each word
6379 //
6380 // Example: 128-bit >> 50 with 32-bit target:
6381 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6382 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6383 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6384 auto TargetBitsLog2Const =
6385 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6386 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6387
6388 Register WordShift =
6389 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6390 Register BitShift =
6391 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6392
6393 // Fill values:
6394 // - SHL/LSHR: fill with zeros
6395 // - ASHR: fill with sign-extended MSB
6396 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6397
6398 Register FillValue;
6399 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6400 auto TargetBitsMinusOneConst =
6401 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6402 FillValue = MIRBuilder
6403 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6404 TargetBitsMinusOneConst)
6405 .getReg(0);
6406 } else {
6407 FillValue = ZeroReg;
6408 }
6409
6410 SmallVector<Register, 8> DstParts(NumParts);
6411
6412 // For each output part, generate a select chain that chooses the correct
6413 // result based on the runtime WordShift value. This handles all possible
6414 // word shift amounts by pre-calculating what each would produce.
6415 for (unsigned I = 0; I < NumParts; ++I) {
6416 // Initialize with appropriate default value for this shift type
6417 Register InBoundsResult = FillValue;
6418
6419 // clang-format off
6420 // Build a branchless select chain by pre-computing results for all possible
6421 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6422 //
6423 // K=0: select(WordShift==0, result0, FillValue)
6424 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6425 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6426 // clang-format on
6427 for (unsigned K = 0; K < NumParts; ++K) {
6428 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6429 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6430 WordShift, WordShiftKConst);
6431
6432 // Calculate source indices for this word shift
6433 //
6434 // For 4-part 128-bit value with K=1 word shift:
6435 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6436 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6437 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6438 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6439 int MainSrcIdx;
6440 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6441
6442 switch (MI.getOpcode()) {
6443 case TargetOpcode::G_SHL:
6444 MainSrcIdx = (int)I - (int)K;
6445 CarrySrcIdx = MainSrcIdx - 1;
6446 break;
6447 case TargetOpcode::G_LSHR:
6448 case TargetOpcode::G_ASHR:
6449 MainSrcIdx = (int)I + (int)K;
6450 CarrySrcIdx = MainSrcIdx + 1;
6451 break;
6452 default:
6453 llvm_unreachable("Not a shift");
6454 }
6455
6456 // Check bounds and build the result for this word shift
6457 Register ResultForK;
6458 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6459 Register MainOp = SrcParts[MainSrcIdx];
6460 Register CarryOp;
6461
6462 // Determine carry operand with bounds checking
6463 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6464 CarryOp = SrcParts[CarrySrcIdx];
6465 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6466 CarrySrcIdx >= (int)NumParts)
6467 CarryOp = FillValue; // Use sign extension
6468
6469 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6470 TargetTy, CarryOp);
6471 } else {
6472 // Out of bounds - use fill value for this k
6473 ResultForK = FillValue;
6474 }
6475
6476 // Select this result if WordShift equals k
6477 InBoundsResult =
6479 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6480 .getReg(0);
6481 }
6482
6483 // Handle zero-shift special case: if shift is 0, use original input
6484 DstParts[I] =
6486 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6487 .getReg(0);
6488 }
6489
6490 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6491 MI.eraseFromParent();
6492 return Legalized;
6493}
6494
6495 LegalizerHelper::LegalizeResult
6496 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6497 LLT MoreTy) {
6498 assert(TypeIdx == 0 && "Expecting only Idx 0");
6499
6500 Observer.changingInstr(MI);
6501 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6502 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6503 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6504 moreElementsVectorSrc(MI, MoreTy, I);
6505 }
6506
6507 MachineBasicBlock &MBB = *MI.getParent();
6508 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6509 moreElementsVectorDst(MI, MoreTy, 0);
6510 Observer.changedInstr(MI);
6511 return Legalized;
6512}
6513
6514MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6515 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6516 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6517
6518 switch (Opcode) {
6519 default:
6521 "getNeutralElementForVecReduce called with invalid opcode!");
6522 case TargetOpcode::G_VECREDUCE_ADD:
6523 case TargetOpcode::G_VECREDUCE_OR:
6524 case TargetOpcode::G_VECREDUCE_XOR:
6525 case TargetOpcode::G_VECREDUCE_UMAX:
6526 return MIRBuilder.buildConstant(Ty, 0);
6527 case TargetOpcode::G_VECREDUCE_MUL:
6528 return MIRBuilder.buildConstant(Ty, 1);
6529 case TargetOpcode::G_VECREDUCE_AND:
6530 case TargetOpcode::G_VECREDUCE_UMIN:
6531 return MIRBuilder.buildConstant(
6532 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6533 case TargetOpcode::G_VECREDUCE_SMAX:
6534 return MIRBuilder.buildConstant(
6535 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6536 case TargetOpcode::G_VECREDUCE_SMIN:
6537 return MIRBuilder.buildConstant(
6538 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6539 case TargetOpcode::G_VECREDUCE_FADD:
6540 return MIRBuilder.buildFConstant(Ty, -0.0);
6541 case TargetOpcode::G_VECREDUCE_FMUL:
6542 return MIRBuilder.buildFConstant(Ty, 1.0);
6543 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6544 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6545 assert(false && "getNeutralElementForVecReduce unimplemented for "
6546 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6547 }
6548 llvm_unreachable("switch expected to return!");
6549}
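// Editor's note (illustrative, not part of the LLVM sources): padding the
// input vector with the neutral element keeps the reduction value unchanged,
// which is what makes the widening performed by the caller legal. For
// example, when widening a three-element reduction to four elements:
//
//   umin(a, b, c) == umin(a, b, c, UINT32_MAX)  // all-ones is UMIN's identity
//   add(a, b, c)  == add(a, b, c, 0)            // 0 for ADD/OR/XOR, 1 for MUL
//
// and -0.0 is used for FADD because -0.0 + x == x for every x, including +0.0.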
6550
6551LegalizerHelper::LegalizeResult
6552LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6553 LLT MoreTy) {
6554 unsigned Opc = MI.getOpcode();
6555 switch (Opc) {
6556 case TargetOpcode::G_IMPLICIT_DEF:
6557 case TargetOpcode::G_LOAD: {
6558 if (TypeIdx != 0)
6559 return UnableToLegalize;
6560 Observer.changingInstr(MI);
6561 moreElementsVectorDst(MI, MoreTy, 0);
6562 Observer.changedInstr(MI);
6563 return Legalized;
6564 }
6565 case TargetOpcode::G_STORE:
6566 if (TypeIdx != 0)
6567 return UnableToLegalize;
6568 Observer.changingInstr(MI);
6569 moreElementsVectorSrc(MI, MoreTy, 0);
6570 Observer.changedInstr(MI);
6571 return Legalized;
6572 case TargetOpcode::G_AND:
6573 case TargetOpcode::G_OR:
6574 case TargetOpcode::G_XOR:
6575 case TargetOpcode::G_ADD:
6576 case TargetOpcode::G_SUB:
6577 case TargetOpcode::G_MUL:
6578 case TargetOpcode::G_FADD:
6579 case TargetOpcode::G_FSUB:
6580 case TargetOpcode::G_FMUL:
6581 case TargetOpcode::G_FDIV:
6582 case TargetOpcode::G_FCOPYSIGN:
6583 case TargetOpcode::G_UADDSAT:
6584 case TargetOpcode::G_USUBSAT:
6585 case TargetOpcode::G_SADDSAT:
6586 case TargetOpcode::G_SSUBSAT:
6587 case TargetOpcode::G_SMIN:
6588 case TargetOpcode::G_SMAX:
6589 case TargetOpcode::G_UMIN:
6590 case TargetOpcode::G_UMAX:
6591 case TargetOpcode::G_FMINNUM:
6592 case TargetOpcode::G_FMAXNUM:
6593 case TargetOpcode::G_FMINNUM_IEEE:
6594 case TargetOpcode::G_FMAXNUM_IEEE:
6595 case TargetOpcode::G_FMINIMUM:
6596 case TargetOpcode::G_FMAXIMUM:
6597 case TargetOpcode::G_FMINIMUMNUM:
6598 case TargetOpcode::G_FMAXIMUMNUM:
6599 case TargetOpcode::G_STRICT_FADD:
6600 case TargetOpcode::G_STRICT_FSUB:
6601 case TargetOpcode::G_STRICT_FMUL:
6602 case TargetOpcode::G_SHL:
6603 case TargetOpcode::G_ASHR:
6604 case TargetOpcode::G_LSHR: {
6605 Observer.changingInstr(MI);
6606 moreElementsVectorSrc(MI, MoreTy, 1);
6607 moreElementsVectorSrc(MI, MoreTy, 2);
6608 moreElementsVectorDst(MI, MoreTy, 0);
6609 Observer.changedInstr(MI);
6610 return Legalized;
6611 }
6612 case TargetOpcode::G_FMA:
6613 case TargetOpcode::G_STRICT_FMA:
6614 case TargetOpcode::G_FSHR:
6615 case TargetOpcode::G_FSHL: {
6616 Observer.changingInstr(MI);
6617 moreElementsVectorSrc(MI, MoreTy, 1);
6618 moreElementsVectorSrc(MI, MoreTy, 2);
6619 moreElementsVectorSrc(MI, MoreTy, 3);
6620 moreElementsVectorDst(MI, MoreTy, 0);
6621 Observer.changedInstr(MI);
6622 return Legalized;
6623 }
6624 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6625 case TargetOpcode::G_EXTRACT:
6626 if (TypeIdx != 1)
6627 return UnableToLegalize;
6628 Observer.changingInstr(MI);
6629 moreElementsVectorSrc(MI, MoreTy, 1);
6630 Observer.changedInstr(MI);
6631 return Legalized;
6632 case TargetOpcode::G_INSERT:
6633 case TargetOpcode::G_INSERT_VECTOR_ELT:
6634 case TargetOpcode::G_FREEZE:
6635 case TargetOpcode::G_FNEG:
6636 case TargetOpcode::G_FABS:
6637 case TargetOpcode::G_FSQRT:
6638 case TargetOpcode::G_FCEIL:
6639 case TargetOpcode::G_FFLOOR:
6640 case TargetOpcode::G_FNEARBYINT:
6641 case TargetOpcode::G_FRINT:
6642 case TargetOpcode::G_INTRINSIC_ROUND:
6643 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6644 case TargetOpcode::G_INTRINSIC_TRUNC:
6645 case TargetOpcode::G_BITREVERSE:
6646 case TargetOpcode::G_BSWAP:
6647 case TargetOpcode::G_FCANONICALIZE:
6648 case TargetOpcode::G_SEXT_INREG:
6649 case TargetOpcode::G_ABS:
6650 case TargetOpcode::G_CTLZ:
6651 case TargetOpcode::G_CTPOP:
6652 if (TypeIdx != 0)
6653 return UnableToLegalize;
6654 Observer.changingInstr(MI);
6655 moreElementsVectorSrc(MI, MoreTy, 1);
6656 moreElementsVectorDst(MI, MoreTy, 0);
6657 Observer.changedInstr(MI);
6658 return Legalized;
6659 case TargetOpcode::G_SELECT: {
6660 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6661 if (TypeIdx == 1) {
6662 if (!CondTy.isScalar() ||
6663 DstTy.getElementCount() != MoreTy.getElementCount())
6664 return UnableToLegalize;
6665
6666 // This is turning a scalar select of vectors into a vector
6667 // select. Broadcast the select condition.
6668 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6669 Observer.changingInstr(MI);
6670 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6671 Observer.changedInstr(MI);
6672 return Legalized;
6673 }
6674
6675 if (CondTy.isVector())
6676 return UnableToLegalize;
6677
6678 Observer.changingInstr(MI);
6679 moreElementsVectorSrc(MI, MoreTy, 2);
6680 moreElementsVectorSrc(MI, MoreTy, 3);
6681 moreElementsVectorDst(MI, MoreTy, 0);
6682 Observer.changedInstr(MI);
6683 return Legalized;
6684 }
6685 case TargetOpcode::G_UNMERGE_VALUES:
6686 return UnableToLegalize;
6687 case TargetOpcode::G_PHI:
6688 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6689 case TargetOpcode::G_SHUFFLE_VECTOR:
6690 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6691 case TargetOpcode::G_BUILD_VECTOR: {
6692 SmallVector<SrcOp, 8> Elts;
6693 for (auto Op : MI.uses()) {
6694 Elts.push_back(Op.getReg());
6695 }
6696
6697 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6698 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6699 }
6700
6701 MIRBuilder.buildDeleteTrailingVectorElements(
6702 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6703 MI.eraseFromParent();
6704 return Legalized;
6705 }
6706 case TargetOpcode::G_SEXT:
6707 case TargetOpcode::G_ZEXT:
6708 case TargetOpcode::G_ANYEXT:
6709 case TargetOpcode::G_TRUNC:
6710 case TargetOpcode::G_FPTRUNC:
6711 case TargetOpcode::G_FPEXT:
6712 case TargetOpcode::G_FPTOSI:
6713 case TargetOpcode::G_FPTOUI:
6714 case TargetOpcode::G_FPTOSI_SAT:
6715 case TargetOpcode::G_FPTOUI_SAT:
6716 case TargetOpcode::G_SITOFP:
6717 case TargetOpcode::G_UITOFP: {
6718 Observer.changingInstr(MI);
6719 LLT SrcExtTy;
6720 LLT DstExtTy;
6721 if (TypeIdx == 0) {
6722 DstExtTy = MoreTy;
6723 SrcExtTy = LLT::fixed_vector(
6724 MoreTy.getNumElements(),
6725 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6726 } else {
6727 DstExtTy = LLT::fixed_vector(
6728 MoreTy.getNumElements(),
6729 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6730 SrcExtTy = MoreTy;
6731 }
6732 moreElementsVectorSrc(MI, SrcExtTy, 1);
6733 moreElementsVectorDst(MI, DstExtTy, 0);
6734 Observer.changedInstr(MI);
6735 return Legalized;
6736 }
6737 case TargetOpcode::G_ICMP:
6738 case TargetOpcode::G_FCMP: {
6739 if (TypeIdx != 1)
6740 return UnableToLegalize;
6741
6742 Observer.changingInstr(MI);
6743 moreElementsVectorSrc(MI, MoreTy, 2);
6744 moreElementsVectorSrc(MI, MoreTy, 3);
6745 LLT CondTy = LLT::fixed_vector(
6746 MoreTy.getNumElements(),
6747 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6748 moreElementsVectorDst(MI, CondTy, 0);
6749 Observer.changedInstr(MI);
6750 return Legalized;
6751 }
6752 case TargetOpcode::G_BITCAST: {
6753 if (TypeIdx != 0)
6754 return UnableToLegalize;
6755
6756 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6757 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6758
6759 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6760 if (coefficient % DstTy.getNumElements() != 0)
6761 return UnableToLegalize;
6762
6763 coefficient = coefficient / DstTy.getNumElements();
6764
6765 LLT NewTy = SrcTy.changeElementCount(
6766 ElementCount::get(coefficient, MoreTy.isScalable()));
6767 Observer.changingInstr(MI);
6768 moreElementsVectorSrc(MI, NewTy, 1);
6769 moreElementsVectorDst(MI, MoreTy, 0);
6770 Observer.changedInstr(MI);
6771 return Legalized;
6772 }
6773 case TargetOpcode::G_VECREDUCE_FADD:
6774 case TargetOpcode::G_VECREDUCE_FMUL:
6775 case TargetOpcode::G_VECREDUCE_ADD:
6776 case TargetOpcode::G_VECREDUCE_MUL:
6777 case TargetOpcode::G_VECREDUCE_AND:
6778 case TargetOpcode::G_VECREDUCE_OR:
6779 case TargetOpcode::G_VECREDUCE_XOR:
6780 case TargetOpcode::G_VECREDUCE_SMAX:
6781 case TargetOpcode::G_VECREDUCE_SMIN:
6782 case TargetOpcode::G_VECREDUCE_UMAX:
6783 case TargetOpcode::G_VECREDUCE_UMIN: {
6784 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6785 MachineOperand &MO = MI.getOperand(1);
6786 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6787 auto NeutralElement = getNeutralElementForVecReduce(
6788 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6789
6790 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6791 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6792 i != e; i++) {
6793 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6794 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6795 NeutralElement, Idx);
6796 }
6797
6798 Observer.changingInstr(MI);
6799 MO.setReg(NewVec.getReg(0));
6800 Observer.changedInstr(MI);
6801 return Legalized;
6802 }
6803
6804 default:
6805 return UnableToLegalize;
6806 }
6807}
6808
6809LegalizerHelper::LegalizeResult
6810LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6811 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6812 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6813 unsigned MaskNumElts = Mask.size();
6814 unsigned SrcNumElts = SrcTy.getNumElements();
6815 LLT DestEltTy = DstTy.getElementType();
6816
6817 if (MaskNumElts == SrcNumElts)
6818 return Legalized;
6819
6820 if (MaskNumElts < SrcNumElts) {
6821 // Extend mask to match new destination vector size with
6822 // undef values.
6823 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6824 llvm::copy(Mask, NewMask.begin());
6825
6826 moreElementsVectorDst(MI, SrcTy, 0);
6827 MIRBuilder.setInstrAndDebugLoc(MI);
6828 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6829 MI.getOperand(1).getReg(),
6830 MI.getOperand(2).getReg(), NewMask);
6831 MI.eraseFromParent();
6832
6833 return Legalized;
6834 }
6835
6836 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6837 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6838 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6839
6840 // Create new source vectors by concatenating the initial
6841 // source vectors with undefined vectors of the same size.
6842 auto Undef = MIRBuilder.buildUndef(SrcTy);
6843 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6844 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6845 MOps1[0] = MI.getOperand(1).getReg();
6846 MOps2[0] = MI.getOperand(2).getReg();
6847
6848 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6849 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6850
6851 // Readjust mask for new input vector length.
6852 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6853 for (unsigned I = 0; I != MaskNumElts; ++I) {
6854 int Idx = Mask[I];
6855 if (Idx >= static_cast<int>(SrcNumElts))
6856 Idx += PaddedMaskNumElts - SrcNumElts;
6857 MappedOps[I] = Idx;
6858 }
6859
6860 // If we got more elements than required, extract subvector.
6861 if (MaskNumElts != PaddedMaskNumElts) {
6862 auto Shuffle =
6863 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6864
6865 SmallVector<Register, 16> Elts(MaskNumElts);
6866 for (unsigned I = 0; I < MaskNumElts; ++I) {
6867 Elts[I] =
6868 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6869 .getReg(0);
6870 }
6871 MIRBuilder.buildBuildVector(DstReg, Elts);
6872 } else {
6873 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6874 }
6875
6876 MI.eraseFromParent();
6877 return Legalized;
6878}
6879
6880LegalizerHelper::LegalizeResult
6881LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6882 unsigned int TypeIdx, LLT MoreTy) {
6883 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6884 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6885 unsigned NumElts = DstTy.getNumElements();
6886 unsigned WidenNumElts = MoreTy.getNumElements();
6887
6888 if (DstTy.isVector() && Src1Ty.isVector() &&
6889 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6890 return equalizeVectorShuffleLengths(MI);
6891 }
6892
6893 if (TypeIdx != 0)
6894 return UnableToLegalize;
6895
6896 // Expect a canonicalized shuffle.
6897 if (DstTy != Src1Ty || DstTy != Src2Ty)
6898 return UnableToLegalize;
6899
6900 moreElementsVectorSrc(MI, MoreTy, 1);
6901 moreElementsVectorSrc(MI, MoreTy, 2);
6902
6903 // Adjust mask based on new input vector length.
6904 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6905 for (unsigned I = 0; I != NumElts; ++I) {
6906 int Idx = Mask[I];
6907 if (Idx < static_cast<int>(NumElts))
6908 NewMask[I] = Idx;
6909 else
6910 NewMask[I] = Idx - NumElts + WidenNumElts;
6911 }
6912 moreElementsVectorDst(MI, MoreTy, 0);
6913 MIRBuilder.setInstrAndDebugLoc(MI);
6914 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6915 MI.getOperand(1).getReg(),
6916 MI.getOperand(2).getReg(), NewMask);
6917 MI.eraseFromParent();
6918 return Legalized;
6919}
6920
6921void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6922 ArrayRef<Register> Src1Regs,
6923 ArrayRef<Register> Src2Regs,
6924 LLT NarrowTy) {
6925 MachineIRBuilder &B = MIRBuilder;
6926 unsigned SrcParts = Src1Regs.size();
6927 unsigned DstParts = DstRegs.size();
6928
6929 unsigned DstIdx = 0; // Low bits of the result.
6930 Register FactorSum =
6931 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6932 DstRegs[DstIdx] = FactorSum;
6933
6934 Register CarrySumPrevDstIdx;
6935 SmallVector<Register, 4> Factors;
6936
6937 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6938 // Collect low parts of muls for DstIdx.
6939 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6940 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6941 MachineInstrBuilder Mul =
6942 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6943 Factors.push_back(Mul.getReg(0));
6944 }
6945 // Collect high parts of muls from previous DstIdx.
6946 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6947 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6948 MachineInstrBuilder Umulh =
6949 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6950 Factors.push_back(Umulh.getReg(0));
6951 }
6952 // Add CarrySum from additions calculated for previous DstIdx.
6953 if (DstIdx != 1) {
6954 Factors.push_back(CarrySumPrevDstIdx);
6955 }
6956
6957 Register CarrySum;
6958 // Add all factors and accumulate all carries into CarrySum.
6959 if (DstIdx != DstParts - 1) {
6960 MachineInstrBuilder Uaddo =
6961 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6962 FactorSum = Uaddo.getReg(0);
6963 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6964 for (unsigned i = 2; i < Factors.size(); ++i) {
6965 MachineInstrBuilder Uaddo =
6966 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6967 FactorSum = Uaddo.getReg(0);
6968 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
6969 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6970 }
6971 } else {
6972 // Since value for the next index is not calculated, neither is CarrySum.
6973 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6974 for (unsigned i = 2; i < Factors.size(); ++i)
6975 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6976 }
6977
6978 CarrySumPrevDstIdx = CarrySum;
6979 DstRegs[DstIdx] = FactorSum;
6980 Factors.clear();
6981 }
6982}
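// Editor's illustrative sketch (not part of the LLVM sources): the low/high
// part collection above follows the usual schoolbook scheme. A plain C
// reference for a 64x64->128 bit multiply out of 32-bit parts (hypothetical
// helper name, assuming <stdint.h>); P >> 32 plays the role of G_UMULH:
//
//   void refMul64To128(uint32_t Dst[4], const uint32_t A[2],
//                      const uint32_t B[2]) {
//     uint64_t P00 = (uint64_t)A[0] * B[0], P01 = (uint64_t)A[0] * B[1];
//     uint64_t P10 = (uint64_t)A[1] * B[0], P11 = (uint64_t)A[1] * B[1];
//     uint64_t Mid = (P00 >> 32) + (uint32_t)P01 + (uint32_t)P10;
//     uint64_t Hi = (P01 >> 32) + (P10 >> 32) + (uint32_t)P11 + (Mid >> 32);
//     Dst[0] = (uint32_t)P00;
//     Dst[1] = (uint32_t)Mid;
//     Dst[2] = (uint32_t)Hi;
//     Dst[3] = (uint32_t)(P11 >> 32) + (uint32_t)(Hi >> 32);
//   }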
6983
6984LegalizerHelper::LegalizeResult
6985LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
6986 LLT NarrowTy) {
6987 if (TypeIdx != 0)
6988 return UnableToLegalize;
6989
6990 Register DstReg = MI.getOperand(0).getReg();
6991 LLT DstType = MRI.getType(DstReg);
6992 // FIXME: add support for vector types
6993 if (DstType.isVector())
6994 return UnableToLegalize;
6995
6996 unsigned Opcode = MI.getOpcode();
6997 unsigned OpO, OpE, OpF;
6998 switch (Opcode) {
6999 case TargetOpcode::G_SADDO:
7000 case TargetOpcode::G_SADDE:
7001 case TargetOpcode::G_UADDO:
7002 case TargetOpcode::G_UADDE:
7003 case TargetOpcode::G_ADD:
7004 OpO = TargetOpcode::G_UADDO;
7005 OpE = TargetOpcode::G_UADDE;
7006 OpF = TargetOpcode::G_UADDE;
7007 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7008 OpF = TargetOpcode::G_SADDE;
7009 break;
7010 case TargetOpcode::G_SSUBO:
7011 case TargetOpcode::G_SSUBE:
7012 case TargetOpcode::G_USUBO:
7013 case TargetOpcode::G_USUBE:
7014 case TargetOpcode::G_SUB:
7015 OpO = TargetOpcode::G_USUBO;
7016 OpE = TargetOpcode::G_USUBE;
7017 OpF = TargetOpcode::G_USUBE;
7018 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7019 OpF = TargetOpcode::G_SSUBE;
7020 break;
7021 default:
7022 llvm_unreachable("Unexpected add/sub opcode!");
7023 }
7024
7025 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7026 unsigned NumDefs = MI.getNumExplicitDefs();
7027 Register Src1 = MI.getOperand(NumDefs).getReg();
7028 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7029 Register CarryDst, CarryIn;
7030 if (NumDefs == 2)
7031 CarryDst = MI.getOperand(1).getReg();
7032 if (MI.getNumOperands() == NumDefs + 3)
7033 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7034
7035 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7036 LLT LeftoverTy, DummyTy;
7037 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7038 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7039 MIRBuilder, MRI);
7040 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7041 MRI);
7042
7043 int NarrowParts = Src1Regs.size();
7044 Src1Regs.append(Src1Left);
7045 Src2Regs.append(Src2Left);
7046 DstRegs.reserve(Src1Regs.size());
7047
7048 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7049 Register DstReg =
7050 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7051 Register CarryOut;
7052 // Forward the final carry-out to the destination register
7053 if (i == e - 1 && CarryDst)
7054 CarryOut = CarryDst;
7055 else
7056 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7057
7058 if (!CarryIn) {
7059 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7060 {Src1Regs[i], Src2Regs[i]});
7061 } else if (i == e - 1) {
7062 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7063 {Src1Regs[i], Src2Regs[i], CarryIn});
7064 } else {
7065 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7066 {Src1Regs[i], Src2Regs[i], CarryIn});
7067 }
7068
7069 DstRegs.push_back(DstReg);
7070 CarryIn = CarryOut;
7071 }
7072 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7073 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7074 ArrayRef(DstRegs).drop_front(NarrowParts));
7075
7076 MI.eraseFromParent();
7077 return Legalized;
7078}
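// Editor's illustrative sketch (not part of the LLVM sources): the carry
// chain built above is the classic ripple expansion. For a 128-bit G_ADD
// split into two 64-bit parts it corresponds to this C reference
// (hypothetical helper name):
//
//   void refAdd128(uint64_t Dst[2], const uint64_t A[2], const uint64_t B[2]) {
//     uint64_t Lo = A[0] + B[0];        // G_UADDO: sum plus carry-out
//     uint64_t CarryOut = Lo < A[0];    // unsigned wrap check
//     Dst[0] = Lo;
//     Dst[1] = A[1] + B[1] + CarryOut;  // G_UADDE consumes the carry-in
//   }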
7079
7080LegalizerHelper::LegalizeResult
7081LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7082 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7083
7084 LLT Ty = MRI.getType(DstReg);
7085 if (Ty.isVector())
7086 return UnableToLegalize;
7087
7088 unsigned Size = Ty.getSizeInBits();
7089 unsigned NarrowSize = NarrowTy.getSizeInBits();
7090 if (Size % NarrowSize != 0)
7091 return UnableToLegalize;
7092
7093 unsigned NumParts = Size / NarrowSize;
7094 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7095 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7096
7097 SmallVector<Register, 2> Src1Parts, Src2Parts;
7098 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7099 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7100 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7101 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7102
7103 // Take only high half of registers if this is high mul.
7104 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7105 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7106 MI.eraseFromParent();
7107 return Legalized;
7108}
7109
7110LegalizerHelper::LegalizeResult
7111LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7112 LLT NarrowTy) {
7113 if (TypeIdx != 0)
7114 return UnableToLegalize;
7115
7116 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7117
7118 Register Src = MI.getOperand(1).getReg();
7119 LLT SrcTy = MRI.getType(Src);
7120
7121 // If all finite floats fit into the narrowed integer type, we can just swap
7122 // out the result type. This is practically only useful for conversions from
7123 // half to at least 16-bits, so just handle the one case.
7124 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7125 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7126 return UnableToLegalize;
7127
7128 Observer.changingInstr(MI);
7129 narrowScalarDst(MI, NarrowTy, 0,
7130 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7131 Observer.changedInstr(MI);
7132 return Legalized;
7133}
7134
7135LegalizerHelper::LegalizeResult
7136LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7137 LLT NarrowTy) {
7138 if (TypeIdx != 1)
7139 return UnableToLegalize;
7140
7141 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7142
7143 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7144 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7145 // NarrowSize.
7146 if (SizeOp1 % NarrowSize != 0)
7147 return UnableToLegalize;
7148 int NumParts = SizeOp1 / NarrowSize;
7149
7150 SmallVector<Register, 2> SrcRegs, DstRegs;
7151 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7152 MIRBuilder, MRI);
7153
7154 Register OpReg = MI.getOperand(0).getReg();
7155 uint64_t OpStart = MI.getOperand(2).getImm();
7156 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7157 for (int i = 0; i < NumParts; ++i) {
7158 unsigned SrcStart = i * NarrowSize;
7159
7160 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7161 // No part of the extract uses this subregister, ignore it.
7162 continue;
7163 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7164 // The entire subregister is extracted, forward the value.
7165 DstRegs.push_back(SrcRegs[i]);
7166 continue;
7167 }
7168
7169 // OpSegStart is where this destination segment would start in OpReg if it
7170 // extended infinitely in both directions.
7171 int64_t ExtractOffset;
7172 uint64_t SegSize;
7173 if (OpStart < SrcStart) {
7174 ExtractOffset = 0;
7175 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7176 } else {
7177 ExtractOffset = OpStart - SrcStart;
7178 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7179 }
7180
7181 Register SegReg = SrcRegs[i];
7182 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7183 // A genuine extract is needed.
7184 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7185 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7186 }
7187
7188 DstRegs.push_back(SegReg);
7189 }
7190
7191 Register DstReg = MI.getOperand(0).getReg();
7192 if (MRI.getType(DstReg).isVector())
7193 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7194 else if (DstRegs.size() > 1)
7195 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7196 else
7197 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7198 MI.eraseFromParent();
7199 return Legalized;
7200}
7201
7202LegalizerHelper::LegalizeResult
7203LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7204 LLT NarrowTy) {
7205 // FIXME: Don't know how to handle secondary types yet.
7206 if (TypeIdx != 0)
7207 return UnableToLegalize;
7208
7209 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7210 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7211 LLT LeftoverTy;
7212 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7213 LeftoverRegs, MIRBuilder, MRI);
7214
7215 SrcRegs.append(LeftoverRegs);
7216
7217 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7218 Register OpReg = MI.getOperand(2).getReg();
7219 uint64_t OpStart = MI.getOperand(3).getImm();
7220 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7221 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7222 unsigned DstStart = I * NarrowSize;
7223
7224 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7225 // The entire subregister is defined by this insert, forward the new
7226 // value.
7227 DstRegs.push_back(OpReg);
7228 continue;
7229 }
7230
7231 Register SrcReg = SrcRegs[I];
7232 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7233 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7234 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7235 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7236 }
7237
7238 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7239 // No part of the insert affects this subregister, forward the original.
7240 DstRegs.push_back(SrcReg);
7241 continue;
7242 }
7243
7244 // OpSegStart is where this destination segment would start in OpReg if it
7245 // extended infinitely in both directions.
7246 int64_t ExtractOffset, InsertOffset;
7247 uint64_t SegSize;
7248 if (OpStart < DstStart) {
7249 InsertOffset = 0;
7250 ExtractOffset = DstStart - OpStart;
7251 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7252 } else {
7253 InsertOffset = OpStart - DstStart;
7254 ExtractOffset = 0;
7255 SegSize =
7256 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7257 }
7258
7259 Register SegReg = OpReg;
7260 if (ExtractOffset != 0 || SegSize != OpSize) {
7261 // A genuine extract is needed.
7262 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7263 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7264 }
7265
7266 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7267 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7268 DstRegs.push_back(DstReg);
7269 }
7270
7271 uint64_t WideSize = DstRegs.size() * NarrowSize;
7272 Register DstReg = MI.getOperand(0).getReg();
7273 if (WideSize > RegTy.getSizeInBits()) {
7274 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7275 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7276 MIRBuilder.buildTrunc(DstReg, MergeReg);
7277 } else
7278 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7279
7280 MI.eraseFromParent();
7281 return Legalized;
7282}
7283
7284LegalizerHelper::LegalizeResult
7285LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7286 LLT NarrowTy) {
7287 Register DstReg = MI.getOperand(0).getReg();
7288 LLT DstTy = MRI.getType(DstReg);
7289
7290 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7291
7292 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7293 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7294 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7295 LLT LeftoverTy;
7296 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7297 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7298 return UnableToLegalize;
7299
7300 LLT Unused;
7301 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7302 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7303 llvm_unreachable("inconsistent extractParts result");
7304
7305 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7306 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7307 {Src0Regs[I], Src1Regs[I]});
7308 DstRegs.push_back(Inst.getReg(0));
7309 }
7310
7311 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7312 auto Inst = MIRBuilder.buildInstr(
7313 MI.getOpcode(),
7314 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7315 DstLeftoverRegs.push_back(Inst.getReg(0));
7316 }
7317
7318 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7319 LeftoverTy, DstLeftoverRegs);
7320
7321 MI.eraseFromParent();
7322 return Legalized;
7323}
7324
7325LegalizerHelper::LegalizeResult
7326LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7327 LLT NarrowTy) {
7328 if (TypeIdx != 0)
7329 return UnableToLegalize;
7330
7331 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7332
7333 LLT DstTy = MRI.getType(DstReg);
7334 if (DstTy.isVector())
7335 return UnableToLegalize;
7336
7337 SmallVector<Register, 8> Parts;
7338 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7339 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7340 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7341
7342 MI.eraseFromParent();
7343 return Legalized;
7344}
7345
7346LegalizerHelper::LegalizeResult
7347LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7348 LLT NarrowTy) {
7349 if (TypeIdx != 0)
7350 return UnableToLegalize;
7351
7352 Register CondReg = MI.getOperand(1).getReg();
7353 LLT CondTy = MRI.getType(CondReg);
7354 if (CondTy.isVector()) // TODO: Handle vselect
7355 return UnableToLegalize;
7356
7357 Register DstReg = MI.getOperand(0).getReg();
7358 LLT DstTy = MRI.getType(DstReg);
7359
7360 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7361 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7362 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7363 LLT LeftoverTy;
7364 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7365 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7366 return UnableToLegalize;
7367
7368 LLT Unused;
7369 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7370 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7371 llvm_unreachable("inconsistent extractParts result");
7372
7373 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7374 auto Select = MIRBuilder.buildSelect(NarrowTy,
7375 CondReg, Src1Regs[I], Src2Regs[I]);
7376 DstRegs.push_back(Select.getReg(0));
7377 }
7378
7379 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7380 auto Select = MIRBuilder.buildSelect(
7381 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7382 DstLeftoverRegs.push_back(Select.getReg(0));
7383 }
7384
7385 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7386 LeftoverTy, DstLeftoverRegs);
7387
7388 MI.eraseFromParent();
7389 return Legalized;
7390}
7391
7392LegalizerHelper::LegalizeResult
7393LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7394 LLT NarrowTy) {
7395 if (TypeIdx != 1)
7396 return UnableToLegalize;
7397
7398 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7399 unsigned NarrowSize = NarrowTy.getSizeInBits();
7400
7401 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7402 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7403
7405 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7406 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7407 auto C_0 = B.buildConstant(NarrowTy, 0);
7408 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7409 UnmergeSrc.getReg(1), C_0);
7410 auto LoCTLZ = IsUndef ?
7411 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7412 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7413 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7414 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7415 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7416 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7417
7418 MI.eraseFromParent();
7419 return Legalized;
7420 }
7421
7422 return UnableToLegalize;
7423}
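// Editor's illustrative sketch (not part of the LLVM sources): the narrowed
// CTLZ above matches this C reference for a 64-bit value split into two
// 32-bit halves (hypothetical helper, assuming the GCC/Clang __builtin_clz):
//
//   unsigned refCtlz64(uint32_t Hi, uint32_t Lo) {
//     // ctlz(Hi:Lo) -> Hi == 0 ? 32 + ctlz(Lo) : ctlz(Hi)
//     return Hi == 0 ? 32 + (Lo ? __builtin_clz(Lo) : 32) : __builtin_clz(Hi);
//   }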
7424
7425LegalizerHelper::LegalizeResult
7426LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7427 LLT NarrowTy) {
7428 if (TypeIdx != 1)
7429 return UnableToLegalize;
7430
7431 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7432 unsigned NarrowSize = NarrowTy.getSizeInBits();
7433
7434 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7435 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7436
7437 MachineIRBuilder &B = MIRBuilder;
7438 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7439 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7440 auto C_0 = B.buildConstant(NarrowTy, 0);
7441 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7442 UnmergeSrc.getReg(0), C_0);
7443 auto HiCTTZ = IsUndef ?
7444 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7445 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7446 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7447 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7448 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7449 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7450
7451 MI.eraseFromParent();
7452 return Legalized;
7453 }
7454
7455 return UnableToLegalize;
7456}
7457
7458LegalizerHelper::LegalizeResult
7459LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7460 LLT NarrowTy) {
7461 if (TypeIdx != 1)
7462 return UnableToLegalize;
7463
7464 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7465 unsigned NarrowSize = NarrowTy.getSizeInBits();
7466
7467 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7468 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7469
7470 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7471 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7472 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7473
7474 MI.eraseFromParent();
7475 return Legalized;
7476 }
7477
7478 return UnableToLegalize;
7479}
7480
7481LegalizerHelper::LegalizeResult
7482LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7483 LLT NarrowTy) {
7484 if (TypeIdx != 1)
7485 return UnableToLegalize;
7486
7487 MachineIRBuilder &B = MIRBuilder;
7488 Register ExpReg = MI.getOperand(2).getReg();
7489 LLT ExpTy = MRI.getType(ExpReg);
7490
7491 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7492
7493 // Clamp the exponent to the range of the target type.
7494 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7495 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7496 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7497 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7498
7499 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7500 Observer.changingInstr(MI);
7501 MI.getOperand(2).setReg(Trunc.getReg(0));
7502 Observer.changedInstr(MI);
7503 return Legalized;
7504}
7505
7506LegalizerHelper::LegalizeResult
7507LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7508 unsigned Opc = MI.getOpcode();
7509 const auto &TII = MIRBuilder.getTII();
7510 auto isSupported = [this](const LegalityQuery &Q) {
7511 auto QAction = LI.getAction(Q).Action;
7512 return QAction == Legal || QAction == Libcall || QAction == Custom;
7513 };
7514 switch (Opc) {
7515 default:
7516 return UnableToLegalize;
7517 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7518 // This trivially expands to CTLZ.
7519 Observer.changingInstr(MI);
7520 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7521 Observer.changedInstr(MI);
7522 return Legalized;
7523 }
7524 case TargetOpcode::G_CTLZ: {
7525 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7526 unsigned Len = SrcTy.getSizeInBits();
7527
7528 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7529 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7530 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7531 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7532 auto ICmp = MIRBuilder.buildICmp(
7533 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7534 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7535 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7536 MI.eraseFromParent();
7537 return Legalized;
7538 }
7539 // for now, we do this:
7540 // NewLen = NextPowerOf2(Len);
7541 // x = x | (x >> 1);
7542 // x = x | (x >> 2);
7543 // ...
7544 // x = x | (x >>16);
7545 // x = x | (x >>32); // for 64-bit input
7546 // Up to NewLen/2
7547 // return Len - popcount(x);
7548 //
7549 // Ref: "Hacker's Delight" by Henry Warren
7550 Register Op = SrcReg;
7551 unsigned NewLen = PowerOf2Ceil(Len);
7552 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7553 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7554 auto MIBOp = MIRBuilder.buildOr(
7555 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7556 Op = MIBOp.getReg(0);
7557 }
7558 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7559 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7560 MIBPop);
7561 MI.eraseFromParent();
7562 return Legalized;
7563 }
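// Editor's illustrative sketch (not part of the LLVM sources): the
// shift-and-or expansion above, written as plain C for a 32-bit value
// (hypothetical helper, assuming __builtin_popcount for the final G_CTPOP):
//
//   unsigned refCtlz32(uint32_t X) {
//     X |= X >> 1;   // smear the highest set bit into all lower positions
//     X |= X >> 2;
//     X |= X >> 4;
//     X |= X >> 8;
//     X |= X >> 16;
//     return 32 - __builtin_popcount(X);   // Len - popcount(x)
//   }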
7564 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7565 // This trivially expands to CTTZ.
7566 Observer.changingInstr(MI);
7567 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7568 Observer.changedInstr(MI);
7569 return Legalized;
7570 }
7571 case TargetOpcode::G_CTTZ: {
7572 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7573
7574 unsigned Len = SrcTy.getSizeInBits();
7575 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7576 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7577 // zero.
7578 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7579 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7580 auto ICmp = MIRBuilder.buildICmp(
7581 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7582 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7583 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7584 MI.eraseFromParent();
7585 return Legalized;
7586 }
7587 // for now, we use: { return popcount(~x & (x - 1)); }
7588 // unless the target has ctlz but not ctpop, in which case we use:
7589 // { return 32 - nlz(~x & (x-1)); }
7590 // Ref: "Hacker's Delight" by Henry Warren
7591 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7592 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7593 auto MIBTmp = MIRBuilder.buildAnd(
7594 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7595 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7596 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7597 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7598 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7599 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7600 MI.eraseFromParent();
7601 return Legalized;
7602 }
7603 Observer.changingInstr(MI);
7604 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7605 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7606 Observer.changedInstr(MI);
7607 return Legalized;
7608 }
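// Editor's illustrative sketch (not part of the LLVM sources): the two
// fallbacks above in plain C for 32 bits (hypothetical helpers, assuming
// GCC/Clang builtins); ~X & (X - 1) isolates the ones below the lowest set
// bit, and both forms return 32 for X == 0:
//
//   unsigned refCttzPop(uint32_t X) { return __builtin_popcount(~X & (X - 1)); }
//   unsigned refCttzClz(uint32_t X) {
//     uint32_t M = ~X & (X - 1);
//     return 32 - (M ? __builtin_clz(M) : 32);
//   }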
7609 case TargetOpcode::G_CTPOP: {
7610 Register SrcReg = MI.getOperand(1).getReg();
7611 LLT Ty = MRI.getType(SrcReg);
7612 unsigned Size = Ty.getSizeInBits();
7613 MachineIRBuilder &B = MIRBuilder;
7614
7615 // Count set bits in blocks of 2 bits. Default approach would be
7616 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7617 // We use the following formula instead:
7618 // B2Count = val - { (val >> 1) & 0x55555555 }
7619 // since it gives the same result in blocks of 2 with one instruction less.
7620 auto C_1 = B.buildConstant(Ty, 1);
7621 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7622 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7623 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7624 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7625 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7626
7627 // To get the count in blocks of 4, add values from adjacent blocks of 2.
7628 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7629 auto C_2 = B.buildConstant(Ty, 2);
7630 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7631 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7632 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7633 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7634 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7635 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7636
7637 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7638 // addition since count value sits in range {0,...,8} and 4 bits are enough
7639 // to hold such binary values. After addition high 4 bits still hold count
7640 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7641 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7642 auto C_4 = B.buildConstant(Ty, 4);
7643 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7644 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7645 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7646 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7647 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7648
7649 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7650 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7651 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7652 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7653
7654 // Shift count result from 8 high bits to low bits.
7655 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7656
7657 auto IsMulSupported = [this](const LLT Ty) {
7658 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7659 return Action == Legal || Action == WidenScalar || Action == Custom;
7660 };
7661 if (IsMulSupported(Ty)) {
7662 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7663 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7664 } else {
7665 auto ResTmp = B8Count;
7666 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7667 auto ShiftC = B.buildConstant(Ty, Shift);
7668 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7669 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7670 }
7671 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7672 }
7673 MI.eraseFromParent();
7674 return Legalized;
7675 }
7676 }
7677}
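// Editor's illustrative sketch (not part of the LLVM sources): the masked-add
// population count built in the G_CTPOP case above, as plain C for 32 bits
// (hypothetical helper name):
//
//   unsigned refCtpop32(uint32_t V) {
//     V = V - ((V >> 1) & 0x55555555u);                  // 2-bit block counts
//     V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u);  // 4-bit block counts
//     V = (V + (V >> 4)) & 0x0F0F0F0Fu;                  // 8-bit block counts
//     return (V * 0x01010101u) >> 24;      // sum the bytes into the top byte
//   }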
7678
7679// Check that (every element of) Reg is undef or not an exact multiple of BW.
7680static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7681 Register Reg, unsigned BW) {
7682 return matchUnaryPredicate(
7683 MRI, Reg,
7684 [=](const Constant *C) {
7685 // Null constant here means an undef.
7686 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7687 return !CI || CI->getValue().urem(BW) != 0;
7688 },
7689 /*AllowUndefs*/ true);
7690}
7691
7692LegalizerHelper::LegalizeResult
7693LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7694 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7695 LLT Ty = MRI.getType(Dst);
7696 LLT ShTy = MRI.getType(Z);
7697
7698 unsigned BW = Ty.getScalarSizeInBits();
7699
7700 if (!isPowerOf2_32(BW))
7701 return UnableToLegalize;
7702
7703 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7704 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7705
7706 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7707 // fshl X, Y, Z -> fshr X, Y, -Z
7708 // fshr X, Y, Z -> fshl X, Y, -Z
7709 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7710 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7711 } else {
7712 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7713 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7714 auto One = MIRBuilder.buildConstant(ShTy, 1);
7715 if (IsFSHL) {
7716 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7717 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7718 } else {
7719 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7720 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7721 }
7722
7723 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7724 }
7725
7726 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7727 MI.eraseFromParent();
7728 return Legalized;
7729}
7730
7731LegalizerHelper::LegalizeResult
7732LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7733 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7734 LLT Ty = MRI.getType(Dst);
7735 LLT ShTy = MRI.getType(Z);
7736
7737 const unsigned BW = Ty.getScalarSizeInBits();
7738 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7739
7740 Register ShX, ShY;
7741 Register ShAmt, InvShAmt;
7742
7743 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7744 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7745 // fshl: X << C | Y >> (BW - C)
7746 // fshr: X << (BW - C) | Y >> C
7747 // where C = Z % BW is not zero
7748 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7749 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7750 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7751 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7752 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7753 } else {
7754 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7755 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7756 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7757 if (isPowerOf2_32(BW)) {
7758 // Z % BW -> Z & (BW - 1)
7759 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7760 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7761 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7762 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7763 } else {
7764 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7765 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7766 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7767 }
7768
7769 auto One = MIRBuilder.buildConstant(ShTy, 1);
7770 if (IsFSHL) {
7771 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7772 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7773 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7774 } else {
7775 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7776 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7777 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7778 }
7779 }
7780
7781 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7782 MI.eraseFromParent();
7783 return Legalized;
7784}
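// Editor's illustrative sketch (not part of the LLVM sources): the
// power-of-two branch above for a 32-bit G_FSHL, as plain C (hypothetical
// helper). Splitting the right shift as ">> 1 >> Inv" avoids the undefined
// shift by 32 when Z % 32 == 0:
//
//   uint32_t refFshl32(uint32_t X, uint32_t Y, uint32_t Z) {
//     uint32_t Amt = Z & 31;    // Z % BW
//     uint32_t Inv = ~Z & 31;   // (BW - 1) - (Z % BW)
//     return (X << Amt) | ((Y >> 1) >> Inv);
//   }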
7785
7786LegalizerHelper::LegalizeResult
7787LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7788 // These operations approximately do the following (while avoiding undefined
7789 // shifts by BW):
7790 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7791 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7792 Register Dst = MI.getOperand(0).getReg();
7793 LLT Ty = MRI.getType(Dst);
7794 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7795
7796 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7797 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7798
7799 // TODO: Use smarter heuristic that accounts for vector legalization.
7800 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7801 return lowerFunnelShiftAsShifts(MI);
7802
7803 // This only works for powers of 2, fallback to shifts if it fails.
7804 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7805 if (Result == UnableToLegalize)
7806 return lowerFunnelShiftAsShifts(MI);
7807 return Result;
7808}
7809
7810LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7811 auto [Dst, Src] = MI.getFirst2Regs();
7812 LLT DstTy = MRI.getType(Dst);
7813 LLT SrcTy = MRI.getType(Src);
7814
7815 uint32_t DstTySize = DstTy.getSizeInBits();
7816 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7817 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7818
7819 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7820 !isPowerOf2_32(SrcTyScalarSize))
7821 return UnableToLegalize;
7822
7823 // The step between the extends is too large; split it by creating an
7824 // intermediate extend instruction.
7825 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7826 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7827 // If the destination type is illegal, split it into multiple statements
7828 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7829 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7830 // Unmerge the vector
7831 LLT EltTy = MidTy.changeElementCount(
7832 MidTy.getElementCount().divideCoefficientBy(2));
7833 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7834
7835 // ZExt the vectors
7836 LLT ZExtResTy = DstTy.changeElementCount(
7837 DstTy.getElementCount().divideCoefficientBy(2));
7838 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7839 {UnmergeSrc.getReg(0)});
7840 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7841 {UnmergeSrc.getReg(1)});
7842
7843 // Merge the ending vectors
7844 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7845
7846 MI.eraseFromParent();
7847 return Legalized;
7848 }
7849 return UnableToLegalize;
7850}
7851
7852LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7853 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7854 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7855 // Similar to how operand splitting is done in SelectionDAG, we can handle
7856 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7857 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7858 // %lo16(<4 x s16>) = G_TRUNC %inlo
7859 // %hi16(<4 x s16>) = G_TRUNC %inhi
7860 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7861 // %res(<8 x s8>) = G_TRUNC %in16
7862
7863 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7864
7865 Register DstReg = MI.getOperand(0).getReg();
7866 Register SrcReg = MI.getOperand(1).getReg();
7867 LLT DstTy = MRI.getType(DstReg);
7868 LLT SrcTy = MRI.getType(SrcReg);
7869
7870 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7871 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7872 isPowerOf2_32(SrcTy.getNumElements()) &&
7873 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7874 // Split input type.
7875 LLT SplitSrcTy = SrcTy.changeElementCount(
7876 SrcTy.getElementCount().divideCoefficientBy(2));
7877
7878 // First, split the source into two smaller vectors.
7879 SmallVector<Register, 2> SplitSrcs;
7880 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7881
7882 // Truncate the splits into intermediate narrower elements.
7883 LLT InterTy;
7884 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7885 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7886 else
7887 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7888 for (Register &Src : SplitSrcs)
7889 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7890
7891 // Combine the new truncates into one vector
7892 auto Merge = MIRBuilder.buildMergeLikeInstr(
7893 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7894
7895 // Truncate the new vector to the final result type
7896 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7897 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7898 else
7899 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7900
7901 MI.eraseFromParent();
7902
7903 return Legalized;
7904 }
7905 return UnableToLegalize;
7906}
7907
7908LegalizerHelper::LegalizeResult
7909LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
7910 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7911 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7912 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7913 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7914 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7915 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7916 MI.eraseFromParent();
7917 return Legalized;
7918}
7919
7920LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
7921 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7922
7923 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7924 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7925
7926 MIRBuilder.setInstrAndDebugLoc(MI);
7927
7928 // If a rotate in the other direction is supported, use it.
7929 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7930 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
7931 isPowerOf2_32(EltSizeInBits))
7932 return lowerRotateWithReverseRotate(MI);
7933
7934 // If a funnel shift is supported, use it.
7935 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7936 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7937 bool IsFShLegal = false;
7938 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7939 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7940 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
7941 Register R3) {
7942 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
7943 MI.eraseFromParent();
7944 return Legalized;
7945 };
7946 // If a funnel shift in the other direction is supported, use it.
7947 if (IsFShLegal) {
7948 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7949 } else if (isPowerOf2_32(EltSizeInBits)) {
7950 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
7951 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7952 }
7953 }
7954
7955 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7956 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7957 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
7958 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
7959 Register ShVal;
7960 Register RevShiftVal;
7961 if (isPowerOf2_32(EltSizeInBits)) {
7962 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7963 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
7964 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7965 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7966 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7967 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7968 RevShiftVal =
7969 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
7970 } else {
7971 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7972 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7973 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
7974 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
7975 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7976 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7977 auto One = MIRBuilder.buildConstant(AmtTy, 1);
7978 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7979 RevShiftVal =
7980 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
7981 }
7982 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
7983 MI.eraseFromParent();
7984 return Legalized;
7985}
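// Editor's illustrative sketch (not part of the LLVM sources): the
// power-of-two path above is the familiar branch-free rotate, shown as plain
// C for 32 bits (hypothetical helper); both shift amounts stay in [0, 31], so
// no shift is undefined even for a rotate amount of 0:
//
//   uint32_t refRotl32(uint32_t X, uint32_t C) {
//     return (X << (C & 31u)) | (X >> (-C & 31u));
//   }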
7986
7987// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
7988// representation.
7989LegalizerHelper::LegalizeResult
7990LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
7991 auto [Dst, Src] = MI.getFirst2Regs();
7992 const LLT S64 = LLT::scalar(64);
7993 const LLT S32 = LLT::scalar(32);
7994 const LLT S1 = LLT::scalar(1);
7995
7996 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7997
7998 // unsigned cul2f(ulong u) {
7999 // uint lz = clz(u);
8000 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8001 // u = (u << lz) & 0x7fffffffffffffffUL;
8002 // ulong t = u & 0xffffffffffUL;
8003 // uint v = (e << 23) | (uint)(u >> 40);
8004 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8005 // return as_float(v + r);
8006 // }
8007
8008 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8009 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8010
8011 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8012
8013 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8014 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8015
8016 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8017 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8018
8019 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8020 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8021
8022 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8023
8024 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8025 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8026
8027 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8028 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8029 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8030
8031 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8032 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8033 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8034 auto One = MIRBuilder.buildConstant(S32, 1);
8035
8036 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8037 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8038 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8039 MIRBuilder.buildAdd(Dst, V, R);
8040
8041 MI.eraseFromParent();
8042 return Legalized;
8043}
8044
8045// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8046// operations and G_SITOFP
8047LegalizerHelper::LegalizeResult
8048LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
8049 auto [Dst, Src] = MI.getFirst2Regs();
8050 const LLT S64 = LLT::scalar(64);
8051 const LLT S32 = LLT::scalar(32);
8052 const LLT S1 = LLT::scalar(1);
8053
8054 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8055
8056 // For u64 values below INT64_MAX we simply reuse SITOFP.
8057 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8058 // saved before division, convert to float by SITOFP, multiply the result
8059 // by 2.
8060 auto One = MIRBuilder.buildConstant(S64, 1);
8061 auto Zero = MIRBuilder.buildConstant(S64, 0);
8062 // Result if Src < INT_MAX
8063 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8064 // Result if Src >= INT_MAX
8065 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8066 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8067 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8068 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8069 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8070 // Check if the original value is larger than INT64_MAX by comparing with
8071 // zero to pick one of the two conversions.
8072 auto IsLarge =
8073 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8074 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8075
8076 MI.eraseFromParent();
8077 return Legalized;
8078}
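// Editor's illustrative sketch (not part of the LLVM sources): the expansion
// above in plain C (hypothetical helper). OR-ing the dropped bit back into
// the halved value keeps it as a sticky bit, so the final float is still the
// correctly rounded conversion of the original 64-bit value:
//
//   float refU64ToF32(uint64_t U) {
//     if ((int64_t)U >= 0)
//       return (float)(int64_t)U;                 // small case: plain SITOFP
//     uint64_t RoundedHalf = (U >> 1) | (U & 1);  // halve, keep the lost bit
//     float Half = (float)(int64_t)RoundedHalf;
//     return Half + Half;                         // scale back by 2
//   }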
8079
8080// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8081// IEEE double representation.
8082LegalizerHelper::LegalizeResult
8083LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
8084 auto [Dst, Src] = MI.getFirst2Regs();
8085 const LLT S64 = LLT::scalar(64);
8086 const LLT S32 = LLT::scalar(32);
8087
8088 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8089
8090 // We create double value from 32 bit parts with 32 exponent difference.
8091 // Note that + and - are float operations that adjust the implicit leading
8092 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8093 //
8094 // X = 2^52 * 1.0...LowBits
8095 // Y = 2^84 * 1.0...HighBits
8096 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8097 // = - 2^52 * 1.0...HighBits
8098 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
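  // Worked example (illustration only, values written as exact reals):
  // Src = 0x0000000100000002 gives HighBits = 1 and LowBits = 2, so
  //   LowBitsFP  = 2^52 + 2        (low 32 bits ORed into the mantissa)
  //   HighBitsFP = 2^84 + 2^32     (mantissa ULP at exponent 84 is 2^32)
  //   Scratch    = HighBitsFP - (2^84 + 2^52) = 2^32 - 2^52
  //   Result     = Scratch + LowBitsFP = 2^32 + 2 = Src,
  // with at most one rounding step, in the final FADD.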
8099 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8100 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8101 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8102 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8103 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8104
8105 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8106 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8107 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8108 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8109 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8110 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8111 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8112
8113 MI.eraseFromParent();
8114 return Legalized;
8115}
8116
8117/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8118/// convert fpround f64->f16 without double-rounding, so we manually perform the
8119/// lowering here where we know it is valid.
8120 static LegalizerHelper::LegalizeResult
8121 loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8122 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8123 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8124 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8125 : MIRBuilder.buildSITOFP(SrcTy, Src);
8126 LLT S32Ty = SrcTy.changeElementSize(32);
8127 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8128 MIRBuilder.buildFPTrunc(Dst, M2);
8129 MI.eraseFromParent();
8130 return LegalizerHelper::Legalized;
8131}
8132
8134 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8135
8136 if (SrcTy == LLT::scalar(1)) {
8137 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8138 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8139 MIRBuilder.buildSelect(Dst, Src, True, False);
8140 MI.eraseFromParent();
8141 return Legalized;
8142 }
8143
8144 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8145 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8146
8147 if (SrcTy != LLT::scalar(64))
8148 return UnableToLegalize;
8149
8150 if (DstTy == LLT::scalar(32))
8151 // TODO: SelectionDAG has several alternative expansions to port which may
8152 // be more reasonable depending on the available instructions. We also need
8153 // a more advanced mechanism to choose an optimal version depending on
8154 // target features such as sitofp or CTLZ availability.
8155 return lowerU64ToF32WithSITOFP(MI);
8156
8157 if (DstTy == LLT::scalar(64))
8158 return lowerU64ToF64BitFloatOps(MI);
8159
8160 return UnableToLegalize;
8161}
8162
8164 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8165
8166 const LLT S64 = LLT::scalar(64);
8167 const LLT S32 = LLT::scalar(32);
8168 const LLT S1 = LLT::scalar(1);
8169
8170 if (SrcTy == S1) {
8171 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8172 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8173 MIRBuilder.buildSelect(Dst, Src, True, False);
8174 MI.eraseFromParent();
8175 return Legalized;
8176 }
8177
8178 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8179 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8180
8181 if (SrcTy != S64)
8182 return UnableToLegalize;
8183
8184 if (DstTy == S32) {
8185 // signed cl2f(long l) {
8186 // long s = l >> 63;
8187 // float r = cul2f((l + s) ^ s);
8188 // return s ? -r : r;
8189 // }
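  // Worked example (illustration only): l = -5 gives s = -1 (all ones),
  // (l + s) ^ s = (-6) ^ -1 = 5, r = cul2f(5) = 5.0f, and since s != 0 the
  // select below produces -r = -5.0f. Non-negative inputs have s = 0 and pass
  // through to the unsigned conversion unchanged.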
8190 Register L = Src;
8191 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8192 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8193
8194 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8195 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8196 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8197
8198 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8199 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8200 MIRBuilder.buildConstant(S64, 0));
8201 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8202 MI.eraseFromParent();
8203 return Legalized;
8204 }
8205
8206 return UnableToLegalize;
8207}
8208
8210 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8211 const LLT S64 = LLT::scalar(64);
8212 const LLT S32 = LLT::scalar(32);
8213
8214 if (SrcTy != S64 && SrcTy != S32)
8215 return UnableToLegalize;
8216 if (DstTy != S32 && DstTy != S64)
8217 return UnableToLegalize;
8218
8219 // FPTOSI gives the same result as FPTOUI for positive signed integers.
8220 // FPTOUI needs to deal with fp values that convert to unsigned integers greater
8221 // than or equal to 2^31 for an i32 destination or 2^63 for an i64 destination. For brevity 2^Exp.
8222
8223 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8224 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8225 : APFloat::IEEEdouble(),
8226 APInt::getZero(SrcTy.getSizeInBits()));
8227 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8228
8229 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8230
8231 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8232 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8233 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
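  // Worked example (illustration only, f32 -> i32, so Exp = 31):
  // Value = 3.0e9f is exactly representable and >= 2^31, so
  // FSub = 3.0e9 - 2^31 = 852516352.0, FPTOSI yields 852516352, and XORing in
  // 0x80000000 restores the subtracted 2^31, giving 3000000000.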
8234 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8235 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8236 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8237 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8238
8239 const LLT S1 = LLT::scalar(1);
8240
8241 MachineInstrBuilder FCMP =
8242 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8243 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8244
8245 MI.eraseFromParent();
8246 return Legalized;
8247}
8248
8250 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8251 const LLT S64 = LLT::scalar(64);
8252 const LLT S32 = LLT::scalar(32);
8253
8254 // FIXME: Only f32 to i64 conversions are supported.
8255 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8256 return UnableToLegalize;
8257
8258 // Expand f32 -> i64 conversion
8259 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8260 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
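  // Worked example (illustration only): Src = -1.5f has bits 0xBFC00000, so
  // ExponentBits = 127 and Exponent = 0; R = mantissa | implicit bit =
  // 0x00C00000. Exponent <= 23 selects the right shift by 23, giving 1;
  // applying the sign via XOR/SUB with Sign = -1 yields -1, and since
  // Exponent >= 0 the final select keeps -1, matching fptosi(-1.5f).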
8261
8262 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8263
8264 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8265 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8266
8267 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8268 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8269
8270 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8271 APInt::getSignMask(SrcEltBits));
8272 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8273 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8274 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8275 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8276
8277 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8278 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8279 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8280
8281 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8282 R = MIRBuilder.buildZExt(DstTy, R);
8283
8284 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8285 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8286 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8287 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8288
8289 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8290 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8291
8292 const LLT S1 = LLT::scalar(1);
8293 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8294 S1, Exponent, ExponentLoBit);
8295
8296 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8297
8298 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8299 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8300
8301 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8302
8303 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8304 S1, Exponent, ZeroSrcTy);
8305
8306 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8307 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8308
8309 MI.eraseFromParent();
8310 return Legalized;
8311}
8312
8315 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8316
8317 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8318 unsigned SatWidth = DstTy.getScalarSizeInBits();
8319
8320 // Determine minimum and maximum integer values and their corresponding
8321 // floating-point values.
8322 APInt MinInt, MaxInt;
8323 if (IsSigned) {
8324 MinInt = APInt::getSignedMinValue(SatWidth);
8325 MaxInt = APInt::getSignedMaxValue(SatWidth);
8326 } else {
8327 MinInt = APInt::getMinValue(SatWidth);
8328 MaxInt = APInt::getMaxValue(SatWidth);
8329 }
8330
8331 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8332 APFloat MinFloat(Semantics);
8333 APFloat MaxFloat(Semantics);
8334
8335 APFloat::opStatus MinStatus =
8336 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8337 APFloat::opStatus MaxStatus =
8338 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8339 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8340 !(MaxStatus & APFloat::opStatus::opInexact);
8341
8342 // If the integer bounds are exactly representable as floats, emit a
8343 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8344 // and selects.
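  // For example (illustration only), G_FPTOUI_SAT f32 -> i8 has MinInt = 0 and
  // MaxInt = 255, both exactly representable as f32, so this path clamps Src
  // to [0.0, 255.0] and converts: 300.25f -> 255, -7.0f -> 0, and NaN is first
  // mapped to MinFloat (0.0) and therefore converts to 0.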
8345 if (AreExactFloatBounds) {
8346 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8347 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8348 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8349 SrcTy.changeElementSize(1), Src, MaxC);
8350 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8351 // Clamp by MaxFloat from above. NaN cannot occur.
8352 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8353 auto MinP =
8354 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8355 MinC, MachineInstr::FmNoNans);
8356 auto Min =
8357 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8358 // Convert clamped value to integer. In the unsigned case we're done,
8359 // because we mapped NaN to MinFloat, which will cast to zero.
8360 if (!IsSigned) {
8361 MIRBuilder.buildFPTOUI(Dst, Min);
8362 MI.eraseFromParent();
8363 return Legalized;
8364 }
8365
8366 // Otherwise, select 0 if Src is NaN.
8367 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8368 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8369 DstTy.changeElementSize(1), Src, Src);
8370 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8371 FpToInt);
8372 MI.eraseFromParent();
8373 return Legalized;
8374 }
8375
8376 // Result of direct conversion. The assumption here is that the operation is
8377 // non-trapping and it's fine to apply it to an out-of-range value if we
8378 // select it away later.
8379 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8380 : MIRBuilder.buildFPTOUI(DstTy, Src);
8381
8382 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8383 // MinInt if Src is NaN.
8384 auto ULT =
8385 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8386 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8387 auto Max = MIRBuilder.buildSelect(
8388 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8389 // If Src OGT MaxFloat, select MaxInt.
8390 auto OGT =
8391 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8392 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8393
8394 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8395 // is already zero.
8396 if (!IsSigned) {
8397 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8398 Max);
8399 MI.eraseFromParent();
8400 return Legalized;
8401 }
8402
8403 // Otherwise, select 0 if Src is NaN.
8404 auto Min = MIRBuilder.buildSelect(
8405 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8406 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8407 DstTy.changeElementSize(1), Src, Src);
8408 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8409 MI.eraseFromParent();
8410 return Legalized;
8411}
8412
8413// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8416 const LLT S1 = LLT::scalar(1);
8417 const LLT S32 = LLT::scalar(32);
8418
8419 auto [Dst, Src] = MI.getFirst2Regs();
8420 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8421 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8422
8423 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8424 return UnableToLegalize;
8425
8426 if (MI.getFlag(MachineInstr::FmAfn)) {
8427 unsigned Flags = MI.getFlags();
8428 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8429 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8430 MI.eraseFromParent();
8431 return Legalized;
8432 }
8433
8434 const unsigned ExpMask = 0x7ff;
8435 const unsigned ExpBiasf64 = 1023;
8436 const unsigned ExpBiasf16 = 15;
8437
8438 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8439 Register U = Unmerge.getReg(0);
8440 Register UH = Unmerge.getReg(1);
8441
8442 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8443 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8444
8445 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8446 // add the f16 bias (15) to get the biased exponent for the f16 format.
8447 E = MIRBuilder.buildAdd(
8448 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8449
8450 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8451 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8452
8453 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8454 MIRBuilder.buildConstant(S32, 0x1ff));
8455 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8456
8457 auto Zero = MIRBuilder.buildConstant(S32, 0);
8458 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8459 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8460 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8461
8462 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8463 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8464 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8465 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8466
8467 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8468 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8469
8470 // N = M | (E << 12);
8471 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8472 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8473
8474 // B = clamp(1-E, 0, 13);
8475 auto One = MIRBuilder.buildConstant(S32, 1);
8476 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8477 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8478 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8479
8480 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8481 MIRBuilder.buildConstant(S32, 0x1000));
8482
8483 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8484 auto D0 = MIRBuilder.buildShl(S32, D, B);
8485
8486 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8487 D0, SigSetHigh);
8488 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8489 D = MIRBuilder.buildOr(S32, D, D1);
8490
8491 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8492 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8493
8494 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8495 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8496
8497 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8498 MIRBuilder.buildConstant(S32, 3));
8499 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8500
8501 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8502 MIRBuilder.buildConstant(S32, 5));
8503 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8504
8505 V1 = MIRBuilder.buildOr(S32, V0, V1);
8506 V = MIRBuilder.buildAdd(S32, V, V1);
8507
8508 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8509 E, MIRBuilder.buildConstant(S32, 30));
8510 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8511 MIRBuilder.buildConstant(S32, 0x7c00), V);
8512
8513 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8514 E, MIRBuilder.buildConstant(S32, 1039));
8515 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8516
8517 // Extract the sign bit.
8518 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8519 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8520
8521 // Insert the sign bit
8522 V = MIRBuilder.buildOr(S32, Sign, V);
8523
8524 MIRBuilder.buildTrunc(Dst, V);
8525 MI.eraseFromParent();
8526 return Legalized;
8527}
8528
8531 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8532 const LLT S64 = LLT::scalar(64);
8533 const LLT S16 = LLT::scalar(16);
8534
8535 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8536 return lowerFPTRUNC_F64_TO_F16(MI);
8537
8538 return UnableToLegalize;
8539}
8540
8542 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8543 LLT Ty = MRI.getType(Dst);
8544
8545 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8546 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8547 MI.eraseFromParent();
8548 return Legalized;
8549}
8550
8552 switch (Opc) {
8553 case TargetOpcode::G_SMIN:
8554 return CmpInst::ICMP_SLT;
8555 case TargetOpcode::G_SMAX:
8556 return CmpInst::ICMP_SGT;
8557 case TargetOpcode::G_UMIN:
8558 return CmpInst::ICMP_ULT;
8559 case TargetOpcode::G_UMAX:
8560 return CmpInst::ICMP_UGT;
8561 default:
8562 llvm_unreachable("not in integer min/max");
8563 }
8564}
8565
8567 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8568
8569 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8570 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8571
8572 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8573 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8574
8575 MI.eraseFromParent();
8576 return Legalized;
8577}
8578
8581 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8582
8583 Register Dst = Cmp->getReg(0);
8584 LLT DstTy = MRI.getType(Dst);
8585 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8586 LLT CmpTy = DstTy.changeElementSize(1);
8587
8588 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8589 ? CmpInst::ICMP_SLT
8590 : CmpInst::ICMP_ULT;
8591 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8592 ? CmpInst::ICMP_SGT
8593 : CmpInst::ICMP_UGT;
8594
8595 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8596 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8597 Cmp->getRHSReg());
8598 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8599 Cmp->getRHSReg());
8600
8601 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8602 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8603 if (TLI.preferSelectsOverBooleanArithmetic(
8604 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8606 auto One = MIRBuilder.buildConstant(DstTy, 1);
8607 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8608
8609 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8610 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8611 } else {
8613 std::swap(IsGT, IsLT);
8614 // Extend boolean results to DstTy, which is at least i2, before subtracting
8615 // them.
8616 unsigned BoolExtOp =
8617 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8618 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8619 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8620 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8621 }
8622
8623 MI.eraseFromParent();
8624 return Legalized;
8625}
8626
8629 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8630 const int Src0Size = Src0Ty.getScalarSizeInBits();
8631 const int Src1Size = Src1Ty.getScalarSizeInBits();
8632
8633 auto SignBitMask = MIRBuilder.buildConstant(
8634 Src0Ty, APInt::getSignMask(Src0Size));
8635
8636 auto NotSignBitMask = MIRBuilder.buildConstant(
8637 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8638
8639 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8640 Register And1;
8641 if (Src0Ty == Src1Ty) {
8642 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8643 } else if (Src0Size > Src1Size) {
8644 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8645 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8646 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8647 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8648 } else {
8649 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8650 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8651 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8652 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8653 }
8654
8655 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8656 // constants are a nan and -0.0, but the final result should preserve
8657 // everything.
8658 unsigned Flags = MI.getFlags();
8659
8660 // We masked the sign bit and the not-sign bit, so these are disjoint.
8661 Flags |= MachineInstr::Disjoint;
8662
8663 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8664
8665 MI.eraseFromParent();
8666 return Legalized;
8667}
8668
8671 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8672 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8673 // depend on fminnum/fmaxnum.
8674
8675 unsigned NewOp;
8676 switch (MI.getOpcode()) {
8677 case TargetOpcode::G_FMINNUM:
8678 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8679 break;
8680 case TargetOpcode::G_FMINIMUMNUM:
8681 NewOp = TargetOpcode::G_FMINNUM;
8682 break;
8683 case TargetOpcode::G_FMAXNUM:
8684 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8685 break;
8686 case TargetOpcode::G_FMAXIMUMNUM:
8687 NewOp = TargetOpcode::G_FMAXNUM;
8688 break;
8689 default:
8690 llvm_unreachable("unexpected min/max opcode");
8691 }
8692
8693 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8694 LLT Ty = MRI.getType(Dst);
8695
8696 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8697 // Insert canonicalizes if it's possible we need to quiet to get correct
8698 // sNaN behavior.
8699
8700 // Note this must be done here, and not as an optimization combine in the
8701 // absence of a dedicated quiet-sNaN instruction, as we're using an
8702 // omni-purpose G_FCANONICALIZE.
8703 if (!isKnownNeverSNaN(Src0, MRI))
8704 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8705
8706 if (!isKnownNeverSNaN(Src1, MRI))
8707 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8708 }
8709
8710 // If there are no nans, it's safe to simply replace this with the non-IEEE
8711 // version.
8712 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8713 MI.eraseFromParent();
8714 return Legalized;
8715}
8716
8718 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8719 Register DstReg = MI.getOperand(0).getReg();
8720 LLT Ty = MRI.getType(DstReg);
8721 unsigned Flags = MI.getFlags();
8722
8723 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8724 Flags);
8725 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8726 MI.eraseFromParent();
8727 return Legalized;
8728}
8729
8732 auto [DstReg, X] = MI.getFirst2Regs();
8733 const unsigned Flags = MI.getFlags();
8734 const LLT Ty = MRI.getType(DstReg);
8735 const LLT CondTy = Ty.changeElementSize(1);
8736
8737 // round(x) =>
8738 // t = trunc(x);
8739 // d = fabs(x - t);
8740 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8741 // return t + o;
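  // Worked example (illustration only): x = -2.5 gives t = -2.0, d = 0.5,
  // o = copysign(1.0, -2.5) = -1.0, so the result is -3.0; ties round away
  // from zero, matching the llvm.round semantics.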
8742
8743 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8744
8745 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8746 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8747
8748 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8749 auto Cmp =
8750 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8751
8752 // Could emit G_UITOFP instead
8753 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8754 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8755 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8756 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8757
8758 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8759
8760 MI.eraseFromParent();
8761 return Legalized;
8762}
8763
8765 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8766 unsigned Flags = MI.getFlags();
8767 LLT Ty = MRI.getType(DstReg);
8768 const LLT CondTy = Ty.changeElementSize(1);
8769
8770 // result = trunc(src);
8771 // if (src < 0.0 && src != result)
8772 // result += -1.0.
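  // Worked example (illustration only): src = -1.25 truncates to -1.0; the
  // condition holds, and SITOFP of the i1 true value (-1) contributes -1.0,
  // giving -2.0. For src = 1.25 the condition is false and the result is 1.0.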
8773
8774 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8775 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8776
8777 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8778 SrcReg, Zero, Flags);
8779 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8780 SrcReg, Trunc, Flags);
8781 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8782 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8783
8784 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8785 MI.eraseFromParent();
8786 return Legalized;
8787}
8788
8791 const unsigned NumOps = MI.getNumOperands();
8792 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8793 unsigned PartSize = Src0Ty.getSizeInBits();
8794
8795 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8796 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8797
8798 for (unsigned I = 2; I != NumOps; ++I) {
8799 const unsigned Offset = (I - 1) * PartSize;
8800
8801 Register SrcReg = MI.getOperand(I).getReg();
8802 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8803
8804 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8805 MRI.createGenericVirtualRegister(WideTy);
8806
8807 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8808 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8809 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8810 ResultReg = NextResult;
8811 }
8812
8813 if (DstTy.isPointer()) {
8814 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8815 DstTy.getAddressSpace())) {
8816 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8817 return UnableToLegalize;
8818 }
8819
8820 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8821 }
8822
8823 MI.eraseFromParent();
8824 return Legalized;
8825}
8826
8829 const unsigned NumDst = MI.getNumOperands() - 1;
8830 Register SrcReg = MI.getOperand(NumDst).getReg();
8831 Register Dst0Reg = MI.getOperand(0).getReg();
8832 LLT DstTy = MRI.getType(Dst0Reg);
8833 if (DstTy.isPointer())
8834 return UnableToLegalize; // TODO
8835
8836 SrcReg = coerceToScalar(SrcReg);
8837 if (!SrcReg)
8838 return UnableToLegalize;
8839
8840 // Expand scalarizing unmerge as bitcast to integer and shift.
8841 LLT IntTy = MRI.getType(SrcReg);
8842
8843 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8844
8845 const unsigned DstSize = DstTy.getSizeInBits();
8846 unsigned Offset = DstSize;
8847 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8848 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8849 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8850 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8851 }
8852
8853 MI.eraseFromParent();
8854 return Legalized;
8855}
8856
8857/// Lower a vector extract or insert by writing the vector to a stack temporary
8858/// and reloading the element or vector.
8859///
8860/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8861/// =>
8862/// %stack_temp = G_FRAME_INDEX
8863/// G_STORE %vec, %stack_temp
8864/// %idx = clamp(%idx, %vec.getNumElements())
8865/// %element_ptr = G_PTR_ADD %stack_temp, %idx
8866/// %dst = G_LOAD %element_ptr
8869 Register DstReg = MI.getOperand(0).getReg();
8870 Register SrcVec = MI.getOperand(1).getReg();
8871 Register InsertVal;
8872 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8873 InsertVal = MI.getOperand(2).getReg();
8874
8875 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
8876
8877 LLT VecTy = MRI.getType(SrcVec);
8878 LLT EltTy = VecTy.getElementType();
8879 unsigned NumElts = VecTy.getNumElements();
8880
8881 int64_t IdxVal;
8882 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
8883 SmallVector<Register, 8> SrcRegs;
8884 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
8885
8886 if (InsertVal) {
8887 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
8888 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
8889 } else {
8890 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
8891 }
8892
8893 MI.eraseFromParent();
8894 return Legalized;
8895 }
8896
8897 if (!EltTy.isByteSized()) { // Not implemented.
8898 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8899 return UnableToLegalize;
8900 }
8901
8902 unsigned EltBytes = EltTy.getSizeInBytes();
8903 Align VecAlign = getStackTemporaryAlignment(VecTy);
8904 Align EltAlign;
8905
8906 MachinePointerInfo PtrInfo;
8907 auto StackTemp = createStackTemporary(
8908 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
8909 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
8910
8911 // Get the pointer to the element, and be sure not to hit undefined behavior
8912 // if the index is out of bounds.
8913 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
8914
8915 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
8916 int64_t Offset = IdxVal * EltBytes;
8917 PtrInfo = PtrInfo.getWithOffset(Offset);
8918 EltAlign = commonAlignment(VecAlign, Offset);
8919 } else {
8920 // We lose information with a variable offset.
8921 EltAlign = getStackTemporaryAlignment(EltTy);
8922 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
8923 }
8924
8925 if (InsertVal) {
8926 // Write the inserted element
8927 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
8928
8929 // Reload the whole vector.
8930 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
8931 } else {
8932 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
8933 }
8934
8935 MI.eraseFromParent();
8936 return Legalized;
8937}
8938
8941 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8942 MI.getFirst3RegLLTs();
8943 LLT IdxTy = LLT::scalar(32);
8944
8945 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
8946 Register Undef;
8947 SmallVector<Register, 32> BuildVec;
8948 LLT EltTy = DstTy.getScalarType();
8949
8950 for (int Idx : Mask) {
8951 if (Idx < 0) {
8952 if (!Undef.isValid())
8953 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
8954 BuildVec.push_back(Undef);
8955 continue;
8956 }
8957
8958 if (Src0Ty.isScalar()) {
8959 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
8960 } else {
8961 int NumElts = Src0Ty.getNumElements();
8962 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
8963 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
8964 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
8965 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
8966 BuildVec.push_back(Extract.getReg(0));
8967 }
8968 }
8969
8970 if (DstTy.isVector())
8971 MIRBuilder.buildBuildVector(DstReg, BuildVec);
8972 else
8973 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
8974 MI.eraseFromParent();
8975 return Legalized;
8976}
8977
8980 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
8981 MI.getFirst4RegLLTs();
8982
8983 if (VecTy.isScalableVector())
8984 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
8985
8986 Align VecAlign = getStackTemporaryAlignment(VecTy);
8987 MachinePointerInfo PtrInfo;
8988 Register StackPtr =
8989 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
8990 PtrInfo)
8991 .getReg(0);
8992 MachinePointerInfo ValPtrInfo =
8993 MachinePointerInfo::getUnknownStack(*MI.getMF());
8994
8995 LLT IdxTy = LLT::scalar(32);
8996 LLT ValTy = VecTy.getElementType();
8997 Align ValAlign = getStackTemporaryAlignment(ValTy);
8998
8999 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9000
9001 bool HasPassthru =
9002 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9003
9004 if (HasPassthru)
9005 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9006
9007 Register LastWriteVal;
9008 std::optional<APInt> PassthruSplatVal =
9009 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9010
9011 if (PassthruSplatVal.has_value()) {
9012 LastWriteVal =
9013 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9014 } else if (HasPassthru) {
9015 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9016 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9017 {LLT::scalar(32)}, {Popcount});
9018
9019 Register LastElmtPtr =
9020 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9021 LastWriteVal =
9022 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9023 .getReg(0);
9024 }
9025
9026 unsigned NumElmts = VecTy.getNumElements();
9027 for (unsigned I = 0; I < NumElmts; ++I) {
9028 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9029 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9030 Register ElmtPtr =
9031 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9032 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9033
9034 LLT MaskITy = MaskTy.getElementType();
9035 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9036 if (MaskITy.getSizeInBits() > 1)
9037 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9038
9039 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9040 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9041
9042 if (HasPassthru && I == NumElmts - 1) {
9043 auto EndOfVector =
9044 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9045 auto AllLanesSelected = MIRBuilder.buildICmp(
9046 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9047 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9048 {OutPos, EndOfVector});
9049 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9050
9051 LastWriteVal =
9052 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9053 .getReg(0);
9054 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9055 }
9056 }
9057
9058 // TODO: Use StackPtr's FrameIndex alignment.
9059 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9060
9061 MI.eraseFromParent();
9062 return Legalized;
9063}
9064
9066 Register AllocSize,
9067 Align Alignment,
9068 LLT PtrTy) {
9069 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9070
9071 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9072 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9073
9074 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9075 // have to generate an extra instruction to negate the alloc and then use
9076 // G_PTR_ADD to add the negative offset.
9077 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9078 if (Alignment > Align(1)) {
9079 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9080 AlignMask.negate();
9081 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9082 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9083 }
9084
9085 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9086}
9087
9090 const auto &MF = *MI.getMF();
9091 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9092 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9093 return UnableToLegalize;
9094
9095 Register Dst = MI.getOperand(0).getReg();
9096 Register AllocSize = MI.getOperand(1).getReg();
9097 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9098
9099 LLT PtrTy = MRI.getType(Dst);
9100 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9101 Register SPTmp =
9102 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9103
9104 MIRBuilder.buildCopy(SPReg, SPTmp);
9105 MIRBuilder.buildCopy(Dst, SPTmp);
9106
9107 MI.eraseFromParent();
9108 return Legalized;
9109}
9110
9113 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9114 if (!StackPtr)
9115 return UnableToLegalize;
9116
9117 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9118 MI.eraseFromParent();
9119 return Legalized;
9120}
9121
9124 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9125 if (!StackPtr)
9126 return UnableToLegalize;
9127
9128 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9129 MI.eraseFromParent();
9130 return Legalized;
9131}
9132
9135 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9136 unsigned Offset = MI.getOperand(2).getImm();
9137
9138 // Extract sub-vector or one element
9139 if (SrcTy.isVector()) {
9140 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9141 unsigned DstSize = DstTy.getSizeInBits();
9142
9143 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9144 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9145 // Unmerge and allow access to each Src element for the artifact combiner.
9146 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9147
9148 // Take element(s) we need to extract and copy it (merge them).
9149 SmallVector<Register, 8> SubVectorElts;
9150 for (unsigned Idx = Offset / SrcEltSize;
9151 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9152 SubVectorElts.push_back(Unmerge.getReg(Idx));
9153 }
9154 if (SubVectorElts.size() == 1)
9155 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9156 else
9157 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9158
9159 MI.eraseFromParent();
9160 return Legalized;
9161 }
9162 }
9163
9164 if (DstTy.isScalar() &&
9165 (SrcTy.isScalar() ||
9166 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9167 LLT SrcIntTy = SrcTy;
9168 if (!SrcTy.isScalar()) {
9169 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9170 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9171 }
9172
9173 if (Offset == 0)
9174 MIRBuilder.buildTrunc(DstReg, SrcReg);
9175 else {
9176 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9177 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9178 MIRBuilder.buildTrunc(DstReg, Shr);
9179 }
9180
9181 MI.eraseFromParent();
9182 return Legalized;
9183 }
9184
9185 return UnableToLegalize;
9186}
9187
9189 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9190 uint64_t Offset = MI.getOperand(3).getImm();
9191
9192 LLT DstTy = MRI.getType(Src);
9193 LLT InsertTy = MRI.getType(InsertSrc);
9194
9195 // Insert sub-vector or one element
9196 if (DstTy.isVector() && !InsertTy.isPointer()) {
9197 LLT EltTy = DstTy.getElementType();
9198 unsigned EltSize = EltTy.getSizeInBits();
9199 unsigned InsertSize = InsertTy.getSizeInBits();
9200
9201 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9202 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9203 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9205 unsigned Idx = 0;
9206 // Elements from Src that come before the insert start (Offset)
9207 for (; Idx < Offset / EltSize; ++Idx) {
9208 DstElts.push_back(UnmergeSrc.getReg(Idx));
9209 }
9210
9211 // Replace elements in Src with elements from InsertSrc
9212 if (InsertTy.getSizeInBits() > EltSize) {
9213 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9214 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9215 ++Idx, ++i) {
9216 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9217 }
9218 } else {
9219 DstElts.push_back(InsertSrc);
9220 ++Idx;
9221 }
9222
9223 // Remaining elements from Src after insert
9224 for (; Idx < DstTy.getNumElements(); ++Idx) {
9225 DstElts.push_back(UnmergeSrc.getReg(Idx));
9226 }
9227
9228 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9229 MI.eraseFromParent();
9230 return Legalized;
9231 }
9232 }
9233
9234 if (InsertTy.isVector() ||
9235 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9236 return UnableToLegalize;
9237
9238 const DataLayout &DL = MIRBuilder.getDataLayout();
9239 if ((DstTy.isPointer() &&
9240 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9241 (InsertTy.isPointer() &&
9242 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9243 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9244 return UnableToLegalize;
9245 }
9246
9247 LLT IntDstTy = DstTy;
9248
9249 if (!DstTy.isScalar()) {
9250 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9251 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9252 }
9253
9254 if (!InsertTy.isScalar()) {
9255 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9256 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9257 }
9258
9259 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9260 if (Offset != 0) {
9261 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9262 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9263 }
9264
9265 APInt MaskVal = APInt::getBitsSetWithWrap(
9266 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9267
9268 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9269 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9270 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9271
9272 MIRBuilder.buildCast(Dst, Or);
9273 MI.eraseFromParent();
9274 return Legalized;
9275}
9276
9279 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9280 MI.getFirst4RegLLTs();
9281 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9282
9283 LLT Ty = Dst0Ty;
9284 LLT BoolTy = Dst1Ty;
9285
9286 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9287
9288 if (IsAdd)
9289 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9290 else
9291 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9292
9293 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9294
9295 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9296
9297 // For an addition, the result should be less than one of the operands (LHS)
9298 // if and only if the other operand (RHS) is negative, otherwise there will
9299 // be overflow.
9300 // For a subtraction, the result should be less than one of the operands
9301 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9302 // otherwise there will be overflow.
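  // Worked example (illustration only, i8 G_SADDO): LHS = 100, RHS = 50 wraps
  // to -106, which is less than LHS (true) while RHS < 0 is false, so the XOR
  // below is true and signed overflow is reported.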
9303 auto ResultLowerThanLHS =
9304 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9305 auto ConditionRHS = MIRBuilder.buildICmp(
9306 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9307
9308 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9309
9310 MIRBuilder.buildCopy(Dst0, NewDst0);
9311 MI.eraseFromParent();
9312
9313 return Legalized;
9314}
9315
9317 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9318 const LLT Ty = MRI.getType(Res);
9319
9320 // sum = LHS + RHS + zext(CarryIn)
9321 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9322 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9323 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9324 MIRBuilder.buildCopy(Res, Sum);
9325
9326 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
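  // (Signed overflow happens exactly when the sum's sign differs from the sign
  // of both operands, e.g. i8 100 + 27 + carry 1 wraps to -128; then both XORs
  // have the sign bit set, so the AND is negative.)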
9327 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9328 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9329 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9330
9331 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9332 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9333
9334 MI.eraseFromParent();
9335 return Legalized;
9336}
9337
9339 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9340 const LLT Ty = MRI.getType(Res);
9341
9342 // Diff = LHS - (RHS + zext(CarryIn))
9343 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9344 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9345 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9346 MIRBuilder.buildCopy(Res, Diff);
9347
9348 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9349 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9350 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9351 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9352 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9353 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9354
9355 MI.eraseFromParent();
9356 return Legalized;
9357}
9358
9361 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9362 LLT Ty = MRI.getType(Res);
9363 bool IsSigned;
9364 bool IsAdd;
9365 unsigned BaseOp;
9366 switch (MI.getOpcode()) {
9367 default:
9368 llvm_unreachable("unexpected addsat/subsat opcode");
9369 case TargetOpcode::G_UADDSAT:
9370 IsSigned = false;
9371 IsAdd = true;
9372 BaseOp = TargetOpcode::G_ADD;
9373 break;
9374 case TargetOpcode::G_SADDSAT:
9375 IsSigned = true;
9376 IsAdd = true;
9377 BaseOp = TargetOpcode::G_ADD;
9378 break;
9379 case TargetOpcode::G_USUBSAT:
9380 IsSigned = false;
9381 IsAdd = false;
9382 BaseOp = TargetOpcode::G_SUB;
9383 break;
9384 case TargetOpcode::G_SSUBSAT:
9385 IsSigned = true;
9386 IsAdd = false;
9387 BaseOp = TargetOpcode::G_SUB;
9388 break;
9389 }
9390
9391 if (IsSigned) {
9392 // sadd.sat(a, b) ->
9393 // hi = 0x7fffffff - smax(a, 0)
9394 // lo = 0x80000000 - smin(a, 0)
9395 // a + smin(smax(lo, b), hi)
9396 // ssub.sat(a, b) ->
9397 // lo = smax(a, -1) - 0x7fffffff
9398 // hi = smin(a, -1) - 0x80000000
9399 // a - smin(smax(lo, b), hi)
9400 // TODO: AMDGPU can use a "median of 3" instruction here:
9401 // a +/- med3(lo, b, hi)
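  // Worked example (illustration only, i8 G_SADDSAT): a = 100, b = 50 gives
  // hi = 127 - smax(100, 0) = 27 and lo = -128 - smin(100, 0) = -128;
  // clamping b yields smin(smax(-128, 50), 27) = 27, and a + 27 = 127, the
  // saturated INT8_MAX instead of the wrapped value -106.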
9402 uint64_t NumBits = Ty.getScalarSizeInBits();
9403 auto MaxVal =
9404 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9405 auto MinVal =
9406 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9407 MachineInstrBuilder Hi, Lo;
9408 if (IsAdd) {
9409 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9410 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9411 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9412 } else {
9413 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9414 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9415 MaxVal);
9416 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9417 MinVal);
9418 }
9419 auto RHSClamped =
9420 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9421 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9422 } else {
9423 // uadd.sat(a, b) -> a + umin(~a, b)
9424 // usub.sat(a, b) -> a - umin(a, b)
9425 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9426 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9427 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9428 }
9429
9430 MI.eraseFromParent();
9431 return Legalized;
9432}
9433
9436 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9437 LLT Ty = MRI.getType(Res);
9438 LLT BoolTy = Ty.changeElementSize(1);
9439 bool IsSigned;
9440 bool IsAdd;
9441 unsigned OverflowOp;
9442 switch (MI.getOpcode()) {
9443 default:
9444 llvm_unreachable("unexpected addsat/subsat opcode");
9445 case TargetOpcode::G_UADDSAT:
9446 IsSigned = false;
9447 IsAdd = true;
9448 OverflowOp = TargetOpcode::G_UADDO;
9449 break;
9450 case TargetOpcode::G_SADDSAT:
9451 IsSigned = true;
9452 IsAdd = true;
9453 OverflowOp = TargetOpcode::G_SADDO;
9454 break;
9455 case TargetOpcode::G_USUBSAT:
9456 IsSigned = false;
9457 IsAdd = false;
9458 OverflowOp = TargetOpcode::G_USUBO;
9459 break;
9460 case TargetOpcode::G_SSUBSAT:
9461 IsSigned = true;
9462 IsAdd = false;
9463 OverflowOp = TargetOpcode::G_SSUBO;
9464 break;
9465 }
9466
9467 auto OverflowRes =
9468 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9469 Register Tmp = OverflowRes.getReg(0);
9470 Register Ov = OverflowRes.getReg(1);
9471 MachineInstrBuilder Clamp;
9472 if (IsSigned) {
9473 // sadd.sat(a, b) ->
9474 // {tmp, ov} = saddo(a, b)
9475 // ov ? (tmp >>s 31) + 0x80000000 : r
9476 // ssub.sat(a, b) ->
9477 // {tmp, ov} = ssubo(a, b)
9478 // ov ? (tmp >>s 31) + 0x80000000 : r
9479 uint64_t NumBits = Ty.getScalarSizeInBits();
9480 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9481 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9482 auto MinVal =
9483 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9484 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9485 } else {
9486 // uadd.sat(a, b) ->
9487 // {tmp, ov} = uaddo(a, b)
9488 // ov ? 0xffffffff : tmp
9489 // usub.sat(a, b) ->
9490 // {tmp, ov} = usubo(a, b)
9491 // ov ? 0 : tmp
9492 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9493 }
9494 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9495
9496 MI.eraseFromParent();
9497 return Legalized;
9498}
9499
9502 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9503 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9504 "Expected shlsat opcode!");
9505 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9506 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9507 LLT Ty = MRI.getType(Res);
9508 LLT BoolTy = Ty.changeElementSize(1);
9509
9510 unsigned BW = Ty.getScalarSizeInBits();
9511 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9512 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9513 : MIRBuilder.buildLShr(Ty, Result, RHS);
9514
9515 MachineInstrBuilder SatVal;
9516 if (IsSigned) {
9517 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9518 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9519 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9520 MIRBuilder.buildConstant(Ty, 0));
9521 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9522 } else {
9523 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9524 }
9525 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9526 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9527
9528 MI.eraseFromParent();
9529 return Legalized;
9530}
9531
9533 auto [Dst, Src] = MI.getFirst2Regs();
9534 const LLT Ty = MRI.getType(Src);
9535 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9536 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9537
9538 // Swap most and least significant byte, set remaining bytes in Res to zero.
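  // Worked example (illustration only, i32 Src = 0x11223344): the first step
  // yields 0x44000011; the i = 1 iteration ORs in 0x00330000 (low byte 0x33
  // shifted up) and 0x00002200 (high byte 0x22 shifted down), producing the
  // byte-swapped value 0x44332211.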
9539 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9540 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9541 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9542 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9543
9544 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9545 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9546 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9547 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9548 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9549 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9550 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9551 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9552 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9553 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9554 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9555 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9556 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9557 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9558 }
9559 Res.getInstr()->getOperand(0).setReg(Dst);
9560
9561 MI.eraseFromParent();
9562 return Legalized;
9563}
9564
9565//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9566 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
9567 MachineInstrBuilder Src, const APInt &Mask) {
9568 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9569 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9570 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9571 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9572 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9573 return B.buildOr(Dst, LHS, RHS);
9574}
9575
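// Illustrative overview of the Size >= 8 scalar path below (not part of the
// emitted sequence): bitreverse(x) is built as SwapN(1, SwapN(2, SwapN(4,
// bswap(x)))). For i32 x = 0x00000001: bswap -> 0x01000000, swapping nibbles
// within bytes -> 0x10000000, swapping bit pairs within nibbles -> 0x40000000,
// swapping bits within pairs -> 0x80000000 = bitreverse(1).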
9578 auto [Dst, Src] = MI.getFirst2Regs();
9579 const LLT SrcTy = MRI.getType(Src);
9580 unsigned Size = SrcTy.getScalarSizeInBits();
9581 unsigned VSize = SrcTy.getSizeInBits();
9582
9583 if (Size >= 8) {
9584 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9585 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9586 {LLT::fixed_vector(VSize / 8, 8),
9587 LLT::fixed_vector(VSize / 8, 8)}}))) {
9588 // If bitreverse is legal for i8 vector of the same size, then cast
9589 // to i8 vector type.
9590 // e.g. v4s32 -> v16s8
9591 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9592 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9593 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9594 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9595 MIRBuilder.buildBitcast(Dst, RBIT);
9596 } else {
9597 MachineInstrBuilder BSWAP =
9598 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9599
9600 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9601 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9602 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9603 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9604 APInt::getSplat(Size, APInt(8, 0xF0)));
9605
9606 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9607 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
9608 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
9609 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9610 APInt::getSplat(Size, APInt(8, 0xCC)));
9611
9612 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9613 // 6|7
9614 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
9615 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
9616 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9617 }
9618 } else {
9619 // Expand bitreverse for types smaller than 8 bits.
9620 MachineInstrBuilder Tmp;
9621 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9622 MachineInstrBuilder Tmp2;
9623 if (I < J) {
9624 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9625 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9626 } else {
9627 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9628 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9629 }
9630
9631 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9632 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9633 if (I == 0)
9634 Tmp = Tmp2;
9635 else
9636 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9637 }
9638 MIRBuilder.buildCopy(Dst, Tmp);
9639 }
9640
9641 MI.eraseFromParent();
9642 return Legalized;
9643}
9644
9647 MachineFunction &MF = MIRBuilder.getMF();
9648
9649 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9650 int NameOpIdx = IsRead ? 1 : 0;
9651 int ValRegIndex = IsRead ? 0 : 1;
9652
9653 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9654 const LLT Ty = MRI.getType(ValReg);
9655 const MDString *RegStr = cast<MDString>(
9656 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9657
9658 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9659 if (!PhysReg) {
9660 const Function &Fn = MF.getFunction();
9662 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9663 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9664 Fn, MI.getDebugLoc()));
9665 if (IsRead)
9666 MIRBuilder.buildUndef(ValReg);
9667
9668 MI.eraseFromParent();
9669 return Legalized;
9670 }
9671
9672 if (IsRead)
9673 MIRBuilder.buildCopy(ValReg, PhysReg);
9674 else
9675 MIRBuilder.buildCopy(PhysReg, ValReg);
9676
9677 MI.eraseFromParent();
9678 return Legalized;
9679}
9680
9683 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9684 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9685 Register Result = MI.getOperand(0).getReg();
9686 LLT OrigTy = MRI.getType(Result);
9687 auto SizeInBits = OrigTy.getScalarSizeInBits();
9688 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9689
9690 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9691 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9692 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9693 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9694
9695 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9696 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9697 MIRBuilder.buildTrunc(Result, Shifted);
9698
9699 MI.eraseFromParent();
9700 return Legalized;
9701}
9702
9705 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9706 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9707
9708 if (Mask == fcNone) {
9709 MIRBuilder.buildConstant(DstReg, 0);
9710 MI.eraseFromParent();
9711 return Legalized;
9712 }
9713 if (Mask == fcAllFlags) {
9714 MIRBuilder.buildConstant(DstReg, 1);
9715 MI.eraseFromParent();
9716 return Legalized;
9717 }
9718
9719 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9720 // version
9721
9722 unsigned BitSize = SrcTy.getScalarSizeInBits();
9723 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9724
9725 LLT IntTy = LLT::scalar(BitSize);
9726 if (SrcTy.isVector())
9727 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9728 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9729
9730 // Various masks.
9731 APInt SignBit = APInt::getSignMask(BitSize);
9732 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9733 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9734 APInt ExpMask = Inf;
9735 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9736 APInt QNaNBitMask =
9737 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9738 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9739
9740 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9741 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9742 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9743 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9744 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9745
9746 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9747 auto Sign =
9748 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9749
9750 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9751 // Clang doesn't support capture of structured bindings:
9752 LLT DstTyCopy = DstTy;
9753 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9754 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9755 };
9756
9757 // Tests that involve more than one class should be processed first.
9758 if ((Mask & fcFinite) == fcFinite) {
9759 // finite(V) ==> abs(V) u< exp_mask
9760 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9761 ExpMaskC));
9762 Mask &= ~fcFinite;
9763 } else if ((Mask & fcFinite) == fcPosFinite) {
9764 // finite(V) && V > 0 ==> V u< exp_mask
9765 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9766 ExpMaskC));
9767 Mask &= ~fcPosFinite;
9768 } else if ((Mask & fcFinite) == fcNegFinite) {
9769 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9770 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9771 ExpMaskC);
9772 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9773 appendToRes(And);
9774 Mask &= ~fcNegFinite;
9775 }
9776
9777 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9778 // fcZero | fcSubnormal => test all exponent bits are 0
9779 // TODO: Handle sign bit specific cases
9780 // TODO: Handle inverted case
9781 if (PartialCheck == (fcZero | fcSubnormal)) {
9782 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9783 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9784 ExpBits, ZeroC));
9785 Mask &= ~PartialCheck;
9786 }
9787 }
9788
9789 // Check for individual classes.
9790 if (FPClassTest PartialCheck = Mask & fcZero) {
9791 if (PartialCheck == fcPosZero)
9792 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9793 AsInt, ZeroC));
9794 else if (PartialCheck == fcZero)
9795 appendToRes(
9796 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9797 else // fcNegZero
9798 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9799 AsInt, SignBitC));
9800 }
9801
9802 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9803 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9804 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
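// e.g. for f32 AllOneMantissa is 0x007FFFFF: subnormals have abs(V) in
// [0x1, 0x7FFFFF], so abs(V)-1 u< 0x7FFFFF holds, while +/-0 wraps to
// 0xFFFFFFFF and normal/inf/nan values give abs(V)-1 u>= 0x7FFFFF.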
9805 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9806 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9807 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9808 auto SubnormalRes =
9809 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9810 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9811 if (PartialCheck == fcNegSubnormal)
9812 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9813 appendToRes(SubnormalRes);
9814 }
9815
9816 if (FPClassTest PartialCheck = Mask & fcInf) {
9817 if (PartialCheck == fcPosInf)
9818 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9819 AsInt, InfC));
9820 else if (PartialCheck == fcInf)
9821 appendToRes(
9822 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9823 else { // fcNegInf
9824 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9825 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9826 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9827 AsInt, NegInfC));
9828 }
9829 }
9830
9831 if (FPClassTest PartialCheck = Mask & fcNan) {
9832 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9833 if (PartialCheck == fcNan) {
9834 // isnan(V) ==> abs(V) u> int(inf)
9835 appendToRes(
9836 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9837 } else if (PartialCheck == fcQNan) {
9838 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9839 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9840 InfWithQnanBitC));
9841 } else { // fcSNan
9842 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9843 // abs(V) u< (unsigned(Inf) | quiet_bit)
9844 auto IsNan =
9845 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9846 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9847 Abs, InfWithQnanBitC);
9848 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9849 }
9850 }
9851
9852 if (FPClassTest PartialCheck = Mask & fcNormal) {
9853 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9854 // (max_exp-1))
9855 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9856 auto ExpMinusOne = MIRBuilder.buildSub(
9857 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9858 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9859 auto NormalRes =
9860 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
9861 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9862 if (PartialCheck == fcNegNormal)
9863 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9864 else if (PartialCheck == fcPosNormal) {
9865 auto PosSign = MIRBuilder.buildXor(
9866 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
9867 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
9868 }
9869 appendToRes(NormalRes);
9870 }
9871
9872 MIRBuilder.buildCopy(DstReg, Res);
9873 MI.eraseFromParent();
9874 return Legalized;
9875}
9876
9877 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
9878 // Implement G_SELECT in terms of XOR, AND, OR.
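// i.e. Dst = (Op1 & Mask) | (Op2 & ~Mask), where Mask is the condition
// sign-extended to the full element width: all-ones selects Op1, all-zeros
// selects Op2.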
9879 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9880 MI.getFirst4RegLLTs();
9881
9882 bool IsEltPtr = DstTy.isPointerOrPointerVector();
9883 if (IsEltPtr) {
9884 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9885 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9886 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9887 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9888 DstTy = NewTy;
9889 }
9890
9891 if (MaskTy.isScalar()) {
9892 // Turn the scalar condition into a vector condition mask if needed.
9893
9894 Register MaskElt = MaskReg;
9895
9896 // The condition was potentially zero extended before, but we want a sign
9897 // extended boolean.
9898 if (MaskTy != LLT::scalar(1))
9899 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
9900
9901 // Continue the sign extension (or truncate) to match the data type.
9902 MaskElt =
9903 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
9904
9905 if (DstTy.isVector()) {
9906 // Generate a vector splat idiom.
9907 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9908 MaskReg = ShufSplat.getReg(0);
9909 } else {
9910 MaskReg = MaskElt;
9911 }
9912 MaskTy = DstTy;
9913 } else if (!DstTy.isVector()) {
9914 // Cannot handle the case that mask is a vector and dst is a scalar.
9915 return UnableToLegalize;
9916 }
9917
9918 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9919 return UnableToLegalize;
9920 }
9921
9922 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9923 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9924 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
9925 if (IsEltPtr) {
9926 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9927 MIRBuilder.buildIntToPtr(DstReg, Or);
9928 } else {
9929 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9930 }
9931 MI.eraseFromParent();
9932 return Legalized;
9933}
9934
9935 LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
9936 // Split DIVREM into individual instructions.
9937 unsigned Opcode = MI.getOpcode();
9938
9939 MIRBuilder.buildInstr(
9940 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9941 : TargetOpcode::G_UDIV,
9942 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9943 MIRBuilder.buildInstr(
9944 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9945 : TargetOpcode::G_UREM,
9946 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9947 MI.eraseFromParent();
9948 return Legalized;
9949}
9950
9951 LegalizerHelper::LegalizeResult
9952 LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
9953 // Expand %res = G_ABS %a into:
9954 // %v1 = G_ASHR %a, scalar_size-1
9955 // %v2 = G_ADD %a, %v1
9956 // %res = G_XOR %v2, %v1
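// e.g. for s8 %a = -5 (0xFB): %v1 = ashr(0xFB, 7) = 0xFF, %v2 = 0xFB + 0xFF
// = 0xFA (-6), %res = 0xFA ^ 0xFF = 0x05 (5); for non-negative %a, %v1 = 0
// and %res = %a.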
9957 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
9958 Register OpReg = MI.getOperand(1).getReg();
9959 auto ShiftAmt =
9960 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
9961 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
9962 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
9963 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
9964 MI.eraseFromParent();
9965 return Legalized;
9966}
9967
9968 LegalizerHelper::LegalizeResult
9969 LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
9970 // Expand %res = G_ABS %a into:
9971 // %v1 = G_CONSTANT 0
9972 // %v2 = G_SUB %v1, %a
9973 // %res = G_SMAX %a, %v2
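// e.g. for %a = -5: %v2 = 0 - (-5) = 5 and smax(-5, 5) = 5; for %a = 5,
// smax(5, -5) = 5.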
9974 Register SrcReg = MI.getOperand(1).getReg();
9975 LLT Ty = MRI.getType(SrcReg);
9976 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9977 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
9978 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
9979 MI.eraseFromParent();
9980 return Legalized;
9981}
9982
9983 LegalizerHelper::LegalizeResult
9984 LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
9985 Register SrcReg = MI.getOperand(1).getReg();
9986 Register DestReg = MI.getOperand(0).getReg();
9987 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
9988 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
9989 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
9990 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
9991 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
9992 MI.eraseFromParent();
9993 return Legalized;
9994}
9995
9996 LegalizerHelper::LegalizeResult
9997 LegalizerHelper::lowerAbsDiffToSelect(MachineInstr &MI) {
9998 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
9999 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10000 "Expected G_ABDS or G_ABDU instruction");
10001
10002 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10003 LLT Ty = MRI.getType(LHS);
10004
10005 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10006 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
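// e.g. abds(3, 7): sgt(3, 7) is false, so the select yields sub(7, 3) = 4.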
10007 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10008 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10009 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10010 ? CmpInst::ICMP_SGT
10011 : CmpInst::ICMP_UGT;
10012 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10013 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10014
10015 MI.eraseFromParent();
10016 return Legalized;
10017}
10018
10019 LegalizerHelper::LegalizeResult
10020 LegalizerHelper::lowerAbsDiffToMinMax(MachineInstr &MI) {
10021 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10022 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10023 "Expected G_ABDS or G_ABDU instruction");
10024
10025 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10026 LLT Ty = MRI.getType(LHS);
10027
10028 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10029 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
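// e.g. abdu(3, 7) = umax(3, 7) - umin(3, 7) = 7 - 3 = 4.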
10030 Register MaxReg, MinReg;
10031 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10032 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10033 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10034 } else {
10035 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10036 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10037 }
10038 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10039
10040 MI.eraseFromParent();
10041 return Legalized;
10042}
10043
10044 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
10045 Register SrcReg = MI.getOperand(1).getReg();
10046 Register DstReg = MI.getOperand(0).getReg();
10047
10048 LLT Ty = MRI.getType(DstReg);
10049
10050 // Reset sign bit
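// e.g. for f32 this ANDs with 0x7FFFFFFF, clearing only the IEEE-754 sign bit.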
10051 MIRBuilder.buildAnd(
10052 DstReg, SrcReg,
10053 MIRBuilder.buildConstant(
10054 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10055
10056 MI.eraseFromParent();
10057 return Legalized;
10058}
10059
10060 LegalizerHelper::LegalizeResult
10061 LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
10062 Register SrcReg = MI.getOperand(1).getReg();
10063 LLT SrcTy = MRI.getType(SrcReg);
10064 LLT DstTy = MRI.getType(SrcReg);
10065
10066 // The source could be a scalar if the IR type was <1 x sN>.
10067 if (SrcTy.isScalar()) {
10068 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10069 return UnableToLegalize; // FIXME: handle extension.
10070 // This can be just a plain copy.
10071 Observer.changingInstr(MI);
10072 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10073 Observer.changedInstr(MI);
10074 return Legalized;
10075 }
10076 return UnableToLegalize;
10077}
10078
10079 LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
10080 MachineFunction &MF = *MI.getMF();
10081 const DataLayout &DL = MIRBuilder.getDataLayout();
10082 LLVMContext &Ctx = MF.getFunction().getContext();
10083 Register ListPtr = MI.getOperand(1).getReg();
10084 LLT PtrTy = MRI.getType(ListPtr);
10085
10086 // ListPtr is a pointer to the head of the list. Get the address
10087 // of the head of the list.
10088 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10089 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10090 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10091 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10092
10093 const Align A(MI.getOperand(2).getImm());
10094 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10095 if (A > TLI.getMinStackArgumentAlignment()) {
10096 Register AlignAmt =
10097 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10098 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10099 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10100 VAList = AndDst.getReg(0);
10101 }
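// If the branch above was taken, VAList has been rounded up to a multiple of
// A: e.g. 0x1003 with A = 8 becomes (0x1003 + 7) & ~7 = 0x1008.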
10102
10103 // Increment the pointer, VAList, to the next vaarg
10104 // The list should be bumped by the size of the element in the current head
10105 // of the list.
10106 Register Dst = MI.getOperand(0).getReg();
10107 LLT LLTTy = MRI.getType(Dst);
10108 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10109 auto IncAmt =
10110 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10111 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10112
10113 // Store the increment VAList to the legalized pointer
10114 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10115 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10116 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10117 // Load the actual argument out of the pointer VAList
10118 Align EltAlignment = DL.getABITypeAlign(Ty);
10119 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10120 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10121 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10122
10123 MI.eraseFromParent();
10124 return Legalized;
10125}
10126
10127 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10128 // On Darwin, -Os means optimize for size without hurting performance, so
10129 // only really optimize for size when -Oz (MinSize) is used.
10130 if (MF.getTarget().getTargetTriple().isOSDarwin())
10131 return MF.getFunction().hasMinSize();
10132 return MF.getFunction().hasOptSize();
10133}
10134
10135// Returns a list of types to use for memory op lowering in MemOps. A partial
10136// port of findOptimalMemOpLowering in TargetLowering.
10137static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10138 unsigned Limit, const MemOp &Op,
10139 unsigned DstAS, unsigned SrcAS,
10140 const AttributeList &FuncAttributes,
10141 const TargetLowering &TLI) {
10142 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10143 return false;
10144
10145 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10146
10147 if (Ty == LLT()) {
10148 // Use the largest scalar type whose alignment constraints are satisfied.
10149 // We only need to check DstAlign here as SrcAlign is always greater or
10150 // equal to DstAlign (or zero).
10151 Ty = LLT::scalar(64);
10152 if (Op.isFixedDstAlign())
10153 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10154 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10155 Ty = LLT::scalar(Ty.getSizeInBytes());
10156 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10157 // FIXME: check for the largest legal type we can load/store to.
10158 }
10159
10160 unsigned NumMemOps = 0;
10161 uint64_t Size = Op.size();
10162 while (Size) {
10163 unsigned TySize = Ty.getSizeInBytes();
10164 while (TySize > Size) {
10165 // For now, only use non-vector load / store's for the left-over pieces.
10166 LLT NewTy = Ty;
10167 // FIXME: check for mem op safety and legality of the types. Not all of
10168 // SDAGisms map cleanly to GISel concepts.
10169 if (NewTy.isVector())
10170 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10171 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10172 unsigned NewTySize = NewTy.getSizeInBytes();
10173 assert(NewTySize > 0 && "Could not find appropriate type");
10174
10175 // If the new LLT cannot cover all of the remaining bits, then consider
10176 // issuing a (or a pair of) unaligned and overlapping load / store.
10177 unsigned Fast;
10178 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10179 MVT VT = getMVTForLLT(Ty);
10180 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10181 TLI.allowsMisalignedMemoryAccesses(
10182 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10183 MachineMemOperand::MONone, &Fast) &&
10184 Fast)
10185 TySize = Size;
10186 else {
10187 Ty = NewTy;
10188 TySize = NewTySize;
10189 }
10190 }
10191
10192 if (++NumMemOps > Limit)
10193 return false;
10194
10195 MemOps.push_back(Ty);
10196 Size -= TySize;
10197 }
10198
10199 return true;
10200}
10201
10202// Get a vectorized representation of the memset value operand, GISel edition.
10203 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10204 MachineRegisterInfo &MRI = *MIB.getMRI();
10205 unsigned NumBits = Ty.getScalarSizeInBits();
10206 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10207 if (!Ty.isVector() && ValVRegAndVal) {
10208 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10209 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10210 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10211 }
10212
10213 // Extend the byte value to the larger type, and then multiply by a magic
10214 // value 0x010101... in order to replicate it across every byte.
10215 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
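// e.g. a byte value of 0xAB zero-extended to 32 bits and multiplied by the
// magic constant 0x01010101 yields the splat 0xABABABAB.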
10216 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10217 return MIB.buildConstant(Ty, 0).getReg(0);
10218 }
10219
10220 LLT ExtType = Ty.getScalarType();
10221 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10222 if (NumBits > 8) {
10223 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10224 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10225 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10226 }
10227
10228 // For vector types create a G_BUILD_VECTOR.
10229 if (Ty.isVector())
10230 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10231
10232 return Val;
10233}
10234
10235 LegalizerHelper::LegalizeResult
10236LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10237 uint64_t KnownLen, Align Alignment,
10238 bool IsVolatile) {
10239 auto &MF = *MI.getParent()->getParent();
10240 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10241 auto &DL = MF.getDataLayout();
10242 LLVMContext &C = MF.getFunction().getContext();
10243
10244 assert(KnownLen != 0 && "Have a zero length memset length!");
10245
10246 bool DstAlignCanChange = false;
10247 MachineFrameInfo &MFI = MF.getFrameInfo();
10248 bool OptSize = shouldLowerMemFuncForSize(MF);
10249
10250 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10251 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10252 DstAlignCanChange = true;
10253
10254 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10255 std::vector<LLT> MemOps;
10256
10257 const auto &DstMMO = **MI.memoperands_begin();
10258 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10259
10260 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10261 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10262
10263 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10264 MemOp::Set(KnownLen, DstAlignCanChange,
10265 Alignment,
10266 /*IsZeroMemset=*/IsZeroVal,
10267 /*IsVolatile=*/IsVolatile),
10268 DstPtrInfo.getAddrSpace(), ~0u,
10269 MF.getFunction().getAttributes(), TLI))
10270 return UnableToLegalize;
10271
10272 if (DstAlignCanChange) {
10273 // Get an estimate of the type from the LLT.
10274 Type *IRTy = getTypeForLLT(MemOps[0], C);
10275 Align NewAlign = DL.getABITypeAlign(IRTy);
10276 if (NewAlign > Alignment) {
10277 Alignment = NewAlign;
10278 unsigned FI = FIDef->getOperand(1).getIndex();
10279 // Give the stack frame object a larger alignment if needed.
10280 if (MFI.getObjectAlign(FI) < Alignment)
10281 MFI.setObjectAlignment(FI, Alignment);
10282 }
10283 }
10284
10285 MachineIRBuilder MIB(MI);
10286 // Find the largest store and generate the bit pattern for it.
10287 LLT LargestTy = MemOps[0];
10288 for (unsigned i = 1; i < MemOps.size(); i++)
10289 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10290 LargestTy = MemOps[i];
10291
10292 // The memset stored value is always defined as an s8, so in order to make it
10293 // work with larger store types we need to repeat the bit pattern across the
10294 // wider type.
10295 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10296
10297 if (!MemSetValue)
10298 return UnableToLegalize;
10299
10300 // Generate the stores. For each store type in the list, we generate the
10301 // matching store of that type to the destination address.
10302 LLT PtrTy = MRI.getType(Dst);
10303 unsigned DstOff = 0;
10304 unsigned Size = KnownLen;
10305 for (unsigned I = 0; I < MemOps.size(); I++) {
10306 LLT Ty = MemOps[I];
10307 unsigned TySize = Ty.getSizeInBytes();
10308 if (TySize > Size) {
10309 // Issuing an unaligned load / store pair that overlaps with the previous
10310 // pair. Adjust the offset accordingly.
10311 assert(I == MemOps.size() - 1 && I != 0);
10312 DstOff -= TySize - Size;
10313 }
10314
10315 // If this store is smaller than the largest store see whether we can get
10316 // the smaller value for free with a truncate.
10317 Register Value = MemSetValue;
10318 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10319 MVT VT = getMVTForLLT(Ty);
10320 MVT LargestVT = getMVTForLLT(LargestTy);
10321 if (!LargestTy.isVector() && !Ty.isVector() &&
10322 TLI.isTruncateFree(LargestVT, VT))
10323 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10324 else
10325 Value = getMemsetValue(Val, Ty, MIB);
10326 if (!Value)
10327 return UnableToLegalize;
10328 }
10329
10330 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10331
10332 Register Ptr = Dst;
10333 if (DstOff != 0) {
10334 auto Offset =
10335 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10336 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10337 }
10338
10339 MIB.buildStore(Value, Ptr, *StoreMMO);
10340 DstOff += Ty.getSizeInBytes();
10341 Size -= TySize;
10342 }
10343
10344 MI.eraseFromParent();
10345 return Legalized;
10346}
10347
10348 LegalizerHelper::LegalizeResult
10349LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10350 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10351
10352 auto [Dst, Src, Len] = MI.getFirst3Regs();
10353
10354 const auto *MMOIt = MI.memoperands_begin();
10355 const MachineMemOperand *MemOp = *MMOIt;
10356 bool IsVolatile = MemOp->isVolatile();
10357
10358 // See if this is a constant length copy
10359 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10360 // FIXME: support dynamically sized G_MEMCPY_INLINE
10361 assert(LenVRegAndVal &&
10362 "inline memcpy with dynamic size is not yet supported");
10363 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10364 if (KnownLen == 0) {
10365 MI.eraseFromParent();
10366 return Legalized;
10367 }
10368
10369 const auto &DstMMO = **MI.memoperands_begin();
10370 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10371 Align DstAlign = DstMMO.getBaseAlign();
10372 Align SrcAlign = SrcMMO.getBaseAlign();
10373
10374 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10375 IsVolatile);
10376}
10377
10378 LegalizerHelper::LegalizeResult
10379LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10380 uint64_t KnownLen, Align DstAlign,
10381 Align SrcAlign, bool IsVolatile) {
10382 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10383 return lowerMemcpy(MI, Dst, Src, KnownLen,
10384 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10385 IsVolatile);
10386}
10387
10388 LegalizerHelper::LegalizeResult
10389LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10390 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10391 Align SrcAlign, bool IsVolatile) {
10392 auto &MF = *MI.getParent()->getParent();
10393 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10394 auto &DL = MF.getDataLayout();
10395 LLVMContext &C = MF.getFunction().getContext();
10396
10397 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10398
10399 bool DstAlignCanChange = false;
10400 MachineFrameInfo &MFI = MF.getFrameInfo();
10401 Align Alignment = std::min(DstAlign, SrcAlign);
10402
10403 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10404 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10405 DstAlignCanChange = true;
10406
10407 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10408 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10409 // if the memcpy is in a tail call position.
10410
10411 std::vector<LLT> MemOps;
10412
10413 const auto &DstMMO = **MI.memoperands_begin();
10414 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10415 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10416 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10417
10418 if (!findGISelOptimalMemOpLowering(
10419 MemOps, Limit,
10420 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10421 IsVolatile),
10422 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10423 MF.getFunction().getAttributes(), TLI))
10424 return UnableToLegalize;
10425
10426 if (DstAlignCanChange) {
10427 // Get an estimate of the type from the LLT.
10428 Type *IRTy = getTypeForLLT(MemOps[0], C);
10429 Align NewAlign = DL.getABITypeAlign(IRTy);
10430
10431 // Don't promote to an alignment that would require dynamic stack
10432 // realignment.
10433 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10434 if (!TRI->hasStackRealignment(MF))
10435 if (MaybeAlign StackAlign = DL.getStackAlignment())
10436 NewAlign = std::min(NewAlign, *StackAlign);
10437
10438 if (NewAlign > Alignment) {
10439 Alignment = NewAlign;
10440 unsigned FI = FIDef->getOperand(1).getIndex();
10441 // Give the stack frame object a larger alignment if needed.
10442 if (MFI.getObjectAlign(FI) < Alignment)
10443 MFI.setObjectAlignment(FI, Alignment);
10444 }
10445 }
10446
10447 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10448
10449 MachineIRBuilder MIB(MI);
10450 // Now we need to emit a pair of load and stores for each of the types we've
10451 // collected. I.e. for each type, generate a load from the source pointer of
10452 // that type width, and then generate a corresponding store to the dest buffer
10453 // of that value loaded. This can result in a sequence of loads and stores
10454 // of mixed types, depending on what the target specifies as good types to use.
10455 unsigned CurrOffset = 0;
10456 unsigned Size = KnownLen;
10457 for (auto CopyTy : MemOps) {
10458 // Issuing an unaligned load / store pair that overlaps with the previous
10459 // pair. Adjust the offset accordingly.
10460 if (CopyTy.getSizeInBytes() > Size)
10461 CurrOffset -= CopyTy.getSizeInBytes() - Size;
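// e.g. a 10-byte copy done with two 8-byte ops: after the first op only 2
// bytes remain, so CurrOffset backs up from 8 to 2 and the second op covers
// bytes 2..9, overlapping part of the first.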
10462
10463 // Construct MMOs for the accesses.
10464 auto *LoadMMO =
10465 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10466 auto *StoreMMO =
10467 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10468
10469 // Create the load.
10470 Register LoadPtr = Src;
10471 Register Offset;
10472 if (CurrOffset != 0) {
10473 LLT SrcTy = MRI.getType(Src);
10474 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10475 .getReg(0);
10476 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10477 }
10478 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10479
10480 // Create the store.
10481 Register StorePtr = Dst;
10482 if (CurrOffset != 0) {
10483 LLT DstTy = MRI.getType(Dst);
10484 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10485 }
10486 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10487 CurrOffset += CopyTy.getSizeInBytes();
10488 Size -= CopyTy.getSizeInBytes();
10489 }
10490
10491 MI.eraseFromParent();
10492 return Legalized;
10493}
10494
10495 LegalizerHelper::LegalizeResult
10496LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10497 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10498 bool IsVolatile) {
10499 auto &MF = *MI.getParent()->getParent();
10500 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10501 auto &DL = MF.getDataLayout();
10502 LLVMContext &C = MF.getFunction().getContext();
10503
10504 assert(KnownLen != 0 && "Have a zero length memmove length!");
10505
10506 bool DstAlignCanChange = false;
10507 MachineFrameInfo &MFI = MF.getFrameInfo();
10508 bool OptSize = shouldLowerMemFuncForSize(MF);
10509 Align Alignment = std::min(DstAlign, SrcAlign);
10510
10511 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10512 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10513 DstAlignCanChange = true;
10514
10515 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10516 std::vector<LLT> MemOps;
10517
10518 const auto &DstMMO = **MI.memoperands_begin();
10519 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10520 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10521 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10522
10523 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10524 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10525 // same thing here.
10526 if (!findGISelOptimalMemOpLowering(
10527 MemOps, Limit,
10528 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10529 /*IsVolatile*/ true),
10530 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10531 MF.getFunction().getAttributes(), TLI))
10532 return UnableToLegalize;
10533
10534 if (DstAlignCanChange) {
10535 // Get an estimate of the type from the LLT.
10536 Type *IRTy = getTypeForLLT(MemOps[0], C);
10537 Align NewAlign = DL.getABITypeAlign(IRTy);
10538
10539 // Don't promote to an alignment that would require dynamic stack
10540 // realignment.
10541 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10542 if (!TRI->hasStackRealignment(MF))
10543 if (MaybeAlign StackAlign = DL.getStackAlignment())
10544 NewAlign = std::min(NewAlign, *StackAlign);
10545
10546 if (NewAlign > Alignment) {
10547 Alignment = NewAlign;
10548 unsigned FI = FIDef->getOperand(1).getIndex();
10549 // Give the stack frame object a larger alignment if needed.
10550 if (MFI.getObjectAlign(FI) < Alignment)
10551 MFI.setObjectAlignment(FI, Alignment);
10552 }
10553 }
10554
10555 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10556
10557 MachineIRBuilder MIB(MI);
10558 // Memmove requires that we perform the loads first before issuing the stores.
10559 // Apart from that, this loop is pretty much doing the same thing as the
10560 // memcpy codegen function.
10561 unsigned CurrOffset = 0;
10562 SmallVector<Register, 16> LoadVals;
10563 for (auto CopyTy : MemOps) {
10564 // Construct MMO for the load.
10565 auto *LoadMMO =
10566 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10567
10568 // Create the load.
10569 Register LoadPtr = Src;
10570 if (CurrOffset != 0) {
10571 LLT SrcTy = MRI.getType(Src);
10572 auto Offset =
10573 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10574 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10575 }
10576 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10577 CurrOffset += CopyTy.getSizeInBytes();
10578 }
10579
10580 CurrOffset = 0;
10581 for (unsigned I = 0; I < MemOps.size(); ++I) {
10582 LLT CopyTy = MemOps[I];
10583 // Now store the values loaded.
10584 auto *StoreMMO =
10585 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10586
10587 Register StorePtr = Dst;
10588 if (CurrOffset != 0) {
10589 LLT DstTy = MRI.getType(Dst);
10590 auto Offset =
10591 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10592 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10593 }
10594 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10595 CurrOffset += CopyTy.getSizeInBytes();
10596 }
10597 MI.eraseFromParent();
10598 return Legalized;
10599}
10600
10601 LegalizerHelper::LegalizeResult
10602 LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
10603 const unsigned Opc = MI.getOpcode();
10604 // This combine is fairly complex so it's not written with a separate
10605 // matcher function.
10606 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10607 Opc == TargetOpcode::G_MEMSET) &&
10608 "Expected memcpy like instruction");
10609
10610 auto MMOIt = MI.memoperands_begin();
10611 const MachineMemOperand *MemOp = *MMOIt;
10612
10613 Align DstAlign = MemOp->getBaseAlign();
10614 Align SrcAlign;
10615 auto [Dst, Src, Len] = MI.getFirst3Regs();
10616
10617 if (Opc != TargetOpcode::G_MEMSET) {
10618 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10619 MemOp = *(++MMOIt);
10620 SrcAlign = MemOp->getBaseAlign();
10621 }
10622
10623 // See if this is a constant length copy
10624 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10625 if (!LenVRegAndVal)
10626 return UnableToLegalize;
10627 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10628
10629 if (KnownLen == 0) {
10630 MI.eraseFromParent();
10631 return Legalized;
10632 }
10633
10634 if (MaxLen && KnownLen > MaxLen)
10635 return UnableToLegalize;
10636
10637 bool IsVolatile = MemOp->isVolatile();
10638 if (Opc == TargetOpcode::G_MEMCPY) {
10639 auto &MF = *MI.getParent()->getParent();
10640 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10641 bool OptSize = shouldLowerMemFuncForSize(MF);
10642 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10643 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10644 IsVolatile);
10645 }
10646 if (Opc == TargetOpcode::G_MEMMOVE)
10647 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10648 if (Opc == TargetOpcode::G_MEMSET)
10649 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10650 return UnableToLegalize;
10651}
unsigned const MachineRegisterInfo * MRI
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
APInt bitcastToAPInt() const
Definition APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1098
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
void negate()
Negate this APInt in place.
Definition APInt.h:1468
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:681
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:692
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:686
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:689
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:685
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:687
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:694
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:691
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:688
bool isSigned() const
Definition InstrTypes.h:932
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791
const APFloat & getValueAPF() const
Definition Constants.h:320
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:208
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
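Taken together, the three result codes above drive a legalization loop. A minimal sketch of how a caller might react to them; Helper, LocObserver, the Worklist and the Changed flag are all assumed to exist and are not part of this file:
for (MachineInstr *MI : Worklist) {                        // hypothetical worklist
  auto Res = Helper.legalizeInstrStep(*MI, LocObserver);
  if (Res == LegalizerHelper::UnableToLegalize)
    report_fatal_error("unable to legalize instruction");  // give up on this function
  Changed |= (Res == LegalizerHelper::Legalized);          // AlreadyLegal leaves the MF untouched
}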
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar type as the original but with fewer elements.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addition can be safely performed at 32-bits precision, followed by a truncation).
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with undefined high elements, extracting the original vector type, and replacing the vreg of the operand in place.
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base address of VecPtr.
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's type to WideTy and truncating it back with the TruncOpcode, and replacing the vreg of the operand in place.
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:97
A single uniqued string.
Definition Metadata.h:720
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:617
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
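A small sketch of chaining the builder calls listed above to emit a rotate-left by a constant amount; Dst and Src are assumed to be s32 virtual registers, and the integer convenience overload of buildConstant is assumed:
LLT S32 = LLT::scalar(32);
auto ShAmt = MIRBuilder.buildConstant(S32, 5);             // G_CONSTANT 5
auto InvAmt = MIRBuilder.buildConstant(S32, 32 - 5);       // G_CONSTANT 27
auto Hi = MIRBuilder.buildShl(S32, Src, ShAmt);            // Src << 5
auto Lo = MIRBuilder.buildLShr(S32, Src, InvAmt);          // Src >> 27
MIRBuilder.buildOr(Dst, Hi, Lo);                           // rotate-left of Src by 5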
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:414
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:611
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:283
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:831
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2033
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:651
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1657
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:232
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1565
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1622
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1152
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1189
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:368
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
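For reference, a few concrete values for the power-of-two helpers listed nearby:
bool P = isPowerOf2_32(48);     // false (48 = 16 * 3)
unsigned K = Log2_32(64);       // 6
uint64_t C = PowerOf2Ceil(33);  // 64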
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed).
Definition Utils.cpp:506
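For example (the register names are placeholders), a 128-bit value can be split into four s32 blocks and re-assembled with the merge-like builder mentioned earlier:
SmallVector<Register, 4> Parts;
extractParts(WideSrc, LLT::scalar(32), 4, Parts, MIRBuilder, MRI);  // WideSrc: an s128 vreg
MIRBuilder.buildMergeLikeInstr(WideDst, Parts);                     // rebuild the s128 value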
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1815
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:241
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
Definition Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:352
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
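Illustrative values for the alignment helpers above:
Align A = assumeAligned(0);                // 0 is treated as Align(1)
Align B = commonAlignment(Align(16), 24);  // an offset of 24 only guarantees Align(8)
unsigned L = Log2(Align(16));              // 4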
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1277
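For plain scalars these reduce to the LCM and GCD of the bit widths; for example:
LLT Big = getLCMType(LLT::scalar(96), LLT::scalar(32));    // s96
LLT Small = getGCDType(LLT::scalar(96), LLT::scalar(32));  // s32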
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:299
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:609
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
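A hedged sketch of narrowing a double constant to float with the APFloat pieces above, similar in spirit to the FP-constant handling in this file:
bool LosesInfo = false;
APFloat V(3.14159);                                        // starts in IEEEdouble
APFloat::opStatus St =
    V.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
// St is opInexact and LosesInfo is true: the value is not exactly representable as float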
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)