1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
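///
/// For example (an illustrative sizing, not taken from a specific caller):
/// breaking an s88 \p OrigTy into s32 \p NarrowTy pieces yields {2, 1}, with
/// \p LeftoverTy set to s24.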
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
83
84Type *llvm::getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
105LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113 GISelChangeObserver &Observer,
114 MachineIRBuilder &B, GISelValueTracking *VT)
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
117
118LegalizerHelper::LegalizeResult
119LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
120 LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
122
123 MIRBuilder.setInstrAndDebugLoc(MI);
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
156 : UnableToLegalize;
157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
183 // Merge sub-vectors with different numbers of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs);
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 0> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 32> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
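  // (Illustrative sizing, not tied to a particular target: an s96 value split
  // into three s32 GCD pieces and remerged as the s64 parts of an s192 LCM
  // type needs 3 * 2 = 6 pieces, so three of them must be padding.)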
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
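// For instance, with Size == 32 the RTLIBCASE(ADD_F) expansion above reduces
// to `return RTLIB::ADD_F32;`.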
412
413 switch (Opcode) {
414 case TargetOpcode::G_LROUND:
415 RTLIBCASE(LROUND_F);
416 case TargetOpcode::G_LLROUND:
417 RTLIBCASE(LLROUND_F);
418 case TargetOpcode::G_MUL:
419 RTLIBCASE_INT(MUL_I);
420 case TargetOpcode::G_SDIV:
421 RTLIBCASE_INT(SDIV_I);
422 case TargetOpcode::G_UDIV:
423 RTLIBCASE_INT(UDIV_I);
424 case TargetOpcode::G_SREM:
425 RTLIBCASE_INT(SREM_I);
426 case TargetOpcode::G_UREM:
427 RTLIBCASE_INT(UREM_I);
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
429 RTLIBCASE_INT(CTLZ_I);
430 case TargetOpcode::G_FADD:
431 RTLIBCASE(ADD_F);
432 case TargetOpcode::G_FSUB:
433 RTLIBCASE(SUB_F);
434 case TargetOpcode::G_FMUL:
435 RTLIBCASE(MUL_F);
436 case TargetOpcode::G_FDIV:
437 RTLIBCASE(DIV_F);
438 case TargetOpcode::G_FEXP:
439 RTLIBCASE(EXP_F);
440 case TargetOpcode::G_FEXP2:
441 RTLIBCASE(EXP2_F);
442 case TargetOpcode::G_FEXP10:
443 RTLIBCASE(EXP10_F);
444 case TargetOpcode::G_FREM:
445 RTLIBCASE(REM_F);
446 case TargetOpcode::G_FPOW:
447 RTLIBCASE(POW_F);
448 case TargetOpcode::G_FPOWI:
449 RTLIBCASE(POWI_F);
450 case TargetOpcode::G_FMA:
451 RTLIBCASE(FMA_F);
452 case TargetOpcode::G_FSIN:
453 RTLIBCASE(SIN_F);
454 case TargetOpcode::G_FCOS:
455 RTLIBCASE(COS_F);
456 case TargetOpcode::G_FTAN:
457 RTLIBCASE(TAN_F);
458 case TargetOpcode::G_FASIN:
459 RTLIBCASE(ASIN_F);
460 case TargetOpcode::G_FACOS:
461 RTLIBCASE(ACOS_F);
462 case TargetOpcode::G_FATAN:
463 RTLIBCASE(ATAN_F);
464 case TargetOpcode::G_FATAN2:
465 RTLIBCASE(ATAN2_F);
466 case TargetOpcode::G_FSINH:
467 RTLIBCASE(SINH_F);
468 case TargetOpcode::G_FCOSH:
469 RTLIBCASE(COSH_F);
470 case TargetOpcode::G_FTANH:
471 RTLIBCASE(TANH_F);
472 case TargetOpcode::G_FSINCOS:
473 RTLIBCASE(SINCOS_F);
474 case TargetOpcode::G_FLOG10:
475 RTLIBCASE(LOG10_F);
476 case TargetOpcode::G_FLOG:
477 RTLIBCASE(LOG_F);
478 case TargetOpcode::G_FLOG2:
479 RTLIBCASE(LOG2_F);
480 case TargetOpcode::G_FLDEXP:
481 RTLIBCASE(LDEXP_F);
482 case TargetOpcode::G_FCEIL:
483 RTLIBCASE(CEIL_F);
484 case TargetOpcode::G_FFLOOR:
485 RTLIBCASE(FLOOR_F);
486 case TargetOpcode::G_FMINNUM:
487 RTLIBCASE(FMIN_F);
488 case TargetOpcode::G_FMAXNUM:
489 RTLIBCASE(FMAX_F);
490 case TargetOpcode::G_FSQRT:
491 RTLIBCASE(SQRT_F);
492 case TargetOpcode::G_FRINT:
493 RTLIBCASE(RINT_F);
494 case TargetOpcode::G_FNEARBYINT:
495 RTLIBCASE(NEARBYINT_F);
496 case TargetOpcode::G_INTRINSIC_TRUNC:
497 RTLIBCASE(TRUNC_F);
498 case TargetOpcode::G_INTRINSIC_ROUND:
499 RTLIBCASE(ROUND_F);
500 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
501 RTLIBCASE(ROUNDEVEN_F);
502 case TargetOpcode::G_INTRINSIC_LRINT:
503 RTLIBCASE(LRINT_F);
504 case TargetOpcode::G_INTRINSIC_LLRINT:
505 RTLIBCASE(LLRINT_F);
506 }
507 llvm_unreachable("Unknown libcall function");
508#undef RTLIBCASE_INT
509#undef RTLIBCASE
510}
511
512/// True if an instruction is in tail position in its caller. Intended for
513/// legalizing libcalls as tail calls when possible.
514static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
515 MachineInstr &MI,
516 const TargetInstrInfo &TII,
517 MachineRegisterInfo &MRI) {
518 MachineBasicBlock &MBB = *MI.getParent();
519 const Function &F = MBB.getParent()->getFunction();
520
521 // Conservatively require the attributes of the call to match those of
522 // the return. Ignore NoAlias and NonNull because they don't affect the
523 // call sequence.
524 AttributeList CallerAttrs = F.getAttributes();
525 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
526 .removeAttribute(Attribute::NoAlias)
527 .removeAttribute(Attribute::NonNull)
528 .hasAttributes())
529 return false;
530
531 // It's not safe to eliminate the sign / zero extension of the return value.
532 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
533 CallerAttrs.hasRetAttr(Attribute::SExt))
534 return false;
535
536 // Only tail call if the following instruction is a standard return or if we
537 // have a `thisreturn` callee, and a sequence like:
538 //
539 // G_MEMCPY %0, %1, %2
540 // $x0 = COPY %0
541 // RET_ReallyLR implicit $x0
542 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
543 if (Next != MBB.instr_end() && Next->isCopy()) {
544 if (MI.getOpcode() == TargetOpcode::G_BZERO)
545 return false;
546
547 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
548 // memcpy/etc routines return the same parameter. For others it will be the
549 // returned value.
550 Register VReg = MI.getOperand(0).getReg();
551 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
552 return false;
553
554 Register PReg = Next->getOperand(0).getReg();
555 if (!PReg.isPhysical())
556 return false;
557
558 auto Ret = next_nodbg(Next, MBB.instr_end());
559 if (Ret == MBB.instr_end() || !Ret->isReturn())
560 return false;
561
562 if (Ret->getNumImplicitOperands() != 1)
563 return false;
564
565 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
566 return false;
567
568 // Skip over the COPY that we just validated.
569 Next = Ret;
570 }
571
572 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
573 return false;
574
575 return true;
576}
577
578LegalizerHelper::LegalizeResult
579llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
580 const CallLowering::ArgInfo &Result,
581 ArrayRef<CallLowering::ArgInfo> Args,
582 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
583 MachineInstr *MI) {
584 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
585
586 CallLowering::CallLoweringInfo Info;
587 Info.CallConv = CC;
588 Info.Callee = MachineOperand::CreateES(Name);
589 Info.OrigRet = Result;
590 if (MI)
591 Info.IsTailCall =
592 (Result.Ty->isVoidTy() ||
593 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
594 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
595 *MIRBuilder.getMRI());
596
597 llvm::append_range(Info.OrigArgs, Args);
598 if (!CLI.lowerCall(MIRBuilder, Info))
599 return LegalizerHelper::UnableToLegalize;
600
601 if (MI && Info.LoweredTailCall) {
602 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
603
604 // Check debug locations before removing the return.
605 LocObserver.checkpoint(true);
606
607 // We must have a return following the call (or debug insts) to get past
608 // isLibCallInTailPosition.
609 do {
610 MachineInstr *Next = MI->getNextNode();
611 assert(Next &&
612 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
613 "Expected instr following MI to be return or debug inst?");
614 // We lowered a tail call, so the call is now the return from the block.
615 // Delete the old return.
616 Next->eraseFromParent();
617 } while (MI->getNextNode());
618
619 // We expect to lose the debug location from the return.
620 LocObserver.checkpoint(false);
621 }
622 return LegalizerHelper::Legalized;
623}
624
625LegalizerHelper::LegalizeResult
626llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
627 const CallLowering::ArgInfo &Result,
628 ArrayRef<CallLowering::ArgInfo> Args,
629 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
630 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
631 const char *Name = TLI.getLibcallName(Libcall);
632 if (!Name)
633 return LegalizerHelper::UnableToLegalize;
634 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
635 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
636}
637
638// Useful for libcalls where all operands have the same type.
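// (For example, an s64 G_FREM becomes a call to the REM_F64 libcall, i.e.
// fmod on most targets.)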
639static LegalizerHelper::LegalizeResult
640simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
641 Type *OpType, LostDebugLocObserver &LocObserver) {
642 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
643
644 // FIXME: What does the original arg index mean here?
645 SmallVector<CallLowering::ArgInfo, 3> Args;
646 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
647 Args.push_back({MO.getReg(), OpType, 0});
648 return createLibcall(MIRBuilder, Libcall,
649 {MI.getOperand(0).getReg(), OpType, 0}, Args,
650 LocObserver, &MI);
651}
652
653LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
654 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
655 LostDebugLocObserver &LocObserver) {
656 MachineFunction &MF = *MI.getMF();
657 MachineRegisterInfo &MRI = MF.getRegInfo();
658
659 Register DstSin = MI.getOperand(0).getReg();
660 Register DstCos = MI.getOperand(1).getReg();
661 Register Src = MI.getOperand(2).getReg();
662 LLT DstTy = MRI.getType(DstSin);
663
664 int MemSize = DstTy.getSizeInBytes();
665 Align Alignment = getStackTemporaryAlignment(DstTy);
666 const DataLayout &DL = MIRBuilder.getDataLayout();
667 unsigned AddrSpace = DL.getAllocaAddrSpace();
668 MachinePointerInfo PtrInfo;
669
670 Register StackPtrSin =
671 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
672 .getReg(0);
673 Register StackPtrCos =
674 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
675 .getReg(0);
676
677 auto &Ctx = MF.getFunction().getContext();
678 auto LibcallResult =
680 {{0}, Type::getVoidTy(Ctx), 0},
681 {{Src, OpType, 0},
682 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
683 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
684 LocObserver, &MI);
685
686 if (LibcallResult != LegalizeResult::Legalized)
687 return UnableToLegalize;
688
689 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
690 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
691 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
692 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
693
694 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
695 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
696 MI.eraseFromParent();
697
698 return Legalized;
699}
700
701LegalizerHelper::LegalizeResult
702llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
703 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
704 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
705
706 SmallVector<CallLowering::ArgInfo, 3> Args;
707 // Add all the args, except for the last which is an imm denoting 'tail'.
708 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
709 Register Reg = MI.getOperand(i).getReg();
710
711 // Need to derive an IR type for call lowering.
712 LLT OpLLT = MRI.getType(Reg);
713 Type *OpTy = nullptr;
714 if (OpLLT.isPointer())
715 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
716 else
717 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
718 Args.push_back({Reg, OpTy, 0});
719 }
720
721 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
722 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
723 RTLIB::Libcall RTLibcall;
724 unsigned Opc = MI.getOpcode();
725 const char *Name;
726 switch (Opc) {
727 case TargetOpcode::G_BZERO:
728 RTLibcall = RTLIB::BZERO;
729 Name = TLI.getLibcallName(RTLibcall);
730 break;
731 case TargetOpcode::G_MEMCPY:
732 RTLibcall = RTLIB::MEMCPY;
733 Name = TLI.getMemcpyName();
734 Args[0].Flags[0].setReturned();
735 break;
736 case TargetOpcode::G_MEMMOVE:
737 RTLibcall = RTLIB::MEMMOVE;
738 Name = TLI.getLibcallName(RTLibcall);
739 Args[0].Flags[0].setReturned();
740 break;
741 case TargetOpcode::G_MEMSET:
742 RTLibcall = RTLIB::MEMSET;
743 Name = TLI.getLibcallName(RTLibcall);
744 Args[0].Flags[0].setReturned();
745 break;
746 default:
747 llvm_unreachable("unsupported opcode");
748 }
749
750 // Unsupported libcall on the target.
751 if (!Name) {
752 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
753 << MIRBuilder.getTII().getName(Opc) << "\n");
754 return LegalizerHelper::UnableToLegalize;
755 }
756
757 CallLowering::CallLoweringInfo Info;
758 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
759 Info.Callee = MachineOperand::CreateES(Name);
760 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
761 Info.IsTailCall =
762 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
763 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
764
765 llvm::append_range(Info.OrigArgs, Args);
766 if (!CLI.lowerCall(MIRBuilder, Info))
767 return LegalizerHelper::UnableToLegalize;
768
769 if (Info.LoweredTailCall) {
770 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
771
772 // Check debug locations before removing the return.
773 LocObserver.checkpoint(true);
774
775 // We must have a return following the call (or debug insts) to get past
776 // isLibCallInTailPosition.
777 do {
778 MachineInstr *Next = MI.getNextNode();
779 assert(Next &&
780 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
781 "Expected instr following MI to be return or debug inst?");
782 // We lowered a tail call, so the call is now the return from the block.
783 // Delete the old return.
784 Next->eraseFromParent();
785 } while (MI.getNextNode());
786
787 // We expect to lose the debug location from the return.
788 LocObserver.checkpoint(false);
789 }
790
791 return LegalizerHelper::Legalized;
792}
793
794static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
795 unsigned Opc = MI.getOpcode();
796 auto &AtomicMI = cast<GMemOperation>(MI);
797 auto &MMO = AtomicMI.getMMO();
798 auto Ordering = MMO.getMergedOrdering();
799 LLT MemType = MMO.getMemoryType();
800 uint64_t MemSize = MemType.getSizeInBytes();
801 if (MemType.isVector())
802 return RTLIB::UNKNOWN_LIBCALL;
803
804#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
805#define LCALL5(A) \
806 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
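// For example, LCALLS(RTLIB::OUTLINE_ATOMIC_CAS, 4) expands to the four
// memory-ordering variants {OUTLINE_ATOMIC_CAS4_RELAX, ..._ACQ, ..._REL,
// ..._ACQ_REL}, and LCALL5 stamps that out for the 1/2/4/8/16-byte widths.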
807 switch (Opc) {
808 case TargetOpcode::G_ATOMIC_CMPXCHG:
809 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
810 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
811 return getOutlineAtomicHelper(LC, Ordering, MemSize);
812 }
813 case TargetOpcode::G_ATOMICRMW_XCHG: {
814 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
815 return getOutlineAtomicHelper(LC, Ordering, MemSize);
816 }
817 case TargetOpcode::G_ATOMICRMW_ADD:
818 case TargetOpcode::G_ATOMICRMW_SUB: {
819 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
820 return getOutlineAtomicHelper(LC, Ordering, MemSize);
821 }
822 case TargetOpcode::G_ATOMICRMW_AND: {
823 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
824 return getOutlineAtomicHelper(LC, Ordering, MemSize);
825 }
826 case TargetOpcode::G_ATOMICRMW_OR: {
827 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
828 return getOutlineAtomicHelper(LC, Ordering, MemSize);
829 }
830 case TargetOpcode::G_ATOMICRMW_XOR: {
831 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
832 return getOutlineAtomicHelper(LC, Ordering, MemSize);
833 }
834 default:
835 return RTLIB::UNKNOWN_LIBCALL;
836 }
837#undef LCALLS
838#undef LCALL5
839}
840
841static LegalizerHelper::LegalizeResult
842createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
843 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
844
845 Type *RetTy;
846 SmallVector<Register> RetRegs;
847 SmallVector<CallLowering::ArgInfo, 3> Args;
848 unsigned Opc = MI.getOpcode();
849 switch (Opc) {
850 case TargetOpcode::G_ATOMIC_CMPXCHG:
851 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
852 Register Success;
853 LLT SuccessLLT;
854 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
855 MI.getFirst4RegLLTs();
856 RetRegs.push_back(Ret);
857 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
858 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
859 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
860 NewLLT) = MI.getFirst5RegLLTs();
861 RetRegs.push_back(Success);
862 RetTy = StructType::get(
863 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
864 }
865 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
866 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
867 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
868 break;
869 }
870 case TargetOpcode::G_ATOMICRMW_XCHG:
871 case TargetOpcode::G_ATOMICRMW_ADD:
872 case TargetOpcode::G_ATOMICRMW_SUB:
873 case TargetOpcode::G_ATOMICRMW_AND:
874 case TargetOpcode::G_ATOMICRMW_OR:
875 case TargetOpcode::G_ATOMICRMW_XOR: {
876 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
877 RetRegs.push_back(Ret);
878 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
879 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
880 Val =
881 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
882 .getReg(0);
883 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
884 Val =
885 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
886 .getReg(0);
887 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
888 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
889 break;
890 }
891 default:
892 llvm_unreachable("unsupported opcode");
893 }
894
895 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
896 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
897 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
898 const char *Name = TLI.getLibcallName(RTLibcall);
899
900 // Unsupported libcall on the target.
901 if (!Name) {
902 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
903 << MIRBuilder.getTII().getName(Opc) << "\n");
904 return LegalizerHelper::UnableToLegalize;
905 }
906
907 CallLowering::CallLoweringInfo Info;
908 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
909 Info.Callee = MachineOperand::CreateES(Name);
910 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
911
912 llvm::append_range(Info.OrigArgs, Args);
913 if (!CLI.lowerCall(MIRBuilder, Info))
914 return LegalizerHelper::UnableToLegalize;
915
916 return LegalizerHelper::Legalized;
917}
918
919static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
920 Type *FromType) {
921 auto ToMVT = MVT::getVT(ToType);
922 auto FromMVT = MVT::getVT(FromType);
923
924 switch (Opcode) {
925 case TargetOpcode::G_FPEXT:
926 return RTLIB::getFPEXT(FromMVT, ToMVT);
927 case TargetOpcode::G_FPTRUNC:
928 return RTLIB::getFPROUND(FromMVT, ToMVT);
929 case TargetOpcode::G_FPTOSI:
930 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
931 case TargetOpcode::G_FPTOUI:
932 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
933 case TargetOpcode::G_SITOFP:
934 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
935 case TargetOpcode::G_UITOFP:
936 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
937 }
938 llvm_unreachable("Unsupported libcall function");
939}
940
941static LegalizerHelper::LegalizeResult
942conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
943 Type *FromType, LostDebugLocObserver &LocObserver,
944 const TargetLowering &TLI, bool IsSigned = false) {
945 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
946 if (FromType->isIntegerTy()) {
947 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
948 Arg.Flags[0].setSExt();
949 else
950 Arg.Flags[0].setZExt();
951 }
952
953 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
954 return createLibcall(MIRBuilder, Libcall,
955 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
956 &MI);
957}
958
959static RTLIB::Libcall
960getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
961 RTLIB::Libcall RTLibcall;
962 switch (MI.getOpcode()) {
963 case TargetOpcode::G_GET_FPENV:
964 RTLibcall = RTLIB::FEGETENV;
965 break;
966 case TargetOpcode::G_SET_FPENV:
967 case TargetOpcode::G_RESET_FPENV:
968 RTLibcall = RTLIB::FESETENV;
969 break;
970 case TargetOpcode::G_GET_FPMODE:
971 RTLibcall = RTLIB::FEGETMODE;
972 break;
973 case TargetOpcode::G_SET_FPMODE:
974 case TargetOpcode::G_RESET_FPMODE:
975 RTLibcall = RTLIB::FESETMODE;
976 break;
977 default:
978 llvm_unreachable("Unexpected opcode");
979 }
980 return RTLibcall;
981}
982
983// Some library functions that read FP state (fegetmode, fegetenv) write the
984// state into a region in memory. IR intrinsics that do the same operations
985// (get_fpmode, get_fpenv) return the state as an integer value. To implement
986// these intrinsics via the library functions, we need to use a temporary
987// variable, for example:
988//
989// %0:_(s32) = G_GET_FPMODE
990//
991// is transformed to:
992//
993// %1:_(p0) = G_FRAME_INDEX %stack.0
994// BL &fegetmode
995// %0:_(s32) = G_LOAD % 1
996//
997LegalizerHelper::LegalizeResult
998LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
999 MachineInstr &MI,
1000 LostDebugLocObserver &LocObserver) {
1001 const DataLayout &DL = MIRBuilder.getDataLayout();
1002 auto &MF = MIRBuilder.getMF();
1003 auto &MRI = *MIRBuilder.getMRI();
1004 auto &Ctx = MF.getFunction().getContext();
1005
1006 // Create a temporary where the library function will put the read state.
1007 Register Dst = MI.getOperand(0).getReg();
1008 LLT StateTy = MRI.getType(Dst);
1009 TypeSize StateSize = StateTy.getSizeInBytes();
1010 Align TempAlign = getStackTemporaryAlignment(StateTy);
1011 MachinePointerInfo TempPtrInfo;
1012 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1013
1014 // Create a call to library function, with the temporary as an argument.
1015 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1016 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1017 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1018 auto Res =
1019 createLibcall(MIRBuilder, RTLibcall,
1020 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1021 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1022 LocObserver, nullptr);
1023 if (Res != LegalizerHelper::Legalized)
1024 return Res;
1025
1026 // Create a load from the temporary.
1027 MachineMemOperand *MMO = MF.getMachineMemOperand(
1028 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1029 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1030
1031 return Legalized;
1032}
1033
1034// Similar to `createGetStateLibcall`, this function calls a library function
1035// using transient space on the stack. In this case the library function reads
1036// the content of the memory region.
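//
// A sketch, by analogy with the example above: for
//
// G_SET_FPMODE %val
//
// the value is first stored into a stack temporary,
//
// %1:_(p0) = G_FRAME_INDEX %stack.0
// G_STORE %val, %1
// BL &fesetmode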
1037LegalizerHelper::LegalizeResult
1038LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1039 MachineInstr &MI,
1040 LostDebugLocObserver &LocObserver) {
1041 const DataLayout &DL = MIRBuilder.getDataLayout();
1042 auto &MF = MIRBuilder.getMF();
1043 auto &MRI = *MIRBuilder.getMRI();
1044 auto &Ctx = MF.getFunction().getContext();
1045
1046 // Create a temporary where the library function will get the new state.
1047 Register Src = MI.getOperand(0).getReg();
1048 LLT StateTy = MRI.getType(Src);
1049 TypeSize StateSize = StateTy.getSizeInBytes();
1050 Align TempAlign = getStackTemporaryAlignment(StateTy);
1051 MachinePointerInfo TempPtrInfo;
1052 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1053
1054 // Put the new state into the temporary.
1055 MachineMemOperand *MMO = MF.getMachineMemOperand(
1056 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1057 MIRBuilder.buildStore(Src, Temp, *MMO);
1058
1059 // Create a call to library function, with the temporary as an argument.
1060 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1061 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1062 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1063 return createLibcall(MIRBuilder, RTLibcall,
1064 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1065 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1066 LocObserver, nullptr);
1067}
1068
1069/// Returns the corresponding libcall for the given Pred and
1070/// the ICMP predicate that should be generated to compare with #0
1071/// after the libcall.
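///
/// For example, a 32-bit FCMP_OEQ maps to {RTLIB::OEQ_F32, CmpInst::ICMP_EQ}:
/// call the comparison libcall, then test its i32 result against 0 with
/// ICMP_EQ (a sketch of the usual soft-float convention).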
1072static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1073getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1074#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1075 do { \
1076 switch (Size) { \
1077 case 32: \
1078 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1079 case 64: \
1080 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1081 case 128: \
1082 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1083 default: \
1084 llvm_unreachable("unexpected size"); \
1085 } \
1086 } while (0)
1087
1088 switch (Pred) {
1089 case CmpInst::FCMP_OEQ:
1090 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1091 case CmpInst::FCMP_UNE:
1092 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1093 case CmpInst::FCMP_OGE:
1094 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1095 case CmpInst::FCMP_OLT:
1096 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1097 case CmpInst::FCMP_OLE:
1098 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1099 case CmpInst::FCMP_OGT:
1100 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1101 case CmpInst::FCMP_UNO:
1102 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1103 default:
1104 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1105 }
1106}
1107
1108LegalizerHelper::LegalizeResult
1109LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1110 MachineInstr &MI,
1111 LostDebugLocObserver &LocObserver) {
1112 auto &MF = MIRBuilder.getMF();
1113 auto &Ctx = MF.getFunction().getContext();
1114 const GFCmp *Cmp = cast<GFCmp>(&MI);
1115
1116 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1117 unsigned Size = OpLLT.getSizeInBits();
1118 if ((Size != 32 && Size != 64 && Size != 128) ||
1119 OpLLT != MRI.getType(Cmp->getRHSReg()))
1120 return UnableToLegalize;
1121
1122 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1123
1124 // DstReg type is s32
1125 const Register DstReg = Cmp->getReg(0);
1126 LLT DstTy = MRI.getType(DstReg);
1127 const auto Cond = Cmp->getCond();
1128
1129 // Reference:
1130 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1131 // Generates a libcall followed by ICMP.
1132 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1133 const CmpInst::Predicate ICmpPred,
1134 const DstOp &Res) -> Register {
1135 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1136 constexpr LLT TempLLT = LLT::scalar(32);
1137 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1138 // Generate libcall, holding result in Temp
1139 const auto Status = createLibcall(
1140 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1141 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1142 LocObserver, &MI);
1143 if (!Status)
1144 return {};
1145
1146 // Compare temp with #0 to get the final result.
1147 return MIRBuilder
1148 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1149 .getReg(0);
1150 };
1151
1152 // Simple case if we have a direct mapping from predicate to libcall
1153 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1154 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1155 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1156 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1157 return Legalized;
1158 }
1159 return UnableToLegalize;
1160 }
1161
1162 // No direct mapping found; generate it as a combination of libcalls.
1163
1164 switch (Cond) {
1165 case CmpInst::FCMP_UEQ: {
1166 // FCMP_UEQ: unordered or equal
1167 // Convert into (FCMP_OEQ || FCMP_UNO).
1168
1169 const auto [OeqLibcall, OeqPred] =
1170 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1171 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1172
1173 const auto [UnoLibcall, UnoPred] =
1174 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1175 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1176 if (Oeq && Uno)
1177 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1178 else
1179 return UnableToLegalize;
1180
1181 break;
1182 }
1183 case CmpInst::FCMP_ONE: {
1184 // FCMP_ONE: ordered and operands are unequal
1185 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1186
1187 // We invert the predicate instead of generating a NOT
1188 // to save one instruction.
1189 // On AArch64, isel can even select the two cmps into a single ccmp.
1190 const auto [OeqLibcall, OeqPred] =
1191 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1192 const auto NotOeq =
1193 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1194
1195 const auto [UnoLibcall, UnoPred] =
1196 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1197 const auto NotUno =
1198 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1199
1200 if (NotOeq && NotUno)
1201 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1202 else
1203 return UnableToLegalize;
1204
1205 break;
1206 }
1207 case CmpInst::FCMP_ULT:
1208 case CmpInst::FCMP_UGE:
1209 case CmpInst::FCMP_UGT:
1210 case CmpInst::FCMP_ULE:
1211 case CmpInst::FCMP_ORD: {
1212 // Convert into: !(inverse(Pred))
1213 // E.g. FCMP_ULT becomes !FCMP_OGE
1214 // This is equivalent to the following, but saves some instructions.
1215 // MIRBuilder.buildNot(
1216 // PredTy,
1217 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1218 // Op1, Op2));
1219 const auto [InversedLibcall, InversedPred] =
1220 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1221 if (!BuildLibcall(InversedLibcall,
1222 CmpInst::getInversePredicate(InversedPred), DstReg))
1223 return UnableToLegalize;
1224 break;
1225 }
1226 default:
1227 return UnableToLegalize;
1228 }
1229
1230 return Legalized;
1231}
1232
1233// This function is used to legalize operations that set the default
1234// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
1235// used for that. On most targets supported by glibc, FE_DFL_MODE is defined as
1236// `((const femode_t *) -1)`, and that assumption is used here. If it does not
1237// hold for some target, the target must provide custom lowering.
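//
// A sketch of the emitted sequence on a target with 64-bit pointers, under the
// FE_DFL_MODE assumption above:
//
// %c:_(s64) = G_CONSTANT i64 -1
// %p:_(p0) = G_INTTOPTR %c
// BL &fesetmode (with %p as the argument)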
1238LegalizerHelper::LegalizeResult
1239LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1240 MachineInstr &MI,
1241 LostDebugLocObserver &LocObserver) {
1242 const DataLayout &DL = MIRBuilder.getDataLayout();
1243 auto &MF = MIRBuilder.getMF();
1244 auto &Ctx = MF.getFunction().getContext();
1245
1246 // Create an argument for the library function.
1247 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1248 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1249 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1250 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1251 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1252 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1253 MIRBuilder.buildIntToPtr(Dest, DefValue);
1254
1255 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1256 return createLibcall(MIRBuilder, RTLibcall,
1257 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1258 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1259 LocObserver, &MI);
1260}
1261
1262LegalizerHelper::LegalizeResult
1263LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1264 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1265
1266 switch (MI.getOpcode()) {
1267 default:
1268 return UnableToLegalize;
1269 case TargetOpcode::G_MUL:
1270 case TargetOpcode::G_SDIV:
1271 case TargetOpcode::G_UDIV:
1272 case TargetOpcode::G_SREM:
1273 case TargetOpcode::G_UREM:
1274 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1275 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1276 unsigned Size = LLTy.getSizeInBits();
1277 Type *HLTy = IntegerType::get(Ctx, Size);
1278 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1279 if (Status != Legalized)
1280 return Status;
1281 break;
1282 }
1283 case TargetOpcode::G_FADD:
1284 case TargetOpcode::G_FSUB:
1285 case TargetOpcode::G_FMUL:
1286 case TargetOpcode::G_FDIV:
1287 case TargetOpcode::G_FMA:
1288 case TargetOpcode::G_FPOW:
1289 case TargetOpcode::G_FREM:
1290 case TargetOpcode::G_FCOS:
1291 case TargetOpcode::G_FSIN:
1292 case TargetOpcode::G_FTAN:
1293 case TargetOpcode::G_FACOS:
1294 case TargetOpcode::G_FASIN:
1295 case TargetOpcode::G_FATAN:
1296 case TargetOpcode::G_FATAN2:
1297 case TargetOpcode::G_FCOSH:
1298 case TargetOpcode::G_FSINH:
1299 case TargetOpcode::G_FTANH:
1300 case TargetOpcode::G_FLOG10:
1301 case TargetOpcode::G_FLOG:
1302 case TargetOpcode::G_FLOG2:
1303 case TargetOpcode::G_FEXP:
1304 case TargetOpcode::G_FEXP2:
1305 case TargetOpcode::G_FEXP10:
1306 case TargetOpcode::G_FCEIL:
1307 case TargetOpcode::G_FFLOOR:
1308 case TargetOpcode::G_FMINNUM:
1309 case TargetOpcode::G_FMAXNUM:
1310 case TargetOpcode::G_FSQRT:
1311 case TargetOpcode::G_FRINT:
1312 case TargetOpcode::G_FNEARBYINT:
1313 case TargetOpcode::G_INTRINSIC_TRUNC:
1314 case TargetOpcode::G_INTRINSIC_ROUND:
1315 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1316 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1317 unsigned Size = LLTy.getSizeInBits();
1318 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1319 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1320 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1321 return UnableToLegalize;
1322 }
1323 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1324 if (Status != Legalized)
1325 return Status;
1326 break;
1327 }
1328 case TargetOpcode::G_FSINCOS: {
1329 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1330 unsigned Size = LLTy.getSizeInBits();
1331 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1332 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1333 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1334 return UnableToLegalize;
1335 }
1336 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1337 }
1338 case TargetOpcode::G_LROUND:
1339 case TargetOpcode::G_LLROUND:
1340 case TargetOpcode::G_INTRINSIC_LRINT:
1341 case TargetOpcode::G_INTRINSIC_LLRINT: {
1342 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1343 unsigned Size = LLTy.getSizeInBits();
1344 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1345 Type *ITy = IntegerType::get(
1346 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1347 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1348 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1349 return UnableToLegalize;
1350 }
1351 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1352 LegalizeResult Status =
1353 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1354 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1355 if (Status != Legalized)
1356 return Status;
1357 MI.eraseFromParent();
1358 return Legalized;
1359 }
1360 case TargetOpcode::G_FPOWI:
1361 case TargetOpcode::G_FLDEXP: {
1362 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1363 unsigned Size = LLTy.getSizeInBits();
1364 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1365 Type *ITy = IntegerType::get(
1366 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1367 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1368 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1369 return UnableToLegalize;
1370 }
1371 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1372 SmallVector<CallLowering::ArgInfo, 2> Args = {
1373 {MI.getOperand(1).getReg(), HLTy, 0},
1374 {MI.getOperand(2).getReg(), ITy, 1}};
1375 Args[1].Flags[0].setSExt();
1376 LegalizeResult Status =
1377 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1378 Args, LocObserver, &MI);
1379 if (Status != Legalized)
1380 return Status;
1381 break;
1382 }
1383 case TargetOpcode::G_FPEXT:
1384 case TargetOpcode::G_FPTRUNC: {
1385 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1386 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1387 if (!FromTy || !ToTy)
1388 return UnableToLegalize;
1389 LegalizeResult Status =
1390 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1391 if (Status != Legalized)
1392 return Status;
1393 break;
1394 }
1395 case TargetOpcode::G_FCMP: {
1396 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1397 if (Status != Legalized)
1398 return Status;
1399 MI.eraseFromParent();
1400 return Status;
1401 }
1402 case TargetOpcode::G_FPTOSI:
1403 case TargetOpcode::G_FPTOUI: {
1404 // FIXME: Support other types
1405 Type *FromTy =
1406 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1407 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1408 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1409 return UnableToLegalize;
1410 LegalizeResult Status = conversionLibcall(
1411 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1412 if (Status != Legalized)
1413 return Status;
1414 break;
1415 }
1416 case TargetOpcode::G_SITOFP:
1417 case TargetOpcode::G_UITOFP: {
1418 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1419 Type *ToTy =
1420 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1421 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1422 return UnableToLegalize;
1423 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1424 LegalizeResult Status =
1425 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1426 LocObserver, TLI, IsSigned);
1427 if (Status != Legalized)
1428 return Status;
1429 break;
1430 }
1431 case TargetOpcode::G_ATOMICRMW_XCHG:
1432 case TargetOpcode::G_ATOMICRMW_ADD:
1433 case TargetOpcode::G_ATOMICRMW_SUB:
1434 case TargetOpcode::G_ATOMICRMW_AND:
1435 case TargetOpcode::G_ATOMICRMW_OR:
1436 case TargetOpcode::G_ATOMICRMW_XOR:
1437 case TargetOpcode::G_ATOMIC_CMPXCHG:
1438 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1439 auto Status = createAtomicLibcall(MIRBuilder, MI);
1440 if (Status != Legalized)
1441 return Status;
1442 break;
1443 }
1444 case TargetOpcode::G_BZERO:
1445 case TargetOpcode::G_MEMCPY:
1446 case TargetOpcode::G_MEMMOVE:
1447 case TargetOpcode::G_MEMSET: {
1448 LegalizeResult Result =
1449 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1450 if (Result != Legalized)
1451 return Result;
1452 MI.eraseFromParent();
1453 return Result;
1454 }
1455 case TargetOpcode::G_GET_FPENV:
1456 case TargetOpcode::G_GET_FPMODE: {
1457 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1458 if (Result != Legalized)
1459 return Result;
1460 break;
1461 }
1462 case TargetOpcode::G_SET_FPENV:
1463 case TargetOpcode::G_SET_FPMODE: {
1464 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1465 if (Result != Legalized)
1466 return Result;
1467 break;
1468 }
1469 case TargetOpcode::G_RESET_FPENV:
1470 case TargetOpcode::G_RESET_FPMODE: {
1471 LegalizeResult Result =
1472 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1473 if (Result != Legalized)
1474 return Result;
1475 break;
1476 }
1477 }
1478
1479 MI.eraseFromParent();
1480 return Legalized;
1481}
1482
1483LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1484 unsigned TypeIdx,
1485 LLT NarrowTy) {
1486 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1487 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1488
1489 switch (MI.getOpcode()) {
1490 default:
1491 return UnableToLegalize;
1492 case TargetOpcode::G_IMPLICIT_DEF: {
1493 Register DstReg = MI.getOperand(0).getReg();
1494 LLT DstTy = MRI.getType(DstReg);
1495
1496 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1497 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1498 // FIXME: Although this would also be legal for the general case, it causes
1499 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1500 // combines not being hit). This seems to be a problem related to the
1501 // artifact combiner.
1502 if (SizeOp0 % NarrowSize != 0) {
1503 LLT ImplicitTy = NarrowTy;
1504 if (DstTy.isVector())
1505 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1506
1507 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1508 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1509
1510 MI.eraseFromParent();
1511 return Legalized;
1512 }
1513
1514 int NumParts = SizeOp0 / NarrowSize;
1515
1515
1516 SmallVector<Register, 2> DstRegs;
1517 for (int i = 0; i < NumParts; ++i)
1518 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1519
1520 if (DstTy.isVector())
1521 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1522 else
1523 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1524 MI.eraseFromParent();
1525 return Legalized;
1526 }
1527 case TargetOpcode::G_CONSTANT: {
1528 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1529 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1530 unsigned TotalSize = Ty.getSizeInBits();
1531 unsigned NarrowSize = NarrowTy.getSizeInBits();
1532 int NumParts = TotalSize / NarrowSize;
1533
1534 SmallVector<Register, 4> PartRegs;
1535 for (int I = 0; I != NumParts; ++I) {
1536 unsigned Offset = I * NarrowSize;
1537 auto K = MIRBuilder.buildConstant(NarrowTy,
1538 Val.lshr(Offset).trunc(NarrowSize));
1539 PartRegs.push_back(K.getReg(0));
1540 }
1541
1542 LLT LeftoverTy;
1543 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1544 SmallVector<Register, 1> LeftoverRegs;
1545 if (LeftoverBits != 0) {
1546 LeftoverTy = LLT::scalar(LeftoverBits);
1547 auto K = MIRBuilder.buildConstant(
1548 LeftoverTy,
1549 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1550 LeftoverRegs.push_back(K.getReg(0));
1551 }
1552
1553 insertParts(MI.getOperand(0).getReg(),
1554 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1555
1556 MI.eraseFromParent();
1557 return Legalized;
1558 }
1559 case TargetOpcode::G_SEXT:
1560 case TargetOpcode::G_ZEXT:
1561 case TargetOpcode::G_ANYEXT:
1562 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1563 case TargetOpcode::G_TRUNC: {
1564 if (TypeIdx != 1)
1565 return UnableToLegalize;
1566
1567 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1568 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1569 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1570 return UnableToLegalize;
1571 }
1572
1573 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1574 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1575 MI.eraseFromParent();
1576 return Legalized;
1577 }
1578 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1579 case TargetOpcode::G_FREEZE: {
1580 if (TypeIdx != 0)
1581 return UnableToLegalize;
1582
1583 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1584 // Should widen scalar first
1585 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1586 return UnableToLegalize;
1587
1588 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1589 SmallVector<Register, 8> Parts;
1590 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1591 Parts.push_back(
1592 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1593 .getReg(0));
1594 }
1595
1596 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1597 MI.eraseFromParent();
1598 return Legalized;
1599 }
1600 case TargetOpcode::G_ADD:
1601 case TargetOpcode::G_SUB:
1602 case TargetOpcode::G_SADDO:
1603 case TargetOpcode::G_SSUBO:
1604 case TargetOpcode::G_SADDE:
1605 case TargetOpcode::G_SSUBE:
1606 case TargetOpcode::G_UADDO:
1607 case TargetOpcode::G_USUBO:
1608 case TargetOpcode::G_UADDE:
1609 case TargetOpcode::G_USUBE:
1610 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1611 case TargetOpcode::G_MUL:
1612 case TargetOpcode::G_UMULH:
1613 return narrowScalarMul(MI, NarrowTy);
1614 case TargetOpcode::G_EXTRACT:
1615 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1616 case TargetOpcode::G_INSERT:
1617 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1618 case TargetOpcode::G_LOAD: {
1619 auto &LoadMI = cast<GLoad>(MI);
1620 Register DstReg = LoadMI.getDstReg();
1621 LLT DstTy = MRI.getType(DstReg);
1622 if (DstTy.isVector())
1623 return UnableToLegalize;
1624
1625 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1626 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1627 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1628 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1629 LoadMI.eraseFromParent();
1630 return Legalized;
1631 }
1632
1633 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1634 }
1635 case TargetOpcode::G_ZEXTLOAD:
1636 case TargetOpcode::G_SEXTLOAD: {
1637 auto &LoadMI = cast<GExtLoad>(MI);
1638 Register DstReg = LoadMI.getDstReg();
1639 Register PtrReg = LoadMI.getPointerReg();
1640
1641 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1642 auto &MMO = LoadMI.getMMO();
1643 unsigned MemSize = MMO.getSizeInBits().getValue();
1644
1645 if (MemSize == NarrowSize) {
1646 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1647 } else if (MemSize < NarrowSize) {
1648 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1649 } else if (MemSize > NarrowSize) {
1650 // FIXME: Need to split the load.
1651 return UnableToLegalize;
1652 }
1653
1654 if (isa<GZExtLoad>(LoadMI))
1655 MIRBuilder.buildZExt(DstReg, TmpReg);
1656 else
1657 MIRBuilder.buildSExt(DstReg, TmpReg);
1658
1659 LoadMI.eraseFromParent();
1660 return Legalized;
1661 }
1662 case TargetOpcode::G_STORE: {
1663 auto &StoreMI = cast<GStore>(MI);
1664
1665 Register SrcReg = StoreMI.getValueReg();
1666 LLT SrcTy = MRI.getType(SrcReg);
1667 if (SrcTy.isVector())
1668 return UnableToLegalize;
1669
1670 int NumParts = SizeOp0 / NarrowSize;
1671 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1672 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1673 if (SrcTy.isVector() && LeftoverBits != 0)
1674 return UnableToLegalize;
1675
1676 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1677 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1678 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1679 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1680 StoreMI.eraseFromParent();
1681 return Legalized;
1682 }
1683
1684 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1685 }
1686 case TargetOpcode::G_SELECT:
1687 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1688 case TargetOpcode::G_AND:
1689 case TargetOpcode::G_OR:
1690 case TargetOpcode::G_XOR: {
1691 // Legalize bitwise operation:
1692 // A = BinOp<Ty> B, C
1693 // into:
1694 // B1, ..., BN = G_UNMERGE_VALUES B
1695 // C1, ..., CN = G_UNMERGE_VALUES C
1696 // A1 = BinOp<Ty/N> B1, C1
1697 // ...
1698 // AN = BinOp<Ty/N> BN, CN
1699 // A = G_MERGE_VALUES A1, ..., AN
1700 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1701 }
1702 case TargetOpcode::G_SHL:
1703 case TargetOpcode::G_LSHR:
1704 case TargetOpcode::G_ASHR:
1705 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1706 case TargetOpcode::G_CTLZ:
1707 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1708 case TargetOpcode::G_CTTZ:
1709 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1710 case TargetOpcode::G_CTPOP:
1711 if (TypeIdx == 1)
1712 switch (MI.getOpcode()) {
1713 case TargetOpcode::G_CTLZ:
1714 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1715 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1716 case TargetOpcode::G_CTTZ:
1717 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1718 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1719 case TargetOpcode::G_CTPOP:
1720 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1721 default:
1722 return UnableToLegalize;
1723 }
1724
1725 Observer.changingInstr(MI);
1726 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1727 Observer.changedInstr(MI);
1728 return Legalized;
1729 case TargetOpcode::G_INTTOPTR:
1730 if (TypeIdx != 1)
1731 return UnableToLegalize;
1732
1733 Observer.changingInstr(MI);
1734 narrowScalarSrc(MI, NarrowTy, 1);
1735 Observer.changedInstr(MI);
1736 return Legalized;
1737 case TargetOpcode::G_PTRTOINT:
1738 if (TypeIdx != 0)
1739 return UnableToLegalize;
1740
1741 Observer.changingInstr(MI);
1742 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1743 Observer.changedInstr(MI);
1744 return Legalized;
1745 case TargetOpcode::G_PHI: {
1746 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1747 // NarrowSize.
1748 if (SizeOp0 % NarrowSize != 0)
1749 return UnableToLegalize;
1750
1751 unsigned NumParts = SizeOp0 / NarrowSize;
1752 SmallVector<Register, 2> DstRegs(NumParts);
1753 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1754 Observer.changingInstr(MI);
1755 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1756 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1757 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1758 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1759 SrcRegs[i / 2], MIRBuilder, MRI);
1760 }
1761 MachineBasicBlock &MBB = *MI.getParent();
1762 MIRBuilder.setInsertPt(MBB, MI);
1763 for (unsigned i = 0; i < NumParts; ++i) {
1764 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1765 MachineInstrBuilder MIB =
1766 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1767 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1768 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1769 }
1770 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1771 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1772 Observer.changedInstr(MI);
1773 MI.eraseFromParent();
1774 return Legalized;
1775 }
1776 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1777 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1778 if (TypeIdx != 2)
1779 return UnableToLegalize;
1780
1781 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1782 Observer.changingInstr(MI);
1783 narrowScalarSrc(MI, NarrowTy, OpIdx);
1784 Observer.changedInstr(MI);
1785 return Legalized;
1786 }
1787 case TargetOpcode::G_ICMP: {
1788 Register LHS = MI.getOperand(2).getReg();
1789 LLT SrcTy = MRI.getType(LHS);
1790 CmpInst::Predicate Pred =
1791 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1792
1793 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1794 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1795 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1796 LHSLeftoverRegs, MIRBuilder, MRI))
1797 return UnableToLegalize;
1798
1799 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1800 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1801 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1802 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1803 return UnableToLegalize;
1804
1805 // We now have the LHS and RHS of the compare split into narrow-type
1806 // registers, plus potentially some leftover type.
1807 Register Dst = MI.getOperand(0).getReg();
1808 LLT ResTy = MRI.getType(Dst);
1809 if (ICmpInst::isEquality(Pred)) {
1810 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1811 // them together. For each equal part, the result should be all 0s. For
1812 // each non-equal part, we'll get at least one 1.
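 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): a s64 equality compare split into two s32 parts with
 // no leftover ends up roughly as:
 //   %x0:_(s32), %x1:_(s32) = G_UNMERGE_VALUES %lhs(s64)
 //   %y0:_(s32), %y1:_(s32) = G_UNMERGE_VALUES %rhs(s64)
 //   %xor0:_(s32) = G_XOR %x0, %y0
 //   %xor1:_(s32) = G_XOR %x1, %y1
 //   %or:_(s32) = G_OR %xor0, %xor1
 //   %dst:_(s1) = G_ICMP intpred(eq), %or, 0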
1813 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1814 SmallVector<Register, 4> Xors;
1815 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1816 auto LHS = std::get<0>(LHSAndRHS);
1817 auto RHS = std::get<1>(LHSAndRHS);
1818 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1819 Xors.push_back(Xor);
1820 }
1821
1822 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1823 // to the desired narrow type so that we can OR them together later.
1824 SmallVector<Register, 4> WidenedXors;
1825 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1826 auto LHS = std::get<0>(LHSAndRHS);
1827 auto RHS = std::get<1>(LHSAndRHS);
1828 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1829 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1830 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1831 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1832 llvm::append_range(Xors, WidenedXors);
1833 }
1834
1835 // Now, for each part we broke up, we know if they are equal/not equal
1836 // based off the G_XOR. We can OR these all together and compare against
1837 // 0 to get the result.
1838 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1839 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1840 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1841 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1842 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1843 } else {
1844 Register CmpIn;
1845 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1846 Register CmpOut;
1847 CmpInst::Predicate PartPred;
1848
1849 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1850 PartPred = Pred;
1851 CmpOut = Dst;
1852 } else {
1853 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1854 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1855 }
1856
1857 if (!CmpIn) {
1858 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1859 RHSPartRegs[I]);
1860 } else {
1861 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1862 RHSPartRegs[I]);
1863 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1864 LHSPartRegs[I], RHSPartRegs[I]);
1865 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1866 }
1867
1868 CmpIn = CmpOut;
1869 }
1870
1871 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1872 Register CmpOut;
1873 CmpInst::Predicate PartPred;
1874
1875 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1876 PartPred = Pred;
1877 CmpOut = Dst;
1878 } else {
1879 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1880 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1881 }
1882
1883 if (!CmpIn) {
1884 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1885 RHSLeftoverRegs[I]);
1886 } else {
1887 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1888 RHSLeftoverRegs[I]);
1889 auto CmpEq =
1890 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1891 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1892 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1893 }
1894
1895 CmpIn = CmpOut;
1896 }
1897 }
1898 MI.eraseFromParent();
1899 return Legalized;
1900 }
1901 case TargetOpcode::G_FCMP:
1902 if (TypeIdx != 0)
1903 return UnableToLegalize;
1904
1905 Observer.changingInstr(MI);
1906 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1907 Observer.changedInstr(MI);
1908 return Legalized;
1909
1910 case TargetOpcode::G_SEXT_INREG: {
1911 if (TypeIdx != 0)
1912 return UnableToLegalize;
1913
1914 int64_t SizeInBits = MI.getOperand(2).getImm();
1915
1916 // So long as the new type has more bits than the bits we're extending we
1917 // don't need to break it apart.
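 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): a s64 G_SEXT_INREG %x, 8 narrowed to s32 becomes:
 //   %t:_(s32) = G_TRUNC %x(s64)
 //   %s:_(s32) = G_SEXT_INREG %t, 8
 //   %dst:_(s64) = G_SEXT %s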
1918 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1919 Observer.changingInstr(MI);
1920 // We don't lose any non-extension bits by truncating the src and
1921 // sign-extending the dst.
1922 MachineOperand &MO1 = MI.getOperand(1);
1923 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1924 MO1.setReg(TruncMIB.getReg(0));
1925
1926 MachineOperand &MO2 = MI.getOperand(0);
1927 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1928 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1929 MIRBuilder.buildSExt(MO2, DstExt);
1930 MO2.setReg(DstExt);
1931 Observer.changedInstr(MI);
1932 return Legalized;
1933 }
1934
1935 // Break it apart. Components below the extension point are unmodified. The
1936 // component containing the extension point becomes a narrower SEXT_INREG.
1937 // Components above it are ashr'd from the component containing the
1938 // extension point.
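 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): a s64 G_SEXT_INREG %x, 8 split into two s32 parts
 // becomes, roughly:
 //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x(s64)
 //   %lo2:_(s32) = G_SEXT_INREG %lo, 8   ; part containing the extension point
 //   %hi2:_(s32) = G_ASHR %lo2, 31       ; parts above it are all sign bits
 //   %dst:_(s64) = G_MERGE_VALUES %lo2, %hi2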
1939 if (SizeOp0 % NarrowSize != 0)
1940 return UnableToLegalize;
1941 int NumParts = SizeOp0 / NarrowSize;
1942
1943 // List the registers where the destination will be scattered.
1944 SmallVector<Register, 2> DstRegs;
1945 // List the registers where the source will be split.
1946 SmallVector<Register, 2> SrcRegs;
1947
1948 // Create all the temporary registers.
1949 for (int i = 0; i < NumParts; ++i) {
1950 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1951
1952 SrcRegs.push_back(SrcReg);
1953 }
1954
1955 // Explode the big arguments into smaller chunks.
1956 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1957
1958 Register AshrCstReg =
1959 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1960 .getReg(0);
1961 Register FullExtensionReg;
1962 Register PartialExtensionReg;
1963
1964 // Do the operation on each small part.
1965 for (int i = 0; i < NumParts; ++i) {
1966 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1967 DstRegs.push_back(SrcRegs[i]);
1968 PartialExtensionReg = DstRegs.back();
1969 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1970 assert(PartialExtensionReg &&
1971 "Expected to visit partial extension before full");
1972 if (FullExtensionReg) {
1973 DstRegs.push_back(FullExtensionReg);
1974 continue;
1975 }
1976 DstRegs.push_back(
1977 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1978 .getReg(0));
1979 FullExtensionReg = DstRegs.back();
1980 } else {
1981 DstRegs.push_back(
1982 MIRBuilder
1983 .buildInstr(
1984 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1985 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1986 .getReg(0));
1987 PartialExtensionReg = DstRegs.back();
1988 }
1989 }
1990
1991 // Gather the destination registers into the final destination.
1992 Register DstReg = MI.getOperand(0).getReg();
1993 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1994 MI.eraseFromParent();
1995 return Legalized;
1996 }
1997 case TargetOpcode::G_BSWAP:
1998 case TargetOpcode::G_BITREVERSE: {
1999 if (SizeOp0 % NarrowSize != 0)
2000 return UnableToLegalize;
2001
2002 Observer.changingInstr(MI);
2003 SmallVector<Register, 2> SrcRegs, DstRegs;
2004 unsigned NumParts = SizeOp0 / NarrowSize;
2005 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2006 MIRBuilder, MRI);
2007
2008 for (unsigned i = 0; i < NumParts; ++i) {
2009 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2010 {SrcRegs[NumParts - 1 - i]});
2011 DstRegs.push_back(DstPart.getReg(0));
2012 }
2013
2014 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2015
2016 Observer.changedInstr(MI);
2017 MI.eraseFromParent();
2018 return Legalized;
2019 }
2020 case TargetOpcode::G_PTR_ADD:
2021 case TargetOpcode::G_PTRMASK: {
2022 if (TypeIdx != 1)
2023 return UnableToLegalize;
2024 Observer.changingInstr(MI);
2025 narrowScalarSrc(MI, NarrowTy, 2);
2026 Observer.changedInstr(MI);
2027 return Legalized;
2028 }
2029 case TargetOpcode::G_FPTOUI:
2030 case TargetOpcode::G_FPTOSI:
2031 case TargetOpcode::G_FPTOUI_SAT:
2032 case TargetOpcode::G_FPTOSI_SAT:
2033 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2034 case TargetOpcode::G_FPEXT:
2035 if (TypeIdx != 0)
2036 return UnableToLegalize;
2037 Observer.changingInstr(MI);
2038 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2039 Observer.changedInstr(MI);
2040 return Legalized;
2041 case TargetOpcode::G_FLDEXP:
2042 case TargetOpcode::G_STRICT_FLDEXP:
2043 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2044 case TargetOpcode::G_VSCALE: {
2045 Register Dst = MI.getOperand(0).getReg();
2046 LLT Ty = MRI.getType(Dst);
2047
2048 // Assume VSCALE(1) fits into a legal integer
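 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): a s64 G_VSCALE 4 narrowed to s32 becomes, roughly:
 //   %v:_(s32) = G_VSCALE 1
 //   %z:_(s64) = G_ZEXT %v
 //   %c:_(s64) = G_CONSTANT i64 4
 //   %dst:_(s64) = G_MUL %z, %c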
2049 const APInt One(NarrowTy.getSizeInBits(), 1);
2050 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2051 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2052 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2053 MIRBuilder.buildMul(Dst, ZExt, C);
2054
2055 MI.eraseFromParent();
2056 return Legalized;
2057 }
2058 }
2059}
2060
2061 Register LegalizerHelper::coerceToScalar(Register Val) {
2062 LLT Ty = MRI.getType(Val);
2063 if (Ty.isScalar())
2064 return Val;
2065
2066 const DataLayout &DL = MIRBuilder.getDataLayout();
2067 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2068 if (Ty.isPointer()) {
2069 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2070 return Register();
2071 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2072 }
2073
2074 Register NewVal = Val;
2075
2076 assert(Ty.isVector());
2077 if (Ty.isPointerVector())
2078 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2079 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2080}
2081
2082 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2083 unsigned OpIdx, unsigned ExtOpcode) {
2084 MachineOperand &MO = MI.getOperand(OpIdx);
2085 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2086 MO.setReg(ExtB.getReg(0));
2087}
2088
2089 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2090 unsigned OpIdx) {
2091 MachineOperand &MO = MI.getOperand(OpIdx);
2092 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2093 MO.setReg(ExtB.getReg(0));
2094}
2095
2096 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2097 unsigned OpIdx, unsigned TruncOpcode) {
2098 MachineOperand &MO = MI.getOperand(OpIdx);
2099 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2100 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2101 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2102 MO.setReg(DstExt);
2103}
2104
2105 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2106 unsigned OpIdx, unsigned ExtOpcode) {
2107 MachineOperand &MO = MI.getOperand(OpIdx);
2108 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2109 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2110 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2111 MO.setReg(DstTrunc);
2112}
2113
2114 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2115 unsigned OpIdx) {
2116 MachineOperand &MO = MI.getOperand(OpIdx);
2117 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2118 Register Dst = MO.getReg();
2119 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2120 MO.setReg(DstExt);
2121 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2122}
2123
2124 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2125 unsigned OpIdx) {
2126 MachineOperand &MO = MI.getOperand(OpIdx);
2127 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2128}
2129
2130 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2131 MachineOperand &Op = MI.getOperand(OpIdx);
2132 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2133}
2134
2135 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2136 MachineOperand &MO = MI.getOperand(OpIdx);
2137 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2138 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2139 MIRBuilder.buildBitcast(MO, CastDst);
2140 MO.setReg(CastDst);
2141}
2142
2143 LegalizerHelper::LegalizeResult
2144 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2145 LLT WideTy) {
2146 if (TypeIdx != 1)
2147 return UnableToLegalize;
2148
2149 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2150 if (DstTy.isVector())
2151 return UnableToLegalize;
2152
2153 LLT SrcTy = MRI.getType(Src1Reg);
2154 const int DstSize = DstTy.getSizeInBits();
2155 const int SrcSize = SrcTy.getSizeInBits();
2156 const int WideSize = WideTy.getSizeInBits();
2157 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2158
2159 unsigned NumOps = MI.getNumOperands();
2160 unsigned NumSrc = MI.getNumOperands() - 1;
2161 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2162
2163 if (WideSize >= DstSize) {
2164 // Directly pack the bits in the target type.
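 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): widening %d:_(s16) = G_MERGE_VALUES %a:_(s8), %b:_(s8)
 // to s32 yields, roughly:
 //   %r0:_(s32) = G_ZEXT %a(s8)
 //   %b32:_(s32) = G_ZEXT %b(s8)
 //   %sh:_(s32) = G_SHL %b32, 8
 //   %r1:_(s32) = G_OR %r0, %sh
 //   %d:_(s16) = G_TRUNC %r1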
2165 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2166
2167 for (unsigned I = 2; I != NumOps; ++I) {
2168 const unsigned Offset = (I - 1) * PartSize;
2169
2170 Register SrcReg = MI.getOperand(I).getReg();
2171 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2172
2173 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2174
2175 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2176 MRI.createGenericVirtualRegister(WideTy);
2177
2178 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2179 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2180 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2181 ResultReg = NextResult;
2182 }
2183
2184 if (WideSize > DstSize)
2185 MIRBuilder.buildTrunc(DstReg, ResultReg);
2186 else if (DstTy.isPointer())
2187 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2188
2189 MI.eraseFromParent();
2190 return Legalized;
2191 }
2192
2193 // Unmerge the original values to the GCD type, and recombine to the next
2194 // multiple greater than the original type.
2195 //
2196 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2197 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2198 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2199 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2200 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2201 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2202 // %12:_(s12) = G_MERGE_VALUES %10, %11
2203 //
2204 // Padding with undef if necessary:
2205 //
2206 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2207 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2208 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2209 // %7:_(s2) = G_IMPLICIT_DEF
2210 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2211 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2212 // %10:_(s12) = G_MERGE_VALUES %8, %9
2213
2214 const int GCD = std::gcd(SrcSize, WideSize);
2215 LLT GCDTy = LLT::scalar(GCD);
2216
2217 SmallVector<Register, 8> NewMergeRegs;
2218 SmallVector<Register, 8> Unmerges;
2219 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2220
2221 // Decompose the original operands if they don't evenly divide.
2222 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2223 Register SrcReg = MO.getReg();
2224 if (GCD == SrcSize) {
2225 Unmerges.push_back(SrcReg);
2226 } else {
2227 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2228 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2229 Unmerges.push_back(Unmerge.getReg(J));
2230 }
2231 }
2232
2233 // Pad with undef to the next size that is a multiple of the requested size.
2234 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2235 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2236 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2237 Unmerges.push_back(UndefReg);
2238 }
2239
2240 const int PartsPerGCD = WideSize / GCD;
2241
2242 // Build merges of each piece.
2243 ArrayRef<Register> Slicer(Unmerges);
2244 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2245 auto Merge =
2246 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2247 NewMergeRegs.push_back(Merge.getReg(0));
2248 }
2249
2250 // A truncate may be necessary if the requested type doesn't evenly divide the
2251 // original result type.
2252 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2253 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2254 } else {
2255 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2256 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2257 }
2258
2259 MI.eraseFromParent();
2260 return Legalized;
2261}
2262
2263 LegalizerHelper::LegalizeResult
2264 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2265 LLT WideTy) {
2266 if (TypeIdx != 0)
2267 return UnableToLegalize;
2268
2269 int NumDst = MI.getNumOperands() - 1;
2270 Register SrcReg = MI.getOperand(NumDst).getReg();
2271 LLT SrcTy = MRI.getType(SrcReg);
2272 if (SrcTy.isVector())
2273 return UnableToLegalize;
2274
2275 Register Dst0Reg = MI.getOperand(0).getReg();
2276 LLT DstTy = MRI.getType(Dst0Reg);
2277 if (!DstTy.isScalar())
2278 return UnableToLegalize;
2279
2280 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2281 if (SrcTy.isPointer()) {
2282 const DataLayout &DL = MIRBuilder.getDataLayout();
2283 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2284 LLVM_DEBUG(
2285 dbgs() << "Not casting non-integral address space integer\n");
2286 return UnableToLegalize;
2287 }
2288
2289 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2290 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2291 }
2292
2293 // Widen SrcTy to WideTy. This does not affect the result, but since the
2294 // user requested this size, it is probably better handled than SrcTy and
2295 // should reduce the total number of legalization artifacts.
2296 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2297 SrcTy = WideTy;
2298 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2299 }
2300
2301 // There's no unmerge type to target. Directly extract the bits from the
2302 // source type.
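 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): widening %d0:_(s8), %d1:_(s8) = G_UNMERGE_VALUES
 // %s:_(s16) to s32 yields, roughly:
 //   %w:_(s32) = G_ANYEXT %s(s16)
 //   %d0:_(s8) = G_TRUNC %w
 //   %sh:_(s32) = G_LSHR %w, 8
 //   %d1:_(s8) = G_TRUNC %sh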
2303 unsigned DstSize = DstTy.getSizeInBits();
2304
2305 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2306 for (int I = 1; I != NumDst; ++I) {
2307 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2308 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2309 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2310 }
2311
2312 MI.eraseFromParent();
2313 return Legalized;
2314 }
2315
2316 // Extend the source to a wider type.
2317 LLT LCMTy = getLCMType(SrcTy, WideTy);
2318
2319 Register WideSrc = SrcReg;
2320 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2321 // TODO: If this is an integral address space, cast to integer and anyext.
2322 if (SrcTy.isPointer()) {
2323 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2324 return UnableToLegalize;
2325 }
2326
2327 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2328 }
2329
2330 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2331
2332 // Create a sequence of unmerges and merges to the original results. Since we
2333 // may have widened the source, we will need to pad the results with dead defs
2334 // to cover the source register.
2335 // e.g. widen s48 to s64:
2336 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2337 //
2338 // =>
2339 // %4:_(s192) = G_ANYEXT %0:_(s96)
2340 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2341 // ; unpack to GCD type, with extra dead defs
2342 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2343 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2344 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2345 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2346 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2347 const LLT GCDTy = getGCDType(WideTy, DstTy);
2348 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2349 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2350
2351 // Directly unmerge to the destination without going through a GCD type
2352 // if possible
2353 if (PartsPerRemerge == 1) {
2354 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2355
2356 for (int I = 0; I != NumUnmerge; ++I) {
2357 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2358
2359 for (int J = 0; J != PartsPerUnmerge; ++J) {
2360 int Idx = I * PartsPerUnmerge + J;
2361 if (Idx < NumDst)
2362 MIB.addDef(MI.getOperand(Idx).getReg());
2363 else {
2364 // Create dead def for excess components.
2365 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2366 }
2367 }
2368
2369 MIB.addUse(Unmerge.getReg(I));
2370 }
2371 } else {
2372 SmallVector<Register, 16> Parts;
2373 for (int J = 0; J != NumUnmerge; ++J)
2374 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2375
2376 SmallVector<Register, 8> RemergeParts;
2377 for (int I = 0; I != NumDst; ++I) {
2378 for (int J = 0; J < PartsPerRemerge; ++J) {
2379 const int Idx = I * PartsPerRemerge + J;
2380 RemergeParts.emplace_back(Parts[Idx]);
2381 }
2382
2383 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2384 RemergeParts.clear();
2385 }
2386 }
2387
2388 MI.eraseFromParent();
2389 return Legalized;
2390}
2391
2392 LegalizerHelper::LegalizeResult
2393 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2394 LLT WideTy) {
2395 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2396 unsigned Offset = MI.getOperand(2).getImm();
2397
2398 if (TypeIdx == 0) {
2399 if (SrcTy.isVector() || DstTy.isVector())
2400 return UnableToLegalize;
2401
2402 SrcOp Src(SrcReg);
2403 if (SrcTy.isPointer()) {
2404 // Extracts from pointers can be handled only if they are really just
2405 // simple integers.
2406 const DataLayout &DL = MIRBuilder.getDataLayout();
2407 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2408 return UnableToLegalize;
2409
2410 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2411 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2412 SrcTy = SrcAsIntTy;
2413 }
2414
2415 if (DstTy.isPointer())
2416 return UnableToLegalize;
2417
2418 if (Offset == 0) {
2419 // Avoid a shift in the degenerate case.
2420 MIRBuilder.buildTrunc(DstReg,
2421 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2422 MI.eraseFromParent();
2423 return Legalized;
2424 }
2425
2426 // Do a shift in the source type.
2427 LLT ShiftTy = SrcTy;
2428 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2429 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2430 ShiftTy = WideTy;
2431 }
2432
2433 auto LShr = MIRBuilder.buildLShr(
2434 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2435 MIRBuilder.buildTrunc(DstReg, LShr);
2436 MI.eraseFromParent();
2437 return Legalized;
2438 }
2439
2440 if (SrcTy.isScalar()) {
2441 Observer.changingInstr(MI);
2442 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2443 Observer.changedInstr(MI);
2444 return Legalized;
2445 }
2446
2447 if (!SrcTy.isVector())
2448 return UnableToLegalize;
2449
2450 if (DstTy != SrcTy.getElementType())
2451 return UnableToLegalize;
2452
2453 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2454 return UnableToLegalize;
2455
2456 Observer.changingInstr(MI);
2457 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2458
2459 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2460 Offset);
2461 widenScalarDst(MI, WideTy.getScalarType(), 0);
2462 Observer.changedInstr(MI);
2463 return Legalized;
2464}
2465
2466 LegalizerHelper::LegalizeResult
2467 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2468 LLT WideTy) {
2469 if (TypeIdx != 0 || WideTy.isVector())
2470 return UnableToLegalize;
2471 Observer.changingInstr(MI);
2472 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2473 widenScalarDst(MI, WideTy);
2474 Observer.changedInstr(MI);
2475 return Legalized;
2476}
2477
2478 LegalizerHelper::LegalizeResult
2479 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2480 LLT WideTy) {
2481 unsigned Opcode;
2482 unsigned ExtOpcode;
2483 std::optional<Register> CarryIn;
2484 switch (MI.getOpcode()) {
2485 default:
2486 llvm_unreachable("Unexpected opcode!");
2487 case TargetOpcode::G_SADDO:
2488 Opcode = TargetOpcode::G_ADD;
2489 ExtOpcode = TargetOpcode::G_SEXT;
2490 break;
2491 case TargetOpcode::G_SSUBO:
2492 Opcode = TargetOpcode::G_SUB;
2493 ExtOpcode = TargetOpcode::G_SEXT;
2494 break;
2495 case TargetOpcode::G_UADDO:
2496 Opcode = TargetOpcode::G_ADD;
2497 ExtOpcode = TargetOpcode::G_ZEXT;
2498 break;
2499 case TargetOpcode::G_USUBO:
2500 Opcode = TargetOpcode::G_SUB;
2501 ExtOpcode = TargetOpcode::G_ZEXT;
2502 break;
2503 case TargetOpcode::G_SADDE:
2504 Opcode = TargetOpcode::G_UADDE;
2505 ExtOpcode = TargetOpcode::G_SEXT;
2506 CarryIn = MI.getOperand(4).getReg();
2507 break;
2508 case TargetOpcode::G_SSUBE:
2509 Opcode = TargetOpcode::G_USUBE;
2510 ExtOpcode = TargetOpcode::G_SEXT;
2511 CarryIn = MI.getOperand(4).getReg();
2512 break;
2513 case TargetOpcode::G_UADDE:
2514 Opcode = TargetOpcode::G_UADDE;
2515 ExtOpcode = TargetOpcode::G_ZEXT;
2516 CarryIn = MI.getOperand(4).getReg();
2517 break;
2518 case TargetOpcode::G_USUBE:
2519 Opcode = TargetOpcode::G_USUBE;
2520 ExtOpcode = TargetOpcode::G_ZEXT;
2521 CarryIn = MI.getOperand(4).getReg();
2522 break;
2523 }
2524
2525 if (TypeIdx == 1) {
2526 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2527
2528 Observer.changingInstr(MI);
2529 if (CarryIn)
2530 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2531 widenScalarDst(MI, WideTy, 1);
2532
2533 Observer.changedInstr(MI);
2534 return Legalized;
2535 }
2536
2537 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2538 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2539 // Do the arithmetic in the larger type.
2540 Register NewOp;
2541 if (CarryIn) {
2542 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2543 NewOp = MIRBuilder
2544 .buildInstr(Opcode, {WideTy, CarryOutTy},
2545 {LHSExt, RHSExt, *CarryIn})
2546 .getReg(0);
2547 } else {
2548 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2549 }
2550 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2551 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2552 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2553 // There is no overflow if the ExtOp is the same as NewOp.
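 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): the full sequence for a s8 G_SADDO widened to s32
 // looks roughly like:
 //   %a32:_(s32) = G_SEXT %a(s8)
 //   %b32:_(s32) = G_SEXT %b(s8)
 //   %sum:_(s32) = G_ADD %a32, %b32
 //   %t:_(s8) = G_TRUNC %sum
 //   %ext:_(s32) = G_SEXT %t
 //   %ovf:_(s1) = G_ICMP intpred(ne), %sum, %ext
 //   %res:_(s8) = G_TRUNC %sum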
2554 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2555 // Now trunc the NewOp to the original result.
2556 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2557 MI.eraseFromParent();
2558 return Legalized;
2559}
2560
2561 LegalizerHelper::LegalizeResult
2562 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2563 LLT WideTy) {
2564 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2565 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2566 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2567 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2568 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2569 // We can convert this to:
2570 // 1. Any extend iN to iM
2571 // 2. SHL by M-N
2572 // 3. [US][ADD|SUB|SHL]SAT
2573 // 4. L/ASHR by M-N
2574 //
2575 // It may be more efficient to lower this to a min and a max operation in
2576 // the higher precision arithmetic if the promoted operation isn't legal,
2577 // but this decision is up to the target's lowering request.
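 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): a s8 G_SADDSAT widened to s16 becomes, roughly:
 //   %a16:_(s16) = G_ANYEXT %a(s8)
 //   %b16:_(s16) = G_ANYEXT %b(s8)
 //   %as:_(s16) = G_SHL %a16, 8
 //   %bs:_(s16) = G_SHL %b16, 8
 //   %sat:_(s16) = G_SADDSAT %as, %bs
 //   %sh:_(s16) = G_ASHR %sat, 8
 //   %d:_(s8) = G_TRUNC %sh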
2578 Register DstReg = MI.getOperand(0).getReg();
2579
2580 unsigned NewBits = WideTy.getScalarSizeInBits();
2581 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2582
2583 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2584 // must not left shift the RHS to preserve the shift amount.
2585 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2586 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2587 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2588 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2589 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2590 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2591
2592 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2593 {ShiftL, ShiftR}, MI.getFlags());
2594
2595 // Use a shift that will preserve the number of sign bits when the trunc is
2596 // folded away.
2597 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2598 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2599
2600 MIRBuilder.buildTrunc(DstReg, Result);
2601 MI.eraseFromParent();
2602 return Legalized;
2603}
2604
2605 LegalizerHelper::LegalizeResult
2606 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2607 LLT WideTy) {
2608 if (TypeIdx == 1) {
2609 Observer.changingInstr(MI);
2610 widenScalarDst(MI, WideTy, 1);
2611 Observer.changedInstr(MI);
2612 return Legalized;
2613 }
2614
2615 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2616 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2617 LLT SrcTy = MRI.getType(LHS);
2618 LLT OverflowTy = MRI.getType(OriginalOverflow);
2619 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2620
2621 // To determine if the result overflowed in the larger type, we extend the
2622 // input to the larger type, do the multiply (checking if it overflows),
2623 // then also check the high bits of the result to see if overflow happened
2624 // there.
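 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): a s8 G_UMULO widened to s32 becomes, roughly
 // (s32 >= 2 * 8 bits, so the plain G_MUL itself cannot overflow):
 //   %a32:_(s32) = G_ZEXT %a(s8)
 //   %b32:_(s32) = G_ZEXT %b(s8)
 //   %m:_(s32) = G_MUL %a32, %b32
 //   %res:_(s8) = G_TRUNC %m
 //   %msk:_(s32) = G_AND %m, 255          ; zero-extend-in-register of the low 8 bits
 //   %ovf:_(s1) = G_ICMP intpred(ne), %m, %msk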
2625 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2626 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2627 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2628
2629 // Multiplication cannot overflow if WideTy is >= 2 * the original width,
2630 // so we don't need to check the overflow result of the wider-type Mulo.
2631 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2632
2633 unsigned MulOpc =
2634 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2635
2636 MachineInstrBuilder Mulo;
2637 if (WideMulCanOverflow)
2638 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2639 {LeftOperand, RightOperand});
2640 else
2641 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2642
2643 auto Mul = Mulo->getOperand(0);
2644 MIRBuilder.buildTrunc(Result, Mul);
2645
2646 MachineInstrBuilder ExtResult;
2647 // Overflow occurred if it occurred in the larger type, or if the high part
2648 // of the result does not zero/sign-extend the low part. Check this second
2649 // possibility first.
2650 if (IsSigned) {
2651 // For signed, overflow occurred when the high part does not sign-extend
2652 // the low part.
2653 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2654 } else {
2655 // Unsigned overflow occurred when the high part does not zero-extend the
2656 // low part.
2657 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2658 }
2659
2660 if (WideMulCanOverflow) {
2661 auto Overflow =
2662 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2663 // Finally check if the multiplication in the larger type itself overflowed.
2664 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2665 } else {
2666 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2667 }
2668 MI.eraseFromParent();
2669 return Legalized;
2670}
2671
2672 LegalizerHelper::LegalizeResult
2673 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2674 unsigned Opcode = MI.getOpcode();
2675 switch (Opcode) {
2676 default:
2677 return UnableToLegalize;
2678 case TargetOpcode::G_ATOMICRMW_XCHG:
2679 case TargetOpcode::G_ATOMICRMW_ADD:
2680 case TargetOpcode::G_ATOMICRMW_SUB:
2681 case TargetOpcode::G_ATOMICRMW_AND:
2682 case TargetOpcode::G_ATOMICRMW_OR:
2683 case TargetOpcode::G_ATOMICRMW_XOR:
2684 case TargetOpcode::G_ATOMICRMW_MIN:
2685 case TargetOpcode::G_ATOMICRMW_MAX:
2686 case TargetOpcode::G_ATOMICRMW_UMIN:
2687 case TargetOpcode::G_ATOMICRMW_UMAX:
2688 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2689 Observer.changingInstr(MI);
2690 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2691 widenScalarDst(MI, WideTy, 0);
2692 Observer.changedInstr(MI);
2693 return Legalized;
2694 case TargetOpcode::G_ATOMIC_CMPXCHG:
2695 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2696 Observer.changingInstr(MI);
2697 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2698 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2699 widenScalarDst(MI, WideTy, 0);
2700 Observer.changedInstr(MI);
2701 return Legalized;
2702 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2703 if (TypeIdx == 0) {
2704 Observer.changingInstr(MI);
2705 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2706 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2707 widenScalarDst(MI, WideTy, 0);
2708 Observer.changedInstr(MI);
2709 return Legalized;
2710 }
2711 assert(TypeIdx == 1 &&
2712 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2713 Observer.changingInstr(MI);
2714 widenScalarDst(MI, WideTy, 1);
2715 Observer.changedInstr(MI);
2716 return Legalized;
2717 case TargetOpcode::G_EXTRACT:
2718 return widenScalarExtract(MI, TypeIdx, WideTy);
2719 case TargetOpcode::G_INSERT:
2720 return widenScalarInsert(MI, TypeIdx, WideTy);
2721 case TargetOpcode::G_MERGE_VALUES:
2722 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2723 case TargetOpcode::G_UNMERGE_VALUES:
2724 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2725 case TargetOpcode::G_SADDO:
2726 case TargetOpcode::G_SSUBO:
2727 case TargetOpcode::G_UADDO:
2728 case TargetOpcode::G_USUBO:
2729 case TargetOpcode::G_SADDE:
2730 case TargetOpcode::G_SSUBE:
2731 case TargetOpcode::G_UADDE:
2732 case TargetOpcode::G_USUBE:
2733 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2734 case TargetOpcode::G_UMULO:
2735 case TargetOpcode::G_SMULO:
2736 return widenScalarMulo(MI, TypeIdx, WideTy);
2737 case TargetOpcode::G_SADDSAT:
2738 case TargetOpcode::G_SSUBSAT:
2739 case TargetOpcode::G_SSHLSAT:
2740 case TargetOpcode::G_UADDSAT:
2741 case TargetOpcode::G_USUBSAT:
2742 case TargetOpcode::G_USHLSAT:
2743 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2744 case TargetOpcode::G_CTTZ:
2745 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2746 case TargetOpcode::G_CTLZ:
2747 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2748 case TargetOpcode::G_CTPOP: {
2749 if (TypeIdx == 0) {
2750 Observer.changingInstr(MI);
2751 widenScalarDst(MI, WideTy, 0);
2752 Observer.changedInstr(MI);
2753 return Legalized;
2754 }
2755
2756 Register SrcReg = MI.getOperand(1).getReg();
2757
2758 // First extend the input.
2759 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2760 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2761 ? TargetOpcode::G_ANYEXT
2762 : TargetOpcode::G_ZEXT;
2763 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2764 LLT CurTy = MRI.getType(SrcReg);
2765 unsigned NewOpc = Opcode;
2766 if (NewOpc == TargetOpcode::G_CTTZ) {
2767 // The count is the same in the larger type except if the original
2768 // value was zero. This can be handled by setting the bit just off
2769 // the top of the original type.
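 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): a s8 G_CTTZ whose source is widened to s32 becomes,
 // roughly:
 //   %x32:_(s32) = G_ANYEXT %x(s8)
 //   %or:_(s32) = G_OR %x32, 256          ; set bit 8 so a zero input yields 8
 //   %cnt:_(s32) = G_CTTZ_ZERO_UNDEF %or
 //   %d:_(s8) = G_TRUNC %cnt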
2770 auto TopBit =
2771 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2772 MIBSrc = MIRBuilder.buildOr(
2773 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2774 // Now we know the operand is non-zero, use the more relaxed opcode.
2775 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2776 }
2777
2778 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2779
2780 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2781 // An optimization where the result is the CTLZ after a left shift by the
2782 // width difference between WideTy and CurTy, that is,
2783 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2784 // Result = ctlz MIBSrc
2785 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2786 MIRBuilder.buildConstant(WideTy, SizeDiff));
2787 }
2788
2789 // Perform the operation at the larger size.
2790 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2791 // This is already the correct result for CTPOP and CTTZs
2792 if (Opcode == TargetOpcode::G_CTLZ) {
2793 // The correct result is NewOp - (the width difference between WideTy and CurTy).
2794 MIBNewOp = MIRBuilder.buildSub(
2795 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2796 }
2797
2798 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2799 MI.eraseFromParent();
2800 return Legalized;
2801 }
2802 case TargetOpcode::G_BSWAP: {
2803 Observer.changingInstr(MI);
2804 Register DstReg = MI.getOperand(0).getReg();
2805
2806 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2807 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2808 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2809 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2810
2811 MI.getOperand(0).setReg(DstExt);
2812
2813 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2814
2815 LLT Ty = MRI.getType(DstReg);
2816 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2817 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2818 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2819
2820 MIRBuilder.buildTrunc(DstReg, ShrReg);
2821 Observer.changedInstr(MI);
2822 return Legalized;
2823 }
2824 case TargetOpcode::G_BITREVERSE: {
2825 Observer.changingInstr(MI);
2826
2827 Register DstReg = MI.getOperand(0).getReg();
2828 LLT Ty = MRI.getType(DstReg);
2829 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2830
2831 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2832 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2833 MI.getOperand(0).setReg(DstExt);
2834 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2835
2836 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2837 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2838 MIRBuilder.buildTrunc(DstReg, Shift);
2839 Observer.changedInstr(MI);
2840 return Legalized;
2841 }
2842 case TargetOpcode::G_FREEZE:
2843 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2844 Observer.changingInstr(MI);
2845 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2846 widenScalarDst(MI, WideTy);
2847 Observer.changedInstr(MI);
2848 return Legalized;
2849
2850 case TargetOpcode::G_ABS:
2851 Observer.changingInstr(MI);
2852 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2853 widenScalarDst(MI, WideTy);
2854 Observer.changedInstr(MI);
2855 return Legalized;
2856
2857 case TargetOpcode::G_ADD:
2858 case TargetOpcode::G_AND:
2859 case TargetOpcode::G_MUL:
2860 case TargetOpcode::G_OR:
2861 case TargetOpcode::G_XOR:
2862 case TargetOpcode::G_SUB:
2863 case TargetOpcode::G_SHUFFLE_VECTOR:
2864 // Perform operation at larger width (any extension is fine here, high bits
2865 // don't affect the result) and then truncate the result back to the
2866 // original type.
2867 Observer.changingInstr(MI);
2868 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2869 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2870 widenScalarDst(MI, WideTy);
2871 Observer.changedInstr(MI);
2872 return Legalized;
2873
2874 case TargetOpcode::G_SBFX:
2875 case TargetOpcode::G_UBFX:
2876 Observer.changingInstr(MI);
2877
2878 if (TypeIdx == 0) {
2879 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2880 widenScalarDst(MI, WideTy);
2881 } else {
2882 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2883 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2884 }
2885
2886 Observer.changedInstr(MI);
2887 return Legalized;
2888
2889 case TargetOpcode::G_SHL:
2890 Observer.changingInstr(MI);
2891
2892 if (TypeIdx == 0) {
2893 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2894 widenScalarDst(MI, WideTy);
2895 } else {
2896 assert(TypeIdx == 1);
2897 // The "number of bits to shift" operand must preserve its value as an
2898 // unsigned integer:
2899 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2900 }
2901
2902 Observer.changedInstr(MI);
2903 return Legalized;
2904
2905 case TargetOpcode::G_ROTR:
2906 case TargetOpcode::G_ROTL:
2907 if (TypeIdx != 1)
2908 return UnableToLegalize;
2909
2910 Observer.changingInstr(MI);
2911 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2912 Observer.changedInstr(MI);
2913 return Legalized;
2914
2915 case TargetOpcode::G_SDIV:
2916 case TargetOpcode::G_SREM:
2917 case TargetOpcode::G_SMIN:
2918 case TargetOpcode::G_SMAX:
2919 case TargetOpcode::G_ABDS:
2920 Observer.changingInstr(MI);
2921 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2922 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2923 widenScalarDst(MI, WideTy);
2924 Observer.changedInstr(MI);
2925 return Legalized;
2926
2927 case TargetOpcode::G_SDIVREM:
2928 Observer.changingInstr(MI);
2929 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2930 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2931 widenScalarDst(MI, WideTy);
2932 widenScalarDst(MI, WideTy, 1);
2933 Observer.changedInstr(MI);
2934 return Legalized;
2935
2936 case TargetOpcode::G_ASHR:
2937 case TargetOpcode::G_LSHR:
2938 Observer.changingInstr(MI);
2939
2940 if (TypeIdx == 0) {
2941 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2942 : TargetOpcode::G_ZEXT;
2943
2944 widenScalarSrc(MI, WideTy, 1, CvtOp);
2945 widenScalarDst(MI, WideTy);
2946 } else {
2947 assert(TypeIdx == 1);
2948 // The "number of bits to shift" operand must preserve its value as an
2949 // unsigned integer:
2950 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2951 }
2952
2953 Observer.changedInstr(MI);
2954 return Legalized;
2955 case TargetOpcode::G_UDIV:
2956 case TargetOpcode::G_UREM:
2957 case TargetOpcode::G_ABDU:
2958 Observer.changingInstr(MI);
2959 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2960 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2961 widenScalarDst(MI, WideTy);
2962 Observer.changedInstr(MI);
2963 return Legalized;
2964 case TargetOpcode::G_UDIVREM:
2965 Observer.changingInstr(MI);
2966 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2967 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2968 widenScalarDst(MI, WideTy);
2969 widenScalarDst(MI, WideTy, 1);
2970 Observer.changedInstr(MI);
2971 return Legalized;
2972 case TargetOpcode::G_UMIN:
2973 case TargetOpcode::G_UMAX: {
2974 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2975
2976 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
2977 unsigned ExtOpc =
2978 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
2979 getApproximateEVTForLLT(WideTy, Ctx))
2980 ? TargetOpcode::G_SEXT
2981 : TargetOpcode::G_ZEXT;
2982
2983 Observer.changingInstr(MI);
2984 widenScalarSrc(MI, WideTy, 1, ExtOpc);
2985 widenScalarSrc(MI, WideTy, 2, ExtOpc);
2986 widenScalarDst(MI, WideTy);
2987 Observer.changedInstr(MI);
2988 return Legalized;
2989 }
2990
2991 case TargetOpcode::G_SELECT:
2992 Observer.changingInstr(MI);
2993 if (TypeIdx == 0) {
2994 // Perform operation at larger width (any extension is fine here, high
2995 // bits don't affect the result) and then truncate the result back to the
2996 // original type.
2997 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2998 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2999 widenScalarDst(MI, WideTy);
3000 } else {
3001 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3002 // Explicit extension is required here since high bits affect the result.
3003 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3004 }
3005 Observer.changedInstr(MI);
3006 return Legalized;
3007
3008 case TargetOpcode::G_FPTOSI:
3009 case TargetOpcode::G_FPTOUI:
3010 case TargetOpcode::G_INTRINSIC_LRINT:
3011 case TargetOpcode::G_INTRINSIC_LLRINT:
3012 case TargetOpcode::G_IS_FPCLASS:
3013 Observer.changingInstr(MI);
3014
3015 if (TypeIdx == 0)
3016 widenScalarDst(MI, WideTy);
3017 else
3018 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3019
3020 Observer.changedInstr(MI);
3021 return Legalized;
3022 case TargetOpcode::G_SITOFP:
3023 Observer.changingInstr(MI);
3024
3025 if (TypeIdx == 0)
3026 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3027 else
3028 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3029
3030 Observer.changedInstr(MI);
3031 return Legalized;
3032 case TargetOpcode::G_UITOFP:
3033 Observer.changingInstr(MI);
3034
3035 if (TypeIdx == 0)
3036 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3037 else
3038 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3039
3040 Observer.changedInstr(MI);
3041 return Legalized;
3042 case TargetOpcode::G_FPTOSI_SAT:
3043 case TargetOpcode::G_FPTOUI_SAT:
3044 Observer.changingInstr(MI);
3045
3046 if (TypeIdx == 0) {
3047 Register OldDst = MI.getOperand(0).getReg();
3048 LLT Ty = MRI.getType(OldDst);
3049 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3050 Register NewDst;
3051 MI.getOperand(0).setReg(ExtReg);
3052 uint64_t ShortBits = Ty.getScalarSizeInBits();
3053 uint64_t WideBits = WideTy.getScalarSizeInBits();
3054 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3055 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3056 // z = i16 fptosi_sat(a)
3057 // ->
3058 // x = i32 fptosi_sat(a)
3059 // y = smin(x, 32767)
3060 // z = smax(y, -32768)
3061 auto MaxVal = MIRBuilder.buildConstant(
3062 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3063 auto MinVal = MIRBuilder.buildConstant(
3064 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3065 Register MidReg =
3066 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3067 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3068 } else {
3069 // z = i16 fptoui_sat(a)
3070 // ->
3071 // x = i32 fptoui_sat(a)
3072 // y = smin(x, 65535)
3073 auto MaxVal = MIRBuilder.buildConstant(
3074 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3075 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3076 }
3077 MIRBuilder.buildTrunc(OldDst, NewDst);
3078 } else
3079 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3080
3081 Observer.changedInstr(MI);
3082 return Legalized;
3083 case TargetOpcode::G_LOAD:
3084 case TargetOpcode::G_SEXTLOAD:
3085 case TargetOpcode::G_ZEXTLOAD:
3086 Observer.changingInstr(MI);
3087 widenScalarDst(MI, WideTy);
3088 Observer.changedInstr(MI);
3089 return Legalized;
3090
3091 case TargetOpcode::G_STORE: {
3092 if (TypeIdx != 0)
3093 return UnableToLegalize;
3094
3095 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3096 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3097 if (!Ty.isScalar()) {
3098 // We need to widen the vector element type.
3099 Observer.changingInstr(MI);
3100 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3101 // We also need to adjust the MMO to turn this into a truncating store.
3102 MachineMemOperand &MMO = **MI.memoperands_begin();
3103 MachineFunction &MF = MIRBuilder.getMF();
3104 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3105 MI.setMemRefs(MF, {NewMMO});
3106 Observer.changedInstr(MI);
3107 return Legalized;
3108 }
3109
3110 Observer.changingInstr(MI);
3111
3112 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3113 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3114 widenScalarSrc(MI, WideTy, 0, ExtType);
3115
3116 Observer.changedInstr(MI);
3117 return Legalized;
3118 }
3119 case TargetOpcode::G_CONSTANT: {
3120 MachineOperand &SrcMO = MI.getOperand(1);
3121 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3122 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3123 MRI.getType(MI.getOperand(0).getReg()));
3124 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3125 ExtOpc == TargetOpcode::G_ANYEXT) &&
3126 "Illegal Extend");
3127 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3128 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3129 ? SrcVal.sext(WideTy.getSizeInBits())
3130 : SrcVal.zext(WideTy.getSizeInBits());
3131 Observer.changingInstr(MI);
3132 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3133
3134 widenScalarDst(MI, WideTy);
3135 Observer.changedInstr(MI);
3136 return Legalized;
3137 }
3138 case TargetOpcode::G_FCONSTANT: {
3139 // To avoid changing the bits of the constant due to extension to a larger
3140 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
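 // Illustrative sketch (editorial addition, not from the source; register
 // names are invented): widening %d:_(s16) = G_FCONSTANT half 1.0 to s32
 // yields, roughly:
 //   %c:_(s32) = G_CONSTANT i32 15360     ; 0x3C00, the bit pattern of half 1.0
 //   %d:_(s16) = G_TRUNC %c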
3141 MachineOperand &SrcMO = MI.getOperand(1);
3142 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3143 MIRBuilder.setInstrAndDebugLoc(MI);
3144 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3145 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3146 MI.eraseFromParent();
3147 return Legalized;
3148 }
3149 case TargetOpcode::G_IMPLICIT_DEF: {
3150 Observer.changingInstr(MI);
3151 widenScalarDst(MI, WideTy);
3152 Observer.changedInstr(MI);
3153 return Legalized;
3154 }
3155 case TargetOpcode::G_BRCOND:
3156 Observer.changingInstr(MI);
3157 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3158 Observer.changedInstr(MI);
3159 return Legalized;
3160
3161 case TargetOpcode::G_FCMP:
3162 Observer.changingInstr(MI);
3163 if (TypeIdx == 0)
3164 widenScalarDst(MI, WideTy);
3165 else {
3166 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3167 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3168 }
3169 Observer.changedInstr(MI);
3170 return Legalized;
3171
3172 case TargetOpcode::G_ICMP:
3173 Observer.changingInstr(MI);
3174 if (TypeIdx == 0)
3175 widenScalarDst(MI, WideTy);
3176 else {
3177 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3178 CmpInst::Predicate Pred =
3179 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3180
3181 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3182 unsigned ExtOpcode =
3183 (CmpInst::isSigned(Pred) ||
3184 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3185 getApproximateEVTForLLT(WideTy, Ctx)))
3186 ? TargetOpcode::G_SEXT
3187 : TargetOpcode::G_ZEXT;
3188 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3189 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3190 }
3191 Observer.changedInstr(MI);
3192 return Legalized;
3193
3194 case TargetOpcode::G_PTR_ADD:
3195 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3196 Observer.changingInstr(MI);
3197 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3198 Observer.changedInstr(MI);
3199 return Legalized;
3200
3201 case TargetOpcode::G_PHI: {
3202 assert(TypeIdx == 0 && "Expecting only Idx 0");
3203
3204 Observer.changingInstr(MI);
3205 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3206 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3207 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3208 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3209 }
3210
3211 MachineBasicBlock &MBB = *MI.getParent();
3212 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3213 widenScalarDst(MI, WideTy);
3214 Observer.changedInstr(MI);
3215 return Legalized;
3216 }
3217 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3218 if (TypeIdx == 0) {
3219 Register VecReg = MI.getOperand(1).getReg();
3220 LLT VecTy = MRI.getType(VecReg);
3221 Observer.changingInstr(MI);
3222
3223 widenScalarSrc(
3224 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3225 TargetOpcode::G_ANYEXT);
3226
3227 widenScalarDst(MI, WideTy, 0);
3228 Observer.changedInstr(MI);
3229 return Legalized;
3230 }
3231
3232 if (TypeIdx != 2)
3233 return UnableToLegalize;
3234 Observer.changingInstr(MI);
3235 // TODO: Probably should be zext
3236 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3237 Observer.changedInstr(MI);
3238 return Legalized;
3239 }
3240 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3241 if (TypeIdx == 0) {
3242 Observer.changingInstr(MI);
3243 const LLT WideEltTy = WideTy.getElementType();
3244
3245 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3246 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3247 widenScalarDst(MI, WideTy, 0);
3248 Observer.changedInstr(MI);
3249 return Legalized;
3250 }
3251
3252 if (TypeIdx == 1) {
3253 Observer.changingInstr(MI);
3254
3255 Register VecReg = MI.getOperand(1).getReg();
3256 LLT VecTy = MRI.getType(VecReg);
3257 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3258
3259 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3260 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3261 widenScalarDst(MI, WideVecTy, 0);
3262 Observer.changedInstr(MI);
3263 return Legalized;
3264 }
3265
3266 if (TypeIdx == 2) {
3267 Observer.changingInstr(MI);
3268 // TODO: Probably should be zext
3269 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3270 Observer.changedInstr(MI);
3271 return Legalized;
3272 }
3273
3274 return UnableToLegalize;
3275 }
3276 case TargetOpcode::G_FADD:
3277 case TargetOpcode::G_FMUL:
3278 case TargetOpcode::G_FSUB:
3279 case TargetOpcode::G_FMA:
3280 case TargetOpcode::G_FMAD:
3281 case TargetOpcode::G_FNEG:
3282 case TargetOpcode::G_FABS:
3283 case TargetOpcode::G_FCANONICALIZE:
3284 case TargetOpcode::G_FMINNUM:
3285 case TargetOpcode::G_FMAXNUM:
3286 case TargetOpcode::G_FMINNUM_IEEE:
3287 case TargetOpcode::G_FMAXNUM_IEEE:
3288 case TargetOpcode::G_FMINIMUM:
3289 case TargetOpcode::G_FMAXIMUM:
3290 case TargetOpcode::G_FMINIMUMNUM:
3291 case TargetOpcode::G_FMAXIMUMNUM:
3292 case TargetOpcode::G_FDIV:
3293 case TargetOpcode::G_FREM:
3294 case TargetOpcode::G_FCEIL:
3295 case TargetOpcode::G_FFLOOR:
3296 case TargetOpcode::G_FCOS:
3297 case TargetOpcode::G_FSIN:
3298 case TargetOpcode::G_FTAN:
3299 case TargetOpcode::G_FACOS:
3300 case TargetOpcode::G_FASIN:
3301 case TargetOpcode::G_FATAN:
3302 case TargetOpcode::G_FATAN2:
3303 case TargetOpcode::G_FCOSH:
3304 case TargetOpcode::G_FSINH:
3305 case TargetOpcode::G_FTANH:
3306 case TargetOpcode::G_FLOG10:
3307 case TargetOpcode::G_FLOG:
3308 case TargetOpcode::G_FLOG2:
3309 case TargetOpcode::G_FRINT:
3310 case TargetOpcode::G_FNEARBYINT:
3311 case TargetOpcode::G_FSQRT:
3312 case TargetOpcode::G_FEXP:
3313 case TargetOpcode::G_FEXP2:
3314 case TargetOpcode::G_FEXP10:
3315 case TargetOpcode::G_FPOW:
3316 case TargetOpcode::G_INTRINSIC_TRUNC:
3317 case TargetOpcode::G_INTRINSIC_ROUND:
3318 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3319 assert(TypeIdx == 0);
3320 Observer.changingInstr(MI);
3321
3322 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3323 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3324
3325 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3326 Observer.changedInstr(MI);
3327 return Legalized;
3328 case TargetOpcode::G_FPOWI:
3329 case TargetOpcode::G_FLDEXP:
3330 case TargetOpcode::G_STRICT_FLDEXP: {
3331 if (TypeIdx == 0) {
3332 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3333 return UnableToLegalize;
3334
3335 Observer.changingInstr(MI);
3336 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3337 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3338 Observer.changedInstr(MI);
3339 return Legalized;
3340 }
3341
3342 if (TypeIdx == 1) {
3343 // For some reason SelectionDAG tries to promote to a libcall without
3344 // actually changing the integer type for promotion.
3345 Observer.changingInstr(MI);
3346 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3347 Observer.changedInstr(MI);
3348 return Legalized;
3349 }
3350
3351 return UnableToLegalize;
3352 }
3353 case TargetOpcode::G_FFREXP: {
3354 Observer.changingInstr(MI);
3355
3356 if (TypeIdx == 0) {
3357 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3358 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3359 } else {
3360 widenScalarDst(MI, WideTy, 1);
3361 }
3362
3363 Observer.changedInstr(MI);
3364 return Legalized;
3365 }
3366 case TargetOpcode::G_INTTOPTR:
3367 if (TypeIdx != 1)
3368 return UnableToLegalize;
3369
3370 Observer.changingInstr(MI);
3371 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3372 Observer.changedInstr(MI);
3373 return Legalized;
3374 case TargetOpcode::G_PTRTOINT:
3375 if (TypeIdx != 0)
3376 return UnableToLegalize;
3377
3378 Observer.changingInstr(MI);
3379 widenScalarDst(MI, WideTy, 0);
3380 Observer.changedInstr(MI);
3381 return Legalized;
3382 case TargetOpcode::G_BUILD_VECTOR: {
3383 Observer.changingInstr(MI);
3384
3385 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3386 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3387 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3388
3389 // Avoid changing the result vector type if the source element type was
3390 // requested.
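      // e.g. if the sources are widened from s8 to s32 but the <4 x s8> result
      // type is kept, this becomes:
      //   %v:_(<4 x s8>) = G_BUILD_VECTOR_TRUNC %a:_(s32), %b:_(s32), ...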
3391 if (TypeIdx == 1) {
3392 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3393 } else {
3394 widenScalarDst(MI, WideTy, 0);
3395 }
3396
3397 Observer.changedInstr(MI);
3398 return Legalized;
3399 }
3400 case TargetOpcode::G_SEXT_INREG:
3401 if (TypeIdx != 0)
3402 return UnableToLegalize;
3403
3404 Observer.changingInstr(MI);
3405 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3406 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3407 Observer.changedInstr(MI);
3408 return Legalized;
3409 case TargetOpcode::G_PTRMASK: {
3410 if (TypeIdx != 1)
3411 return UnableToLegalize;
3412 Observer.changingInstr(MI);
3413 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3414 Observer.changedInstr(MI);
3415 return Legalized;
3416 }
3417 case TargetOpcode::G_VECREDUCE_ADD: {
3418 if (TypeIdx != 1)
3419 return UnableToLegalize;
3420 Observer.changingInstr(MI);
3421 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3422 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3423 Observer.changedInstr(MI);
3424 return Legalized;
3425 }
3426 case TargetOpcode::G_VECREDUCE_FADD:
3427 case TargetOpcode::G_VECREDUCE_FMUL:
3428 case TargetOpcode::G_VECREDUCE_FMIN:
3429 case TargetOpcode::G_VECREDUCE_FMAX:
3430 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3431 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3432 if (TypeIdx != 0)
3433 return UnableToLegalize;
3434 Observer.changingInstr(MI);
3435 Register VecReg = MI.getOperand(1).getReg();
3436 LLT VecTy = MRI.getType(VecReg);
3437 LLT WideVecTy = VecTy.isVector()
3438 ? LLT::vector(VecTy.getElementCount(), WideTy)
3439 : WideTy;
3440 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3441 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3442 Observer.changedInstr(MI);
3443 return Legalized;
3444 }
3445 case TargetOpcode::G_VSCALE: {
3446 MachineOperand &SrcMO = MI.getOperand(1);
3447 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3448 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3449 // The CImm is always a signed value
3450 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3451 Observer.changingInstr(MI);
3452 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3453 widenScalarDst(MI, WideTy);
3454 Observer.changedInstr(MI);
3455 return Legalized;
3456 }
3457 case TargetOpcode::G_SPLAT_VECTOR: {
3458 if (TypeIdx != 1)
3459 return UnableToLegalize;
3460
3461 Observer.changingInstr(MI);
3462 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3463 Observer.changedInstr(MI);
3464 return Legalized;
3465 }
3466 case TargetOpcode::G_INSERT_SUBVECTOR: {
3467 if (TypeIdx != 0)
3468 return UnableToLegalize;
3469
3471 Register BigVec = IS.getBigVec();
3472 Register SubVec = IS.getSubVec();
3473
3474 LLT SubVecTy = MRI.getType(SubVec);
3475 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3476
3477 // Widen the G_INSERT_SUBVECTOR
3478 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3479 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3480 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3481 IS.getIndexImm());
3482
3483 // Truncate back down
3484 auto SplatZero = MIRBuilder.buildSplatVector(
3485 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3486 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3487 SplatZero);
3488
3489 MI.eraseFromParent();
3490
3491 return Legalized;
3492 }
3493 }
3494}
3495
3496static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3497                             MachineIRBuilder &B, Register Src, LLT Ty) {
3498 auto Unmerge = B.buildUnmerge(Ty, Src);
3499 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3500 Pieces.push_back(Unmerge.getReg(I));
3501}
3502
3503static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3504 MachineIRBuilder &MIRBuilder) {
3505 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3506 MachineFunction &MF = MIRBuilder.getMF();
3507 const DataLayout &DL = MIRBuilder.getDataLayout();
3508 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3509 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3510 LLT DstLLT = MRI.getType(DstReg);
3511
3512 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3513
3514 auto Addr = MIRBuilder.buildConstantPool(
3515 AddrPtrTy,
3516 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3517
3518 MachineMemOperand *MMO =
3519      MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3520                              MachineMemOperand::MOLoad, DstLLT, Alignment);
3521
3522 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3523}
3524
3525LegalizerHelper::LegalizeResult
3526LegalizerHelper::lowerConstant(MachineInstr &MI) {
3527  const MachineOperand &ConstOperand = MI.getOperand(1);
3528 const Constant *ConstantVal = ConstOperand.getCImm();
3529
3530 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3531 MI.eraseFromParent();
3532
3533 return Legalized;
3534}
3535
3536LegalizerHelper::LegalizeResult
3537LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3538  const MachineOperand &ConstOperand = MI.getOperand(1);
3539 const Constant *ConstantVal = ConstOperand.getFPImm();
3540
3541 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3542 MI.eraseFromParent();
3543
3544 return Legalized;
3545}
3546
3547LegalizerHelper::LegalizeResult
3548LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3549  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3550 if (SrcTy.isVector()) {
3551 LLT SrcEltTy = SrcTy.getElementType();
3552    SmallVector<Register, 8> SrcRegs;
3553
3554 if (DstTy.isVector()) {
3555 int NumDstElt = DstTy.getNumElements();
3556 int NumSrcElt = SrcTy.getNumElements();
3557
3558 LLT DstEltTy = DstTy.getElementType();
3559 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3560 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3561
3562 // If there's an element size mismatch, insert intermediate casts to match
3563 // the result element type.
3564 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3565 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3566 //
3567 // =>
3568 //
3569 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3570        // %4:_(<2 x s8>) = G_BITCAST %2
3571        // %5:_(<2 x s8>) = G_BITCAST %3
3572        // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3573 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3574 SrcPartTy = SrcEltTy;
3575 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3576 //
3577 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3578 //
3579 // =>
3580 //
3581 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3582        // %4:_(s16) = G_BITCAST %2
3583        // %5:_(s16) = G_BITCAST %3
3584        // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3585 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3586 DstCastTy = DstEltTy;
3587 }
3588
3589 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3590 for (Register &SrcReg : SrcRegs)
3591 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3592 } else
3593 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3594
3595 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3596 MI.eraseFromParent();
3597 return Legalized;
3598 }
3599
3600 if (DstTy.isVector()) {
3601    SmallVector<Register, 8> SrcRegs;
3602    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3603 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3604 MI.eraseFromParent();
3605 return Legalized;
3606 }
3607
3608 return UnableToLegalize;
3609}
3610
3611/// Figure out the bit offset into a register when coercing a vector index for
3612/// the wide element type. This is only for the case when promoting a vector
3613/// to one with larger elements.
3614///
3615///
3616/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3617/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
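///
/// e.g. for s8 elements viewed as s32 (DstEltSize / SrcEltSize = 4), %idx = 6
/// gives %offset_idx = 6 & 3 = 2 and %offset_bits = 2 << 3 = 16, i.e. the
/// element occupies bits [16, 24) of wide element 6 / 4 = 1.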
3619 Register Idx,
3620 unsigned NewEltSize,
3621 unsigned OldEltSize) {
3622 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3623 LLT IdxTy = B.getMRI()->getType(Idx);
3624
3625 // Now figure out the amount we need to shift to get the target bits.
3626 auto OffsetMask = B.buildConstant(
3627 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3628 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3629 return B.buildShl(IdxTy, OffsetIdx,
3630 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3631}
3632
3633/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3634/// is casting to a vector with a smaller element size, perform multiple element
3635/// extracts and merge the results. If this is coercing to a vector with larger
3636/// elements, index the bitcasted vector and extract the target element with bit
3637/// operations. This is intended to force the indexing in the native register
3638/// size for architectures that can dynamically index the register file.
3639LegalizerHelper::LegalizeResult
3640LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3641                                         LLT CastTy) {
3642 if (TypeIdx != 1)
3643 return UnableToLegalize;
3644
3645 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3646
3647 LLT SrcEltTy = SrcVecTy.getElementType();
3648 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3649 unsigned OldNumElts = SrcVecTy.getNumElements();
3650
3651 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3652 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3653
3654 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3655 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3656 if (NewNumElts > OldNumElts) {
3657 // Decreasing the vector element size
3658 //
3659 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3660 // =>
3661 // v4i32:castx = bitcast x:v2i64
3662 //
3663 // i64 = bitcast
3664 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3665 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3666 //
3667 if (NewNumElts % OldNumElts != 0)
3668 return UnableToLegalize;
3669
3670 // Type of the intermediate result vector.
3671 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3672 LLT MidTy =
3673 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3674
3675 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3676
3677 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3678 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3679
3680 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3681 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3682 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3683 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3684 NewOps[I] = Elt.getReg(0);
3685 }
3686
3687 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3688 MIRBuilder.buildBitcast(Dst, NewVec);
3689 MI.eraseFromParent();
3690 return Legalized;
3691 }
3692
3693 if (NewNumElts < OldNumElts) {
3694 if (NewEltSize % OldEltSize != 0)
3695 return UnableToLegalize;
3696
3697 // This only depends on powers of 2 because we use bit tricks to figure out
3698 // the bit offset we need to shift to get the target element. A general
3699 // expansion could emit division/multiply.
3700 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3701 return UnableToLegalize;
3702
3703 // Increasing the vector element size.
3704 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3705 //
3706 // =>
3707 //
3708 // %cast = G_BITCAST %vec
3709 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3710 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3711 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3712 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3713 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3714 // %elt = G_TRUNC %elt_bits
3715
3716 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3717 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3718
3719 // Divide to get the index in the wider element type.
3720 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3721
3722 Register WideElt = CastVec;
3723 if (CastTy.isVector()) {
3724 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3725 ScaledIdx).getReg(0);
3726 }
3727
3728 // Compute the bit offset into the register of the target element.
3730 MIRBuilder, Idx, NewEltSize, OldEltSize);
3731
3732 // Shift the wide element to get the target element.
3733 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3734 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3735 MI.eraseFromParent();
3736 return Legalized;
3737 }
3738
3739 return UnableToLegalize;
3740}
3741
3742/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits, while
3743/// preserving the other bits in \p TargetReg.
3744///
3745/// (ZExt(InsertReg) << OffsetBits) | (TargetReg & ~(LowBitsMask(InsertReg.size()) << OffsetBits))
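///
/// e.g. with TargetReg = 0xAABBCCDD (s32), InsertReg = 0xEE (s8) and
/// OffsetBits = 8, the result is 0xAABBEEDD.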
3746static Register buildBitFieldInsert(MachineIRBuilder &B,
3747                                    Register TargetReg, Register InsertReg,
3748 Register OffsetBits) {
3749 LLT TargetTy = B.getMRI()->getType(TargetReg);
3750 LLT InsertTy = B.getMRI()->getType(InsertReg);
3751 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3752 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3753
3754 // Produce a bitmask of the value to insert
3755 auto EltMask = B.buildConstant(
3756 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3757 InsertTy.getSizeInBits()));
3758 // Shift it into position
3759 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3760 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3761
3762 // Clear out the bits in the wide element
3763 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3764
3765 // The value to insert has all zeros already, so stick it into the masked
3766 // wide element.
3767 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3768}
3769
3770/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3771/// is increasing the element size, perform the indexing in the target element
3772/// type, and use bit operations to insert at the element position. This is
3773/// intended for architectures that can dynamically index the register file and
3774/// want to force indexing in the native register size.
3775LegalizerHelper::LegalizeResult
3776LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3777                                        LLT CastTy) {
3778 if (TypeIdx != 0)
3779 return UnableToLegalize;
3780
3781 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3782 MI.getFirst4RegLLTs();
3783 LLT VecTy = DstTy;
3784
3785 LLT VecEltTy = VecTy.getElementType();
3786 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3787 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3788 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3789
3790 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3791 unsigned OldNumElts = VecTy.getNumElements();
3792
3793 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3794 if (NewNumElts < OldNumElts) {
3795 if (NewEltSize % OldEltSize != 0)
3796 return UnableToLegalize;
3797
3798 // This only depends on powers of 2 because we use bit tricks to figure out
3799 // the bit offset we need to shift to get the target element. A general
3800 // expansion could emit division/multiply.
3801 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3802 return UnableToLegalize;
3803
3804 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3805 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3806
3807 // Divide to get the index in the wider element type.
3808 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3809
3810 Register ExtractedElt = CastVec;
3811 if (CastTy.isVector()) {
3812 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3813 ScaledIdx).getReg(0);
3814 }
3815
3816 // Compute the bit offset into the register of the target element.
3818 MIRBuilder, Idx, NewEltSize, OldEltSize);
3819
3820 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3821 Val, OffsetBits);
3822 if (CastTy.isVector()) {
3823 InsertedElt = MIRBuilder.buildInsertVectorElement(
3824 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3825 }
3826
3827 MIRBuilder.buildBitcast(Dst, InsertedElt);
3828 MI.eraseFromParent();
3829 return Legalized;
3830 }
3831
3832 return UnableToLegalize;
3833}
3834
3835// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3836// those whose operand vector types are smaller than the legal vector type.
3837//
3838// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3839//
3840// ===>
3841//
3842// s32 = G_BITCAST <4 x s8>
3843// s32 = G_BITCAST <4 x s8>
3844// s32 = G_BITCAST <4 x s8>
3845// s32 = G_BITCAST <4 x s8>
3846// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3847// <16 x s8> = G_BITCAST <4 x s32>
3848LegalizerHelper::LegalizeResult
3849LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3850                                     LLT CastTy) {
3851  // Bail out if this is not a G_CONCAT_VECTORS instruction.
3852 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3853 if (!ConcatMI) {
3854 return UnableToLegalize;
3855 }
3856
3857  // Compute the scalar type each source vector will be bitcast to.
3858 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3859 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3860
3861 // Check if the build vector is Legal
3862 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3863 return UnableToLegalize;
3864 }
3865
3866 // Bitcast the sources
3867 SmallVector<Register> BitcastRegs;
3868 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3869 BitcastRegs.push_back(
3870 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3871 .getReg(0));
3872 }
3873
3874 // Build the scalar values into a vector
3875 Register BuildReg =
3876 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3877 MIRBuilder.buildBitcast(DstReg, BuildReg);
3878
3879 MI.eraseFromParent();
3880 return Legalized;
3881}
3882
3883// This bitcasts a shuffle vector to a different type, currently one with the
3884// same element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3885// will be used instead.
3886//
3887// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3888// ===>
3889// <4 x s64> = G_PTRTOINT <4 x p0>
3890// <4 x s64> = G_PTRTOINT <4 x p0>
3891// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3892// <16 x p0> = G_INTTOPTR <16 x s64>
3893LegalizerHelper::LegalizeResult
3894LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3895                                      LLT CastTy) {
3896 auto ShuffleMI = cast<GShuffleVector>(&MI);
3897 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3898 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3899
3900 // We currently only handle vectors of the same size.
3901 if (TypeIdx != 0 ||
3902 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3903 CastTy.getElementCount() != DstTy.getElementCount())
3904 return UnableToLegalize;
3905
3906 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3907
3908 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3909 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3910 auto Shuf =
3911 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3912 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3913
3914 MI.eraseFromParent();
3915 return Legalized;
3916}
3917
3918/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3919///
3920/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3921///
3922/// ===>
3923///
3924/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3925/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
3926/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
3927LegalizerHelper::LegalizeResult
3928LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
3929                                         LLT CastTy) {
3930 auto ES = cast<GExtractSubvector>(&MI);
3931
3932 if (!CastTy.isVector())
3933 return UnableToLegalize;
3934
3935 if (TypeIdx != 0)
3936 return UnableToLegalize;
3937
3938 Register Dst = ES->getReg(0);
3939 Register Src = ES->getSrcVec();
3940 uint64_t Idx = ES->getIndexImm();
3941
3942 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3943
3944 LLT DstTy = MRI.getType(Dst);
3945 LLT SrcTy = MRI.getType(Src);
3946 ElementCount DstTyEC = DstTy.getElementCount();
3947 ElementCount SrcTyEC = SrcTy.getElementCount();
3948 auto DstTyMinElts = DstTyEC.getKnownMinValue();
3949 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
3950
3951 if (DstTy == CastTy)
3952 return Legalized;
3953
3954 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
3955 return UnableToLegalize;
3956
3957 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
3958 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
3959 if (CastEltSize < DstEltSize)
3960 return UnableToLegalize;
3961
3962 auto AdjustAmt = CastEltSize / DstEltSize;
3963 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3964 SrcTyMinElts % AdjustAmt != 0)
3965 return UnableToLegalize;
3966
3967 Idx /= AdjustAmt;
3968 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3969 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
3970 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
3971 MIRBuilder.buildBitcast(Dst, PromotedES);
3972
3973 ES->eraseFromParent();
3974 return Legalized;
3975}
3976
3977/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
3978///
3979/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
3980/// <vscale x 8 x i1>,
3981/// N
3982///
3983/// ===>
3984///
3985/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3986/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
3987/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
3988/// <vscale x 1 x i8>, N / 8
3989/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
3990LegalizerHelper::LegalizeResult
3991LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
3992                                        LLT CastTy) {
3993 auto ES = cast<GInsertSubvector>(&MI);
3994
3995 if (!CastTy.isVector())
3996 return UnableToLegalize;
3997
3998 if (TypeIdx != 0)
3999 return UnableToLegalize;
4000
4001 Register Dst = ES->getReg(0);
4002 Register BigVec = ES->getBigVec();
4003 Register SubVec = ES->getSubVec();
4004 uint64_t Idx = ES->getIndexImm();
4005
4006 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4007
4008 LLT DstTy = MRI.getType(Dst);
4009 LLT BigVecTy = MRI.getType(BigVec);
4010 LLT SubVecTy = MRI.getType(SubVec);
4011
4012 if (DstTy == CastTy)
4013 return Legalized;
4014
4015 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4016 return UnableToLegalize;
4017
4018 ElementCount DstTyEC = DstTy.getElementCount();
4019 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4020 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4021 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4022 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4023 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4024
4025 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4026 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4027 if (CastEltSize < DstEltSize)
4028 return UnableToLegalize;
4029
4030 auto AdjustAmt = CastEltSize / DstEltSize;
4031 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4032 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4033 return UnableToLegalize;
4034
4035 Idx /= AdjustAmt;
4036 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4037 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4038 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4039 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4040 auto PromotedIS =
4041 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4042 MIRBuilder.buildBitcast(Dst, PromotedIS);
4043
4044 ES->eraseFromParent();
4045 return Legalized;
4046}
4047
4048LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4049  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4050 Register DstReg = LoadMI.getDstReg();
4051 Register PtrReg = LoadMI.getPointerReg();
4052 LLT DstTy = MRI.getType(DstReg);
4053 MachineMemOperand &MMO = LoadMI.getMMO();
4054 LLT MemTy = MMO.getMemoryType();
4055 MachineFunction &MF = MIRBuilder.getMF();
4056
4057 unsigned MemSizeInBits = MemTy.getSizeInBits();
4058 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4059
4060 if (MemSizeInBits != MemStoreSizeInBits) {
4061 if (MemTy.isVector())
4062 return UnableToLegalize;
4063
4064 // Promote to a byte-sized load if not loading an integral number of
4065 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4066 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4067 MachineMemOperand *NewMMO =
4068 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4069
4070 Register LoadReg = DstReg;
4071 LLT LoadTy = DstTy;
4072
4073 // If this wasn't already an extending load, we need to widen the result
4074 // register to avoid creating a load with a narrower result than the source.
4075 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4076 LoadTy = WideMemTy;
4077 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4078 }
4079
4080 if (isa<GSExtLoad>(LoadMI)) {
4081 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4082 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4083 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4084 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4085 // The extra bits are guaranteed to be zero, since we stored them that
4086 // way. A zext load from Wide thus automatically gives zext from MemVT.
4087 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4088 } else {
4089 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4090 }
4091
4092 if (DstTy != LoadTy)
4093 MIRBuilder.buildTrunc(DstReg, LoadReg);
4094
4095 LoadMI.eraseFromParent();
4096 return Legalized;
4097 }
4098
4099 // Big endian lowering not implemented.
4100 if (MIRBuilder.getDataLayout().isBigEndian())
4101 return UnableToLegalize;
4102
4103 // This load needs splitting into power of 2 sized loads.
4104 //
4105 // Our strategy here is to generate anyextending loads for the smaller
4106 // types up to next power-2 result type, and then combine the two larger
4107 // result values together, before truncating back down to the non-pow-2
4108 // type.
4109 // E.g. v1 = i24 load =>
4110 // v2 = i32 zextload (2 byte)
4111 // v3 = i32 load (1 byte)
4112 // v4 = i32 shl v3, 16
4113 // v5 = i32 or v4, v2
4114 // v1 = i24 trunc v5
4115 // By doing this we generate the correct truncate which should get
4116 // combined away as an artifact with a matching extend.
4117
4118 uint64_t LargeSplitSize, SmallSplitSize;
4119
4120 if (!isPowerOf2_32(MemSizeInBits)) {
4121 // This load needs splitting into power of 2 sized loads.
4122 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4123 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4124 } else {
4125 // This is already a power of 2, but we still need to split this in half.
4126 //
4127 // Assume we're being asked to decompose an unaligned load.
4128 // TODO: If this requires multiple splits, handle them all at once.
4129 auto &Ctx = MF.getFunction().getContext();
4130 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4131 return UnableToLegalize;
4132
4133 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4134 }
4135
4136 if (MemTy.isVector()) {
4137 // TODO: Handle vector extloads
4138 if (MemTy != DstTy)
4139 return UnableToLegalize;
4140
4141 Align Alignment = LoadMI.getAlign();
4142 // Given an alignment larger than the size of the memory, we can increase
4143 // the size of the load without needing to scalarize it.
4144 if (Alignment.value() * 8 > MemSizeInBits &&
4147 DstTy.getElementType());
4148 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4149 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4150 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4151 NewLoad.getReg(0));
4152 LoadMI.eraseFromParent();
4153 return Legalized;
4154 }
4155
4156 // TODO: We can do better than scalarizing the vector and at least split it
4157 // in half.
4158 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4159 }
4160
4161 MachineMemOperand *LargeMMO =
4162 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4163 MachineMemOperand *SmallMMO =
4164 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4165
4166 LLT PtrTy = MRI.getType(PtrReg);
4167 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4168 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4169 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4170 PtrReg, *LargeMMO);
4171
4172 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4173 LargeSplitSize / 8);
4174 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4175 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4176 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4177 SmallPtr, *SmallMMO);
4178
4179 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4180 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4181
4182 if (AnyExtTy == DstTy)
4183 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4184 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4185 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4186 MIRBuilder.buildTrunc(DstReg, {Or});
4187 } else {
4188 assert(DstTy.isPointer() && "expected pointer");
4189 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4190
4191 // FIXME: We currently consider this to be illegal for non-integral address
4192    // spaces, but we still need a way to reinterpret the bits.
4193 MIRBuilder.buildIntToPtr(DstReg, Or);
4194 }
4195
4196 LoadMI.eraseFromParent();
4197 return Legalized;
4198}
4199
4200LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4201  // Lower a non-power of 2 store into multiple pow-2 stores.
4202 // E.g. split an i24 store into an i16 store + i8 store.
4203 // We do this by first extending the stored value to the next largest power
4204 // of 2 type, and then using truncating stores to store the components.
4205 // By doing this, likewise with G_LOAD, generate an extend that can be
4206 // artifact-combined away instead of leaving behind extracts.
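  // e.g. for a little-endian s24 store of %val at %ptr this produces roughly:
  //   %ext:_(s32) = G_ANYEXT %val
  //   %hi:_(s32)  = G_LSHR %ext, 16
  //   G_STORE %ext, %ptr         ; 2-byte truncating store
  //   %ptr2:_(p0) = G_PTR_ADD %ptr, 2
  //   G_STORE %hi, %ptr2         ; 1-byte truncating store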
4207 Register SrcReg = StoreMI.getValueReg();
4208 Register PtrReg = StoreMI.getPointerReg();
4209 LLT SrcTy = MRI.getType(SrcReg);
4210 MachineFunction &MF = MIRBuilder.getMF();
4211 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4212 LLT MemTy = MMO.getMemoryType();
4213
4214 unsigned StoreWidth = MemTy.getSizeInBits();
4215 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4216
4217 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4218 // Promote to a byte-sized store with upper bits zero if not
4219 // storing an integral number of bytes. For example, promote
4220 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4221 LLT WideTy = LLT::scalar(StoreSizeInBits);
4222
4223 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4224 // Avoid creating a store with a narrower source than result.
4225 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4226 SrcTy = WideTy;
4227 }
4228
4229 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4230
4231 MachineMemOperand *NewMMO =
4232 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4233 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4234 StoreMI.eraseFromParent();
4235 return Legalized;
4236 }
4237
4238 if (MemTy.isVector()) {
4239 if (MemTy != SrcTy)
4240 return scalarizeVectorBooleanStore(StoreMI);
4241
4242 // TODO: We can do better than scalarizing the vector and at least split it
4243 // in half.
4244 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4245 }
4246
4247 unsigned MemSizeInBits = MemTy.getSizeInBits();
4248 uint64_t LargeSplitSize, SmallSplitSize;
4249
4250 if (!isPowerOf2_32(MemSizeInBits)) {
4251 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4252 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4253 } else {
4254 auto &Ctx = MF.getFunction().getContext();
4255 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4256 return UnableToLegalize; // Don't know what we're being asked to do.
4257
4258 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4259 }
4260
4261 // Extend to the next pow-2. If this store was itself the result of lowering,
4262 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4263 // that's wider than the stored size.
4264 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4265 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4266
4267 if (SrcTy.isPointer()) {
4268 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4269 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4270 }
4271
4272 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4273
4274 // Obtain the smaller value by shifting away the larger value.
4275 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4276 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4277
4278 // Generate the PtrAdd and truncating stores.
4279 LLT PtrTy = MRI.getType(PtrReg);
4280 auto OffsetCst = MIRBuilder.buildConstant(
4281 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4282 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4283
4284 MachineMemOperand *LargeMMO =
4285 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4286 MachineMemOperand *SmallMMO =
4287 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4288 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4289 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4290 StoreMI.eraseFromParent();
4291 return Legalized;
4292}
4293
4294LegalizerHelper::LegalizeResult
4295LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4296  Register SrcReg = StoreMI.getValueReg();
4297 Register PtrReg = StoreMI.getPointerReg();
4298 LLT SrcTy = MRI.getType(SrcReg);
4299 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4300 LLT MemTy = MMO.getMemoryType();
4301 LLT MemScalarTy = MemTy.getElementType();
4302 MachineFunction &MF = MIRBuilder.getMF();
4303
4304 assert(SrcTy.isVector() && "Expect a vector store type");
4305
4306 if (!MemScalarTy.isByteSized()) {
4307 // We need to build an integer scalar of the vector bit pattern.
4308 // It's not legal for us to add padding when storing a vector.
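    // e.g. a <4 x s1> store becomes a single s4 store in which element I ends
    // up at bit I (little endian) or bit 3 - I (big endian).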
4309 unsigned NumBits = MemTy.getSizeInBits();
4310 LLT IntTy = LLT::scalar(NumBits);
4311 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4312 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4313
4314 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4315 auto Elt = MIRBuilder.buildExtractVectorElement(
4316 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4317 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4318 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4319 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4320 ? (MemTy.getNumElements() - 1) - I
4321 : I;
4322 auto ShiftAmt = MIRBuilder.buildConstant(
4323 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4324 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4325 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4326 }
4327 auto PtrInfo = MMO.getPointerInfo();
4328 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4329 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4330 StoreMI.eraseFromParent();
4331 return Legalized;
4332 }
4333
4334 // TODO: implement simple scalarization.
4335 return UnableToLegalize;
4336}
4337
4338LegalizerHelper::LegalizeResult
4339LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4340 switch (MI.getOpcode()) {
4341 case TargetOpcode::G_LOAD: {
4342 if (TypeIdx != 0)
4343 return UnableToLegalize;
4344 MachineMemOperand &MMO = **MI.memoperands_begin();
4345
4346 // Not sure how to interpret a bitcast of an extending load.
4347 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4348 return UnableToLegalize;
4349
4350 Observer.changingInstr(MI);
4351 bitcastDst(MI, CastTy, 0);
4352 MMO.setType(CastTy);
4353 // The range metadata is no longer valid when reinterpreted as a different
4354 // type.
4355 MMO.clearRanges();
4356 Observer.changedInstr(MI);
4357 return Legalized;
4358 }
4359 case TargetOpcode::G_STORE: {
4360 if (TypeIdx != 0)
4361 return UnableToLegalize;
4362
4363 MachineMemOperand &MMO = **MI.memoperands_begin();
4364
4365 // Not sure how to interpret a bitcast of a truncating store.
4366 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4367 return UnableToLegalize;
4368
4369 Observer.changingInstr(MI);
4370 bitcastSrc(MI, CastTy, 0);
4371 MMO.setType(CastTy);
4372 Observer.changedInstr(MI);
4373 return Legalized;
4374 }
4375 case TargetOpcode::G_SELECT: {
4376 if (TypeIdx != 0)
4377 return UnableToLegalize;
4378
4379 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4380 LLVM_DEBUG(
4381 dbgs() << "bitcast action not implemented for vector select\n");
4382 return UnableToLegalize;
4383 }
4384
4385 Observer.changingInstr(MI);
4386 bitcastSrc(MI, CastTy, 2);
4387 bitcastSrc(MI, CastTy, 3);
4388 bitcastDst(MI, CastTy, 0);
4389 Observer.changedInstr(MI);
4390 return Legalized;
4391 }
4392 case TargetOpcode::G_AND:
4393 case TargetOpcode::G_OR:
4394 case TargetOpcode::G_XOR: {
4395 Observer.changingInstr(MI);
4396 bitcastSrc(MI, CastTy, 1);
4397 bitcastSrc(MI, CastTy, 2);
4398 bitcastDst(MI, CastTy, 0);
4399 Observer.changedInstr(MI);
4400 return Legalized;
4401 }
4402 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4403 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4404 case TargetOpcode::G_INSERT_VECTOR_ELT:
4405 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4406 case TargetOpcode::G_CONCAT_VECTORS:
4407 return bitcastConcatVector(MI, TypeIdx, CastTy);
4408 case TargetOpcode::G_SHUFFLE_VECTOR:
4409 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4410 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4411 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4412 case TargetOpcode::G_INSERT_SUBVECTOR:
4413 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4414 default:
4415 return UnableToLegalize;
4416 }
4417}
4418
4419// Legalize an instruction by changing the opcode in place.
4420void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4421  Observer.changingInstr(MI);
4422  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4423  Observer.changedInstr(MI);
4424}
4425
4426LegalizerHelper::LegalizeResult
4427LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4428 using namespace TargetOpcode;
4429
4430 switch(MI.getOpcode()) {
4431 default:
4432 return UnableToLegalize;
4433 case TargetOpcode::G_FCONSTANT:
4434 return lowerFConstant(MI);
4435 case TargetOpcode::G_BITCAST:
4436 return lowerBitcast(MI);
4437 case TargetOpcode::G_SREM:
4438 case TargetOpcode::G_UREM: {
4439 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4440 auto Quot =
4441 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4442 {MI.getOperand(1), MI.getOperand(2)});
4443
4444 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4445 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4446 MI.eraseFromParent();
4447 return Legalized;
4448 }
4449 case TargetOpcode::G_SADDO:
4450 case TargetOpcode::G_SSUBO:
4451 return lowerSADDO_SSUBO(MI);
4452 case TargetOpcode::G_UMULH:
4453 case TargetOpcode::G_SMULH:
4454 return lowerSMULH_UMULH(MI);
4455 case TargetOpcode::G_SMULO:
4456 case TargetOpcode::G_UMULO: {
4457 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4458 // result.
4459 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4460 LLT Ty = MRI.getType(Res);
4461
4462 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4463 ? TargetOpcode::G_SMULH
4464 : TargetOpcode::G_UMULH;
4465
4466 Observer.changingInstr(MI);
4467 const auto &TII = MIRBuilder.getTII();
4468 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4469 MI.removeOperand(1);
4470 Observer.changedInstr(MI);
4471
4472 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4473 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4474
4475 // Move insert point forward so we can use the Res register if needed.
4476 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4477
4478 // For *signed* multiply, overflow is detected by checking:
4479 // (hi != (lo >> bitwidth-1))
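    // For *unsigned* multiply, overflow is detected by checking (hi != 0).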
4480 if (Opcode == TargetOpcode::G_SMULH) {
4481 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4482 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4483 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4484 } else {
4485 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4486 }
4487 return Legalized;
4488 }
4489 case TargetOpcode::G_FNEG: {
4490 auto [Res, SubByReg] = MI.getFirst2Regs();
4491 LLT Ty = MRI.getType(Res);
4492
4493 auto SignMask = MIRBuilder.buildConstant(
4494 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4495 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4496 MI.eraseFromParent();
4497 return Legalized;
4498 }
4499 case TargetOpcode::G_FSUB:
4500 case TargetOpcode::G_STRICT_FSUB: {
4501 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4502 LLT Ty = MRI.getType(Res);
4503
4504 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4505 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4506
4507 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4508 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4509 else
4510 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4511
4512 MI.eraseFromParent();
4513 return Legalized;
4514 }
4515 case TargetOpcode::G_FMAD:
4516 return lowerFMad(MI);
4517 case TargetOpcode::G_FFLOOR:
4518 return lowerFFloor(MI);
4519 case TargetOpcode::G_LROUND:
4520 case TargetOpcode::G_LLROUND: {
4521 Register DstReg = MI.getOperand(0).getReg();
4522 Register SrcReg = MI.getOperand(1).getReg();
4523 LLT SrcTy = MRI.getType(SrcReg);
4524 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4525 {SrcReg});
4526 MIRBuilder.buildFPTOSI(DstReg, Round);
4527 MI.eraseFromParent();
4528 return Legalized;
4529 }
4530 case TargetOpcode::G_INTRINSIC_ROUND:
4531 return lowerIntrinsicRound(MI);
4532 case TargetOpcode::G_FRINT: {
4533 // Since round even is the assumed rounding mode for unconstrained FP
4534 // operations, rint and roundeven are the same operation.
4535 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4536 return Legalized;
4537 }
4538 case TargetOpcode::G_INTRINSIC_LRINT:
4539 case TargetOpcode::G_INTRINSIC_LLRINT: {
4540 Register DstReg = MI.getOperand(0).getReg();
4541 Register SrcReg = MI.getOperand(1).getReg();
4542 LLT SrcTy = MRI.getType(SrcReg);
4543 auto Round =
4544 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4545 MIRBuilder.buildFPTOSI(DstReg, Round);
4546 MI.eraseFromParent();
4547 return Legalized;
4548 }
4549 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4550 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4551 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4552 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4553 **MI.memoperands_begin());
4554 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4555 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4556 MI.eraseFromParent();
4557 return Legalized;
4558 }
4559 case TargetOpcode::G_LOAD:
4560 case TargetOpcode::G_SEXTLOAD:
4561 case TargetOpcode::G_ZEXTLOAD:
4562 return lowerLoad(cast<GAnyLoad>(MI));
4563 case TargetOpcode::G_STORE:
4564 return lowerStore(cast<GStore>(MI));
4565 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4566 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4567 case TargetOpcode::G_CTLZ:
4568 case TargetOpcode::G_CTTZ:
4569 case TargetOpcode::G_CTPOP:
4570 return lowerBitCount(MI);
4571 case G_UADDO: {
4572 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4573
4574 Register NewRes = MRI.cloneVirtualRegister(Res);
4575
4576 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4577 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4578
4579 MIRBuilder.buildCopy(Res, NewRes);
4580
4581 MI.eraseFromParent();
4582 return Legalized;
4583 }
4584 case G_UADDE: {
4585 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4586 const LLT CondTy = MRI.getType(CarryOut);
4587 const LLT Ty = MRI.getType(Res);
4588
4589 Register NewRes = MRI.cloneVirtualRegister(Res);
4590
4591 // Initial add of the two operands.
4592 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4593
4594 // Initial check for carry.
4595 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4596
4597 // Add the sum and the carry.
4598 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4599 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4600
4601 // Second check for carry. We can only carry if the initial sum is all 1s
4602 // and the carry is set, resulting in a new sum of 0.
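    // e.g. on s8: LHS = 0xFF, RHS = 0x00, CarryIn = 1 gives TmpRes = 0xFF (no
    // carry from the first add) and NewRes = 0x00, so Carry2 sets CarryOut.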
4603 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4604 auto ResEqZero =
4605 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4606 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4607 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4608
4609 MIRBuilder.buildCopy(Res, NewRes);
4610
4611 MI.eraseFromParent();
4612 return Legalized;
4613 }
4614 case G_USUBO: {
4615 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4616
4617 MIRBuilder.buildSub(Res, LHS, RHS);
4618 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4619
4620 MI.eraseFromParent();
4621 return Legalized;
4622 }
4623 case G_USUBE: {
4624 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4625 const LLT CondTy = MRI.getType(BorrowOut);
4626 const LLT Ty = MRI.getType(Res);
4627
4628 // Initial subtract of the two operands.
4629 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4630
4631 // Initial check for borrow.
4632 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4633
4634 // Subtract the borrow from the first subtract.
4635 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4636 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4637
4638 // Second check for borrow. We can only borrow if the initial difference is
4639 // 0 and the borrow is set, resulting in a new difference of all 1s.
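    // e.g. on s8: LHS = 0x00, RHS = 0x00, BorrowIn = 1 gives TmpRes = 0x00 (no
    // borrow from the first sub) and Res = 0xFF, so Borrow2 sets BorrowOut.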
4640 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4641 auto TmpResEqZero =
4642 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4643 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4644 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4645
4646 MI.eraseFromParent();
4647 return Legalized;
4648 }
4649 case G_UITOFP:
4650 return lowerUITOFP(MI);
4651 case G_SITOFP:
4652 return lowerSITOFP(MI);
4653 case G_FPTOUI:
4654 return lowerFPTOUI(MI);
4655 case G_FPTOSI:
4656 return lowerFPTOSI(MI);
4657 case G_FPTOUI_SAT:
4658 case G_FPTOSI_SAT:
4659 return lowerFPTOINT_SAT(MI);
4660 case G_FPTRUNC:
4661 return lowerFPTRUNC(MI);
4662 case G_FPOWI:
4663 return lowerFPOWI(MI);
4664 case G_SMIN:
4665 case G_SMAX:
4666 case G_UMIN:
4667 case G_UMAX:
4668 return lowerMinMax(MI);
4669 case G_SCMP:
4670 case G_UCMP:
4671 return lowerThreewayCompare(MI);
4672 case G_FCOPYSIGN:
4673 return lowerFCopySign(MI);
4674 case G_FMINNUM:
4675 case G_FMAXNUM:
4676 case G_FMINIMUMNUM:
4677 case G_FMAXIMUMNUM:
4678 return lowerFMinNumMaxNum(MI);
4679 case G_MERGE_VALUES:
4680 return lowerMergeValues(MI);
4681 case G_UNMERGE_VALUES:
4682 return lowerUnmergeValues(MI);
4683 case TargetOpcode::G_SEXT_INREG: {
4684 assert(MI.getOperand(2).isImm() && "Expected immediate");
4685 int64_t SizeInBits = MI.getOperand(2).getImm();
4686
4687 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4688 LLT DstTy = MRI.getType(DstReg);
4689 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4690
4691 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4692 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4693 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4694 MI.eraseFromParent();
4695 return Legalized;
4696 }
4697 case G_EXTRACT_VECTOR_ELT:
4698 case G_INSERT_VECTOR_ELT:
4699    return lowerExtractInsertVectorElt(MI);
4700  case G_SHUFFLE_VECTOR:
4701 return lowerShuffleVector(MI);
4702 case G_VECTOR_COMPRESS:
4703 return lowerVECTOR_COMPRESS(MI);
4704 case G_DYN_STACKALLOC:
4705 return lowerDynStackAlloc(MI);
4706 case G_STACKSAVE:
4707 return lowerStackSave(MI);
4708 case G_STACKRESTORE:
4709 return lowerStackRestore(MI);
4710 case G_EXTRACT:
4711 return lowerExtract(MI);
4712 case G_INSERT:
4713 return lowerInsert(MI);
4714 case G_BSWAP:
4715 return lowerBswap(MI);
4716 case G_BITREVERSE:
4717 return lowerBitreverse(MI);
4718 case G_READ_REGISTER:
4719 case G_WRITE_REGISTER:
4720 return lowerReadWriteRegister(MI);
4721 case G_UADDSAT:
4722 case G_USUBSAT: {
4723    // Try to make a reasonable guess about which lowering strategy to use. The
4724    // target can override this by requesting custom lowering and calling the
4725    // implementation functions directly.
4726 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4727 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4728 return lowerAddSubSatToMinMax(MI);
4729    return lowerAddSubSatToAddoSubo(MI);
4730  }
4731 case G_SADDSAT:
4732 case G_SSUBSAT: {
4733 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4734
4735 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4736 // since it's a shorter expansion. However, we would need to figure out the
4737 // preferred boolean type for the carry out for the query.
4738 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4739 return lowerAddSubSatToMinMax(MI);
4740    return lowerAddSubSatToAddoSubo(MI);
4741  }
4742 case G_SSHLSAT:
4743 case G_USHLSAT:
4744 return lowerShlSat(MI);
4745 case G_ABS:
4746 return lowerAbsToAddXor(MI);
4747 case G_ABDS:
4748 case G_ABDU: {
4749 bool IsSigned = MI.getOpcode() == G_ABDS;
4750 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4751 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4752 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4753 return lowerAbsDiffToMinMax(MI);
4754 }
4755 return lowerAbsDiffToSelect(MI);
4756 }
4757 case G_FABS:
4758 return lowerFAbs(MI);
4759 case G_SELECT:
4760 return lowerSelect(MI);
4761 case G_IS_FPCLASS:
4762 return lowerISFPCLASS(MI);
4763 case G_SDIVREM:
4764 case G_UDIVREM:
4765 return lowerDIVREM(MI);
4766 case G_FSHL:
4767 case G_FSHR:
4768 return lowerFunnelShift(MI);
4769 case G_ROTL:
4770 case G_ROTR:
4771 return lowerRotate(MI);
4772 case G_MEMSET:
4773 case G_MEMCPY:
4774 case G_MEMMOVE:
4775 return lowerMemCpyFamily(MI);
4776 case G_MEMCPY_INLINE:
4777 return lowerMemcpyInline(MI);
4778 case G_ZEXT:
4779 case G_SEXT:
4780 case G_ANYEXT:
4781 return lowerEXT(MI);
4782 case G_TRUNC:
4783 return lowerTRUNC(MI);
4784  GISEL_VECREDUCE_CASES_NONSEQ
4785    return lowerVectorReduction(MI);
4786 case G_VAARG:
4787 return lowerVAArg(MI);
4788 case G_ATOMICRMW_SUB: {
4789 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4790 const LLT ValTy = MRI.getType(Val);
4791 MachineMemOperand *MMO = *MI.memoperands_begin();
4792
4793 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4794 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4795 MI.eraseFromParent();
4796 return Legalized;
4797 }
4798 }
4799}
4800
4801Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4802                                                  Align MinAlign) const {
4803 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4804 // datalayout for the preferred alignment. Also there should be a target hook
4805 // for this to allow targets to reduce the alignment and ignore the
4806 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4807 // the type.
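  // e.g. a 12-byte type gets at least a 16-byte stack alignment from this.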
4808 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4809}
4810
4811MachineInstrBuilder
4812LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4813                                      MachinePointerInfo &PtrInfo) {
4814 MachineFunction &MF = MIRBuilder.getMF();
4815 const DataLayout &DL = MIRBuilder.getDataLayout();
4816 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4817
4818 unsigned AddrSpace = DL.getAllocaAddrSpace();
4819 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4820
4821 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4822 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4823}
4824
4826 const SrcOp &Val) {
4827 LLT SrcTy = Val.getLLTTy(MRI);
4828 Align StackTypeAlign =
4829 std::max(getStackTemporaryAlignment(SrcTy),
4831 MachinePointerInfo PtrInfo;
4832 auto StackTemp =
4833 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4834
4835 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4836 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4837}
4838
4839static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4840                                 LLT VecTy) {
4841 LLT IdxTy = B.getMRI()->getType(IdxReg);
4842 unsigned NElts = VecTy.getNumElements();
4843
4844 int64_t IdxVal;
4845 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4846 if (IdxVal < VecTy.getNumElements())
4847 return IdxReg;
4848 // If a constant index would be out of bounds, clamp it as well.
4849 }
4850
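  // For a power-of-2 element count the clamp is a simple mask of the index;
  // otherwise fall back to an unsigned min against the last valid index.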
4851 if (isPowerOf2_32(NElts)) {
4852 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4853 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4854 }
4855
4856 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4857 .getReg(0);
4858}
4859
4861 Register Index) {
4862 LLT EltTy = VecTy.getElementType();
4863
4864 // Calculate the element offset and add it to the pointer.
4865 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4866 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4867 "Converting bits to bytes lost precision");
4868
4869 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4870
4871 // Convert index to the correct size for the address space.
4872 const DataLayout &DL = MIRBuilder.getDataLayout();
4873 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4874 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4875 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4876 if (IdxTy != MRI.getType(Index))
4877 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4878
4879 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4880 MIRBuilder.buildConstant(IdxTy, EltSize));
4881
4882 LLT PtrTy = MRI.getType(VecPtr);
4883 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4884}
4885
4886#ifndef NDEBUG
4887/// Check that all vector operands have the same number of elements. Other
4888/// operands should be listed in \p NonVecOpIndices.
4889static bool hasSameNumEltsOnAllVectorOperands(
4890    GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4891    std::initializer_list<unsigned> NonVecOpIndices) {
4892 if (MI.getNumMemOperands() != 0)
4893 return false;
4894
4895 LLT VecTy = MRI.getType(MI.getReg(0));
4896 if (!VecTy.isVector())
4897 return false;
4898 unsigned NumElts = VecTy.getNumElements();
4899
4900 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4901 MachineOperand &Op = MI.getOperand(OpIdx);
4902 if (!Op.isReg()) {
4903 if (!is_contained(NonVecOpIndices, OpIdx))
4904 return false;
4905 continue;
4906 }
4907
4908 LLT Ty = MRI.getType(Op.getReg());
4909 if (!Ty.isVector()) {
4910 if (!is_contained(NonVecOpIndices, OpIdx))
4911 return false;
4912 continue;
4913 }
4914
4915 if (Ty.getNumElements() != NumElts)
4916 return false;
4917 }
4918
4919 return true;
4920}
4921#endif
4922
4923/// Fill \p DstOps with DstOps that, combined, cover the same number of elements
4924/// as \p Ty. These DstOps are either scalars (when \p NumElts = 1) or vectors
4925/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
4926/// \p NumElts, the last DstOp (the leftover) has fewer than \p NumElts elements.
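/// e.g. for Ty = <7 x s32> and NumElts = 2 this produces
/// { <2 x s32>, <2 x s32>, <2 x s32>, s32 }.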
4927static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4928 unsigned NumElts) {
4929 LLT LeftoverTy;
4930 assert(Ty.isVector() && "Expected vector type");
4931 LLT EltTy = Ty.getElementType();
4932 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4933 int NumParts, NumLeftover;
4934 std::tie(NumParts, NumLeftover) =
4935 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4936
4937 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4938 for (int i = 0; i < NumParts; ++i) {
4939 DstOps.push_back(NarrowTy);
4940 }
4941
4942 if (LeftoverTy.isValid()) {
4943 assert(NumLeftover == 1 && "expected exactly one leftover");
4944 DstOps.push_back(LeftoverTy);
4945 }
4946}
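// Illustrative example (added, not in the original source): for Ty = <7 x s16>
// and NumElts = 4, getNarrowTypeBreakDown yields one <4 x s16> part plus a
// <3 x s16> leftover, so DstOps becomes { <4 x s16>, <3 x s16> }.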
4947
4948/// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
4949/// made from \p Op depending on the operand type.
4950 static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4951 MachineOperand &Op) {
4952 for (unsigned i = 0; i < N; ++i) {
4953 if (Op.isReg())
4954 Ops.push_back(Op.getReg());
4955 else if (Op.isImm())
4956 Ops.push_back(Op.getImm());
4957 else if (Op.isPredicate())
4958 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4959 else
4960 llvm_unreachable("Unsupported type");
4961 }
4962}
4963
4964// Handle splitting vector operations which need to have the same number of
4965// elements in each type index, but each type index may have a different element
4966// type.
4967//
4968// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4969// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4970// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4971//
4972// Also handles some irregular breakdown cases, e.g.
4973// <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4974// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4975// s64 = G_SHL s64, s32
4976 LegalizerHelper::LegalizeResult
4977 LegalizerHelper::fewerElementsVectorMultiEltType(
4978 GenericMachineInstr &MI, unsigned NumElts,
4979 std::initializer_list<unsigned> NonVecOpIndices) {
4980 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4981 "Non-compatible opcode or not specified non-vector operands");
4982 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4983
4984 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4985 unsigned NumDefs = MI.getNumDefs();
4986
4987 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4988 // Build instructions with DstOps so an instruction found by CSE can be used
4989 // directly; with a vreg destination, CSE copies the found instruction into the given vreg.
4990 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4991 // Output registers will be taken from created instructions.
4992 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4993 for (unsigned i = 0; i < NumDefs; ++i) {
4994 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4995 }
4996
4997 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4998 // Operands listed in NonVecOpIndices will be used as is without splitting;
4999 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5000 // scalar condition (op 1), immediate in sext_inreg (op 2).
5001 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5002 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5003 ++UseIdx, ++UseNo) {
5004 if (is_contained(NonVecOpIndices, UseIdx)) {
5005 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5006 MI.getOperand(UseIdx));
5007 } else {
5008 SmallVector<Register, 8> SplitPieces;
5009 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5010 MRI);
5011 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5012 }
5013 }
5014
5015 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5016
5017 // Take i-th piece of each input operand split and build sub-vector/scalar
5018 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5019 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5020 SmallVector<DstOp, 2> Defs;
5021 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5022 Defs.push_back(OutputOpsPieces[DstNo][i]);
5023
5024 SmallVector<SrcOp, 2> Uses;
5025 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5026 Uses.push_back(InputOpsPieces[InputNo][i]);
5027
5028 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5029 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5030 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5031 }
5032
5033 // Merge small outputs into MI's output for each def operand.
5034 if (NumLeftovers) {
5035 for (unsigned i = 0; i < NumDefs; ++i)
5036 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5037 } else {
5038 for (unsigned i = 0; i < NumDefs; ++i)
5039 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5040 }
5041
5042 MI.eraseFromParent();
5043 return Legalized;
5044}
5045
5046 LegalizerHelper::LegalizeResult
5047 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5048 unsigned NumElts) {
5049 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5050
5051 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5052 unsigned NumDefs = MI.getNumDefs();
5053
5054 SmallVector<DstOp, 8> OutputOpsPieces;
5055 SmallVector<Register, 8> OutputRegs;
5056 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5057
5058 // Instructions that perform the register split will be inserted in the basic
5059 // block where the register is defined (that basic block is the next operand).
5060 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5061 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5062 UseIdx += 2, ++UseNo) {
5063 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5064 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5065 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5066 MIRBuilder, MRI);
5067 }
5068
5069 // Build PHIs with fewer elements.
5070 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5071 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5072 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5073 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5074 Phi.addDef(
5075 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5076 OutputRegs.push_back(Phi.getReg(0));
5077
5078 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5079 Phi.addUse(InputOpsPieces[j][i]);
5080 Phi.add(MI.getOperand(1 + j * 2 + 1));
5081 }
5082 }
5083
5084 // Set the insert point after the existing PHIs
5085 MachineBasicBlock &MBB = *MI.getParent();
5086 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5087
5088 // Merge small outputs into MI's def.
5089 if (NumLeftovers) {
5090 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5091 } else {
5092 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5093 }
5094
5095 MI.eraseFromParent();
5096 return Legalized;
5097}
5098
5099 LegalizerHelper::LegalizeResult
5100 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5101 unsigned TypeIdx,
5102 LLT NarrowTy) {
5103 const int NumDst = MI.getNumOperands() - 1;
5104 const Register SrcReg = MI.getOperand(NumDst).getReg();
5105 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5106 LLT SrcTy = MRI.getType(SrcReg);
5107
5108 if (TypeIdx != 1 || NarrowTy == DstTy)
5109 return UnableToLegalize;
5110
5111 // Requires compatible types. Otherwise SrcReg should have been defined by a
5112 // merge-like instruction that would get artifact-combined. Most likely the
5113 // instruction that defines SrcReg has to perform more/fewer-elements
5114 // legalization compatible with NarrowTy.
5115 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5116 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5117
5118 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5119 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5120 return UnableToLegalize;
5121
5122 // This is most likely DstTy (smaller than register size) packed in SrcTy
5123 // (larger than register size), and since the unmerge was not combined it will
5124 // be lowered to bit-sequence extracts from a register. Unpack SrcTy into
5125 // NarrowTy (register size) pieces first, then unpack each NarrowTy piece to DstTy.
5126
5127 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5128 //
5129 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5130 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5131 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5132 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5133 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5134 const int PartsPerUnmerge = NumDst / NumUnmerge;
5135
5136 for (int I = 0; I != NumUnmerge; ++I) {
5137 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5138
5139 for (int J = 0; J != PartsPerUnmerge; ++J)
5140 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5141 MIB.addUse(Unmerge.getReg(I));
5142 }
5143
5144 MI.eraseFromParent();
5145 return Legalized;
5146}
5147
5148 LegalizerHelper::LegalizeResult
5149 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5150 LLT NarrowTy) {
5151 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5152 // Requires compatible types. Otherwise the user of DstReg did not perform the
5153 // unmerge that should have been artifact-combined. Most likely the instruction
5154 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5155 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5156 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5157 if (NarrowTy == SrcTy)
5158 return UnableToLegalize;
5159
5160 // This attempts to lower part of an LCMTy merge/unmerge sequence. The intended
5161 // use is for old MIR tests. Since the changes to more/fewer-elements
5162 // legalization it should no longer be possible to generate MIR like this when
5163 // starting from LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5164 if (TypeIdx == 1) {
5165 assert(SrcTy.isVector() && "Expected vector types");
5166 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5167 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5168 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5169 return UnableToLegalize;
5170 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5171 //
5172 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5173 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5174 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5175 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5176 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5177 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5178
5179 SmallVector<Register, 8> Elts;
5180 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5181 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5182 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5183 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5184 Elts.push_back(Unmerge.getReg(j));
5185 }
5186
5187 SmallVector<Register, 8> NarrowTyElts;
5188 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5189 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5190 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5191 ++i, Offset += NumNarrowTyElts) {
5192 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5193 NarrowTyElts.push_back(
5194 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5195 }
5196
5197 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5198 MI.eraseFromParent();
5199 return Legalized;
5200 }
5201
5202 assert(TypeIdx == 0 && "Bad type index");
5203 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5204 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5205 return UnableToLegalize;
5206
5207 // This is most likely SrcTy (smaller than register size) packed in DstTy
5208 // (larger than register size), and since the merge was not combined it will be
5209 // lowered to bit-sequence packing into a register. Merge SrcTy into NarrowTy
5210 // (register size) pieces first, then merge each NarrowTy piece into DstTy.
5211
5212 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5213 //
5214 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5215 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5216 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5217 SmallVector<Register, 8> NarrowTyElts;
5218 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5219 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5220 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5221 for (unsigned i = 0; i < NumParts; ++i) {
5222 SmallVector<Register, 8> Sources;
5223 for (unsigned j = 0; j < NumElts; ++j)
5224 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5225 NarrowTyElts.push_back(
5226 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5227 }
5228
5229 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5230 MI.eraseFromParent();
5231 return Legalized;
5232}
5233
5234 LegalizerHelper::LegalizeResult
5235 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5236 unsigned TypeIdx,
5237 LLT NarrowVecTy) {
5238 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5239 Register InsertVal;
5240 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5241
5242 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5243 if (IsInsert)
5244 InsertVal = MI.getOperand(2).getReg();
5245
5246 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5247 LLT VecTy = MRI.getType(SrcVec);
5248
5249 // If the index is a constant, we can really break this down as you would
5250 // expect, and index into the target size pieces.
5251 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5252 if (MaybeCst) {
5253 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5254 // Avoid out of bounds indexing the pieces.
5255 if (IdxVal >= VecTy.getNumElements()) {
5256 MIRBuilder.buildUndef(DstReg);
5257 MI.eraseFromParent();
5258 return Legalized;
5259 }
5260
5261 if (!NarrowVecTy.isVector()) {
5262 SmallVector<Register, 8> SplitPieces;
5263 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5264 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5265 if (IsInsert) {
5266 SplitPieces[IdxVal] = InsertVal;
5267 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5268 } else {
5269 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5270 }
5271 } else {
5272 SmallVector<Register, 8> VecParts;
5273 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5274
5275 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5276 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5277 TargetOpcode::G_ANYEXT);
5278
5279 unsigned NewNumElts = NarrowVecTy.getNumElements();
5280
5281 LLT IdxTy = MRI.getType(Idx);
5282 int64_t PartIdx = IdxVal / NewNumElts;
5283 auto NewIdx =
5284 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
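 // Illustrative example (added, not in the original source): extracting
 // element 5 of a <8 x s32> vector split into <4 x s32> pieces gives
 // PartIdx = 5 / 4 = 1 and NewIdx = 5 - 4 * 1 = 1, i.e. lane 1 of the second
 // sub-vector.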
5285
5286 if (IsInsert) {
5287 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5288
5289 // Use the adjusted index to insert into one of the subvectors.
5290 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5291 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5292 VecParts[PartIdx] = InsertPart.getReg(0);
5293
5294 // Recombine the inserted subvector with the others to reform the result
5295 // vector.
5296 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5297 } else {
5298 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5299 }
5300 }
5301
5302 MI.eraseFromParent();
5303 return Legalized;
5304 }
5305
5306 // With a variable index, we can't perform the operation in a smaller type, so
5307 // we're forced to expand this.
5308 //
5309 // TODO: We could emit a chain of compare/select to figure out which piece to
5310 // index.
5311 return lowerExtractInsertVectorElt(MI);
5312}
5313
5314 LegalizerHelper::LegalizeResult
5315 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5316 LLT NarrowTy) {
5317 // FIXME: Don't know how to handle secondary types yet.
5318 if (TypeIdx != 0)
5319 return UnableToLegalize;
5320
5321 if (!NarrowTy.isByteSized()) {
5322 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5323 return UnableToLegalize;
5324 }
5325
5326 // This implementation doesn't work for atomics. Give up instead of doing
5327 // something invalid.
5328 if (LdStMI.isAtomic())
5329 return UnableToLegalize;
5330
5331 bool IsLoad = isa<GLoad>(LdStMI);
5332 Register ValReg = LdStMI.getReg(0);
5333 Register AddrReg = LdStMI.getPointerReg();
5334 LLT ValTy = MRI.getType(ValReg);
5335
5336 // FIXME: Do we need a distinct NarrowMemory legalize action?
5337 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5338 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5339 return UnableToLegalize;
5340 }
5341
5342 int NumParts = -1;
5343 int NumLeftover = -1;
5344 LLT LeftoverTy;
5345 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5346 if (IsLoad) {
5347 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5348 } else {
5349 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5350 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5351 NumParts = NarrowRegs.size();
5352 NumLeftover = NarrowLeftoverRegs.size();
5353 }
5354 }
5355
5356 if (NumParts == -1)
5357 return UnableToLegalize;
5358
5359 LLT PtrTy = MRI.getType(AddrReg);
5360 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5361
5362 unsigned TotalSize = ValTy.getSizeInBits();
5363
5364 // Split the load/store into PartTy sized pieces starting at Offset. If this
5365 // is a load, return the new registers in ValRegs. For a store, each element
5366 // of ValRegs should be PartTy. Returns the next offset that needs to be
5367 // handled.
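 // Illustrative example (added, not in the original source): a little-endian
 // s96 load narrowed with NarrowTy = s32 becomes three s32 loads at byte
 // offsets 0, 4 and 8, each with a correspondingly offset MachineMemOperand;
 // insertParts below then recombines the three results into the original
 // 96-bit value.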
5368 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5369 auto MMO = LdStMI.getMMO();
5370 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5371 unsigned NumParts, unsigned Offset) -> unsigned {
5372 MachineFunction &MF = MIRBuilder.getMF();
5373 unsigned PartSize = PartTy.getSizeInBits();
5374 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5375 ++Idx) {
5376 unsigned ByteOffset = Offset / 8;
5377 Register NewAddrReg;
5378
5379 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5380 ByteOffset);
5381
5382 MachineMemOperand *NewMMO =
5383 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5384
5385 if (IsLoad) {
5386 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5387 ValRegs.push_back(Dst);
5388 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5389 } else {
5390 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5391 }
5392 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5393 }
5394
5395 return Offset;
5396 };
5397
5398 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5399 unsigned HandledOffset =
5400 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5401
5402 // Handle the rest of the register if this isn't an even type breakdown.
5403 if (LeftoverTy.isValid())
5404 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5405
5406 if (IsLoad) {
5407 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5408 LeftoverTy, NarrowLeftoverRegs);
5409 }
5410
5411 LdStMI.eraseFromParent();
5412 return Legalized;
5413}
5414
5415 LegalizerHelper::LegalizeResult
5416 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5417 LLT NarrowTy) {
5418 using namespace TargetOpcode;
5419 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5420 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5421
5422 switch (MI.getOpcode()) {
5423 case G_IMPLICIT_DEF:
5424 case G_TRUNC:
5425 case G_AND:
5426 case G_OR:
5427 case G_XOR:
5428 case G_ADD:
5429 case G_SUB:
5430 case G_MUL:
5431 case G_PTR_ADD:
5432 case G_SMULH:
5433 case G_UMULH:
5434 case G_FADD:
5435 case G_FMUL:
5436 case G_FSUB:
5437 case G_FNEG:
5438 case G_FABS:
5439 case G_FCANONICALIZE:
5440 case G_FDIV:
5441 case G_FREM:
5442 case G_FMA:
5443 case G_FMAD:
5444 case G_FPOW:
5445 case G_FEXP:
5446 case G_FEXP2:
5447 case G_FEXP10:
5448 case G_FLOG:
5449 case G_FLOG2:
5450 case G_FLOG10:
5451 case G_FLDEXP:
5452 case G_FNEARBYINT:
5453 case G_FCEIL:
5454 case G_FFLOOR:
5455 case G_FRINT:
5456 case G_INTRINSIC_LRINT:
5457 case G_INTRINSIC_LLRINT:
5458 case G_INTRINSIC_ROUND:
5459 case G_INTRINSIC_ROUNDEVEN:
5460 case G_LROUND:
5461 case G_LLROUND:
5462 case G_INTRINSIC_TRUNC:
5463 case G_FCOS:
5464 case G_FSIN:
5465 case G_FTAN:
5466 case G_FACOS:
5467 case G_FASIN:
5468 case G_FATAN:
5469 case G_FATAN2:
5470 case G_FCOSH:
5471 case G_FSINH:
5472 case G_FTANH:
5473 case G_FSQRT:
5474 case G_BSWAP:
5475 case G_BITREVERSE:
5476 case G_SDIV:
5477 case G_UDIV:
5478 case G_SREM:
5479 case G_UREM:
5480 case G_SDIVREM:
5481 case G_UDIVREM:
5482 case G_SMIN:
5483 case G_SMAX:
5484 case G_UMIN:
5485 case G_UMAX:
5486 case G_ABS:
5487 case G_FMINNUM:
5488 case G_FMAXNUM:
5489 case G_FMINNUM_IEEE:
5490 case G_FMAXNUM_IEEE:
5491 case G_FMINIMUM:
5492 case G_FMAXIMUM:
5493 case G_FMINIMUMNUM:
5494 case G_FMAXIMUMNUM:
5495 case G_FSHL:
5496 case G_FSHR:
5497 case G_ROTL:
5498 case G_ROTR:
5499 case G_FREEZE:
5500 case G_SADDSAT:
5501 case G_SSUBSAT:
5502 case G_UADDSAT:
5503 case G_USUBSAT:
5504 case G_UMULO:
5505 case G_SMULO:
5506 case G_SHL:
5507 case G_LSHR:
5508 case G_ASHR:
5509 case G_SSHLSAT:
5510 case G_USHLSAT:
5511 case G_CTLZ:
5512 case G_CTLZ_ZERO_UNDEF:
5513 case G_CTTZ:
5514 case G_CTTZ_ZERO_UNDEF:
5515 case G_CTPOP:
5516 case G_FCOPYSIGN:
5517 case G_ZEXT:
5518 case G_SEXT:
5519 case G_ANYEXT:
5520 case G_FPEXT:
5521 case G_FPTRUNC:
5522 case G_SITOFP:
5523 case G_UITOFP:
5524 case G_FPTOSI:
5525 case G_FPTOUI:
5526 case G_FPTOSI_SAT:
5527 case G_FPTOUI_SAT:
5528 case G_INTTOPTR:
5529 case G_PTRTOINT:
5530 case G_ADDRSPACE_CAST:
5531 case G_UADDO:
5532 case G_USUBO:
5533 case G_UADDE:
5534 case G_USUBE:
5535 case G_SADDO:
5536 case G_SSUBO:
5537 case G_SADDE:
5538 case G_SSUBE:
5539 case G_STRICT_FADD:
5540 case G_STRICT_FSUB:
5541 case G_STRICT_FMUL:
5542 case G_STRICT_FMA:
5543 case G_STRICT_FLDEXP:
5544 case G_FFREXP:
5545 return fewerElementsVectorMultiEltType(GMI, NumElts);
5546 case G_ICMP:
5547 case G_FCMP:
5548 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5549 case G_IS_FPCLASS:
5550 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5551 case G_SELECT:
5552 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5553 return fewerElementsVectorMultiEltType(GMI, NumElts);
5554 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5555 case G_PHI:
5556 return fewerElementsVectorPhi(GMI, NumElts);
5557 case G_UNMERGE_VALUES:
5558 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5559 case G_BUILD_VECTOR:
5560 assert(TypeIdx == 0 && "not a vector type index");
5561 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5562 case G_CONCAT_VECTORS:
5563 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5564 return UnableToLegalize;
5565 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5566 case G_EXTRACT_VECTOR_ELT:
5567 case G_INSERT_VECTOR_ELT:
5568 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5569 case G_LOAD:
5570 case G_STORE:
5571 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5572 case G_SEXT_INREG:
5573 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5574 GET_VECREDUCE_CASES_NONSEQ
5575 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5576 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5577 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5578 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5579 case G_SHUFFLE_VECTOR:
5580 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5581 case G_FPOWI:
5582 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5583 case G_BITCAST:
5584 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5585 case G_INTRINSIC_FPTRUNC_ROUND:
5586 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5587 default:
5588 return UnableToLegalize;
5589 }
5590}
5591
5592 LegalizerHelper::LegalizeResult
5593 LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5594 LLT NarrowTy) {
5595 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5596 "Not a bitcast operation");
5597
5598 if (TypeIdx != 0)
5599 return UnableToLegalize;
5600
5601 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5602
5603 unsigned NewElemCount =
5604 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5605 SmallVector<Register> SrcVRegs, BitcastVRegs;
5606 if (NewElemCount == 1) {
5607 LLT SrcNarrowTy = SrcTy.getElementType();
5608
5609 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5610 getUnmergeResults(SrcVRegs, *Unmerge);
5611 } else {
5612 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5613
5614 // Split the Src and Dst Reg into smaller registers
5615 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5616 return UnableToLegalize;
5617 }
5618
5619 // Build new smaller bitcast instructions
5620 // Leftover types are not supported for now, but will have to be eventually.
5621 for (Register Reg : SrcVRegs)
5622 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5623
5624 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5625 MI.eraseFromParent();
5626 return Legalized;
5627}
5628
5629 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5630 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5631 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5632 if (TypeIdx != 0)
5633 return UnableToLegalize;
5634
5635 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5636 MI.getFirst3RegLLTs();
5637 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5638 // The shuffle should be canonicalized by now.
5639 if (DstTy != Src1Ty)
5640 return UnableToLegalize;
5641 if (DstTy != Src2Ty)
5642 return UnableToLegalize;
5643
5644 if (!isPowerOf2_32(DstTy.getNumElements()))
5645 return UnableToLegalize;
5646
5647 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5648 // Further legalization attempts will be needed to split further.
5649 NarrowTy =
5650 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5651 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5652
5653 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5654 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5655 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5656 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5657 SplitSrc2Regs[1]};
5658
5659 Register Hi, Lo;
5660
5661 // If Lo or Hi uses elements from at most two of the four input vectors, then
5662 // express it as a vector shuffle of those two inputs. Otherwise extract the
5663 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
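 // Illustrative example (added, not in the original source): splitting
 // G_SHUFFLE_VECTOR %a(<4 x s32>), %b(<4 x s32>) with mask (0,1,4,5) in half
 // gives Lo = shuffle(%a.lo, undef, (0,1)) and Hi = shuffle(%b.lo, undef,
 // (0,1)), since each half only needs elements from one of the four inputs.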
5664 SmallVector<int, 16> Ops;
5665 for (unsigned High = 0; High < 2; ++High) {
5666 Register &Output = High ? Hi : Lo;
5667
5668 // Build a shuffle mask for the output, discovering on the fly which
5669 // input vectors to use as shuffle operands (recorded in InputUsed).
5670 // If building a suitable shuffle vector proves too hard, then bail
5671 // out with useBuildVector set.
5672 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5673 unsigned FirstMaskIdx = High * NewElts;
5674 bool UseBuildVector = false;
5675 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5676 // The mask element. This indexes into the input.
5677 int Idx = Mask[FirstMaskIdx + MaskOffset];
5678
5679 // The input vector this mask element indexes into.
5680 unsigned Input = (unsigned)Idx / NewElts;
5681
5682 if (Input >= std::size(Inputs)) {
5683 // The mask element does not index into any input vector.
5684 Ops.push_back(-1);
5685 continue;
5686 }
5687
5688 // Turn the index into an offset from the start of the input vector.
5689 Idx -= Input * NewElts;
5690
5691 // Find or create a shuffle vector operand to hold this input.
5692 unsigned OpNo;
5693 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5694 if (InputUsed[OpNo] == Input) {
5695 // This input vector is already an operand.
5696 break;
5697 } else if (InputUsed[OpNo] == -1U) {
5698 // Create a new operand for this input vector.
5699 InputUsed[OpNo] = Input;
5700 break;
5701 }
5702 }
5703
5704 if (OpNo >= std::size(InputUsed)) {
5705 // More than two input vectors used! Give up on trying to create a
5706 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5707 UseBuildVector = true;
5708 break;
5709 }
5710
5711 // Add the mask index for the new shuffle vector.
5712 Ops.push_back(Idx + OpNo * NewElts);
5713 }
5714
5715 if (UseBuildVector) {
5716 LLT EltTy = NarrowTy.getElementType();
5717 SmallVector<Register, 16> SVOps;
5718
5719 // Extract the input elements by hand.
5720 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5721 // The mask element. This indexes into the input.
5722 int Idx = Mask[FirstMaskIdx + MaskOffset];
5723
5724 // The input vector this mask element indexes into.
5725 unsigned Input = (unsigned)Idx / NewElts;
5726
5727 if (Input >= std::size(Inputs)) {
5728 // The mask element is "undef" or indexes off the end of the input.
5729 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5730 continue;
5731 }
5732
5733 // Turn the index into an offset from the start of the input vector.
5734 Idx -= Input * NewElts;
5735
5736 // Extract the vector element by hand.
5737 SVOps.push_back(MIRBuilder
5738 .buildExtractVectorElement(
5739 EltTy, Inputs[Input],
5740 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5741 .getReg(0));
5742 }
5743
5744 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5745 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5746 } else if (InputUsed[0] == -1U) {
5747 // No input vectors were used! The result is undefined.
5748 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5749 } else {
5750 Register Op0 = Inputs[InputUsed[0]];
5751 // If only one input was used, use an undefined vector for the other.
5752 Register Op1 = InputUsed[1] == -1U
5753 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5754 : Inputs[InputUsed[1]];
5755 // At least one input vector was used. Create a new shuffle vector.
5756 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5757 }
5758
5759 Ops.clear();
5760 }
5761
5762 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5763 MI.eraseFromParent();
5764 return Legalized;
5765}
5766
5767 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5768 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5769 auto &RdxMI = cast<GVecReduce>(MI);
5770
5771 if (TypeIdx != 1)
5772 return UnableToLegalize;
5773
5774 // The semantics of the normal non-sequential reductions allow us to freely
5775 // re-associate the operation.
5776 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5777
5778 if (NarrowTy.isVector() &&
5779 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5780 return UnableToLegalize;
5781
5782 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5783 SmallVector<Register> SplitSrcs;
5784 // If NarrowTy is a scalar then we're being asked to scalarize.
5785 const unsigned NumParts =
5786 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5787 : SrcTy.getNumElements();
5788
5789 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5790 if (NarrowTy.isScalar()) {
5791 if (DstTy != NarrowTy)
5792 return UnableToLegalize; // FIXME: handle implicit extensions.
5793
5794 if (isPowerOf2_32(NumParts)) {
5795 // Generate a tree of scalar operations to reduce the critical path.
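 // Illustrative example (added, not in the original source): with 8 scalar
 // pieces this emits 4 ops, then 2, then 1 (a depth-3 tree) instead of a
 // chain of 7 sequentially dependent ops.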
5796 SmallVector<Register> PartialResults;
5797 unsigned NumPartsLeft = NumParts;
5798 while (NumPartsLeft > 1) {
5799 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5800 PartialResults.emplace_back(
5801 MIRBuilder
5802 .buildInstr(ScalarOpc, {NarrowTy},
5803 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5804 .getReg(0));
5805 }
5806 SplitSrcs = PartialResults;
5807 PartialResults.clear();
5808 NumPartsLeft = SplitSrcs.size();
5809 }
5810 assert(SplitSrcs.size() == 1);
5811 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5812 MI.eraseFromParent();
5813 return Legalized;
5814 }
5815 // If we can't generate a tree, then just do sequential operations.
5816 Register Acc = SplitSrcs[0];
5817 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5818 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5819 .getReg(0);
5820 MIRBuilder.buildCopy(DstReg, Acc);
5821 MI.eraseFromParent();
5822 return Legalized;
5823 }
5824 SmallVector<Register> PartialReductions;
5825 for (unsigned Part = 0; Part < NumParts; ++Part) {
5826 PartialReductions.push_back(
5827 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5828 .getReg(0));
5829 }
5830
5831 // If the types involved are powers of 2, we can generate intermediate vector
5832 // ops, before generating a final reduction operation.
5833 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5834 isPowerOf2_32(NarrowTy.getNumElements())) {
5835 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5836 }
5837
5838 Register Acc = PartialReductions[0];
5839 for (unsigned Part = 1; Part < NumParts; ++Part) {
5840 if (Part == NumParts - 1) {
5841 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5842 {Acc, PartialReductions[Part]});
5843 } else {
5844 Acc = MIRBuilder
5845 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5846 .getReg(0);
5847 }
5848 }
5849 MI.eraseFromParent();
5850 return Legalized;
5851}
5852
5853 LegalizerHelper::LegalizeResult
5854 LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5855 unsigned int TypeIdx,
5856 LLT NarrowTy) {
5857 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5858 MI.getFirst3RegLLTs();
5859 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5860 DstTy != NarrowTy)
5861 return UnableToLegalize;
5862
5863 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5864 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5865 "Unexpected vecreduce opcode");
5866 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5867 ? TargetOpcode::G_FADD
5868 : TargetOpcode::G_FMUL;
5869
5870 SmallVector<Register> SplitSrcs;
5871 unsigned NumParts = SrcTy.getNumElements();
5872 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5873 Register Acc = ScalarReg;
5874 for (unsigned i = 0; i < NumParts; i++)
5875 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5876 .getReg(0);
5877
5878 MIRBuilder.buildCopy(DstReg, Acc);
5879 MI.eraseFromParent();
5880 return Legalized;
5881}
5882
5883 LegalizerHelper::LegalizeResult
5884LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5885 LLT SrcTy, LLT NarrowTy,
5886 unsigned ScalarOpc) {
5887 SmallVector<Register> SplitSrcs;
5888 // Split the sources into NarrowTy size pieces.
5889 extractParts(SrcReg, NarrowTy,
5890 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5891 MIRBuilder, MRI);
5892 // We're going to do a tree reduction using vector operations until we have
5893 // one NarrowTy size value left.
5894 while (SplitSrcs.size() > 1) {
5895 SmallVector<Register> PartialRdxs;
5896 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5897 Register LHS = SplitSrcs[Idx];
5898 Register RHS = SplitSrcs[Idx + 1];
5899 // Create the intermediate vector op.
5900 Register Res =
5901 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5902 PartialRdxs.push_back(Res);
5903 }
5904 SplitSrcs = std::move(PartialRdxs);
5905 }
5906 // Finally generate the requested NarrowTy based reduction.
5907 Observer.changingInstr(MI);
5908 MI.getOperand(1).setReg(SplitSrcs[0]);
5909 Observer.changedInstr(MI);
5910 return Legalized;
5911}
5912
5913 LegalizerHelper::LegalizeResult
5914 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5915 const LLT HalfTy, const LLT AmtTy) {
5916
5917 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5918 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5919 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5920
5921 if (Amt.isZero()) {
5922 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5923 MI.eraseFromParent();
5924 return Legalized;
5925 }
5926
5927 LLT NVT = HalfTy;
5928 unsigned NVTBits = HalfTy.getSizeInBits();
5929 unsigned VTBits = 2 * NVTBits;
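 // Illustrative example (added, not in the original source): narrowing an s64
 // G_SHL by a constant 40 into s32 halves takes the Amt.ugt(NVTBits) branch
 // below: Lo becomes 0 and Hi becomes InL << 8, since every surviving bit
 // originates in the low input half.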
5930
5931 SrcOp Lo(Register(0)), Hi(Register(0));
5932 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5933 if (Amt.ugt(VTBits)) {
5934 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5935 } else if (Amt.ugt(NVTBits)) {
5936 Lo = MIRBuilder.buildConstant(NVT, 0);
5937 Hi = MIRBuilder.buildShl(NVT, InL,
5938 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5939 } else if (Amt == NVTBits) {
5940 Lo = MIRBuilder.buildConstant(NVT, 0);
5941 Hi = InL;
5942 } else {
5943 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5944 auto OrLHS =
5945 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5946 auto OrRHS = MIRBuilder.buildLShr(
5947 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5948 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5949 }
5950 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5951 if (Amt.ugt(VTBits)) {
5952 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5953 } else if (Amt.ugt(NVTBits)) {
5954 Lo = MIRBuilder.buildLShr(NVT, InH,
5955 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5956 Hi = MIRBuilder.buildConstant(NVT, 0);
5957 } else if (Amt == NVTBits) {
5958 Lo = InH;
5959 Hi = MIRBuilder.buildConstant(NVT, 0);
5960 } else {
5961 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5962
5963 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5964 auto OrRHS = MIRBuilder.buildShl(
5965 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5966
5967 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5968 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5969 }
5970 } else {
5971 if (Amt.ugt(VTBits)) {
5972 Hi = Lo = MIRBuilder.buildAShr(
5973 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5974 } else if (Amt.ugt(NVTBits)) {
5975 Lo = MIRBuilder.buildAShr(NVT, InH,
5976 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5977 Hi = MIRBuilder.buildAShr(NVT, InH,
5978 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5979 } else if (Amt == NVTBits) {
5980 Lo = InH;
5981 Hi = MIRBuilder.buildAShr(NVT, InH,
5982 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5983 } else {
5984 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5985
5986 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5987 auto OrRHS = MIRBuilder.buildShl(
5988 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5989
5990 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5991 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5992 }
5993 }
5994
5995 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5996 MI.eraseFromParent();
5997
5998 return Legalized;
5999}
6000
6001 LegalizerHelper::LegalizeResult
6002 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6003 LLT RequestedTy) {
6004 if (TypeIdx == 1) {
6005 Observer.changingInstr(MI);
6006 narrowScalarSrc(MI, RequestedTy, 2);
6007 Observer.changedInstr(MI);
6008 return Legalized;
6009 }
6010
6011 Register DstReg = MI.getOperand(0).getReg();
6012 LLT DstTy = MRI.getType(DstReg);
6013 if (DstTy.isVector())
6014 return UnableToLegalize;
6015
6016 Register Amt = MI.getOperand(2).getReg();
6017 LLT ShiftAmtTy = MRI.getType(Amt);
6018 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6019 if (DstEltSize % 2 != 0)
6020 return UnableToLegalize;
6021
6022 // Check if we should use multi-way splitting instead of recursive binary
6023 // splitting.
6024 //
6025 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6026 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6027 // and dependency chains created by the usual binary splitting approach
6028 // (128->64->32).
6029 //
6030 // The >= 8 parts threshold ensures we only use this optimization when binary
6031 // splitting would require multiple recursive passes, avoiding overhead for
6032 // simple 2-way splits where the binary approach is sufficient.
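 // Illustrative example (added, not in the original source): narrowing a
 // 256-bit shift to s32 gives NumParts = 8, so it is decomposed here in one
 // step, whereas binary splitting would take three recursive rounds
 // (256 -> 128 -> 64 -> 32).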
6033 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6034 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6035 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6036 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6037 // steps).
6038 if (NumParts >= 8)
6039 return narrowScalarShiftMultiway(MI, RequestedTy);
6040 }
6041
6042 // Fall back to binary splitting:
6043 // Ignore the input type. We can only go to exactly half the size of the
6044 // input. If that isn't small enough, the resulting pieces will be further
6045 // legalized.
6046 const unsigned NewBitSize = DstEltSize / 2;
6047 const LLT HalfTy = LLT::scalar(NewBitSize);
6048 const LLT CondTy = LLT::scalar(1);
6049
6050 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6051 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6052 ShiftAmtTy);
6053 }
6054
6055 // TODO: Expand with known bits.
6056
6057 // Handle the fully general expansion by an unknown amount.
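 // Illustrative note (added, not in the original source): for each output
 // half the code below computes a "short" result (shift amount below the half
 // width, so bits spill between the halves) and a "long" result (amount of at
 // least the half width, so one half is shifted out entirely), selects
 // between them with IsShort, and uses IsZero to return the input unchanged
 // for the half whose short form would otherwise shift by the full half width
 // when the amount is zero.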
6058 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6059
6060 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6061 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6062 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6063
6064 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6065 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6066
6067 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6068 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6069 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6070
6071 Register ResultRegs[2];
6072 switch (MI.getOpcode()) {
6073 case TargetOpcode::G_SHL: {
6074 // Short: ShAmt < NewBitSize
6075 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6076
6077 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6078 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6079 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6080
6081 // Long: ShAmt >= NewBitSize
6082 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6083 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6084
6085 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6086 auto Hi = MIRBuilder.buildSelect(
6087 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6088
6089 ResultRegs[0] = Lo.getReg(0);
6090 ResultRegs[1] = Hi.getReg(0);
6091 break;
6092 }
6093 case TargetOpcode::G_LSHR:
6094 case TargetOpcode::G_ASHR: {
6095 // Short: ShAmt < NewBitSize
6096 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6097
6098 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6099 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6100 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6101
6102 // Long: ShAmt >= NewBitSize
6103 MachineInstrBuilder HiL;
6104 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6105 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6106 } else {
6107 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6108 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6109 }
6110 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6111 {InH, AmtExcess}); // Lo from Hi part.
6112
6113 auto Lo = MIRBuilder.buildSelect(
6114 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6115
6116 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6117
6118 ResultRegs[0] = Lo.getReg(0);
6119 ResultRegs[1] = Hi.getReg(0);
6120 break;
6121 }
6122 default:
6123 llvm_unreachable("not a shift");
6124 }
6125
6126 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6127 MI.eraseFromParent();
6128 return Legalized;
6129}
6130
6131 Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6132 unsigned PartIdx,
6133 unsigned NumParts,
6134 ArrayRef<Register> SrcParts,
6135 const ShiftParams &Params,
6136 LLT TargetTy, LLT ShiftAmtTy) {
6137 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6138 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6139 assert(WordShiftConst && BitShiftConst && "Expected constants");
6140
6141 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6142 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6143 const bool NeedsInterWordShift = ShiftBits != 0;
6144
6145 switch (Opcode) {
6146 case TargetOpcode::G_SHL: {
6147 // Data moves from lower indices to higher indices
6148 // If this part would come from a source beyond our range, it's zero
6149 if (PartIdx < ShiftWords)
6150 return Params.Zero;
6151
6152 unsigned SrcIdx = PartIdx - ShiftWords;
6153 if (!NeedsInterWordShift)
6154 return SrcParts[SrcIdx];
6155
6156 // Combine shifted main part with carry from previous part
6157 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6158 if (SrcIdx > 0) {
6159 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6160 Params.InvBitShift);
6161 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6162 }
6163 return Hi.getReg(0);
6164 }
6165
6166 case TargetOpcode::G_LSHR: {
6167 unsigned SrcIdx = PartIdx + ShiftWords;
6168 if (SrcIdx >= NumParts)
6169 return Params.Zero;
6170 if (!NeedsInterWordShift)
6171 return SrcParts[SrcIdx];
6172
6173 // Combine shifted main part with carry from next part
6174 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6175 if (SrcIdx + 1 < NumParts) {
6176 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6177 Params.InvBitShift);
6178 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6179 }
6180 return Lo.getReg(0);
6181 }
6182
6183 case TargetOpcode::G_ASHR: {
6184 // Like LSHR but preserves sign bit
6185 unsigned SrcIdx = PartIdx + ShiftWords;
6186 if (SrcIdx >= NumParts)
6187 return Params.SignBit;
6188 if (!NeedsInterWordShift)
6189 return SrcParts[SrcIdx];
6190
6191 // Only the original MSB part uses arithmetic shift to preserve sign. All
6192 // other parts use logical shift since they're just moving data bits.
6193 auto Lo =
6194 (SrcIdx == NumParts - 1)
6195 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6196 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6197 Register HiSrc =
6198 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6199 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6200 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6201 }
6202
6203 default:
6204 llvm_unreachable("not a shift");
6205 }
6206}
6207
6208 Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6209 Register MainOperand,
6210 Register ShiftAmt,
6211 LLT TargetTy,
6212 Register CarryOperand) {
6213 // This helper generates a single output part for variable shifts by combining
6214 // the main operand (shifted by BitShift) with carry bits from an adjacent
6215 // part.
6216
6217 // For G_ASHR, individual parts don't have their own sign bit, only the
6218 // complete value does. So we use LSHR for the main operand shift in ASHR
6219 // context.
6220 unsigned MainOpcode =
6221 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6222
6223 // Perform the primary shift on the main operand
6224 Register MainShifted =
6225 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6226 .getReg(0);
6227
6228 // No carry operand available
6229 if (!CarryOperand.isValid())
6230 return MainShifted;
6231
6232 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6233 // so carry bits aren't needed.
6234 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6235 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6236 LLT BoolTy = LLT::scalar(1);
6237 auto IsZeroBitShift =
6238 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6239
6240 // Extract bits from the adjacent part that will "carry over" into this part.
6241 // The carry direction is opposite to the main shift direction, so we can
6242 // align the two shifted values before combining them with OR.
6243
6244 // Determine the carry shift opcode (opposite direction)
6245 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6246 : TargetOpcode::G_SHL;
6247
6248 // Calculate inverse shift amount: BitWidth - ShiftAmt
6249 auto TargetBitsConst =
6250 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6251 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6252
6253 // Shift the carry operand
6254 Register CarryBits =
6255 MIRBuilder
6256 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6257 .getReg(0);
6258
6259 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6260 // TargetBits which would be poison for the individual carry shift operation).
6261 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6262 Register SafeCarryBits =
6263 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6264 .getReg(0);
6265
6266 // Combine the main shifted part with the carry bits
6267 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6268}
6269
6270 LegalizerHelper::LegalizeResult
6271 LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6272 const APInt &Amt,
6273 LLT TargetTy,
6274 LLT ShiftAmtTy) {
6275 // Any wide shift can be decomposed into WordShift + BitShift components.
6276 // When shift amount is known constant, directly compute the decomposition
6277 // values and generate constant registers.
6278 Register DstReg = MI.getOperand(0).getReg();
6279 Register SrcReg = MI.getOperand(1).getReg();
6280 LLT DstTy = MRI.getType(DstReg);
6281
6282 const unsigned DstBits = DstTy.getScalarSizeInBits();
6283 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6284 const unsigned NumParts = DstBits / TargetBits;
6285
6286 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6287
6288 // When the shift amount is known at compile time, we just calculate which
6289 // source parts contribute to each output part.
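 // Illustrative example (added, not in the original source): a 128-bit G_LSHR
 // by a constant 50 with s32 parts gives ShiftWords = 1 and ShiftBits = 18:
 // Dst[0] = (Src[1] >> 18) | (Src[2] << 14),
 // Dst[1] = (Src[2] >> 18) | (Src[3] << 14),
 // Dst[2] = Src[3] >> 18, and Dst[3] = 0 because its source word lies past
 // the end of the input.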
6290
6291 SmallVector<Register, 8> SrcParts;
6292 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6293
6294 if (Amt.isZero()) {
6295 // No shift needed, just copy
6296 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6297 MI.eraseFromParent();
6298 return Legalized;
6299 }
6300
6301 ShiftParams Params;
6302 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6303 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6304
6305 // Generate constants and values needed by all shift types
6306 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6307 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6308 Params.InvBitShift =
6309 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6310 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6311
6312 // For ASHR, we need the sign-extended value to fill shifted-out positions
6313 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6314 Params.SignBit =
6315 MIRBuilder
6316 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6317 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6318 .getReg(0);
6319
6320 SmallVector<Register, 8> DstParts(NumParts);
6321 for (unsigned I = 0; I < NumParts; ++I)
6322 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6323 Params, TargetTy, ShiftAmtTy);
6324
6325 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6326 MI.eraseFromParent();
6327 return Legalized;
6328}
6329
6330 LegalizerHelper::LegalizeResult
6331 LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6332 Register DstReg = MI.getOperand(0).getReg();
6333 Register SrcReg = MI.getOperand(1).getReg();
6334 Register AmtReg = MI.getOperand(2).getReg();
6335 LLT DstTy = MRI.getType(DstReg);
6336 LLT ShiftAmtTy = MRI.getType(AmtReg);
6337
6338 const unsigned DstBits = DstTy.getScalarSizeInBits();
6339 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6340 const unsigned NumParts = DstBits / TargetBits;
6341
6342 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6343 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6344
6345 // If the shift amount is known at compile time, we can use direct indexing
6346 // instead of generating select chains in the general case.
6347 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6348 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6349 ShiftAmtTy);
6350
6351 // For runtime-variable shift amounts, we must generate a more complex
6352 // sequence that handles all possible shift values using select chains.
6353
6354 // Split the input into target-sized pieces
6355 SmallVector<Register, 8> SrcParts;
6356 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6357
6358 // Shifting by zero should be a no-op.
6359 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6360 LLT BoolTy = LLT::scalar(1);
6361 auto IsZeroShift =
6362 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6363
6364 // Any wide shift can be decomposed into two components:
6365 // 1. WordShift: number of complete target-sized words to shift
6366 // 2. BitShift: number of bits to shift within each word
6367 //
6368 // Example: 128-bit >> 50 with 32-bit target:
6369 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6370 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6371 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6372 auto TargetBitsLog2Const =
6373 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6374 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6375
6376 Register WordShift =
6377 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6378 Register BitShift =
6379 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6380
6381 // Fill values:
6382 // - SHL/LSHR: fill with zeros
6383 // - ASHR: fill with sign-extended MSB
6384 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6385
6386 Register FillValue;
6387 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6388 auto TargetBitsMinusOneConst =
6389 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6390 FillValue = MIRBuilder
6391 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6392 TargetBitsMinusOneConst)
6393 .getReg(0);
6394 } else {
6395 FillValue = ZeroReg;
6396 }
6397
6398 SmallVector<Register, 8> DstParts(NumParts);
6399
6400 // For each output part, generate a select chain that chooses the correct
6401 // result based on the runtime WordShift value. This handles all possible
6402 // word shift amounts by pre-calculating what each would produce.
6403 for (unsigned I = 0; I < NumParts; ++I) {
6404 // Initialize with appropriate default value for this shift type
6405 Register InBoundsResult = FillValue;
6406
6407 // clang-format off
6408 // Build a branchless select chain by pre-computing results for all possible
6409 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6410 //
6411 // K=0: select(WordShift==0, result0, FillValue)
6412 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6413 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6414 // clang-format on
6415 for (unsigned K = 0; K < NumParts; ++K) {
6416 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6417 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6418 WordShift, WordShiftKConst);
6419
6420 // Calculate source indices for this word shift
6421 //
6422 // For 4-part 128-bit value with K=1 word shift:
6423 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6424 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6425 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6426 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6427 int MainSrcIdx;
6428 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6429
6430 switch (MI.getOpcode()) {
6431 case TargetOpcode::G_SHL:
6432 MainSrcIdx = (int)I - (int)K;
6433 CarrySrcIdx = MainSrcIdx - 1;
6434 break;
6435 case TargetOpcode::G_LSHR:
6436 case TargetOpcode::G_ASHR:
6437 MainSrcIdx = (int)I + (int)K;
6438 CarrySrcIdx = MainSrcIdx + 1;
6439 break;
6440 default:
6441 llvm_unreachable("Not a shift");
6442 }
6443
6444 // Check bounds and build the result for this word shift
6445 Register ResultForK;
6446 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6447 Register MainOp = SrcParts[MainSrcIdx];
6448 Register CarryOp;
6449
6450 // Determine carry operand with bounds checking
6451 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6452 CarryOp = SrcParts[CarrySrcIdx];
6453 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6454 CarrySrcIdx >= (int)NumParts)
6455 CarryOp = FillValue; // Use sign extension
6456
6457 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6458 TargetTy, CarryOp);
6459 } else {
6460 // Out of bounds - use fill value for this k
6461 ResultForK = FillValue;
6462 }
6463
6464 // Select this result if WordShift equals k
6465 InBoundsResult =
6466 MIRBuilder
6467 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6468 .getReg(0);
6469 }
6470
6471 // Handle zero-shift special case: if shift is 0, use original input
6472 DstParts[I] =
6473 MIRBuilder
6474 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6475 .getReg(0);
6476 }
6477
6478 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6479 MI.eraseFromParent();
6480 return Legalized;
6481}
6482
6483 LegalizerHelper::LegalizeResult
6484 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6485 LLT MoreTy) {
6486 assert(TypeIdx == 0 && "Expecting only Idx 0");
6487
6488 Observer.changingInstr(MI);
6489 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6490 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6491 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6492 moreElementsVectorSrc(MI, MoreTy, I);
6493 }
6494
6495 MachineBasicBlock &MBB = *MI.getParent();
6496 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6497 moreElementsVectorDst(MI, MoreTy, 0);
6498 Observer.changedInstr(MI);
6499 return Legalized;
6500}
6501
6502MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6503 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6504 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6505
6506 switch (Opcode) {
6507 default:
6509 "getNeutralElementForVecReduce called with invalid opcode!");
6510 case TargetOpcode::G_VECREDUCE_ADD:
6511 case TargetOpcode::G_VECREDUCE_OR:
6512 case TargetOpcode::G_VECREDUCE_XOR:
6513 case TargetOpcode::G_VECREDUCE_UMAX:
6514 return MIRBuilder.buildConstant(Ty, 0);
6515 case TargetOpcode::G_VECREDUCE_MUL:
6516 return MIRBuilder.buildConstant(Ty, 1);
6517 case TargetOpcode::G_VECREDUCE_AND:
6518 case TargetOpcode::G_VECREDUCE_UMIN:
6519 return MIRBuilder.buildConstant(
6520 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6521 case TargetOpcode::G_VECREDUCE_SMAX:
6522 return MIRBuilder.buildConstant(
6523 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6524 case TargetOpcode::G_VECREDUCE_SMIN:
6525 return MIRBuilder.buildConstant(
6526 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6527 case TargetOpcode::G_VECREDUCE_FADD:
6528 return MIRBuilder.buildFConstant(Ty, -0.0);
6529 case TargetOpcode::G_VECREDUCE_FMUL:
6530 return MIRBuilder.buildFConstant(Ty, 1.0);
6531 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6532 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6533 assert(false && "getNeutralElementForVecReduce unimplemented for "
6534 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6535 }
6536 llvm_unreachable("switch expected to return!");
6537}
6538
6539LegalizerHelper::LegalizeResult
6540LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6541 LLT MoreTy) {
6542 unsigned Opc = MI.getOpcode();
6543 switch (Opc) {
6544 case TargetOpcode::G_IMPLICIT_DEF:
6545 case TargetOpcode::G_LOAD: {
6546 if (TypeIdx != 0)
6547 return UnableToLegalize;
6548 Observer.changingInstr(MI);
6549 moreElementsVectorDst(MI, MoreTy, 0);
6550 Observer.changedInstr(MI);
6551 return Legalized;
6552 }
6553 case TargetOpcode::G_STORE:
6554 if (TypeIdx != 0)
6555 return UnableToLegalize;
6556 Observer.changingInstr(MI);
6557 moreElementsVectorSrc(MI, MoreTy, 0);
6558 Observer.changedInstr(MI);
6559 return Legalized;
6560 case TargetOpcode::G_AND:
6561 case TargetOpcode::G_OR:
6562 case TargetOpcode::G_XOR:
6563 case TargetOpcode::G_ADD:
6564 case TargetOpcode::G_SUB:
6565 case TargetOpcode::G_MUL:
6566 case TargetOpcode::G_FADD:
6567 case TargetOpcode::G_FSUB:
6568 case TargetOpcode::G_FMUL:
6569 case TargetOpcode::G_FDIV:
6570 case TargetOpcode::G_FCOPYSIGN:
6571 case TargetOpcode::G_UADDSAT:
6572 case TargetOpcode::G_USUBSAT:
6573 case TargetOpcode::G_SADDSAT:
6574 case TargetOpcode::G_SSUBSAT:
6575 case TargetOpcode::G_SMIN:
6576 case TargetOpcode::G_SMAX:
6577 case TargetOpcode::G_UMIN:
6578 case TargetOpcode::G_UMAX:
6579 case TargetOpcode::G_FMINNUM:
6580 case TargetOpcode::G_FMAXNUM:
6581 case TargetOpcode::G_FMINNUM_IEEE:
6582 case TargetOpcode::G_FMAXNUM_IEEE:
6583 case TargetOpcode::G_FMINIMUM:
6584 case TargetOpcode::G_FMAXIMUM:
6585 case TargetOpcode::G_FMINIMUMNUM:
6586 case TargetOpcode::G_FMAXIMUMNUM:
6587 case TargetOpcode::G_STRICT_FADD:
6588 case TargetOpcode::G_STRICT_FSUB:
6589 case TargetOpcode::G_STRICT_FMUL:
6590 case TargetOpcode::G_SHL:
6591 case TargetOpcode::G_ASHR:
6592 case TargetOpcode::G_LSHR: {
6593 Observer.changingInstr(MI);
6594 moreElementsVectorSrc(MI, MoreTy, 1);
6595 moreElementsVectorSrc(MI, MoreTy, 2);
6596 moreElementsVectorDst(MI, MoreTy, 0);
6597 Observer.changedInstr(MI);
6598 return Legalized;
6599 }
6600 case TargetOpcode::G_FMA:
6601 case TargetOpcode::G_STRICT_FMA:
6602 case TargetOpcode::G_FSHR:
6603 case TargetOpcode::G_FSHL: {
6604 Observer.changingInstr(MI);
6605 moreElementsVectorSrc(MI, MoreTy, 1);
6606 moreElementsVectorSrc(MI, MoreTy, 2);
6607 moreElementsVectorSrc(MI, MoreTy, 3);
6608 moreElementsVectorDst(MI, MoreTy, 0);
6609 Observer.changedInstr(MI);
6610 return Legalized;
6611 }
6612 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6613 case TargetOpcode::G_EXTRACT:
6614 if (TypeIdx != 1)
6615 return UnableToLegalize;
6616 Observer.changingInstr(MI);
6617 moreElementsVectorSrc(MI, MoreTy, 1);
6618 Observer.changedInstr(MI);
6619 return Legalized;
6620 case TargetOpcode::G_INSERT:
6621 case TargetOpcode::G_INSERT_VECTOR_ELT:
6622 case TargetOpcode::G_FREEZE:
6623 case TargetOpcode::G_FNEG:
6624 case TargetOpcode::G_FABS:
6625 case TargetOpcode::G_FSQRT:
6626 case TargetOpcode::G_FCEIL:
6627 case TargetOpcode::G_FFLOOR:
6628 case TargetOpcode::G_FNEARBYINT:
6629 case TargetOpcode::G_FRINT:
6630 case TargetOpcode::G_INTRINSIC_ROUND:
6631 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6632 case TargetOpcode::G_INTRINSIC_TRUNC:
6633 case TargetOpcode::G_BITREVERSE:
6634 case TargetOpcode::G_BSWAP:
6635 case TargetOpcode::G_FCANONICALIZE:
6636 case TargetOpcode::G_SEXT_INREG:
6637 case TargetOpcode::G_ABS:
6638 case TargetOpcode::G_CTLZ:
6639 case TargetOpcode::G_CTPOP:
6640 if (TypeIdx != 0)
6641 return UnableToLegalize;
6642 Observer.changingInstr(MI);
6643 moreElementsVectorSrc(MI, MoreTy, 1);
6644 moreElementsVectorDst(MI, MoreTy, 0);
6645 Observer.changedInstr(MI);
6646 return Legalized;
6647 case TargetOpcode::G_SELECT: {
6648 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6649 if (TypeIdx == 1) {
6650 if (!CondTy.isScalar() ||
6651 DstTy.getElementCount() != MoreTy.getElementCount())
6652 return UnableToLegalize;
6653
6654 // This is turning a scalar select of vectors into a vector
6655 // select. Broadcast the select condition.
6656 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6657 Observer.changingInstr(MI);
6658 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6659 Observer.changedInstr(MI);
6660 return Legalized;
6661 }
6662
6663 if (CondTy.isVector())
6664 return UnableToLegalize;
6665
6666 Observer.changingInstr(MI);
6667 moreElementsVectorSrc(MI, MoreTy, 2);
6668 moreElementsVectorSrc(MI, MoreTy, 3);
6669 moreElementsVectorDst(MI, MoreTy, 0);
6670 Observer.changedInstr(MI);
6671 return Legalized;
6672 }
6673 case TargetOpcode::G_UNMERGE_VALUES:
6674 return UnableToLegalize;
6675 case TargetOpcode::G_PHI:
6676 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6677 case TargetOpcode::G_SHUFFLE_VECTOR:
6678 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6679 case TargetOpcode::G_BUILD_VECTOR: {
6680 SmallVector<SrcOp, 8> Elts;
6681 for (auto Op : MI.uses()) {
6682 Elts.push_back(Op.getReg());
6683 }
6684
6685 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6686 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6687 }
6688
6689 MIRBuilder.buildDeleteTrailingVectorElements(
6690 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6691 MI.eraseFromParent();
6692 return Legalized;
6693 }
6694 case TargetOpcode::G_SEXT:
6695 case TargetOpcode::G_ZEXT:
6696 case TargetOpcode::G_ANYEXT:
6697 case TargetOpcode::G_TRUNC:
6698 case TargetOpcode::G_FPTRUNC:
6699 case TargetOpcode::G_FPEXT:
6700 case TargetOpcode::G_FPTOSI:
6701 case TargetOpcode::G_FPTOUI:
6702 case TargetOpcode::G_FPTOSI_SAT:
6703 case TargetOpcode::G_FPTOUI_SAT:
6704 case TargetOpcode::G_SITOFP:
6705 case TargetOpcode::G_UITOFP: {
6706 Observer.changingInstr(MI);
6707 LLT SrcExtTy;
6708 LLT DstExtTy;
6709 if (TypeIdx == 0) {
6710 DstExtTy = MoreTy;
6711 SrcExtTy = LLT::fixed_vector(
6712 MoreTy.getNumElements(),
6713 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6714 } else {
6715 DstExtTy = LLT::fixed_vector(
6716 MoreTy.getNumElements(),
6717 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6718 SrcExtTy = MoreTy;
6719 }
6720 moreElementsVectorSrc(MI, SrcExtTy, 1);
6721 moreElementsVectorDst(MI, DstExtTy, 0);
6722 Observer.changedInstr(MI);
6723 return Legalized;
6724 }
6725 case TargetOpcode::G_ICMP:
6726 case TargetOpcode::G_FCMP: {
6727 if (TypeIdx != 1)
6728 return UnableToLegalize;
6729
6730 Observer.changingInstr(MI);
6731 moreElementsVectorSrc(MI, MoreTy, 2);
6732 moreElementsVectorSrc(MI, MoreTy, 3);
6733 LLT CondTy = LLT::fixed_vector(
6734 MoreTy.getNumElements(),
6735 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6736 moreElementsVectorDst(MI, CondTy, 0);
6737 Observer.changedInstr(MI);
6738 return Legalized;
6739 }
6740 case TargetOpcode::G_BITCAST: {
6741 if (TypeIdx != 0)
6742 return UnableToLegalize;
6743
6744 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6745 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6746
6747 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6748 if (coefficient % DstTy.getNumElements() != 0)
6749 return UnableToLegalize;
6750
6751 coefficient = coefficient / DstTy.getNumElements();
6752
6753 LLT NewTy = SrcTy.changeElementCount(
6754 ElementCount::get(coefficient, MoreTy.isScalable()));
6755 Observer.changingInstr(MI);
6756 moreElementsVectorSrc(MI, NewTy, 1);
6757 moreElementsVectorDst(MI, MoreTy, 0);
6758 Observer.changedInstr(MI);
6759 return Legalized;
6760 }
6761 case TargetOpcode::G_VECREDUCE_FADD:
6762 case TargetOpcode::G_VECREDUCE_FMUL:
6763 case TargetOpcode::G_VECREDUCE_ADD:
6764 case TargetOpcode::G_VECREDUCE_MUL:
6765 case TargetOpcode::G_VECREDUCE_AND:
6766 case TargetOpcode::G_VECREDUCE_OR:
6767 case TargetOpcode::G_VECREDUCE_XOR:
6768 case TargetOpcode::G_VECREDUCE_SMAX:
6769 case TargetOpcode::G_VECREDUCE_SMIN:
6770 case TargetOpcode::G_VECREDUCE_UMAX:
6771 case TargetOpcode::G_VECREDUCE_UMIN: {
6772 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6773 MachineOperand &MO = MI.getOperand(1);
6774 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6775 auto NeutralElement = getNeutralElementForVecReduce(
6776 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6777
6778 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6779 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6780 i != e; i++) {
6781 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6782 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6783 NeutralElement, Idx);
6784 }
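// The padded lanes hold the operation's identity value, so widening the
// vector does not change the reduction result.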
6785
6786 Observer.changingInstr(MI);
6787 MO.setReg(NewVec.getReg(0));
6788 Observer.changedInstr(MI);
6789 return Legalized;
6790 }
6791
6792 default:
6793 return UnableToLegalize;
6794 }
6795}
6796
6797LegalizerHelper::LegalizeResult
6798LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6799 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6800 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6801 unsigned MaskNumElts = Mask.size();
6802 unsigned SrcNumElts = SrcTy.getNumElements();
6803 LLT DestEltTy = DstTy.getElementType();
6804
6805 if (MaskNumElts == SrcNumElts)
6806 return Legalized;
6807
6808 if (MaskNumElts < SrcNumElts) {
6809 // Extend mask to match new destination vector size with
6810 // undef values.
6811 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6812 llvm::copy(Mask, NewMask.begin());
6813
6814 moreElementsVectorDst(MI, SrcTy, 0);
6815 MIRBuilder.setInstrAndDebugLoc(MI);
6816 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6817 MI.getOperand(1).getReg(),
6818 MI.getOperand(2).getReg(), NewMask);
6819 MI.eraseFromParent();
6820
6821 return Legalized;
6822 }
6823
6824 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6825 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6826 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6827
6828 // Create new source vectors by concatenating the initial
6829 // source vectors with undefined vectors of the same size.
6830 auto Undef = MIRBuilder.buildUndef(SrcTy);
6831 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6832 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6833 MOps1[0] = MI.getOperand(1).getReg();
6834 MOps2[0] = MI.getOperand(2).getReg();
6835
6836 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6837 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6838
6839 // Readjust mask for new input vector length.
6840 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6841 for (unsigned I = 0; I != MaskNumElts; ++I) {
6842 int Idx = Mask[I];
6843 if (Idx >= static_cast<int>(SrcNumElts))
6844 Idx += PaddedMaskNumElts - SrcNumElts;
6845 MappedOps[I] = Idx;
6846 }
6847
6848 // If we got more elements than required, extract subvector.
6849 if (MaskNumElts != PaddedMaskNumElts) {
6850 auto Shuffle =
6851 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6852
6853 SmallVector<Register, 16> Elts(MaskNumElts);
6854 for (unsigned I = 0; I < MaskNumElts; ++I) {
6855 Elts[I] =
6856 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6857 .getReg(0);
6858 }
6859 MIRBuilder.buildBuildVector(DstReg, Elts);
6860 } else {
6861 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6862 }
6863
6864 MI.eraseFromParent();
6865 return Legalized;
6866}
6867
6868LegalizerHelper::LegalizeResult
6869LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6870 unsigned int TypeIdx, LLT MoreTy) {
6871 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6872 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6873 unsigned NumElts = DstTy.getNumElements();
6874 unsigned WidenNumElts = MoreTy.getNumElements();
6875
6876 if (DstTy.isVector() && Src1Ty.isVector() &&
6877 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6878 return equalizeVectorShuffleLengths(MI);
6879 }
6880
6881 if (TypeIdx != 0)
6882 return UnableToLegalize;
6883
6884 // Expect a canonicalized shuffle.
6885 if (DstTy != Src1Ty || DstTy != Src2Ty)
6886 return UnableToLegalize;
6887
6888 moreElementsVectorSrc(MI, MoreTy, 1);
6889 moreElementsVectorSrc(MI, MoreTy, 2);
6890
6891 // Adjust mask based on new input vector length.
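// An index NumElts + j (element j of the second source) is rebased to
// WidenNumElts + j so it still selects element j of the widened second
// operand.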
6892 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6893 for (unsigned I = 0; I != NumElts; ++I) {
6894 int Idx = Mask[I];
6895 if (Idx < static_cast<int>(NumElts))
6896 NewMask[I] = Idx;
6897 else
6898 NewMask[I] = Idx - NumElts + WidenNumElts;
6899 }
6900 moreElementsVectorDst(MI, MoreTy, 0);
6901 MIRBuilder.setInstrAndDebugLoc(MI);
6902 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6903 MI.getOperand(1).getReg(),
6904 MI.getOperand(2).getReg(), NewMask);
6905 MI.eraseFromParent();
6906 return Legalized;
6907}
6908
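// multiplyRegisters implements schoolbook long multiplication on
// NarrowTy-sized digits. For example, with two parts per source,
// (a1:a0) * (b1:b0) produces:
//   DstRegs[0] = mul(a0, b0)
//   DstRegs[1] = mul(a1, b0) + mul(a0, b1) + umulh(a0, b0)
// G_UMULH supplies the high halves of the partial products, and carries are
// accumulated via G_UADDO whenever further destination parts follow.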
6909void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6910 ArrayRef<Register> Src1Regs,
6911 ArrayRef<Register> Src2Regs,
6912 LLT NarrowTy) {
6913 MachineIRBuilder &B = MIRBuilder;
6914 unsigned SrcParts = Src1Regs.size();
6915 unsigned DstParts = DstRegs.size();
6916
6917 unsigned DstIdx = 0; // Low bits of the result.
6918 Register FactorSum =
6919 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6920 DstRegs[DstIdx] = FactorSum;
6921
6922 Register CarrySumPrevDstIdx;
6923 SmallVector<Register, 4> Factors;
6924
6925 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6926 // Collect low parts of muls for DstIdx.
6927 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6928 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6929 MachineInstrBuilder Mul =
6930 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6931 Factors.push_back(Mul.getReg(0));
6932 }
6933 // Collect high parts of muls from previous DstIdx.
6934 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6935 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6936 MachineInstrBuilder Umulh =
6937 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6938 Factors.push_back(Umulh.getReg(0));
6939 }
6940 // Add CarrySum from additions calculated for previous DstIdx.
6941 if (DstIdx != 1) {
6942 Factors.push_back(CarrySumPrevDstIdx);
6943 }
6944
6945 Register CarrySum;
6946 // Add all factors and accumulate all carries into CarrySum.
6947 if (DstIdx != DstParts - 1) {
6948 MachineInstrBuilder Uaddo =
6949 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6950 FactorSum = Uaddo.getReg(0);
6951 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6952 for (unsigned i = 2; i < Factors.size(); ++i) {
6953 MachineInstrBuilder Uaddo =
6954 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6955 FactorSum = Uaddo.getReg(0);
6956 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
6957 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6958 }
6959 } else {
6960 // Since value for the next index is not calculated, neither is CarrySum.
6961 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6962 for (unsigned i = 2; i < Factors.size(); ++i)
6963 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6964 }
6965
6966 CarrySumPrevDstIdx = CarrySum;
6967 DstRegs[DstIdx] = FactorSum;
6968 Factors.clear();
6969 }
6970}
6971
6972LegalizerHelper::LegalizeResult
6973LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
6974 LLT NarrowTy) {
6975 if (TypeIdx != 0)
6976 return UnableToLegalize;
6977
6978 Register DstReg = MI.getOperand(0).getReg();
6979 LLT DstType = MRI.getType(DstReg);
6980 // FIXME: add support for vector types
6981 if (DstType.isVector())
6982 return UnableToLegalize;
6983
6984 unsigned Opcode = MI.getOpcode();
6985 unsigned OpO, OpE, OpF;
6986 switch (Opcode) {
6987 case TargetOpcode::G_SADDO:
6988 case TargetOpcode::G_SADDE:
6989 case TargetOpcode::G_UADDO:
6990 case TargetOpcode::G_UADDE:
6991 case TargetOpcode::G_ADD:
6992 OpO = TargetOpcode::G_UADDO;
6993 OpE = TargetOpcode::G_UADDE;
6994 OpF = TargetOpcode::G_UADDE;
6995 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
6996 OpF = TargetOpcode::G_SADDE;
6997 break;
6998 case TargetOpcode::G_SSUBO:
6999 case TargetOpcode::G_SSUBE:
7000 case TargetOpcode::G_USUBO:
7001 case TargetOpcode::G_USUBE:
7002 case TargetOpcode::G_SUB:
7003 OpO = TargetOpcode::G_USUBO;
7004 OpE = TargetOpcode::G_USUBE;
7005 OpF = TargetOpcode::G_USUBE;
7006 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7007 OpF = TargetOpcode::G_SSUBE;
7008 break;
7009 default:
7010 llvm_unreachable("Unexpected add/sub opcode!");
7011 }
7012
7013 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7014 unsigned NumDefs = MI.getNumExplicitDefs();
7015 Register Src1 = MI.getOperand(NumDefs).getReg();
7016 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7017 Register CarryDst, CarryIn;
7018 if (NumDefs == 2)
7019 CarryDst = MI.getOperand(1).getReg();
7020 if (MI.getNumOperands() == NumDefs + 3)
7021 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7022
7023 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7024 LLT LeftoverTy, DummyTy;
7025 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7026 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7027 MIRBuilder, MRI);
7028 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7029 MRI);
7030
7031 int NarrowParts = Src1Regs.size();
7032 Src1Regs.append(Src1Left);
7033 Src2Regs.append(Src2Left);
7034 DstRegs.reserve(Src1Regs.size());
7035
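// Emit one narrow add/sub per part, threading the carry out of each piece
// into the next. For example, a 128-bit G_ADD narrowed to s64 becomes:
//   lo, c1 = G_UADDO a.lo, b.lo
//   hi, c2 = G_UADDE a.hi, b.hi, c1
// with the final carry forwarded to the overflow result when one exists.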
7036 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7037 Register DstReg =
7038 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7039 Register CarryOut;
7040 // Forward the final carry-out to the destination register
7041 if (i == e - 1 && CarryDst)
7042 CarryOut = CarryDst;
7043 else
7044 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7045
7046 if (!CarryIn) {
7047 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7048 {Src1Regs[i], Src2Regs[i]});
7049 } else if (i == e - 1) {
7050 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7051 {Src1Regs[i], Src2Regs[i], CarryIn});
7052 } else {
7053 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7054 {Src1Regs[i], Src2Regs[i], CarryIn});
7055 }
7056
7057 DstRegs.push_back(DstReg);
7058 CarryIn = CarryOut;
7059 }
7060 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7061 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7062 ArrayRef(DstRegs).drop_front(NarrowParts));
7063
7064 MI.eraseFromParent();
7065 return Legalized;
7066}
7067
7068LegalizerHelper::LegalizeResult
7069LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7070 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7071
7072 LLT Ty = MRI.getType(DstReg);
7073 if (Ty.isVector())
7074 return UnableToLegalize;
7075
7076 unsigned Size = Ty.getSizeInBits();
7077 unsigned NarrowSize = NarrowTy.getSizeInBits();
7078 if (Size % NarrowSize != 0)
7079 return UnableToLegalize;
7080
7081 unsigned NumParts = Size / NarrowSize;
7082 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7083 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7084
7085 SmallVector<Register, 2> Src1Parts, Src2Parts;
7086 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7087 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7088 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7089 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7090
7091 // Take only high half of registers if this is high mul.
7092 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7093 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7094 MI.eraseFromParent();
7095 return Legalized;
7096}
7097
7098LegalizerHelper::LegalizeResult
7099LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7100 LLT NarrowTy) {
7101 if (TypeIdx != 0)
7102 return UnableToLegalize;
7103
7104 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7105
7106 Register Src = MI.getOperand(1).getReg();
7107 LLT SrcTy = MRI.getType(Src);
7108
7109 // If all finite floats fit into the narrowed integer type, we can just swap
7110 // out the result type. This is practically only useful for conversions from
7111 // half to at least 16-bits, so just handle the one case.
7112 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7113 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7114 return UnableToLegalize;
7115
7116 Observer.changingInstr(MI);
7117 narrowScalarDst(MI, NarrowTy, 0,
7118 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7119 Observer.changedInstr(MI);
7120 return Legalized;
7121}
7122
7123LegalizerHelper::LegalizeResult
7124LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7125 LLT NarrowTy) {
7126 if (TypeIdx != 1)
7127 return UnableToLegalize;
7128
7129 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7130
7131 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7132 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7133 // NarrowSize.
7134 if (SizeOp1 % NarrowSize != 0)
7135 return UnableToLegalize;
7136 int NumParts = SizeOp1 / NarrowSize;
7137
7138 SmallVector<Register, 2> SrcRegs, DstRegs;
7139 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7140 MIRBuilder, MRI);
7141
7142 Register OpReg = MI.getOperand(0).getReg();
7143 uint64_t OpStart = MI.getOperand(2).getImm();
7144 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7145 for (int i = 0; i < NumParts; ++i) {
7146 unsigned SrcStart = i * NarrowSize;
7147
7148 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7149 // No part of the extract uses this subregister, ignore it.
7150 continue;
7151 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7152 // The entire subregister is extracted, forward the value.
7153 DstRegs.push_back(SrcRegs[i]);
7154 continue;
7155 }
7156
7157 // OpSegStart is where this destination segment would start in OpReg if it
7158 // extended infinitely in both directions.
7159 int64_t ExtractOffset;
7160 uint64_t SegSize;
7161 if (OpStart < SrcStart) {
7162 ExtractOffset = 0;
7163 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7164 } else {
7165 ExtractOffset = OpStart - SrcStart;
7166 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7167 }
7168
7169 Register SegReg = SrcRegs[i];
7170 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7171 // A genuine extract is needed.
7172 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7173 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7174 }
7175
7176 DstRegs.push_back(SegReg);
7177 }
7178
7179 Register DstReg = MI.getOperand(0).getReg();
7180 if (MRI.getType(DstReg).isVector())
7181 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7182 else if (DstRegs.size() > 1)
7183 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7184 else
7185 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7186 MI.eraseFromParent();
7187 return Legalized;
7188}
7189
7190LegalizerHelper::LegalizeResult
7191LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7192 LLT NarrowTy) {
7193 // FIXME: Don't know how to handle secondary types yet.
7194 if (TypeIdx != 0)
7195 return UnableToLegalize;
7196
7197 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7198 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7199 LLT LeftoverTy;
7200 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7201 LeftoverRegs, MIRBuilder, MRI);
7202
7203 SrcRegs.append(LeftoverRegs);
7204
7205 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7206 Register OpReg = MI.getOperand(2).getReg();
7207 uint64_t OpStart = MI.getOperand(3).getImm();
7208 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7209 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7210 unsigned DstStart = I * NarrowSize;
7211
7212 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7213 // The entire subregister is defined by this insert, forward the new
7214 // value.
7215 DstRegs.push_back(OpReg);
7216 continue;
7217 }
7218
7219 Register SrcReg = SrcRegs[I];
7220 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7221 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7222 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7223 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7224 }
7225
7226 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7227 // No part of the insert affects this subregister, forward the original.
7228 DstRegs.push_back(SrcReg);
7229 continue;
7230 }
7231
7232 // OpSegStart is where this destination segment would start in OpReg if it
7233 // extended infinitely in both directions.
7234 int64_t ExtractOffset, InsertOffset;
7235 uint64_t SegSize;
7236 if (OpStart < DstStart) {
7237 InsertOffset = 0;
7238 ExtractOffset = DstStart - OpStart;
7239 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7240 } else {
7241 InsertOffset = OpStart - DstStart;
7242 ExtractOffset = 0;
7243 SegSize =
7244 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7245 }
7246
7247 Register SegReg = OpReg;
7248 if (ExtractOffset != 0 || SegSize != OpSize) {
7249 // A genuine extract is needed.
7250 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7251 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7252 }
7253
7254 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7255 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7256 DstRegs.push_back(DstReg);
7257 }
7258
7259 uint64_t WideSize = DstRegs.size() * NarrowSize;
7260 Register DstReg = MI.getOperand(0).getReg();
7261 if (WideSize > RegTy.getSizeInBits()) {
7262 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7263 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7264 MIRBuilder.buildTrunc(DstReg, MergeReg);
7265 } else
7266 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7267
7268 MI.eraseFromParent();
7269 return Legalized;
7270}
7271
7272LegalizerHelper::LegalizeResult
7273LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7274 LLT NarrowTy) {
7275 Register DstReg = MI.getOperand(0).getReg();
7276 LLT DstTy = MRI.getType(DstReg);
7277
7278 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7279
7280 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7281 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7282 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7283 LLT LeftoverTy;
7284 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7285 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7286 return UnableToLegalize;
7287
7288 LLT Unused;
7289 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7290 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7291 llvm_unreachable("inconsistent extractParts result");
7292
7293 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7294 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7295 {Src0Regs[I], Src1Regs[I]});
7296 DstRegs.push_back(Inst.getReg(0));
7297 }
7298
7299 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7300 auto Inst = MIRBuilder.buildInstr(
7301 MI.getOpcode(),
7302 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7303 DstLeftoverRegs.push_back(Inst.getReg(0));
7304 }
7305
7306 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7307 LeftoverTy, DstLeftoverRegs);
7308
7309 MI.eraseFromParent();
7310 return Legalized;
7311}
7312
7313LegalizerHelper::LegalizeResult
7314LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7315 LLT NarrowTy) {
7316 if (TypeIdx != 0)
7317 return UnableToLegalize;
7318
7319 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7320
7321 LLT DstTy = MRI.getType(DstReg);
7322 if (DstTy.isVector())
7323 return UnableToLegalize;
7324
7326 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7327 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7328 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7329
7330 MI.eraseFromParent();
7331 return Legalized;
7332}
7333
7334LegalizerHelper::LegalizeResult
7335LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7336 LLT NarrowTy) {
7337 if (TypeIdx != 0)
7338 return UnableToLegalize;
7339
7340 Register CondReg = MI.getOperand(1).getReg();
7341 LLT CondTy = MRI.getType(CondReg);
7342 if (CondTy.isVector()) // TODO: Handle vselect
7343 return UnableToLegalize;
7344
7345 Register DstReg = MI.getOperand(0).getReg();
7346 LLT DstTy = MRI.getType(DstReg);
7347
7348 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7349 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7350 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7351 LLT LeftoverTy;
7352 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7353 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7354 return UnableToLegalize;
7355
7356 LLT Unused;
7357 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7358 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7359 llvm_unreachable("inconsistent extractParts result");
7360
7361 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7362 auto Select = MIRBuilder.buildSelect(NarrowTy,
7363 CondReg, Src1Regs[I], Src2Regs[I]);
7364 DstRegs.push_back(Select.getReg(0));
7365 }
7366
7367 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7368 auto Select = MIRBuilder.buildSelect(
7369 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7370 DstLeftoverRegs.push_back(Select.getReg(0));
7371 }
7372
7373 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7374 LeftoverTy, DstLeftoverRegs);
7375
7376 MI.eraseFromParent();
7377 return Legalized;
7378}
7379
7380LegalizerHelper::LegalizeResult
7381LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7382 LLT NarrowTy) {
7383 if (TypeIdx != 1)
7384 return UnableToLegalize;
7385
7386 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7387 unsigned NarrowSize = NarrowTy.getSizeInBits();
7388
7389 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7390 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7391
7392 MachineIRBuilder &B = MIRBuilder;
7393 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7394 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
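// For example, splitting s64 into two s32 halves:
// ctlz(0x0000000000000FFF) = 32 + ctlz32(0x00000FFF) = 32 + 20 = 52.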
7395 auto C_0 = B.buildConstant(NarrowTy, 0);
7396 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7397 UnmergeSrc.getReg(1), C_0);
7398 auto LoCTLZ = IsUndef ?
7399 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7400 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7401 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7402 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7403 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7404 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7405
7406 MI.eraseFromParent();
7407 return Legalized;
7408 }
7409
7410 return UnableToLegalize;
7411}
7412
7413LegalizerHelper::LegalizeResult
7414LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7415 LLT NarrowTy) {
7416 if (TypeIdx != 1)
7417 return UnableToLegalize;
7418
7419 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7420 unsigned NarrowSize = NarrowTy.getSizeInBits();
7421
7422 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7423 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7424
7425 MachineIRBuilder &B = MIRBuilder;
7426 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7427 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7428 auto C_0 = B.buildConstant(NarrowTy, 0);
7429 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7430 UnmergeSrc.getReg(0), C_0);
7431 auto HiCTTZ = IsUndef ?
7432 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7433 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7434 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7435 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7436 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7437 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7438
7439 MI.eraseFromParent();
7440 return Legalized;
7441 }
7442
7443 return UnableToLegalize;
7444}
7445
7446LegalizerHelper::LegalizeResult
7447LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7448 LLT NarrowTy) {
7449 if (TypeIdx != 1)
7450 return UnableToLegalize;
7451
7452 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7453 unsigned NarrowSize = NarrowTy.getSizeInBits();
7454
7455 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7456 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7457
7458 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7459 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7460 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7461
7462 MI.eraseFromParent();
7463 return Legalized;
7464 }
7465
7466 return UnableToLegalize;
7467}
7468
7469LegalizerHelper::LegalizeResult
7470LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7471 LLT NarrowTy) {
7472 if (TypeIdx != 1)
7473 return UnableToLegalize;
7474
7475 MachineIRBuilder &B = MIRBuilder;
7476 Register ExpReg = MI.getOperand(2).getReg();
7477 LLT ExpTy = MRI.getType(ExpReg);
7478
7479 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7480
7481 // Clamp the exponent to the range of the target type.
7482 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7483 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7484 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7485 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7486
7487 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7488 Observer.changingInstr(MI);
7489 MI.getOperand(2).setReg(Trunc.getReg(0));
7490 Observer.changedInstr(MI);
7491 return Legalized;
7492}
7493
7494LegalizerHelper::LegalizeResult
7495LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7496 unsigned Opc = MI.getOpcode();
7497 const auto &TII = MIRBuilder.getTII();
7498 auto isSupported = [this](const LegalityQuery &Q) {
7499 auto QAction = LI.getAction(Q).Action;
7500 return QAction == Legal || QAction == Libcall || QAction == Custom;
7501 };
7502 switch (Opc) {
7503 default:
7504 return UnableToLegalize;
7505 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7506 // This trivially expands to CTLZ.
7507 Observer.changingInstr(MI);
7508 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7509 Observer.changedInstr(MI);
7510 return Legalized;
7511 }
7512 case TargetOpcode::G_CTLZ: {
7513 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7514 unsigned Len = SrcTy.getSizeInBits();
7515
7516 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7517 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7518 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7519 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7520 auto ICmp = MIRBuilder.buildICmp(
7521 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7522 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7523 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7524 MI.eraseFromParent();
7525 return Legalized;
7526 }
7527 // for now, we do this:
7528 // NewLen = NextPowerOf2(Len);
7529 // x = x | (x >> 1);
7530 // x = x | (x >> 2);
7531 // ...
7532 // x = x | (x >>16);
7533 // x = x | (x >>32); // for 64-bit input
7534 // Up to NewLen/2
7535 // return Len - popcount(x);
7536 //
7537 // Ref: "Hacker's Delight" by Henry Warren
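// For example, for a 32-bit x with only bit 22 set, the or/shift chain
// smears that bit downward, giving x = 0x007fffff, so the result is
// 32 - popcount(x) = 32 - 23 = 9 leading zeros.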
7538 Register Op = SrcReg;
7539 unsigned NewLen = PowerOf2Ceil(Len);
7540 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7541 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7542 auto MIBOp = MIRBuilder.buildOr(
7543 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7544 Op = MIBOp.getReg(0);
7545 }
7546 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7547 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7548 MIBPop);
7549 MI.eraseFromParent();
7550 return Legalized;
7551 }
7552 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7553 // This trivially expands to CTTZ.
7554 Observer.changingInstr(MI);
7555 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7556 Observer.changedInstr(MI);
7557 return Legalized;
7558 }
7559 case TargetOpcode::G_CTTZ: {
7560 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7561
7562 unsigned Len = SrcTy.getSizeInBits();
7563 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7564 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7565 // zero.
7566 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7567 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7568 auto ICmp = MIRBuilder.buildICmp(
7569 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7570 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7571 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7572 MI.eraseFromParent();
7573 return Legalized;
7574 }
7575 // for now, we use: { return popcount(~x & (x - 1)); }
7576 // unless the target has ctlz but not ctpop, in which case we use:
7577 // { return 32 - nlz(~x & (x-1)); }
7578 // Ref: "Hacker's Delight" by Henry Warren
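// The mask ~x & (x - 1) has one bit set per trailing zero of x; e.g.
// x = 0b101000 gives ~x & (x - 1) = 0b000111, whose popcount is 3.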
7579 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7580 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7581 auto MIBTmp = MIRBuilder.buildAnd(
7582 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7583 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7584 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7585 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7586 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7587 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7588 MI.eraseFromParent();
7589 return Legalized;
7590 }
7591 Observer.changingInstr(MI);
7592 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7593 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7594 Observer.changedInstr(MI);
7595 return Legalized;
7596 }
7597 case TargetOpcode::G_CTPOP: {
7598 Register SrcReg = MI.getOperand(1).getReg();
7599 LLT Ty = MRI.getType(SrcReg);
7600 unsigned Size = Ty.getSizeInBits();
7601 MachineIRBuilder &B = MIRBuilder;
7602
7603 // Count set bits in blocks of 2 bits. Default approach would be
7604 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7605 // We use following formula instead:
7606 // B2Count = val - { (val >> 1) & 0x55555555 }
7607 // since it gives same result in blocks of 2 with one instruction less.
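// For a single 2-bit block: 0b11 -> 3 - 1 = 2 set bits, 0b10 -> 2 - 1 = 1,
// 0b01 -> 1 - 0 = 1 and 0b00 -> 0, matching the masked-add formulation.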
7608 auto C_1 = B.buildConstant(Ty, 1);
7609 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7610 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7611 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7612 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7613 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7614
7615 // In order to get the count in blocks of 4, add values from adjacent blocks of 2.
7616 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7617 auto C_2 = B.buildConstant(Ty, 2);
7618 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7619 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7620 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7621 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7622 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7623 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7624
7625 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7626 // addition since count value sits in range {0,...,8} and 4 bits are enough
7627 // to hold such binary values. After addition high 4 bits still hold count
7628 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7629 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7630 auto C_4 = B.buildConstant(Ty, 4);
7631 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7632 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7633 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7634 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7635 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7636
7637 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7638 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7639 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7640 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7641
7642 // Shift count result from 8 high bits to low bits.
7643 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7644
7645 auto IsMulSupported = [this](const LLT Ty) {
7646 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7647 return Action == Legal || Action == WidenScalar || Action == Custom;
7648 };
7649 if (IsMulSupported(Ty)) {
7650 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7651 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7652 } else {
7653 auto ResTmp = B8Count;
7654 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7655 auto ShiftC = B.buildConstant(Ty, Shift);
7656 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7657 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7658 }
7659 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7660 }
7661 MI.eraseFromParent();
7662 return Legalized;
7663 }
7664 }
7665}
7666
7667// Check that (every element of) Reg is undef or not an exact multiple of BW.
7668static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7669 Register Reg, unsigned BW) {
7670 return matchUnaryPredicate(
7671 MRI, Reg,
7672 [=](const Constant *C) {
7673 // Null constant here means an undef.
7674 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7675 return !CI || CI->getValue().urem(BW) != 0;
7676 },
7677 /*AllowUndefs*/ true);
7678}
7679
7680LegalizerHelper::LegalizeResult
7681LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7682 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7683 LLT Ty = MRI.getType(Dst);
7684 LLT ShTy = MRI.getType(Z);
7685
7686 unsigned BW = Ty.getScalarSizeInBits();
7687
7688 if (!isPowerOf2_32(BW))
7689 return UnableToLegalize;
7690
7691 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7692 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7693
7694 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7695 // fshl X, Y, Z -> fshr X, Y, -Z
7696 // fshr X, Y, Z -> fshl X, Y, -Z
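// Negating the amount is sound here: Z is known to be non-zero modulo BW
// (checked above), and shift amounts are only used modulo the power-of-two
// bit width, so -Z selects the same amount as BW - Z.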
7697 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7698 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7699 } else {
7700 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7701 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7702 auto One = MIRBuilder.buildConstant(ShTy, 1);
7703 if (IsFSHL) {
7704 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7705 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7706 } else {
7707 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7708 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7709 }
7710
7711 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7712 }
7713
7714 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7715 MI.eraseFromParent();
7716 return Legalized;
7717}
7718
7719LegalizerHelper::LegalizeResult
7720LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7721 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7722 LLT Ty = MRI.getType(Dst);
7723 LLT ShTy = MRI.getType(Z);
7724
7725 const unsigned BW = Ty.getScalarSizeInBits();
7726 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7727
7728 Register ShX, ShY;
7729 Register ShAmt, InvShAmt;
7730
7731 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7732 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7733 // fshl: X << C | Y >> (BW - C)
7734 // fshr: X << (BW - C) | Y >> C
7735 // where C = Z % BW is not zero
7736 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7737 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7738 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7739 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7740 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7741 } else {
7742 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7743 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
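// Splitting the complementary shift into a shift by 1 followed by a shift
// by BW - 1 - (Z % BW) keeps every shift amount strictly below BW, so no
// undefined shift is emitted when Z % BW == 0.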
7744 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7745 if (isPowerOf2_32(BW)) {
7746 // Z % BW -> Z & (BW - 1)
7747 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7748 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7749 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7750 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7751 } else {
7752 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7753 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7754 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7755 }
7756
7757 auto One = MIRBuilder.buildConstant(ShTy, 1);
7758 if (IsFSHL) {
7759 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7760 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7761 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7762 } else {
7763 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7764 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7765 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7766 }
7767 }
7768
7769 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7770 MI.eraseFromParent();
7771 return Legalized;
7772}
7773
7774LegalizerHelper::LegalizeResult
7775LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7776 // These operations approximately do the following (while avoiding undefined
7777 // shifts by BW):
7778 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7779 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7780 Register Dst = MI.getOperand(0).getReg();
7781 LLT Ty = MRI.getType(Dst);
7782 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7783
7784 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7785 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7786
7787 // TODO: Use smarter heuristic that accounts for vector legalization.
7788 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7789 return lowerFunnelShiftAsShifts(MI);
7790
7791 // This only works for powers of 2, fallback to shifts if it fails.
7792 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7793 if (Result == UnableToLegalize)
7794 return lowerFunnelShiftAsShifts(MI);
7795 return Result;
7796}
7797
7798LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7799 auto [Dst, Src] = MI.getFirst2Regs();
7800 LLT DstTy = MRI.getType(Dst);
7801 LLT SrcTy = MRI.getType(Src);
7802
7803 uint32_t DstTySize = DstTy.getSizeInBits();
7804 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7805 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7806
7807 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7808 !isPowerOf2_32(SrcTyScalarSize))
7809 return UnableToLegalize;
7810
7811 // The step between extend is too large, split it by creating an intermediate
7812 // extend instruction
7813 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7814 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7815 // If the destination type is illegal, split it into multiple statements
7816 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
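// For example, %d(<8 x s32>) = G_ZEXT %s(<8 x s8>) first extends to
// <8 x s16>, unmerges that into two <4 x s16> halves, extends each half to
// <4 x s32>, and merges the two results back into <8 x s32>.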
7817 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7818 // Unmerge the vector
7819 LLT EltTy = MidTy.changeElementCount(
7820 MidTy.getElementCount().divideCoefficientBy(2));
7821 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7822
7823 // ZExt the vectors
7824 LLT ZExtResTy = DstTy.changeElementCount(
7825 DstTy.getElementCount().divideCoefficientBy(2));
7826 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7827 {UnmergeSrc.getReg(0)});
7828 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7829 {UnmergeSrc.getReg(1)});
7830
7831 // Merge the ending vectors
7832 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7833
7834 MI.eraseFromParent();
7835 return Legalized;
7836 }
7837 return UnableToLegalize;
7838}
7839
7840LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7841 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7842 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7843 // Similar to how operand splitting is done in SelectionDAG, we can handle
7844 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7845 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7846 // %lo16(<4 x s16>) = G_TRUNC %inlo
7847 // %hi16(<4 x s16>) = G_TRUNC %inhi
7848 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7849 // %res(<8 x s8>) = G_TRUNC %in16
7850
7851 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7852
7853 Register DstReg = MI.getOperand(0).getReg();
7854 Register SrcReg = MI.getOperand(1).getReg();
7855 LLT DstTy = MRI.getType(DstReg);
7856 LLT SrcTy = MRI.getType(SrcReg);
7857
7858 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7859 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7860 isPowerOf2_32(SrcTy.getNumElements()) &&
7861 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7862 // Split input type.
7863 LLT SplitSrcTy = SrcTy.changeElementCount(
7864 SrcTy.getElementCount().divideCoefficientBy(2));
7865
7866 // First, split the source into two smaller vectors.
7867 SmallVector<Register, 2> SplitSrcs;
7868 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7869
7870 // Truncate the splits into intermediate narrower elements.
7871 LLT InterTy;
7872 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7873 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7874 else
7875 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7876 for (Register &Src : SplitSrcs)
7877 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7878
7879 // Combine the new truncates into one vector
7880 auto Merge = MIRBuilder.buildMergeLikeInstr(
7881 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7882
7883 // Truncate the new vector to the final result type
7884 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7885 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7886 else
7887 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7888
7889 MI.eraseFromParent();
7890
7891 return Legalized;
7892 }
7893 return UnableToLegalize;
7894}
7895
7896LegalizerHelper::LegalizeResult
7897LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
7898 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7899 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7900 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7901 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7902 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7903 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7904 MI.eraseFromParent();
7905 return Legalized;
7906}
7907
7908LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
7909 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7910
7911 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7912 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7913
7914 MIRBuilder.setInstrAndDebugLoc(MI);
7915
7916 // If a rotate in the other direction is supported, use it.
7917 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7918 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
7919 isPowerOf2_32(EltSizeInBits))
7920 return lowerRotateWithReverseRotate(MI);
7921
7922 // If a funnel shift is supported, use it.
7923 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7924 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7925 bool IsFShLegal = false;
7926 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7927 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7928 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
7929 Register R3) {
7930 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
7931 MI.eraseFromParent();
7932 return Legalized;
7933 };
7934 // If a funnel shift in the other direction is supported, use it.
7935 if (IsFShLegal) {
7936 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7937 } else if (isPowerOf2_32(EltSizeInBits)) {
7938 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
7939 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7940 }
7941 }
7942
7943 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7944 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7945 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
7946 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
7947 Register ShVal;
7948 Register RevShiftVal;
7949 if (isPowerOf2_32(EltSizeInBits)) {
7950 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7951 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
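// For example, an s32 rotl by 8 becomes (x << 8) | (x >> 24), with both
// amounts reduced by the & 31 masks.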
7952 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7953 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7954 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7955 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7956 RevShiftVal =
7957 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
7958 } else {
7959 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7960 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7961 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
7962 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
7963 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7964 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7965 auto One = MIRBuilder.buildConstant(AmtTy, 1);
7966 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7967 RevShiftVal =
7968 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
7969 }
7970 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
7971 MI.eraseFromParent();
7972 return Legalized;
7973}
7974
7975// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
7976// representation.
7977LegalizerHelper::LegalizeResult
7978LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
7979 auto [Dst, Src] = MI.getFirst2Regs();
7980 const LLT S64 = LLT::scalar(64);
7981 const LLT S32 = LLT::scalar(32);
7982 const LLT S1 = LLT::scalar(1);
7983
7984 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7985
7986 // unsigned cul2f(ulong u) {
7987 // uint lz = clz(u);
7988 // uint e = (u != 0) ? 127U + 63U - lz : 0;
7989 // u = (u << lz) & 0x7fffffffffffffffUL;
7990 // ulong t = u & 0xffffffffffUL;
7991 // uint v = (e << 23) | (uint)(u >> 40);
7992 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
7993 // return as_float(v + r);
7994 // }
7995
7996 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
7997 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
7998
7999 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8000
8001 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8002 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8003
8004 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8005 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8006
8007 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8008 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8009
8010 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8011
8012 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8013 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8014
8015 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8016 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8017 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8018
8019 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8020 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8021 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8022 auto One = MIRBuilder.buildConstant(S32, 1);
8023
8024 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8025 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8026 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8027 MIRBuilder.buildAdd(Dst, V, R);
8028
8029 MI.eraseFromParent();
8030 return Legalized;
8031}
8032
8033// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8034// operations and G_SITOFP
8035LegalizerHelper::LegalizeResult
8036LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
8037 auto [Dst, Src] = MI.getFirst2Regs();
8038 const LLT S64 = LLT::scalar(64);
8039 const LLT S32 = LLT::scalar(32);
8040 const LLT S1 = LLT::scalar(1);
8041
8042 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8043
8044 // For i64 < INT_MAX we simply reuse SITOFP.
8045 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8046 // saved before division, convert to float by SITOFP, multiply the result
8047 // by 2.
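// ORing the shifted-out bit back in keeps the halved value sticky, so the
// single rounding done by G_SITOFP still yields the correctly rounded float
// after the final doubling.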
8048 auto One = MIRBuilder.buildConstant(S64, 1);
8049 auto Zero = MIRBuilder.buildConstant(S64, 0);
8050 // Result if Src < INT_MAX
8051 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8052 // Result if Src >= INT_MAX
8053 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8054 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8055 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8056 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8057 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8058 // Check if the original value is larger than INT_MAX by comparing with
8059 // zero to pick one of the two conversions.
8060 auto IsLarge =
8061 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8062 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8063
8064 MI.eraseFromParent();
8065 return Legalized;
8066}
8067
8068// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8069// IEEE double representation.
8070LegalizerHelper::LegalizeResult
8071LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
8072 auto [Dst, Src] = MI.getFirst2Regs();
8073 const LLT S64 = LLT::scalar(64);
8074 const LLT S32 = LLT::scalar(32);
8075
8076 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8077
8078 // We create double value from 32 bit parts with 32 exponent difference.
8079 // Note that + and - are float operations that adjust the implicit leading
8080 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8081 //
8082 // X = 2^52 * 1.0...LowBits
8083 // Y = 2^84 * 1.0...HighBits
8084 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8085 // = - 2^52 * 1.0...HighBits
8086 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
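// The subtraction is exact, so effectively only the final G_FADD rounds,
// giving a correctly rounded double for the full 64-bit input.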
8087 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8088 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8089 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8090 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8091 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8092
8093 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8094 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8095 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8096 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8097 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8098 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8099 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8100
8101 MI.eraseFromParent();
8102 return Legalized;
8103}
8104
8105/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8106/// convert fpround f64->f16 without double-rounding, so we manually perform the
8107/// lowering here where we know it is valid.
8108static LegalizerHelper::LegalizeResult
8109loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8110 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8111 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8112 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8113 : MIRBuilder.buildSITOFP(SrcTy, Src);
8114 LLT S32Ty = SrcTy.changeElementSize(32);
8115 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8116 MIRBuilder.buildFPTrunc(Dst, M2);
8117 MI.eraseFromParent();
8118 return LegalizerHelper::Legalized;
8119}
8120
8121LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
8122 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8123
8124 if (SrcTy == LLT::scalar(1)) {
8125 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8126 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8127 MIRBuilder.buildSelect(Dst, Src, True, False);
8128 MI.eraseFromParent();
8129 return Legalized;
8130 }
8131
8132 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8133 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8134
8135 if (SrcTy != LLT::scalar(64))
8136 return UnableToLegalize;
8137
8138 if (DstTy == LLT::scalar(32))
8139 // TODO: SelectionDAG has several alternative expansions to port which may
8140 // be more reasonable depending on the available instructions. We also need
8141 // a more advanced mechanism to choose an optimal version depending on
8142 // target features such as sitofp or CTLZ availability.
8143 return lowerU64ToF32WithSITOFP(MI);
8144
8145 if (DstTy == LLT::scalar(64))
8146 return lowerU64ToF64BitFloatOps(MI);
8147
8148 return UnableToLegalize;
8149}
8150
8151LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
8152 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8153
8154 const LLT S64 = LLT::scalar(64);
8155 const LLT S32 = LLT::scalar(32);
8156 const LLT S1 = LLT::scalar(1);
8157
8158 if (SrcTy == S1) {
8159 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8160 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8161 MIRBuilder.buildSelect(Dst, Src, True, False);
8162 MI.eraseFromParent();
8163 return Legalized;
8164 }
8165
8166 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8167 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8168
8169 if (SrcTy != S64)
8170 return UnableToLegalize;
8171
8172 if (DstTy == S32) {
8173 // signed cl2f(long l) {
8174 // long s = l >> 63;
8175 // float r = cul2f((l + s) ^ s);
8176 // return s ? -r : r;
8177 // }
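// With s = l >> 63 (0 or -1), (l + s) ^ s is a branchless abs(l): for negative
// l it computes ~(l - 1), which equals -l in two's complement.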
8178 Register L = Src;
8179 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8180 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8181
8182 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8183 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8184 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8185
8186 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8187 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8188 MIRBuilder.buildConstant(S64, 0));
8189 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8190 MI.eraseFromParent();
8191 return Legalized;
8192 }
8193
8194 return UnableToLegalize;
8195}
8196
8197LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
8198 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8199 const LLT S64 = LLT::scalar(64);
8200 const LLT S32 = LLT::scalar(32);
8201
8202 if (SrcTy != S64 && SrcTy != S32)
8203 return UnableToLegalize;
8204 if (DstTy != S32 && DstTy != S64)
8205 return UnableToLegalize;
8206
8207 // FPTOSI gives the same result as FPTOUI for positive signed integers.
8208 // FPTOUI additionally has to handle fp values that convert to unsigned
8209 // integers >= 2^31 for an s32 result or 2^63 for s64. For brevity, 2^Exp.
8210
8211 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8212 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8213 : APFloat::IEEEdouble(),
8214 APInt::getZero(SrcTy.getSizeInBits()));
8215 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8216
8217 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8218
8219 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8220 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8221 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
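// E.g. for f32 -> s32 with Value = 3e9: 3e9 - 2^31 fits in the signed range,
// FPTOSI converts it, and XORing in the sign mask adds 2^31 back to the result.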
8222 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8223 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8224 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8225 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8226
8227 const LLT S1 = LLT::scalar(1);
8228
8229 MachineInstrBuilder FCMP =
8230 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8231 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8232
8233 MI.eraseFromParent();
8234 return Legalized;
8235}
8236
8237LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
8238 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8239 const LLT S64 = LLT::scalar(64);
8240 const LLT S32 = LLT::scalar(32);
8241
8242 // FIXME: Only f32 to i64 conversions are supported.
8243 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8244 return UnableToLegalize;
8245
8246 // Expand f32 -> i64 conversion
8247 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8248 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
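// In outline: extract the exponent and the mantissa (with the implicit leading
// one at bit 23 added back), widen the mantissa to the destination type, shift
// it left or right by |exponent - 23|, apply the sign with the xor/sub negate
// idiom, and return 0 for exponents below zero (|x| < 1).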
8249
8250 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8251
8252 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8253 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8254
8255 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8256 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8257
8258 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8259 APInt::getSignMask(SrcEltBits));
8260 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8261 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8262 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8263 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8264
8265 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8266 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8267 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8268
8269 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8270 R = MIRBuilder.buildZExt(DstTy, R);
8271
8272 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8273 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8274 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8275 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8276
8277 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8278 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8279
8280 const LLT S1 = LLT::scalar(1);
8281 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8282 S1, Exponent, ExponentLoBit);
8283
8284 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8285
8286 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8287 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8288
8289 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8290
8291 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8292 S1, Exponent, ZeroSrcTy);
8293
8294 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8295 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8296
8297 MI.eraseFromParent();
8298 return Legalized;
8299}
8300
8301LegalizerHelper::LegalizeResult
8302LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
8303 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8304
8305 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8306 unsigned SatWidth = DstTy.getScalarSizeInBits();
8307
8308 // Determine minimum and maximum integer values and their corresponding
8309 // floating-point values.
8310 APInt MinInt, MaxInt;
8311 if (IsSigned) {
8312 MinInt = APInt::getSignedMinValue(SatWidth);
8313 MaxInt = APInt::getSignedMaxValue(SatWidth);
8314 } else {
8315 MinInt = APInt::getMinValue(SatWidth);
8316 MaxInt = APInt::getMaxValue(SatWidth);
8317 }
8318
8319 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8320 APFloat MinFloat(Semantics);
8321 APFloat MaxFloat(Semantics);
8322
8323 APFloat::opStatus MinStatus =
8324 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8325 APFloat::opStatus MaxStatus =
8326 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8327 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8328 !(MaxStatus & APFloat::opStatus::opInexact);
8329
8330 // If the integer bounds are exactly representable as floats, emit a
8331 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8332 // and selects.
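// E.g. for f32 -> s32: -2^31 is exactly representable in f32 but 2^31 - 1 is
// not (it rounds up to 2^31), so the clamp sequence below cannot be used and
// the compare/select expansion further down is taken instead.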
8333 if (AreExactFloatBounds) {
8334 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8335 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8336 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8337 SrcTy.changeElementSize(1), Src, MaxC);
8338 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8339 // Clamp by MaxFloat from above. NaN cannot occur.
8340 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8341 auto MinP =
8342 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8343 MinC, MachineInstr::FmNoNans);
8344 auto Min =
8345 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8346 // Convert clamped value to integer. In the unsigned case we're done,
8347 // because we mapped NaN to MinFloat, which will cast to zero.
8348 if (!IsSigned) {
8349 MIRBuilder.buildFPTOUI(Dst, Min);
8350 MI.eraseFromParent();
8351 return Legalized;
8352 }
8353
8354 // Otherwise, select 0 if Src is NaN.
8355 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8356 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8357 DstTy.changeElementSize(1), Src, Src);
8358 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8359 FpToInt);
8360 MI.eraseFromParent();
8361 return Legalized;
8362 }
8363
8364 // Result of direct conversion. The assumption here is that the operation is
8365 // non-trapping and it's fine to apply it to an out-of-range value if we
8366 // select it away later.
8367 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8368 : MIRBuilder.buildFPTOUI(DstTy, Src);
8369
8370 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8371 // MinInt if Src is NaN.
8372 auto ULT =
8373 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8374 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8375 auto Max = MIRBuilder.buildSelect(
8376 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8377 // If Src OGT MaxFloat, select MaxInt.
8378 auto OGT =
8379 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8380 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8381
8382 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8383 // is already zero.
8384 if (!IsSigned) {
8385 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8386 Max);
8387 MI.eraseFromParent();
8388 return Legalized;
8389 }
8390
8391 // Otherwise, select 0 if Src is NaN.
8392 auto Min = MIRBuilder.buildSelect(
8393 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8394 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8395 DstTy.changeElementSize(1), Src, Src);
8396 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8397 MI.eraseFromParent();
8398 return Legalized;
8399}
8400
8401// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8402LegalizerHelper::LegalizeResult
8403LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
8404 const LLT S1 = LLT::scalar(1);
8405 const LLT S32 = LLT::scalar(32);
8406
8407 auto [Dst, Src] = MI.getFirst2Regs();
8408 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8409 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8410
8411 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8412 return UnableToLegalize;
8413
8414 if (MI.getFlag(MachineInstr::FmAfn)) {
8415 unsigned Flags = MI.getFlags();
8416 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8417 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8418 MI.eraseFromParent();
8419 return Legalized;
8420 }
8421
8422 const unsigned ExpMask = 0x7ff;
8423 const unsigned ExpBiasf64 = 1023;
8424 const unsigned ExpBiasf16 = 15;
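// The conversion works on the two 32-bit halves of the f64: re-bias the
// exponent (E), collect the top mantissa bits plus guard/sticky information
// (M, MaskedSig), shift right for results that would be f16 subnormals (B, D),
// round to nearest-even on the low bits of V, clamp overflowing exponents to
// infinity, map Inf/NaN inputs to the pattern I, and finally OR in the sign.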
8425
8426 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8427 Register U = Unmerge.getReg(0);
8428 Register UH = Unmerge.getReg(1);
8429
8430 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8431 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8432
8433 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8434 // add the f16 bias (15) to get the biased exponent for the f16 format.
8435 E = MIRBuilder.buildAdd(
8436 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8437
8438 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8439 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8440
8441 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8442 MIRBuilder.buildConstant(S32, 0x1ff));
8443 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8444
8445 auto Zero = MIRBuilder.buildConstant(S32, 0);
8446 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8447 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8448 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8449
8450 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8451 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8452 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8453 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8454
8455 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8456 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8457
8458 // N = M | (E << 12);
8459 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8460 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8461
8462 // B = clamp(1-E, 0, 13);
8463 auto One = MIRBuilder.buildConstant(S32, 1);
8464 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8465 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8466 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8467
8468 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8469 MIRBuilder.buildConstant(S32, 0x1000));
8470
8471 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8472 auto D0 = MIRBuilder.buildShl(S32, D, B);
8473
8474 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8475 D0, SigSetHigh);
8476 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8477 D = MIRBuilder.buildOr(S32, D, D1);
8478
8479 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8480 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8481
8482 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8483 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8484
8485 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8486 MIRBuilder.buildConstant(S32, 3));
8487 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8488
8489 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8490 MIRBuilder.buildConstant(S32, 5));
8491 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8492
8493 V1 = MIRBuilder.buildOr(S32, V0, V1);
8494 V = MIRBuilder.buildAdd(S32, V, V1);
8495
8496 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8497 E, MIRBuilder.buildConstant(S32, 30));
8498 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8499 MIRBuilder.buildConstant(S32, 0x7c00), V);
8500
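// E == 1039 is the re-biased form of an all-ones f64 exponent (2047 - 1023 +
// 15), i.e. the input was Inf or NaN, so the precomputed pattern I is used.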
8501 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8502 E, MIRBuilder.buildConstant(S32, 1039));
8503 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8504
8505 // Extract the sign bit.
8506 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8507 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8508
8509 // Insert the sign bit
8510 V = MIRBuilder.buildOr(S32, Sign, V);
8511
8512 MIRBuilder.buildTrunc(Dst, V);
8513 MI.eraseFromParent();
8514 return Legalized;
8515}
8516
8517LegalizerHelper::LegalizeResult
8518LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
8519 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8520 const LLT S64 = LLT::scalar(64);
8521 const LLT S16 = LLT::scalar(16);
8522
8523 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8524 return lowerFPTRUNC_F64_TO_F16(MI);
8525
8526 return UnableToLegalize;
8527}
8528
8529LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
8530 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8531 LLT Ty = MRI.getType(Dst);
8532
8533 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8534 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8535 MI.eraseFromParent();
8536 return Legalized;
8537}
8538
8539static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8540 switch (Opc) {
8541 case TargetOpcode::G_SMIN:
8542 return CmpInst::ICMP_SLT;
8543 case TargetOpcode::G_SMAX:
8544 return CmpInst::ICMP_SGT;
8545 case TargetOpcode::G_UMIN:
8546 return CmpInst::ICMP_ULT;
8547 case TargetOpcode::G_UMAX:
8548 return CmpInst::ICMP_UGT;
8549 default:
8550 llvm_unreachable("not in integer min/max");
8551 }
8552}
8553
8554LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
8555 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8556
8557 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8558 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8559
8560 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8561 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8562
8563 MI.eraseFromParent();
8564 return Legalized;
8565}
8566
8567LegalizerHelper::LegalizeResult
8568LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
8569 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8570
8571 Register Dst = Cmp->getReg(0);
8572 LLT DstTy = MRI.getType(Dst);
8573 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8574 LLT CmpTy = DstTy.changeElementSize(1);
8575
8576 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8577 ? CmpInst::Predicate::ICMP_SLT
8578 : CmpInst::Predicate::ICMP_ULT;
8579 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8580 ? CmpInst::Predicate::ICMP_SGT
8581 : CmpInst::Predicate::ICMP_UGT;
8582
8583 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8584 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8585 Cmp->getRHSReg());
8586 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8587 Cmp->getRHSReg());
8588
8589 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8590 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8591 if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) ||
8592 BC == TargetLowering::UndefinedBooleanContent) {
8593 auto One = MIRBuilder.buildConstant(DstTy, 1);
8594 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8595
8596 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8597 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8598 } else {
8599 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
8600 std::swap(IsGT, IsLT);
8601 // Extend boolean results to DstTy, which is at least i2, before subtracting
8602 // them.
8603 unsigned BoolExtOp =
8604 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8605 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8606 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8607 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8608 }
8609
8610 MI.eraseFromParent();
8611 return Legalized;
8612}
8613
8614LegalizerHelper::LegalizeResult
8615LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
8616 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8617 const int Src0Size = Src0Ty.getScalarSizeInBits();
8618 const int Src1Size = Src1Ty.getScalarSizeInBits();
8619
8620 auto SignBitMask = MIRBuilder.buildConstant(
8621 Src0Ty, APInt::getSignMask(Src0Size));
8622
8623 auto NotSignBitMask = MIRBuilder.buildConstant(
8624 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8625
8626 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8627 Register And1;
8628 if (Src0Ty == Src1Ty) {
8629 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8630 } else if (Src0Size > Src1Size) {
8631 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8632 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8633 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8634 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8635 } else {
8636 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8637 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8638 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8639 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8640 }
8641
8642 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8643 // constants are a nan and -0.0, but the final result should preserve
8644 // everything.
8645 unsigned Flags = MI.getFlags();
8646
8647 // We masked the sign bit and the not-sign bit, so these are disjoint.
8648 Flags |= MachineInstr::Disjoint;
8649
8650 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8651
8652 MI.eraseFromParent();
8653 return Legalized;
8654}
8655
8656LegalizerHelper::LegalizeResult
8657LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
8658 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8659 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8660 // depend on fminnum/fmaxnum.
8661
8662 unsigned NewOp;
8663 switch (MI.getOpcode()) {
8664 case TargetOpcode::G_FMINNUM:
8665 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8666 break;
8667 case TargetOpcode::G_FMINIMUMNUM:
8668 NewOp = TargetOpcode::G_FMINNUM;
8669 break;
8670 case TargetOpcode::G_FMAXNUM:
8671 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8672 break;
8673 case TargetOpcode::G_FMAXIMUMNUM:
8674 NewOp = TargetOpcode::G_FMAXNUM;
8675 break;
8676 default:
8677 llvm_unreachable("unexpected min/max opcode");
8678 }
8679
8680 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8681 LLT Ty = MRI.getType(Dst);
8682
8683 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8684 // Insert canonicalizes if it's possible we need to quiet to get correct
8685 // sNaN behavior.
8686
8687 // Note this must be done here, and not as an optimization combine in the
8688 // absence of a dedicated quiet-sNaN instruction, as we're using an
8689 // omni-purpose G_FCANONICALIZE.
8690 if (!isKnownNeverSNaN(Src0, MRI))
8691 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8692
8693 if (!isKnownNeverSNaN(Src1, MRI))
8694 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8695 }
8696
8697 // If there are no nans, it's safe to simply replace this with the non-IEEE
8698 // version.
8699 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8700 MI.eraseFromParent();
8701 return Legalized;
8702}
8703
8704LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
8705 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8706 Register DstReg = MI.getOperand(0).getReg();
8707 LLT Ty = MRI.getType(DstReg);
8708 unsigned Flags = MI.getFlags();
8709
8710 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8711 Flags);
8712 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8713 MI.eraseFromParent();
8714 return Legalized;
8715}
8716
8717LegalizerHelper::LegalizeResult
8718LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
8719 auto [DstReg, X] = MI.getFirst2Regs();
8720 const unsigned Flags = MI.getFlags();
8721 const LLT Ty = MRI.getType(DstReg);
8722 const LLT CondTy = Ty.changeElementSize(1);
8723
8724 // round(x) =>
8725 // t = trunc(x);
8726 // d = fabs(x - t);
8727 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8728 // return t + o;
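// This is round-half-away-from-zero: when the fractional part is exactly 0.5,
// the copysigned 1.0 moves the result away from zero.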
8729
8730 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8731
8732 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8733 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8734
8735 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8736 auto Cmp =
8737 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8738
8739 // Could emit G_UITOFP instead
8740 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8741 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8742 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8743 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8744
8745 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8746
8747 MI.eraseFromParent();
8748 return Legalized;
8749}
8750
8751LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
8752 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8753 unsigned Flags = MI.getFlags();
8754 LLT Ty = MRI.getType(DstReg);
8755 const LLT CondTy = Ty.changeElementSize(1);
8756
8757 // result = trunc(src);
8758 // if (src < 0.0 && src != result)
8759 // result += -1.0.
8760
8761 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8762 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8763
8764 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8765 SrcReg, Zero, Flags);
8766 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8767 SrcReg, Trunc, Flags);
8768 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8769 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8770
8771 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8772 MI.eraseFromParent();
8773 return Legalized;
8774}
8775
8776LegalizerHelper::LegalizeResult
8777LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
8778 const unsigned NumOps = MI.getNumOperands();
8779 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8780 unsigned PartSize = Src0Ty.getSizeInBits();
8781
8782 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8783 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8784
8785 for (unsigned I = 2; I != NumOps; ++I) {
8786 const unsigned Offset = (I - 1) * PartSize;
8787
8788 Register SrcReg = MI.getOperand(I).getReg();
8789 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8790
8791 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8792 MRI.createGenericVirtualRegister(WideTy);
8793
8794 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8795 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8796 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8797 ResultReg = NextResult;
8798 }
8799
8800 if (DstTy.isPointer()) {
8801 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8802 DstTy.getAddressSpace())) {
8803 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8804 return UnableToLegalize;
8805 }
8806
8807 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8808 }
8809
8810 MI.eraseFromParent();
8811 return Legalized;
8812}
8813
8814LegalizerHelper::LegalizeResult
8815LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
8816 const unsigned NumDst = MI.getNumOperands() - 1;
8817 Register SrcReg = MI.getOperand(NumDst).getReg();
8818 Register Dst0Reg = MI.getOperand(0).getReg();
8819 LLT DstTy = MRI.getType(Dst0Reg);
8820 if (DstTy.isPointer())
8821 return UnableToLegalize; // TODO
8822
8823 SrcReg = coerceToScalar(SrcReg);
8824 if (!SrcReg)
8825 return UnableToLegalize;
8826
8827 // Expand scalarizing unmerge as bitcast to integer and shift.
8828 LLT IntTy = MRI.getType(SrcReg);
8829
8830 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8831
8832 const unsigned DstSize = DstTy.getSizeInBits();
8833 unsigned Offset = DstSize;
8834 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8835 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8836 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8837 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8838 }
8839
8840 MI.eraseFromParent();
8841 return Legalized;
8842}
8843
8844/// Lower a vector extract or insert by writing the vector to a stack temporary
8845/// and reloading the element or vector.
8846///
8847/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8848/// =>
8849/// %stack_temp = G_FRAME_INDEX
8850/// G_STORE %vec, %stack_temp
8851/// %idx = clamp(%idx, %vec.getNumElements())
8852/// %element_ptr = G_PTR_ADD %stack_temp, %idx
8853/// %dst = G_LOAD %element_ptr
8854LegalizerHelper::LegalizeResult
8855LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
8856 Register DstReg = MI.getOperand(0).getReg();
8857 Register SrcVec = MI.getOperand(1).getReg();
8858 Register InsertVal;
8859 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8860 InsertVal = MI.getOperand(2).getReg();
8861
8862 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
8863
8864 LLT VecTy = MRI.getType(SrcVec);
8865 LLT EltTy = VecTy.getElementType();
8866 unsigned NumElts = VecTy.getNumElements();
8867
8868 int64_t IdxVal;
8869 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
8870 SmallVector<Register, 8> SrcRegs;
8871 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
8872
8873 if (InsertVal) {
8874 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
8875 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
8876 } else {
8877 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
8878 }
8879
8880 MI.eraseFromParent();
8881 return Legalized;
8882 }
8883
8884 if (!EltTy.isByteSized()) { // Not implemented.
8885 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8886 return UnableToLegalize;
8887 }
8888
8889 unsigned EltBytes = EltTy.getSizeInBytes();
8890 Align VecAlign = getStackTemporaryAlignment(VecTy);
8891 Align EltAlign;
8892
8893 MachinePointerInfo PtrInfo;
8894 auto StackTemp = createStackTemporary(
8895 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
8896 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
8897
8898 // Get the pointer to the element, and be sure not to hit undefined behavior
8899 // if the index is out of bounds.
8900 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
8901
8902 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
8903 int64_t Offset = IdxVal * EltBytes;
8904 PtrInfo = PtrInfo.getWithOffset(Offset);
8905 EltAlign = commonAlignment(VecAlign, Offset);
8906 } else {
8907 // We lose information with a variable offset.
8908 EltAlign = getStackTemporaryAlignment(EltTy);
8909 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
8910 }
8911
8912 if (InsertVal) {
8913 // Write the inserted element
8914 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
8915
8916 // Reload the whole vector.
8917 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
8918 } else {
8919 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
8920 }
8921
8922 MI.eraseFromParent();
8923 return Legalized;
8924}
8925
8926LegalizerHelper::LegalizeResult
8927LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
8928 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8929 MI.getFirst3RegLLTs();
8930 LLT IdxTy = LLT::scalar(32);
8931
8932 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
8933 Register Undef;
8934 SmallVector<Register, 32> BuildVec;
8935 LLT EltTy = DstTy.getScalarType();
8936
8937 for (int Idx : Mask) {
8938 if (Idx < 0) {
8939 if (!Undef.isValid())
8940 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
8941 BuildVec.push_back(Undef);
8942 continue;
8943 }
8944
8945 if (Src0Ty.isScalar()) {
8946 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
8947 } else {
8948 int NumElts = Src0Ty.getNumElements();
8949 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
8950 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
8951 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
8952 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
8953 BuildVec.push_back(Extract.getReg(0));
8954 }
8955 }
8956
8957 if (DstTy.isVector())
8958 MIRBuilder.buildBuildVector(DstReg, BuildVec);
8959 else
8960 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
8961 MI.eraseFromParent();
8962 return Legalized;
8963}
8964
8965LegalizerHelper::LegalizeResult
8966LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
8967 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
8968 MI.getFirst4RegLLTs();
8969
8970 if (VecTy.isScalableVector())
8971 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
8972
8973 Align VecAlign = getStackTemporaryAlignment(VecTy);
8974 MachinePointerInfo PtrInfo;
8975 Register StackPtr =
8976 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
8977 PtrInfo)
8978 .getReg(0);
8979 MachinePointerInfo ValPtrInfo =
8980 MachinePointerInfo::getUnknownStack(*MI.getMF());
8981
8982 LLT IdxTy = LLT::scalar(32);
8983 LLT ValTy = VecTy.getElementType();
8984 Align ValAlign = getStackTemporaryAlignment(ValTy);
8985
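// Compress through a stack slot: every element is stored at OutPos, but OutPos
// only advances when the corresponding mask bit is set, so unselected elements
// are overwritten by the next selected one. If a passthru is given it is
// stored to the slot up front, so the untouched tail keeps the passthru
// values; the whole vector is reloaded at the end.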
8986 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
8987
8988 bool HasPassthru =
8989 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
8990
8991 if (HasPassthru)
8992 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
8993
8994 Register LastWriteVal;
8995 std::optional<APInt> PassthruSplatVal =
8996 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
8997
8998 if (PassthruSplatVal.has_value()) {
8999 LastWriteVal =
9000 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9001 } else if (HasPassthru) {
9002 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9003 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9004 {LLT::scalar(32)}, {Popcount});
9005
9006 Register LastElmtPtr =
9007 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9008 LastWriteVal =
9009 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9010 .getReg(0);
9011 }
9012
9013 unsigned NumElmts = VecTy.getNumElements();
9014 for (unsigned I = 0; I < NumElmts; ++I) {
9015 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9016 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9017 Register ElmtPtr =
9018 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9019 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9020
9021 LLT MaskITy = MaskTy.getElementType();
9022 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9023 if (MaskITy.getSizeInBits() > 1)
9024 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9025
9026 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9027 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9028
9029 if (HasPassthru && I == NumElmts - 1) {
9030 auto EndOfVector =
9031 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9032 auto AllLanesSelected = MIRBuilder.buildICmp(
9033 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9034 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9035 {OutPos, EndOfVector});
9036 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9037
9038 LastWriteVal =
9039 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9040 .getReg(0);
9041 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9042 }
9043 }
9044
9045 // TODO: Use StackPtr's FrameIndex alignment.
9046 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9047
9048 MI.eraseFromParent();
9049 return Legalized;
9050}
9051
9052Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9053 Register AllocSize,
9054 Align Alignment,
9055 LLT PtrTy) {
9056 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9057
9058 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9059 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9060
9061 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9062 // have to generate an extra instruction to negate the alloc and then use
9063 // G_PTR_ADD to add the negative offset.
9064 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
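// Rounding the new SP down (the AND with -Alignment clears the low bits) keeps
// the pointer inside the just-allocated region, since the stack grows
// downwards on the targets reaching this path.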
9065 if (Alignment > Align(1)) {
9066 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9067 AlignMask.negate();
9068 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9069 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9070 }
9071
9072 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9073}
9074
9075LegalizerHelper::LegalizeResult LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
9076 const auto &MF = *MI.getMF();
9078 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9079 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9080 return UnableToLegalize;
9081
9082 Register Dst = MI.getOperand(0).getReg();
9083 Register AllocSize = MI.getOperand(1).getReg();
9084 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9085
9086 LLT PtrTy = MRI.getType(Dst);
9087 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9088 Register SPTmp =
9089 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9090
9091 MIRBuilder.buildCopy(SPReg, SPTmp);
9092 MIRBuilder.buildCopy(Dst, SPTmp);
9093
9094 MI.eraseFromParent();
9095 return Legalized;
9096}
9097
9098LegalizerHelper::LegalizeResult
9099LegalizerHelper::lowerStackSave(MachineInstr &MI) {
9100 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9101 if (!StackPtr)
9102 return UnableToLegalize;
9103
9104 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9105 MI.eraseFromParent();
9106 return Legalized;
9107}
9108
9109LegalizerHelper::LegalizeResult
9110LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
9111 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9112 if (!StackPtr)
9113 return UnableToLegalize;
9114
9115 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9116 MI.eraseFromParent();
9117 return Legalized;
9118}
9119
9120LegalizerHelper::LegalizeResult
9121LegalizerHelper::lowerExtract(MachineInstr &MI) {
9122 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9123 unsigned Offset = MI.getOperand(2).getImm();
9124
9125 // Extract sub-vector or one element
9126 if (SrcTy.isVector()) {
9127 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9128 unsigned DstSize = DstTy.getSizeInBits();
9129
9130 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9131 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9132 // Unmerge and allow access to each Src element for the artifact combiner.
9133 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9134
9135 // Take element(s) we need to extract and copy it (merge them).
9136 SmallVector<Register, 8> SubVectorElts;
9137 for (unsigned Idx = Offset / SrcEltSize;
9138 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9139 SubVectorElts.push_back(Unmerge.getReg(Idx));
9140 }
9141 if (SubVectorElts.size() == 1)
9142 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9143 else
9144 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9145
9146 MI.eraseFromParent();
9147 return Legalized;
9148 }
9149 }
9150
9151 if (DstTy.isScalar() &&
9152 (SrcTy.isScalar() ||
9153 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9154 LLT SrcIntTy = SrcTy;
9155 if (!SrcTy.isScalar()) {
9156 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9157 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9158 }
9159
9160 if (Offset == 0)
9161 MIRBuilder.buildTrunc(DstReg, SrcReg);
9162 else {
9163 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9164 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9165 MIRBuilder.buildTrunc(DstReg, Shr);
9166 }
9167
9168 MI.eraseFromParent();
9169 return Legalized;
9170 }
9171
9172 return UnableToLegalize;
9173}
9174
9175LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
9176 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9177 uint64_t Offset = MI.getOperand(3).getImm();
9178
9179 LLT DstTy = MRI.getType(Src);
9180 LLT InsertTy = MRI.getType(InsertSrc);
9181
9182 // Insert sub-vector or one element
9183 if (DstTy.isVector() && !InsertTy.isPointer()) {
9184 LLT EltTy = DstTy.getElementType();
9185 unsigned EltSize = EltTy.getSizeInBits();
9186 unsigned InsertSize = InsertTy.getSizeInBits();
9187
9188 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9189 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9190 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9191 SmallVector<Register, 8> DstElts;
9192 unsigned Idx = 0;
9193 // Elements from Src before insert start Offset
9194 for (; Idx < Offset / EltSize; ++Idx) {
9195 DstElts.push_back(UnmergeSrc.getReg(Idx));
9196 }
9197
9198 // Replace elements in Src with elements from InsertSrc
9199 if (InsertTy.getSizeInBits() > EltSize) {
9200 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9201 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9202 ++Idx, ++i) {
9203 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9204 }
9205 } else {
9206 DstElts.push_back(InsertSrc);
9207 ++Idx;
9208 }
9209
9210 // Remaining elements from Src after insert
9211 for (; Idx < DstTy.getNumElements(); ++Idx) {
9212 DstElts.push_back(UnmergeSrc.getReg(Idx));
9213 }
9214
9215 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9216 MI.eraseFromParent();
9217 return Legalized;
9218 }
9219 }
9220
9221 if (InsertTy.isVector() ||
9222 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9223 return UnableToLegalize;
9224
9225 const DataLayout &DL = MIRBuilder.getDataLayout();
9226 if ((DstTy.isPointer() &&
9227 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9228 (InsertTy.isPointer() &&
9229 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9230 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9231 return UnableToLegalize;
9232 }
9233
9234 LLT IntDstTy = DstTy;
9235
9236 if (!DstTy.isScalar()) {
9237 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9238 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9239 }
9240
9241 if (!InsertTy.isScalar()) {
9242 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9243 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9244 }
9245
9246 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9247 if (Offset != 0) {
9248 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9249 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9250 }
9251
9252 APInt MaskVal = APInt::getBitsSetWithWrap(
9253 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
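// With hi < lo, getBitsSetWithWrap produces the wrapped mask covering every
// bit outside [Offset, Offset + InsertSize), so the AND below clears just the
// insertion window before the shifted value is ORed in.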
9254
9255 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9256 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9257 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9258
9259 MIRBuilder.buildCast(Dst, Or);
9260 MI.eraseFromParent();
9261 return Legalized;
9262}
9263
9264LegalizerHelper::LegalizeResult
9265LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
9266 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9267 MI.getFirst4RegLLTs();
9268 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9269
9270 LLT Ty = Dst0Ty;
9271 LLT BoolTy = Dst1Ty;
9272
9273 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9274
9275 if (IsAdd)
9276 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9277 else
9278 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9279
9280 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9281
9282 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9283
9284 // For an addition, the result should be less than one of the operands (LHS)
9285 // if and only if the other operand (RHS) is negative, otherwise there will
9286 // be overflow.
9287 // For a subtraction, the result should be less than one of the operands
9288 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9289 // otherwise there will be overflow.
9290 auto ResultLowerThanLHS =
9291 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9292 auto ConditionRHS = MIRBuilder.buildICmp(
9293 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9294
9295 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9296
9297 MIRBuilder.buildCopy(Dst0, NewDst0);
9298 MI.eraseFromParent();
9299
9300 return Legalized;
9301}
9302
9303LegalizerHelper::LegalizeResult
9304LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
9305 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9306 LLT Ty = MRI.getType(Res);
9307 bool IsSigned;
9308 bool IsAdd;
9309 unsigned BaseOp;
9310 switch (MI.getOpcode()) {
9311 default:
9312 llvm_unreachable("unexpected addsat/subsat opcode");
9313 case TargetOpcode::G_UADDSAT:
9314 IsSigned = false;
9315 IsAdd = true;
9316 BaseOp = TargetOpcode::G_ADD;
9317 break;
9318 case TargetOpcode::G_SADDSAT:
9319 IsSigned = true;
9320 IsAdd = true;
9321 BaseOp = TargetOpcode::G_ADD;
9322 break;
9323 case TargetOpcode::G_USUBSAT:
9324 IsSigned = false;
9325 IsAdd = false;
9326 BaseOp = TargetOpcode::G_SUB;
9327 break;
9328 case TargetOpcode::G_SSUBSAT:
9329 IsSigned = true;
9330 IsAdd = false;
9331 BaseOp = TargetOpcode::G_SUB;
9332 break;
9333 }
9334
9335 if (IsSigned) {
9336 // sadd.sat(a, b) ->
9337 // hi = 0x7fffffff - smax(a, 0)
9338 // lo = 0x80000000 - smin(a, 0)
9339 // a + smin(smax(lo, b), hi)
9340 // ssub.sat(a, b) ->
9341 // lo = smax(a, -1) - 0x7fffffff
9342 // hi = smin(a, -1) - 0x80000000
9343 // a - smin(smax(lo, b), hi)
9344 // TODO: AMDGPU can use a "median of 3" instruction here:
9345 // a +/- med3(lo, b, hi)
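// E.g. 8-bit sadd.sat with a = 100: hi = 127 - 100 = 27, lo = -128 - 0 = -128,
// so b is clamped to [-128, 27] and a + b stays within [-28, 127] without
// wrapping, saturating to 127 exactly when b >= 27.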
9346 uint64_t NumBits = Ty.getScalarSizeInBits();
9347 auto MaxVal =
9348 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9349 auto MinVal =
9350 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9351 MachineInstrBuilder Hi, Lo;
9352 if (IsAdd) {
9353 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9354 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9355 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9356 } else {
9357 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9358 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9359 MaxVal);
9360 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9361 MinVal);
9362 }
9363 auto RHSClamped =
9364 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9365 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9366 } else {
9367 // uadd.sat(a, b) -> a + umin(~a, b)
9368 // usub.sat(a, b) -> a - umin(a, b)
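// ~a is the remaining headroom (UINT_MAX - a), so adding umin(~a, b) can never
// wrap; likewise subtracting umin(a, b) can never go below zero.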
9369 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9370 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9371 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9372 }
9373
9374 MI.eraseFromParent();
9375 return Legalized;
9376}
9377
9378LegalizerHelper::LegalizeResult
9379LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
9380 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9381 LLT Ty = MRI.getType(Res);
9382 LLT BoolTy = Ty.changeElementSize(1);
9383 bool IsSigned;
9384 bool IsAdd;
9385 unsigned OverflowOp;
9386 switch (MI.getOpcode()) {
9387 default:
9388 llvm_unreachable("unexpected addsat/subsat opcode");
9389 case TargetOpcode::G_UADDSAT:
9390 IsSigned = false;
9391 IsAdd = true;
9392 OverflowOp = TargetOpcode::G_UADDO;
9393 break;
9394 case TargetOpcode::G_SADDSAT:
9395 IsSigned = true;
9396 IsAdd = true;
9397 OverflowOp = TargetOpcode::G_SADDO;
9398 break;
9399 case TargetOpcode::G_USUBSAT:
9400 IsSigned = false;
9401 IsAdd = false;
9402 OverflowOp = TargetOpcode::G_USUBO;
9403 break;
9404 case TargetOpcode::G_SSUBSAT:
9405 IsSigned = true;
9406 IsAdd = false;
9407 OverflowOp = TargetOpcode::G_SSUBO;
9408 break;
9409 }
9410
9411 auto OverflowRes =
9412 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9413 Register Tmp = OverflowRes.getReg(0);
9414 Register Ov = OverflowRes.getReg(1);
9415 MachineInstrBuilder Clamp;
9416 if (IsSigned) {
9417 // sadd.sat(a, b) ->
9418 // {tmp, ov} = saddo(a, b)
9419 // ov ? (tmp >>s 31) + 0x80000000 : r
9420 // ssub.sat(a, b) ->
9421 // {tmp, ov} = ssubo(a, b)
9422 // ov ? (tmp >>s 31) + 0x80000000 : r
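// On overflow the wrapped result has the opposite sign of the true result, so
// (tmp >>s 31) + MinVal yields INT_MAX when tmp is negative (positive
// overflow) and INT_MIN when tmp is non-negative (negative overflow).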
9423 uint64_t NumBits = Ty.getScalarSizeInBits();
9424 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9425 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9426 auto MinVal =
9427 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9428 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9429 } else {
9430 // uadd.sat(a, b) ->
9431 // {tmp, ov} = uaddo(a, b)
9432 // ov ? 0xffffffff : tmp
9433 // usub.sat(a, b) ->
9434 // {tmp, ov} = usubo(a, b)
9435 // ov ? 0 : tmp
9436 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9437 }
9438 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9439
9440 MI.eraseFromParent();
9441 return Legalized;
9442}
9443
9444LegalizerHelper::LegalizeResult
9445LegalizerHelper::lowerShlSat(MachineInstr &MI) {
9446 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9447 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9448 "Expected shlsat opcode!");
9449 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9450 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9451 LLT Ty = MRI.getType(Res);
9452 LLT BoolTy = Ty.changeElementSize(1);
9453
9454 unsigned BW = Ty.getScalarSizeInBits();
9455 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9456 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9457 : MIRBuilder.buildLShr(Ty, Result, RHS);
9458
9459 MachineInstrBuilder SatVal;
9460 if (IsSigned) {
9461 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9462 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9463 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9464 MIRBuilder.buildConstant(Ty, 0));
9465 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9466 } else {
9467 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9468 }
9469 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9470 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9471
9472 MI.eraseFromParent();
9473 return Legalized;
9474}
9475
9476LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
9477 auto [Dst, Src] = MI.getFirst2Regs();
9478 const LLT Ty = MRI.getType(Src);
9479 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9480 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
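// E.g. for s32: BaseShiftAmt is 24, the two shifts below exchange bytes 0 and
// 3, and the loop's single iteration (i = 1, shift 8) exchanges bytes 1 and 2.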
9481
9482 // Swap most and least significant byte, set remaining bytes in Res to zero.
9483 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9484 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9485 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9486 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9487
9488 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9489 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9490 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9491 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9492 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9493 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9494 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9495 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9496 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9497 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9498 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9499 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9500 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9501 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9502 }
9503 Res.getInstr()->getOperand(0).setReg(Dst);
9504
9505 MI.eraseFromParent();
9506 return Legalized;
9507}
9508
9509//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9510static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
9511 MachineInstrBuilder Src, const APInt &Mask) {
9512 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9513 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9514 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9515 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9516 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9517 return B.buildOr(Dst, LHS, RHS);
9518}
9519
9520LegalizerHelper::LegalizeResult
9521LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
9522 auto [Dst, Src] = MI.getFirst2Regs();
9523 const LLT SrcTy = MRI.getType(Src);
9524 unsigned Size = SrcTy.getScalarSizeInBits();
9525 unsigned VSize = SrcTy.getSizeInBits();
9526
9527 if (Size >= 8) {
9528 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9529 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9530 {LLT::fixed_vector(VSize / 8, 8),
9531 LLT::fixed_vector(VSize / 8, 8)}}))) {
9532 // If bitreverse is legal for i8 vector of the same size, then cast
9533 // to i8 vector type.
9534 // e.g. v4s32 -> v16s8
9535 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9536 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9537 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9538 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9539 MIRBuilder.buildBitcast(Dst, RBIT);
9540 } else {
9541 MachineInstrBuilder BSWAP =
9542 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9543
9544 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9545 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9546 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9547 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9548 APInt::getSplat(Size, APInt(8, 0xF0)));
9549
9550 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9551 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9552 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9553 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9554 APInt::getSplat(Size, APInt(8, 0xCC)));
9555
9556 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9557 // 6|7
9558 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9559 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
9560 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9561 }
9562 } else {
9563 // Expand bitreverse for types smaller than 8 bits.
9564 MachineInstrBuilder Tmp;
9565 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9566 MachineInstrBuilder Tmp2;
9567 if (I < J) {
9568 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9569 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9570 } else {
9571 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9572 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9573 }
9574
9575 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9576 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9577 if (I == 0)
9578 Tmp = Tmp2;
9579 else
9580 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9581 }
9582 MIRBuilder.buildCopy(Dst, Tmp);
9583 }
9584
9585 MI.eraseFromParent();
9586 return Legalized;
9587}
9588
9589LegalizerHelper::LegalizeResult
9590LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
9591 MachineFunction &MF = MIRBuilder.getMF();
9592
9593 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9594 int NameOpIdx = IsRead ? 1 : 0;
9595 int ValRegIndex = IsRead ? 0 : 1;
9596
9597 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9598 const LLT Ty = MRI.getType(ValReg);
9599 const MDString *RegStr = cast<MDString>(
9600 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9601
9602 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9603 if (!PhysReg) {
9604 const Function &Fn = MF.getFunction();
9606 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9607 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9608 Fn, MI.getDebugLoc()));
9609 if (IsRead)
9610 MIRBuilder.buildUndef(ValReg);
9611
9612 MI.eraseFromParent();
9613 return Legalized;
9614 }
9615
9616 if (IsRead)
9617 MIRBuilder.buildCopy(ValReg, PhysReg);
9618 else
9619 MIRBuilder.buildCopy(PhysReg, ValReg);
9620
9621 MI.eraseFromParent();
9622 return Legalized;
9623}
9624
9625LegalizerHelper::LegalizeResult
9626LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9627 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9628 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9629 Register Result = MI.getOperand(0).getReg();
9630 LLT OrigTy = MRI.getType(Result);
9631 auto SizeInBits = OrigTy.getScalarSizeInBits();
9632 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9633
9634 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9635 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9636 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9637 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9638
9639 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9640 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9641 MIRBuilder.buildTrunc(Result, Shifted);
9642
9643 MI.eraseFromParent();
9644 return Legalized;
9645}
9646
9647LegalizerHelper::LegalizeResult
9648LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
9649 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9650 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9651
9652 if (Mask == fcNone) {
9653 MIRBuilder.buildConstant(DstReg, 0);
9654 MI.eraseFromParent();
9655 return Legalized;
9656 }
9657 if (Mask == fcAllFlags) {
9658 MIRBuilder.buildConstant(DstReg, 1);
9659 MI.eraseFromParent();
9660 return Legalized;
9661 }
9662
9663 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9664 // version
9665
9666 unsigned BitSize = SrcTy.getScalarSizeInBits();
9667 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9668
9669 LLT IntTy = LLT::scalar(BitSize);
9670 if (SrcTy.isVector())
9671 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9672 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9673
9674 // Various masks.
9675 APInt SignBit = APInt::getSignMask(BitSize);
9676 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9677 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9678 APInt ExpMask = Inf;
9679 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9680 APInt QNaNBitMask =
9681 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
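// QNaNBitMask is the most significant mantissa bit, which IEEE-754 uses to
// distinguish quiet NaNs (bit set) from signaling NaNs (bit clear).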
9682 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9683
9684 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9685 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9686 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9687 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9688 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9689
9690 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9691 auto Sign =
9692 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9693
9694 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9695 // Clang doesn't support capture of structured bindings:
9696 LLT DstTyCopy = DstTy;
9697 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9698 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9699 };
9700
9701 // Tests that involve more than one class should be processed first.
9702 if ((Mask & fcFinite) == fcFinite) {
9703 // finite(V) ==> abs(V) u< exp_mask
9704 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9705 ExpMaskC));
9706 Mask &= ~fcFinite;
9707 } else if ((Mask & fcFinite) == fcPosFinite) {
9708 // finite(V) && V > 0 ==> V u< exp_mask
9709 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9710 ExpMaskC));
9711 Mask &= ~fcPosFinite;
9712 } else if ((Mask & fcFinite) == fcNegFinite) {
9713 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9714 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9715 ExpMaskC);
9716 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9717 appendToRes(And);
9718 Mask &= ~fcNegFinite;
9719 }
9720
9721 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9722 // fcZero | fcSubnormal => test all exponent bits are 0
9723 // TODO: Handle sign bit specific cases
9724 // TODO: Handle inverted case
9725 if (PartialCheck == (fcZero | fcSubnormal)) {
9726 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9727 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9728 ExpBits, ZeroC));
9729 Mask &= ~PartialCheck;
9730 }
9731 }
9732
9733 // Check for individual classes.
9734 if (FPClassTest PartialCheck = Mask & fcZero) {
9735 if (PartialCheck == fcPosZero)
9736 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9737 AsInt, ZeroC));
9738 else if (PartialCheck == fcZero)
9739 appendToRes(
9740 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9741 else // fcNegZero
9742 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9743 AsInt, SignBitC));
9744 }
9745
9746 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9747 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9748 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9749 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9750 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9751 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9752 auto SubnormalRes =
9753 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9754 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9755 if (PartialCheck == fcNegSubnormal)
9756 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9757 appendToRes(SubnormalRes);
9758 }
9759
9760 if (FPClassTest PartialCheck = Mask & fcInf) {
9761 if (PartialCheck == fcPosInf)
9762 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9763 AsInt, InfC));
9764 else if (PartialCheck == fcInf)
9765 appendToRes(
9766 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9767 else { // fcNegInf
9768 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9769 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9770 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9771 AsInt, NegInfC));
9772 }
9773 }
9774
9775 if (FPClassTest PartialCheck = Mask & fcNan) {
9776 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9777 if (PartialCheck == fcNan) {
9778 // isnan(V) ==> abs(V) u> int(inf)
9779 appendToRes(
9780 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9781 } else if (PartialCheck == fcQNan) {
9782 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9783 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9784 InfWithQnanBitC));
9785 } else { // fcSNan
9786 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9787 // abs(V) u< (unsigned(Inf) | quiet_bit)
9788 auto IsNan =
9789 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9790 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9791 Abs, InfWithQnanBitC);
9792 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9793 }
9794 }
9795
9796 if (FPClassTest PartialCheck = Mask & fcNormal) {
9797 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9798 // (max_exp-1))
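    // E.g. for f32: ExpLSB = 0x00800000 and MaxExpMinusOne = 0x7f000000, so the
    // value is normal iff unsigned(abs(V) - 0x00800000) u< 0x7f000000, i.e. its
    // biased exponent lies in [1, 254].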
9799 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9800 auto ExpMinusOne = MIRBuilder.buildSub(
9801 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9802 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9803 auto NormalRes =
9804 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
9805 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9806 if (PartialCheck == fcNegNormal)
9807 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9808 else if (PartialCheck == fcPosNormal) {
9809 auto PosSign = MIRBuilder.buildXor(
9810 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
9811 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
9812 }
9813 appendToRes(NormalRes);
9814 }
9815
9816 MIRBuilder.buildCopy(DstReg, Res);
9817 MI.eraseFromParent();
9818 return Legalized;
9819}
9820
9821LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
9822 // Implement G_SELECT in terms of XOR, AND, OR.
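  // The mask is sign-extended to the data width so every lane is all-ones or
  // all-zeros, and the result is (Op1 & Mask) | (Op2 & ~Mask). E.g. a true s1
  // condition widened to s32 becomes 0xffffffff and selects Op1 bitwise.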
9823 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9824 MI.getFirst4RegLLTs();
9825
9826 bool IsEltPtr = DstTy.isPointerOrPointerVector();
9827 if (IsEltPtr) {
9828 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9829 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9830 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9831 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9832 DstTy = NewTy;
9833 }
9834
9835 if (MaskTy.isScalar()) {
9836 // Turn the scalar condition into a vector condition mask if needed.
9837
9838 Register MaskElt = MaskReg;
9839
9840 // The condition was potentially zero extended before, but we want a sign
9841 // extended boolean.
9842 if (MaskTy != LLT::scalar(1))
9843 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
9844
9845 // Continue the sign extension (or truncate) to match the data type.
9846 MaskElt =
9847 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
9848
9849 if (DstTy.isVector()) {
9850 // Generate a vector splat idiom.
9851 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9852 MaskReg = ShufSplat.getReg(0);
9853 } else {
9854 MaskReg = MaskElt;
9855 }
9856 MaskTy = DstTy;
9857 } else if (!DstTy.isVector()) {
9858 // Cannot handle the case that mask is a vector and dst is a scalar.
9859 return UnableToLegalize;
9860 }
9861
9862 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9863 return UnableToLegalize;
9864 }
9865
9866 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9867 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9868 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
9869 if (IsEltPtr) {
9870 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9871 MIRBuilder.buildIntToPtr(DstReg, Or);
9872 } else {
9873 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9874 }
9875 MI.eraseFromParent();
9876 return Legalized;
9877}
9878
9879LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
9880 // Split DIVREM into individual instructions.
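  // E.g. "%q:_(s32), %r:_(s32) = G_SDIVREM %a, %b" becomes
  //   %q:_(s32) = G_SDIV %a, %b
  //   %r:_(s32) = G_SREM %a, %b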
9881 unsigned Opcode = MI.getOpcode();
9882
9883 MIRBuilder.buildInstr(
9884 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9885 : TargetOpcode::G_UDIV,
9886 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9887 MIRBuilder.buildInstr(
9888 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9889 : TargetOpcode::G_UREM,
9890 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9891 MI.eraseFromParent();
9892 return Legalized;
9893}
9894
9895LegalizerHelper::LegalizeResult
9896LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
9897 // Expand %res = G_ABS %a into:
9898 // %v1 = G_ASHR %a, scalar_size-1
9899 // %v2 = G_ADD %a, %v1
9900 // %res = G_XOR %v2, %v1
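  // E.g. for %a = -5 (s32): %v1 = -1, %v2 = -6 and %res = -6 ^ -1 = 5; for a
  // non-negative input %v1 is 0, so the add and xor are no-ops.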
9901 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
9902 Register OpReg = MI.getOperand(1).getReg();
9903 auto ShiftAmt =
9904 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
9905 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
9906 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
9907 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
9908 MI.eraseFromParent();
9909 return Legalized;
9910}
9911
9912LegalizerHelper::LegalizeResult
9913LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
9914 // Expand %res = G_ABS %a into:
9915 // %v1 = G_CONSTANT 0
9916 // %v2 = G_SUB %v1, %a
9917 // %res = G_SMAX %a, %v2
9918 Register SrcReg = MI.getOperand(1).getReg();
9919 LLT Ty = MRI.getType(SrcReg);
9920 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9921 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
9922 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
9923 MI.eraseFromParent();
9924 return Legalized;
9925}
9926
9927LegalizerHelper::LegalizeResult
9928LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
9929 Register SrcReg = MI.getOperand(1).getReg();
9930 Register DestReg = MI.getOperand(0).getReg();
9931 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
9932 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
9933 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
9934 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
9935 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
9936 MI.eraseFromParent();
9937 return Legalized;
9938}
9939
9940LegalizerHelper::LegalizeResult
9941LegalizerHelper::lowerAbsDiffToSelect(MachineInstr &MI) {
9942 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
9943 MI.getOpcode() == TargetOpcode::G_ABDU) &&
9944 "Expected G_ABDS or G_ABDU instruction");
9945
9946 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
9947 LLT Ty = MRI.getType(LHS);
9948
9949 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9950 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
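  // E.g. abds(3, 7): sgt(3, 7) is false, so the select returns sub(7, 3) = 4.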
9951 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
9952 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
9953 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
9954 ? CmpInst::ICMP_SGT
9955 : CmpInst::ICMP_UGT;
9956 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
9957 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
9958
9959 MI.eraseFromParent();
9960 return Legalized;
9961}
9962
9963LegalizerHelper::LegalizeResult
9964LegalizerHelper::lowerAbsDiffToMinMax(MachineInstr &MI) {
9965 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
9966 MI.getOpcode() == TargetOpcode::G_ABDU) &&
9967 "Expected G_ABDS or G_ABDU instruction");
9968
9969 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
9970 LLT Ty = MRI.getType(LHS);
9971
9972 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
9973 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
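  // E.g. abdu(3, 7) = umax(3, 7) - umin(3, 7) = 7 - 3 = 4.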
9974 Register MaxReg, MinReg;
9975 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
9976 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
9977 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
9978 } else {
9979 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
9980 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
9981 }
9982 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
9983
9984 MI.eraseFromParent();
9985 return Legalized;
9986}
9987
9988LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
9989 Register SrcReg = MI.getOperand(1).getReg();
9990 Register DstReg = MI.getOperand(0).getReg();
9991
9992 LLT Ty = MRI.getType(DstReg);
9993
9994 // Reset sign bit
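  // (for f32 this ANDs with 0x7fffffff, clearing only the sign bit)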
9995 MIRBuilder.buildAnd(
9996 DstReg, SrcReg,
9997 MIRBuilder.buildConstant(
9998 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
9999
10000 MI.eraseFromParent();
10001 return Legalized;
10002}
10003
10004LegalizerHelper::LegalizeResult
10005LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
10006 Register SrcReg = MI.getOperand(1).getReg();
10007 LLT SrcTy = MRI.getType(SrcReg);
10008 LLT DstTy = MRI.getType(SrcReg);
10009
10010 // The source could be a scalar if the IR type was <1 x sN>.
10011 if (SrcTy.isScalar()) {
10012 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10013 return UnableToLegalize; // FIXME: handle extension.
10014 // This can be just a plain copy.
10015 Observer.changingInstr(MI);
10016 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10017 Observer.changedInstr(MI);
10018 return Legalized;
10019 }
10020 return UnableToLegalize;
10021}
10022
10023LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
10024 MachineFunction &MF = *MI.getMF();
10025 const DataLayout &DL = MIRBuilder.getDataLayout();
10026 LLVMContext &Ctx = MF.getFunction().getContext();
10027 Register ListPtr = MI.getOperand(1).getReg();
10028 LLT PtrTy = MRI.getType(ListPtr);
10029
10030 // ListPtr is a pointer to the head of the list. Get the address
10031 // of the head of the list.
10032 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10033 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10034 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10035 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10036
10037 const Align A(MI.getOperand(2).getImm());
10038 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
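  // If the slot requires a stricter alignment than the minimum stack argument
  // alignment, round VAList up first: e.g. for a 16-byte alignment, add 15 and
  // then clear the low Log2(16) = 4 bits of the pointer.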
10039 if (A > TLI.getMinStackArgumentAlignment()) {
10040 Register AlignAmt =
10041 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10042 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10043 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10044 VAList = AndDst.getReg(0);
10045 }
10046
10047 // Increment the pointer, VAList, to the next vaarg
10048 // The list should be bumped by the size of the element in the current head of
10049 // list.
10050 Register Dst = MI.getOperand(0).getReg();
10051 LLT LLTTy = MRI.getType(Dst);
10052 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10053 auto IncAmt =
10054 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10055 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10056
10057 // Store the incremented VAList to the legalized pointer
10058 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10059 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10060 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10061 // Load the actual argument out of the pointer VAList
10062 Align EltAlignment = DL.getABITypeAlign(Ty);
10063 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10064 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10065 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10066
10067 MI.eraseFromParent();
10068 return Legalized;
10069}
10070
10071static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10072 // On Darwin, -Os means optimize for size without hurting performance, so
10073 // only really optimize for size when -Oz (MinSize) is used.
10074 if (MF.getTarget().getTargetTriple().isOSDarwin())
10075 return MF.getFunction().hasMinSize();
10076 return MF.getFunction().hasOptSize();
10077}
10078
10079// Returns a list of types to use for memory op lowering in MemOps. A partial
10080// port of findOptimalMemOpLowering in TargetLowering.
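// For example, with an 8-byte preferred type and assuming the overlapping
// fast path below is not taken, a 13-byte operation is broken into
// {s64, s32, s8}: each iteration shrinks the type until it fits the remaining
// size and then emits one more memory op.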
10081static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10082 unsigned Limit, const MemOp &Op,
10083 unsigned DstAS, unsigned SrcAS,
10084 const AttributeList &FuncAttributes,
10085 const TargetLowering &TLI) {
10086 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10087 return false;
10088
10089 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10090
10091 if (Ty == LLT()) {
10092 // Use the largest scalar type whose alignment constraints are satisfied.
10093 // We only need to check DstAlign here as SrcAlign is always greater or
10094 // equal to DstAlign (or zero).
10095 Ty = LLT::scalar(64);
10096 if (Op.isFixedDstAlign())
10097 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10098 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10099 Ty = LLT::scalar(Ty.getSizeInBytes());
10100 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10101 // FIXME: check for the largest legal type we can load/store to.
10102 }
10103
10104 unsigned NumMemOps = 0;
10105 uint64_t Size = Op.size();
10106 while (Size) {
10107 unsigned TySize = Ty.getSizeInBytes();
10108 while (TySize > Size) {
10109 // For now, only use non-vector loads / stores for the left-over pieces.
10110 LLT NewTy = Ty;
10111 // FIXME: check for mem op safety and legality of the types. Not all of
10112 // SDAGisms map cleanly to GISel concepts.
10113 if (NewTy.isVector())
10114 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10115 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10116 unsigned NewTySize = NewTy.getSizeInBytes();
10117 assert(NewTySize > 0 && "Could not find appropriate type");
10118
10119 // If the new LLT cannot cover all of the remaining bits, then consider
10120 // issuing a (or a pair of) unaligned and overlapping load / store.
10121 unsigned Fast;
10122 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10123 MVT VT = getMVTForLLT(Ty);
10124 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10125 TLI.allowsMisalignedMemoryAccesses(
10126 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10127 MachineMemOperand::MONone, &Fast) &&
10128 Fast)
10129 TySize = Size;
10130 else {
10131 Ty = NewTy;
10132 TySize = NewTySize;
10133 }
10134 }
10135
10136 if (++NumMemOps > Limit)
10137 return false;
10138
10139 MemOps.push_back(Ty);
10140 Size -= TySize;
10141 }
10142
10143 return true;
10144}
10145
10146// Get a vectorized representation of the memset value operand, GISel edition.
10147static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10148 MachineRegisterInfo &MRI = *MIB.getMRI();
10149 unsigned NumBits = Ty.getScalarSizeInBits();
10150 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10151 if (!Ty.isVector() && ValVRegAndVal) {
10152 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10153 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10154 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10155 }
10156
10157 // Extend the byte value to the larger type, and then multiply by a magic
10158 // value 0x010101... in order to replicate it across every byte.
10159 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
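  // (For a 32-bit element, e.g., 0xab * 0x01010101 = 0xabababab.)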
10160 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10161 return MIB.buildConstant(Ty, 0).getReg(0);
10162 }
10163
10164 LLT ExtType = Ty.getScalarType();
10165 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10166 if (NumBits > 8) {
10167 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10168 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10169 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10170 }
10171
10172 // For vector types create a G_BUILD_VECTOR.
10173 if (Ty.isVector())
10174 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10175
10176 return Val;
10177}
10178
10179LegalizerHelper::LegalizeResult
10180LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10181 uint64_t KnownLen, Align Alignment,
10182 bool IsVolatile) {
10183 auto &MF = *MI.getParent()->getParent();
10184 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10185 auto &DL = MF.getDataLayout();
10186 LLVMContext &C = MF.getFunction().getContext();
10187
10188 assert(KnownLen != 0 && "Have a zero length memset length!");
10189
10190 bool DstAlignCanChange = false;
10191 MachineFrameInfo &MFI = MF.getFrameInfo();
10192 bool OptSize = shouldLowerMemFuncForSize(MF);
10193
10194 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10195 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10196 DstAlignCanChange = true;
10197
10198 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10199 std::vector<LLT> MemOps;
10200
10201 const auto &DstMMO = **MI.memoperands_begin();
10202 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10203
10204 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10205 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10206
10207 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10208 MemOp::Set(KnownLen, DstAlignCanChange,
10209 Alignment,
10210 /*IsZeroMemset=*/IsZeroVal,
10211 /*IsVolatile=*/IsVolatile),
10212 DstPtrInfo.getAddrSpace(), ~0u,
10213 MF.getFunction().getAttributes(), TLI))
10214 return UnableToLegalize;
10215
10216 if (DstAlignCanChange) {
10217 // Get an estimate of the type from the LLT.
10218 Type *IRTy = getTypeForLLT(MemOps[0], C);
10219 Align NewAlign = DL.getABITypeAlign(IRTy);
10220 if (NewAlign > Alignment) {
10221 Alignment = NewAlign;
10222 unsigned FI = FIDef->getOperand(1).getIndex();
10223 // Give the stack frame object a larger alignment if needed.
10224 if (MFI.getObjectAlign(FI) < Alignment)
10225 MFI.setObjectAlignment(FI, Alignment);
10226 }
10227 }
10228
10229 MachineIRBuilder MIB(MI);
10230 // Find the largest store and generate the bit pattern for it.
10231 LLT LargestTy = MemOps[0];
10232 for (unsigned i = 1; i < MemOps.size(); i++)
10233 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10234 LargestTy = MemOps[i];
10235
10236 // The memset stored value is always defined as an s8, so in order to make it
10237 // work with larger store types we need to repeat the bit pattern across the
10238 // wider type.
10239 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10240
10241 if (!MemSetValue)
10242 return UnableToLegalize;
10243
10244 // Generate the stores. For each store type in the list, we generate the
10245 // matching store of that type to the destination address.
10246 LLT PtrTy = MRI.getType(Dst);
10247 unsigned DstOff = 0;
10248 unsigned Size = KnownLen;
10249 for (unsigned I = 0; I < MemOps.size(); I++) {
10250 LLT Ty = MemOps[I];
10251 unsigned TySize = Ty.getSizeInBytes();
10252 if (TySize > Size) {
10253 // Issuing an unaligned load / store pair that overlaps with the previous
10254 // pair. Adjust the offset accordingly.
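      // E.g. if the target chose an overlapping {s64, s64} pair for a 13-byte
      // memset, the first store covers bytes 0-7 and DstOff is pulled back by 3
      // so the second store covers bytes 5-12.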
10255 assert(I == MemOps.size() - 1 && I != 0);
10256 DstOff -= TySize - Size;
10257 }
10258
10259 // If this store is smaller than the largest store, see whether we can get
10260 // the smaller value for free with a truncate.
10261 Register Value = MemSetValue;
10262 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10263 MVT VT = getMVTForLLT(Ty);
10264 MVT LargestVT = getMVTForLLT(LargestTy);
10265 if (!LargestTy.isVector() && !Ty.isVector() &&
10266 TLI.isTruncateFree(LargestVT, VT))
10267 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10268 else
10269 Value = getMemsetValue(Val, Ty, MIB);
10270 if (!Value)
10271 return UnableToLegalize;
10272 }
10273
10274 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10275
10276 Register Ptr = Dst;
10277 if (DstOff != 0) {
10278 auto Offset =
10279 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10280 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10281 }
10282
10283 MIB.buildStore(Value, Ptr, *StoreMMO);
10284 DstOff += Ty.getSizeInBytes();
10285 Size -= TySize;
10286 }
10287
10288 MI.eraseFromParent();
10289 return Legalized;
10290}
10291
10292LegalizerHelper::LegalizeResult
10293LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10294 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10295
10296 auto [Dst, Src, Len] = MI.getFirst3Regs();
10297
10298 const auto *MMOIt = MI.memoperands_begin();
10299 const MachineMemOperand *MemOp = *MMOIt;
10300 bool IsVolatile = MemOp->isVolatile();
10301
10302 // See if this is a constant length copy
10303 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10304 // FIXME: support dynamically sized G_MEMCPY_INLINE
10305 assert(LenVRegAndVal &&
10306 "inline memcpy with dynamic size is not yet supported");
10307 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10308 if (KnownLen == 0) {
10309 MI.eraseFromParent();
10310 return Legalized;
10311 }
10312
10313 const auto &DstMMO = **MI.memoperands_begin();
10314 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10315 Align DstAlign = DstMMO.getBaseAlign();
10316 Align SrcAlign = SrcMMO.getBaseAlign();
10317
10318 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10319 IsVolatile);
10320}
10321
10322LegalizerHelper::LegalizeResult
10323LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10324 uint64_t KnownLen, Align DstAlign,
10325 Align SrcAlign, bool IsVolatile) {
10326 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10327 return lowerMemcpy(MI, Dst, Src, KnownLen,
10328 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10329 IsVolatile);
10330}
10331
10332LegalizerHelper::LegalizeResult
10333LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10334 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10335 Align SrcAlign, bool IsVolatile) {
10336 auto &MF = *MI.getParent()->getParent();
10337 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10338 auto &DL = MF.getDataLayout();
10339 LLVMContext &C = MF.getFunction().getContext();
10340
10341 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10342
10343 bool DstAlignCanChange = false;
10344 MachineFrameInfo &MFI = MF.getFrameInfo();
10345 Align Alignment = std::min(DstAlign, SrcAlign);
10346
10347 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10348 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10349 DstAlignCanChange = true;
10350
10351 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10352 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10353 // if the memcpy is in a tail call position.
10354
10355 std::vector<LLT> MemOps;
10356
10357 const auto &DstMMO = **MI.memoperands_begin();
10358 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10359 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10360 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10361
10362 if (!findGISelOptimalMemOpLowering(
10363 MemOps, Limit,
10364 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10365 IsVolatile),
10366 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10367 MF.getFunction().getAttributes(), TLI))
10368 return UnableToLegalize;
10369
10370 if (DstAlignCanChange) {
10371 // Get an estimate of the type from the LLT.
10372 Type *IRTy = getTypeForLLT(MemOps[0], C);
10373 Align NewAlign = DL.getABITypeAlign(IRTy);
10374
10375 // Don't promote to an alignment that would require dynamic stack
10376 // realignment.
10377 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10378 if (!TRI->hasStackRealignment(MF))
10379 if (MaybeAlign StackAlign = DL.getStackAlignment())
10380 NewAlign = std::min(NewAlign, *StackAlign);
10381
10382 if (NewAlign > Alignment) {
10383 Alignment = NewAlign;
10384 unsigned FI = FIDef->getOperand(1).getIndex();
10385 // Give the stack frame object a larger alignment if needed.
10386 if (MFI.getObjectAlign(FI) < Alignment)
10387 MFI.setObjectAlignment(FI, Alignment);
10388 }
10389 }
10390
10391 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10392
10393 MachineIRBuilder MIB(MI);
10394 // Now we need to emit a pair of load and stores for each of the types we've
10395 // collected. I.e. for each type, generate a load from the source pointer of
10396 // that type width, and then generate a corresponding store to the dest buffer
10397 // of that value loaded. This can result in a sequence of loads and stores
10398 // of mixed types, depending on what the target specifies as good types to use.
10399 unsigned CurrOffset = 0;
10400 unsigned Size = KnownLen;
10401 for (auto CopyTy : MemOps) {
10402 // Issuing an unaligned load / store pair that overlaps with the previous
10403 // pair. Adjust the offset accordingly.
10404 if (CopyTy.getSizeInBytes() > Size)
10405 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10406
10407 // Construct MMOs for the accesses.
10408 auto *LoadMMO =
10409 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10410 auto *StoreMMO =
10411 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10412
10413 // Create the load.
10414 Register LoadPtr = Src;
10415 Register Offset;
10416 if (CurrOffset != 0) {
10417 LLT SrcTy = MRI.getType(Src);
10418 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10419 .getReg(0);
10420 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10421 }
10422 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10423
10424 // Create the store.
10425 Register StorePtr = Dst;
10426 if (CurrOffset != 0) {
10427 LLT DstTy = MRI.getType(Dst);
10428 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10429 }
10430 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10431 CurrOffset += CopyTy.getSizeInBytes();
10432 Size -= CopyTy.getSizeInBytes();
10433 }
10434
10435 MI.eraseFromParent();
10436 return Legalized;
10437}
10438
10440LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10441 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10442 bool IsVolatile) {
10443 auto &MF = *MI.getParent()->getParent();
10444 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10445 auto &DL = MF.getDataLayout();
10446 LLVMContext &C = MF.getFunction().getContext();
10447
10448 assert(KnownLen != 0 && "Have a zero length memmove length!");
10449
10450 bool DstAlignCanChange = false;
10451 MachineFrameInfo &MFI = MF.getFrameInfo();
10452 bool OptSize = shouldLowerMemFuncForSize(MF);
10453 Align Alignment = std::min(DstAlign, SrcAlign);
10454
10455 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10456 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10457 DstAlignCanChange = true;
10458
10459 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10460 std::vector<LLT> MemOps;
10461
10462 const auto &DstMMO = **MI.memoperands_begin();
10463 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10464 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10465 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10466
10467 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10468 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10469 // same thing here.
10470 if (!findGISelOptimalMemOpLowering(
10471 MemOps, Limit,
10472 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10473 /*IsVolatile*/ true),
10474 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10475 MF.getFunction().getAttributes(), TLI))
10476 return UnableToLegalize;
10477
10478 if (DstAlignCanChange) {
10479 // Get an estimate of the type from the LLT.
10480 Type *IRTy = getTypeForLLT(MemOps[0], C);
10481 Align NewAlign = DL.getABITypeAlign(IRTy);
10482
10483 // Don't promote to an alignment that would require dynamic stack
10484 // realignment.
10485 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10486 if (!TRI->hasStackRealignment(MF))
10487 if (MaybeAlign StackAlign = DL.getStackAlignment())
10488 NewAlign = std::min(NewAlign, *StackAlign);
10489
10490 if (NewAlign > Alignment) {
10491 Alignment = NewAlign;
10492 unsigned FI = FIDef->getOperand(1).getIndex();
10493 // Give the stack frame object a larger alignment if needed.
10494 if (MFI.getObjectAlign(FI) < Alignment)
10495 MFI.setObjectAlignment(FI, Alignment);
10496 }
10497 }
10498
10499 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10500
10501 MachineIRBuilder MIB(MI);
10502 // Memmove requires that we perform the loads first before issuing the stores.
10503 // Apart from that, this loop is pretty much doing the same thing as the
10504 // memcpy codegen function.
10505 unsigned CurrOffset = 0;
10506 SmallVector<Register, 16> LoadVals;
10507 for (auto CopyTy : MemOps) {
10508 // Construct MMO for the load.
10509 auto *LoadMMO =
10510 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10511
10512 // Create the load.
10513 Register LoadPtr = Src;
10514 if (CurrOffset != 0) {
10515 LLT SrcTy = MRI.getType(Src);
10516 auto Offset =
10517 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10518 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10519 }
10520 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10521 CurrOffset += CopyTy.getSizeInBytes();
10522 }
10523
10524 CurrOffset = 0;
10525 for (unsigned I = 0; I < MemOps.size(); ++I) {
10526 LLT CopyTy = MemOps[I];
10527 // Now store the values loaded.
10528 auto *StoreMMO =
10529 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10530
10531 Register StorePtr = Dst;
10532 if (CurrOffset != 0) {
10533 LLT DstTy = MRI.getType(Dst);
10534 auto Offset =
10535 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10536 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10537 }
10538 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10539 CurrOffset += CopyTy.getSizeInBytes();
10540 }
10541 MI.eraseFromParent();
10542 return Legalized;
10543}
10544
10545LegalizerHelper::LegalizeResult
10546LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
10547 const unsigned Opc = MI.getOpcode();
10548 // This combine is fairly complex so it's not written with a separate
10549 // matcher function.
10550 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10551 Opc == TargetOpcode::G_MEMSET) &&
10552 "Expected memcpy like instruction");
10553
10554 auto MMOIt = MI.memoperands_begin();
10555 const MachineMemOperand *MemOp = *MMOIt;
10556
10557 Align DstAlign = MemOp->getBaseAlign();
10558 Align SrcAlign;
10559 auto [Dst, Src, Len] = MI.getFirst3Regs();
10560
10561 if (Opc != TargetOpcode::G_MEMSET) {
10562 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10563 MemOp = *(++MMOIt);
10564 SrcAlign = MemOp->getBaseAlign();
10565 }
10566
10567 // See if this is a constant length copy
10568 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10569 if (!LenVRegAndVal)
10570 return UnableToLegalize;
10571 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10572
10573 if (KnownLen == 0) {
10574 MI.eraseFromParent();
10575 return Legalized;
10576 }
10577
10578 if (MaxLen && KnownLen > MaxLen)
10579 return UnableToLegalize;
10580
10581 bool IsVolatile = MemOp->isVolatile();
10582 if (Opc == TargetOpcode::G_MEMCPY) {
10583 auto &MF = *MI.getParent()->getParent();
10584 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10585 bool OptSize = shouldLowerMemFuncForSize(MF);
10586 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10587 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10588 IsVolatile);
10589 }
10590 if (Opc == TargetOpcode::G_MEMMOVE)
10591 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10592 if (Opc == TargetOpcode::G_MEMSET)
10593 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10594 return UnableToLegalize;
10595}
unsigned const MachineRegisterInfo * MRI
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
APInt bitcastToAPInt() const
Definition APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1098
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
void negate()
Negate this APInt in place.
Definition APInt.h:1468
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:681
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:692
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:686
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:689
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:685
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:687
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:694
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:691
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:688
bool isSigned() const
Definition InstrTypes.h:932
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791
const APFloat & getValueAPF() const
Definition Constants.h:320
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:199
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
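To make the widening idea concrete, here is a hedged sketch of what such a step conceptually emits for a 16-bit add widened to 32 bits; the extension opcode actually chosen by widenScalar depends on the opcode and operand, and LHS16, RHS16, and Dst16 are hypothetical registers.
// Illustrative only: widen s16 G_ADD operands to s32, add there, truncate back.
LLT S32 = LLT::scalar(32);
auto LHSExt = MIRBuilder.buildAnyExt(S32, LHS16);   // Extension kind is a choice.
auto RHSExt = MIRBuilder.buildAnyExt(S32, RHS16);
auto WideAdd = MIRBuilder.buildAdd(S32, LHSExt, RHSExt);
MIRBuilder.buildTrunc(Dst16, WideAdd);              // Narrow the result back to s16.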
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
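As a rough, hypothetical illustration (not the actual Legalizer pass), a caller typically dispatches on the returned LegalizeResult as sketched below; MF, Observer, MIRBuilder and the Worklist of instructions are assumed to exist, and the LostDebugLocObserver tag is a placeholder.
// Hedged sketch of consuming legalizeInstrStep's result codes; the real pass
// adds worklist management for newly created instructions and artifacts.
LegalizerHelper Helper(MF, Observer, MIRBuilder);
LostDebugLocObserver LocObserver("legalizer-sketch");
while (!Worklist.empty()) {                        // Worklist: assumed SmallVector<MachineInstr *>.
  MachineInstr &MI = *Worklist.pop_back_val();
  switch (Helper.legalizeInstrStep(MI, LocObserver)) {
  case LegalizerHelper::AlreadyLegal:
    break;                                         // Nothing to do for this instruction.
  case LegalizerHelper::Legalized:
    LocObserver.checkpoint();                      // MF changed; debug locations assessed here.
    break;
  case LegalizerHelper::UnableToLegalize:
    report_fatal_error("unable to legalize instruction");
  }
}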
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:64
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:71
A single uniqued string.
Definition Metadata.h:720
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:617
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine for which this machine code is being compiled.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
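Taken together with MachineConstantPool::getConstantPoolIndex, a constant can be materialized through the pool roughly as sketched below; this is an assumption-laden illustration (the constant CFP, the destination DstReg, the pointer width, and the alignment are placeholders), not necessarily the exact strategy the helper uses.
// Hedged sketch: place a constant in the constant pool and load it back.
MachineFunction &MF = MIRBuilder.getMF();
Align Alignment = Align(8);                                  // Placeholder alignment.
unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
LLT PtrTy = LLT::pointer(0, 64);                             // Assumed address space and size.
auto CPAddr = MIRBuilder.buildConstantPool(PtrTy, Idx);
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
    LLT::scalar(64), Alignment);
MIRBuilder.buildLoad(DstReg, CPAddr, *MMO);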
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:414
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:611
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:283
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
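These RTLIB helpers select a runtime routine for a given source/result type pair; the hedged sketch below shows such a selection for an FP-to-signed-int conversion, where SrcTy and DstTy are assumed LLTs and the EVT conversion goes through getApproximateEVTForLLT.
// Hedged sketch: choose an FPTOSINT libcall for a conversion, if one exists.
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
EVT SrcEVT = getApproximateEVTForLLT(SrcTy, Ctx);
EVT DstEVT = getApproximateEVTForLLT(DstTy, Ctx);
RTLIB::Libcall LC = RTLIB::getFPTOSINT(SrcEVT, DstEVT);
if (LC == RTLIB::UNKNOWN_LIBCALL)
  return LegalizerHelper::UnableToLegalize;        // No suitable runtime routine.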
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:330
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:843
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2029
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:651
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1685
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:232
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2138
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1563
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1620
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1180
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:390
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1189
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:362
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:336
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
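For quick reference, the power-of-two helpers above behave as in this small worked example; the values were checked by hand and the snippet assumes <cassert> is available.
// Worked example of the MathExtras power-of-two helpers.
static_assert(isPowerOf2_32(64u), "64 is 2^6");
assert(!isPowerOf2_64(96) && "96 is not a power of two");
assert(PowerOf2Ceil(96) == 128 && "round up to the next power of two");
assert(NextPowerOf2(64) == 128 && "strictly greater than the argument");
assert(Log2_32(64) == 6);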
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:506
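A hedged illustration of extractParts in the narrow-scalar style: split a wide virtual register into s32 pieces, process each piece, and merge the results back; WideReg, DstReg, and the per-part COPY stand-in are assumptions rather than what any particular legalization emits.
// Hedged sketch: break a 128-bit register into four s32 parts and reassemble.
LLT S32 = LLT::scalar(32);
SmallVector<Register, 4> Parts, Results;
extractParts(WideReg, S32, /*NumParts=*/4, Parts, MIRBuilder, MRI);
for (Register Part : Parts)
  Results.push_back(MIRBuilder.buildCopy(S32, Part).getReg(0));  // Stand-in per-part op.
MIRBuilder.buildMergeLikeInstr(DstReg, Results);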
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1837
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:241
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:352
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1899
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
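The alignment helpers above compose as in this hedged example; the numbers are chosen only to show the relationships and the snippet assumes <cassert>.
// Worked example of the Align helper functions.
Align A = assumeAligned(0);                          // 0 is treated as alignment 1.
assert(A.value() == 1);
assert(alignTo(10, Align(8)) == 16);                 // Round a size up to a multiple of 8.
assert(commonAlignment(Align(16), 24) == Align(8));  // Alignment still valid at offset 24.
assert(Log2(Align(16)) == 4);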
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1277
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:280
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:609
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:378
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)