LLVM 21.0.0git
LegalizerHelper.cpp
Go to the documentation of this file.
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
83
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
111
113 GISelChangeObserver &Observer,
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
117
120 LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
122
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
/// Reassemble \p DstReg (of type \p ResultTy) from \p PartRegs, each of type
/// \p PartTy, plus an optional trailing leftover piece of type \p LeftoverTy
/// held in \p LeftoverRegs.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  // Uniform case: every part has the same type, so one merge-like
  // instruction rebuilds the result.
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
      return;
    }

    // Vector result: concatenate sub-vectors, or build from scalars.
    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  // Merge sub-vectors with different number of elements and insert into DstReg.
  if (ResultTy.isVector()) {
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    SmallVector<Register, 8> AllRegs(PartRegs.begin(), PartRegs.end());
    AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
    return mergeMixedSubvectors(DstReg, AllRegs);
  }

  // Scalar result with mixed part types: break everything down to the common
  // GCD type, pad up to the LCM type, then extract the result bits.
  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
/// Map a generic opcode plus scalar bit width to the RTLIB libcall that
/// implements it. Integer ops support 32/64/128-bit widths; FP ops also
/// support 80-bit (x87 long double). Asserts on an unsupported size.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_LROUND:
    RTLIBCASE(LROUND_F);
  case TargetOpcode::G_LLROUND:
    RTLIBCASE(LLROUND_F);
  case TargetOpcode::G_MUL:
    RTLIBCASE_INT(MUL_I);
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FEXP10:
    RTLIBCASE(EXP10_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FPOWI:
    RTLIBCASE(POWI_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FTAN:
    RTLIBCASE(TAN_F);
  case TargetOpcode::G_FASIN:
    RTLIBCASE(ASIN_F);
  case TargetOpcode::G_FACOS:
    RTLIBCASE(ACOS_F);
  case TargetOpcode::G_FATAN:
    RTLIBCASE(ATAN_F);
  case TargetOpcode::G_FATAN2:
    RTLIBCASE(ATAN2_F);
  case TargetOpcode::G_FSINH:
    RTLIBCASE(SINH_F);
  case TargetOpcode::G_FCOSH:
    RTLIBCASE(COSH_F);
  case TargetOpcode::G_FTANH:
    RTLIBCASE(TANH_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FLDEXP:
    RTLIBCASE(LDEXP_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    RTLIBCASE(TRUNC_F);
  case TargetOpcode::G_INTRINSIC_ROUND:
    RTLIBCASE(ROUND_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  case TargetOpcode::G_INTRINSIC_LRINT:
    RTLIBCASE(LRINT_F);
  case TargetOpcode::G_INTRINSIC_LLRINT:
    RTLIBCASE(LLRINT_F);
  }
  llvm_unreachable("Unknown libcall function");
#undef RTLIBCASE_INT
#undef RTLIBCASE
}
509
510/// True if an instruction is in tail position in its caller. Intended for
511/// legalizing libcalls as tail calls when possible.
514 const TargetInstrInfo &TII,
516 MachineBasicBlock &MBB = *MI.getParent();
517 const Function &F = MBB.getParent()->getFunction();
518
519 // Conservatively require the attributes of the call to match those of
520 // the return. Ignore NoAlias and NonNull because they don't affect the
521 // call sequence.
522 AttributeList CallerAttrs = F.getAttributes();
523 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
524 .removeAttribute(Attribute::NoAlias)
525 .removeAttribute(Attribute::NonNull)
526 .hasAttributes())
527 return false;
528
529 // It's not safe to eliminate the sign / zero extension of the return value.
530 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
531 CallerAttrs.hasRetAttr(Attribute::SExt))
532 return false;
533
534 // Only tail call if the following instruction is a standard return or if we
535 // have a `thisreturn` callee, and a sequence like:
536 //
537 // G_MEMCPY %0, %1, %2
538 // $x0 = COPY %0
539 // RET_ReallyLR implicit $x0
540 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
541 if (Next != MBB.instr_end() && Next->isCopy()) {
542 if (MI.getOpcode() == TargetOpcode::G_BZERO)
543 return false;
544
545 // For MEMCPY/MOMMOVE/MEMSET these will be the first use (the dst), as the
546 // mempy/etc routines return the same parameter. For other it will be the
547 // returned value.
548 Register VReg = MI.getOperand(0).getReg();
549 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
550 return false;
551
552 Register PReg = Next->getOperand(0).getReg();
553 if (!PReg.isPhysical())
554 return false;
555
556 auto Ret = next_nodbg(Next, MBB.instr_end());
557 if (Ret == MBB.instr_end() || !Ret->isReturn())
558 return false;
559
560 if (Ret->getNumImplicitOperands() != 1)
561 return false;
562
563 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
564 return false;
565
566 // Skip over the COPY that we just validated.
567 Next = Ret;
568 }
569
570 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
571 return false;
572
573 return true;
574}
575
578 const CallLowering::ArgInfo &Result,
580 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
581 MachineInstr *MI) {
582 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
583
585 Info.CallConv = CC;
587 Info.OrigRet = Result;
588 if (MI)
589 Info.IsTailCall =
590 (Result.Ty->isVoidTy() ||
591 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
592 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
593 *MIRBuilder.getMRI());
594
595 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
596 if (!CLI.lowerCall(MIRBuilder, Info))
598
599 if (MI && Info.LoweredTailCall) {
600 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
601
602 // Check debug locations before removing the return.
603 LocObserver.checkpoint(true);
604
605 // We must have a return following the call (or debug insts) to get past
606 // isLibCallInTailPosition.
607 do {
608 MachineInstr *Next = MI->getNextNode();
609 assert(Next &&
610 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
611 "Expected instr following MI to be return or debug inst?");
612 // We lowered a tail call, so the call is now the return from the block.
613 // Delete the old return.
614 Next->eraseFromParent();
615 } while (MI->getNextNode());
616
617 // We expect to lose the debug location from the return.
618 LocObserver.checkpoint(false);
619 }
621}
622
625 const CallLowering::ArgInfo &Result,
627 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
628 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
629 const char *Name = TLI.getLibcallName(Libcall);
630 if (!Name)
632 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
633 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
634}
635
636// Useful for libcalls where all operands have the same type.
639 Type *OpType, LostDebugLocObserver &LocObserver) {
640 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
641
642 // FIXME: What does the original arg index mean here?
644 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
645 Args.push_back({MO.getReg(), OpType, 0});
646 return createLibcall(MIRBuilder, Libcall,
647 {MI.getOperand(0).getReg(), OpType, 0}, Args,
648 LocObserver, &MI);
649}
650
653 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
654 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
655
657 // Add all the args, except for the last which is an imm denoting 'tail'.
658 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
659 Register Reg = MI.getOperand(i).getReg();
660
661 // Need derive an IR type for call lowering.
662 LLT OpLLT = MRI.getType(Reg);
663 Type *OpTy = nullptr;
664 if (OpLLT.isPointer())
665 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
666 else
667 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
668 Args.push_back({Reg, OpTy, 0});
669 }
670
671 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
672 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
673 RTLIB::Libcall RTLibcall;
674 unsigned Opc = MI.getOpcode();
675 switch (Opc) {
676 case TargetOpcode::G_BZERO:
677 RTLibcall = RTLIB::BZERO;
678 break;
679 case TargetOpcode::G_MEMCPY:
680 RTLibcall = RTLIB::MEMCPY;
681 Args[0].Flags[0].setReturned();
682 break;
683 case TargetOpcode::G_MEMMOVE:
684 RTLibcall = RTLIB::MEMMOVE;
685 Args[0].Flags[0].setReturned();
686 break;
687 case TargetOpcode::G_MEMSET:
688 RTLibcall = RTLIB::MEMSET;
689 Args[0].Flags[0].setReturned();
690 break;
691 default:
692 llvm_unreachable("unsupported opcode");
693 }
694 const char *Name = TLI.getLibcallName(RTLibcall);
695
696 // Unsupported libcall on the target.
697 if (!Name) {
698 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
699 << MIRBuilder.getTII().getName(Opc) << "\n");
701 }
702
704 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
706 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
707 Info.IsTailCall =
708 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
709 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
710
711 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
712 if (!CLI.lowerCall(MIRBuilder, Info))
714
715 if (Info.LoweredTailCall) {
716 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
717
718 // Check debug locations before removing the return.
719 LocObserver.checkpoint(true);
720
721 // We must have a return following the call (or debug insts) to get past
722 // isLibCallInTailPosition.
723 do {
724 MachineInstr *Next = MI.getNextNode();
725 assert(Next &&
726 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
727 "Expected instr following MI to be return or debug inst?");
728 // We lowered a tail call, so the call is now the return from the block.
729 // Delete the old return.
730 Next->eraseFromParent();
731 } while (MI.getNextNode());
732
733 // We expect to lose the debug location from the return.
734 LocObserver.checkpoint(false);
735 }
736
738}
739
741 unsigned Opc = MI.getOpcode();
742 auto &AtomicMI = cast<GMemOperation>(MI);
743 auto &MMO = AtomicMI.getMMO();
744 auto Ordering = MMO.getMergedOrdering();
745 LLT MemType = MMO.getMemoryType();
746 uint64_t MemSize = MemType.getSizeInBytes();
747 if (MemType.isVector())
748 return RTLIB::UNKNOWN_LIBCALL;
749
750#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
751#define LCALL5(A) \
752 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
753 switch (Opc) {
754 case TargetOpcode::G_ATOMIC_CMPXCHG:
755 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
756 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
757 return getOutlineAtomicHelper(LC, Ordering, MemSize);
758 }
759 case TargetOpcode::G_ATOMICRMW_XCHG: {
760 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
761 return getOutlineAtomicHelper(LC, Ordering, MemSize);
762 }
763 case TargetOpcode::G_ATOMICRMW_ADD:
764 case TargetOpcode::G_ATOMICRMW_SUB: {
765 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
766 return getOutlineAtomicHelper(LC, Ordering, MemSize);
767 }
768 case TargetOpcode::G_ATOMICRMW_AND: {
769 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
770 return getOutlineAtomicHelper(LC, Ordering, MemSize);
771 }
772 case TargetOpcode::G_ATOMICRMW_OR: {
773 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
774 return getOutlineAtomicHelper(LC, Ordering, MemSize);
775 }
776 case TargetOpcode::G_ATOMICRMW_XOR: {
777 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
778 return getOutlineAtomicHelper(LC, Ordering, MemSize);
779 }
780 default:
781 return RTLIB::UNKNOWN_LIBCALL;
782 }
783#undef LCALLS
784#undef LCALL5
785}
786
789 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
790
791 Type *RetTy;
792 SmallVector<Register> RetRegs;
794 unsigned Opc = MI.getOpcode();
795 switch (Opc) {
796 case TargetOpcode::G_ATOMIC_CMPXCHG:
797 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
799 LLT SuccessLLT;
800 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
801 MI.getFirst4RegLLTs();
802 RetRegs.push_back(Ret);
803 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
804 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
805 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
806 NewLLT) = MI.getFirst5RegLLTs();
807 RetRegs.push_back(Success);
809 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
810 }
811 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
812 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
813 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
814 break;
815 }
816 case TargetOpcode::G_ATOMICRMW_XCHG:
817 case TargetOpcode::G_ATOMICRMW_ADD:
818 case TargetOpcode::G_ATOMICRMW_SUB:
819 case TargetOpcode::G_ATOMICRMW_AND:
820 case TargetOpcode::G_ATOMICRMW_OR:
821 case TargetOpcode::G_ATOMICRMW_XOR: {
822 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
823 RetRegs.push_back(Ret);
824 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
825 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
826 Val =
827 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
828 .getReg(0);
829 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
830 Val =
831 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
832 .getReg(0);
833 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
834 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
835 break;
836 }
837 default:
838 llvm_unreachable("unsupported opcode");
839 }
840
841 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
842 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
844 const char *Name = TLI.getLibcallName(RTLibcall);
845
846 // Unsupported libcall on the target.
847 if (!Name) {
848 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
849 << MIRBuilder.getTII().getName(Opc) << "\n");
851 }
852
854 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
856 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
857
858 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
859 if (!CLI.lowerCall(MIRBuilder, Info))
861
863}
864
865static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
866 Type *FromType) {
867 auto ToMVT = MVT::getVT(ToType);
868 auto FromMVT = MVT::getVT(FromType);
869
870 switch (Opcode) {
871 case TargetOpcode::G_FPEXT:
872 return RTLIB::getFPEXT(FromMVT, ToMVT);
873 case TargetOpcode::G_FPTRUNC:
874 return RTLIB::getFPROUND(FromMVT, ToMVT);
875 case TargetOpcode::G_FPTOSI:
876 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
877 case TargetOpcode::G_FPTOUI:
878 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
879 case TargetOpcode::G_SITOFP:
880 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
881 case TargetOpcode::G_UITOFP:
882 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
883 }
884 llvm_unreachable("Unsupported libcall function");
885}
886
889 Type *FromType, LostDebugLocObserver &LocObserver,
890 const TargetLowering &TLI, bool IsSigned = false) {
891 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
892 if (FromType->isIntegerTy()) {
893 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
894 Arg.Flags[0].setSExt();
895 else
896 Arg.Flags[0].setZExt();
897 }
898
899 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
900 return createLibcall(MIRBuilder, Libcall,
901 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
902 &MI);
903}
904
905static RTLIB::Libcall
907 RTLIB::Libcall RTLibcall;
908 switch (MI.getOpcode()) {
909 case TargetOpcode::G_GET_FPENV:
910 RTLibcall = RTLIB::FEGETENV;
911 break;
912 case TargetOpcode::G_SET_FPENV:
913 case TargetOpcode::G_RESET_FPENV:
914 RTLibcall = RTLIB::FESETENV;
915 break;
916 case TargetOpcode::G_GET_FPMODE:
917 RTLibcall = RTLIB::FEGETMODE;
918 break;
919 case TargetOpcode::G_SET_FPMODE:
920 case TargetOpcode::G_RESET_FPMODE:
921 RTLibcall = RTLIB::FESETMODE;
922 break;
923 default:
924 llvm_unreachable("Unexpected opcode");
925 }
926 return RTLibcall;
927}
928
929// Some library functions that read FP state (fegetmode, fegetenv) write the
930// state into a region in memory. IR intrinsics that do the same operations
931// (get_fpmode, get_fpenv) return the state as integer value. To implement these
932// intrinsics via the library functions, we need to use temporary variable,
933// for example:
934//
935// %0:_(s32) = G_GET_FPMODE
936//
937// is transformed to:
938//
939// %1:_(p0) = G_FRAME_INDEX %stack.0
940// BL &fegetmode
941// %0:_(s32) = G_LOAD % 1
942//
944LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
946 LostDebugLocObserver &LocObserver) {
948 auto &MF = MIRBuilder.getMF();
949 auto &MRI = *MIRBuilder.getMRI();
950 auto &Ctx = MF.getFunction().getContext();
951
952 // Create temporary, where library function will put the read state.
953 Register Dst = MI.getOperand(0).getReg();
954 LLT StateTy = MRI.getType(Dst);
955 TypeSize StateSize = StateTy.getSizeInBytes();
957 MachinePointerInfo TempPtrInfo;
958 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
959
960 // Create a call to library function, with the temporary as an argument.
961 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
962 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
964 auto Res =
965 createLibcall(MIRBuilder, RTLibcall,
967 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
968 LocObserver, nullptr);
970 return Res;
971
972 // Create a load from the temporary.
973 MachineMemOperand *MMO = MF.getMachineMemOperand(
974 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
975 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
976
978}
979
980// Similar to `createGetStateLibcall` the function calls a library function
981// using transient space in stack. In this case the library function reads
982// content of memory region.
984LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
986 LostDebugLocObserver &LocObserver) {
988 auto &MF = MIRBuilder.getMF();
989 auto &MRI = *MIRBuilder.getMRI();
990 auto &Ctx = MF.getFunction().getContext();
991
992 // Create temporary, where library function will get the new state.
993 Register Src = MI.getOperand(0).getReg();
994 LLT StateTy = MRI.getType(Src);
995 TypeSize StateSize = StateTy.getSizeInBytes();
997 MachinePointerInfo TempPtrInfo;
998 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
999
1000 // Put the new state into the temporary.
1001 MachineMemOperand *MMO = MF.getMachineMemOperand(
1002 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1003 MIRBuilder.buildStore(Src, Temp, *MMO);
1004
1005 // Create a call to library function, with the temporary as an argument.
1006 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1007 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1009 return createLibcall(MIRBuilder, RTLibcall,
1011 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1012 LocObserver, nullptr);
1013}
1014
1015/// Returns the corresponding libcall for the given Pred and
1016/// the ICMP predicate that should be generated to compare with #0
1017/// after the libcall.
// The helper macro expands to a switch over the (captured) operand bit width,
// returning the 32/64/128-bit flavour of the soft-float comparison libcall
// paired with the integer predicate to apply to the libcall's i32 result.
// NOTE(review): this listing elides line 1019 (the parameter list declaring
// `Pred` and `Size`) and the `RTLIBCASE_CMP(...)` invocation after each
// `case` label (lines 1036/1038/1040/1042/1044/1046/1048) — the cases below
// appear empty only because of the extraction.
1018static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1020#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1021 do { \
1022 switch (Size) { \
1023 case 32: \
1024 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1025 case 64: \
1026 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1027 case 128: \
1028 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1029 default: \
1030 llvm_unreachable("unexpected size"); \
1031 } \
1032 } while (0)
1033
1034 switch (Pred) {
1035 case CmpInst::FCMP_OEQ:
1037 case CmpInst::FCMP_UNE:
1039 case CmpInst::FCMP_OGE:
1041 case CmpInst::FCMP_OLT:
1043 case CmpInst::FCMP_OLE:
1045 case CmpInst::FCMP_OGT:
1047 case CmpInst::FCMP_UNO:
1049 default:
// Predicates with no single-libcall mapping (e.g. the composite ones handled
// in createFCMPLibcall's switch) report "unknown" so the caller can combine
// multiple libcalls instead.
1050 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1051 }
1052}
1053
// Lower a G_FCMP into soft-float comparison libcall(s) plus an integer
// compare of the libcall's i32 result against 0. Simple predicates map to a
// single libcall (via getFCMPLibcallDesc); composite predicates (UEQ, ONE,
// ULT/UGE/UGT/ULE/ORD) are built from two libcalls or an inverted predicate.
// NOTE(review): this listing elides lines 1054/1056 (return type and the
// `MachineInstr &MI` parameter) and the `getFCMPLibcallDesc(...)` call lines
// 1116/1120/1137/1142/1166 that initialize the structured bindings below.
1055LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1057 LostDebugLocObserver &LocObserver) {
1058 auto &MF = MIRBuilder.getMF();
1059 auto &Ctx = MF.getFunction().getContext();
1060 const GFCmp *Cmp = cast<GFCmp>(&MI);
1061
// Only 32/64/128-bit scalar FP operands with matching LHS/RHS types are
// supported here.
1062 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1063 unsigned Size = OpLLT.getSizeInBits();
1064 if ((Size != 32 && Size != 64 && Size != 128) ||
1065 OpLLT != MRI.getType(Cmp->getRHSReg()))
1066 return UnableToLegalize;
1067
1068 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1069
1070 // DstReg type is s32
1071 const Register DstReg = Cmp->getReg(0);
1072 LLT DstTy = MRI.getType(DstReg);
1073 const auto Cond = Cmp->getCond();
1074
1075 // Reference:
1076 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1077 // Generates a libcall followed by ICMP.
1078 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1079 const CmpInst::Predicate ICmpPred,
1080 const DstOp &Res) -> Register {
1081 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1082 constexpr LLT TempLLT = LLT::scalar(32);
1083 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1084 // Generate libcall, holding result in Temp
1085 const auto Status = createLibcall(
1086 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1087 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1088 LocObserver, &MI);
1089 if (!Status)
// An invalid (default-constructed) Register signals failure to the caller.
1090 return {};
1091
1092 // Compare temp with #0 to get the final result.
1093 return MIRBuilder
1094 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1095 .getReg(0);
1096 };
1097
1098 // Simple case if we have a direct mapping from predicate to libcall
1099 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1100 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1101 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1102 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1103 return Legalized;
1104 }
1105 return UnableToLegalize;
1106 }
1107
1108 // No direct mapping found, should be generated as combination of libcalls.
1109
1110 switch (Cond) {
1111 case CmpInst::FCMP_UEQ: {
1112 // FCMP_UEQ: unordered or equal
1113 // Convert into (FCMP_OEQ || FCMP_UNO).
1114
1115 const auto [OeqLibcall, OeqPred] =
1117 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1118
1119 const auto [UnoLibcall, UnoPred] =
1121 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1122 if (Oeq && Uno)
1123 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1124 else
1125 return UnableToLegalize;
1126
1127 break;
1128 }
1129 case CmpInst::FCMP_ONE: {
1130 // FCMP_ONE: ordered and operands are unequal
1131 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1132
1133 // We inverse the predicate instead of generating a NOT
1134 // to save one instruction.
1135 // On AArch64 isel can even select two cmp into a single ccmp.
1136 const auto [OeqLibcall, OeqPred] =
1138 const auto NotOeq =
1139 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1140
1141 const auto [UnoLibcall, UnoPred] =
1143 const auto NotUno =
1144 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1145
1146 if (NotOeq && NotUno)
1147 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1148 else
1149 return UnableToLegalize;
1150
1151 break;
1152 }
1153 case CmpInst::FCMP_ULT:
1154 case CmpInst::FCMP_UGE:
1155 case CmpInst::FCMP_UGT:
1156 case CmpInst::FCMP_ULE:
1157 case CmpInst::FCMP_ORD: {
1158 // Convert into: !(inverse(Pred))
1159 // E.g. FCMP_ULT becomes !FCMP_OGE
1160 // This is equivalent to the following, but saves some instructions.
1161 // MIRBuilder.buildNot(
1162 // PredTy,
1163 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1164 // Op1, Op2));
1165 const auto [InversedLibcall, InversedPred] =
1167 if (!BuildLibcall(InversedLibcall,
1168 CmpInst::getInversePredicate(InversedPred), DstReg))
1169 return UnableToLegalize;
1170 break;
1171 }
1172 default:
1173 return UnableToLegalize;
1174 }
1175
1176 return Legalized;
1177}
1178
1179// The function is used to legalize operations that set default environment
1180// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1181// On most targets supported in glibc FE_DFL_MODE is defined as
1182// `((const femode_t *) -1)`. Such assumption is used here. If for some target
1183// it is not true, the target must provide custom lowering.
// Builds the all-ones pointer constant via G_CONSTANT(-1) + G_INTTOPTR and
// passes it as the single pointer argument of the reset libcall.
// NOTE(review): this listing elides lines 1184/1186/1188 (return type, the
// `MachineInstr &MI` parameter, opening of the body) and 1201/1203 (the
// `RTLibcall` selection and part of the createLibcall argument list).
1185LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1187 LostDebugLocObserver &LocObserver) {
1189 auto &MF = MIRBuilder.getMF();
1190 auto &Ctx = MF.getFunction().getContext();
1191
1192 // Create an argument for the library function.
1193 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1194 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1195 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1196 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1197 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1198 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1199 MIRBuilder.buildIntToPtr(Dest, DefValue);
1200
1202 return createLibcall(MIRBuilder, RTLibcall,
1204 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1205 LocObserver, &MI);
1206}
1207
// LegalizerHelper::libcall — dispatch table turning generic opcodes into
// runtime library calls. Each case validates the operand types/sizes it can
// handle, emits the call (directly or via a helper), and either returns early
// (after erasing MI itself) or breaks to the shared MI.eraseFromParent() at
// the bottom.
// NOTE(review): this listing elides the function header (lines 1208-1209,
// including the `MachineInstr &MI` / `LostDebugLocObserver &LocObserver`
// parameters) and several `LegalizeResult Status =` / declaration lines
// (1288, 1308, 1312, 1325, 1346, 1360, 1375) that feed the `Status` checks
// below.
1210 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1211
1212 switch (MI.getOpcode()) {
1213 default:
1214 return UnableToLegalize;
1215 case TargetOpcode::G_MUL:
1216 case TargetOpcode::G_SDIV:
1217 case TargetOpcode::G_UDIV:
1218 case TargetOpcode::G_SREM:
1219 case TargetOpcode::G_UREM:
1220 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
// Integer ops: libcall operates on an integer type of the same width.
1221 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1222 unsigned Size = LLTy.getSizeInBits();
1223 Type *HLTy = IntegerType::get(Ctx, Size);
1224 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1225 if (Status != Legalized)
1226 return Status;
1227 break;
1228 }
1229 case TargetOpcode::G_FADD:
1230 case TargetOpcode::G_FSUB:
1231 case TargetOpcode::G_FMUL:
1232 case TargetOpcode::G_FDIV:
1233 case TargetOpcode::G_FMA:
1234 case TargetOpcode::G_FPOW:
1235 case TargetOpcode::G_FREM:
1236 case TargetOpcode::G_FCOS:
1237 case TargetOpcode::G_FSIN:
1238 case TargetOpcode::G_FTAN:
1239 case TargetOpcode::G_FACOS:
1240 case TargetOpcode::G_FASIN:
1241 case TargetOpcode::G_FATAN:
1242 case TargetOpcode::G_FATAN2:
1243 case TargetOpcode::G_FCOSH:
1244 case TargetOpcode::G_FSINH:
1245 case TargetOpcode::G_FTANH:
1246 case TargetOpcode::G_FLOG10:
1247 case TargetOpcode::G_FLOG:
1248 case TargetOpcode::G_FLOG2:
1249 case TargetOpcode::G_FEXP:
1250 case TargetOpcode::G_FEXP2:
1251 case TargetOpcode::G_FEXP10:
1252 case TargetOpcode::G_FCEIL:
1253 case TargetOpcode::G_FFLOOR:
1254 case TargetOpcode::G_FMINNUM:
1255 case TargetOpcode::G_FMAXNUM:
1256 case TargetOpcode::G_FSQRT:
1257 case TargetOpcode::G_FRINT:
1258 case TargetOpcode::G_FNEARBYINT:
1259 case TargetOpcode::G_INTRINSIC_TRUNC:
1260 case TargetOpcode::G_INTRINSIC_ROUND:
1261 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
// FP ops: only f32/f64/f80/f128 widths have libm-style libcalls.
1262 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1263 unsigned Size = LLTy.getSizeInBits();
1264 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1265 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1266 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1267 return UnableToLegalize;
1268 }
1269 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1270 if (Status != Legalized)
1271 return Status;
1272 break;
1273 }
1274 case TargetOpcode::G_LROUND:
1275 case TargetOpcode::G_LLROUND:
1276 case TargetOpcode::G_INTRINSIC_LRINT:
1277 case TargetOpcode::G_INTRINSIC_LLRINT: {
// FP -> integer rounding calls: FP source type from operand 1, integer
// result type from operand 0.
1278 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1279 unsigned Size = LLTy.getSizeInBits();
1280 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1281 Type *ITy = IntegerType::get(
1282 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1283 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1284 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1285 return UnableToLegalize;
1286 }
1287 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1289 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1290 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1291 if (Status != Legalized)
1292 return Status;
1293 MI.eraseFromParent();
1294 return Legalized;
1295 }
1296 case TargetOpcode::G_FPOWI:
1297 case TargetOpcode::G_FLDEXP: {
// Mixed FP/integer-exponent calls; the integer argument is sign-extended
// (see the setSExt below).
1298 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1299 unsigned Size = LLTy.getSizeInBits();
1300 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1301 Type *ITy = IntegerType::get(
1302 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1303 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1304 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1305 return UnableToLegalize;
1306 }
1307 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1309 {MI.getOperand(1).getReg(), HLTy, 0},
1310 {MI.getOperand(2).getReg(), ITy, 1}};
1311 Args[1].Flags[0].setSExt();
1313 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1314 Args, LocObserver, &MI);
1315 if (Status != Legalized)
1316 return Status;
1317 break;
1318 }
1319 case TargetOpcode::G_FPEXT:
1320 case TargetOpcode::G_FPTRUNC: {
1321 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1322 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1323 if (!FromTy || !ToTy)
1324 return UnableToLegalize;
1326 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1327 if (Status != Legalized)
1328 return Status;
1329 break;
1330 }
1331 case TargetOpcode::G_FCMP: {
1332 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1333 if (Status != Legalized)
1334 return Status;
1335 MI.eraseFromParent();
1336 return Status;
1337 }
1338 case TargetOpcode::G_FPTOSI:
1339 case TargetOpcode::G_FPTOUI: {
1340 // FIXME: Support other types
1341 Type *FromTy =
1342 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1343 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1344 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1345 return UnableToLegalize;
1347 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1348 if (Status != Legalized)
1349 return Status;
1350 break;
1351 }
1352 case TargetOpcode::G_SITOFP:
1353 case TargetOpcode::G_UITOFP: {
1354 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1355 Type *ToTy =
1356 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1357 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1358 return UnableToLegalize;
1359 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1361 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1362 LocObserver, TLI, IsSigned);
1363 if (Status != Legalized)
1364 return Status;
1365 break;
1366 }
1367 case TargetOpcode::G_ATOMICRMW_XCHG:
1368 case TargetOpcode::G_ATOMICRMW_ADD:
1369 case TargetOpcode::G_ATOMICRMW_SUB:
1370 case TargetOpcode::G_ATOMICRMW_AND:
1371 case TargetOpcode::G_ATOMICRMW_OR:
1372 case TargetOpcode::G_ATOMICRMW_XOR:
1373 case TargetOpcode::G_ATOMIC_CMPXCHG:
1374 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1376 if (Status != Legalized)
1377 return Status;
1378 break;
1379 }
1380 case TargetOpcode::G_BZERO:
1381 case TargetOpcode::G_MEMCPY:
1382 case TargetOpcode::G_MEMMOVE:
1383 case TargetOpcode::G_MEMSET: {
// Memory intrinsics erase MI here and return immediately, bypassing the
// shared erase at the end of the function.
1384 LegalizeResult Result =
1385 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1386 if (Result != Legalized)
1387 return Result;
1388 MI.eraseFromParent();
1389 return Result;
1390 }
1391 case TargetOpcode::G_GET_FPENV:
1392 case TargetOpcode::G_GET_FPMODE: {
1393 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1394 if (Result != Legalized)
1395 return Result;
1396 break;
1397 }
1398 case TargetOpcode::G_SET_FPENV:
1399 case TargetOpcode::G_SET_FPMODE: {
1400 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1401 if (Result != Legalized)
1402 return Result;
1403 break;
1404 }
1405 case TargetOpcode::G_RESET_FPENV:
1406 case TargetOpcode::G_RESET_FPMODE: {
1407 LegalizeResult Result =
1408 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1409 if (Result != Legalized)
1410 return Result;
1411 break;
1412 }
1413 }
1414
// Shared cleanup for all cases that `break` out of the switch.
1415 MI.eraseFromParent();
1416 return Legalized;
1417}
1418
// LegalizerHelper::narrowScalar — break an operation on a wide scalar type
// into operations on NarrowTy pieces (unmerge / per-part op / merge), per
// opcode. TypeIdx selects which type of the instruction is being narrowed.
// NOTE(review): this listing elides the function header (line 1419 with the
// `MachineInstr &MI` parameter) and several interior declarations (e.g. the
// `SmallVector` at 1452, `Observer` bracketing calls such as 1661/1663,
// 1669/1671, and the `MachineInstrBuilder MIB` at 1701) — the code below is
// kept exactly as extracted.
1420 unsigned TypeIdx,
1421 LLT NarrowTy) {
1422 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1423 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1424
1425 switch (MI.getOpcode()) {
1426 default:
1427 return UnableToLegalize;
1428 case TargetOpcode::G_IMPLICIT_DEF: {
1429 Register DstReg = MI.getOperand(0).getReg();
1430 LLT DstTy = MRI.getType(DstReg);
1431
1432 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1433 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1434 // FIXME: Although this would also be legal for the general case, it causes
1435 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1436 // combines not being hit). This seems to be a problem related to the
1437 // artifact combiner.
1438 if (SizeOp0 % NarrowSize != 0) {
1439 LLT ImplicitTy = NarrowTy;
1440 if (DstTy.isVector())
1441 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1442
1443 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1444 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1445
1446 MI.eraseFromParent();
1447 return Legalized;
1448 }
1449
1450 int NumParts = SizeOp0 / NarrowSize;
1451
1453 for (int i = 0; i < NumParts; ++i)
1454 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1455
1456 if (DstTy.isVector())
1457 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1458 else
1459 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1460 MI.eraseFromParent();
1461 return Legalized;
1462 }
1463 case TargetOpcode::G_CONSTANT: {
// Split the constant into NarrowSize chunks (plus a smaller leftover chunk
// if the width is not an exact multiple) by shifting/truncating the APInt.
1464 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1465 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1466 unsigned TotalSize = Ty.getSizeInBits();
1467 unsigned NarrowSize = NarrowTy.getSizeInBits();
1468 int NumParts = TotalSize / NarrowSize;
1469
1470 SmallVector<Register, 4> PartRegs;
1471 for (int I = 0; I != NumParts; ++I) {
1472 unsigned Offset = I * NarrowSize;
1473 auto K = MIRBuilder.buildConstant(NarrowTy,
1474 Val.lshr(Offset).trunc(NarrowSize));
1475 PartRegs.push_back(K.getReg(0));
1476 }
1477
1478 LLT LeftoverTy;
1479 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1480 SmallVector<Register, 1> LeftoverRegs;
1481 if (LeftoverBits != 0) {
1482 LeftoverTy = LLT::scalar(LeftoverBits);
1483 auto K = MIRBuilder.buildConstant(
1484 LeftoverTy,
1485 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1486 LeftoverRegs.push_back(K.getReg(0));
1487 }
1488
1489 insertParts(MI.getOperand(0).getReg(),
1490 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1491
1492 MI.eraseFromParent();
1493 return Legalized;
1494 }
1495 case TargetOpcode::G_SEXT:
1496 case TargetOpcode::G_ZEXT:
1497 case TargetOpcode::G_ANYEXT:
1498 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1499 case TargetOpcode::G_TRUNC: {
1500 if (TypeIdx != 1)
1501 return UnableToLegalize;
1502
1503 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1504 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1505 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1506 return UnableToLegalize;
1507 }
1508
// Source is exactly two narrow halves; the truncated value is the low half.
1509 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1510 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1511 MI.eraseFromParent();
1512 return Legalized;
1513 }
1514 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1515 case TargetOpcode::G_FREEZE: {
1516 if (TypeIdx != 0)
1517 return UnableToLegalize;
1518
1519 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1520 // Should widen scalar first
1521 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1522 return UnableToLegalize;
1523
1524 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1526 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1527 Parts.push_back(
1528 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1529 .getReg(0));
1530 }
1531
1532 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1533 MI.eraseFromParent();
1534 return Legalized;
1535 }
1536 case TargetOpcode::G_ADD:
1537 case TargetOpcode::G_SUB:
1538 case TargetOpcode::G_SADDO:
1539 case TargetOpcode::G_SSUBO:
1540 case TargetOpcode::G_SADDE:
1541 case TargetOpcode::G_SSUBE:
1542 case TargetOpcode::G_UADDO:
1543 case TargetOpcode::G_USUBO:
1544 case TargetOpcode::G_UADDE:
1545 case TargetOpcode::G_USUBE:
1546 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1547 case TargetOpcode::G_MUL:
1548 case TargetOpcode::G_UMULH:
1549 return narrowScalarMul(MI, NarrowTy);
1550 case TargetOpcode::G_EXTRACT:
1551 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1552 case TargetOpcode::G_INSERT:
1553 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1554 case TargetOpcode::G_LOAD: {
1555 auto &LoadMI = cast<GLoad>(MI);
1556 Register DstReg = LoadMI.getDstReg();
1557 LLT DstTy = MRI.getType(DstReg);
1558 if (DstTy.isVector())
1559 return UnableToLegalize;
1560
// Extending load (memory narrower than the result): load at NarrowTy and
// any-extend to the destination instead of splitting.
1561 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1562 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1563 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1564 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1565 LoadMI.eraseFromParent();
1566 return Legalized;
1567 }
1568
1569 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1570 }
1571 case TargetOpcode::G_ZEXTLOAD:
1572 case TargetOpcode::G_SEXTLOAD: {
1573 auto &LoadMI = cast<GExtLoad>(MI);
1574 Register DstReg = LoadMI.getDstReg();
1575 Register PtrReg = LoadMI.getPointerReg();
1576
1577 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1578 auto &MMO = LoadMI.getMMO();
1579 unsigned MemSize = MMO.getSizeInBits().getValue();
1580
1581 if (MemSize == NarrowSize) {
1582 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1583 } else if (MemSize < NarrowSize) {
1584 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1585 } else if (MemSize > NarrowSize) {
1586 // FIXME: Need to split the load.
1587 return UnableToLegalize;
1588 }
1589
1590 if (isa<GZExtLoad>(LoadMI))
1591 MIRBuilder.buildZExt(DstReg, TmpReg);
1592 else
1593 MIRBuilder.buildSExt(DstReg, TmpReg);
1594
1595 LoadMI.eraseFromParent();
1596 return Legalized;
1597 }
1598 case TargetOpcode::G_STORE: {
1599 auto &StoreMI = cast<GStore>(MI);
1600
1601 Register SrcReg = StoreMI.getValueReg();
1602 LLT SrcTy = MRI.getType(SrcReg);
1603 if (SrcTy.isVector())
1604 return UnableToLegalize;
1605
1606 int NumParts = SizeOp0 / NarrowSize;
1607 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1608 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1609 if (SrcTy.isVector() && LeftoverBits != 0)
1610 return UnableToLegalize;
1611
// Truncating store (memory narrower than the value): truncate to NarrowTy
// and store that, instead of splitting.
1612 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1613 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1614 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1615 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1616 StoreMI.eraseFromParent();
1617 return Legalized;
1618 }
1619
1620 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1621 }
1622 case TargetOpcode::G_SELECT:
1623 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1624 case TargetOpcode::G_AND:
1625 case TargetOpcode::G_OR:
1626 case TargetOpcode::G_XOR: {
1627 // Legalize bitwise operation:
1628 // A = BinOp<Ty> B, C
1629 // into:
1630 // B1, ..., BN = G_UNMERGE_VALUES B
1631 // C1, ..., CN = G_UNMERGE_VALUES C
1632 // A1 = BinOp<Ty/N> B1, C2
1633 // ...
1634 // AN = BinOp<Ty/N> BN, CN
1635 // A = G_MERGE_VALUES A1, ..., AN
1636 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1637 }
1638 case TargetOpcode::G_SHL:
1639 case TargetOpcode::G_LSHR:
1640 case TargetOpcode::G_ASHR:
1641 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1642 case TargetOpcode::G_CTLZ:
1643 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1644 case TargetOpcode::G_CTTZ:
1645 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1646 case TargetOpcode::G_CTPOP:
1647 if (TypeIdx == 1)
1648 switch (MI.getOpcode()) {
1649 case TargetOpcode::G_CTLZ:
1650 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1651 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1652 case TargetOpcode::G_CTTZ:
1653 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1654 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1655 case TargetOpcode::G_CTPOP:
1656 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1657 default:
1658 return UnableToLegalize;
1659 }
1660
// TypeIdx == 0: the bit-count result itself is narrowed; it always fits,
// so zero-extend the narrow result back to the original width.
1662 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1664 return Legalized;
1665 case TargetOpcode::G_INTTOPTR:
1666 if (TypeIdx != 1)
1667 return UnableToLegalize;
1668
1670 narrowScalarSrc(MI, NarrowTy, 1);
1672 return Legalized;
1673 case TargetOpcode::G_PTRTOINT:
1674 if (TypeIdx != 0)
1675 return UnableToLegalize;
1676
1678 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1680 return Legalized;
1681 case TargetOpcode::G_PHI: {
1682 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1683 // NarrowSize.
1684 if (SizeOp0 % NarrowSize != 0)
1685 return UnableToLegalize;
1686
1687 unsigned NumParts = SizeOp0 / NarrowSize;
1688 SmallVector<Register, 2> DstRegs(NumParts);
1689 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1691 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1692 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1694 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1695 SrcRegs[i / 2], MIRBuilder, MRI);
1696 }
1697 MachineBasicBlock &MBB = *MI.getParent();
1699 for (unsigned i = 0; i < NumParts; ++i) {
1700 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1702 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1703 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1704 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1705 }
1707 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1709 MI.eraseFromParent();
1710 return Legalized;
1711 }
1712 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1713 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1714 if (TypeIdx != 2)
1715 return UnableToLegalize;
1716
// Only the index operand is narrowed; its position differs per opcode.
1717 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1719 narrowScalarSrc(MI, NarrowTy, OpIdx);
1721 return Legalized;
1722 }
1723 case TargetOpcode::G_ICMP: {
1724 Register LHS = MI.getOperand(2).getReg();
1725 LLT SrcTy = MRI.getType(LHS);
1726 CmpInst::Predicate Pred =
1727 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1728
1729 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1730 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1731 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1732 LHSLeftoverRegs, MIRBuilder, MRI))
1733 return UnableToLegalize;
1734
1735 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1736 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1737 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1738 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1739 return UnableToLegalize;
1740
1741 // We now have the LHS and RHS of the compare split into narrow-type
1742 // registers, plus potentially some leftover type.
1743 Register Dst = MI.getOperand(0).getReg();
1744 LLT ResTy = MRI.getType(Dst);
1745 if (ICmpInst::isEquality(Pred)) {
1746 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1747 // them together. For each equal part, the result should be all 0s. For
1748 // each non-equal part, we'll get at least one 1.
1749 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1751 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1752 auto LHS = std::get<0>(LHSAndRHS);
1753 auto RHS = std::get<1>(LHSAndRHS);
1754 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1755 Xors.push_back(Xor);
1756 }
1757
1758 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1759 // to the desired narrow type so that we can OR them together later.
1760 SmallVector<Register, 4> WidenedXors;
1761 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1762 auto LHS = std::get<0>(LHSAndRHS);
1763 auto RHS = std::get<1>(LHSAndRHS);
1764 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1765 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1766 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1767 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1768 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1769 }
1770
1771 // Now, for each part we broke up, we know if they are equal/not equal
1772 // based off the G_XOR. We can OR these all together and compare against
1773 // 0 to get the result.
1774 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1775 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1776 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1777 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1778 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1779 } else {
// Relational compare: chain per-part compares from low to high parts,
// selecting the lower parts' result when the current parts are equal.
1780 Register CmpIn;
1781 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1782 Register CmpOut;
1783 CmpInst::Predicate PartPred;
1784
1785 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1786 PartPred = Pred;
1787 CmpOut = Dst;
1788 } else {
1789 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1790 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1791 }
1792
1793 if (!CmpIn) {
1794 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1795 RHSPartRegs[I]);
1796 } else {
1797 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1798 RHSPartRegs[I]);
1800 LHSPartRegs[I], RHSPartRegs[I]);
1801 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1802 }
1803
1804 CmpIn = CmpOut;
1805 }
1806
1807 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1808 Register CmpOut;
1809 CmpInst::Predicate PartPred;
1810
1811 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1812 PartPred = Pred;
1813 CmpOut = Dst;
1814 } else {
1815 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1816 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1817 }
1818
1819 if (!CmpIn) {
1820 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1821 RHSLeftoverRegs[I]);
1822 } else {
1823 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1824 RHSLeftoverRegs[I]);
1825 auto CmpEq =
1827 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1828 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1829 }
1830
1831 CmpIn = CmpOut;
1832 }
1833 }
1834 MI.eraseFromParent();
1835 return Legalized;
1836 }
1837 case TargetOpcode::G_FCMP:
1838 if (TypeIdx != 0)
1839 return UnableToLegalize;
1840
1842 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1844 return Legalized;
1845
1846 case TargetOpcode::G_SEXT_INREG: {
1847 if (TypeIdx != 0)
1848 return UnableToLegalize;
1849
1850 int64_t SizeInBits = MI.getOperand(2).getImm();
1851
1852 // So long as the new type has more bits than the bits we're extending we
1853 // don't need to break it apart.
1854 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1856 // We don't lose any non-extension bits by truncating the src and
1857 // sign-extending the dst.
1858 MachineOperand &MO1 = MI.getOperand(1);
1859 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1860 MO1.setReg(TruncMIB.getReg(0));
1861
1862 MachineOperand &MO2 = MI.getOperand(0);
1863 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1865 MIRBuilder.buildSExt(MO2, DstExt);
1866 MO2.setReg(DstExt);
1868 return Legalized;
1869 }
1870
1871 // Break it apart. Components below the extension point are unmodified. The
1872 // component containing the extension point becomes a narrower SEXT_INREG.
1873 // Components above it are ashr'd from the component containing the
1874 // extension point.
1875 if (SizeOp0 % NarrowSize != 0)
1876 return UnableToLegalize;
1877 int NumParts = SizeOp0 / NarrowSize;
1878
1879 // List the registers where the destination will be scattered.
1881 // List the registers where the source will be split.
1883
1884 // Create all the temporary registers.
1885 for (int i = 0; i < NumParts; ++i) {
1886 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1887
1888 SrcRegs.push_back(SrcReg);
1889 }
1890
1891 // Explode the big arguments into smaller chunks.
1892 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1893
1894 Register AshrCstReg =
1895 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1896 .getReg(0);
1897 Register FullExtensionReg;
1898 Register PartialExtensionReg;
1899
1900 // Do the operation on each small part.
1901 for (int i = 0; i < NumParts; ++i) {
1902 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1903 DstRegs.push_back(SrcRegs[i]);
1904 PartialExtensionReg = DstRegs.back();
1905 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1906 assert(PartialExtensionReg &&
1907 "Expected to visit partial extension before full");
1908 if (FullExtensionReg) {
1909 DstRegs.push_back(FullExtensionReg);
1910 continue;
1911 }
1912 DstRegs.push_back(
1913 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1914 .getReg(0));
1915 FullExtensionReg = DstRegs.back();
1916 } else {
1917 DstRegs.push_back(
1919 .buildInstr(
1920 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1921 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1922 .getReg(0));
1923 PartialExtensionReg = DstRegs.back();
1924 }
1925 }
1926
1927 // Gather the destination registers into the final destination.
1928 Register DstReg = MI.getOperand(0).getReg();
1929 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1930 MI.eraseFromParent();
1931 return Legalized;
1932 }
1933 case TargetOpcode::G_BSWAP:
1934 case TargetOpcode::G_BITREVERSE: {
1935 if (SizeOp0 % NarrowSize != 0)
1936 return UnableToLegalize;
1937
1939 SmallVector<Register, 2> SrcRegs, DstRegs;
1940 unsigned NumParts = SizeOp0 / NarrowSize;
1941 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1942 MIRBuilder, MRI);
1943
// Apply the op per part and reverse the part order (high part of the
// source becomes low part of the result).
1944 for (unsigned i = 0; i < NumParts; ++i) {
1945 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1946 {SrcRegs[NumParts - 1 - i]});
1947 DstRegs.push_back(DstPart.getReg(0));
1948 }
1949
1950 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1951
1953 MI.eraseFromParent();
1954 return Legalized;
1955 }
1956 case TargetOpcode::G_PTR_ADD:
1957 case TargetOpcode::G_PTRMASK: {
1958 if (TypeIdx != 1)
1959 return UnableToLegalize;
1961 narrowScalarSrc(MI, NarrowTy, 2);
1963 return Legalized;
1964 }
1965 case TargetOpcode::G_FPTOUI:
1966 case TargetOpcode::G_FPTOSI:
1967 case TargetOpcode::G_FPTOUI_SAT:
1968 case TargetOpcode::G_FPTOSI_SAT:
1969 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1970 case TargetOpcode::G_FPEXT:
1971 if (TypeIdx != 0)
1972 return UnableToLegalize;
1974 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1976 return Legalized;
1977 case TargetOpcode::G_FLDEXP:
1978 case TargetOpcode::G_STRICT_FLDEXP:
1979 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1980 case TargetOpcode::G_VSCALE: {
1981 Register Dst = MI.getOperand(0).getReg();
1982 LLT Ty = MRI.getType(Dst);
1983
1984 // Assume VSCALE(1) fits into a legal integer
1985 const APInt One(NarrowTy.getSizeInBits(), 1);
1986 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1987 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1988 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1989 MIRBuilder.buildMul(Dst, ZExt, C);
1990
1991 MI.eraseFromParent();
1992 return Legalized;
1993 }
1994 }
1995}
1996
1998 LLT Ty = MRI.getType(Val);
1999 if (Ty.isScalar())
2000 return Val;
2001
2003 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2004 if (Ty.isPointer()) {
2005 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2006 return Register();
2007 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2008 }
2009
2010 Register NewVal = Val;
2011
2012 assert(Ty.isVector());
2013 if (Ty.isPointerVector())
2014 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2015 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2016}
2017
2019 unsigned OpIdx, unsigned ExtOpcode) {
2020 MachineOperand &MO = MI.getOperand(OpIdx);
2021 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2022 MO.setReg(ExtB.getReg(0));
2023}
2024
2026 unsigned OpIdx) {
2027 MachineOperand &MO = MI.getOperand(OpIdx);
2028 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2029 MO.setReg(ExtB.getReg(0));
2030}
2031
2033 unsigned OpIdx, unsigned TruncOpcode) {
2034 MachineOperand &MO = MI.getOperand(OpIdx);
2035 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2037 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2038 MO.setReg(DstExt);
2039}
2040
2042 unsigned OpIdx, unsigned ExtOpcode) {
2043 MachineOperand &MO = MI.getOperand(OpIdx);
2044 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2046 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2047 MO.setReg(DstTrunc);
2048}
2049
2051 unsigned OpIdx) {
2052 MachineOperand &MO = MI.getOperand(OpIdx);
2054 Register Dst = MO.getReg();
2055 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2056 MO.setReg(DstExt);
2058}
2059
2061 unsigned OpIdx) {
2062 MachineOperand &MO = MI.getOperand(OpIdx);
2065}
2066
2067void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2068 MachineOperand &Op = MI.getOperand(OpIdx);
2069 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2070}
2071
2072void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2073 MachineOperand &MO = MI.getOperand(OpIdx);
2074 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2076 MIRBuilder.buildBitcast(MO, CastDst);
2077 MO.setReg(CastDst);
2078}
2079
2081LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2082 LLT WideTy) {
2083 if (TypeIdx != 1)
2084 return UnableToLegalize;
2085
2086 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2087 if (DstTy.isVector())
2088 return UnableToLegalize;
2089
2090 LLT SrcTy = MRI.getType(Src1Reg);
2091 const int DstSize = DstTy.getSizeInBits();
2092 const int SrcSize = SrcTy.getSizeInBits();
2093 const int WideSize = WideTy.getSizeInBits();
2094 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2095
2096 unsigned NumOps = MI.getNumOperands();
2097 unsigned NumSrc = MI.getNumOperands() - 1;
2098 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2099
2100 if (WideSize >= DstSize) {
2101 // Directly pack the bits in the target type.
2102 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2103
2104 for (unsigned I = 2; I != NumOps; ++I) {
2105 const unsigned Offset = (I - 1) * PartSize;
2106
2107 Register SrcReg = MI.getOperand(I).getReg();
2108 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2109
2110 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2111
2112 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2113 MRI.createGenericVirtualRegister(WideTy);
2114
2115 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2116 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2117 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2118 ResultReg = NextResult;
2119 }
2120
2121 if (WideSize > DstSize)
2122 MIRBuilder.buildTrunc(DstReg, ResultReg);
2123 else if (DstTy.isPointer())
2124 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2125
2126 MI.eraseFromParent();
2127 return Legalized;
2128 }
2129
2130 // Unmerge the original values to the GCD type, and recombine to the next
2131 // multiple greater than the original type.
2132 //
2133 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2134 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2135 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2136 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2137 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2138 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2139 // %12:_(s12) = G_MERGE_VALUES %10, %11
2140 //
2141 // Padding with undef if necessary:
2142 //
2143 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2144 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2145 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2146 // %7:_(s2) = G_IMPLICIT_DEF
2147 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2148 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2149 // %10:_(s12) = G_MERGE_VALUES %8, %9
2150
2151 const int GCD = std::gcd(SrcSize, WideSize);
2152 LLT GCDTy = LLT::scalar(GCD);
2153
2155 SmallVector<Register, 8> NewMergeRegs;
2156 SmallVector<Register, 8> Unmerges;
2157 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2158
2159 // Decompose the original operands if they don't evenly divide.
2160 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2161 Register SrcReg = MO.getReg();
2162 if (GCD == SrcSize) {
2163 Unmerges.push_back(SrcReg);
2164 } else {
2165 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2166 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2167 Unmerges.push_back(Unmerge.getReg(J));
2168 }
2169 }
2170
2171 // Pad with undef to the next size that is a multiple of the requested size.
2172 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2173 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2174 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2175 Unmerges.push_back(UndefReg);
2176 }
2177
2178 const int PartsPerGCD = WideSize / GCD;
2179
2180 // Build merges of each piece.
2181 ArrayRef<Register> Slicer(Unmerges);
2182 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2183 auto Merge =
2184 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2185 NewMergeRegs.push_back(Merge.getReg(0));
2186 }
2187
2188 // A truncate may be necessary if the requested type doesn't evenly divide the
2189 // original result type.
2190 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2191 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2192 } else {
2193 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2194 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2195 }
2196
2197 MI.eraseFromParent();
2198 return Legalized;
2199}
2200
2202LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2203 LLT WideTy) {
2204 if (TypeIdx != 0)
2205 return UnableToLegalize;
2206
2207 int NumDst = MI.getNumOperands() - 1;
2208 Register SrcReg = MI.getOperand(NumDst).getReg();
2209 LLT SrcTy = MRI.getType(SrcReg);
2210 if (SrcTy.isVector())
2211 return UnableToLegalize;
2212
2213 Register Dst0Reg = MI.getOperand(0).getReg();
2214 LLT DstTy = MRI.getType(Dst0Reg);
2215 if (!DstTy.isScalar())
2216 return UnableToLegalize;
2217
2218 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2219 if (SrcTy.isPointer()) {
2221 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2222 LLVM_DEBUG(
2223 dbgs() << "Not casting non-integral address space integer\n");
2224 return UnableToLegalize;
2225 }
2226
2227 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2228 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2229 }
2230
2231 // Widen SrcTy to WideTy. This does not affect the result, but since the
2232 // user requested this size, it is probably better handled than SrcTy and
2233 // should reduce the total number of legalization artifacts.
2234 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2235 SrcTy = WideTy;
2236 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2237 }
2238
2239 // Theres no unmerge type to target. Directly extract the bits from the
2240 // source type
2241 unsigned DstSize = DstTy.getSizeInBits();
2242
2243 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2244 for (int I = 1; I != NumDst; ++I) {
2245 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2246 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2247 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2248 }
2249
2250 MI.eraseFromParent();
2251 return Legalized;
2252 }
2253
2254 // Extend the source to a wider type.
2255 LLT LCMTy = getLCMType(SrcTy, WideTy);
2256
2257 Register WideSrc = SrcReg;
2258 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2259 // TODO: If this is an integral address space, cast to integer and anyext.
2260 if (SrcTy.isPointer()) {
2261 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2262 return UnableToLegalize;
2263 }
2264
2265 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2266 }
2267
2268 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2269
2270 // Create a sequence of unmerges and merges to the original results. Since we
2271 // may have widened the source, we will need to pad the results with dead defs
2272 // to cover the source register.
2273 // e.g. widen s48 to s64:
2274 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2275 //
2276 // =>
2277 // %4:_(s192) = G_ANYEXT %0:_(s96)
2278 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2279 // ; unpack to GCD type, with extra dead defs
2280 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2281 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2282 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2283 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2284 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2285 const LLT GCDTy = getGCDType(WideTy, DstTy);
2286 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2287 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2288
2289 // Directly unmerge to the destination without going through a GCD type
2290 // if possible
2291 if (PartsPerRemerge == 1) {
2292 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2293
2294 for (int I = 0; I != NumUnmerge; ++I) {
2295 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2296
2297 for (int J = 0; J != PartsPerUnmerge; ++J) {
2298 int Idx = I * PartsPerUnmerge + J;
2299 if (Idx < NumDst)
2300 MIB.addDef(MI.getOperand(Idx).getReg());
2301 else {
2302 // Create dead def for excess components.
2303 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2304 }
2305 }
2306
2307 MIB.addUse(Unmerge.getReg(I));
2308 }
2309 } else {
2311 for (int J = 0; J != NumUnmerge; ++J)
2312 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2313
2314 SmallVector<Register, 8> RemergeParts;
2315 for (int I = 0; I != NumDst; ++I) {
2316 for (int J = 0; J < PartsPerRemerge; ++J) {
2317 const int Idx = I * PartsPerRemerge + J;
2318 RemergeParts.emplace_back(Parts[Idx]);
2319 }
2320
2321 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2322 RemergeParts.clear();
2323 }
2324 }
2325
2326 MI.eraseFromParent();
2327 return Legalized;
2328}
2329
2331LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2332 LLT WideTy) {
2333 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2334 unsigned Offset = MI.getOperand(2).getImm();
2335
2336 if (TypeIdx == 0) {
2337 if (SrcTy.isVector() || DstTy.isVector())
2338 return UnableToLegalize;
2339
2340 SrcOp Src(SrcReg);
2341 if (SrcTy.isPointer()) {
2342 // Extracts from pointers can be handled only if they are really just
2343 // simple integers.
2345 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2346 return UnableToLegalize;
2347
2348 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2349 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2350 SrcTy = SrcAsIntTy;
2351 }
2352
2353 if (DstTy.isPointer())
2354 return UnableToLegalize;
2355
2356 if (Offset == 0) {
2357 // Avoid a shift in the degenerate case.
2358 MIRBuilder.buildTrunc(DstReg,
2359 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2360 MI.eraseFromParent();
2361 return Legalized;
2362 }
2363
2364 // Do a shift in the source type.
2365 LLT ShiftTy = SrcTy;
2366 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2367 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2368 ShiftTy = WideTy;
2369 }
2370
2371 auto LShr = MIRBuilder.buildLShr(
2372 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2373 MIRBuilder.buildTrunc(DstReg, LShr);
2374 MI.eraseFromParent();
2375 return Legalized;
2376 }
2377
2378 if (SrcTy.isScalar()) {
2380 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2382 return Legalized;
2383 }
2384
2385 if (!SrcTy.isVector())
2386 return UnableToLegalize;
2387
2388 if (DstTy != SrcTy.getElementType())
2389 return UnableToLegalize;
2390
2391 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2392 return UnableToLegalize;
2393
2395 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2396
2397 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2398 Offset);
2399 widenScalarDst(MI, WideTy.getScalarType(), 0);
2401 return Legalized;
2402}
2403
2405LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2406 LLT WideTy) {
2407 if (TypeIdx != 0 || WideTy.isVector())
2408 return UnableToLegalize;
2410 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2411 widenScalarDst(MI, WideTy);
2413 return Legalized;
2414}
2415
2417LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2418 LLT WideTy) {
2419 unsigned Opcode;
2420 unsigned ExtOpcode;
2421 std::optional<Register> CarryIn;
2422 switch (MI.getOpcode()) {
2423 default:
2424 llvm_unreachable("Unexpected opcode!");
2425 case TargetOpcode::G_SADDO:
2426 Opcode = TargetOpcode::G_ADD;
2427 ExtOpcode = TargetOpcode::G_SEXT;
2428 break;
2429 case TargetOpcode::G_SSUBO:
2430 Opcode = TargetOpcode::G_SUB;
2431 ExtOpcode = TargetOpcode::G_SEXT;
2432 break;
2433 case TargetOpcode::G_UADDO:
2434 Opcode = TargetOpcode::G_ADD;
2435 ExtOpcode = TargetOpcode::G_ZEXT;
2436 break;
2437 case TargetOpcode::G_USUBO:
2438 Opcode = TargetOpcode::G_SUB;
2439 ExtOpcode = TargetOpcode::G_ZEXT;
2440 break;
2441 case TargetOpcode::G_SADDE:
2442 Opcode = TargetOpcode::G_UADDE;
2443 ExtOpcode = TargetOpcode::G_SEXT;
2444 CarryIn = MI.getOperand(4).getReg();
2445 break;
2446 case TargetOpcode::G_SSUBE:
2447 Opcode = TargetOpcode::G_USUBE;
2448 ExtOpcode = TargetOpcode::G_SEXT;
2449 CarryIn = MI.getOperand(4).getReg();
2450 break;
2451 case TargetOpcode::G_UADDE:
2452 Opcode = TargetOpcode::G_UADDE;
2453 ExtOpcode = TargetOpcode::G_ZEXT;
2454 CarryIn = MI.getOperand(4).getReg();
2455 break;
2456 case TargetOpcode::G_USUBE:
2457 Opcode = TargetOpcode::G_USUBE;
2458 ExtOpcode = TargetOpcode::G_ZEXT;
2459 CarryIn = MI.getOperand(4).getReg();
2460 break;
2461 }
2462
2463 if (TypeIdx == 1) {
2464 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2465
2467 if (CarryIn)
2468 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2469 widenScalarDst(MI, WideTy, 1);
2470
2472 return Legalized;
2473 }
2474
2475 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2476 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2477 // Do the arithmetic in the larger type.
2478 Register NewOp;
2479 if (CarryIn) {
2480 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2481 NewOp = MIRBuilder
2482 .buildInstr(Opcode, {WideTy, CarryOutTy},
2483 {LHSExt, RHSExt, *CarryIn})
2484 .getReg(0);
2485 } else {
2486 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2487 }
2488 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2489 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2490 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2491 // There is no overflow if the ExtOp is the same as NewOp.
2492 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2493 // Now trunc the NewOp to the original result.
2494 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2495 MI.eraseFromParent();
2496 return Legalized;
2497}
2498
2500LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2501 LLT WideTy) {
2502 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2503 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2504 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2505 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2506 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2507 // We can convert this to:
2508 // 1. Any extend iN to iM
2509 // 2. SHL by M-N
2510 // 3. [US][ADD|SUB|SHL]SAT
2511 // 4. L/ASHR by M-N
2512 //
2513 // It may be more efficient to lower this to a min and a max operation in
2514 // the higher precision arithmetic if the promoted operation isn't legal,
2515 // but this decision is up to the target's lowering request.
2516 Register DstReg = MI.getOperand(0).getReg();
2517
2518 unsigned NewBits = WideTy.getScalarSizeInBits();
2519 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2520
2521 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2522 // must not left shift the RHS to preserve the shift amount.
2523 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2524 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2525 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2526 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2527 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2528 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2529
2530 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2531 {ShiftL, ShiftR}, MI.getFlags());
2532
2533 // Use a shift that will preserve the number of sign bits when the trunc is
2534 // folded away.
2535 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2536 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2537
2538 MIRBuilder.buildTrunc(DstReg, Result);
2539 MI.eraseFromParent();
2540 return Legalized;
2541}
2542
2544LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2545 LLT WideTy) {
2546 if (TypeIdx == 1) {
2548 widenScalarDst(MI, WideTy, 1);
2550 return Legalized;
2551 }
2552
2553 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2554 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2555 LLT SrcTy = MRI.getType(LHS);
2556 LLT OverflowTy = MRI.getType(OriginalOverflow);
2557 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2558
2559 // To determine if the result overflowed in the larger type, we extend the
2560 // input to the larger type, do the multiply (checking if it overflows),
2561 // then also check the high bits of the result to see if overflow happened
2562 // there.
2563 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2564 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2565 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2566
2567 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2568 // so we don't need to check the overflow result of larger type Mulo.
2569 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2570
2571 unsigned MulOpc =
2572 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2573
2575 if (WideMulCanOverflow)
2576 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2577 {LeftOperand, RightOperand});
2578 else
2579 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2580
2581 auto Mul = Mulo->getOperand(0);
2582 MIRBuilder.buildTrunc(Result, Mul);
2583
2584 MachineInstrBuilder ExtResult;
2585 // Overflow occurred if it occurred in the larger type, or if the high part
2586 // of the result does not zero/sign-extend the low part. Check this second
2587 // possibility first.
2588 if (IsSigned) {
2589 // For signed, overflow occurred when the high part does not sign-extend
2590 // the low part.
2591 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2592 } else {
2593 // Unsigned overflow occurred when the high part does not zero-extend the
2594 // low part.
2595 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2596 }
2597
2598 if (WideMulCanOverflow) {
2599 auto Overflow =
2600 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2601 // Finally check if the multiplication in the larger type itself overflowed.
2602 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2603 } else {
2604 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2605 }
2606 MI.eraseFromParent();
2607 return Legalized;
2608}
2609
2612 unsigned Opcode = MI.getOpcode();
2613 switch (Opcode) {
2614 default:
2615 return UnableToLegalize;
2616 case TargetOpcode::G_ATOMICRMW_XCHG:
2617 case TargetOpcode::G_ATOMICRMW_ADD:
2618 case TargetOpcode::G_ATOMICRMW_SUB:
2619 case TargetOpcode::G_ATOMICRMW_AND:
2620 case TargetOpcode::G_ATOMICRMW_OR:
2621 case TargetOpcode::G_ATOMICRMW_XOR:
2622 case TargetOpcode::G_ATOMICRMW_MIN:
2623 case TargetOpcode::G_ATOMICRMW_MAX:
2624 case TargetOpcode::G_ATOMICRMW_UMIN:
2625 case TargetOpcode::G_ATOMICRMW_UMAX:
2626 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2628 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2629 widenScalarDst(MI, WideTy, 0);
2631 return Legalized;
2632 case TargetOpcode::G_ATOMIC_CMPXCHG:
2633 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2635 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2636 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2637 widenScalarDst(MI, WideTy, 0);
2639 return Legalized;
2640 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2641 if (TypeIdx == 0) {
2643 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2644 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2645 widenScalarDst(MI, WideTy, 0);
2647 return Legalized;
2648 }
2649 assert(TypeIdx == 1 &&
2650 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2652 widenScalarDst(MI, WideTy, 1);
2654 return Legalized;
2655 case TargetOpcode::G_EXTRACT:
2656 return widenScalarExtract(MI, TypeIdx, WideTy);
2657 case TargetOpcode::G_INSERT:
2658 return widenScalarInsert(MI, TypeIdx, WideTy);
2659 case TargetOpcode::G_MERGE_VALUES:
2660 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2661 case TargetOpcode::G_UNMERGE_VALUES:
2662 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2663 case TargetOpcode::G_SADDO:
2664 case TargetOpcode::G_SSUBO:
2665 case TargetOpcode::G_UADDO:
2666 case TargetOpcode::G_USUBO:
2667 case TargetOpcode::G_SADDE:
2668 case TargetOpcode::G_SSUBE:
2669 case TargetOpcode::G_UADDE:
2670 case TargetOpcode::G_USUBE:
2671 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2672 case TargetOpcode::G_UMULO:
2673 case TargetOpcode::G_SMULO:
2674 return widenScalarMulo(MI, TypeIdx, WideTy);
2675 case TargetOpcode::G_SADDSAT:
2676 case TargetOpcode::G_SSUBSAT:
2677 case TargetOpcode::G_SSHLSAT:
2678 case TargetOpcode::G_UADDSAT:
2679 case TargetOpcode::G_USUBSAT:
2680 case TargetOpcode::G_USHLSAT:
2681 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2682 case TargetOpcode::G_CTTZ:
2683 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2684 case TargetOpcode::G_CTLZ:
2685 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2686 case TargetOpcode::G_CTPOP: {
2687 if (TypeIdx == 0) {
2689 widenScalarDst(MI, WideTy, 0);
2691 return Legalized;
2692 }
2693
2694 Register SrcReg = MI.getOperand(1).getReg();
2695
2696 // First extend the input.
2697 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2698 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2699 ? TargetOpcode::G_ANYEXT
2700 : TargetOpcode::G_ZEXT;
2701 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2702 LLT CurTy = MRI.getType(SrcReg);
2703 unsigned NewOpc = Opcode;
2704 if (NewOpc == TargetOpcode::G_CTTZ) {
2705 // The count is the same in the larger type except if the original
2706 // value was zero. This can be handled by setting the bit just off
2707 // the top of the original type.
2708 auto TopBit =
2710 MIBSrc = MIRBuilder.buildOr(
2711 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2712 // Now we know the operand is non-zero, use the more relaxed opcode.
2713 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2714 }
2715
2716 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2717
2718 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2719 // An optimization where the result is the CTLZ after the left shift by
2720 // (Difference in widety and current ty), that is,
2721 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2722 // Result = ctlz MIBSrc
2723 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2724 MIRBuilder.buildConstant(WideTy, SizeDiff));
2725 }
2726
2727 // Perform the operation at the larger size.
2728 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2729 // This is already the correct result for CTPOP and CTTZs
2730 if (Opcode == TargetOpcode::G_CTLZ) {
2731 // The correct result is NewOp - (Difference in widety and current ty).
2732 MIBNewOp = MIRBuilder.buildSub(
2733 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2734 }
2735
2736 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2737 MI.eraseFromParent();
2738 return Legalized;
2739 }
2740 case TargetOpcode::G_BSWAP: {
2742 Register DstReg = MI.getOperand(0).getReg();
2743
2744 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2745 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2746 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2747 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2748
2749 MI.getOperand(0).setReg(DstExt);
2750
2752
2753 LLT Ty = MRI.getType(DstReg);
2754 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2755 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2756 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2757
2758 MIRBuilder.buildTrunc(DstReg, ShrReg);
2760 return Legalized;
2761 }
2762 case TargetOpcode::G_BITREVERSE: {
2764
2765 Register DstReg = MI.getOperand(0).getReg();
2766 LLT Ty = MRI.getType(DstReg);
2767 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2768
2769 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2770 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2771 MI.getOperand(0).setReg(DstExt);
2773
2774 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2775 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2776 MIRBuilder.buildTrunc(DstReg, Shift);
2778 return Legalized;
2779 }
2780 case TargetOpcode::G_FREEZE:
2781 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2783 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2784 widenScalarDst(MI, WideTy);
2786 return Legalized;
2787
2788 case TargetOpcode::G_ABS:
2790 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2791 widenScalarDst(MI, WideTy);
2793 return Legalized;
2794
2795 case TargetOpcode::G_ADD:
2796 case TargetOpcode::G_AND:
2797 case TargetOpcode::G_MUL:
2798 case TargetOpcode::G_OR:
2799 case TargetOpcode::G_XOR:
2800 case TargetOpcode::G_SUB:
2801 case TargetOpcode::G_SHUFFLE_VECTOR:
2802 // Perform operation at larger width (any extension is fines here, high bits
2803 // don't affect the result) and then truncate the result back to the
2804 // original type.
2806 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2807 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2808 widenScalarDst(MI, WideTy);
2810 return Legalized;
2811
2812 case TargetOpcode::G_SBFX:
2813 case TargetOpcode::G_UBFX:
2815
2816 if (TypeIdx == 0) {
2817 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2818 widenScalarDst(MI, WideTy);
2819 } else {
2820 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2821 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2822 }
2823
2825 return Legalized;
2826
2827 case TargetOpcode::G_SHL:
2829
2830 if (TypeIdx == 0) {
2831 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2832 widenScalarDst(MI, WideTy);
2833 } else {
2834 assert(TypeIdx == 1);
2835 // The "number of bits to shift" operand must preserve its value as an
2836 // unsigned integer:
2837 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2838 }
2839
2841 return Legalized;
2842
2843 case TargetOpcode::G_ROTR:
2844 case TargetOpcode::G_ROTL:
2845 if (TypeIdx != 1)
2846 return UnableToLegalize;
2847
2849 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2851 return Legalized;
2852
2853 case TargetOpcode::G_SDIV:
2854 case TargetOpcode::G_SREM:
2855 case TargetOpcode::G_SMIN:
2856 case TargetOpcode::G_SMAX:
2858 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2859 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2860 widenScalarDst(MI, WideTy);
2862 return Legalized;
2863
2864 case TargetOpcode::G_SDIVREM:
2866 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2867 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2868 widenScalarDst(MI, WideTy);
2869 widenScalarDst(MI, WideTy, 1);
2871 return Legalized;
2872
2873 case TargetOpcode::G_ASHR:
2874 case TargetOpcode::G_LSHR:
2876
2877 if (TypeIdx == 0) {
2878 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2879 : TargetOpcode::G_ZEXT;
2880
2881 widenScalarSrc(MI, WideTy, 1, CvtOp);
2882 widenScalarDst(MI, WideTy);
2883 } else {
2884 assert(TypeIdx == 1);
2885 // The "number of bits to shift" operand must preserve its value as an
2886 // unsigned integer:
2887 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2888 }
2889
2891 return Legalized;
2892 case TargetOpcode::G_UDIV:
2893 case TargetOpcode::G_UREM:
2895 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2896 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2897 widenScalarDst(MI, WideTy);
2899 return Legalized;
2900 case TargetOpcode::G_UDIVREM:
2902 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2903 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2904 widenScalarDst(MI, WideTy);
2905 widenScalarDst(MI, WideTy, 1);
2907 return Legalized;
2908 case TargetOpcode::G_UMIN:
2909 case TargetOpcode::G_UMAX: {
2910 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2911
2912 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
2913 unsigned ExtOpc =
2915 getApproximateEVTForLLT(WideTy, Ctx))
2916 ? TargetOpcode::G_SEXT
2917 : TargetOpcode::G_ZEXT;
2918
2920 widenScalarSrc(MI, WideTy, 1, ExtOpc);
2921 widenScalarSrc(MI, WideTy, 2, ExtOpc);
2922 widenScalarDst(MI, WideTy);
2924 return Legalized;
2925 }
2926
2927 case TargetOpcode::G_SELECT:
2929 if (TypeIdx == 0) {
2930 // Perform operation at larger width (any extension is fine here, high
2931 // bits don't affect the result) and then truncate the result back to the
2932 // original type.
2933 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2934 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2935 widenScalarDst(MI, WideTy);
2936 } else {
2937 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2938 // Explicit extension is required here since high bits affect the result.
2939 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2940 }
2942 return Legalized;
2943
2944 case TargetOpcode::G_FPTOSI:
2945 case TargetOpcode::G_FPTOUI:
2946 case TargetOpcode::G_INTRINSIC_LRINT:
2947 case TargetOpcode::G_INTRINSIC_LLRINT:
2948 case TargetOpcode::G_IS_FPCLASS:
2950
2951 if (TypeIdx == 0)
2952 widenScalarDst(MI, WideTy);
2953 else
2954 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2955
2957 return Legalized;
2958 case TargetOpcode::G_SITOFP:
2960
2961 if (TypeIdx == 0)
2962 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2963 else
2964 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2965
2967 return Legalized;
2968 case TargetOpcode::G_UITOFP:
2970
2971 if (TypeIdx == 0)
2972 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2973 else
2974 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2975
2977 return Legalized;
2978 case TargetOpcode::G_FPTOSI_SAT:
2979 case TargetOpcode::G_FPTOUI_SAT:
2981
2982 if (TypeIdx == 0) {
2983 Register OldDst = MI.getOperand(0).getReg();
2984 LLT Ty = MRI.getType(OldDst);
2985 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
2986 Register NewDst;
2987 MI.getOperand(0).setReg(ExtReg);
2988 uint64_t ShortBits = Ty.getScalarSizeInBits();
2989 uint64_t WideBits = WideTy.getScalarSizeInBits();
2991 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2992 // z = i16 fptosi_sat(a)
2993 // ->
2994 // x = i32 fptosi_sat(a)
2995 // y = smin(x, 32767)
2996 // z = smax(y, -32768)
2997 auto MaxVal = MIRBuilder.buildConstant(
2998 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
2999 auto MinVal = MIRBuilder.buildConstant(
3000 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3001 Register MidReg =
3002 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3003 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3004 } else {
3005 // z = i16 fptoui_sat(a)
3006 // ->
3007 // x = i32 fptoui_sat(a)
3008 // y = smin(x, 65535)
3009 auto MaxVal = MIRBuilder.buildConstant(
3010 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3011 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3012 }
3013 MIRBuilder.buildTrunc(OldDst, NewDst);
3014 } else
3015 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3016
3018 return Legalized;
3019 case TargetOpcode::G_LOAD:
3020 case TargetOpcode::G_SEXTLOAD:
3021 case TargetOpcode::G_ZEXTLOAD:
3023 widenScalarDst(MI, WideTy);
3025 return Legalized;
3026
3027 case TargetOpcode::G_STORE: {
3028 if (TypeIdx != 0)
3029 return UnableToLegalize;
3030
3031 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3032 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3033 if (!Ty.isScalar()) {
3034 // We need to widen the vector element type.
3036 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3037 // We also need to adjust the MMO to turn this into a truncating store.
3038 MachineMemOperand &MMO = **MI.memoperands_begin();
3040 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3041 MI.setMemRefs(MF, {NewMMO});
3043 return Legalized;
3044 }
3045
3047
3048 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3049 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3050 widenScalarSrc(MI, WideTy, 0, ExtType);
3051
3053 return Legalized;
3054 }
3055 case TargetOpcode::G_CONSTANT: {
3056 MachineOperand &SrcMO = MI.getOperand(1);
3058 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3059 MRI.getType(MI.getOperand(0).getReg()));
3060 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3061 ExtOpc == TargetOpcode::G_ANYEXT) &&
3062 "Illegal Extend");
3063 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3064 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3065 ? SrcVal.sext(WideTy.getSizeInBits())
3066 : SrcVal.zext(WideTy.getSizeInBits());
3068 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3069
3070 widenScalarDst(MI, WideTy);
3072 return Legalized;
3073 }
3074 case TargetOpcode::G_FCONSTANT: {
3075 // To avoid changing the bits of the constant due to extension to a larger
3076 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3077 MachineOperand &SrcMO = MI.getOperand(1);
3078 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3080 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3081 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3082 MI.eraseFromParent();
3083 return Legalized;
3084 }
3085 case TargetOpcode::G_IMPLICIT_DEF: {
3087 widenScalarDst(MI, WideTy);
3089 return Legalized;
3090 }
3091 case TargetOpcode::G_BRCOND:
3093 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3095 return Legalized;
3096
3097 case TargetOpcode::G_FCMP:
3099 if (TypeIdx == 0)
3100 widenScalarDst(MI, WideTy);
3101 else {
3102 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3103 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3104 }
3106 return Legalized;
3107
3108 case TargetOpcode::G_ICMP:
3110 if (TypeIdx == 0)
3111 widenScalarDst(MI, WideTy);
3112 else {
3113 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3114 CmpInst::Predicate Pred =
3115 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3116
3117 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3118 unsigned ExtOpcode =
3119 (CmpInst::isSigned(Pred) ||
3121 getApproximateEVTForLLT(WideTy, Ctx)))
3122 ? TargetOpcode::G_SEXT
3123 : TargetOpcode::G_ZEXT;
3124 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3125 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3126 }
3128 return Legalized;
3129
3130 case TargetOpcode::G_PTR_ADD:
3131 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3133 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3135 return Legalized;
3136
3137 case TargetOpcode::G_PHI: {
3138 assert(TypeIdx == 0 && "Expecting only Idx 0");
3139
3141 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3142 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3144 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3145 }
3146
3147 MachineBasicBlock &MBB = *MI.getParent();
3149 widenScalarDst(MI, WideTy);
3151 return Legalized;
3152 }
3153 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3154 if (TypeIdx == 0) {
3155 Register VecReg = MI.getOperand(1).getReg();
3156 LLT VecTy = MRI.getType(VecReg);
3158
3160 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3161 TargetOpcode::G_ANYEXT);
3162
3163 widenScalarDst(MI, WideTy, 0);
3165 return Legalized;
3166 }
3167
3168 if (TypeIdx != 2)
3169 return UnableToLegalize;
3171 // TODO: Probably should be zext
3172 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3174 return Legalized;
3175 }
3176 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3177 if (TypeIdx == 0) {
3179 const LLT WideEltTy = WideTy.getElementType();
3180
3181 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3182 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3183 widenScalarDst(MI, WideTy, 0);
3185 return Legalized;
3186 }
3187
3188 if (TypeIdx == 1) {
3190
3191 Register VecReg = MI.getOperand(1).getReg();
3192 LLT VecTy = MRI.getType(VecReg);
3193 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3194
3195 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3196 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3197 widenScalarDst(MI, WideVecTy, 0);
3199 return Legalized;
3200 }
3201
3202 if (TypeIdx == 2) {
3204 // TODO: Probably should be zext
3205 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3207 return Legalized;
3208 }
3209
3210 return UnableToLegalize;
3211 }
3212 case TargetOpcode::G_FADD:
3213 case TargetOpcode::G_FMUL:
3214 case TargetOpcode::G_FSUB:
3215 case TargetOpcode::G_FMA:
3216 case TargetOpcode::G_FMAD:
3217 case TargetOpcode::G_FNEG:
3218 case TargetOpcode::G_FABS:
3219 case TargetOpcode::G_FCANONICALIZE:
3220 case TargetOpcode::G_FMINNUM:
3221 case TargetOpcode::G_FMAXNUM:
3222 case TargetOpcode::G_FMINNUM_IEEE:
3223 case TargetOpcode::G_FMAXNUM_IEEE:
3224 case TargetOpcode::G_FMINIMUM:
3225 case TargetOpcode::G_FMAXIMUM:
3226 case TargetOpcode::G_FDIV:
3227 case TargetOpcode::G_FREM:
3228 case TargetOpcode::G_FCEIL:
3229 case TargetOpcode::G_FFLOOR:
3230 case TargetOpcode::G_FCOS:
3231 case TargetOpcode::G_FSIN:
3232 case TargetOpcode::G_FTAN:
3233 case TargetOpcode::G_FACOS:
3234 case TargetOpcode::G_FASIN:
3235 case TargetOpcode::G_FATAN:
3236 case TargetOpcode::G_FATAN2:
3237 case TargetOpcode::G_FCOSH:
3238 case TargetOpcode::G_FSINH:
3239 case TargetOpcode::G_FTANH:
3240 case TargetOpcode::G_FLOG10:
3241 case TargetOpcode::G_FLOG:
3242 case TargetOpcode::G_FLOG2:
3243 case TargetOpcode::G_FRINT:
3244 case TargetOpcode::G_FNEARBYINT:
3245 case TargetOpcode::G_FSQRT:
3246 case TargetOpcode::G_FEXP:
3247 case TargetOpcode::G_FEXP2:
3248 case TargetOpcode::G_FEXP10:
3249 case TargetOpcode::G_FPOW:
3250 case TargetOpcode::G_INTRINSIC_TRUNC:
3251 case TargetOpcode::G_INTRINSIC_ROUND:
3252 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3253 assert(TypeIdx == 0);
3255
3256 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3257 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3258
3259 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3261 return Legalized;
3262 case TargetOpcode::G_FPOWI:
3263 case TargetOpcode::G_FLDEXP:
3264 case TargetOpcode::G_STRICT_FLDEXP: {
3265 if (TypeIdx == 0) {
3266 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3267 return UnableToLegalize;
3268
3270 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3271 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3273 return Legalized;
3274 }
3275
3276 if (TypeIdx == 1) {
3277 // For some reason SelectionDAG tries to promote to a libcall without
3278 // actually changing the integer type for promotion.
3280 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3282 return Legalized;
3283 }
3284
3285 return UnableToLegalize;
3286 }
3287 case TargetOpcode::G_FFREXP: {
3289
3290 if (TypeIdx == 0) {
3291 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3292 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3293 } else {
3294 widenScalarDst(MI, WideTy, 1);
3295 }
3296
3298 return Legalized;
3299 }
3300 case TargetOpcode::G_INTTOPTR:
3301 if (TypeIdx != 1)
3302 return UnableToLegalize;
3303
3305 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3307 return Legalized;
3308 case TargetOpcode::G_PTRTOINT:
3309 if (TypeIdx != 0)
3310 return UnableToLegalize;
3311
3313 widenScalarDst(MI, WideTy, 0);
3315 return Legalized;
3316 case TargetOpcode::G_BUILD_VECTOR: {
3318
3319 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3320 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3321 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3322
3323 // Avoid changing the result vector type if the source element type was
3324 // requested.
3325 if (TypeIdx == 1) {
3326 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3327 } else {
3328 widenScalarDst(MI, WideTy, 0);
3329 }
3330
3332 return Legalized;
3333 }
3334 case TargetOpcode::G_SEXT_INREG:
3335 if (TypeIdx != 0)
3336 return UnableToLegalize;
3337
3339 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3340 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3342 return Legalized;
3343 case TargetOpcode::G_PTRMASK: {
3344 if (TypeIdx != 1)
3345 return UnableToLegalize;
3347 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3349 return Legalized;
3350 }
3351 case TargetOpcode::G_VECREDUCE_ADD: {
3352 if (TypeIdx != 1)
3353 return UnableToLegalize;
3355 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3356 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3358 return Legalized;
3359 }
3360 case TargetOpcode::G_VECREDUCE_FADD:
3361 case TargetOpcode::G_VECREDUCE_FMUL:
3362 case TargetOpcode::G_VECREDUCE_FMIN:
3363 case TargetOpcode::G_VECREDUCE_FMAX:
3364 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3365 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3366 if (TypeIdx != 0)
3367 return UnableToLegalize;
3369 Register VecReg = MI.getOperand(1).getReg();
3370 LLT VecTy = MRI.getType(VecReg);
3371 LLT WideVecTy = VecTy.isVector()
3372 ? LLT::vector(VecTy.getElementCount(), WideTy)
3373 : WideTy;
3374 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3375 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3377 return Legalized;
3378 }
3379 case TargetOpcode::G_VSCALE: {
3380 MachineOperand &SrcMO = MI.getOperand(1);
3382 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3383 // The CImm is always a signed value
3384 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3386 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3387 widenScalarDst(MI, WideTy);
3389 return Legalized;
3390 }
3391 case TargetOpcode::G_SPLAT_VECTOR: {
3392 if (TypeIdx != 1)
3393 return UnableToLegalize;
3394
3396 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3398 return Legalized;
3399 }
3400 case TargetOpcode::G_INSERT_SUBVECTOR: {
3401 if (TypeIdx != 0)
3402 return UnableToLegalize;
3403
3404 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3405 Register BigVec = IS.getBigVec();
3406 Register SubVec = IS.getSubVec();
3407
3408 LLT SubVecTy = MRI.getType(SubVec);
3409 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3410
3411 // Widen the G_INSERT_SUBVECTOR
3412 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3413 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3414 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3415 IS.getIndexImm());
3416
3417 // Truncate back down
3418 auto SplatZero = MIRBuilder.buildSplatVector(
3419 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3421 SplatZero);
3422
3423 MI.eraseFromParent();
3424
3425 return Legalized;
3426 }
3427 }
3428}
3429
// NOTE(review): the opening line of this helper (original line 3430, with
// the return type, name and the out-parameter — presumably
// `static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,`) was
// dropped by the extraction; only the trailing parameters are visible.
// From the body: split \p Src into \p Ty sized pieces via
// G_UNMERGE_VALUES and append every result register to `Pieces`.
 3431 MachineIRBuilder &B, Register Src, LLT Ty) {
 3432 auto Unmerge = B.buildUnmerge(Ty, Src);
 // getNumOperands() - 1 skips the final operand, which is the unmerge
 // source itself; all preceding operands are result registers.
 3433 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
 3434 Pieces.push_back(Unmerge.getReg(I));
 3435}
3436
// Materialize \p ConstVal through the constant pool: create (or reuse) a
// constant-pool entry for it and emit a G_LOAD of that entry into
// \p DstReg, using the constant's ABI alignment.
3437static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3438 MachineIRBuilder &MIRBuilder) {
3439 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3440 MachineFunction &MF = MIRBuilder.getMF();
3441 const DataLayout &DL = MIRBuilder.getDataLayout();
 // The pool address is formed in the default globals address space of the
 // target's data layout.
3442 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3443 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3444 LLT DstLLT = MRI.getType(DstReg);
3445
3446 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3447
3448 auto Addr = MIRBuilder.buildConstantPool(
3449 AddrPtrTy,
3450 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3451
 // NOTE(review): original line 3453 (the first argument line of the
 // getMachineMemOperand call, presumably the constant-pool
 // MachinePointerInfo) is missing from this extracted listing.
3452 MachineMemOperand *MMO =
3454 MachineMemOperand::MOLoad, DstLLT, Alignment);
3455
3456 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3457}
3458
// NOTE(review): the function signature (original lines 3459-3460) is missing
// from this extracted listing; from the body this is the lowering for a
// G_CONSTANT that replaces the instruction with a constant-pool load.
 3461 const MachineOperand &ConstOperand = MI.getOperand(1);
 3462 const Constant *ConstantVal = ConstOperand.getCImm();
 3463
 // Load the immediate from the constant pool into the original dest reg,
 // then delete the now-redundant G_CONSTANT.
 3464 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
 3465 MI.eraseFromParent();
 3466
 3467 return Legalized;
 3468}
3469
// NOTE(review): the function signature (original lines 3470-3471) is missing
// from this extracted listing; from the body this is the lowering for a
// G_FCONSTANT — identical to the integer case above except it reads the
// floating-point immediate (getFPImm) before emitting the pool load.
 3472 const MachineOperand &ConstOperand = MI.getOperand(1);
 3473 const Constant *ConstantVal = ConstOperand.getFPImm();
 3474
 3475 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
 3476 MI.eraseFromParent();
 3477
 3478 return Legalized;
 3479}
3480
// NOTE(review): the function signature (original lines 3481-3482) is missing
// from this extracted listing. From the body: lowers a bitcast-like
// instruction by unmerging the source into pieces, optionally bitcasting
// each piece to an intermediate type when the element counts differ, and
// remerging into the destination. Lines 3486 and 3535 (presumably the
// `SrcRegs` vector declarations) are also missing here.
 3483 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
 3484 if (SrcTy.isVector()) {
 3485 LLT SrcEltTy = SrcTy.getElementType();
 3487
 3488 if (DstTy.isVector()) {
 3489 int NumDstElt = DstTy.getNumElements();
 3490 int NumSrcElt = SrcTy.getNumElements();
 3491
 3492 LLT DstEltTy = DstTy.getElementType();
 3493 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
 3494 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
 3495
 3496 // If there's an element size mismatch, insert intermediate casts to match
 3497 // the result element type.
 3498 if (NumSrcElt < NumDstElt) { // Source element type is larger.
 3499 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
 3500 //
 3501 // =>
 3502 //
 3503 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
 3504 // %3:_(<2 x s8>) = G_BITCAST %2
 3505 // %4:_(<2 x s8>) = G_BITCAST %3
 3506 // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
 3507 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
 3508 SrcPartTy = SrcEltTy;
 3509 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
 3510 //
 3511 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
 3512 //
 3513 // =>
 3514 //
 3515 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
 3516 // %3:_(s16) = G_BITCAST %2
 3517 // %4:_(s16) = G_BITCAST %3
 3518 // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
 3519 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
 3520 DstCastTy = DstEltTy;
 3521 }
 3522
 // Unmerge into SrcPartTy pieces, then bitcast each piece to the
 // intermediate destination-side type before remerging.
 3523 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
 3524 for (Register &SrcReg : SrcRegs)
 3525 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
 3526 } else
 3527 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
 3528
 3529 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
 3530 MI.eraseFromParent();
 3531 return Legalized;
 3532 }
 3533
 // Scalar source, vector destination: unmerge the scalar into
 // destination-element-sized pieces and build the vector from them.
 3534 if (DstTy.isVector()) {
 3536 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
 3537 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
 3538 MI.eraseFromParent();
 3539 return Legalized;
 3540 }
 3541
 // Scalar-to-scalar is not handled here.
 3542 return UnableToLegalize;
 3543}
3544
3545/// Figure out the bit offset into a register when coercing a vector index for
3546/// the wide element type. This is only for the case when promoting vector to
3547/// one with larger elements.
3548//
3549///
3550/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3551/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
// NOTE(review): the first line of the declaration (original line 3552,
// carrying the return type, name and the MachineIRBuilder parameter) is
// missing from this extracted listing.
 3553 Register Idx,
 3554 unsigned NewEltSize,
 3555 unsigned OldEltSize) {
 3556 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
 3557 LLT IdxTy = B.getMRI()->getType(Idx);
 3558
 3559 // Now figure out the amount we need to shift to get the target bits.
 // Mask keeps only the low Log2EltRatio bits of the index (the position
 // of the old element within one wide element)...
 3560 auto OffsetMask = B.buildConstant(
 3561 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
 3562 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
 // ...then scale that sub-index by the old element size (in bits) via a
 // shift by Log2(OldEltSize).
 3563 return B.buildShl(IdxTy, OffsetIdx,
 3564 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
 3565}
3566
3567/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3568/// is casting to a vector with a smaller element size, perform multiple element
3569/// extracts and merge the results. If this is coercing to a vector with larger
3570/// elements, index the bitcasted vector and extract the target element with bit
3571/// operations. This is intended to force the indexing in the native register
3572/// size for architectures that can dynamically index the register file.
// NOTE(review): the function signature (original lines 3573-3574) is
// missing from this extracted listing.
 3575 LLT CastTy) {
 // Only the source-vector type (TypeIdx 1) can be bitcast here.
 3576 if (TypeIdx != 1)
 3577 return UnableToLegalize;
 3578
 3579 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
 3580
 3581 LLT SrcEltTy = SrcVecTy.getElementType();
 3582 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
 3583 unsigned OldNumElts = SrcVecTy.getNumElements();
 3584
 3585 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
 3586 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
 3587
 3588 const unsigned NewEltSize = NewEltTy.getSizeInBits();
 3589 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
 3590 if (NewNumElts > OldNumElts) {
 3591 // Decreasing the vector element size
 3592 //
 3593 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
 3594 // =>
 3595 // v4i32:castx = bitcast x:v2i64
 3596 //
 3597 // i64 = bitcast
 3598 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
 3599 // (i32 (extract_vector_elt castx, (2 * y + 1)))
 3600 //
 // The narrow element count must evenly cover each old element.
 3601 if (NewNumElts % OldNumElts != 0)
 3602 return UnableToLegalize;
 3603
 3604 // Type of the intermediate result vector.
 3605 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
 3606 LLT MidTy =
 3607 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
 3608
 3609 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
 3610
 3611 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
 // Base index of the first narrow element belonging to old element Idx.
 3612 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
 3613
 3614 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
 3615 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
 3616 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
 3617 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
 3618 NewOps[I] = Elt.getReg(0);
 3619 }
 3620
 // Reassemble the narrow extracts and bitcast back to the original
 // element type.
 3621 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
 3622 MIRBuilder.buildBitcast(Dst, NewVec);
 3623 MI.eraseFromParent();
 3624 return Legalized;
 3625 }
 3626
 3627 if (NewNumElts < OldNumElts) {
 3628 if (NewEltSize % OldEltSize != 0)
 3629 return UnableToLegalize;
 3630
 3631 // This only depends on powers of 2 because we use bit tricks to figure out
 3632 // the bit offset we need to shift to get the target element. A general
 3633 // expansion could emit division/multiply.
 3634 if (!isPowerOf2_32(NewEltSize / OldEltSize))
 3635 return UnableToLegalize;
 3636
 3637 // Increasing the vector element size.
 3638 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
 3639 //
 3640 // =>
 3641 //
 3642 // %cast = G_BITCAST %vec
 3643 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
 3644 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
 3645 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
 3646 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
 3647 // %elt_bits = G_LSHR %wide_elt, %offset_bits
 3648 // %elt = G_TRUNC %elt_bits
 3649
 3650 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
 3651 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
 3652
 3653 // Divide to get the index in the wider element type.
 3654 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
 3655
 3656 Register WideElt = CastVec;
 3657 if (CastTy.isVector()) {
 3658 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
 3659 ScaledIdx).getReg(0);
 3660 }
 3661
 3662 // Compute the bit offset into the register of the target element.
 // NOTE(review): original line 3663 (presumably the assignment
 // `Register OffsetBits = getBitcastWideVectorElementOffset(`) is missing
 // from this extracted listing.
 3664 MIRBuilder, Idx, NewEltSize, OldEltSize);
 3665
 3666 // Shift the wide element to get the target element.
 3667 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
 3668 MIRBuilder.buildTrunc(Dst, ExtractedBits);
 3669 MI.eraseFromParent();
 3670 return Legalized;
 3671 }
 3672
 3673 return UnableToLegalize;
 3674}
3675
3676/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
3677/// TargetReg, while preserving other bits in \p TargetReg.
3678///
3679/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
// NOTE(review): the first line of the declaration (original line 3680, with
// the return type, name and MachineIRBuilder parameter) is missing from this
// extracted listing.
 3681 Register TargetReg, Register InsertReg,
 3682 Register OffsetBits) {
 3683 LLT TargetTy = B.getMRI()->getType(TargetReg);
 3684 LLT InsertTy = B.getMRI()->getType(InsertReg);
 // Zero-extend the value so the high bits are known-clear, then move it
 // into position.
 3685 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
 3686 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
 3687
 3688 // Produce a bitmask of the value to insert
 3689 auto EltMask = B.buildConstant(
 3690 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
 3691 InsertTy.getSizeInBits()));
 3692 // Shift it into position
 3693 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
 3694 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
 3695
 3696 // Clear out the bits in the wide element
 3697 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
 3698
 3699 // The value to insert has all zeros already, so stick it into the masked
 3700 // wide element.
 3701 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
 3702}
3703
3704/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3705/// is increasing the element size, perform the indexing in the target element
3706/// type, and use bit operations to insert at the element position. This is
3707/// intended for architectures that can dynamically index the register file and
3708/// want to force indexing in the native register size.
// NOTE(review): the function signature (original lines 3709-3710) is
// missing from this extracted listing.
 3711 LLT CastTy) {
 // Only the destination/source-vector type (TypeIdx 0) can be bitcast here.
 3712 if (TypeIdx != 0)
 3713 return UnableToLegalize;
 3714
 3715 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
 3716 MI.getFirst4RegLLTs();
 3717 LLT VecTy = DstTy;
 3718
 3719 LLT VecEltTy = VecTy.getElementType();
 3720 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
 3721 const unsigned NewEltSize = NewEltTy.getSizeInBits();
 3722 const unsigned OldEltSize = VecEltTy.getSizeInBits();
 3723
 3724 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
 3725 unsigned OldNumElts = VecTy.getNumElements();
 3726
 3727 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
 // Only the "increase element size" direction is handled (fewer, wider
 // elements after the cast).
 3728 if (NewNumElts < OldNumElts) {
 3729 if (NewEltSize % OldEltSize != 0)
 3730 return UnableToLegalize;
 3731
 3732 // This only depends on powers of 2 because we use bit tricks to figure out
 3733 // the bit offset we need to shift to get the target element. A general
 3734 // expansion could emit division/multiply.
 3735 if (!isPowerOf2_32(NewEltSize / OldEltSize))
 3736 return UnableToLegalize;
 3737
 3738 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
 3739 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
 3740
 3741 // Divide to get the index in the wider element type.
 3742 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
 3743
 3744 Register ExtractedElt = CastVec;
 3745 if (CastTy.isVector()) {
 3746 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
 3747 ScaledIdx).getReg(0);
 3748 }
 3749
 3750 // Compute the bit offset into the register of the target element.
 // NOTE(review): original line 3751 (presumably the assignment
 // `Register OffsetBits = getBitcastWideVectorElementOffset(`) is missing
 // from this extracted listing.
 3752 MIRBuilder, Idx, NewEltSize, OldEltSize);
 3753
 // Splice Val into the extracted wide element at the computed bit offset.
 3754 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
 3755 Val, OffsetBits);
 3756 if (CastTy.isVector()) {
 // NOTE(review): original line 3757 (presumably
 // `InsertedElt = MIRBuilder.buildInsertVectorElement(`) is missing from
 // this extracted listing.
 3758 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
 3759 }
 3760
 3761 MIRBuilder.buildBitcast(Dst, InsertedElt);
 3762 MI.eraseFromParent();
 3763 return Legalized;
 3764 }
 3765
 3766 return UnableToLegalize;
 3767}
3768
3769// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3770// those that have smaller than legal operands.
3771//
3772// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3773//
3774// ===>
3775//
3776// s32 = G_BITCAST <4 x s8>
3777// s32 = G_BITCAST <4 x s8>
3778// s32 = G_BITCAST <4 x s8>
3779// s32 = G_BITCAST <4 x s8>
3780// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3781// <16 x s8> = G_BITCAST <4 x s32>
// NOTE(review): the function signature (original lines 3782-3783) is
// missing from this extracted listing.
 3784 LLT CastTy) {
 3785 // Convert it to CONCAT instruction
 3786 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
 3787 if (!ConcatMI) {
 3788 return UnableToLegalize;
 3789 }
 3790
 3791 // Check if bitcast is Legal
 3792 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
 // Each source operand is viewed as one scalar covering its full bit width.
 3793 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
 3794
 3795 // Check if the build vector is Legal
 3796 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
 3797 return UnableToLegalize;
 3798 }
 3799
 3800 // Bitcast the sources
 3801 SmallVector<Register> BitcastRegs;
 3802 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
 3803 BitcastRegs.push_back(
 3804 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
 3805 .getReg(0));
 3806 }
 3807
 3808 // Build the scalar values into a vector
 3809 Register BuildReg =
 3810 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
 // Cast the legal-element vector back to the original result type.
 3811 MIRBuilder.buildBitcast(DstReg, BuildReg);
 3812
 3813 MI.eraseFromParent();
 3814 return Legalized;
 3815}
3816
3817// This bitcasts a shuffle vector to a different type currently of the same
3818// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3819// will be used instead.
3820//
3821// <16 x p0> = G_CONCAT_VECTORS <4 x p0>, <4 x p0>, mask
3822// ===>
3823// <4 x s64> = G_PTRTOINT <4 x p0>
3824// <4 x s64> = G_PTRTOINT <4 x p0>
3825// <16 x s64> = G_CONCAT_VECTORS <4 x s64>, <4 x s64>, mask
3826// <16 x p0> = G_INTTOPTR <16 x s64>
// NOTE(review): the function signature (original lines 3827-3828) is
// missing from this extracted listing.
 3829 LLT CastTy) {
 3830 auto ShuffleMI = cast<GShuffleVector>(&MI);
 3831 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
 3832 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
 3833
 3834 // We currently only handle vectors of the same size.
 3835 if (TypeIdx != 0 ||
 3836 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
 3837 CastTy.getElementCount() != DstTy.getElementCount())
 3838 return UnableToLegalize;
 3839
 3840 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
 3841
 // buildCast picks the appropriate cast (e.g. ptrtoint/inttoptr/bitcast)
 // for the type pair, then the shuffle is rebuilt on the cast operands
 // with the original mask.
 3842 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
 3843 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
 3844 auto Shuf =
 3845 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
 3846 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
 3847
 3848 MI.eraseFromParent();
 3849 return Legalized;
 3850}
3851
3852/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3853///
3854/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3855///
3856/// ===>
3857///
3858/// <vscale x 2 x i1> = G_BITCAST <vscale x 16 x i1>
3859/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i1>, N / 8
3860/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
// NOTE(review): the function signature (original lines 3861-3862) is
// missing from this extracted listing; line 3876 is also absent below.
 3863 LLT CastTy) {
 3864 auto ES = cast<GExtractSubvector>(&MI);
 3865
 3866 if (!CastTy.isVector())
 3867 return UnableToLegalize;
 3868
 3869 if (TypeIdx != 0)
 3870 return UnableToLegalize;
 3871
 3872 Register Dst = ES->getReg(0);
 3873 Register Src = ES->getSrcVec();
 3874 uint64_t Idx = ES->getIndexImm();
 3875
 3877
 3878 LLT DstTy = MRI.getType(Dst);
 3879 LLT SrcTy = MRI.getType(Src);
 3880 ElementCount DstTyEC = DstTy.getElementCount();
 3881 ElementCount SrcTyEC = SrcTy.getElementCount();
 3882 auto DstTyMinElts = DstTyEC.getKnownMinValue();
 3883 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
 3884
 // Nothing to do if the requested cast type already matches.
 3885 if (DstTy == CastTy)
 3886 return Legalized;
 3887
 3888 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
 3889 return UnableToLegalize;
 3890
 3891 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
 3892 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
 // Only widening the element size is supported here.
 3893 if (CastEltSize < DstEltSize)
 3894 return UnableToLegalize;
 3895
 // The index and both element counts must stay whole after rescaling by
 // the element-size ratio.
 3896 auto AdjustAmt = CastEltSize / DstEltSize;
 3897 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
 3898 SrcTyMinElts % AdjustAmt != 0)
 3899 return UnableToLegalize;
 3900
 3901 Idx /= AdjustAmt;
 3902 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
 3903 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
 3904 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
 3905 MIRBuilder.buildBitcast(Dst, PromotedES);
 3906
 3907 ES->eraseFromParent();
 3908 return Legalized;
 3909}
3910
3911/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
3912///
3913/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
3914/// <vscale x 8 x i1>,
3915/// N
3916///
3917/// ===>
3918///
3919/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3920/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
3921/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
3922/// <vscale x 1 x i8>, N / 8
3923/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
// NOTE(review): the function signature (original lines 3924-3925) is
// missing from this extracted listing; line 3940 is also absent below.
 3926 LLT CastTy) {
 3927 auto ES = cast<GInsertSubvector>(&MI);
 3928
 3929 if (!CastTy.isVector())
 3930 return UnableToLegalize;
 3931
 3932 if (TypeIdx != 0)
 3933 return UnableToLegalize;
 3934
 3935 Register Dst = ES->getReg(0);
 3936 Register BigVec = ES->getBigVec();
 3937 Register SubVec = ES->getSubVec();
 3938 uint64_t Idx = ES->getIndexImm();
 3939
 3941
 3942 LLT DstTy = MRI.getType(Dst);
 3943 LLT BigVecTy = MRI.getType(BigVec);
 3944 LLT SubVecTy = MRI.getType(SubVec);
 3945
 // Nothing to do if the requested cast type already matches.
 3946 if (DstTy == CastTy)
 3947 return Legalized;
 3948
 3949 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
 3950 return UnableToLegalize;
 3951
 3952 ElementCount DstTyEC = DstTy.getElementCount();
 3953 ElementCount BigVecTyEC = BigVecTy.getElementCount();
 3954 ElementCount SubVecTyEC = SubVecTy.getElementCount();
 3955 auto DstTyMinElts = DstTyEC.getKnownMinValue();
 3956 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
 3957 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
 3958
 3959 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
 3960 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
 // Only widening the element size is supported here.
 3961 if (CastEltSize < DstEltSize)
 3962 return UnableToLegalize;
 3963
 // Index and all three element counts must stay whole after rescaling by
 // the element-size ratio.
 3964 auto AdjustAmt = CastEltSize / DstEltSize;
 3965 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
 3966 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
 3967 return UnableToLegalize;
 3968
 3969 Idx /= AdjustAmt;
 3970 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
 3971 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
 3972 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
 3973 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
 3974 auto PromotedIS =
 3975 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
 3976 MIRBuilder.buildBitcast(Dst, PromotedIS);
 3977
 3978 ES->eraseFromParent();
 3979 return Legalized;
 3980}
3981
 // Body of LegalizerHelper::lowerLoad: lower an illegal G_LOAD / G_SEXTLOAD /
 // G_ZEXTLOAD either by widening a sub-byte memory type to a byte multiple,
 // or by splitting into two smaller power-of-2 loads that are recombined.
3983 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3984 Register DstReg = LoadMI.getDstReg();
3985 Register PtrReg = LoadMI.getPointerReg();
3986 LLT DstTy = MRI.getType(DstReg);
3987 MachineMemOperand &MMO = LoadMI.getMMO();
3988 LLT MemTy = MMO.getMemoryType();
3990
3991 unsigned MemSizeInBits = MemTy.getSizeInBits();
3992 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3993
 // Case 1: the memory type is not a whole number of bytes (e.g. s20).
3994 if (MemSizeInBits != MemStoreSizeInBits) {
3995 if (MemTy.isVector())
3996 return UnableToLegalize;
3997
3998 // Promote to a byte-sized load if not loading an integral number of
3999 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4000 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4001 MachineMemOperand *NewMMO =
4002 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4003
4004 Register LoadReg = DstReg;
4005 LLT LoadTy = DstTy;
4006
4007 // If this wasn't already an extending load, we need to widen the result
4008 // register to avoid creating a load with a narrower result than the source.
4009 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4010 LoadTy = WideMemTy;
4011 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4012 }
4013
 // Re-apply the original extension semantics on the widened load result.
4014 if (isa<GSExtLoad>(LoadMI)) {
4015 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4016 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4017 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4018 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4019 // The extra bits are guaranteed to be zero, since we stored them that
4020 // way. A zext load from Wide thus automatically gives zext from MemVT.
4021 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4022 } else {
4023 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4024 }
4025
4026 if (DstTy != LoadTy)
4027 MIRBuilder.buildTrunc(DstReg, LoadReg);
4028
4029 LoadMI.eraseFromParent();
4030 return Legalized;
4031 }
4032
4033 // Big endian lowering not implemented.
4035 return UnableToLegalize;
4036
4037 // This load needs splitting into power of 2 sized loads.
4038 //
4039 // Our strategy here is to generate anyextending loads for the smaller
4040 // types up to next power-2 result type, and then combine the two larger
4041 // result values together, before truncating back down to the non-pow-2
4042 // type.
4043 // E.g. v1 = i24 load =>
4044 // v2 = i32 zextload (2 byte)
4045 // v3 = i32 load (1 byte)
4046 // v4 = i32 shl v3, 16
4047 // v5 = i32 or v4, v2
4048 // v1 = i24 trunc v5
4049 // By doing this we generate the correct truncate which should get
4050 // combined away as an artifact with a matching extend.
4051
4052 uint64_t LargeSplitSize, SmallSplitSize;
4053
4054 if (!isPowerOf2_32(MemSizeInBits)) {
4055 // This load needs splitting into power of 2 sized loads.
4056 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4057 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4058 } else {
4059 // This is already a power of 2, but we still need to split this in half.
4060 //
4061 // Assume we're being asked to decompose an unaligned load.
4062 // TODO: If this requires multiple splits, handle them all at once.
4063 auto &Ctx = MF.getFunction().getContext();
4064 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4065 return UnableToLegalize;
4066
4067 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4068 }
4069
4070 if (MemTy.isVector()) {
4071 // TODO: Handle vector extloads
4072 if (MemTy != DstTy)
4073 return UnableToLegalize;
4074
4075 // TODO: We can do better than scalarizing the vector and at least split it
4076 // in half.
4077 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4078 }
4079
 // Build the two partial memory operands at offsets 0 and LargeSplitSize/8.
4080 MachineMemOperand *LargeMMO =
4081 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4082 MachineMemOperand *SmallMMO =
4083 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4084
4085 LLT PtrTy = MRI.getType(PtrReg);
4086 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4087 LLT AnyExtTy = LLT::scalar(AnyExtSize);
 // The low part must be zero-extended so the OR below cannot see garbage in
 // the bits where the high part is shifted in.
4088 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4089 PtrReg, *LargeMMO);
4090
4091 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4092 LargeSplitSize / 8);
4093 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4094 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
 // The high part keeps the original opcode so the load's extension kind
 // (sext/zext/any) is preserved on the most significant piece.
4095 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4096 SmallPtr, *SmallMMO);
4097
4098 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4099 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4100
 // Combine (SmallLoad << LargeSplitSize) | LargeLoad into the destination,
 // truncating or casting to pointer as required by the result type.
4101 if (AnyExtTy == DstTy)
4102 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4103 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4104 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4105 MIRBuilder.buildTrunc(DstReg, {Or});
4106 } else {
4107 assert(DstTy.isPointer() && "expected pointer");
4108 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4109
4110 // FIXME: We currently consider this to be illegal for non-integral address
4111 // spaces, but we need still need a way to reinterpret the bits.
4112 MIRBuilder.buildIntToPtr(DstReg, Or);
4113 }
4114
4115 LoadMI.eraseFromParent();
4116 return Legalized;
4117}
4118
 // Body of LegalizerHelper::lowerStore: mirror of lowerLoad for G_STORE —
 // widen sub-byte stores to a byte multiple, scalarize vector stores, or
 // split a non-power-of-2 / unaligned store into two power-of-2 stores.
4120 // Lower a non-power of 2 store into multiple pow-2 stores.
4121 // E.g. split an i24 store into an i16 store + i8 store.
4122 // We do this by first extending the stored value to the next largest power
4123 // of 2 type, and then using truncating stores to store the components.
4124 // By doing this, likewise with G_LOAD, generate an extend that can be
4125 // artifact-combined away instead of leaving behind extracts.
4126 Register SrcReg = StoreMI.getValueReg();
4127 Register PtrReg = StoreMI.getPointerReg();
4128 LLT SrcTy = MRI.getType(SrcReg);
4130 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4131 LLT MemTy = MMO.getMemoryType();
4132
4133 unsigned StoreWidth = MemTy.getSizeInBits();
4134 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4135
 // Case 1: scalar store whose memory type is not a whole number of bytes.
4136 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4137 // Promote to a byte-sized store with upper bits zero if not
4138 // storing an integral number of bytes. For example, promote
4139 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4140 LLT WideTy = LLT::scalar(StoreSizeInBits);
4141
4142 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4143 // Avoid creating a store with a narrower source than result.
4144 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4145 SrcTy = WideTy;
4146 }
4147
 // Mask off the bits beyond the original store width so the padding bits
 // written to memory are zero.
4148 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4149
4150 MachineMemOperand *NewMMO =
4151 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4152 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4153 StoreMI.eraseFromParent();
4154 return Legalized;
4155 }
4156
4157 if (MemTy.isVector()) {
 // Mismatched vector memory type (e.g. a truncating vector-bool store)
 // goes through the dedicated bit-packing path.
4158 if (MemTy != SrcTy)
4159 return scalarizeVectorBooleanStore(StoreMI);
4160
4161 // TODO: We can do better than scalarizing the vector and at least split it
4162 // in half.
4163 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4164 }
4165
4166 unsigned MemSizeInBits = MemTy.getSizeInBits();
4167 uint64_t LargeSplitSize, SmallSplitSize;
4168
4169 if (!isPowerOf2_32(MemSizeInBits)) {
4170 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4171 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4172 } else {
 // Power-of-2 size: only split if the target rejects the access (i.e. we
 // are decomposing an unaligned store); otherwise nothing to do here.
4173 auto &Ctx = MF.getFunction().getContext();
4174 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4175 return UnableToLegalize; // Don't know what we're being asked to do.
4176
4177 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4178 }
4179
4180 // Extend to the next pow-2. If this store was itself the result of lowering,
4181 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4182 // that's wider than the stored size.
4183 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4184 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4185
 // Pointers must be cast to integers before they can be shifted.
4186 if (SrcTy.isPointer()) {
4187 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4188 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4189 }
4190
4191 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4192
4193 // Obtain the smaller value by shifting away the larger value.
4194 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize)4195 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4196
4197 // Generate the PtrAdd and truncating stores.
4198 LLT PtrTy = MRI.getType(PtrReg);
4199 auto OffsetCst = MIRBuilder.buildConstant(
4200 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4201 auto SmallPtr =
4202 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
4203
4204 MachineMemOperand *LargeMMO =
4205 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4206 MachineMemOperand *SmallMMO =
4207 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
 // The truncating stores write the low LargeSplitSize bits at offset 0 and
 // the shifted-down high bits at the advanced pointer.
4208 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4209 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4210 StoreMI.eraseFromParent();
4211 return Legalized;
4212}
4213
 // Body of LegalizerHelper::scalarizeVectorBooleanStore: lower a truncating
 // vector store whose element memory type is sub-byte (e.g. <N x i1>) by
 // packing all elements into a single integer and storing that.
4216 Register SrcReg = StoreMI.getValueReg();
4217 Register PtrReg = StoreMI.getPointerReg();
4218 LLT SrcTy = MRI.getType(SrcReg);
4219 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4220 LLT MemTy = MMO.getMemoryType();
4221 LLT MemScalarTy = MemTy.getElementType();
4223
4224 assert(SrcTy.isVector() && "Expect a vector store type");
4225
4226 if (!MemScalarTy.isByteSized()) {
4227 // We need to build an integer scalar of the vector bit pattern.
4228 // It's not legal for us to add padding when storing a vector.
4229 unsigned NumBits = MemTy.getSizeInBits();
4230 LLT IntTy = LLT::scalar(NumBits);
4231 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4232 LLT IdxTy = getLLTForMVT(TLI.getVectorIdxTy(MF.getDataLayout()));
4233
 // For each element: extract, truncate to the memory element type, zero
 // extend into the accumulator width, shift into position, and OR it in.
4234 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4236 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4237 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4238 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
 // Big-endian targets place element 0 at the most significant position.
4239 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4240 ? (MemTy.getNumElements() - 1) - I
4241 : I;
4242 auto ShiftAmt = MIRBuilder.buildConstant(
4243 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4244 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4245 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4246 }
 // Store the packed integer with a memory operand of the scalar type.
4247 auto PtrInfo = MMO.getPointerInfo();
4248 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4249 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4250 StoreMI.eraseFromParent();
4251 return Legalized;
4252 }
4253
4254 // TODO: implement simple scalarization.
4255 return UnableToLegalize;
4256}
4257
 // Dispatch for the Bitcast legalization action: rewrite MI to operate on
 // CastTy (same total bit width, different element interpretation) and fall
 // back to UnableToLegalize for unsupported opcodes/type indices.
4259LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4260 switch (MI.getOpcode()) {
4261 case TargetOpcode::G_LOAD: {
4262 if (TypeIdx != 0)
4263 return UnableToLegalize;
4264 MachineMemOperand &MMO = **MI.memoperands_begin();
4265
4266 // Not sure how to interpret a bitcast of an extending load.
4267 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4268 return UnableToLegalize;
4269
4271 bitcastDst(MI, CastTy, 0);
4272 MMO.setType(CastTy);
4273 // The range metadata is no longer valid when reinterpreted as a different
4274 // type.
4275 MMO.clearRanges();
4277 return Legalized;
4278 }
4279 case TargetOpcode::G_STORE: {
4280 if (TypeIdx != 0)
4281 return UnableToLegalize;
4282
4283 MachineMemOperand &MMO = **MI.memoperands_begin();
4284
4285 // Not sure how to interpret a bitcast of a truncating store.
4286 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4287 return UnableToLegalize;
4288
4290 bitcastSrc(MI, CastTy, 0);
4291 MMO.setType(CastTy);
4293 return Legalized;
4294 }
4295 case TargetOpcode::G_SELECT: {
4296 if (TypeIdx != 0)
4297 return UnableToLegalize;
4298
 // A vector condition selects per element, which a whole-value bitcast
 // cannot represent; only scalar conditions are handled.
4299 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4300 LLVM_DEBUG(
4301 dbgs() << "bitcast action not implemented for vector select\n");
4302 return UnableToLegalize;
4303 }
4304
4306 bitcastSrc(MI, CastTy, 2);
4307 bitcastSrc(MI, CastTy, 3);
4308 bitcastDst(MI, CastTy, 0);
4310 return Legalized;
4311 }
 // Bitwise ops are insensitive to the value interpretation, so both
 // sources and the destination can simply be recast.
4312 case TargetOpcode::G_AND:
4313 case TargetOpcode::G_OR:
4314 case TargetOpcode::G_XOR: {
4316 bitcastSrc(MI, CastTy, 1);
4317 bitcastSrc(MI, CastTy, 2);
4318 bitcastDst(MI, CastTy, 0);
4320 return Legalized;
4321 }
4322 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4323 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4324 case TargetOpcode::G_INSERT_VECTOR_ELT:
4325 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4326 case TargetOpcode::G_CONCAT_VECTORS:
4327 return bitcastConcatVector(MI, TypeIdx, CastTy);
4328 case TargetOpcode::G_SHUFFLE_VECTOR:
4329 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4330 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4331 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4332 case TargetOpcode::G_INSERT_SUBVECTOR:
4333 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4334 default:
4335 return UnableToLegalize;
4336 }
4337}
4338
4339// Legalize an instruction by changing the opcode in place.
4340void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
 // Swap only the instruction descriptor; all operands are reused as-is.
4342 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4344}
4345
 // Dispatch for the Lower legalization action: expand MI into a sequence of
 // (hopefully) legal instructions. Each case either rewrites MI inline or
 // forwards to a dedicated lower* helper.
4347LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4348 using namespace TargetOpcode;
4349
4350 switch(MI.getOpcode()) {
4351 default:
4352 return UnableToLegalize;
4353 case TargetOpcode::G_FCONSTANT:
4354 return lowerFConstant(MI);
4355 case TargetOpcode::G_BITCAST:
4356 return lowerBitcast(MI);
4357 case TargetOpcode::G_SREM:
4358 case TargetOpcode::G_UREM: {
 // rem = x - (x / y) * y, using the matching signed/unsigned division.
4359 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4360 auto Quot =
4361 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4362 {MI.getOperand(1), MI.getOperand(2)});
4363
4364 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4365 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4366 MI.eraseFromParent();
4367 return Legalized;
4368 }
4369 case TargetOpcode::G_SADDO:
4370 case TargetOpcode::G_SSUBO:
4371 return lowerSADDO_SSUBO(MI);
4372 case TargetOpcode::G_UMULH:
4373 case TargetOpcode::G_SMULH:
4374 return lowerSMULH_UMULH(MI);
4375 case TargetOpcode::G_SMULO:
4376 case TargetOpcode::G_UMULO: {
4377 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4378 // result.
4379 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4380 LLT Ty = MRI.getType(Res);
4381
4382 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4383 ? TargetOpcode::G_SMULH
4384 : TargetOpcode::G_UMULH;
4385
 // Turn MI itself into the G_MUL by dropping the overflow def.
4387 const auto &TII = MIRBuilder.getTII();
4388 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4389 MI.removeOperand(1);
4391
4392 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4393 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4394
4395 // Move insert point forward so we can use the Res register if needed.
4397
4398 // For *signed* multiply, overflow is detected by checking:
4399 // (hi != (lo >> bitwidth-1))
4400 if (Opcode == TargetOpcode::G_SMULH) {
4401 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4402 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4403 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4404 } else {
 // Unsigned overflow iff the high half of the full product is nonzero.
4405 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4406 }
4407 return Legalized;
4408 }
4409 case TargetOpcode::G_FNEG: {
 // fneg x -> x ^ sign-bit-mask (flip only the sign bit).
4410 auto [Res, SubByReg] = MI.getFirst2Regs();
4411 LLT Ty = MRI.getType(Res);
4412
4413 auto SignMask = MIRBuilder.buildConstant(
4415 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4416 MI.eraseFromParent();
4417 return Legalized;
4418 }
4419 case TargetOpcode::G_FSUB:
4420 case TargetOpcode::G_STRICT_FSUB: {
4421 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4422 LLT Ty = MRI.getType(Res);
4423
4424 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4425 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4426
4427 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4428 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4429 else
4430 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4431
4432 MI.eraseFromParent();
4433 return Legalized;
4434 }
4435 case TargetOpcode::G_FMAD:
4436 return lowerFMad(MI);
4437 case TargetOpcode::G_FFLOOR:
4438 return lowerFFloor(MI);
4439 case TargetOpcode::G_LROUND:
4440 case TargetOpcode::G_LLROUND: {
 // lround/llround = round-to-nearest then convert to signed integer.
4441 Register DstReg = MI.getOperand(0).getReg();
4442 Register SrcReg = MI.getOperand(1).getReg();
4443 LLT SrcTy = MRI.getType(SrcReg);
4444 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4445 {SrcReg});
4446 MIRBuilder.buildFPTOSI(DstReg, Round);
4447 MI.eraseFromParent();
4448 return Legalized;
4449 }
4450 case TargetOpcode::G_INTRINSIC_ROUND:
4451 return lowerIntrinsicRound(MI);
4452 case TargetOpcode::G_FRINT: {
4453 // Since round even is the assumed rounding mode for unconstrained FP
4454 // operations, rint and roundeven are the same operation.
4455 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4456 return Legalized;
4457 }
4458 case TargetOpcode::G_INTRINSIC_LRINT:
4459 case TargetOpcode::G_INTRINSIC_LLRINT: {
4460 Register DstReg = MI.getOperand(0).getReg();
4461 Register SrcReg = MI.getOperand(1).getReg();
4462 LLT SrcTy = MRI.getType(SrcReg);
4463 auto Round =
4464 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4465 MIRBuilder.buildFPTOSI(DstReg, Round);
4466 MI.eraseFromParent();
4467 return Legalized;
4468 }
4469 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
 // Expand to a plain cmpxchg plus an explicit compare for the success bit.
4470 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4471 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4472 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4473 **MI.memoperands_begin());
4474 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4475 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4476 MI.eraseFromParent();
4477 return Legalized;
4478 }
4479 case TargetOpcode::G_LOAD:
4480 case TargetOpcode::G_SEXTLOAD:
4481 case TargetOpcode::G_ZEXTLOAD:
4482 return lowerLoad(cast<GAnyLoad>(MI));
4483 case TargetOpcode::G_STORE:
4484 return lowerStore(cast<GStore>(MI));
4485 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4486 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4487 case TargetOpcode::G_CTLZ:
4488 case TargetOpcode::G_CTTZ:
4489 case TargetOpcode::G_CTPOP:
4490 return lowerBitCount(MI);
4491 case G_UADDO: {
 // Unsigned add overflowed iff the result wrapped below an operand.
4492 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4493
4494 Register NewRes = MRI.cloneVirtualRegister(Res);
4495
4496 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4497 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4498
4499 MIRBuilder.buildCopy(Res, NewRes);
4500
4501 MI.eraseFromParent();
4502 return Legalized;
4503 }
4504 case G_UADDE: {
4505 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4506 const LLT CondTy = MRI.getType(CarryOut);
4507 const LLT Ty = MRI.getType(Res);
4508
4509 Register NewRes = MRI.cloneVirtualRegister(Res);
4510
4511 // Initial add of the two operands.
4512 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4513
4514 // Initial check for carry.
4515 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4516
4517 // Add the sum and the carry.
4518 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4519 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4520
4521 // Second check for carry. We can only carry if the initial sum is all 1s
4522 // and the carry is set, resulting in a new sum of 0.
4523 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4524 auto ResEqZero =
4525 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4526 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4527 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4528
4529 MIRBuilder.buildCopy(Res, NewRes);
4530
4531 MI.eraseFromParent();
4532 return Legalized;
4533 }
4534 case G_USUBO: {
4535 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4536
4537 MIRBuilder.buildSub(Res, LHS, RHS);
4539
4540 MI.eraseFromParent();
4541 return Legalized;
4542 }
4543 case G_USUBE: {
4544 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4545 const LLT CondTy = MRI.getType(BorrowOut);
4546 const LLT Ty = MRI.getType(Res);
4547
4548 // Initial subtract of the two operands.
4549 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4550
4551 // Initial check for borrow.
4552 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4553
4554 // Subtract the borrow from the first subtract.
4555 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4556 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4557
4558 // Second check for borrow. We can only borrow if the initial difference is
4559 // 0 and the borrow is set, resulting in a new difference of all 1s.
4560 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4561 auto TmpResEqZero =
4562 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4563 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4564 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4565
4566 MI.eraseFromParent();
4567 return Legalized;
4568 }
4569 case G_UITOFP:
4570 return lowerUITOFP(MI);
4571 case G_SITOFP:
4572 return lowerSITOFP(MI);
4573 case G_FPTOUI:
4574 return lowerFPTOUI(MI);
4575 case G_FPTOSI:
4576 return lowerFPTOSI(MI);
4577 case G_FPTOUI_SAT:
4578 case G_FPTOSI_SAT:
4579 return lowerFPTOINT_SAT(MI);
4580 case G_FPTRUNC:
4581 return lowerFPTRUNC(MI);
4582 case G_FPOWI:
4583 return lowerFPOWI(MI);
4584 case G_SMIN:
4585 case G_SMAX:
4586 case G_UMIN:
4587 case G_UMAX:
4588 return lowerMinMax(MI);
4589 case G_SCMP:
4590 case G_UCMP:
4591 return lowerThreewayCompare(MI);
4592 case G_FCOPYSIGN:
4593 return lowerFCopySign(MI);
4594 case G_FMINNUM:
4595 case G_FMAXNUM:
4596 return lowerFMinNumMaxNum(MI);
4597 case G_MERGE_VALUES:
4598 return lowerMergeValues(MI);
4599 case G_UNMERGE_VALUES:
4600 return lowerUnmergeValues(MI);
4601 case TargetOpcode::G_SEXT_INREG: {
 // sext_inreg x, B -> (x << (W-B)) >>s (W-B): shift the B live bits to the
 // top, then arithmetic-shift back to replicate the sign bit.
4602 assert(MI.getOperand(2).isImm() && "Expected immediate");
4603 int64_t SizeInBits = MI.getOperand(2).getImm();
4604
4605 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4606 LLT DstTy = MRI.getType(DstReg);
4607 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4608
4609 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4610 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4611 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4612 MI.eraseFromParent();
4613 return Legalized;
4614 }
4615 case G_EXTRACT_VECTOR_ELT:
4616 case G_INSERT_VECTOR_ELT:
4618 case G_SHUFFLE_VECTOR:
4619 return lowerShuffleVector(MI);
4620 case G_VECTOR_COMPRESS:
4621 return lowerVECTOR_COMPRESS(MI);
4622 case G_DYN_STACKALLOC:
4623 return lowerDynStackAlloc(MI);
4624 case G_STACKSAVE:
4625 return lowerStackSave(MI);
4626 case G_STACKRESTORE:
4627 return lowerStackRestore(MI);
4628 case G_EXTRACT:
4629 return lowerExtract(MI);
4630 case G_INSERT:
4631 return lowerInsert(MI);
4632 case G_BSWAP:
4633 return lowerBswap(MI);
4634 case G_BITREVERSE:
4635 return lowerBitreverse(MI);
4636 case G_READ_REGISTER:
4637 case G_WRITE_REGISTER:
4638 return lowerReadWriteRegister(MI);
4639 case G_UADDSAT:
4640 case G_USUBSAT: {
4641 // Try to make a reasonable guess about which lowering strategy to use. The
4642 // target can override this with custom lowering and calling the
4643 // implementation functions.
4644 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4645 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4646 return lowerAddSubSatToMinMax(MI);
4648 }
4649 case G_SADDSAT:
4650 case G_SSUBSAT: {
4651 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4652
4653 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4654 // since it's a shorter expansion. However, we would need to figure out the
4655 // preferred boolean type for the carry out for the query.
4656 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4657 return lowerAddSubSatToMinMax(MI);
4659 }
4660 case G_SSHLSAT:
4661 case G_USHLSAT:
4662 return lowerShlSat(MI);
4663 case G_ABS:
4664 return lowerAbsToAddXor(MI);
4665 case G_FABS:
4666 return lowerFAbs(MI);
4667 case G_SELECT:
4668 return lowerSelect(MI);
4669 case G_IS_FPCLASS:
4670 return lowerISFPCLASS(MI);
4671 case G_SDIVREM:
4672 case G_UDIVREM:
4673 return lowerDIVREM(MI);
4674 case G_FSHL:
4675 case G_FSHR:
4676 return lowerFunnelShift(MI);
4677 case G_ROTL:
4678 case G_ROTR:
4679 return lowerRotate(MI);
4680 case G_MEMSET:
4681 case G_MEMCPY:
4682 case G_MEMMOVE:
4683 return lowerMemCpyFamily(MI);
4684 case G_MEMCPY_INLINE:
4685 return lowerMemcpyInline(MI);
4686 case G_ZEXT:
4687 case G_SEXT:
4688 case G_ANYEXT:
4689 return lowerEXT(MI);
4690 case G_TRUNC:
4691 return lowerTRUNC(MI);
4693 return lowerVectorReduction(MI);
4694 case G_VAARG:
4695 return lowerVAArg(MI);
4696 }
4697}
4698
 // Body of LegalizerHelper::getStackTemporaryAlignment: pick an alignment for
 // a stack temporary of type Ty — its size rounded up to a power of 2, but
 // never less than the caller-provided minimum.
4700 Align MinAlign) const {
4701 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4702 // datalayout for the preferred alignment. Also there should be a target hook
4703 // for this to allow targets to reduce the alignment and ignore the
4704 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4705 // the type.
4706 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4707}
4708
 // Body of LegalizerHelper::createStackTemporary: allocate a fixed stack slot
 // of the given size/alignment, report its MachinePointerInfo through the out
 // parameter, and return a G_FRAME_INDEX addressing it in the alloca address
 // space.
4711 MachinePointerInfo &PtrInfo) {
4714 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4715
4716 unsigned AddrSpace = DL.getAllocaAddrSpace();
4717 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4718
4719 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4720 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4721}
4722
 // Bounce a value through a stack temporary: store Val to a fresh stack slot
 // and load it back with the result type — a memory round-trip used to
 // reinterpret values that can't be legally bitcast in registers.
4724 const SrcOp &Val) {
4725 LLT SrcTy = Val.getLLTTy(MRI);
 // Use the larger of the type's natural temporary alignment and the other
 // (scrape-elided) alignment operand of the max below.
4726 Align StackTypeAlign =
4727 std::max(getStackTemporaryAlignment(SrcTy),
4729 MachinePointerInfo PtrInfo;
4730 auto StackTemp =
4731 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4732
4733 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4734 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4735}
4736
 // Clamp a (possibly dynamic) vector index register into the valid range
 // [0, NumElements - 1] so out-of-bounds accesses stay in bounds.
4738 LLT VecTy) {
4739 LLT IdxTy = B.getMRI()->getType(IdxReg);
4740 unsigned NElts = VecTy.getNumElements();
4741
 // A known-constant, in-range index needs no clamping at all.
4742 int64_t IdxVal;
4743 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4744 if (IdxVal < VecTy.getNumElements())
4745 return IdxReg;
4746 // If a constant index would be out of bounds, clamp it as well.
4747 }
4748
 // Power-of-2 element count: clamping is a cheap AND with NElts - 1.
4749 if (isPowerOf2_32(NElts)) {
4750 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4751 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4752 }
4753
 // General case: unsigned min against the last valid index.
4754 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4755 .getReg(0);
4756}
4757
 // Compute the address of element Index of an in-memory vector: clamp the
 // index, scale it by the element's byte size, and add it to the base
 // pointer.
4759 Register Index) {
4760 LLT EltTy = VecTy.getElementType();
4761
4762 // Calculate the element offset and add it to the pointer.
4763 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4764 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4765 "Converting bits to bytes lost precision");
4766
 // Keep the access in bounds even for dynamic/out-of-range indices.
4767 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4768
4769 // Convert index to the correct size for the address space.
4771 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4772 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4773 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4774 if (IdxTy != MRI.getType(Index))
4775 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4776
4777 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4778 MIRBuilder.buildConstant(IdxTy, EltSize));
4779
4780 LLT PtrTy = MRI.getType(VecPtr);
4781 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4782}
4783
 // Debug-only validity check used by an assert in the multi-element-type
 // fewerElements path below: returns true only when the instruction has no
 // memory operands, its first def is a vector, and every register operand is
 // either a vector with the same element count or explicitly whitelisted in
 // NonVecOpIndices.
4784#ifndef NDEBUG
4785/// Check that all vector operands have same number of elements. Other operands
4786/// should be listed in NonVecOp.
4789 std::initializer_list<unsigned> NonVecOpIndices) {
 // Instructions touching memory are not handled by this splitting scheme.
4790 if (MI.getNumMemOperands() != 0)
4791 return false;
4792
4793 LLT VecTy = MRI.getType(MI.getReg(0));
4794 if (!VecTy.isVector())
4795 return false;
4796 unsigned NumElts = VecTy.getNumElements();
4797
4798 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4799 MachineOperand &Op = MI.getOperand(OpIdx);
4800 if (!Op.isReg()) {
 // Non-register operands (imm, predicate, ...) must be whitelisted.
4801 if (!is_contained(NonVecOpIndices, OpIdx))
4802 return false;
4803 continue;
4804 }
4805
4806 LLT Ty = MRI.getType(Op.getReg());
4807 if (!Ty.isVector()) {
 // Scalar register operands must be whitelisted as well.
4808 if (!is_contained(NonVecOpIndices, OpIdx))
4809 return false;
4810 continue;
4811 }
4812
 // All vector operands must agree on the element count.
4813 if (Ty.getNumElements() != NumElts)
4814 return false;
4815 }
4816
4817 return true;
4818}
4819#endif
4820
4821/// Fill \p DstOps with DstOps that have same number of elements combined as
4822/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4823/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
4824/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
4825static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4826 unsigned NumElts) {
4827 LLT LeftoverTy;
4828 assert(Ty.isVector() && "Expected vector type");
4829 LLT EltTy = Ty.getElementType();
4830 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4831 int NumParts, NumLeftover;
4832 std::tie(NumParts, NumLeftover) =
4833 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4834
4835 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4836 for (int i = 0; i < NumParts; ++i) {
4837 DstOps.push_back(NarrowTy);
4838 }
4839
4840 if (LeftoverTy.isValid()) {
4841 assert(NumLeftover == 1 && "expected exactly one leftover");
4842 DstOps.push_back(LeftoverTy);
4843 }
4844}
4845
4846/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4847/// made from \p Op depending on operand type.
4848static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4849 MachineOperand &Op) {
4850 for (unsigned i = 0; i < N; ++i) {
4851 if (Op.isReg())
4852 Ops.push_back(Op.getReg());
4853 else if (Op.isImm())
4854 Ops.push_back(Op.getImm());
4855 else if (Op.isPredicate())
4856 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4857 else
4858 llvm_unreachable("Unsupported type");
4859 }
4860}
4861
4862// Handle splitting vector operations which need to have the same number of
4863// elements in each type index, but each type index may have a different element
4864// type.
4865//
4866// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4867// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4868// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4869//
4870// Also handles some irregular breakdown cases, e.g.
4871// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4872// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4873// s64 = G_SHL s64, s32
    GenericMachineInstr &MI, unsigned NumElts,
    std::initializer_list<unsigned> NonVecOpIndices) {
  assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
         "Non-compatible opcode or not specified non-vector operands");
  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
  // Build instructions with DstOps to use instruction found by CSE directly.
  // CSE copies found instruction into given vreg when building with vreg dest.
  SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
  // Output registers will be taken from created instructions.
  SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
  for (unsigned i = 0; i < NumDefs; ++i) {
    makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
  }

  // Split vector input operands into sub-vectors with NumElts elts + Leftover.
  // Operands listed in NonVecOpIndices will be used as is without splitting;
  // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
  // scalar condition (op 1), immediate in sext_inreg (op 2).
  SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       ++UseIdx, ++UseNo) {
    if (is_contained(NonVecOpIndices, UseIdx)) {
      // Replicate the non-vector operand unchanged for each sub-instruction.
      broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
                     MI.getOperand(UseIdx));
    } else {
      SmallVector<Register, 8> SplitPieces;
      extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
                         MRI);
      for (auto Reg : SplitPieces)
        InputOpsPieces[UseNo].push_back(Reg);
    }
  }

  // OrigNumElts not divisible by NumElts means one extra, smaller, piece.
  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

  // Take i-th piece of each input operand split and build sub-vector/scalar
  // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    // NOTE(review): the declarations of Defs and Uses (SmallVectors of
    // DstOp/SrcOp) appear to be elided from this excerpt — verify upstream.
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      Defs.push_back(OutputOpsPieces[DstNo][i]);

    for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
      Uses.push_back(InputOpsPieces[InputNo][i]);

    auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      OutputRegs[DstNo].push_back(I.getReg(DstNo));
  }

  // Merge small outputs into MI's output for each def operand.
  if (NumLeftovers) {
    for (unsigned i = 0; i < NumDefs; ++i)
      mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
  } else {
    for (unsigned i = 0; i < NumDefs; ++i)
      MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
  }

  MI.eraseFromParent();
  return Legalized;
}
4944
                                                 unsigned NumElts) {
  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  // Destination pieces: sub-vectors with NumElts elements each, plus an
  // optional smaller leftover piece.
  SmallVector<DstOp, 8> OutputOpsPieces;
  SmallVector<Register, 8> OutputRegs;
  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);

  // Instructions that perform register split will be inserted in basic block
  // where register is defined (basic block is in the next operand).
  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
    MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
    // NOTE(review): a MIRBuilder.setInsertPt(...) into OpMBB appears to be
    // elided from this excerpt — verify upstream.
    extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
                       MIRBuilder, MRI);
  }

  // Build PHIs with fewer elements.
  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
  MIRBuilder.setInsertPt(*MI.getParent(), MI);
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
    Phi.addDef(
        MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
    OutputRegs.push_back(Phi.getReg(0));

    // Each narrow PHI takes the i-th piece of every incoming value together
    // with the original incoming-block operand.
    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));
    }
  }

  // Set the insert point after the existing PHIs
  MachineBasicBlock &MBB = *MI.getParent();
  // NOTE(review): the setInsertPt call placing the builder after MBB's PHIs
  // appears to be elided from this excerpt — verify upstream.

  // Merge small outputs into MI's def.
  if (NumLeftovers) {
    mergeMixedSubvectors(MI.getReg(0), OutputRegs);
  } else {
    MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
4997
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  // Operand layout of G_UNMERGE_VALUES: NumDst defs followed by one source.
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  LLT SrcTy = MRI.getType(SrcReg);

  // Only the source type index is handled here; narrowing to the destination
  // type itself would change nothing.
  if (TypeIdx != 1 || NarrowTy == DstTy)
    return UnableToLegalize;

  // Requires compatible types. Otherwise SrcReg should have been defined by
  // merge-like instruction that would get artifact combined. Most likely
  // instruction that defines SrcReg has to perform more/fewer elements
  // legalization compatible with NarrowTy.
  assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");

  // NarrowTy must evenly divide SrcTy, and DstTy must evenly divide NarrowTy.
  if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
      (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
    return UnableToLegalize;

  // This is most likely DstTy (smaller than register size) packed in SrcTy
  // (larger than register size) and since unmerge was not combined it will be
  // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
  // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.

  // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
  //
  // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
  // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
  // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  // Re-use the original def registers, distributing them in order over the
  // second-level unmerges so no replacement of uses is needed.
  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}
5046
                                              LLT NarrowTy) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  // Requires compatible types. Otherwise user of DstReg did not perform unmerge
  // that should have been artifact combined. Most likely instruction that uses
  // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
  assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
  if (NarrowTy == SrcTy)
    return UnableToLegalize;

  // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
  // is for old mir tests. Since the changes to more/fewer elements it should no
  // longer be possible to generate MIR like this when starting from llvm-ir
  // because LCMTy approach was replaced with merge/unmerge to vector elements.
  if (TypeIdx == 1) {
    assert(SrcTy.isVector() && "Expected vector types");
    assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
    if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
        (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
      return UnableToLegalize;
    // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
    //
    // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
    // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
    // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
    // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
    // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
    // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11

    // Fully scalarize every source operand first.
    // NOTE(review): the declaration of Elts (a SmallVector<Register>) appears
    // to be elided from this excerpt — verify upstream.
    LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
    for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
      auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
      for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
        Elts.push_back(Unmerge.getReg(j));
    }

    // Rebuild NarrowTy-sized vectors from consecutive runs of the scalar
    // elements, then concatenate them into the destination.
    SmallVector<Register, 8> NarrowTyElts;
    unsigned NumNarrowTyElts = NarrowTy.getNumElements();
    unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
    for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
         ++i, Offset += NumNarrowTyElts) {
      ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
      NarrowTyElts.push_back(
          MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
    }

    MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
    MI.eraseFromParent();
    return Legalized;
  }

  assert(TypeIdx == 0 && "Bad type index");
  if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
      (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
    return UnableToLegalize;

  // This is most likely SrcTy (smaller than register size) packed in DstTy
  // (larger than register size) and since merge was not combined it will be
  // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
  // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.

  // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
  //
  // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
  // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
  // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
  SmallVector<Register, 8> NarrowTyElts;
  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
  unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
  unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
  for (unsigned i = 0; i < NumParts; ++i) {
    // NOTE(review): the declaration of Sources (a SmallVector<Register>)
    // appears to be elided from this excerpt — verify upstream.
    for (unsigned j = 0; j < NumElts; ++j)
      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
    NarrowTyElts.push_back(
        MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
  }

  MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
  MI.eraseFromParent();
  return Legalized;
}
5132
                                                      unsigned TypeIdx,
                                                      LLT NarrowVecTy) {
  auto [DstReg, SrcVec] = MI.getFirst2Regs();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  // The element index is always the last operand of both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
  if (MaybeCst) {
    IdxVal = MaybeCst->Value.getSExtValue();
    // Avoid out of bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    // Work out which narrow piece holds the element, and the element's index
    // within that piece.
    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type, so
  // we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  // NOTE(review): the trailing `return UnableToLegalize;` appears to be elided
  // from this excerpt — verify upstream.
}
5206
                                                 LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (LdStMI.isAtomic())
    return UnableToLegalize;

  bool IsLoad = isa<GLoad>(LdStMI);
  Register ValReg = LdStMI.getReg(0);
  Register AddrReg = LdStMI.getPointerReg();
  LLT ValTy = MRI.getType(ValReg);

  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // For loads only the breakdown shape is needed here; the narrow result
    // registers are created while emitting the loads below.
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // For stores, split the value being stored into narrow registers now.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs, MIRBuilder, MRI)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  // -1 means the breakdown was not satisfiable.
  if (NumParts == -1)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  // NOTE(review): a `bool isBigEndian = ...` definition appears to be elided
  // from this excerpt (isBigEndian is used below) — verify upstream.
  auto MMO = LdStMI.getMMO();
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned NumParts, unsigned Offset) -> unsigned {
    // NOTE(review): a `MachineFunction &MF = ...` local appears to be elided
    // from this excerpt (MF is used below) — verify upstream.
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         ++Idx) {
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      // Materialize the address of this piece as base + byte offset.
      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      MachineMemOperand *NewMMO =
          MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
      // Big-endian walks offsets downward, little-endian upward.
      Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
    }

    return Offset;
  };

  unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
  unsigned HandledOffset =
      splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);

  // For loads, stitch the narrow pieces back into the original wide value.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  LdStMI.eraseFromParent();
  return Legalized;
}
5301
                                                  LLT NarrowTy) {
  using namespace TargetOpcode;
  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
  // A scalar NarrowTy requests full scalarization: one element per piece.
  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  switch (MI.getOpcode()) {
  // Plain elementwise operations: every vector operand is split identically.
  case G_IMPLICIT_DEF:
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FEXP10:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FLDEXP:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_LRINT:
  case G_INTRINSIC_LLRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_LROUND:
  case G_LLROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FTAN:
  case G_FACOS:
  case G_FASIN:
  case G_FATAN:
  case G_FATAN2:
  case G_FCOSH:
  case G_FSINH:
  case G_FTANH:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SDIVREM:
  case G_UDIVREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_ABS:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_ROTL:
  case G_ROTR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
  case G_UMULO:
  case G_SMULO:
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_FPTOSI_SAT:
  case G_FPTOUI_SAT:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
  case G_UADDO:
  case G_USUBO:
  case G_UADDE:
  case G_USUBE:
  case G_SADDO:
  case G_SSUBO:
  case G_SADDE:
  case G_SSUBE:
  case G_STRICT_FADD:
  case G_STRICT_FSUB:
  case G_STRICT_FMUL:
  case G_STRICT_FMA:
  case G_STRICT_FLDEXP:
  case G_FFREXP:
    return fewerElementsVectorMultiEltType(GMI, NumElts);
  // Compares: operand 1 is the predicate, replicated rather than split.
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
  case G_IS_FPCLASS:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
  case G_SELECT:
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return fewerElementsVectorMultiEltType(GMI, NumElts);
    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
  case G_PHI:
    return fewerElementsVectorPhi(GMI, NumElts);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
  // NOTE(review): the non-sequential G_VECREDUCE_* case labels (likely a
  // GISEL_VECREDUCE_CASES_NONSEQ macro) appear to be elided from this
  // excerpt — verify upstream.
    return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
  case G_SHUFFLE_VECTOR:
    return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
  case G_FPOWI:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
  case G_BITCAST:
    return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
  case G_INTRINSIC_FPTRUNC_ROUND:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
  default:
    return UnableToLegalize;
  }
}
5476
5479 LLT NarrowTy) {
5480 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5481 "Not a bitcast operation");
5482
5483 if (TypeIdx != 0)
5484 return UnableToLegalize;
5485
5486 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5487
5488 unsigned NewElemCount =
5489 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5490 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5491
5492 // Split the Src and Dst Reg into smaller registers
5493 SmallVector<Register> SrcVRegs, BitcastVRegs;
5494 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5495 return UnableToLegalize;
5496
5497 // Build new smaller bitcast instructions
5498 // Not supporting Leftover types for now but will have to
5499 for (unsigned i = 0; i < SrcVRegs.size(); i++)
5500 BitcastVRegs.push_back(
5501 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
5502
5503 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5504 MI.eraseFromParent();
5505 return Legalized;
5506}
5507
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  if (TypeIdx != 0)
    return UnableToLegalize;

  auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
      MI.getFirst3RegLLTs();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  // The shuffle should be canonicalized by now.
  if (DstTy != Src1Ty)
    return UnableToLegalize;
  if (DstTy != Src2Ty)
    return UnableToLegalize;

  // Splitting in half requires a power-of-two element count.
  if (!isPowerOf2_32(DstTy.getNumElements()))
    return UnableToLegalize;

  // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
  // Further legalization attempts will be needed to do split further.
  NarrowTy =
      DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};

  Register Hi, Lo;

  // If Lo or Hi uses elements from at most two of the four input vectors, then
  // express it as a vector shuffle of those two inputs. Otherwise extract the
  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
  // NOTE(review): the declaration of Ops (a SmallVector<int> mask buffer)
  // appears to be elided from this excerpt — verify upstream.
  for (unsigned High = 0; High < 2; ++High) {
    Register &Output = High ? Hi : Lo;

    // Build a shuffle mask for the output, discovering on the fly which
    // input vectors to use as shuffle operands (recorded in InputUsed).
    // If building a suitable shuffle vector proves too hard, then bail
    // out with useBuildVector set.
    unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      // The mask element. This indexes into the input.
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      // The input vector this mask element indexes into.
      unsigned Input = (unsigned)Idx / NewElts;

      if (Input >= std::size(Inputs)) {
        // The mask element does not index into any input vector.
        Ops.push_back(-1);
        continue;
      }

      // Turn the index into an offset from the start of the input vector.
      Idx -= Input * NewElts;

      // Find or create a shuffle vector operand to hold this input.
      unsigned OpNo;
      for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
          // This input vector is already an operand.
          break;
        } else if (InputUsed[OpNo] == -1U) {
          // Create a new operand for this input vector.
          InputUsed[OpNo] = Input;
          break;
        }
      }

      if (OpNo >= std::size(InputUsed)) {
        // More than two input vectors used! Give up on trying to create a
        // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
        UseBuildVector = true;
        break;
      }

      // Add the mask index for the new shuffle vector.
      Ops.push_back(Idx + OpNo * NewElts);
    }

    if (UseBuildVector) {
      LLT EltTy = NarrowTy.getElementType();
      // NOTE(review): the declaration of SVOps (a SmallVector<Register>)
      // appears to be elided from this excerpt — verify upstream.

      // Extract the input elements by hand.
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        // The mask element. This indexes into the input.
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        // The input vector this mask element indexes into.
        unsigned Input = (unsigned)Idx / NewElts;

        if (Input >= std::size(Inputs)) {
          // The mask element is "undef" or indexes off the end of the input.
          SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
          continue;
        }

        // Turn the index into an offset from the start of the input vector.
        Idx -= Input * NewElts;

        // Extract the vector element by hand.
        // NOTE(review): the constant-index argument of this call appears to
        // be elided from this excerpt — verify upstream.
        SVOps.push_back(MIRBuilder
                            .buildExtractVectorElement(
                                EltTy, Inputs[Input],
                            .getReg(0));
      }

      // Construct the Lo/Hi output using a G_BUILD_VECTOR.
      Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
    } else if (InputUsed[0] == -1U) {
      // No input vectors were used! The result is undefined.
      Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
    } else {
      Register Op0 = Inputs[InputUsed[0]];
      // If only one input was used, use an undefined vector for the other.
      Register Op1 = InputUsed[1] == -1U
                         ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
                         : Inputs[InputUsed[1]];
      // At least one input vector was used. Create a new shuffle vector.
      Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
    }

    Ops.clear();
  }

  MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
  MI.eraseFromParent();
  return Legalized;
}
5645
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  auto &RdxMI = cast<GVecReduce>(MI);

  if (TypeIdx != 1)
    return UnableToLegalize;

  // The semantics of the normal non-sequential reductions allow us to freely
  // re-associate the operation.
  auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();

  // A vector NarrowTy must evenly divide the source element count.
  if (NarrowTy.isVector() &&
      (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
    return UnableToLegalize;

  unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
  SmallVector<Register> SplitSrcs;
  // If NarrowTy is a scalar then we're being asked to scalarize.
  const unsigned NumParts =
      NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
                          : SrcTy.getNumElements();

  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
  if (NarrowTy.isScalar()) {
    if (DstTy != NarrowTy)
      return UnableToLegalize; // FIXME: handle implicit extensions.

    if (isPowerOf2_32(NumParts)) {
      // Generate a tree of scalar operations to reduce the critical path.
      SmallVector<Register> PartialResults;
      unsigned NumPartsLeft = NumParts;
      while (NumPartsLeft > 1) {
        for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
          PartialResults.emplace_back(
              // NOTE(review): the `MIRBuilder` receiver line of this call
              // chain appears to be elided from this excerpt — verify.
                  .buildInstr(ScalarOpc, {NarrowTy},
                              {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
                  .getReg(0));
        }
        SplitSrcs = PartialResults;
        PartialResults.clear();
        NumPartsLeft = SplitSrcs.size();
      }
      assert(SplitSrcs.size() == 1);
      MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
      MI.eraseFromParent();
      return Legalized;
    }
    // If we can't generate a tree, then just do sequential operations.
    Register Acc = SplitSrcs[0];
    for (unsigned Idx = 1; Idx < NumParts; ++Idx)
      Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
                .getReg(0);
    MIRBuilder.buildCopy(DstReg, Acc);
    MI.eraseFromParent();
    return Legalized;
  }
  // Reduce each narrow piece down to a scalar, then combine the partial
  // results.
  SmallVector<Register> PartialReductions;
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    PartialReductions.push_back(
        MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
            .getReg(0));
  }

  // If the types involved are powers of 2, we can generate intermediate vector
  // ops, before generating a final reduction operation.
  if (isPowerOf2_32(SrcTy.getNumElements()) &&
      isPowerOf2_32(NarrowTy.getNumElements())) {
    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
  }

  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
      // The final combine writes directly into the destination register.
      MIRBuilder.buildInstr(ScalarOpc, {DstReg},
                            {Acc, PartialReductions[Part]});
    } else {
      Acc = MIRBuilder
                .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
                .getReg(0);
    }
  }
  MI.eraseFromParent();
  return Legalized;
}
5731
5734 unsigned int TypeIdx,
5735 LLT NarrowTy) {
5736 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5737 MI.getFirst3RegLLTs();
5738 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5739 DstTy != NarrowTy)
5740 return UnableToLegalize;
5741
5742 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5743 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5744 "Unexpected vecreduce opcode");
5745 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5746 ? TargetOpcode::G_FADD
5747 : TargetOpcode::G_FMUL;
5748
5749 SmallVector<Register> SplitSrcs;
5750 unsigned NumParts = SrcTy.getNumElements();
5751 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5752 Register Acc = ScalarReg;
5753 for (unsigned i = 0; i < NumParts; i++)
5754 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5755 .getReg(0);
5756
5757 MIRBuilder.buildCopy(DstReg, Acc);
5758 MI.eraseFromParent();
5759 return Legalized;
5760}
5761
LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
                                        LLT SrcTy, LLT NarrowTy,
                                        unsigned ScalarOpc) {
  SmallVector<Register> SplitSrcs;
  // Split the sources into NarrowTy size pieces.
  extractParts(SrcReg, NarrowTy,
               SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
               MIRBuilder, MRI);
  // We're going to do a tree reduction using vector operations until we have
  // one NarrowTy size value left.
  while (SplitSrcs.size() > 1) {
    SmallVector<Register> PartialRdxs;
    // Combine adjacent pieces pairwise. The caller guarantees power-of-two
    // piece counts, so every level has an even number of pieces.
    for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
      Register LHS = SplitSrcs[Idx];
      Register RHS = SplitSrcs[Idx + 1];
      // Create the intermediate vector op.
      Register Res =
          MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
      PartialRdxs.push_back(Res);
    }
    SplitSrcs = std::move(PartialRdxs);
  }
  // Finally generate the requested NarrowTy based reduction by pointing the
  // original reduction at the single remaining NarrowTy piece.
  // NOTE(review): Observer.changingInstr(MI)/changedInstr(MI) notifications
  // appear to be elided from this excerpt around the operand update — verify
  // upstream.
  MI.getOperand(1).setReg(SplitSrcs[0]);
  return Legalized;
}
5791
// Expand a wide shift (G_SHL / G_LSHR / G_ASHR) with a *constant* amount into
// operations on the two HalfTy halves of the value. Mirrors the classic
// SelectionDAG double-width shift expansion: special cases for amount == 0,
// amount >= width, amount >= half-width, amount == half-width, and the
// general "or together two partial shifts" case.
// NOTE(review): the listing elides the function header (original 5792-5793)
// and one line in the ASHR branch (5851 — presumably `Lo = Hi =` feeding the
// buildAShr on the next line); code is kept byte-identical.
5794 const LLT HalfTy, const LLT AmtTy) {
5795
// Split the wide input into low/high halves.
5796 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5797 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5798 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5799
// Shift by zero: just re-merge the halves unchanged.
5800 if (Amt.isZero()) {
5801 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5802 MI.eraseFromParent();
5803 return Legalized;
5804 }
5805
5806 LLT NVT = HalfTy;
5807 unsigned NVTBits = HalfTy.getSizeInBits();
5808 unsigned VTBits = 2 * NVTBits;
5809
5810 SrcOp Lo(Register(0)), Hi(Register(0));
5811 if (MI.getOpcode() == TargetOpcode::G_SHL) {
// Amount exceeds the full width: result is all zero.
5812 if (Amt.ugt(VTBits)) {
5813 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
// Amount exceeds half width: low half is zero, high half is the low input
// shifted by the excess.
5814 } else if (Amt.ugt(NVTBits)) {
5815 Lo = MIRBuilder.buildConstant(NVT, 0);
5816 Hi = MIRBuilder.buildShl(NVT, InL,
5817 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
// Amount exactly half width: halves swap with a zero fill.
5818 } else if (Amt == NVTBits) {
5819 Lo = MIRBuilder.buildConstant(NVT, 0);
5820 Hi = InL;
5821 } else {
// General case: Lo = InL << Amt, Hi = (InH << Amt) | (InL >> (N - Amt)).
5822 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5823 auto OrLHS =
5824 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5825 auto OrRHS = MIRBuilder.buildLShr(
5826 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5827 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5828 }
5829 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
// Logical shift right: same case structure, mirrored toward the low half.
5830 if (Amt.ugt(VTBits)) {
5831 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5832 } else if (Amt.ugt(NVTBits)) {
5833 Lo = MIRBuilder.buildLShr(NVT, InH,
5834 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5835 Hi = MIRBuilder.buildConstant(NVT, 0);
5836 } else if (Amt == NVTBits) {
5837 Lo = InH;
5838 Hi = MIRBuilder.buildConstant(NVT, 0);
5839 } else {
5840 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5841
// Lo = (InL >> Amt) | (InH << (N - Amt)); Hi = InH >> Amt.
5842 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5843 auto OrRHS = MIRBuilder.buildShl(
5844 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5845
5846 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5847 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5848 }
5849 } else {
// Arithmetic shift right: like LSHR but fills with the sign of the high
// half (InH >> (NVTBits - 1)) instead of zero.
5850 if (Amt.ugt(VTBits)) {
5852 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5853 } else if (Amt.ugt(NVTBits)) {
5854 Lo = MIRBuilder.buildAShr(NVT, InH,
5855 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5856 Hi = MIRBuilder.buildAShr(NVT, InH,
5857 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5858 } else if (Amt == NVTBits) {
5859 Lo = InH;
5860 Hi = MIRBuilder.buildAShr(NVT, InH,
5861 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5862 } else {
5863 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5864
5865 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5866 auto OrRHS = MIRBuilder.buildShl(
5867 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5868
5869 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5870 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5871 }
5872 }
5873
// Recombine the two result halves into the original destination.
5874 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5875 MI.eraseFromParent();
5876
5877 return Legalized;
5878}
5879
5880// TODO: Optimize if constant shift amount.
// Narrow a scalar shift to half-width. TypeIdx 1 (the amount operand) is
// handled by simply narrowing that operand; otherwise the value is split
// into two halves and the shift is expanded — via the constant-amount helper
// when the amount is a known constant, or with the fully general
// select-based expansion for an unknown amount.
// NOTE(review): the listing elides the function header (original 5881-5882),
// the observer notification lines (5885/5887), and the declaration at 5962
// (presumably `MachineInstrBuilder HiL;` used below); code is kept
// byte-identical.
5883 LLT RequestedTy) {
5884 if (TypeIdx == 1) {
// Only the shift-amount operand needs narrowing; the value stays wide.
5886 narrowScalarSrc(MI, RequestedTy, 2);
5888 return Legalized;
5889 }
5890
5891 Register DstReg = MI.getOperand(0).getReg();
5892 LLT DstTy = MRI.getType(DstReg);
5893 if (DstTy.isVector())
5894 return UnableToLegalize;
5895
5896 Register Amt = MI.getOperand(2).getReg();
5897 LLT ShiftAmtTy = MRI.getType(Amt);
5898 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
// Can only split an even bit width exactly in half.
5899 if (DstEltSize % 2 != 0)
5900 return UnableToLegalize;
5901
5902 // Ignore the input type. We can only go to exactly half the size of the
5903 // input. If that isn't small enough, the resulting pieces will be further
5904 // legalized.
5905 const unsigned NewBitSize = DstEltSize / 2;
5906 const LLT HalfTy = LLT::scalar(NewBitSize);
5907 const LLT CondTy = LLT::scalar(1);
5908
// Constant amounts get the much cheaper specialized expansion.
5909 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5910 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5911 ShiftAmtTy);
5912 }
5913
5914 // TODO: Expand with known bits.
5915
5916 // Handle the fully general expansion by an unknown amount.
5917 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
5918
5919 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5920 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5921 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5922
// AmtExcess = Amt - NewBitSize (used when the shift crosses the halves);
// AmtLack = NewBitSize - Amt (bits carried between halves otherwise).
5923 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
5924 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
5925
5926 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
// IsShort: amount stays within one half; IsZero guards the degenerate case
// where AmtLack would equal NewBitSize (an out-of-range half-shift).
5927 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
5928 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
5929
5930 Register ResultRegs[2];
5931 switch (MI.getOpcode()) {
5932 case TargetOpcode::G_SHL: {
5933 // Short: ShAmt < NewBitSize
5934 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
5935
5936 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
5937 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
5938 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5939
5940 // Long: ShAmt >= NewBitSize
5941 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
5942 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
5943
// Select between short/long variants; the extra IsZero select keeps the
// unmodified InH when Amt == 0 (where the short path's LShr is invalid).
5944 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
5945 auto Hi = MIRBuilder.buildSelect(
5946 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
5947
5948 ResultRegs[0] = Lo.getReg(0);
5949 ResultRegs[1] = Hi.getReg(0);
5950 break;
5951 }
5952 case TargetOpcode::G_LSHR:
5953 case TargetOpcode::G_ASHR: {
5954 // Short: ShAmt < NewBitSize
5955 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
5956
5957 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
5958 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
5959 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5960
5961 // Long: ShAmt >= NewBitSize
// For LSHR the long-shift high half is zero; for ASHR it is the sign bits
// of InH replicated across the half.
5963 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5964 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
5965 } else {
5966 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
5967 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
5968 }
5969 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
5970 {InH, AmtExcess}); // Lo from Hi part.
5971
5972 auto Lo = MIRBuilder.buildSelect(
5973 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
5974
5975 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
5976
5977 ResultRegs[0] = Lo.getReg(0);
5978 ResultRegs[1] = Hi.getReg(0);
5979 break;
5980 }
5981 default:
5982 llvm_unreachable("not a shift");
5983 }
5984
5985 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
5986 MI.eraseFromParent();
5987 return Legalized;
5988}
5989
// Widen a G_PHI's vector type to MoreTy: widen each incoming value (operand
// pairs start at index 1: value, predecessor-MBB) and then the result.
// NOTE(review): the listing elides the function header (5990-5991) plus the
// observer-notification and insert-point lines (5995/5998/6003/6005 —
// presumably MIRBuilder.setInsertPt into each predecessor's terminator
// position and into MBB after the PHIs); code is kept byte-identical.
5992 LLT MoreTy) {
5993 assert(TypeIdx == 0 && "Expecting only Idx 0");
5994
// Walk (value, MBB) operand pairs and pad each incoming vector value.
5996 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5997 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5999 moreElementsVectorSrc(MI, MoreTy, I);
6000 }
6001
6002 MachineBasicBlock &MBB = *MI.getParent();
6004 moreElementsVectorDst(MI, MoreTy, 0);
6006 return Legalized;
6007}
6008
// Build the identity (neutral) element for a vector-reduce opcode in scalar
// type Ty, used to pad a widened reduction input so the extra lanes don't
// change the result (e.g. 0 for ADD/OR/XOR/UMAX, 1 for MUL, -0.0 for FADD).
// NOTE(review): the listing elides several return/statement lines (6015 —
// presumably llvm_unreachable; 6026-6027, 6029-6030, 6032-6033 — presumably
// the all-ones / signed-max / signed-min constants for AND+UMIN, SMAX and
// SMIN respectively); code is kept byte-identical.
6009MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6010 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6011 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6012
6013 switch (Opcode) {
6014 default:
6016 "getNeutralElementForVecReduce called with invalid opcode!");
6017 case TargetOpcode::G_VECREDUCE_ADD:
6018 case TargetOpcode::G_VECREDUCE_OR:
6019 case TargetOpcode::G_VECREDUCE_XOR:
6020 case TargetOpcode::G_VECREDUCE_UMAX:
6021 return MIRBuilder.buildConstant(Ty, 0);
6022 case TargetOpcode::G_VECREDUCE_MUL:
6023 return MIRBuilder.buildConstant(Ty, 1);
6024 case TargetOpcode::G_VECREDUCE_AND:
6025 case TargetOpcode::G_VECREDUCE_UMIN:
6028 case TargetOpcode::G_VECREDUCE_SMAX:
6031 case TargetOpcode::G_VECREDUCE_SMIN:
6034 case TargetOpcode::G_VECREDUCE_FADD:
// -0.0 (not +0.0) is the FADD identity: x + (-0.0) == x for all x.
6035 return MIRBuilder.buildFConstant(Ty, -0.0);
6036 case TargetOpcode::G_VECREDUCE_FMUL:
6037 return MIRBuilder.buildFConstant(Ty, 1.0);
6038 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6039 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
// FMINIMUM/FMAXIMUM have no representable neutral element implemented yet.
6040 assert(false && "getNeutralElementForVecReduce unimplemented for "
6041 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6042 }
6043 llvm_unreachable("switch expected to return!");
6044}
6045
// Legalize MI by widening a vector operand/result to MoreTy (adding
// elements). Dispatches on opcode: most cases widen the relevant source
// operands with undef padding (moreElementsVectorSrc) and the destination
// (moreElementsVectorDst); special handling exists for selects, phis,
// shuffles, build_vector, conversions, compares, bitcasts, and reductions.
// NOTE(review): the listing elides the function header (6046-6047) and many
// single lines throughout — by position these appear to be the
// Observer.changingInstr/changedInstr pairs around each mutation, plus a few
// declarations (e.g. the Elts SmallVector at 6182, IdxTy at 6280); code is
// kept byte-identical.
6048 LLT MoreTy) {
6049 unsigned Opc = MI.getOpcode();
6050 switch (Opc) {
6051 case TargetOpcode::G_IMPLICIT_DEF:
6052 case TargetOpcode::G_LOAD: {
6053 if (TypeIdx != 0)
6054 return UnableToLegalize;
6056 moreElementsVectorDst(MI, MoreTy, 0);
6058 return Legalized;
6059 }
6060 case TargetOpcode::G_STORE:
6061 if (TypeIdx != 0)
6062 return UnableToLegalize;
6064 moreElementsVectorSrc(MI, MoreTy, 0);
6066 return Legalized;
// Elementwise binary ops: widen both sources and the destination. Padding
// lanes are undef, which is fine since they are never observed.
6067 case TargetOpcode::G_AND:
6068 case TargetOpcode::G_OR:
6069 case TargetOpcode::G_XOR:
6070 case TargetOpcode::G_ADD:
6071 case TargetOpcode::G_SUB:
6072 case TargetOpcode::G_MUL:
6073 case TargetOpcode::G_FADD:
6074 case TargetOpcode::G_FSUB:
6075 case TargetOpcode::G_FMUL:
6076 case TargetOpcode::G_FDIV:
6077 case TargetOpcode::G_FCOPYSIGN:
6078 case TargetOpcode::G_UADDSAT:
6079 case TargetOpcode::G_USUBSAT:
6080 case TargetOpcode::G_SADDSAT:
6081 case TargetOpcode::G_SSUBSAT:
6082 case TargetOpcode::G_SMIN:
6083 case TargetOpcode::G_SMAX:
6084 case TargetOpcode::G_UMIN:
6085 case TargetOpcode::G_UMAX:
6086 case TargetOpcode::G_FMINNUM:
6087 case TargetOpcode::G_FMAXNUM:
6088 case TargetOpcode::G_FMINNUM_IEEE:
6089 case TargetOpcode::G_FMAXNUM_IEEE:
6090 case TargetOpcode::G_FMINIMUM:
6091 case TargetOpcode::G_FMAXIMUM:
6092 case TargetOpcode::G_STRICT_FADD:
6093 case TargetOpcode::G_STRICT_FSUB:
6094 case TargetOpcode::G_STRICT_FMUL:
6095 case TargetOpcode::G_SHL:
6096 case TargetOpcode::G_ASHR:
6097 case TargetOpcode::G_LSHR: {
6099 moreElementsVectorSrc(MI, MoreTy, 1);
6100 moreElementsVectorSrc(MI, MoreTy, 2);
6101 moreElementsVectorDst(MI, MoreTy, 0);
6103 return Legalized;
6104 }
// Ternary elementwise ops: widen all three sources plus the destination.
6105 case TargetOpcode::G_FMA:
6106 case TargetOpcode::G_STRICT_FMA:
6107 case TargetOpcode::G_FSHR:
6108 case TargetOpcode::G_FSHL: {
6110 moreElementsVectorSrc(MI, MoreTy, 1);
6111 moreElementsVectorSrc(MI, MoreTy, 2);
6112 moreElementsVectorSrc(MI, MoreTy, 3);
6113 moreElementsVectorDst(MI, MoreTy, 0);
6115 return Legalized;
6116 }
6117 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6118 case TargetOpcode::G_EXTRACT:
6119 if (TypeIdx != 1)
6120 return UnableToLegalize;
6122 moreElementsVectorSrc(MI, MoreTy, 1);
6124 return Legalized;
// Unary elementwise ops (plus inserts): widen source 1 and the destination.
6125 case TargetOpcode::G_INSERT:
6126 case TargetOpcode::G_INSERT_VECTOR_ELT:
6127 case TargetOpcode::G_FREEZE:
6128 case TargetOpcode::G_FNEG:
6129 case TargetOpcode::G_FABS:
6130 case TargetOpcode::G_FSQRT:
6131 case TargetOpcode::G_FCEIL:
6132 case TargetOpcode::G_FFLOOR:
6133 case TargetOpcode::G_FNEARBYINT:
6134 case TargetOpcode::G_FRINT:
6135 case TargetOpcode::G_INTRINSIC_ROUND:
6136 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6137 case TargetOpcode::G_INTRINSIC_TRUNC:
6138 case TargetOpcode::G_BSWAP:
6139 case TargetOpcode::G_FCANONICALIZE:
6140 case TargetOpcode::G_SEXT_INREG:
6141 case TargetOpcode::G_ABS:
6142 if (TypeIdx != 0)
6143 return UnableToLegalize;
6145 moreElementsVectorSrc(MI, MoreTy, 1);
6146 moreElementsVectorDst(MI, MoreTy, 0);
6148 return Legalized;
6149 case TargetOpcode::G_SELECT: {
6150 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6151 if (TypeIdx == 1) {
6152 if (!CondTy.isScalar() ||
6153 DstTy.getElementCount() != MoreTy.getElementCount())
6154 return UnableToLegalize;
6155
6156 // This is turning a scalar select of vectors into a vector
6157 // select. Broadcast the select condition.
6158 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6160 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6162 return Legalized;
6163 }
6164
6165 if (CondTy.isVector())
6166 return UnableToLegalize;
6167
// Scalar condition: just widen the two value operands and the result.
6169 moreElementsVectorSrc(MI, MoreTy, 2);
6170 moreElementsVectorSrc(MI, MoreTy, 3);
6171 moreElementsVectorDst(MI, MoreTy, 0);
6173 return Legalized;
6174 }
6175 case TargetOpcode::G_UNMERGE_VALUES:
6176 return UnableToLegalize;
6177 case TargetOpcode::G_PHI:
6178 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6179 case TargetOpcode::G_SHUFFLE_VECTOR:
6180 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6181 case TargetOpcode::G_BUILD_VECTOR: {
// Pad the element list to MoreTy's length (listing elides the Elts
// declaration at 6182 and the undef push at 6188) and rebuild.
6183 for (auto Op : MI.uses()) {
6184 Elts.push_back(Op.getReg());
6185 }
6186
6187 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6189 }
6190
6192 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6193 MI.eraseFromParent();
6194 return Legalized;
6195 }
// Conversions: the source and destination element types differ, so the
// non-designated side is widened to a vector of its own element type with
// MoreTy's element count.
6196 case TargetOpcode::G_SEXT:
6197 case TargetOpcode::G_ZEXT:
6198 case TargetOpcode::G_ANYEXT:
6199 case TargetOpcode::G_TRUNC:
6200 case TargetOpcode::G_FPTRUNC:
6201 case TargetOpcode::G_FPEXT:
6202 case TargetOpcode::G_FPTOSI:
6203 case TargetOpcode::G_FPTOUI:
6204 case TargetOpcode::G_FPTOSI_SAT:
6205 case TargetOpcode::G_FPTOUI_SAT:
6206 case TargetOpcode::G_SITOFP:
6207 case TargetOpcode::G_UITOFP: {
6209 LLT SrcExtTy;
6210 LLT DstExtTy;
6211 if (TypeIdx == 0) {
6212 DstExtTy = MoreTy;
6213 SrcExtTy = LLT::fixed_vector(
6214 MoreTy.getNumElements(),
6215 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6216 } else {
6217 DstExtTy = LLT::fixed_vector(
6218 MoreTy.getNumElements(),
6219 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6220 SrcExtTy = MoreTy;
6221 }
6222 moreElementsVectorSrc(MI, SrcExtTy, 1);
6223 moreElementsVectorDst(MI, DstExtTy, 0);
6225 return Legalized;
6226 }
6227 case TargetOpcode::G_ICMP:
6228 case TargetOpcode::G_FCMP: {
6229 if (TypeIdx != 1)
6230 return UnableToLegalize;
6231
// Widen both compare operands; the result keeps its own (boolean-like)
// element type but takes MoreTy's element count.
6233 moreElementsVectorSrc(MI, MoreTy, 2);
6234 moreElementsVectorSrc(MI, MoreTy, 3);
6235 LLT CondTy = LLT::fixed_vector(
6236 MoreTy.getNumElements(),
6237 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6238 moreElementsVectorDst(MI, CondTy, 0);
6240 return Legalized;
6241 }
6242 case TargetOpcode::G_BITCAST: {
6243 if (TypeIdx != 0)
6244 return UnableToLegalize;
6245
6246 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6247 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6248
// Scale the source element count so the widened source's total size still
// matches the widened destination's; bail if it doesn't divide evenly.
6249 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6250 if (coefficient % DstTy.getNumElements() != 0)
6251 return UnableToLegalize;
6252
6253 coefficient = coefficient / DstTy.getNumElements();
6254
6255 LLT NewTy = SrcTy.changeElementCount(
6256 ElementCount::get(coefficient, MoreTy.isScalable()));
6258 moreElementsVectorSrc(MI, NewTy, 1);
6259 moreElementsVectorDst(MI, MoreTy, 0);
6261 return Legalized;
6262 }
6263 case TargetOpcode::G_VECREDUCE_FADD:
6264 case TargetOpcode::G_VECREDUCE_FMUL:
6265 case TargetOpcode::G_VECREDUCE_ADD:
6266 case TargetOpcode::G_VECREDUCE_MUL:
6267 case TargetOpcode::G_VECREDUCE_AND:
6268 case TargetOpcode::G_VECREDUCE_OR:
6269 case TargetOpcode::G_VECREDUCE_XOR:
6270 case TargetOpcode::G_VECREDUCE_SMAX:
6271 case TargetOpcode::G_VECREDUCE_SMIN:
6272 case TargetOpcode::G_VECREDUCE_UMAX:
6273 case TargetOpcode::G_VECREDUCE_UMIN: {
// Pad the reduction input with the opcode's neutral element (not undef!)
// so the extra lanes cannot change the reduced value.
6274 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6275 MachineOperand &MO = MI.getOperand(1);
6276 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6277 auto NeutralElement = getNeutralElementForVecReduce(
6278 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6279
6281 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6282 i != e; i++) {
6283 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6284 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6285 NeutralElement, Idx);
6286 }
6287
6289 MO.setReg(NewVec.getReg(0));
6291 return Legalized;
6292 }
6293
6294 default:
6295 return UnableToLegalize;
6296 }
6297}
6298
// Rewrite a G_SHUFFLE_VECTOR whose destination length differs from its
// source length so that destination and sources have matching element
// counts (presumably equalizeVectorShuffleLengths — the header lines
// 6299-6300 are elided from this listing; code is kept byte-identical).
// - Dest shorter than sources: widen the dest and pad the mask with undefs.
// - Dest longer: pad the sources with undef vectors up to a multiple of
//   their length, remap the mask, and extract the needed elements.
6301 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6302 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6303 unsigned MaskNumElts = Mask.size();
6304 unsigned SrcNumElts = SrcTy.getNumElements();
6305 LLT DestEltTy = DstTy.getElementType();
6306
// Already the same length: nothing to do.
6307 if (MaskNumElts == SrcNumElts)
6308 return Legalized;
6309
6310 if (MaskNumElts < SrcNumElts) {
6311 // Extend mask to match new destination vector size with
6312 // undef values.
6313 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6314 llvm::copy(Mask, NewMask.begin());
6315
6316 moreElementsVectorDst(MI, SrcTy, 0);
6318 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6319 MI.getOperand(1).getReg(),
6320 MI.getOperand(2).getReg(), NewMask);
6321 MI.eraseFromParent();
6322
6323 return Legalized;
6324 }
6325
// Destination is longer: round its length up to a multiple of SrcNumElts
// and work in that padded type.
6326 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6327 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6328 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6329
6330 // Create new source vectors by concatenating the initial
6331 // source vectors with undefined vectors of the same size.
6332 auto Undef = MIRBuilder.buildUndef(SrcTy);
6333 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6334 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6335 MOps1[0] = MI.getOperand(1).getReg();
6336 MOps2[0] = MI.getOperand(2).getReg();
6337
6338 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6339 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6340
6341 // Readjust mask for new input vector length.
// Indices that referred to Src2 shift up by the padding added to Src1.
6342 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6343 for (unsigned I = 0; I != MaskNumElts; ++I) {
6344 int Idx = Mask[I];
6345 if (Idx >= static_cast<int>(SrcNumElts))
6346 Idx += PaddedMaskNumElts - SrcNumElts;
6347 MappedOps[I] = Idx;
6348 }
6349
6350 // If we got more elements than required, extract subvector.
6351 if (MaskNumElts != PaddedMaskNumElts) {
6352 auto Shuffle =
6353 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6354
// Pull out the first MaskNumElts lanes element-by-element (the extract
// expression at 6358 is elided from this listing) and rebuild the result.
6355 SmallVector<Register, 16> Elts(MaskNumElts);
6356 for (unsigned I = 0; I < MaskNumElts; ++I) {
6357 Elts[I] =
6359 .getReg(0);
6360 }
6361 MIRBuilder.buildBuildVector(DstReg, Elts);
6362 } else {
6363 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6364 }
6365
6366 MI.eraseFromParent();
6368}
6369
// Widen a canonicalized G_SHUFFLE_VECTOR (dest and both sources share one
// type) to MoreTy: widen both sources, remap mask indices that point into
// the second source, widen the destination, and rebuild the shuffle.
// NOTE(review): the function header (6370-6371) and the return at 6380
// (presumably delegating to the length-equalization path when dest and
// source lengths differ) are elided from this listing; code is kept
// byte-identical.
6372 unsigned int TypeIdx, LLT MoreTy) {
6373 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6374 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6375 unsigned NumElts = DstTy.getNumElements();
6376 unsigned WidenNumElts = MoreTy.getNumElements();
6377
6378 if (DstTy.isVector() && Src1Ty.isVector() &&
6379 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6381 }
6382
6383 if (TypeIdx != 0)
6384 return UnableToLegalize;
6385
6386 // Expect a canonicalized shuffle.
6387 if (DstTy != Src1Ty || DstTy != Src2Ty)
6388 return UnableToLegalize;
6389
6390 moreElementsVectorSrc(MI, MoreTy, 1);
6391 moreElementsVectorSrc(MI, MoreTy, 2);
6392
6393 // Adjust mask based on new input vector length.
// Second-source indices (>= NumElts) shift up because the widened Src2 now
// starts WidenNumElts lanes after the start of Src1.
6394 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6395 for (unsigned I = 0; I != NumElts; ++I) {
6396 int Idx = Mask[I];
6397 if (Idx < static_cast<int>(NumElts))
6398 NewMask[I] = Idx;
6399 else
6400 NewMask[I] = Idx - NumElts + WidenNumElts;
6401 }
6402 moreElementsVectorDst(MI, MoreTy, 0);
6404 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6405 MI.getOperand(1).getReg(),
6406 MI.getOperand(2).getReg(), NewMask);
6407 MI.eraseFromParent();
6408 return Legalized;
6409}
6410
// Schoolbook multi-part multiplication: multiply two numbers split into
// NarrowTy-sized parts (little-endian part order) producing DstRegs.size()
// result parts. For each destination part, sums the low halves of the
// cross products at that position, the high halves (G_UMULH) from the
// previous position, and the accumulated carries from the previous step.
// NOTE(review): a few lines are elided from this listing (6415 — presumably
// the MIRBuilder alias `B`; 6425/6431 — presumably the Factors vector
// declaration and a MachineInstrBuilder for Mul); code is byte-identical.
6411void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6412 ArrayRef<Register> Src1Regs,
6413 ArrayRef<Register> Src2Regs,
6414 LLT NarrowTy) {
6416 unsigned SrcParts = Src1Regs.size();
6417 unsigned DstParts = DstRegs.size();
6418
// Part 0 is simply the low half of the lowest partial product.
6419 unsigned DstIdx = 0; // Low bits of the result.
6420 Register FactorSum =
6421 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6422 DstRegs[DstIdx] = FactorSum;
6423
6424 unsigned CarrySumPrevDstIdx;
6426
6427 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6428 // Collect low parts of muls for DstIdx.
// Iterate i so that Src1[DstIdx-i] * Src2[i] contributes at this position;
// the bounds clamp i into both sources' valid ranges.
6429 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6430 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6432 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6433 Factors.push_back(Mul.getReg(0));
6434 }
6435 // Collect high parts of muls from previous DstIdx.
6436 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6437 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6438 MachineInstrBuilder Umulh =
6439 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6440 Factors.push_back(Umulh.getReg(0));
6441 }
6442 // Add CarrySum from additions calculated for previous DstIdx.
6443 if (DstIdx != 1) {
6444 Factors.push_back(CarrySumPrevDstIdx);
6445 }
6446
6447 Register CarrySum;
6448 // Add all factors and accumulate all carries into CarrySum.
6449 if (DstIdx != DstParts - 1) {
// Use carry-producing adds (G_UADDO) so carries propagate into the next
// (more significant) destination part.
6450 MachineInstrBuilder Uaddo =
6451 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6452 FactorSum = Uaddo.getReg(0);
6453 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6454 for (unsigned i = 2; i < Factors.size(); ++i) {
6455 MachineInstrBuilder Uaddo =
6456 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6457 FactorSum = Uaddo.getReg(0);
6458 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
6459 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6460 }
6461 } else {
6462 // Since value for the next index is not calculated, neither is CarrySum.
// Top part: plain adds suffice — overflow out of the top is discarded.
6463 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6464 for (unsigned i = 2; i < Factors.size(); ++i)
6465 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6466 }
6467
6468 CarrySumPrevDstIdx = CarrySum;
6469 DstRegs[DstIdx] = FactorSum;
6470 Factors.clear();
6471 }
6472}
6473
// Narrow a wide add/sub (optionally with carry-in and/or carry-out:
// G_ADD/SUB, G_[US]ADDO/[US]ADDE, G_[US]SUBO/[US]SUBE) into a ripple chain
// of NarrowTy-wide carry ops. The first part uses the overflow-producing
// opcode (OpO), middle parts the carry-in/out opcode (OpE), and the final
// part OpF, which differs only when the original op is *signed* overflow —
// its last stage must compute signed overflow.
// NOTE(review): the function header (6474-6475) is elided from this
// listing; code is kept byte-identical.
6476 LLT NarrowTy) {
6477 if (TypeIdx != 0)
6478 return UnableToLegalize;
6479
6480 Register DstReg = MI.getOperand(0).getReg();
6481 LLT DstType = MRI.getType(DstReg);
6482 // FIXME: add support for vector types
6483 if (DstType.isVector())
6484 return UnableToLegalize;
6485
6486 unsigned Opcode = MI.getOpcode();
6487 unsigned OpO, OpE, OpF;
6488 switch (Opcode) {
6489 case TargetOpcode::G_SADDO:
6490 case TargetOpcode::G_SADDE:
6491 case TargetOpcode::G_UADDO:
6492 case TargetOpcode::G_UADDE:
6493 case TargetOpcode::G_ADD:
6494 OpO = TargetOpcode::G_UADDO;
6495 OpE = TargetOpcode::G_UADDE;
6496 OpF = TargetOpcode::G_UADDE;
6497 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
6498 OpF = TargetOpcode::G_SADDE;
6499 break;
6500 case TargetOpcode::G_SSUBO:
6501 case TargetOpcode::G_SSUBE:
6502 case TargetOpcode::G_USUBO:
6503 case TargetOpcode::G_USUBE:
6504 case TargetOpcode::G_SUB:
6505 OpO = TargetOpcode::G_USUBO;
6506 OpE = TargetOpcode::G_USUBE;
6507 OpF = TargetOpcode::G_USUBE;
6508 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
6509 OpF = TargetOpcode::G_SSUBE;
6510 break;
6511 default:
6512 llvm_unreachable("Unexpected add/sub opcode!");
6513 }
6514
6515 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
6516 unsigned NumDefs = MI.getNumExplicitDefs();
6517 Register Src1 = MI.getOperand(NumDefs).getReg();
6518 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
// CarryDst stays invalid for plain add/sub; CarryIn stays invalid for the
// non-"E" (no carry-in) variants.
6519 Register CarryDst, CarryIn;
6520 if (NumDefs == 2)
6521 CarryDst = MI.getOperand(1).getReg();
6522 if (MI.getNumOperands() == NumDefs + 3)
6523 CarryIn = MI.getOperand(NumDefs + 2).getReg();
6524
// Split both sources into NarrowTy parts plus a possible leftover piece.
6525 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6526 LLT LeftoverTy, DummyTy;
6527 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
6528 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
6529 MIRBuilder, MRI);
6530 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
6531 MRI);
6532
6533 int NarrowParts = Src1Regs.size();
6534 Src1Regs.append(Src1Left);
6535 Src2Regs.append(Src2Left);
6536 DstRegs.reserve(Src1Regs.size());
6537
6538 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
6539 Register DstReg =
6540 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
6541 Register CarryOut;
6542 // Forward the final carry-out to the destination register
6543 if (i == e - 1 && CarryDst)
6544 CarryOut = CarryDst;
6545 else
6546 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
6547
// First part (no carry-in yet) uses OpO; last part uses OpF so signed
// overflow, when requested, is computed at the top; middle parts use OpE.
6548 if (!CarryIn) {
6549 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
6550 {Src1Regs[i], Src2Regs[i]});
6551 } else if (i == e - 1) {
6552 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
6553 {Src1Regs[i], Src2Regs[i], CarryIn});
6554 } else {
6555 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
6556 {Src1Regs[i], Src2Regs[i], CarryIn});
6557 }
6558
6559 DstRegs.push_back(DstReg);
// Ripple: this part's carry-out feeds the next part's carry-in.
6560 CarryIn = CarryOut;
6561 }
6562 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
6563 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
6564 ArrayRef(DstRegs).drop_front(NarrowParts));
6565
6566 MI.eraseFromParent();
6567 return Legalized;
6568}
6569
// Narrow a wide G_MUL or G_UMULH into NarrowTy parts using the schoolbook
// algorithm in multiplyRegisters. For G_UMULH the full double-width product
// is computed part-wise and only the upper half of the parts is kept.
// NOTE(review): the function header (original 6570-6571) is elided from
// this listing; code is kept byte-identical.
6572 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
6573
6574 LLT Ty = MRI.getType(DstReg);
6575 if (Ty.isVector())
6576 return UnableToLegalize;
6577
// Require an exact split into NarrowTy-sized parts.
6578 unsigned Size = Ty.getSizeInBits();
6579 unsigned NarrowSize = NarrowTy.getSizeInBits();
6580 if (Size % NarrowSize != 0)
6581 return UnableToLegalize;
6582
6583 unsigned NumParts = Size / NarrowSize;
6584 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
// UMULH needs the full 2*NumParts-part product to recover the high half.
6585 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
6586
6587 SmallVector<Register, 2> Src1Parts, Src2Parts;
6588 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
6589 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
6590 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
6591 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6592
6593 // Take only high half of registers if this is high mul.
6594 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
6595 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6596 MI.eraseFromParent();
6597 return Legalized;
6598}
6599
// Narrow the integer result of G_FPTOSI/G_FPTOUI when the narrower integer
// can still represent every finite value of the source FP type. Practically
// this only fires for f16 sources: all finite halves fit in 16 bits
// (17 for signed, to cover the sign), so the conversion can produce the
// narrow type and then be sign-/zero-extended to the original width.
// NOTE(review): the function header (6600-6601) and the observer lines
// (6618/6621) are elided from this listing; code is kept byte-identical.
6602 LLT NarrowTy) {
6603 if (TypeIdx != 0)
6604 return UnableToLegalize;
6605
6606 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
6607
6608 Register Src = MI.getOperand(1).getReg();
6609 LLT SrcTy = MRI.getType(Src);
6610
6611 // If all finite floats fit into the narrowed integer type, we can just swap
6612 // out the result type. This is practically only useful for conversions from
6613 // half to at least 16-bits, so just handle the one case.
6614 if (SrcTy.getScalarType() != LLT::scalar(16) ||
6615 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
6616 return UnableToLegalize;
6617
// Retype the dest and extend back up with the signedness-matching opcode.
6619 narrowScalarDst(MI, NarrowTy, 0,
6620 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6622 return Legalized;
6623}
6624
// Narrow the wide *source* (TypeIdx 1) of a G_EXTRACT: split the source
// into NarrowTy parts and, for each part overlapped by the extracted bit
// range [OpStart, OpStart+OpSize), either forward the whole part or extract
// the overlapping segment, then reassemble the destination from segments.
// NOTE (review): the function header (6625-6626) is elided from this
// listing; code is kept byte-identical.
6627 LLT NarrowTy) {
6628 if (TypeIdx != 1)
6629 return UnableToLegalize;
6630
6631 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6632
6633 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6634 // FIXME: add support for when SizeOp1 isn't an exact multiple of
6635 // NarrowSize.
6636 if (SizeOp1 % NarrowSize != 0)
6637 return UnableToLegalize;
6638 int NumParts = SizeOp1 / NarrowSize;
6639
6640 SmallVector<Register, 2> SrcRegs, DstRegs;
6642 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6643 MIRBuilder, MRI);
6644
6645 Register OpReg = MI.getOperand(0).getReg();
6646 uint64_t OpStart = MI.getOperand(2).getImm();
6647 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6648 for (int i = 0; i < NumParts; ++i) {
6649 unsigned SrcStart = i * NarrowSize;
6650
6651 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6652 // No part of the extract uses this subregister, ignore it.
6653 continue;
6654 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6655 // The entire subregister is extracted, forward the value.
6656 DstRegs.push_back(SrcRegs[i]);
6657 continue;
6658 }
6659
6660 // OpSegStart is where this destination segment would start in OpReg if it
6661 // extended infinitely in both directions.
// Compute the offset within this part and the number of bits to take,
// clamped to both the part and the extracted range.
6662 int64_t ExtractOffset;
6663 uint64_t SegSize;
6664 if (OpStart < SrcStart) {
6665 ExtractOffset = 0;
6666 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6667 } else {
6668 ExtractOffset = OpStart - SrcStart;
6669 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6670 }
6671
6672 Register SegReg = SrcRegs[i];
6673 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6674 // A genuine extract is needed.
6675 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6676 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
6677 }
6678
6679 DstRegs.push_back(SegReg);
6680 }
6681
// Reassemble: build_vector for vector dests, merge for multi-segment
// scalars, plain copy when a single segment covers the whole dest.
6682 Register DstReg = MI.getOperand(0).getReg();
6683 if (MRI.getType(DstReg).isVector())
6684 MIRBuilder.buildBuildVector(DstReg, DstRegs);
6685 else if (DstRegs.size() > 1)
6686 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6687 else
6688 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
6689 MI.eraseFromParent();
6690 return Legalized;
6691}
6692
// Narrow a G_INSERT on its wide destination/source (TypeIdx 0): split the
// base value into NarrowTy parts (plus leftover), splice the inserted value
// into each overlapped part via per-part extract+insert, and re-merge.
// NOTE(review): the function header (6693-6694) is elided from this
// listing; code is kept byte-identical.
6695 LLT NarrowTy) {
6696 // FIXME: Don't know how to handle secondary types yet.
6697 if (TypeIdx != 0)
6698 return UnableToLegalize;
6699
6700 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
6702 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6703 LLT LeftoverTy;
6704 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6705 LeftoverRegs, MIRBuilder, MRI);
6706
6707 SrcRegs.append(LeftoverRegs);
6708
6709 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6710 Register OpReg = MI.getOperand(2).getReg();
6711 uint64_t OpStart = MI.getOperand(3).getImm();
6712 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6713 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6714 unsigned DstStart = I * NarrowSize;
6715
6716 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6717 // The entire subregister is defined by this insert, forward the new
6718 // value.
6719 DstRegs.push_back(OpReg);
6720 continue;
6721 }
6722
6723 Register SrcReg = SrcRegs[I];
6724 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6725 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6726 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6727 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6728 }
6729
6730 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6731 // No part of the insert affects this subregister, forward the original.
6732 DstRegs.push_back(SrcReg);
6733 continue;
6734 }
6735
6736 // OpSegStart is where this destination segment would start in OpReg if it
6737 // extended infinitely in both directions.
// ExtractOffset: where in OpReg the overlapping bits start; InsertOffset:
// where in this part they land; SegSize: how many bits overlap.
6738 int64_t ExtractOffset, InsertOffset;
6739 uint64_t SegSize;
6740 if (OpStart < DstStart) {
6741 InsertOffset = 0;
6742 ExtractOffset = DstStart - OpStart;
6743 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6744 } else {
6745 InsertOffset = OpStart - DstStart;
6746 ExtractOffset = 0;
6747 SegSize =
6748 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6749 }
6750
6751 Register SegReg = OpReg;
6752 if (ExtractOffset != 0 || SegSize != OpSize) {
6753 // A genuine extract is needed.
6754 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6755 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6756 }
6757
6758 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6759 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6760 DstRegs.push_back(DstReg);
6761 }
6762
// The any-extended leftover part may have widened the total; if so, merge
// into a wider scalar and truncate back to the original width.
6763 uint64_t WideSize = DstRegs.size() * NarrowSize;
6764 Register DstReg = MI.getOperand(0).getReg();
6765 if (WideSize > RegTy.getSizeInBits()) {
6766 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6767 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6768 MIRBuilder.buildTrunc(DstReg, MergeReg);
6769 } else
6770 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6771
6772 MI.eraseFromParent();
6773 return Legalized;
6774}
6775
// NOTE(review): the signature line was dropped by this extraction; from the
// body this appears to be LegalizerHelper::narrowScalarBasic(MachineInstr &MI,
// unsigned TypeIdx, LLT NarrowTy) — confirm against upstream.
//
// Narrows a simple two-source instruction (same opcode applied piecewise):
// split both sources into NarrowTy pieces plus an optional leftover piece,
// apply MI's opcode to each corresponding pair, then remerge into the
// destination with insertParts.
6778 LLT NarrowTy) {
6779 Register DstReg = MI.getOperand(0).getReg();
6780 LLT DstTy = MRI.getType(DstReg);
6781
// Expect exactly dst + two sources, and narrowing of the result type.
6782 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6783
6784 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6785 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6786 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6787 LLT LeftoverTy;
// Split the first source; if DstTy can't be decomposed into NarrowTy pieces
// (plus leftover), give up.
6788 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6789 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6790 return UnableToLegalize;
6791
// The second source has the same type, so this split must succeed with the
// same breakdown.
6792 LLT Unused;
6793 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6794 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6795 llvm_unreachable("inconsistent extractParts result");
6796
// Apply the operation piecewise on the NarrowTy parts.
6797 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6798 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6799 {Src0Regs[I], Src1Regs[I]});
6800 DstRegs.push_back(Inst.getReg(0));
6801 }
6802
// ... and on the leftover parts (if any).
6803 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6804 auto Inst = MIRBuilder.buildInstr(
6805 MI.getOpcode(),
6806 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6807 DstLeftoverRegs.push_back(Inst.getReg(0));
6808 }
6809
// Reassemble the narrow results into the original destination register.
6810 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6811 LeftoverTy, DstLeftoverRegs);
6812
6813 MI.eraseFromParent();
6814 return Legalized;
6815}
6816
// NOTE(review): signature line dropped by this extraction; the body matches
// LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
// LLT NarrowTy) — confirm against upstream.
//
// Narrows a scalar extension by extracting the source at the GCD type,
// rebuilding the wide value with pieces merged via MI's extend opcode, and
// remerging into the destination.
6819 LLT NarrowTy) {
6820 if (TypeIdx != 0)
6821 return UnableToLegalize;
6822
6823 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6824
6825 LLT DstTy = MRI.getType(DstReg);
6826 if (DstTy.isVector())
6827 return UnableToLegalize;
6828
// NOTE(review): a line (original 6829) was dropped here by the extraction —
// presumably the declaration of the `Parts` register vector used below.
6830 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6831 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6832 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6833
6834 MI.eraseFromParent();
6835 return Legalized;
6836}
6837
// NOTE(review): signature line dropped by this extraction; the body matches
// LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
// LLT NarrowTy) — confirm against upstream.
//
// Narrows a G_SELECT with a scalar condition: split both value operands into
// NarrowTy pieces (plus leftover), emit one select per piece reusing the same
// condition, then remerge into the destination.
6840 LLT NarrowTy) {
6841 if (TypeIdx != 0)
6842 return UnableToLegalize;
6843
6844 Register CondReg = MI.getOperand(1).getReg();
6845 LLT CondTy = MRI.getType(CondReg);
6846 if (CondTy.isVector()) // TODO: Handle vselect
6847 return UnableToLegalize;
6848
6849 Register DstReg = MI.getOperand(0).getReg();
6850 LLT DstTy = MRI.getType(DstReg);
6851
6852 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6853 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6854 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6855 LLT LeftoverTy;
// Split the true-value operand; bail if the breakdown isn't possible.
6856 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6857 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6858 return UnableToLegalize;
6859
// The false-value operand has the same type, so this must succeed.
6860 LLT Unused;
6861 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6862 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6863 llvm_unreachable("inconsistent extractParts result");
6864
// One NarrowTy select per piece, sharing the original condition.
6865 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6866 auto Select = MIRBuilder.buildSelect(NarrowTy,
6867 CondReg, Src1Regs[I], Src2Regs[I]);
6868 DstRegs.push_back(Select.getReg(0));
6869 }
6870
6871 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
// NOTE(review): a line (original 6872) was dropped here by the extraction —
// presumably `auto Select = MIRBuilder.buildSelect(` continuing onto the
// next line.
6873 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6874 DstLeftoverRegs.push_back(Select.getReg(0));
6875 }
6876
// Reassemble the narrow selects into the original destination.
6877 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6878 LeftoverTy, DstLeftoverRegs);
6879
6880 MI.eraseFromParent();
6881 return Legalized;
6882}
6883
// NOTE(review): signature line dropped by this extraction; the body matches
// LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
// LLT NarrowTy) — confirm against upstream.
//
// Narrows G_CTLZ / G_CTLZ_ZERO_UNDEF when the source is exactly twice the
// narrow width: count leading zeros in the high half; if the high half is
// zero, count in the low half and add NarrowSize.
6886 LLT NarrowTy) {
6887 if (TypeIdx != 1)
6888 return UnableToLegalize;
6889
6890 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6891 unsigned NarrowSize = NarrowTy.getSizeInBits();
6892
6893 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
// ZERO_UNDEF variant lets us use the undef-on-zero form for the low half too.
6894 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6895
// NOTE(review): a line (original 6896) was dropped here — presumably the
// local builder alias `B` used below.
6897 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6898 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
6899 auto C_0 = B.buildConstant(NarrowTy, 0);
6900 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6901 UnmergeSrc.getReg(1), C_0);
6902 auto LoCTLZ = IsUndef ?
6903 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6904 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6905 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6906 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
// High-half count: Hi != 0 on this path of the select, so ZERO_UNDEF is safe.
6907 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6908 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6909
6910 MI.eraseFromParent();
6911 return Legalized;
6912 }
6913
6914 return UnableToLegalize;
6915}
6916
// NOTE(review): signature line dropped by this extraction; the body matches
// LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
// LLT NarrowTy) — confirm against upstream.
//
// Narrows G_CTTZ / G_CTTZ_ZERO_UNDEF when the source is exactly twice the
// narrow width: mirror image of the CTLZ case — if the low half is zero,
// count trailing zeros of the high half and add NarrowSize.
6919 LLT NarrowTy) {
6920 if (TypeIdx != 1)
6921 return UnableToLegalize;
6922
6923 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6924 unsigned NarrowSize = NarrowTy.getSizeInBits();
6925
6926 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6927 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6928
// NOTE(review): a line (original 6929) was dropped here — presumably the
// local builder alias `B` used below.
6930 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6931 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
6932 auto C_0 = B.buildConstant(NarrowTy, 0);
6933 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6934 UnmergeSrc.getReg(0), C_0);
6935 auto HiCTTZ = IsUndef ?
6936 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6937 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6938 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6939 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
// Low-half count: Lo != 0 on this path of the select, so ZERO_UNDEF is safe.
6940 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6941 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6942
6943 MI.eraseFromParent();
6944 return Legalized;
6945 }
6946
6947 return UnableToLegalize;
6948}
6949
// NOTE(review): signature line dropped by this extraction; the body matches
// LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
// LLT NarrowTy) — confirm against upstream.
//
// Narrows G_CTPOP when the source is exactly twice the narrow width:
// popcount(Hi:Lo) == popcount(Hi) + popcount(Lo).
6952 LLT NarrowTy) {
6953 if (TypeIdx != 1)
6954 return UnableToLegalize;
6955
6956 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6957 unsigned NarrowSize = NarrowTy.getSizeInBits();
6958
6959 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6960 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6961
6962 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6963 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
6964 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
6965
6966 MI.eraseFromParent();
6967 return Legalized;
6968 }
6969
6970 return UnableToLegalize;
6971}
6972
// NOTE(review): signature line dropped by this extraction; the body matches
// LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
// LLT NarrowTy) — confirm against upstream.
//
// Narrows the exponent operand of G_FLDEXP: clamp the exponent to the signed
// range representable in the narrow type, then truncate it in place. Clamping
// first preserves the saturating over/underflow behavior of ldexp.
6975 LLT NarrowTy) {
6976 if (TypeIdx != 1)
6977 return UnableToLegalize;
6978
// NOTE(review): a line (original 6979) was dropped here — presumably the
// local builder alias `B` used below.
6980 Register ExpReg = MI.getOperand(2).getReg();
6981 LLT ExpTy = MRI.getType(ExpReg);
6982
6983 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6984
6985 // Clamp the exponent to the range of the target type.
6986 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6987 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6988 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6989 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6990
6991 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
// In-place operand update; original lines 6992/6994 (dropped by the
// extraction) presumably bracketed this with Observer changingInstr /
// changedInstr notifications.
6993 MI.getOperand(2).setReg(Trunc.getReg(0));
6995 return Legalized;
6996}
6997
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerBitCount(MachineInstr &MI) —
// confirm against upstream.
//
// Lowers the bit-counting operations (G_CTLZ[_ZERO_UNDEF],
// G_CTTZ[_ZERO_UNDEF], G_CTPOP) into sequences of simpler operations,
// preferring whatever related operation the target supports.
7000 unsigned Opc = MI.getOpcode();
7001 const auto &TII = MIRBuilder.getTII();
// Treat Legal, Libcall, and Custom actions as "the target can handle it".
7002 auto isSupported = [this](const LegalityQuery &Q) {
7003 auto QAction = LI.getAction(Q).Action;
7004 return QAction == Legal || QAction == Libcall || QAction == Custom;
7005 };
7006 switch (Opc) {
7007 default:
7008 return UnableToLegalize;
7009 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7010 // This trivially expands to CTLZ.
// (Original lines 7011/7013, dropped by the extraction, presumably bracketed
// the in-place opcode change with Observer notifications.)
7012 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7014 return Legalized;
7015 }
7016 case TargetOpcode::G_CTLZ: {
7017 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7018 unsigned Len = SrcTy.getSizeInBits();
7019
7020 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7021 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7022 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7023 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7024 auto ICmp = MIRBuilder.buildICmp(
7025 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7026 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7027 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7028 MI.eraseFromParent();
7029 return Legalized;
7030 }
7031 // for now, we do this:
7032 // NewLen = NextPowerOf2(Len);
7033 // x = x | (x >> 1);
7034 // x = x | (x >> 2);
7035 // ...
7036 // x = x | (x >>16);
7037 // x = x | (x >>32); // for 64-bit input
7038 // Upto NewLen/2
7039 // return Len - popcount(x);
7040 //
7041 // Ref: "Hacker's Delight" by Henry Warren
// Smear the leading one bit down through all lower positions, then count.
7042 Register Op = SrcReg;
7043 unsigned NewLen = PowerOf2Ceil(Len);
7044 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7045 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7046 auto MIBOp = MIRBuilder.buildOr(
7047 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7048 Op = MIBOp.getReg(0);
7049 }
7050 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7051 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7052 MIBPop);
7053 MI.eraseFromParent();
7054 return Legalized;
7055 }
7056 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7057 // This trivially expands to CTTZ.
// (Original lines 7058/7060 dropped — presumably Observer notifications.)
7059 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7061 return Legalized;
7062 }
7063 case TargetOpcode::G_CTTZ: {
7064 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7065
7066 unsigned Len = SrcTy.getSizeInBits();
7067 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7068 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7069 // zero.
7070 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7071 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7072 auto ICmp = MIRBuilder.buildICmp(
7073 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7074 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7075 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7076 MI.eraseFromParent();
7077 return Legalized;
7078 }
7079 // for now, we use: { return popcount(~x & (x - 1)); }
7080 // unless the target has ctlz but not ctpop, in which case we use:
7081 // { return 32 - nlz(~x & (x-1)); }
7082 // Ref: "Hacker's Delight" by Henry Warren
// ~x & (x - 1) turns the trailing zeros into ones and clears everything else.
7083 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7084 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7085 auto MIBTmp = MIRBuilder.buildAnd(
7086 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7087 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7088 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7089 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7090 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7091 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7092 MI.eraseFromParent();
7093 return Legalized;
7094 }
// Reuse MI as a G_CTPOP of the mask computed above.
// (Original lines 7095/7098 dropped — presumably Observer notifications.)
7096 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7097 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7099 return Legalized;
7100 }
7101 case TargetOpcode::G_CTPOP: {
7102 Register SrcReg = MI.getOperand(1).getReg();
7103 LLT Ty = MRI.getType(SrcReg);
7104 unsigned Size = Ty.getSizeInBits();
// NOTE(review): a line (original 7105) was dropped here — presumably the
// local builder alias `B` used below.
7106
7107 // Count set bits in blocks of 2 bits. Default approach would be
7108 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7109 // We use following formula instead:
7110 // B2Count = val - { (val >> 1) & 0x55555555 }
7111 // since it gives same result in blocks of 2 with one instruction less.
7112 auto C_1 = B.buildConstant(Ty, 1);
7113 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7114 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7115 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7116 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7117 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7118
7119 // In order to get count in blocks of 4 add values from adjacent block of 2.
7120 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7121 auto C_2 = B.buildConstant(Ty, 2);
7122 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7123 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7124 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7125 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7126 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7127 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7128
7129 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7130 // addition since count value sits in range {0,...,8} and 4 bits are enough
7131 // to hold such binary values. After addition high 4 bits still hold count
7132 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7133 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7134 auto C_4 = B.buildConstant(Ty, 4);
7135 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7136 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7137 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7138 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7139 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7140
7141 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7142 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7143 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7144 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7145
7146 // Shift count result from 8 high bits to low bits.
7147 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7148
// The multiply trick requires a usable G_MUL; WidenScalar also counts since
// the widened multiply still yields the correct low bits.
7149 auto IsMulSupported = [this](const LLT Ty) {
7150 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7151 return Action == Legal || Action == WidenScalar || Action == Custom;
7152 };
7153 if (IsMulSupported(Ty)) {
7154 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7155 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7156 } else {
// No multiply: accumulate the byte sums with a shift-add ladder instead.
7157 auto ResTmp = B8Count;
7158 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7159 auto ShiftC = B.buildConstant(Ty, Shift);
7160 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7161 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7162 }
7163 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7164 }
7165 MI.eraseFromParent();
7166 return Legalized;
7167 }
7168 }
7169}
7170
7171// Check that (every element of) Reg is undef or not an exact multiple of BW.
// NOTE(review): the signature line (original 7172) was dropped by this
// extraction — presumably `static bool isNonZeroModBitWidthOrUndef(
// const MachineRegisterInfo &MRI,` continuing below. Used by the funnel-shift
// lowerings to decide whether the shift amount mod BW can be zero.
7173 Register Reg, unsigned BW) {
7174 return matchUnaryPredicate(
7175 MRI, Reg,
7176 [=](const Constant *C) {
7177 // Null constant here means an undef.
7178 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7179 return !CI || CI->getValue().urem(BW) != 0;
7180 },
7181 /*AllowUndefs*/ true);
7182}
7183
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerFunnelShiftWithInverse(
// MachineInstr &MI) — confirm against upstream.
//
// Lowers G_FSHL/G_FSHR in terms of the opposite funnel shift. Only valid for
// power-of-two bit widths (needed so negation of the amount works modulo BW).
7186 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7187 LLT Ty = MRI.getType(Dst);
7188 LLT ShTy = MRI.getType(Z);
7189
7190 unsigned BW = Ty.getScalarSizeInBits();
7191
7192 if (!isPowerOf2_32(BW))
7193 return UnableToLegalize;
7194
7195 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7196 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7197
7198 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7199 // fshl X, Y, Z -> fshr X, Y, -Z
7200 // fshr X, Y, Z -> fshl X, Y, -Z
7201 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7202 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7203 } else {
// Z mod BW may be zero, where -Z would be wrong; pre-shift one operand by 1
// and use ~Z (= BW-1-Z mod BW) so the amount is always in range.
7204 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7205 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7206 auto One = MIRBuilder.buildConstant(ShTy, 1);
7207 if (IsFSHL) {
7208 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7209 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7210 } else {
7211 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7212 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7213 }
7214
7215 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7216 }
7217
7218 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7219 MI.eraseFromParent();
7220 return Legalized;
7221}
7222
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerFunnelShiftAsShifts(
// MachineInstr &MI) — confirm against upstream.
//
// Lowers G_FSHL/G_FSHR into plain shl/lshr/or, taking care to avoid
// shift-by-BW (undefined) when the amount mod BW can be zero.
7225 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7226 LLT Ty = MRI.getType(Dst);
7227 LLT ShTy = MRI.getType(Z);
7228
7229 const unsigned BW = Ty.getScalarSizeInBits();
7230 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7231
7232 Register ShX, ShY;
7233 Register ShAmt, InvShAmt;
7234
7235 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7236 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7237 // fshl: X << C | Y >> (BW - C)
7238 // fshr: X << (BW - C) | Y >> C
7239 // where C = Z % BW is not zero
7240 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7241 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7242 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7243 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7244 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7245 } else {
// C may be zero: split the "inverse" shift into a constant shift by 1
// followed by a shift by BW-1-C, both always < BW.
7246 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7247 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7248 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7249 if (isPowerOf2_32(BW)) {
7250 // Z % BW -> Z & (BW - 1)
7251 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7252 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7253 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7254 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7255 } else {
7256 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7257 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7258 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7259 }
7260
7261 auto One = MIRBuilder.buildConstant(ShTy, 1);
7262 if (IsFSHL) {
7263 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7264 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7265 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7266 } else {
7267 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7268 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7269 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7270 }
7271 }
7272
// NOTE(review): a line (original 7273) was dropped here by the extraction —
// presumably `MIRBuilder.buildOr(Dst, ShX, ShY);` combining the two halves.
7274 MI.eraseFromParent();
7275 return Legalized;
7276}
7277
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerFunnelShift(MachineInstr &MI)
// — confirm against upstream. Dispatches between the two funnel-shift
// lowering strategies based on what the target can legalize.
7280 // These operations approximately do the following (while avoiding undefined
7281 // shifts by BW):
7282 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7283 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7284 Register Dst = MI.getOperand(0).getReg();
7285 LLT Ty = MRI.getType(Dst);
7286 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7287
7288 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7289 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7290
// If the reverse funnel shift would itself be lowered, converting to it would
// just loop back here — go straight to the shift expansion.
7291 // TODO: Use smarter heuristic that accounts for vector legalization.
7292 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7293 return lowerFunnelShiftAsShifts(MI);
7294
7295 // This only works for powers of 2, fallback to shifts if it fails.
7296 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7297 if (Result == UnableToLegalize)
7298 return lowerFunnelShiftAsShifts(MI);
7299 return Result;
7300}
7301
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerEXT(MachineInstr &MI) —
// confirm against upstream.
//
// Splits a vector extend whose per-element step is more than a doubling into
// a half-width extend, an unmerge into two halves, two further extends, and a
// final merge. All sizes involved must be powers of two.
7303 auto [Dst, Src] = MI.getFirst2Regs();
7304 LLT DstTy = MRI.getType(Dst);
7305 LLT SrcTy = MRI.getType(Src);
7306
7307 uint32_t DstTySize = DstTy.getSizeInBits();
7308 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7309 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7310
7311 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7312 !isPowerOf2_32(SrcTyScalarSize))
7313 return UnableToLegalize;
7314
7315 // The step between extend is too large, split it by creating an intermediate
7316 // extend instruction
7317 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7318 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7319 // If the destination type is illegal, split it into multiple statements
7320 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7321 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7322 // Unmerge the vector
// NOTE(review): a continuation line (original 7324) was dropped here —
// presumably the halved element count passed to changeElementCount.
7323 LLT EltTy = MidTy.changeElementCount(
7325 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7326
7327 // ZExt the vectors
// NOTE(review): a continuation line (original 7329) was dropped here —
// presumably the halved element count passed to changeElementCount.
7328 LLT ZExtResTy = DstTy.changeElementCount(
7330 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7331 {UnmergeSrc.getReg(0)});
7332 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7333 {UnmergeSrc.getReg(1)});
7334
7335 // Merge the ending vectors
7336 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7337
7338 MI.eraseFromParent();
7339 return Legalized;
7340 }
7341 return UnableToLegalize;
7342}
7343
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerTRUNC(MachineInstr &MI) —
// confirm against upstream.
//
// Splits a large vector truncate by unmerging the source, truncating each
// half to an intermediate element width, concatenating, and (if needed)
// truncating once more to the final width.
7345 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7347 // Similar to how operand splitting is done in SelectiondDAG, we can handle
7348 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7349 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7350 // %lo16(<4 x s16>) = G_TRUNC %inlo
7351 // %hi16(<4 x s16>) = G_TRUNC %inhi
7352 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7353 // %res(<8 x s8>) = G_TRUNC %in16
7354
7355 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7356
7357 Register DstReg = MI.getOperand(0).getReg();
7358 Register SrcReg = MI.getOperand(1).getReg();
7359 LLT DstTy = MRI.getType(DstReg);
7360 LLT SrcTy = MRI.getType(SrcReg);
7361
// NOTE(review): several condition lines (original 7363-7365) were dropped by
// the extraction; the visible parts require power-of-two element counts on
// both vector types.
7362 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7364 isPowerOf2_32(SrcTy.getNumElements()) &&
7366 // Split input type.
// NOTE(review): a continuation line (original 7368) was dropped here —
// presumably the halved element count passed to changeElementCount.
7367 LLT SplitSrcTy = SrcTy.changeElementCount(
7369
7370 // First, split the source into two smaller vectors.
7371 SmallVector<Register, 2> SplitSrcs;
7372 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7373
7374 // Truncate the splits into intermediate narrower elements.
// When more than a halving is needed, truncate to double the final width
// first and finish with a second G_TRUNC below.
7375 LLT InterTy;
7376 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7377 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7378 else
7379 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7380 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
7381 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
7382 }
7383
7384 // Combine the new truncates into one vector
// NOTE(review): a line (original 7385) was dropped here — presumably
// `auto Merge = MIRBuilder.buildMergeLikeInstr(` continuing onto the next
// line; `Merge` is used below.
7386 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7387
7388 // Truncate the new vector to the final result type
7389 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7390 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7391 else
7392 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7393
7394 MI.eraseFromParent();
7395
7396 return Legalized;
7397 }
7398 return UnableToLegalize;
7399}
7400
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerRotateWithReverseRotate(
// MachineInstr &MI) — confirm against upstream.
//
// rot(l|r) X, Amt -> rot(r|l) X, -Amt. Callers guard this with a
// power-of-two bit-width check so the negation is correct modulo the width.
7403 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7404 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7405 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7406 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7407 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7408 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7409 MI.eraseFromParent();
7410 return Legalized;
7411}
7412
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerRotate(MachineInstr &MI) —
// confirm against upstream.
//
// Lowers G_ROTL/G_ROTR, preferring (1) the reverse rotate, (2) a funnel shift
// with both data operands equal, and finally (3) a pair of plain shifts OR'd
// together, with the amounts masked/remainder'ed to stay below the bit width.
7414 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7415
7416 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7417 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7418
// NOTE(review): a line (original 7419) was dropped here by the extraction.
7420
7421 // If a rotate in the other direction is supported, use it.
7422 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7423 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
7424 isPowerOf2_32(EltSizeInBits))
7425 return lowerRotateWithReverseRotate(MI);
7426
// A rotate is a funnel shift with both inputs equal: fshl(X, X, Amt).
7427 // If a funnel shift is supported, use it.
7428 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7429 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7430 bool IsFShLegal = false;
7431 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7432 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7433 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
7434 Register R3) {
7435 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
7436 MI.eraseFromParent();
7437 return Legalized;
7438 };
7439 // If a funnel shift in the other direction is supported, use it.
7440 if (IsFShLegal) {
7441 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7442 } else if (isPowerOf2_32(EltSizeInBits)) {
7443 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
7444 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7445 }
7446 }
7447
7448 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7449 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7450 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
7451 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
7452 Register ShVal;
7453 Register RevShiftVal;
7454 if (isPowerOf2_32(EltSizeInBits)) {
7455 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7456 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
7457 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7458 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7459 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7460 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7461 RevShiftVal =
7462 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
7463 } else {
// Non-power-of-two width: mod via urem, and split the reverse shift into a
// constant shift by 1 plus a shift by (w - 1 - c) so it never reaches w.
7464 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7465 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7466 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
7467 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
7468 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7469 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7470 auto One = MIRBuilder.buildConstant(AmtTy, 1);
7471 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7472 RevShiftVal =
7473 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
7474 }
7475 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
7476 MI.eraseFromParent();
7477 return Legalized;
7478}
7479
7480// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
7481// representation.
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerU64ToF32BitOps(
// MachineInstr &MI) — confirm against upstream. Builds the float entirely
// with integer ops, including round-to-nearest-even on the discarded bits.
7484 auto [Dst, Src] = MI.getFirst2Regs();
7485 const LLT S64 = LLT::scalar(64);
7486 const LLT S32 = LLT::scalar(32);
7487 const LLT S1 = LLT::scalar(1);
7488
7489 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7490
7491 // unsigned cul2f(ulong u) {
7492 // uint lz = clz(u);
7493 // uint e = (u != 0) ? 127U + 63U - lz : 0;
7494 // u = (u << lz) & 0x7fffffffffffffffUL;
7495 // ulong t = u & 0xffffffffffUL;
7496 // uint v = (e << 23) | (uint)(u >> 40);
7497 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
7498 // return as_float(v + r);
7499 // }
7500
7501 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
7502 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
7503
// Src == 0 is special-cased via the select below, so ZERO_UNDEF is safe.
7504 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
7505
// Biased exponent: 127 (IEEE bias) + 63 (top bit position) - leading zeros.
7506 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
7507 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
7508
7509 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
7510 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
7511
// Normalize: shift the leading one to bit 63, then drop it (it's implicit).
7512 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
7513 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
7514
7515 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
7516
// T = the 40 bits that won't fit in the 23-bit mantissa; used for rounding.
7517 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
7518 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
7519
7520 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
7521 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
7522 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
7523
// Round to nearest, ties to even: +1 if above halfway, +LSB-of-V if exactly
// halfway, else +0.
7524 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
7525 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
7526 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
7527 auto One = MIRBuilder.buildConstant(S32, 1);
7528
7529 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
7530 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
7531 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
7532 MIRBuilder.buildAdd(Dst, V, R);
7533
7534 MI.eraseFromParent();
7535 return Legalized;
7536}
7537
7538// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
7539// operations and G_SITOFP
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerU64ToF32WithSITOFP(
// MachineInstr &MI) — confirm against upstream.
7542 auto [Dst, Src] = MI.getFirst2Regs();
7543 const LLT S64 = LLT::scalar(64);
7544 const LLT S32 = LLT::scalar(32);
7545 const LLT S1 = LLT::scalar(1);
7546
7547 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7548
7549 // For i64 < INT_MAX we simply reuse SITOFP.
7550 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
7551 // saved before division, convert to float by SITOFP, multiply the result
7552 // by 2.
7553 auto One = MIRBuilder.buildConstant(S64, 1);
7554 auto Zero = MIRBuilder.buildConstant(S64, 0);
7555 // Result if Src < INT_MAX
7556 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
7557 // Result if Src >= INT_MAX
// OR-ing the dropped low bit into the halved value keeps the rounding of the
// final conversion correct (sticky-bit rounding).
7558 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
7559 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
7560 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
7561 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
// Doubling via FAdd restores the magnitude lost by the halving.
7562 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
7563 // Check if the original value is larger than INT_MAX by comparing with
7564 // zero to pick one of the two conversions.
7565 auto IsLarge =
// NOTE(review): a line (original 7566) was dropped here — presumably the
// signed-less-than compare of Src against Zero defining IsLarge.
7567 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
7568
7569 MI.eraseFromParent();
7570 return Legalized;
7571}
7572
7573// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
7574// IEEE double representation.
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerU64ToF64(MachineInstr &MI) —
// confirm against upstream.
7577 auto [Dst, Src] = MI.getFirst2Regs();
7578 const LLT S64 = LLT::scalar(64);
7579 const LLT S32 = LLT::scalar(32);
7580
7581 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
7582
7583 // We create double value from 32 bit parts with 32 exponent difference.
7584 // Note that + and - are float operations that adjust the implicit leading
7585 // one, the bases 2^52 and 2^84 are for illustrative purposes.
7586 //
7587 // X = 2^52 * 1.0...LowBits
7588 // Y = 2^84 * 1.0...HighBits
7589 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
7590 // = - 2^52 * 1.0...HighBits
7591 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
// 0x433... is 2^52 as a double's bit pattern; 0x453... is 2^84. OR-ing the
// 32-bit halves into their mantissas materializes X and Y exactly.
7592 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
7593 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
7594 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
7595 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
7596 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
7597
7598 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
7599 LowBits = MIRBuilder.buildZExt(S64, LowBits);
7600 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
7601 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
7602 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
7603 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
7604 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
7605
7606 MI.eraseFromParent();
7607 return Legalized;
7608}
7609
// NOTE(review): the signature lines were dropped by this extraction; from the
// body this appears to be LegalizerHelper::lowerUITOFP(MachineInstr &MI) —
// confirm against upstream.
//
// Lowers G_UITOFP: s1 sources become a 1.0/0.0 select; s64 sources dispatch
// to the expansions above based on the destination width.
7611 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7612
7613 if (SrcTy == LLT::scalar(1)) {
7614 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
7615 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7616 MIRBuilder.buildSelect(Dst, Src, True, False);
7617 MI.eraseFromParent();
7618 return Legalized;
7619 }
7620
7621 if (SrcTy != LLT::scalar(64))
7622 return UnableToLegalize;
7623
7624 if (DstTy == LLT::scalar(32))
7625 // TODO: SelectionDAG has several alternative expansions to port which may
7626 // be more reasonable depending on the available instructions. We also need
7627 // a more advanced mechanism to choose an optimal version depending on
7628 // target features such as sitofp or CTLZ availability.
// NOTE(review): the return statements (original 7629 and 7632) were dropped
// by the extraction — presumably calls to the u64->f32 and u64->f64
// expansions defined above.
7630
7631 if (DstTy == LLT::scalar(64))
7633
7634 return UnableToLegalize;
7635}
7636
// Lower a signed-integer-to-FP conversion.
// s1 sources become select(Src, -1.0, 0.0) (a signed 1-bit value is 0 or -1);
// s64->s32 is expanded via absolute value + unsigned conversion + sign fixup,
// mirroring compiler-rt's cl2f shown in the comment below.
// NOTE(review): the signature line (7637) and line 7671 (the zero constant
// compared against in the ICMP_NE) were dropped by the doc extractor --
// presumably LegalizerHelper::lowerSITOFP; confirm against upstream.
7638 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7639
7640 const LLT S64 = LLT::scalar(64);
7641 const LLT S32 = LLT::scalar(32);
7642 const LLT S1 = LLT::scalar(1);
7643
7644 if (SrcTy == S1) {
7645 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
7646 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7647 MIRBuilder.buildSelect(Dst, Src, True, False);
7648 MI.eraseFromParent();
7649 return Legalized;
7650 }
7651
7652 if (SrcTy != S64)
7653 return UnableToLegalize;
7654
7655 if (DstTy == S32) {
7656 // signed cl2f(long l) {
7657 // long s = l >> 63;
7658 // float r = cul2f((l + s) ^ s);
7659 // return s ? -r : r;
7660 // }
7661 Register L = Src;
7662 auto SignBit = MIRBuilder.buildConstant(S64, 63);
// S is an all-ones mask when L is negative, all-zeros otherwise.
7663 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
7664
// (L + S) ^ S computes |L| without a branch (two's-complement negate when
// S is all-ones, identity when S is zero).
7665 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
7666 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
7667 auto R = MIRBuilder.buildUITOFP(S32, Xor);
7668
7669 auto RNeg = MIRBuilder.buildFNeg(S32, R);
7670 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
7672 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
7673 MI.eraseFromParent();
7674 return Legalized;
7675 }
7676
7677 return UnableToLegalize;
7678}
7679
// Lower FP-to-unsigned-integer conversion in terms of FPTOSI.
// Values below 2^(DstBits-1) convert identically via FPTOSI; larger values
// are handled by converting (Src - 2^Exp) and OR-ing the top bit back in via
// XOR with the sign-mask constant, selected on a ULT compare (which also
// routes NaN through the plain FPTOSI path).
// NOTE(review): the signature line and line 7696 (the APFloat::IEEEdouble()
// arm of the conditional) were dropped by the doc extractor -- presumably
// LegalizerHelper::lowerFPTOUI; confirm against upstream.
7681 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7682 const LLT S64 = LLT::scalar(64);
7683 const LLT S32 = LLT::scalar(32);
7684
7685 if (SrcTy != S64 && SrcTy != S32)
7686 return UnableToLegalize;
7687 if (DstTy != S32 && DstTy != S64)
7688 return UnableToLegalize;
7689
7690 // FPTOSI gives same result as FPTOUI for positive signed integers.
7691 // FPTOUI needs to deal with fp values that convert to unsigned integers
7692 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
7693
7694 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
7695 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
7697 APInt::getZero(SrcTy.getSizeInBits()));
7698 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
7699
7700 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
7701
7702 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
7703 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
7704 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
7705 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
7706 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
7707 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
7708 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
7709
7710 const LLT S1 = LLT::scalar(1);
7711
// ULT (unordered-or-less-than): small values and NaN take the plain FPTOSI
// result; everything >= the threshold takes the bias-adjusted result.
7712 MachineInstrBuilder FCMP =
7713 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
7714 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
7715
7716 MI.eraseFromParent();
7717 return Legalized;
7718}
7719
// Lower FP-to-signed-integer conversion; only f32 -> i64 is handled, using
// compiler-rt's fixsfdi algorithm (link in the comment below): decompose the
// float into sign/exponent/mantissa bit fields, shift the mantissa by the
// unbiased exponent, then apply the sign via the xor/sub two's-complement
// trick, returning 0 for exponents below zero (|x| < 1).
// NOTE(review): the signature line and line 7764 (the ICMP_SGT buildICmp that
// produces CmpGt) were dropped by the doc extractor -- presumably
// LegalizerHelper::lowerFPTOSI; confirm against upstream.
7721 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7722 const LLT S64 = LLT::scalar(64);
7723 const LLT S32 = LLT::scalar(32);
7724
7725 // FIXME: Only f32 to i64 conversions are supported.
7726 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
7727 return UnableToLegalize;
7728
7729 // Expand f32 -> i64 conversion
7730 // This algorithm comes from compiler-rt's implementation of fixsfdi:
7731 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
7732
7733 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
7734
// 0x7F800000 / 23: IEEE-754 single exponent field mask and bit position.
7735 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
7736 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
7737
7738 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
7739 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
7740
// Sign becomes an all-ones or all-zeros mask, sign-extended to i64.
7741 auto SignMask = MIRBuilder.buildConstant(SrcTy,
7742 APInt::getSignMask(SrcEltBits));
7743 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
7744 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
7745 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
7746 Sign = MIRBuilder.buildSExt(DstTy, Sign);
7747
// R = mantissa with the implicit leading 1 (0x00800000) made explicit.
7748 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
7749 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
7750 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
7751
7752 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
7753 R = MIRBuilder.buildZExt(DstTy, R);
7754
// 127 is the IEEE-754 single exponent bias.
7755 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
7756 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
7757 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
7758 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
7759
// Shift left when the exponent exceeds the mantissa width, right otherwise;
// the compare below selects which shift is meaningful.
7760 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
7761 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
7762
7763 const LLT S1 = LLT::scalar(1);
7765 S1, Exponent, ExponentLoBit);
7766
7767 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
7768
// (R ^ Sign) - Sign negates R when Sign is all-ones, identity otherwise.
7769 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
7770 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
7771
7772 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
7773
// Unbiased exponent < 0 means |Src| < 1.0, which truncates to 0.
7774 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7775 S1, Exponent, ZeroSrcTy);
7776
7777 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7778 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7779
7780 MI.eraseFromParent();
7781 return Legalized;
7782}
7783
// Lower a saturating FP-to-integer conversion (G_FPTOSI_SAT / G_FPTOUI_SAT,
// per the opcode test below). Two strategies:
//  * If the integer min/max bounds are exactly representable in the source FP
//    type, clamp Src with fmin/fmax-style selects and then convert once.
//  * Otherwise convert first and patch out-of-range results with compares and
//    selects (ULT -> MinInt also catches NaN; OGT -> MaxInt).
// Signed conversions additionally force a NaN input to 0 via an unordered
// self-compare.
// NOTE(review): several lines containing cross-referenced identifiers (7819,
// 7825-7826, 7839, 7856, 7862, 7877 -- FCmp builders with their predicates)
// were dropped by the doc extractor; confirm the exact predicates/flags
// against upstream LegalizerHelper.cpp.
7786 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7787
7788 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
7789 unsigned SatWidth = DstTy.getScalarSizeInBits();
7790
7791 // Determine minimum and maximum integer values and their corresponding
7792 // floating-point values.
7793 APInt MinInt, MaxInt;
7794 if (IsSigned) {
7795 MinInt = APInt::getSignedMinValue(SatWidth);
7796 MaxInt = APInt::getSignedMaxValue(SatWidth);
7797 } else {
7798 MinInt = APInt::getMinValue(SatWidth);
7799 MaxInt = APInt::getMaxValue(SatWidth);
7800 }
7801
7802 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
7803 APFloat MinFloat(Semantics);
7804 APFloat MaxFloat(Semantics);
7805
// rmTowardZero + the opInexact status tells us whether the integer bounds
// round-trip exactly through the FP type.
7806 APFloat::opStatus MinStatus =
7807 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
7808 APFloat::opStatus MaxStatus =
7809 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
7810 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
7811 !(MaxStatus & APFloat::opStatus::opInexact);
7812
7813 // If the integer bounds are exactly representable as floats, emit a
7814 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
7815 // and selects.
7816 if (AreExactFloatBounds) {
7817 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
7818 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
7820 SrcTy.changeElementSize(1), Src, MaxC);
7821 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
7822 // Clamp by MaxFloat from above. NaN cannot occur.
7823 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
7824 auto MinP =
7827 auto Min =
7828 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
7829 // Convert clamped value to integer. In the unsigned case we're done,
7830 // because we mapped NaN to MinFloat, which will cast to zero.
7831 if (!IsSigned) {
7832 MIRBuilder.buildFPTOUI(Dst, Min);
7833 MI.eraseFromParent();
7834 return Legalized;
7835 }
7836
7837 // Otherwise, select 0 if Src is NaN.
7838 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
7840 DstTy.changeElementSize(1), Src, Src);
7841 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
7842 FpToInt);
7843 MI.eraseFromParent();
7844 return Legalized;
7845 }
7846
7847 // Result of direct conversion. The assumption here is that the operation is
7848 // non-trapping and it's fine to apply it to an out-of-range value if we
7849 // select it away later.
7850 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
7851 : MIRBuilder.buildFPTOUI(DstTy, Src);
7852
7853 // If Src ULT MinFloat, select MinInt. In particular, this also selects
7854 // MinInt if Src is NaN.
7855 auto ULT =
7857 MIRBuilder.buildFConstant(SrcTy, MinFloat));
7858 auto Max = MIRBuilder.buildSelect(
7859 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
7860 // If Src OGT MaxFloat, select MaxInt.
7861 auto OGT =
7863 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
7864
7865 // In the unsigned case we are done, because we mapped NaN to MinInt, which
7866 // is already zero.
7867 if (!IsSigned) {
7868 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
7869 Max);
7870 MI.eraseFromParent();
7871 return Legalized;
7872 }
7873
7874 // Otherwise, select 0 if Src is NaN.
7875 auto Min = MIRBuilder.buildSelect(
7876 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
7878 DstTy.changeElementSize(1), Src, Src);
7879 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
7880 MI.eraseFromParent();
7881 return Legalized;
7882}
7883
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
//
// When a direct f64->f32 FPTRUNC is legal, truncate in two steps
// (f64->f32->f16); otherwise expand to explicit bit manipulation on the two
// 32-bit halves of the double: re-bias the exponent, build the f16
// significand with sticky-bit collection for round-to-nearest-even, handle
// the denormal (E < 1), overflow (E > 30), and NaN/Inf (biased exponent
// 1039 here) cases, then re-insert the sign bit.
// NOTE(review): the signature line and a few lines containing cross
// referenced identifiers (the legality query at 7897, the mantissa-extract
// at 7914, the exponent clamp at 7922, B's upper clamp at 7949, and the
// constants at 7966/7969/7973) were dropped by the doc extractor; confirm
// against upstream LegalizerHelper.cpp.
7887 const LLT S1 = LLT::scalar(1);
7888 const LLT S32 = LLT::scalar(32);
7889
7890 auto [Dst, Src] = MI.getFirst2Regs();
7891 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
7892 MRI.getType(Src).getScalarType() == LLT::scalar(64));
7893
7894 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
7895 return UnableToLegalize;
7896
7898 unsigned Flags = MI.getFlags();
7899 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
7900 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
7901 MI.eraseFromParent();
7902 return Legalized;
7903 }
7904
7905 const unsigned ExpMask = 0x7ff;
7906 const unsigned ExpBiasf64 = 1023;
7907 const unsigned ExpBiasf16 = 15;
7908
// U = low 32 bits of the double, UH = high 32 bits (sign/exp/mantissa-high).
7909 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
7910 Register U = Unmerge.getReg(0);
7911 Register UH = Unmerge.getReg(1);
7912
7913 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
7915
7916 // Subtract the fp64 exponent bias (1023) to get the real exponent and
7917 // add the f16 bias (15) to get the biased exponent for the f16 format.
7918 E = MIRBuilder.buildAdd(
7919 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
7920
7923
// Collapse the discarded low mantissa bits into a sticky bit so that
// round-to-nearest-even sees "non-zero remainder".
7924 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
7925 MIRBuilder.buildConstant(S32, 0x1ff));
7926 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
7927
7928 auto Zero = MIRBuilder.buildConstant(S32, 0);
7929 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
7930 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
7931 M = MIRBuilder.buildOr(S32, M, Lo40Set);
7932
7933 // (M != 0 ? 0x0200 : 0) | 0x7c00;
7934 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
7935 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
7936 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
7937
// I is the f16 NaN/Inf pattern: exponent all-ones plus a quiet bit when the
// mantissa was non-zero (NaN) -- used on the E == 1039 path below.
7938 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
7939 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
7940
7941 // N = M | (E << 12);
7942 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
7943 auto N = MIRBuilder.buildOr(S32, M, EShl12);
7944
7945 // B = clamp(1-E, 0, 13);
7946 auto One = MIRBuilder.buildConstant(S32, 1);
7947 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
7948 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
7950
// Denormal path: shift the significand (with explicit leading one, 0x1000)
// right by B, OR-ing in a sticky bit if any shifted-out bits were set.
7951 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
7952 MIRBuilder.buildConstant(S32, 0x1000));
7953
7954 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
7955 auto D0 = MIRBuilder.buildShl(S32, D, B);
7956
7957 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
7958 D0, SigSetHigh);
7959 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
7960 D = MIRBuilder.buildOr(S32, D, D1);
7961
7962 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
7963 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
7964
// Round to nearest even on the bottom 3 bits: add 1 when the low bits are
// exactly 3 (tie, odd) or greater than 5 (round up).
7965 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
7967
7968 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
7970 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
7971
7972 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
7974 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
7975
7976 V1 = MIRBuilder.buildOr(S32, V0, V1);
7977 V = MIRBuilder.buildAdd(S32, V, V1);
7978
// E > 30: f16 overflow, saturate to infinity (0x7c00).
7979 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
7980 E, MIRBuilder.buildConstant(S32, 30));
7981 V = MIRBuilder.buildSelect(S32, CmpEGt30,
7982 MIRBuilder.buildConstant(S32, 0x7c00), V);
7983
7984 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
7985 E, MIRBuilder.buildConstant(S32, 1039));
7986 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
7987
7988 // Extract the sign bit.
7989 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
7990 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
7991
7992 // Insert the sign bit
7993 V = MIRBuilder.buildOr(S32, Sign, V);
7994
7995 MIRBuilder.buildTrunc(Dst, V);
7996 MI.eraseFromParent();
7997 return Legalized;
7998}
7999
// Dispatch FPTRUNC lowering: only the f64 -> f16 case is expanded (line 8007,
// dropped by the doc extractor, presumably calls the F64_TO_F16 helper
// above); everything else is unsupported here.
8002 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8003 const LLT S64 = LLT::scalar(64);
8004 const LLT S16 = LLT::scalar(16);
8005
8006 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8008
8009 return UnableToLegalize;
8010}
8011
// Lower pow-with-integer-exponent as a plain FPOW: convert the integer
// exponent to FP with SITOFP and forward the original FP flags.
8013 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8014 LLT Ty = MRI.getType(Dst);
8015
8016 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8017 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8018 MI.eraseFromParent();
8019 return Legalized;
8020}
8021
// Map an integer min/max opcode to the ICmp predicate that selects its
// "winning" operand (e.g. SMIN keeps the SLT side). Used by the min/max
// lowering below; asserts on any other opcode.
8023 switch (Opc) {
8024 case TargetOpcode::G_SMIN:
8025 return CmpInst::ICMP_SLT;
8026 case TargetOpcode::G_SMAX:
8027 return CmpInst::ICMP_SGT;
8028 case TargetOpcode::G_UMIN:
8029 return CmpInst::ICMP_ULT;
8030 case TargetOpcode::G_UMAX:
8031 return CmpInst::ICMP_UGT;
8032 default:
8033 llvm_unreachable("not in integer min/max");
8034 }
8035}
8036
// Lower integer min/max as compare + select: the predicate is derived from
// the opcode (see minMaxToCompare) and Src0 is kept when the compare holds.
// The compare type mirrors the destination with 1-bit elements so vector
// min/max lowers element-wise.
8038 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8039
8040 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8041 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8042
8043 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8044 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8045
8046 MI.eraseFromParent();
8047 return Legalized;
8048}
8049
// Lower a three-way compare (GSUCmp: signed/unsigned scmp-style, result is
// -1 / 0 / +1). Two emission strategies depending on the target's boolean
// contents: nested selects producing -1/1/0 directly, or extending the two
// boolean compares to DstTy and subtracting (IsGT - IsLT).
// NOTE(review): lines 8060-8061, 8063-8064 (the predicate constants), 8074-
// 8075 (the BooleanContents condition), and 8082 (the condition guarding the
// GT/LT swap) were dropped by the doc extractor; confirm against upstream.
8052 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8053
8054 Register Dst = Cmp->getReg(0);
8055 LLT DstTy = MRI.getType(Dst);
8056 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8057 LLT CmpTy = DstTy.changeElementSize(1);
8058
// Pick signed vs unsigned LT/GT predicates from the compare's signedness.
8059 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8062 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8065
8066 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8067 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8068 Cmp->getRHSReg());
8069 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8070 Cmp->getRHSReg());
8071
8072 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8073 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8076 auto One = MIRBuilder.buildConstant(DstTy, 1);
8077 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8078
8079 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8080 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8081 } else {
8083 std::swap(IsGT, IsLT);
8084 // Extend boolean results to DstTy, which is at least i2, before subtracting
8085 // them.
8086 unsigned BoolExtOp =
8087 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8088 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8089 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8090 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8091 }
8092
8093 MI.eraseFromParent();
8094 return Legalized;
8095}
8096
// Lower FCOPYSIGN as integer bit manipulation: clear Src0's sign bit, isolate
// Src1's sign bit (shifting it into position when the operand widths differ),
// and OR the two together. The OR is marked disjoint since the masks cannot
// overlap.
8099 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8100 const int Src0Size = Src0Ty.getScalarSizeInBits();
8101 const int Src1Size = Src1Ty.getScalarSizeInBits();
8102
8103 auto SignBitMask = MIRBuilder.buildConstant(
8104 Src0Ty, APInt::getSignMask(Src0Size));
8105
8106 auto NotSignBitMask = MIRBuilder.buildConstant(
8107 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8108
8109 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8110 Register And1;
8111 if (Src0Ty == Src1Ty) {
8112 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8113 } else if (Src0Size > Src1Size) {
// Wider result: zero-extend Src1 then shift its sign bit up into Src0's
// sign position.
8114 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8115 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8116 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8117 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8118 } else {
// Narrower result: shift Src1's sign bit down, then truncate to Src0Ty.
8119 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8120 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8121 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8122 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8123 }
8124
8125 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8126 // constants are a nan and -0.0, but the final result should preserve
8127 // everything.
8128 unsigned Flags = MI.getFlags();
8129
8130 // We masked the sign bit and the not-sign bit, so these are disjoint.
8131 Flags |= MachineInstr::Disjoint;
8132
8133 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8134
8135 MI.eraseFromParent();
8136 return Legalized;
8137}
8138
// Lower G_FMINNUM/G_FMAXNUM to the IEEE variants, inserting G_FCANONICALIZE
// on any operand that may be a signaling NaN so the IEEE ops observe quieted
// inputs (the non-IEEE ops differ only in sNaN handling).
8141 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
8142 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
8143
8144 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8145 LLT Ty = MRI.getType(Dst);
8146
8147 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8148 // Insert canonicalizes if it's possible we need to quiet to get correct
8149 // sNaN behavior.
8150
8151 // Note this must be done here, and not as an optimization combine in the
8152 // absence of a dedicate quiet-snan instruction as we're using an
8153 // omni-purpose G_FCANONICALIZE.
8154 if (!isKnownNeverSNaN(Src0, MRI))
8155 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8156
8157 if (!isKnownNeverSNaN(Src1, MRI))
8158 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8159 }
8160
8161 // If there are no nans, it's safe to simply replace this with the non-IEEE
8162 // version.
8163 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8164 MI.eraseFromParent();
8165 return Legalized;
8166}
8167
8169 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
// G_FMAD permits intermediate rounding, so splitting into separate
// multiply and add (each forwarding the original flags) is valid.
8170 Register DstReg = MI.getOperand(0).getReg();
8171 LLT Ty = MRI.getType(DstReg);
8172 unsigned Flags = MI.getFlags();
8173
8174 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8175 Flags);
8176 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8177 MI.eraseFromParent();
8178 return Legalized;
8179}
8180
// Lower round-half-away-from-zero in terms of trunc/fabs/fcmp/copysign,
// following the pseudocode in the comment below: truncate, then add a
// sign-matched 1.0 when the truncated-away fraction is >= 0.5.
8183 auto [DstReg, X] = MI.getFirst2Regs();
8184 const unsigned Flags = MI.getFlags();
8185 const LLT Ty = MRI.getType(DstReg);
8186 const LLT CondTy = Ty.changeElementSize(1);
8187
8188 // round(x) =>
8189 // t = trunc(x);
8190 // d = fabs(x - t);
8191 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8192 // return t + o;
8193
8194 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8195
8196 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8197 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8198
8199 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8200 auto Cmp =
8201 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8202
8203 // Could emit G_UITOFP instead
8204 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8205 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8206 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
// Copysign makes the offset point away from zero, matching X's sign.
8207 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8208
8209 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8210
8211 MI.eraseFromParent();
8212 return Legalized;
8213}
8214
// Lower floor via trunc: truncation rounds toward zero, so for negative,
// non-integral inputs subtract 1. The SITOFP of the boolean AND yields
// exactly -1.0 or 0.0 as the correction term (true sign-extends to -1).
8216 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8217 unsigned Flags = MI.getFlags();
8218 LLT Ty = MRI.getType(DstReg);
8219 const LLT CondTy = Ty.changeElementSize(1);
8220
8221 // result = trunc(src);
8222 // if (src < 0.0 && src != result)
8223 // result += -1.0.
8224
8225 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8226 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8227
8228 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8229 SrcReg, Zero, Flags);
8230 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8231 SrcReg, Trunc, Flags);
8232 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8233 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8234
8235 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8236 MI.eraseFromParent();
8237 return Legalized;
8238}
8239
// Lower a merge-like instruction by zero-extending each source part to a
// scalar as wide as the destination, shifting it to its bit offset, and
// OR-ing into an accumulator. Pointer destinations are rebuilt with
// G_INTTOPTR, but only for integral address spaces.
// NOTE(review): line 8265 (the non-integral address space query the if at
// 8266 tests) was dropped by the doc extractor; confirm against upstream.
8242 const unsigned NumOps = MI.getNumOperands();
8243 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8244 unsigned PartSize = Src0Ty.getSizeInBits();
8245
8246 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8247 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8248
// Operand 0 is the def; sources start at operand 1, and part 0 seeded the
// accumulator above, so the loop begins at operand 2 with bit offset
// PartSize.
8249 for (unsigned I = 2; I != NumOps; ++I) {
8250 const unsigned Offset = (I - 1) * PartSize;
8251
8252 Register SrcReg = MI.getOperand(I).getReg();
8253 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8254
// Write the final OR straight into DstReg when types line up, avoiding a
// trailing copy.
8255 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8256 MRI.createGenericVirtualRegister(WideTy);
8257
8258 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8259 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8260 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8261 ResultReg = NextResult;
8262 }
8263
8264 if (DstTy.isPointer()) {
8266 DstTy.getAddressSpace())) {
8267 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8268 return UnableToLegalize;
8269 }
8270
8271 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8272 }
8273
8274 MI.eraseFromParent();
8275 return Legalized;
8276}
8277
// Lower an unmerge by coercing the source to one wide integer, then
// producing each destination as a right-shift by its bit offset followed by
// a truncate. Destination 0 needs no shift. Pointer element types are TODO.
8280 const unsigned NumDst = MI.getNumOperands() - 1;
8281 Register SrcReg = MI.getOperand(NumDst).getReg();
8282 Register Dst0Reg = MI.getOperand(0).getReg();
8283 LLT DstTy = MRI.getType(Dst0Reg);
8284 if (DstTy.isPointer())
8285 return UnableToLegalize; // TODO
8286
8287 SrcReg = coerceToScalar(SrcReg);
8288 if (!SrcReg)
8289 return UnableToLegalize;
8290
8291 // Expand scalarizing unmerge as bitcast to integer and shift.
8292 LLT IntTy = MRI.getType(SrcReg);
8293
8294 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8295
8296 const unsigned DstSize = DstTy.getSizeInBits();
8297 unsigned Offset = DstSize;
8298 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8299 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8300 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8301 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8302 }
8303
8304 MI.eraseFromParent();
8305 return Legalized;
8306}
8307
8308/// Lower a vector extract or insert by writing the vector to a stack temporary
8309/// and reloading the element or vector.
8310///
8311/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8312/// =>
8313/// %stack_temp = G_FRAME_INDEX
8314/// G_STORE %vec, %stack_temp
8315/// %idx = clamp(%idx, %vec.getNumElements())
8316/// %element_ptr = G_PTR_ADD %stack_temp, %idx
8317/// %dst = G_LOAD %element_ptr
// Fast path: a constant in-bounds index avoids the stack entirely by
// unmerging the vector into scalar parts and copying/merging.
8320 Register DstReg = MI.getOperand(0).getReg();
8321 Register SrcVec = MI.getOperand(1).getReg();
8322 Register InsertVal;
8323 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8324 InsertVal = MI.getOperand(2).getReg();
8325
// The index is the last operand for both the extract and insert forms.
8326 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
8327
8328 LLT VecTy = MRI.getType(SrcVec);
8329 LLT EltTy = VecTy.getElementType();
8330 unsigned NumElts = VecTy.getNumElements();
8331
8332 int64_t IdxVal;
8333 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
8335 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
8336
8337 if (InsertVal) {
8338 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
8339 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
8340 } else {
8341 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
8342 }
8343
8344 MI.eraseFromParent();
8345 return Legalized;
8346 }
8347
8348 if (!EltTy.isByteSized()) { // Not implemented.
8349 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8350 return UnableToLegalize;
8351 }
8352
8353 unsigned EltBytes = EltTy.getSizeInBytes();
8354 Align VecAlign = getStackTemporaryAlignment(VecTy);
8355 Align EltAlign;
8356
8357 MachinePointerInfo PtrInfo;
8358 auto StackTemp = createStackTemporary(
8359 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
8360 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
8361
8362 // Get the pointer to the element, and be sure not to hit undefined behavior
8363 // if the index is out of bounds.
8364 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
8365
8366 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
8367 int64_t Offset = IdxVal * EltBytes;
8368 PtrInfo = PtrInfo.getWithOffset(Offset);
8369 EltAlign = commonAlignment(VecAlign, Offset);
8370 } else {
8371 // We lose information with a variable offset.
8372 EltAlign = getStackTemporaryAlignment(EltTy);
8373 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
8374 }
8375
8376 if (InsertVal) {
8377 // Write the inserted element
8378 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
8379
8380 // Reload the whole vector.
8381 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
8382 } else {
8383 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
8384 }
8385
8386 MI.eraseFromParent();
8387 return Legalized;
8388}
8389
// Lower a shuffle by scalarizing: extract each selected element (negative
// mask entries produce a shared G_IMPLICIT_DEF) and rebuild the destination
// with G_BUILD_VECTOR, or a plain copy when the result is scalar.
8392 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8393 MI.getFirst3RegLLTs();
8394 LLT IdxTy = LLT::scalar(32);
8395
8396 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
8397 Register Undef;
8399 LLT EltTy = DstTy.getScalarType();
8400
8401 for (int Idx : Mask) {
8402 if (Idx < 0) {
// Lazily create a single undef value and reuse it for every undef lane.
8403 if (!Undef.isValid())
8404 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
8405 BuildVec.push_back(Undef);
8406 continue;
8407 }
8408
8409 if (Src0Ty.isScalar()) {
8410 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
8411 } else {
// Mask indices >= NumElts address the second source vector.
8412 int NumElts = Src0Ty.getNumElements();
8413 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
8414 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
8415 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
8416 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
8417 BuildVec.push_back(Extract.getReg(0));
8418 }
8419 }
8420
8421 if (DstTy.isScalar())
8422 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
8423 else
8424 MIRBuilder.buildBuildVector(DstReg, BuildVec);
8425 MI.eraseFromParent();
8426 return Legalized;
8427}
8428
// Lower masked vector compress through a stack temporary: optionally
// pre-store the passthru, then store each active element at a write cursor
// (OutPos) that only advances for selected lanes, and reload the whole
// vector. Scalable vectors are rejected up front. When a passthru exists,
// the final iteration also clamps OutPos and back-fills the tail slot so
// fully-selected inputs don't leave a stale element.
8431 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
8432 MI.getFirst4RegLLTs();
8433
8434 if (VecTy.isScalableVector())
8435 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
8436
8437 Align VecAlign = getStackTemporaryAlignment(VecTy);
8438 MachinePointerInfo PtrInfo;
8439 Register StackPtr =
8440 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
8441 PtrInfo)
8442 .getReg(0);
8443 MachinePointerInfo ValPtrInfo =
8445
8446 LLT IdxTy = LLT::scalar(32);
8447 LLT ValTy = VecTy.getElementType();
8448 Align ValAlign = getStackTemporaryAlignment(ValTy);
8449
8450 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
8451
// An undef passthru means the tail contents don't matter, so the pre-store
// and tail back-fill can be skipped.
8452 bool HasPassthru =
8453 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
8454
8455 if (HasPassthru)
8456 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
8457
8458 Register LastWriteVal;
8459 std::optional<APInt> PassthruSplatVal =
8460 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
8461
8462 if (PassthruSplatVal.has_value()) {
8463 LastWriteVal =
8464 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
8465 } else if (HasPassthru) {
// Non-splat passthru: popcount of the mask locates the first tail slot,
// whose (already-stored) passthru value seeds LastWriteVal.
8466 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
8467 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
8468 {LLT::scalar(32)}, {Popcount});
8469
8470 Register LastElmtPtr =
8471 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
8472 LastWriteVal =
8473 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
8474 .getReg(0);
8475 }
8476
8477 unsigned NumElmts = VecTy.getNumElements();
8478 for (unsigned I = 0; I < NumElmts; ++I) {
8479 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
8480 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
8481 Register ElmtPtr =
8482 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
// Unconditionally store; inactive lanes are overwritten later because
// OutPos doesn't advance for them.
8483 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
8484
8485 LLT MaskITy = MaskTy.getElementType();
8486 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
8487 if (MaskITy.getSizeInBits() > 1)
8488 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
8489
8490 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
8491 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
8492
8493 if (HasPassthru && I == NumElmts - 1) {
// If every lane was selected OutPos now equals NumElmts; clamp it back
// in range and write the last selected value (or passthru) there.
8494 auto EndOfVector =
8495 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
8496 auto AllLanesSelected = MIRBuilder.buildICmp(
8497 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
8498 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
8499 {OutPos, EndOfVector});
8500 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
8501
8502 LastWriteVal =
8503 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
8504 .getReg(0);
8505 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
8506 }
8507 }
8508
8509 // TODO: Use StackPtr's FrameIndex alignment.
8510 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
8511
8512 MI.eraseFromParent();
8513 return Legalized;
8514}
8515
// Compute the new stack pointer for a dynamic stack allocation: cast SP to
// an integer, subtract the allocation size, round down to the requested
// alignment with a negated-alignment mask, and cast back to a pointer.
// Assumes a downward-growing stack (see the caller's StackGrowsUp check).
8517 Register AllocSize,
8518 Align Alignment,
8519 LLT PtrTy) {
8520 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
8521
8522 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
8523 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
8524
8525 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
8526 // have to generate an extra instruction to negate the alloc and then use
8527 // G_PTR_ADD to add the negative offset.
8528 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
8529 if (Alignment > Align(1)) {
// -Alignment is an all-ones mask with the low log2(Alignment) bits clear;
// AND-ing rounds the address down to the alignment boundary.
8530 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
8531 AlignMask.negate();
8532 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
8533 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
8534 }
8535
8536 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
8537}
8538
// Lower G_DYN_STACKALLOC: compute the adjusted, aligned stack pointer (see
// getDynStackAllocTargetPtr) and write it back to both the SP register and
// the result. Only downward-growing stacks are supported.
// NOTE(review): line 8551 (obtaining SPReg, presumably from
// TLI.getStackPointerRegisterToSaveRestore()) was dropped by the doc
// extractor; confirm against upstream.
8541 const auto &MF = *MI.getMF();
8542 const auto &TFI = *MF.getSubtarget().getFrameLowering();
8543 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
8544 return UnableToLegalize;
8545
8546 Register Dst = MI.getOperand(0).getReg();
8547 Register AllocSize = MI.getOperand(1).getReg();
8548 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
8549
8550 LLT PtrTy = MRI.getType(Dst);
8552 Register SPTmp =
8553 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
8554
8555 MIRBuilder.buildCopy(SPReg, SPTmp);
8556 MIRBuilder.buildCopy(Dst, SPTmp);
8557
8558 MI.eraseFromParent();
8559 return Legalized;
8560}
8561
// Lower stacksave: copy the stack pointer register into the result.
// NOTE(review): the line defining StackPtr (presumably the TLI stack-pointer
// register query) was dropped by the doc extractor; a null register means
// the target declares no stack pointer, so bail out.
8565 if (!StackPtr)
8566 return UnableToLegalize;
8567
8568 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
8569 MI.eraseFromParent();
8570 return Legalized;
8571}
8572
// Lower stackrestore: copy the operand back into the stack pointer register
// (the mirror of stacksave above).
// NOTE(review): the line defining StackPtr was dropped by the doc extractor;
// as above, a null register means the target has no stack pointer to
// restore.
8576 if (!StackPtr)
8577 return UnableToLegalize;
8578
8579 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
8580 MI.eraseFromParent();
8581 return Legalized;
8582}
8583
// Lower G_EXTRACT at a constant bit offset. Two supported shapes:
//  * Vector source with element-aligned offset and size: unmerge the source
//    and copy/merge the covered elements (leaving the pieces visible to the
//    artifact combiner).
//  * Scalar result from a scalar (or single-element) source: bitcast to an
//    integer if needed, shift right by the offset, and truncate.
// Anything else is UnableToLegalize.
8586 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8587 unsigned Offset = MI.getOperand(2).getImm();
8588
8589 // Extract sub-vector or one element
8590 if (SrcTy.isVector()) {
8591 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
8592 unsigned DstSize = DstTy.getSizeInBits();
8593
8594 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
8595 (Offset + DstSize <= SrcTy.getSizeInBits())) {
8596 // Unmerge and allow access to each Src element for the artifact combiner.
8597 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
8598
8599 // Take element(s) we need to extract and copy it (merge them).
8600 SmallVector<Register, 8> SubVectorElts;
8601 for (unsigned Idx = Offset / SrcEltSize;
8602 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
8603 SubVectorElts.push_back(Unmerge.getReg(Idx));
8604 }
8605 if (SubVectorElts.size() == 1)
8606 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
8607 else
8608 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
8609
8610 MI.eraseFromParent();
8611 return Legalized;
8612 }
8613 }
8614
8615 if (DstTy.isScalar() &&
8616 (SrcTy.isScalar() ||
8617 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
8618 LLT SrcIntTy = SrcTy;
8619 if (!SrcTy.isScalar()) {
8620 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
8621 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
8622 }
8623
8624 if (Offset == 0)
8625 MIRBuilder.buildTrunc(DstReg, SrcReg);
8626 else {
8627 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
8628 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
8629 MIRBuilder.buildTrunc(DstReg, Shr);
8630 }
8631
8632 MI.eraseFromParent();
8633 return Legalized;
8634 }
8635
8636 return UnableToLegalize;
8637}
8638
// G_INSERT lowering. Element-aligned vector inserts are expanded to
// unmerge + splice + merge; scalar/pointer cases are done with integer
// mask-and-or bit manipulation.
// NOTE(review): this listing is missing the signature (original line 8639)
// and original lines 8655 (presumably the DstElts SmallVector declaration),
// 8689 (presumably the DataLayout lookup), and 8716 (start of the MaskVal
// computation). Verify against upstream before editing.
8640  auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
8641  uint64_t Offset = MI.getOperand(3).getImm();
8642
8643  LLT DstTy = MRI.getType(Src);
8644  LLT InsertTy = MRI.getType(InsertSrc);
8645
8646  // Insert sub-vector or one element
8647  if (DstTy.isVector() && !InsertTy.isPointer()) {
8648    LLT EltTy = DstTy.getElementType();
8649    unsigned EltSize = EltTy.getSizeInBits();
8650    unsigned InsertSize = InsertTy.getSizeInBits();
8651
// Only element-aligned, in-bounds inserts take the vector path.
8652    if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
8653        (Offset + InsertSize <= DstTy.getSizeInBits())) {
8654      auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
8656      unsigned Idx = 0;
8657      // Elements from Src before insert start Offset
8658      for (; Idx < Offset / EltSize; ++Idx) {
8659        DstElts.push_back(UnmergeSrc.getReg(Idx));
8660      }
8661
8662      // Replace elements in Src with elements from InsertSrc
8663      if (InsertTy.getSizeInBits() > EltSize) {
8664        auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
8665        for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
8666             ++Idx, ++i) {
8667          DstElts.push_back(UnmergeInsertSrc.getReg(i));
8668        }
8669      } else {
// Single-element insert: the value is already element-typed.
8670        DstElts.push_back(InsertSrc);
8671        ++Idx;
8672      }
8673
8674      // Remaining elements from Src after insert
8675      for (; Idx < DstTy.getNumElements(); ++Idx) {
8676        DstElts.push_back(UnmergeSrc.getReg(Idx));
8677      }
8678
8679      MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
8680      MI.eraseFromParent();
8681      return Legalized;
8682    }
8683  }
8684
// Bit-level fallback only handles scalar-like inserts.
8685  if (InsertTy.isVector() ||
8686      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
8687    return UnableToLegalize;
8688
// Non-integral address spaces cannot be round-tripped through integers.
8690  if ((DstTy.isPointer() &&
8691       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
8692      (InsertTy.isPointer() &&
8693       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
8694    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
8695    return UnableToLegalize;
8696  }
8697
8698  LLT IntDstTy = DstTy;
8699
8700  if (!DstTy.isScalar()) {
8701    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
8702    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
8703  }
8704
8705  if (!InsertTy.isScalar()) {
8706    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
8707    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
8708  }
8709
// Widen the inserted value and shift it into position.
8710  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
8711  if (Offset != 0) {
8712    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
8713    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
8714  }
8715
8717      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
8718
// Clear the destination bits being replaced, then OR in the shifted value.
8719  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
8720  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
8721  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
8722
8723  MIRBuilder.buildCast(Dst, Or);
8724  MI.eraseFromParent();
8725  return Legalized;
8726}
8727
// Lowering of G_SADDO / G_SSUBO: perform the plain add/sub, then derive the
// signed-overflow flag via the "result < LHS xor RHS-sign" identity explained
// in the comments below.
// NOTE(review): the enclosing signature (original lines 8728-8729) is not in
// this listing.
8730  auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
8731      MI.getFirst4RegLLTs();
8732  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
8733
8734  LLT Ty = Dst0Ty;
8735  LLT BoolTy = Dst1Ty;
8736
// A fresh vreg for the arithmetic result so Dst0 is written exactly once
// (by the final copy) after the overflow computation reads NewDst0.
8737  Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
8738
8739  if (IsAdd)
8740    MIRBuilder.buildAdd(NewDst0, LHS, RHS);
8741  else
8742    MIRBuilder.buildSub(NewDst0, LHS, RHS);
8743
8744  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
8745
8746  auto Zero = MIRBuilder.buildConstant(Ty, 0);
8747
8748  // For an addition, the result should be less than one of the operands (LHS)
8749  // if and only if the other operand (RHS) is negative, otherwise there will
8750  // be overflow.
8751  // For a subtraction, the result should be less than one of the operands
8752  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
8753  // otherwise there will be overflow.
8754  auto ResultLowerThanLHS =
8755      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
8756  auto ConditionRHS = MIRBuilder.buildICmp(
8757      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
8758
// Overflow iff exactly one of the two conditions holds.
8759  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
8760
8761  MIRBuilder.buildCopy(Dst0, NewDst0);
8762  MI.eraseFromParent();
8763
8764  return Legalized;
8765}
8766
// Lowering of the saturating add/sub opcodes using min/max clamping of the
// RHS (signed) or umin against the headroom (unsigned), per the formulas in
// the comments below.
// NOTE(review): this listing is missing the signature (original lines
// 8767-8768) and originals 8812, 8814-8815 (the MaxVal/MinVal constant
// builds and the Hi/Lo declarations) and 8827 (the SMin/SMax clamp of RHS).
// Verify against upstream before editing.
8769  auto [Res, LHS, RHS] = MI.getFirst3Regs();
8770  LLT Ty = MRI.getType(Res);
8771  bool IsSigned;
8772  bool IsAdd;
8773  unsigned BaseOp;
8774  switch (MI.getOpcode()) {
8775  default:
8776    llvm_unreachable("unexpected addsat/subsat opcode");
8777  case TargetOpcode::G_UADDSAT:
8778    IsSigned = false;
8779    IsAdd = true;
8780    BaseOp = TargetOpcode::G_ADD;
8781    break;
8782  case TargetOpcode::G_SADDSAT:
8783    IsSigned = true;
8784    IsAdd = true;
8785    BaseOp = TargetOpcode::G_ADD;
8786    break;
8787  case TargetOpcode::G_USUBSAT:
8788    IsSigned = false;
8789    IsAdd = false;
8790    BaseOp = TargetOpcode::G_SUB;
8791    break;
8792  case TargetOpcode::G_SSUBSAT:
8793    IsSigned = true;
8794    IsAdd = false;
8795    BaseOp = TargetOpcode::G_SUB;
8796    break;
8797  }
8798
8799  if (IsSigned) {
8800    // sadd.sat(a, b) ->
8801    //   hi = 0x7fffffff - smax(a, 0)
8802    //   lo = 0x80000000 - smin(a, 0)
8803    //   a + smin(smax(lo, b), hi)
8804    // ssub.sat(a, b) ->
8805    //   lo = smax(a, -1) - 0x7fffffff
8806    //   hi = smin(a, -1) - 0x80000000
8807    //   a - smin(smax(lo, b), hi)
8808    // TODO: AMDGPU can use a "median of 3" instruction here:
8809    //   a +/- med3(lo, b, hi)
8810    uint64_t NumBits = Ty.getScalarSizeInBits();
8811    auto MaxVal =
8813    auto MinVal =
8816    if (IsAdd) {
8817      auto Zero = MIRBuilder.buildConstant(Ty, 0);
8818      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
8819      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
8820    } else {
8821      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
8822      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
8823                               MaxVal);
8824      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
8825                               MinVal);
8826    }
8828    auto RHSClamped =
8829    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
8830  } else {
8831    // uadd.sat(a, b) -> a + umin(~a, b)
8832    // usub.sat(a, b) -> a - umin(a, b)
// For uadd.sat, ~a is the remaining headroom before wraparound.
8833    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
8834    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
8835    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
8836  }
8837
8838  MI.eraseFromParent();
8839  return Legalized;
8840}
8841
// Lowering of saturating add/sub via the corresponding overflow opcodes:
// compute {tmp, ov}, then select between tmp and the saturation value.
// NOTE(review): the signature (original lines 8842-8843) and original 8891
// (the signed-min constant build feeding MinVal) are missing from this
// listing.
8844  auto [Res, LHS, RHS] = MI.getFirst3Regs();
8845  LLT Ty = MRI.getType(Res);
8846  LLT BoolTy = Ty.changeElementSize(1);
8847  bool IsSigned;
8848  bool IsAdd;
8849  unsigned OverflowOp;
8850  switch (MI.getOpcode()) {
8851  default:
8852    llvm_unreachable("unexpected addsat/subsat opcode");
8853  case TargetOpcode::G_UADDSAT:
8854    IsSigned = false;
8855    IsAdd = true;
8856    OverflowOp = TargetOpcode::G_UADDO;
8857    break;
8858  case TargetOpcode::G_SADDSAT:
8859    IsSigned = true;
8860    IsAdd = true;
8861    OverflowOp = TargetOpcode::G_SADDO;
8862    break;
8863  case TargetOpcode::G_USUBSAT:
8864    IsSigned = false;
8865    IsAdd = false;
8866    OverflowOp = TargetOpcode::G_USUBO;
8867    break;
8868  case TargetOpcode::G_SSUBSAT:
8869    IsSigned = true;
8870    IsAdd = false;
8871    OverflowOp = TargetOpcode::G_SSUBO;
8872    break;
8873  }
8874
8875  auto OverflowRes =
8876      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
8877  Register Tmp = OverflowRes.getReg(0);
8878  Register Ov = OverflowRes.getReg(1);
8879  MachineInstrBuilder Clamp;
8880  if (IsSigned) {
8881    // sadd.sat(a, b) ->
8882    //   {tmp, ov} = saddo(a, b)
8883    //   ov ? (tmp >>s 31) + 0x80000000 : r
8884    // ssub.sat(a, b) ->
8885    //   {tmp, ov} = ssubo(a, b)
8886    //   ov ? (tmp >>s 31) + 0x80000000 : r
// Sign of the (wrapped) result picks INT_MIN vs INT_MAX as the clamp:
// sign-all-ones + 0x80... == INT_MAX on overflow-to-negative, etc.
8887    uint64_t NumBits = Ty.getScalarSizeInBits();
8888    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
8889    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
8890    auto MinVal =
8892    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
8893  } else {
8894    // uadd.sat(a, b) ->
8895    //   {tmp, ov} = uaddo(a, b)
8896    //   ov ? 0xffffffff : tmp
8897    // usub.sat(a, b) ->
8898    //   {tmp, ov} = usubo(a, b)
8899    //   ov ? 0 : tmp
8900    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
8901  }
8902  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
8903
8904  MI.eraseFromParent();
8905  return Legalized;
8906}
8907
// Lowering of G_SSHLSAT / G_USHLSAT: shift, shift back, and saturate if the
// round-trip lost bits (i.e. the shift overflowed).
// NOTE(review): the signature (original lines 8908-8909) and original 8931
// (the unsigned SatVal constant — presumably all-ones max value) are missing
// from this listing.
8910  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8911          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8912         "Expected shlsat opcode!");
8913  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
8914  auto [Res, LHS, RHS] = MI.getFirst3Regs();
8915  LLT Ty = MRI.getType(Res);
8916  LLT BoolTy = Ty.changeElementSize(1);
8917
8918  unsigned BW = Ty.getScalarSizeInBits();
8919  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
// Shift back with the matching right-shift; equality with LHS means no bits
// were lost and the plain shift result is exact.
8920  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
8921                       : MIRBuilder.buildLShr(Ty, Result, RHS);
8922
8923  MachineInstrBuilder SatVal;
8924  if (IsSigned) {
// Signed saturation direction depends on the sign of LHS.
8925    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
8926    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
8927    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
8928                                    MIRBuilder.buildConstant(Ty, 0));
8929    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
8930  } else {
8932  }
8933  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
8934  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
8935
8936  MI.eraseFromParent();
8937  return Legalized;
8938}
8939
// Byte-swap lowering using shifts, masks and ORs: first swap the outermost
// byte pair, then move inward one byte pair per loop iteration.
// NOTE(review): the enclosing signature (original line 8940) is not in this
// listing.
8941  auto [Dst, Src] = MI.getFirst2Regs();
8942  const LLT Ty = MRI.getType(Src);
// Round the bit width up to whole bytes.
8943  unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
8944  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8945
8946  // Swap most and least significant byte, set remaining bytes in Res to zero.
8947  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
8948  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
8949  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8950  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
8951
8952  // Set i-th high/low byte in Res to i-th low/high byte from Src.
8953  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
8954    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
8955    APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8956    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
// Each step inward shortens the distance by two byte positions (16 bits).
8957    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
8958    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
8959    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
8960    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
8961    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
8962    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
8963    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8964    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
8965    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
8966  }
// Retarget the final OR to write directly into Dst instead of a temp vreg.
8967  Res.getInstr()->getOperand(0).setReg(Dst);
8968
8969  MI.eraseFromParent();
8970  return Legalized;
8971}
8972
8973//{ (Src & Mask) >> N } | { (Src << N) & Mask }
// Helper for the bit-reverse expansion: swaps N-bit halves of each 2N-bit
// group using the supplied mask of the high halves.
// NOTE(review): the first line of this helper's signature (original line
// 8974) is missing from this listing.
8975                                MachineInstrBuilder Src, const APInt &Mask) {
8976  const LLT Ty = Dst.getLLTTy(*B.getMRI());
8977  MachineInstrBuilder C_N = B.buildConstant(Ty, N);
8978  MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
8979  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8980  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8981  return B.buildOr(Dst, LHS, RHS);
8982}
8983
// Bit-reverse lowering: for widths >= 8, byte-swap then swap nibbles, bit
// pairs and single bits via SwapN; for narrower types, move each bit to its
// mirrored position one at a time.
// NOTE(review): the signature (original lines 8984-8985) and originals 9013
// and 9015 (declarations of the Tmp/Tmp2 builders used in the small-type
// loop) are missing from this listing.
8986  auto [Dst, Src] = MI.getFirst2Regs();
8987  const LLT Ty = MRI.getType(Src);
8988  unsigned Size = Ty.getScalarSizeInBits();
8989
8990  if (Size >= 8) {
8991    MachineInstrBuilder BSWAP =
8992        MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
8993
8994    // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
8995    //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
8996    // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
8997    MachineInstrBuilder Swap4 =
8998        SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
8999
9000    // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9001    //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
9002    // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
9003    MachineInstrBuilder Swap2 =
9004        SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
9005
9006    // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9007    // 6|7
9008    //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
9009    // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
9010    SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9011  } else {
9012    // Expand bitreverse for types smaller than 8 bits.
9014    for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
// Move bit I to mirrored position J: shift left while I < J, right after.
9016      if (I < J) {
9017        auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
9018        Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
9019      } else {
9020        auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
9021        Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
9022      }
9023
9024      auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
9025      Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
9026      if (I == 0)
9027        Tmp = Tmp2;
9028      else
9029        Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
9030    }
9031    MIRBuilder.buildCopy(Dst, Tmp);
9032  }
9033
9034  MI.eraseFromParent();
9035  return Legalized;
9036}
9037
// Lowering of G_READ_REGISTER / G_WRITE_REGISTER: resolve the named physical
// register from the metadata operand and emit a copy in the right direction.
// NOTE(review): the signature and the MF/TLI declarations (original lines
// 9038-9040) are missing from this listing.
9041
9042  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
// Operand layout differs between the two opcodes: read = (val, name),
// write = (name, val).
9043  int NameOpIdx = IsRead ? 1 : 0;
9044  int ValRegIndex = IsRead ? 0 : 1;
9045
9046  Register ValReg = MI.getOperand(ValRegIndex).getReg();
9047  const LLT Ty = MRI.getType(ValReg);
9048  const MDString *RegStr = cast<MDString>(
9049    cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9050
9051  Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9052  if (!PhysReg.isValid())
9053    return UnableToLegalize;
9054
9055  if (IsRead)
9056    MIRBuilder.buildCopy(ValReg, PhysReg);
9057  else
9058    MIRBuilder.buildCopy(PhysReg, ValReg);
9059
9060  MI.eraseFromParent();
9061  return Legalized;
9062}
9063
// Lowering of G_SMULH / G_UMULH: extend both operands to double width,
// multiply, shift right by the original width, and truncate to get the
// high half of the product.
// NOTE(review): the enclosing signature (original lines 9064-9065) is not in
// this listing.
9066  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9067  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9068  Register Result = MI.getOperand(0).getReg();
9069  LLT OrigTy = MRI.getType(Result);
9070  auto SizeInBits = OrigTy.getScalarSizeInBits();
9071  LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9072
9073  auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9074  auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9075  auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
// Arithmetic shift for signed, logical for unsigned, to keep the high half's
// sign semantics correct.
9076  unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9077
9078  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9079  auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9080  MIRBuilder.buildTrunc(Result, Shifted);
9081
9082  MI.eraseFromParent();
9083  return Legalized;
9084}
9085
// Lowering of G_IS_FPCLASS: test the requested FPClassTest mask against the
// integer bit pattern of the float source, accumulating partial class checks
// into Res with ORs. Mirrors SelectionDAG's expandIS_FPCLASS.
// NOTE(review): this listing is missing the signature (original lines
// 9086-9087) and numerous interior lines (e.g. 9131, 9166, 9175, 9178-9181,
// 9192, 9201, 9205, 9209, 9219, 9228, 9243 — mostly buildICmp calls and
// their first argument lines). Verify against upstream before editing.
9088  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9089  FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9090
// Trivial masks fold to constants.
9091  if (Mask == fcNone) {
9092    MIRBuilder.buildConstant(DstReg, 0);
9093    MI.eraseFromParent();
9094    return Legalized;
9095  }
9096  if (Mask == fcAllFlags) {
9097    MIRBuilder.buildConstant(DstReg, 1);
9098    MI.eraseFromParent();
9099    return Legalized;
9100  }
9101
9102  // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9103  // version
9104
9105  unsigned BitSize = SrcTy.getScalarSizeInBits();
9106  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9107
// Reinterpret the FP value as same-width integers for the bit tests.
9108  LLT IntTy = LLT::scalar(BitSize);
9109  if (SrcTy.isVector())
9110    IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9111  auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9112
9113  // Various masks.
9114  APInt SignBit = APInt::getSignMask(BitSize);
9115  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
9116  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9117  APInt ExpMask = Inf;
9118  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9119  APInt QNaNBitMask =
9120      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9121  APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9122
9123  auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9124  auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9125  auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9126  auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9127  auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9128
9129  auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9130  auto Sign =
9132
9133  auto Res = MIRBuilder.buildConstant(DstTy, 0);
9134  // Clang doesn't support capture of structured bindings:
9135  LLT DstTyCopy = DstTy;
// All partial checks are ORed into the running result.
9136  const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9137    Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9138  };
9139
9140  // Tests that involve more than one class should be processed first.
9141  if ((Mask & fcFinite) == fcFinite) {
9142    // finite(V) ==> abs(V) u< exp_mask
9143    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9144                                     ExpMaskC));
9145    Mask &= ~fcFinite;
9146  } else if ((Mask & fcFinite) == fcPosFinite) {
9147    // finite(V) && V > 0 ==> V u< exp_mask
9148    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9149                                     ExpMaskC));
9150    Mask &= ~fcPosFinite;
9151  } else if ((Mask & fcFinite) == fcNegFinite) {
9152    // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9153    auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9154                                    ExpMaskC);
9155    auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9156    appendToRes(And);
9157    Mask &= ~fcNegFinite;
9158  }
9159
9160  if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9161    // fcZero | fcSubnormal => test all exponent bits are 0
9162    // TODO: Handle sign bit specific cases
9163    // TODO: Handle inverted case
9164    if (PartialCheck == (fcZero | fcSubnormal)) {
9165      auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9167                                       ExpBits, ZeroC));
9168      Mask &= ~PartialCheck;
9169    }
9170  }
9171
9172  // Check for individual classes.
9173  if (FPClassTest PartialCheck = Mask & fcZero) {
9174    if (PartialCheck == fcPosZero)
9176                                       AsInt, ZeroC));
9177    else if (PartialCheck == fcZero)
9178      appendToRes(
9180    else // fcNegZero
9182                                       AsInt, SignBitC));
9183  }
9184
9185  if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9186    // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9187    // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9188    auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9189    auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9190    auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9191    auto SubnormalRes =
9193                             MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9194    if (PartialCheck == fcNegSubnormal)
9195      SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9196    appendToRes(SubnormalRes);
9197  }
9198
9199  if (FPClassTest PartialCheck = Mask & fcInf) {
9200    if (PartialCheck == fcPosInf)
9202                                       AsInt, InfC));
9203    else if (PartialCheck == fcInf)
9204      appendToRes(
9206    else { // fcNegInf
9207      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9208      auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9210                                       AsInt, NegInfC));
9211    }
9212  }
9213
9214  if (FPClassTest PartialCheck = Mask & fcNan) {
9215    auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9216    if (PartialCheck == fcNan) {
9217      // isnan(V) ==> abs(V) u> int(inf)
9218      appendToRes(
9220    } else if (PartialCheck == fcQNan) {
9221      // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9222      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9223                                       InfWithQnanBitC));
9224    } else { // fcSNan
9225      // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9226      //                    abs(V) u< (unsigned(Inf) | quiet_bit)
9227      auto IsNan =
9229      auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9230                                            Abs, InfWithQnanBitC);
9231      appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9232    }
9233  }
9234
9235  if (FPClassTest PartialCheck = Mask & fcNormal) {
9236    // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9237    // (max_exp-1))
9238    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9239    auto ExpMinusOne = MIRBuilder.buildSub(
9240        IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9241    APInt MaxExpMinusOne = ExpMask - ExpLSB;
9242    auto NormalRes =
9244                             MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9245    if (PartialCheck == fcNegNormal)
9246      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9247    else if (PartialCheck == fcPosNormal) {
// Invert the sign test to require a cleared sign bit for the positive case.
9248      auto PosSign = MIRBuilder.buildXor(
9249          DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
9250      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
9251    }
9252    appendToRes(NormalRes);
9253  }
9254
9255  MIRBuilder.buildCopy(DstReg, Res);
9256  MI.eraseFromParent();
9257  return Legalized;
9258}
9259
// NOTE(review): the enclosing signature (original line 9260) is not in this
// listing; the body below appears complete and contiguous.
9261  // Implement G_SELECT in terms of XOR, AND, OR.
9262  auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9263      MI.getFirst4RegLLTs();
9264
// Pointer (or pointer-vector) operands are converted to integers so the
// bitwise blend below is legal, then converted back at the end.
9265  bool IsEltPtr = DstTy.isPointerOrPointerVector();
9266  if (IsEltPtr) {
9267    LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9268    LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9269    Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9270    Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9271    DstTy = NewTy;
9272  }
9273
9274  if (MaskTy.isScalar()) {
9275    // Turn the scalar condition into a vector condition mask if needed.
9276
9277    Register MaskElt = MaskReg;
9278
9279    // The condition was potentially zero extended before, but we want a sign
9280    // extended boolean.
9281    if (MaskTy != LLT::scalar(1))
9282      MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
9283
9284    // Continue the sign extension (or truncate) to match the data type.
9285    MaskElt =
9286        MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
9287
9288    if (DstTy.isVector()) {
9289      // Generate a vector splat idiom.
9290      auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9291      MaskReg = ShufSplat.getReg(0);
9292    } else {
9293      MaskReg = MaskElt;
9294    }
9295    MaskTy = DstTy;
9296  } else if (!DstTy.isVector()) {
9297    // Cannot handle the case that mask is a vector and dst is a scalar.
9298    return UnableToLegalize;
9299  }
9300
// The bitwise blend requires the mask and data widths to line up exactly.
9301  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9302    return UnableToLegalize;
9303  }
9304
// Classic select-as-bitwise: (Op1 & M) | (Op2 & ~M).
9305  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9306  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9307  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
9308  if (IsEltPtr) {
9309    auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9310    MIRBuilder.buildIntToPtr(DstReg, Or);
9311  } else {
9312    MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9313  }
9314  MI.eraseFromParent();
9315  return Legalized;
9316}
9317
// NOTE(review): the signature (original line 9318) and originals 9322/9326
// (the MIRBuilder.buildInstr( call heads for the div and rem) are missing
// from this listing.
9319  // Split DIVREM into individual instructions.
9320  unsigned Opcode = MI.getOpcode();
9321
// Operand layout: (quotient, remainder, numerator, denominator).
9323      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9324                                        : TargetOpcode::G_UDIV,
9325      {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9327      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9328                                        : TargetOpcode::G_UREM,
9329      {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9330  MI.eraseFromParent();
9331  return Legalized;
9332}
9333
// NOTE(review): the enclosing signature (original lines 9334-9335) is not in
// this listing; the body below is contiguous.
9336  // Expand %res = G_ABS %a into:
9337  // %v1 = G_ASHR %a, scalar_size-1
9338  // %v2 = G_ADD %a, %v1
9339  // %res = G_XOR %v2, %v1
9340  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
9341  Register OpReg = MI.getOperand(1).getReg();
// The arithmetic shift broadcasts the sign bit: 0 for non-negative,
// all-ones for negative, so add+xor conditionally negates.
9342  auto ShiftAmt =
9343      MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
9344  auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
9345  auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
9346  MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
9347  MI.eraseFromParent();
9348  return Legalized;
9349}
9350
// NOTE(review): the enclosing signature (original lines 9351-9352) is not in
// this listing; the body below is contiguous.
9353  // Expand %res = G_ABS %a into:
9354  // %v1 = G_CONSTANT 0
9355  // %v2 = G_SUB %v1, %a
9356  // %res = G_SMAX %a, %v2
9357  Register SrcReg = MI.getOperand(1).getReg();
9358  LLT Ty = MRI.getType(SrcReg);
9359  auto Zero = MIRBuilder.buildConstant(Ty, 0);
9360  auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
// abs(a) == smax(a, -a).
9361  MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
9362  MI.eraseFromParent();
9363  return Legalized;
9364}
9365
// Absolute-value expansion via compare-and-select: select(a > 0, a, -a).
// NOTE(review): the enclosing signature (original lines 9366-9367) is not in
// this listing.
9368  Register SrcReg = MI.getOperand(1).getReg();
9369  Register DestReg = MI.getOperand(0).getReg();
9370  LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
9371  auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
9372  auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
9373  auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
9374  MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
9375  MI.eraseFromParent();
9376  return Legalized;
9377}
9378
// FP absolute-value lowering by clearing the sign bit of the bit pattern.
// NOTE(review): the signature (original line 9379) and originals 9386 and
// 9388-9389 (the build call and its mask arguments under "Reset sign bit")
// are missing from this listing.
9380  Register SrcReg = MI.getOperand(1).getReg();
9381  Register DstReg = MI.getOperand(0).getReg();
9382
9383  LLT Ty = MRI.getType(DstReg);
9384
9385  // Reset sign bit
9387      DstReg, SrcReg,
9390
9391  MI.eraseFromParent();
9392  return Legalized;
9393}
9394
// Vector-reduction lowering for the degenerate <1 x sN>-as-scalar case:
// the reduction of a single element is just a copy.
// NOTE(review): the signature (original lines 9395-9396) and originals 9406
// and 9408 (around the Observer change/changed calls wrapping setDesc) are
// missing from this listing. Note DstTy is computed from SrcReg here —
// presumably intentional for the <1 x sN> case, but worth confirming
// upstream.
9397  Register SrcReg = MI.getOperand(1).getReg();
9398  LLT SrcTy = MRI.getType(SrcReg);
9399  LLT DstTy = MRI.getType(SrcReg);
9400
9401  // The source could be a scalar if the IR type was <1 x sN>.
9402  if (SrcTy.isScalar()) {
9403    if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
9404      return UnableToLegalize; // FIXME: handle extension.
9405    // This can be just a plain copy.
9407    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
9409    return Legalized;
9410  }
9411  return UnableToLegalize;
9412}
9413
// Lowering of a va_arg-style operation: load the current list pointer, align
// it if the requested alignment exceeds the minimum stack-argument alignment,
// store back the bumped pointer, and load the actual argument.
// NOTE(review): the signature (original line 9414) and originals 9416
// (presumably the DataLayout lookup) and 9449 (the StoreMMO declaration head)
// are missing from this listing.
9415  MachineFunction &MF = *MI.getMF();
9417  LLVMContext &Ctx = MF.getFunction().getContext();
9418  Register ListPtr = MI.getOperand(1).getReg();
9419  LLT PtrTy = MRI.getType(ListPtr);
9420
9421  // LstPtr is a pointer to the head of the list. Get the address
9422  // of the head of the list.
9423  Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
9424  MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
9425      MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
9426  auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
9427
9428  const Align A(MI.getOperand(2).getImm());
9429  LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
// Over-align the list pointer only when the element needs more alignment
// than stack arguments already guarantee: add (A-1) then mask the low bits.
9430  if (A > TLI.getMinStackArgumentAlignment()) {
9431    Register AlignAmt =
9432        MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
9433    auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
9434    auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
9435    VAList = AndDst.getReg(0);
9436  }
9437
9438  // Increment the pointer, VAList, to the next vaarg
9439  // The list should be bumped by the size of element in the current head of
9440  // list.
9441  Register Dst = MI.getOperand(0).getReg();
9442  LLT LLTTy = MRI.getType(Dst);
9443  Type *Ty = getTypeForLLT(LLTTy, Ctx);
9444  auto IncAmt =
9445      MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
9446  auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
9447
9448  // Store the increment VAList to the legalized pointer
9450      MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
9451  MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
9452  // Load the actual argument out of the pointer VAList
9453  Align EltAlignment = DL.getABITypeAlign(Ty);
9454  MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
9455      MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
9456  MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
9457
9458  MI.eraseFromParent();
9459  return Legalized;
9460}
9461
// Predicate: should mem functions be lowered for code size?
// NOTE(review): the signature (original line 9462) and original 9465
// (presumably the Darwin target check guarding the hasMinSize branch) are
// missing from this listing.
9463  // On Darwin, -Os means optimize for size without hurting performance, so
9464  // only really optimize for size when -Oz (MinSize) is used.
9466    return MF.getFunction().hasMinSize();
9467  return MF.getFunction().hasOptSize();
9468}
9469
9470// Returns a list of types to use for memory op lowering in MemOps. A partial
9471// port of findOptimalMemOpLowering in TargetLowering.
// Greedily covers Op.size() bytes with the largest usable LLT, shrinking the
// type (and optionally allowing overlapping unaligned ops) for the tail.
// Returns false if more than Limit ops would be needed.
// NOTE(review): originals 9516 and 9518 (interior of the
// allowsMisalignedMemoryAccesses call under the overlap check) are missing
// from this listing.
9472static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
9473                                          unsigned Limit, const MemOp &Op,
9474                                          unsigned DstAS, unsigned SrcAS,
9475                                          const AttributeList &FuncAttributes,
9476                                          const TargetLowering &TLI) {
9477  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
9478    return false;
9479
9480  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
9481
9482  if (Ty == LLT()) {
9483    // Use the largest scalar type whose alignment constraints are satisfied.
9484    // We only need to check DstAlign here as SrcAlign is always greater or
9485    // equal to DstAlign (or zero).
9486    Ty = LLT::scalar(64);
9487    if (Op.isFixedDstAlign())
9488      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
9489             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
9490        Ty = LLT::scalar(Ty.getSizeInBytes());
9491    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
9492    // FIXME: check for the largest legal type we can load/store to.
9493  }
9494
9495  unsigned NumMemOps = 0;
9496  uint64_t Size = Op.size();
9497  while (Size) {
9498    unsigned TySize = Ty.getSizeInBytes();
9499    while (TySize > Size) {
9500      // For now, only use non-vector load / store's for the left-over pieces.
9501      LLT NewTy = Ty;
9502      // FIXME: check for mem op safety and legality of the types. Not all of
9503      // SDAGisms map cleanly to GISel concepts.
9504      if (NewTy.isVector())
9505        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
// bit_floor of (bits - 1) halves the power-of-two width each iteration.
9506      NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
9507      unsigned NewTySize = NewTy.getSizeInBytes();
9508      assert(NewTySize > 0 && "Could not find appropriate type");
9509
9510      // If the new LLT cannot cover all of the remaining bits, then consider
9511      // issuing a (or a pair of) unaligned and overlapping load / store.
9512      unsigned Fast;
9513      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
9514      MVT VT = getMVTForLLT(Ty);
9515      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
9517              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
9519          Fast)
9520        TySize = Size;
9521      else {
9522        Ty = NewTy;
9523        TySize = NewTySize;
9524      }
9525    }
9526
9527    if (++NumMemOps > Limit)
9528      return false;
9529
9530    MemOps.push_back(Ty);
9531    Size -= TySize;
9532  }
9533
9534  return true;
9535}
9536
9537// Get a vectorized representation of the memset value operand, GISel edition.
9539 MachineRegisterInfo &MRI = *MIB.getMRI();
9540 unsigned NumBits = Ty.getScalarSizeInBits();
9541 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
9542 if (!Ty.isVector() && ValVRegAndVal) {
9543 APInt Scalar = ValVRegAndVal->Value.trunc(8);
9544 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
9545 return MIB.buildConstant(Ty, SplatVal).getReg(0);
9546 }
9547
9548 // Extend the byte value to the larger type, and then multiply by a magic
9549 // value 0x010101... in order to replicate it across every byte.
9550 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
9551 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
9552 return MIB.buildConstant(Ty, 0).getReg(0);
9553 }
9554
9555 LLT ExtType = Ty.getScalarType();
9556 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
9557 if (NumBits > 8) {
9558 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
9559 auto MagicMI = MIB.buildConstant(ExtType, Magic);
9560 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
9561 }
9562
9563 // For vector types create a G_BUILD_VECTOR.
9564 if (Ty.isVector())
9565 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
9566
9567 return Val;
9568}
9569
9571LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
9572 uint64_t KnownLen, Align Alignment,
9573 bool IsVolatile) {
9574 auto &MF = *MI.getParent()->getParent();
9575 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9576 auto &DL = MF.getDataLayout();
9577 LLVMContext &C = MF.getFunction().getContext();
9578
9579 assert(KnownLen != 0 && "Have a zero length memset length!");
9580
9581 bool DstAlignCanChange = false;
9582 MachineFrameInfo &MFI = MF.getFrameInfo();
9583 bool OptSize = shouldLowerMemFuncForSize(MF);
9584
9585 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9586 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9587 DstAlignCanChange = true;
9588
9589 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
9590 std::vector<LLT> MemOps;
9591
9592 const auto &DstMMO = **MI.memoperands_begin();
9593 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9594
9595 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
9596 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
9597
9598 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
9599 MemOp::Set(KnownLen, DstAlignCanChange,
9600 Alignment,
9601 /*IsZeroMemset=*/IsZeroVal,
9602 /*IsVolatile=*/IsVolatile),
9603 DstPtrInfo.getAddrSpace(), ~0u,
9604 MF.getFunction().getAttributes(), TLI))
9605 return UnableToLegalize;
9606
9607 if (DstAlignCanChange) {
9608 // Get an estimate of the type from the LLT.
9609 Type *IRTy = getTypeForLLT(MemOps[0], C);
9610 Align NewAlign = DL.getABITypeAlign(IRTy);
9611 if (NewAlign > Alignment) {
9612 Alignment = NewAlign;
9613 unsigned FI = FIDef->getOperand(1).getIndex();
9614 // Give the stack frame object a larger alignment if needed.
9615 if (MFI.getObjectAlign(FI) < Alignment)
9616 MFI.setObjectAlignment(FI, Alignment);
9617 }
9618 }
9619
9620 MachineIRBuilder MIB(MI);
9621 // Find the largest store and generate the bit pattern for it.
9622 LLT LargestTy = MemOps[0];
9623 for (unsigned i = 1; i < MemOps.size(); i++)
9624 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
9625 LargestTy = MemOps[i];
9626
9627 // The memset stored value is always defined as an s8, so in order to make it
9628 // work with larger store types we need to repeat the bit pattern across the
9629 // wider type.
9630 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
9631
9632 if (!MemSetValue)
9633 return UnableToLegalize;
9634
9635 // Generate the stores. For each store type in the list, we generate the
9636 // matching store of that type to the destination address.
9637 LLT PtrTy = MRI.getType(Dst);
9638 unsigned DstOff = 0;
9639 unsigned Size = KnownLen;
9640 for (unsigned I = 0; I < MemOps.size(); I++) {
9641 LLT Ty = MemOps[I];
9642 unsigned TySize = Ty.getSizeInBytes();
9643 if (TySize > Size) {
9644 // Issuing an unaligned load / store pair that overlaps with the previous
9645 // pair. Adjust the offset accordingly.
9646 assert(I == MemOps.size() - 1 && I != 0);
9647 DstOff -= TySize - Size;
9648 }
9649
9650 // If this store is smaller than the largest store see whether we can get
9651 // the smaller value for free with a truncate.
9652 Register Value = MemSetValue;
9653 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
9654 MVT VT = getMVTForLLT(Ty);
9655 MVT LargestVT = getMVTForLLT(LargestTy);
9656 if (!LargestTy.isVector() && !Ty.isVector() &&
9657 TLI.isTruncateFree(LargestVT, VT))
9658 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
9659 else
9660 Value = getMemsetValue(Val, Ty, MIB);
9661 if (!Value)
9662 return UnableToLegalize;
9663 }
9664
9665 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
9666
9667 Register Ptr = Dst;
9668 if (DstOff != 0) {
9669 auto Offset =
9670 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
9671 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
9672 }
9673
9674 MIB.buildStore(Value, Ptr, *StoreMMO);
9675 DstOff += Ty.getSizeInBytes();
9676 Size -= TySize;
9677 }
9678
9679 MI.eraseFromParent();
9680 return Legalized;
9681}
9682
9684LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
9685 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9686
9687 auto [Dst, Src, Len] = MI.getFirst3Regs();
9688
9689 const auto *MMOIt = MI.memoperands_begin();
9690 const MachineMemOperand *MemOp = *MMOIt;
9691 bool IsVolatile = MemOp->isVolatile();
9692
9693 // See if this is a constant length copy
9694 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9695 // FIXME: support dynamically sized G_MEMCPY_INLINE
9696 assert(LenVRegAndVal &&
9697 "inline memcpy with dynamic size is not yet supported");
9698 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9699 if (KnownLen == 0) {
9700 MI.eraseFromParent();
9701 return Legalized;
9702 }
9703
9704 const auto &DstMMO = **MI.memoperands_begin();
9705 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9706 Align DstAlign = DstMMO.getBaseAlign();
9707 Align SrcAlign = SrcMMO.getBaseAlign();
9708
9709 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9710 IsVolatile);
9711}
9712
9714LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
9715 uint64_t KnownLen, Align DstAlign,
9716 Align SrcAlign, bool IsVolatile) {
9717 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9718 return lowerMemcpy(MI, Dst, Src, KnownLen,
9719 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
9720 IsVolatile);
9721}
9722
9724LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
9725 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
9726 Align SrcAlign, bool IsVolatile) {
9727 auto &MF = *MI.getParent()->getParent();
9728 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9729 auto &DL = MF.getDataLayout();
9730 LLVMContext &C = MF.getFunction().getContext();
9731
9732 assert(KnownLen != 0 && "Have a zero length memcpy length!");
9733
9734 bool DstAlignCanChange = false;
9735 MachineFrameInfo &MFI = MF.getFrameInfo();
9736 Align Alignment = std::min(DstAlign, SrcAlign);
9737
9738 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9739 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9740 DstAlignCanChange = true;
9741
9742 // FIXME: infer better src pointer alignment like SelectionDAG does here.
9743 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
9744 // if the memcpy is in a tail call position.
9745
9746 std::vector<LLT> MemOps;
9747
9748 const auto &DstMMO = **MI.memoperands_begin();
9749 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9750 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9751 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9752
9754 MemOps, Limit,
9755 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9756 IsVolatile),
9757 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9758 MF.getFunction().getAttributes(), TLI))
9759 return UnableToLegalize;
9760
9761 if (DstAlignCanChange) {
9762 // Get an estimate of the type from the LLT.
9763 Type *IRTy = getTypeForLLT(MemOps[0], C);
9764 Align NewAlign = DL.getABITypeAlign(IRTy);
9765
9766 // Don't promote to an alignment that would require dynamic stack
9767 // realignment.
9768 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9769 if (!TRI->hasStackRealignment(MF))
9770 if (MaybeAlign StackAlign = DL.getStackAlignment())
9771 NewAlign = std::min(NewAlign, *StackAlign);
9772
9773 if (NewAlign > Alignment) {
9774 Alignment = NewAlign;
9775 unsigned FI = FIDef->getOperand(1).getIndex();
9776 // Give the stack frame object a larger alignment if needed.
9777 if (MFI.getObjectAlign(FI) < Alignment)
9778 MFI.setObjectAlignment(FI, Alignment);
9779 }
9780 }
9781
9782 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
9783
9784 MachineIRBuilder MIB(MI);
9785 // Now we need to emit a pair of load and stores for each of the types we've
9786 // collected. I.e. for each type, generate a load from the source pointer of
9787 // that type width, and then generate a corresponding store to the dest buffer
9788 // of that value loaded. This can result in a sequence of loads and stores
9789 // mixed types, depending on what the target specifies as good types to use.
9790 unsigned CurrOffset = 0;
9791 unsigned Size = KnownLen;
9792 for (auto CopyTy : MemOps) {
9793 // Issuing an unaligned load / store pair that overlaps with the previous
9794 // pair. Adjust the offset accordingly.
9795 if (CopyTy.getSizeInBytes() > Size)
9796 CurrOffset -= CopyTy.getSizeInBytes() - Size;
9797
9798 // Construct MMOs for the accesses.
9799 auto *LoadMMO =
9800 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9801 auto *StoreMMO =
9802 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9803
9804 // Create the load.
9805 Register LoadPtr = Src;
9807 if (CurrOffset != 0) {
9808 LLT SrcTy = MRI.getType(Src);
9809 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
9810 .getReg(0);
9811 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9812 }
9813 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
9814
9815 // Create the store.
9816 Register StorePtr = Dst;
9817 if (CurrOffset != 0) {
9818 LLT DstTy = MRI.getType(Dst);
9819 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9820 }
9821 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
9822 CurrOffset += CopyTy.getSizeInBytes();
9823 Size -= CopyTy.getSizeInBytes();
9824 }
9825
9826 MI.eraseFromParent();
9827 return Legalized;
9828}
9829
9831LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
9832 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
9833 bool IsVolatile) {
9834 auto &MF = *MI.getParent()->getParent();
9835 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9836 auto &DL = MF.getDataLayout();
9837 LLVMContext &C = MF.getFunction().getContext();
9838
9839 assert(KnownLen != 0 && "Have a zero length memmove length!");
9840
9841 bool DstAlignCanChange = false;
9842 MachineFrameInfo &MFI = MF.getFrameInfo();
9843 bool OptSize = shouldLowerMemFuncForSize(MF);
9844 Align Alignment = std::min(DstAlign, SrcAlign);
9845
9846 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9847 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9848 DstAlignCanChange = true;
9849
9850 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
9851 std::vector<LLT> MemOps;
9852
9853 const auto &DstMMO = **MI.memoperands_begin();
9854 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9855 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9856 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9857
9858 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
9859 // to a bug in it's findOptimalMemOpLowering implementation. For now do the
9860 // same thing here.
9862 MemOps, Limit,
9863 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9864 /*IsVolatile*/ true),
9865 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9866 MF.getFunction().getAttributes(), TLI))
9867 return UnableToLegalize;
9868
9869 if (DstAlignCanChange) {
9870 // Get an estimate of the type from the LLT.
9871 Type *IRTy = getTypeForLLT(MemOps[0], C);
9872 Align NewAlign = DL.getABITypeAlign(IRTy);
9873
9874 // Don't promote to an alignment that would require dynamic stack
9875 // realignment.
9876 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9877 if (!TRI->hasStackRealignment(MF))
9878 if (MaybeAlign StackAlign = DL.getStackAlignment())
9879 NewAlign = std::min(NewAlign, *StackAlign);
9880
9881 if (NewAlign > Alignment) {
9882 Alignment = NewAlign;
9883 unsigned FI = FIDef->getOperand(1).getIndex();
9884 // Give the stack frame object a larger alignment if needed.
9885 if (MFI.getObjectAlign(FI) < Alignment)
9886 MFI.setObjectAlignment(FI, Alignment);
9887 }
9888 }
9889
9890 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
9891
9892 MachineIRBuilder MIB(MI);
9893 // Memmove requires that we perform the loads first before issuing the stores.
9894 // Apart from that, this loop is pretty much doing the same thing as the
9895 // memcpy codegen function.
9896 unsigned CurrOffset = 0;
9898 for (auto CopyTy : MemOps) {
9899 // Construct MMO for the load.
9900 auto *LoadMMO =
9901 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9902
9903 // Create the load.
9904 Register LoadPtr = Src;
9905 if (CurrOffset != 0) {
9906 LLT SrcTy = MRI.getType(Src);
9907 auto Offset =
9908 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
9909 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9910 }
9911 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
9912 CurrOffset += CopyTy.getSizeInBytes();
9913 }
9914
9915 CurrOffset = 0;
9916 for (unsigned I = 0; I < MemOps.size(); ++I) {
9917 LLT CopyTy = MemOps[I];
9918 // Now store the values loaded.
9919 auto *StoreMMO =
9920 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9921
9922 Register StorePtr = Dst;
9923 if (CurrOffset != 0) {
9924 LLT DstTy = MRI.getType(Dst);
9925 auto Offset =
9926 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
9927 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9928 }
9929 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
9930 CurrOffset += CopyTy.getSizeInBytes();
9931 }
9932 MI.eraseFromParent();
9933 return Legalized;
9934}
9935
9938 const unsigned Opc = MI.getOpcode();
9939 // This combine is fairly complex so it's not written with a separate
9940 // matcher function.
9941 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9942 Opc == TargetOpcode::G_MEMSET) &&
9943 "Expected memcpy like instruction");
9944
9945 auto MMOIt = MI.memoperands_begin();
9946 const MachineMemOperand *MemOp = *MMOIt;
9947
9948 Align DstAlign = MemOp->getBaseAlign();
9949 Align SrcAlign;
9950 auto [Dst, Src, Len] = MI.getFirst3Regs();
9951
9952 if (Opc != TargetOpcode::G_MEMSET) {
9953 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
9954 MemOp = *(++MMOIt);
9955 SrcAlign = MemOp->getBaseAlign();
9956 }
9957
9958 // See if this is a constant length copy
9959 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9960 if (!LenVRegAndVal)
9961 return UnableToLegalize;
9962 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9963
9964 if (KnownLen == 0) {
9965 MI.eraseFromParent();
9966 return Legalized;
9967 }
9968
9969 bool IsVolatile = MemOp->isVolatile();
9970 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9971 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9972 IsVolatile);
9973
9974 // Don't try to optimize volatile.
9975 if (IsVolatile)
9976 return UnableToLegalize;
9977
9978 if (MaxLen && KnownLen > MaxLen)
9979 return UnableToLegalize;
9980
9981 if (Opc == TargetOpcode::G_MEMCPY) {
9982 auto &MF = *MI.getParent()->getParent();
9983 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9984 bool OptSize = shouldLowerMemFuncForSize(MF);
9985 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
9986 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9987 IsVolatile);
9988 }
9989 if (Opc == TargetOpcode::G_MEMMOVE)
9990 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9991 if (Opc == TargetOpcode::G_MEMSET)
9992 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
9993 return UnableToLegalize;
9994}
unsigned const MachineRegisterInfo * MRI
#define Success
static const LLT S1
static const LLT S64
static const LLT S32
static const LLT S16
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:74
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t High
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1334
APInt bitcastToAPInt() const
Definition: APFloat.h:1351
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1140
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1100
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1640
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:216
void negate()
Negate this APInt in place.
Definition: APInt.h:1450
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:873
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1124
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:849
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:679
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:688
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:677
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:678
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:697
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:687
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:681
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:684
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:685
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:680
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:682
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:689
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:686
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:683
bool isSigned() const
Definition: InstrTypes.h:928
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:787
const APFloat & getValueAPF() const
Definition: Constants.h:314
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:348
bool isBigEndian() const
Definition: DataLayout.h:198
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:719
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:716
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Represent a G_FCMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:264
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:211
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170
constexpr bool isByteSized() const
Definition: LowLevelType.h:260
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:190
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:277
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:183
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:218
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:270
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr bool isPointerOrPointerVector() const
Definition: LowLevelType.h:153
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:227
constexpr LLT getScalarType() const
Definition: LowLevelType.h:205
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:200
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:124
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult lowerFAbs(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
A single uniqued string.
Definition: Metadata.h:724
StringRef getString() const
Definition: Metadata.cpp:616
Machine Value Type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildFPTOUI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOUI Src0.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:71
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:577
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:948
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:580
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:808
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:121
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:588
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:65
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:83
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:52
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:87
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:71
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:854
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition: MathExtras.h:245
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1987
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:645
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLT getLLTForMVT(MVT Ty)
Get a rough equivalent of an LLT for a given MVT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1523
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1580
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:395
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition: Utils.cpp:1172
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition: MathExtras.h:367
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:341
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition: Utils.cpp:500
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition: MathExtras.h:236
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1841
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:342
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1260
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:603
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:257
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:306
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:258
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:318
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
SmallVector< ISD::ArgFlagsTy, 4 > Flags
Definition: CallLowering.h:51
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)