LegalizerHelper.cpp (LLVM 22.0.0git)
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
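// Worked example (editorial addition, not in the original source): breaking an
// s88 value into s64 pieces. With OrigTy = s88 and NarrowTy = s64 the code
// above computes NumParts = 1 and LeftoverSize = 24, so LeftoverTy becomes s24
// and one leftover piece is needed:
//
//   LLT Leftover;
//   auto [NumParts, NumLeftover] =
//       getNarrowTypeBreakDown(LLT::scalar(88), LLT::scalar(64), Leftover);
//   // NumParts == 1, NumLeftover == 1, Leftover == LLT::scalar(24)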
83
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
111
113 GISelChangeObserver &Observer,
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
117
120 LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
122
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
 155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
 156 : UnableToLegalize;
 157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
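// Usage sketch (editorial addition; variable names are illustrative only): the
// Legalizer pass drives this dispatcher one instruction at a time and reacts
// to the returned status, roughly like:
//
//   LegalizerHelper Helper(MF, Observer, MIRBuilder);
//   switch (Helper.legalizeInstrStep(MI, LocObserver)) {
//   case LegalizerHelper::AlreadyLegal:
//   case LegalizerHelper::Legalized:
//     break; // Done, or follow-up artifacts were recorded by the observer.
//   case LegalizerHelper::UnableToLegalize:
//     // Report the failure for this instruction.
//     break;
//   }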
162
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
 183 // Merge sub-vectors with different numbers of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs);
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_LROUND:
415 RTLIBCASE(LROUND_F);
416 case TargetOpcode::G_LLROUND:
417 RTLIBCASE(LLROUND_F);
418 case TargetOpcode::G_MUL:
419 RTLIBCASE_INT(MUL_I);
420 case TargetOpcode::G_SDIV:
421 RTLIBCASE_INT(SDIV_I);
422 case TargetOpcode::G_UDIV:
423 RTLIBCASE_INT(UDIV_I);
424 case TargetOpcode::G_SREM:
425 RTLIBCASE_INT(SREM_I);
426 case TargetOpcode::G_UREM:
427 RTLIBCASE_INT(UREM_I);
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
429 RTLIBCASE_INT(CTLZ_I);
430 case TargetOpcode::G_FADD:
431 RTLIBCASE(ADD_F);
432 case TargetOpcode::G_FSUB:
433 RTLIBCASE(SUB_F);
434 case TargetOpcode::G_FMUL:
435 RTLIBCASE(MUL_F);
436 case TargetOpcode::G_FDIV:
437 RTLIBCASE(DIV_F);
438 case TargetOpcode::G_FEXP:
439 RTLIBCASE(EXP_F);
440 case TargetOpcode::G_FEXP2:
441 RTLIBCASE(EXP2_F);
442 case TargetOpcode::G_FEXP10:
443 RTLIBCASE(EXP10_F);
444 case TargetOpcode::G_FREM:
445 RTLIBCASE(REM_F);
446 case TargetOpcode::G_FPOW:
447 RTLIBCASE(POW_F);
448 case TargetOpcode::G_FPOWI:
449 RTLIBCASE(POWI_F);
450 case TargetOpcode::G_FMA:
451 RTLIBCASE(FMA_F);
452 case TargetOpcode::G_FSIN:
453 RTLIBCASE(SIN_F);
454 case TargetOpcode::G_FCOS:
455 RTLIBCASE(COS_F);
456 case TargetOpcode::G_FTAN:
457 RTLIBCASE(TAN_F);
458 case TargetOpcode::G_FASIN:
459 RTLIBCASE(ASIN_F);
460 case TargetOpcode::G_FACOS:
461 RTLIBCASE(ACOS_F);
462 case TargetOpcode::G_FATAN:
463 RTLIBCASE(ATAN_F);
464 case TargetOpcode::G_FATAN2:
465 RTLIBCASE(ATAN2_F);
466 case TargetOpcode::G_FSINH:
467 RTLIBCASE(SINH_F);
468 case TargetOpcode::G_FCOSH:
469 RTLIBCASE(COSH_F);
470 case TargetOpcode::G_FTANH:
471 RTLIBCASE(TANH_F);
472 case TargetOpcode::G_FSINCOS:
473 RTLIBCASE(SINCOS_F);
474 case TargetOpcode::G_FLOG10:
475 RTLIBCASE(LOG10_F);
476 case TargetOpcode::G_FLOG:
477 RTLIBCASE(LOG_F);
478 case TargetOpcode::G_FLOG2:
479 RTLIBCASE(LOG2_F);
480 case TargetOpcode::G_FLDEXP:
481 RTLIBCASE(LDEXP_F);
482 case TargetOpcode::G_FCEIL:
483 RTLIBCASE(CEIL_F);
484 case TargetOpcode::G_FFLOOR:
485 RTLIBCASE(FLOOR_F);
486 case TargetOpcode::G_FMINNUM:
487 RTLIBCASE(FMIN_F);
488 case TargetOpcode::G_FMAXNUM:
489 RTLIBCASE(FMAX_F);
490 case TargetOpcode::G_FSQRT:
491 RTLIBCASE(SQRT_F);
492 case TargetOpcode::G_FRINT:
493 RTLIBCASE(RINT_F);
494 case TargetOpcode::G_FNEARBYINT:
495 RTLIBCASE(NEARBYINT_F);
496 case TargetOpcode::G_INTRINSIC_TRUNC:
497 RTLIBCASE(TRUNC_F);
498 case TargetOpcode::G_INTRINSIC_ROUND:
499 RTLIBCASE(ROUND_F);
500 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
501 RTLIBCASE(ROUNDEVEN_F);
502 case TargetOpcode::G_INTRINSIC_LRINT:
503 RTLIBCASE(LRINT_F);
504 case TargetOpcode::G_INTRINSIC_LLRINT:
505 RTLIBCASE(LLRINT_F);
506 }
507 llvm_unreachable("Unknown libcall function");
508#undef RTLIBCASE_INT
509#undef RTLIBCASE
510}
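// Example (editorial addition): the RTLIBCASE macros splice the bit width onto
// the libcall prefix, e.g.
//
//   RTLIB::Libcall LC1 = getRTLibDesc(TargetOpcode::G_FSIN, 64); // RTLIB::SIN_F64
//   RTLIB::Libcall LC2 = getRTLibDesc(TargetOpcode::G_SDIV, 32); // RTLIB::SDIV_I32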
511
512/// True if an instruction is in tail position in its caller. Intended for
513/// legalizing libcalls as tail calls when possible.
516 const TargetInstrInfo &TII,
518 MachineBasicBlock &MBB = *MI.getParent();
519 const Function &F = MBB.getParent()->getFunction();
520
521 // Conservatively require the attributes of the call to match those of
522 // the return. Ignore NoAlias and NonNull because they don't affect the
523 // call sequence.
524 AttributeList CallerAttrs = F.getAttributes();
525 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
526 .removeAttribute(Attribute::NoAlias)
527 .removeAttribute(Attribute::NonNull)
528 .hasAttributes())
529 return false;
530
531 // It's not safe to eliminate the sign / zero extension of the return value.
532 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
533 CallerAttrs.hasRetAttr(Attribute::SExt))
534 return false;
535
536 // Only tail call if the following instruction is a standard return or if we
537 // have a `thisreturn` callee, and a sequence like:
538 //
539 // G_MEMCPY %0, %1, %2
540 // $x0 = COPY %0
541 // RET_ReallyLR implicit $x0
542 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
543 if (Next != MBB.instr_end() && Next->isCopy()) {
544 if (MI.getOpcode() == TargetOpcode::G_BZERO)
545 return false;
546
 547 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
 548 // memcpy/etc routines return the same parameter. For others it will be the
 549 // returned value.
550 Register VReg = MI.getOperand(0).getReg();
551 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
552 return false;
553
554 Register PReg = Next->getOperand(0).getReg();
555 if (!PReg.isPhysical())
556 return false;
557
558 auto Ret = next_nodbg(Next, MBB.instr_end());
559 if (Ret == MBB.instr_end() || !Ret->isReturn())
560 return false;
561
562 if (Ret->getNumImplicitOperands() != 1)
563 return false;
564
565 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
566 return false;
567
568 // Skip over the COPY that we just validated.
569 Next = Ret;
570 }
571
572 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
573 return false;
574
575 return true;
576}
577
580 const CallLowering::ArgInfo &Result,
582 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
583 MachineInstr *MI) {
584 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
585
587 Info.CallConv = CC;
589 Info.OrigRet = Result;
590 if (MI)
591 Info.IsTailCall =
592 (Result.Ty->isVoidTy() ||
593 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
594 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
595 *MIRBuilder.getMRI());
596
597 llvm::append_range(Info.OrigArgs, Args);
598 if (!CLI.lowerCall(MIRBuilder, Info))
600
601 if (MI && Info.LoweredTailCall) {
602 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
603
604 // Check debug locations before removing the return.
605 LocObserver.checkpoint(true);
606
607 // We must have a return following the call (or debug insts) to get past
608 // isLibCallInTailPosition.
609 do {
610 MachineInstr *Next = MI->getNextNode();
611 assert(Next &&
612 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
613 "Expected instr following MI to be return or debug inst?");
614 // We lowered a tail call, so the call is now the return from the block.
615 // Delete the old return.
616 Next->eraseFromParent();
617 } while (MI->getNextNode());
618
619 // We expect to lose the debug location from the return.
620 LocObserver.checkpoint(false);
621 }
623}
624
626llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
627 const CallLowering::ArgInfo &Result,
629 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
630 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
631 const char *Name = TLI.getLibcallName(Libcall);
632 if (!Name)
634 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
635 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
636}
637
638// Useful for libcalls where all operands have the same type.
641 Type *OpType, LostDebugLocObserver &LocObserver) {
642 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
643
644 // FIXME: What does the original arg index mean here?
646 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
647 Args.push_back({MO.getReg(), OpType, 0});
648 return createLibcall(MIRBuilder, Libcall,
649 {MI.getOperand(0).getReg(), OpType, 0}, Args,
650 LocObserver, &MI);
651}
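// Usage sketch (editorial addition, assuming a hypothetical 64-bit G_FREM
// instruction RemMI and an in-scope LLVMContext Ctx): every operand shares the
// same IR type, so the helper resolves the REM_F64 libcall (typically fmod)
// and rewrites the definition in place.
//
//   Type *F64 = Type::getDoubleTy(Ctx);
//   auto Status =
//       simpleLibcall(RemMI, MIRBuilder, /*Size=*/64, F64, LocObserver);
//   // Status == LegalizerHelper::Legalized on success.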
652
653LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
654 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
655 LostDebugLocObserver &LocObserver) {
656 MachineFunction &MF = *MI.getMF();
658
659 Register DstSin = MI.getOperand(0).getReg();
660 Register DstCos = MI.getOperand(1).getReg();
661 Register Src = MI.getOperand(2).getReg();
662 LLT DstTy = MRI.getType(DstSin);
663
664 int MemSize = DstTy.getSizeInBytes();
665 Align Alignment = getStackTemporaryAlignment(DstTy);
667 unsigned AddrSpace = DL.getAllocaAddrSpace();
668 MachinePointerInfo PtrInfo;
669
670 Register StackPtrSin =
671 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
672 .getReg(0);
673 Register StackPtrCos =
674 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
675 .getReg(0);
676
677 auto &Ctx = MF.getFunction().getContext();
678 auto LibcallResult =
680 {{0}, Type::getVoidTy(Ctx), 0},
681 {{Src, OpType, 0},
682 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
683 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
684 LocObserver, &MI);
685
686 if (LibcallResult != LegalizeResult::Legalized)
688
690 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
692 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
693
694 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
695 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
696 MI.eraseFromParent();
697
699}
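// Resulting MIR sketch (editorial addition, assuming a 64-bit G_FSINCOS on a
// target that provides the sincos libcall; register names are illustrative):
//
//   %sinaddr:_(p0) = G_FRAME_INDEX %stack.0
//   %cosaddr:_(p0) = G_FRAME_INDEX %stack.1
//   <call to sincos with %src, %sinaddr, %cosaddr>
//   %sin:_(s64) = G_LOAD %sinaddr
//   %cos:_(s64) = G_LOAD %cosaddr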
700
703 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
704 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
705
707 // Add all the args, except for the last which is an imm denoting 'tail'.
708 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
709 Register Reg = MI.getOperand(i).getReg();
710
 711 // Need to derive an IR type for call lowering.
712 LLT OpLLT = MRI.getType(Reg);
713 Type *OpTy = nullptr;
714 if (OpLLT.isPointer())
715 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
716 else
717 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
718 Args.push_back({Reg, OpTy, 0});
719 }
720
721 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
722 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
723 RTLIB::Libcall RTLibcall;
724 unsigned Opc = MI.getOpcode();
725 const char *Name;
726 switch (Opc) {
727 case TargetOpcode::G_BZERO:
728 RTLibcall = RTLIB::BZERO;
729 Name = TLI.getLibcallName(RTLibcall);
730 break;
731 case TargetOpcode::G_MEMCPY:
732 RTLibcall = RTLIB::MEMCPY;
733 Name = TLI.getMemcpyName();
734 Args[0].Flags[0].setReturned();
735 break;
736 case TargetOpcode::G_MEMMOVE:
737 RTLibcall = RTLIB::MEMMOVE;
738 Name = TLI.getLibcallName(RTLibcall);
739 Args[0].Flags[0].setReturned();
740 break;
741 case TargetOpcode::G_MEMSET:
742 RTLibcall = RTLIB::MEMSET;
743 Name = TLI.getLibcallName(RTLibcall);
744 Args[0].Flags[0].setReturned();
745 break;
746 default:
747 llvm_unreachable("unsupported opcode");
748 }
749
750 // Unsupported libcall on the target.
751 if (!Name) {
752 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
753 << MIRBuilder.getTII().getName(Opc) << "\n");
755 }
756
758 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
760 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
761 Info.IsTailCall =
762 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
763 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
764
765 llvm::append_range(Info.OrigArgs, Args);
766 if (!CLI.lowerCall(MIRBuilder, Info))
768
769 if (Info.LoweredTailCall) {
770 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
771
772 // Check debug locations before removing the return.
773 LocObserver.checkpoint(true);
774
775 // We must have a return following the call (or debug insts) to get past
776 // isLibCallInTailPosition.
777 do {
778 MachineInstr *Next = MI.getNextNode();
779 assert(Next &&
780 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
781 "Expected instr following MI to be return or debug inst?");
782 // We lowered a tail call, so the call is now the return from the block.
783 // Delete the old return.
784 Next->eraseFromParent();
785 } while (MI.getNextNode());
786
787 // We expect to lose the debug location from the return.
788 LocObserver.checkpoint(false);
789 }
790
792}
793
794static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
795 unsigned Opc = MI.getOpcode();
796 auto &AtomicMI = cast<GMemOperation>(MI);
797 auto &MMO = AtomicMI.getMMO();
798 auto Ordering = MMO.getMergedOrdering();
799 LLT MemType = MMO.getMemoryType();
800 uint64_t MemSize = MemType.getSizeInBytes();
801 if (MemType.isVector())
802 return RTLIB::UNKNOWN_LIBCALL;
803
804#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
805#define LCALL5(A) \
806 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
807 switch (Opc) {
808 case TargetOpcode::G_ATOMIC_CMPXCHG:
809 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
810 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
811 return getOutlineAtomicHelper(LC, Ordering, MemSize);
812 }
813 case TargetOpcode::G_ATOMICRMW_XCHG: {
814 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
815 return getOutlineAtomicHelper(LC, Ordering, MemSize);
816 }
817 case TargetOpcode::G_ATOMICRMW_ADD:
818 case TargetOpcode::G_ATOMICRMW_SUB: {
819 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
820 return getOutlineAtomicHelper(LC, Ordering, MemSize);
821 }
822 case TargetOpcode::G_ATOMICRMW_AND: {
823 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
824 return getOutlineAtomicHelper(LC, Ordering, MemSize);
825 }
826 case TargetOpcode::G_ATOMICRMW_OR: {
827 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
828 return getOutlineAtomicHelper(LC, Ordering, MemSize);
829 }
830 case TargetOpcode::G_ATOMICRMW_XOR: {
831 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
832 return getOutlineAtomicHelper(LC, Ordering, MemSize);
833 }
834 default:
835 return RTLIB::UNKNOWN_LIBCALL;
836 }
837#undef LCALLS
838#undef LCALL5
839}
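// Example (editorial addition): LCALL5 expands to one row per access size
// (1, 2, 4, 8, 16 bytes) and LCALLS to one column per ordering, so a 4-byte
// G_ATOMICRMW_XCHG with acquire ordering is expected to resolve to
// RTLIB::OUTLINE_ATOMIC_SWP4_ACQ (the exact column is picked by
// getOutlineAtomicHelper from the merged memory ordering).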
840
843 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
844
845 Type *RetTy;
846 SmallVector<Register> RetRegs;
848 unsigned Opc = MI.getOpcode();
849 switch (Opc) {
850 case TargetOpcode::G_ATOMIC_CMPXCHG:
851 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
853 LLT SuccessLLT;
854 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
855 MI.getFirst4RegLLTs();
856 RetRegs.push_back(Ret);
857 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
858 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
859 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
860 NewLLT) = MI.getFirst5RegLLTs();
861 RetRegs.push_back(Success);
863 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
864 }
865 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
866 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
867 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
868 break;
869 }
870 case TargetOpcode::G_ATOMICRMW_XCHG:
871 case TargetOpcode::G_ATOMICRMW_ADD:
872 case TargetOpcode::G_ATOMICRMW_SUB:
873 case TargetOpcode::G_ATOMICRMW_AND:
874 case TargetOpcode::G_ATOMICRMW_OR:
875 case TargetOpcode::G_ATOMICRMW_XOR: {
876 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
877 RetRegs.push_back(Ret);
878 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
879 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
880 Val =
881 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
882 .getReg(0);
883 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
884 Val =
885 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
886 .getReg(0);
887 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
888 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
889 break;
890 }
891 default:
892 llvm_unreachable("unsupported opcode");
893 }
894
895 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
896 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
897 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
898 const char *Name = TLI.getLibcallName(RTLibcall);
899
900 // Unsupported libcall on the target.
901 if (!Name) {
902 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
903 << MIRBuilder.getTII().getName(Opc) << "\n");
905 }
906
908 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
910 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
911
912 llvm::append_range(Info.OrigArgs, Args);
913 if (!CLI.lowerCall(MIRBuilder, Info))
915
917}
918
919static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
920 Type *FromType) {
921 auto ToMVT = MVT::getVT(ToType);
922 auto FromMVT = MVT::getVT(FromType);
923
924 switch (Opcode) {
925 case TargetOpcode::G_FPEXT:
926 return RTLIB::getFPEXT(FromMVT, ToMVT);
927 case TargetOpcode::G_FPTRUNC:
928 return RTLIB::getFPROUND(FromMVT, ToMVT);
929 case TargetOpcode::G_FPTOSI:
930 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
931 case TargetOpcode::G_FPTOUI:
932 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
933 case TargetOpcode::G_SITOFP:
934 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
935 case TargetOpcode::G_UITOFP:
936 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
937 }
938 llvm_unreachable("Unsupported libcall function");
939}
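// Example (editorial addition, assuming an in-scope LLVMContext Ctx):
// extending f32 to f64 goes through the generic RTLIB helpers.
//
//   RTLIB::Libcall LC = getConvRTLibDesc(TargetOpcode::G_FPEXT,
//                                        /*ToType=*/Type::getDoubleTy(Ctx),
//                                        /*FromType=*/Type::getFloatTy(Ctx));
//   // LC == RTLIB::FPEXT_F32_F64 (usually lowered to __extendsfdf2).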
940
943 Type *FromType, LostDebugLocObserver &LocObserver,
944 const TargetLowering &TLI, bool IsSigned = false) {
945 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
946 if (FromType->isIntegerTy()) {
947 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
948 Arg.Flags[0].setSExt();
949 else
950 Arg.Flags[0].setZExt();
951 }
952
953 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
954 return createLibcall(MIRBuilder, Libcall,
955 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
956 &MI);
957}
958
959static RTLIB::Libcall
961 RTLIB::Libcall RTLibcall;
962 switch (MI.getOpcode()) {
963 case TargetOpcode::G_GET_FPENV:
964 RTLibcall = RTLIB::FEGETENV;
965 break;
966 case TargetOpcode::G_SET_FPENV:
967 case TargetOpcode::G_RESET_FPENV:
968 RTLibcall = RTLIB::FESETENV;
969 break;
970 case TargetOpcode::G_GET_FPMODE:
971 RTLibcall = RTLIB::FEGETMODE;
972 break;
973 case TargetOpcode::G_SET_FPMODE:
974 case TargetOpcode::G_RESET_FPMODE:
975 RTLibcall = RTLIB::FESETMODE;
976 break;
977 default:
978 llvm_unreachable("Unexpected opcode");
979 }
980 return RTLibcall;
981}
982
983// Some library functions that read FP state (fegetmode, fegetenv) write the
984// state into a region in memory. IR intrinsics that do the same operations
 985// (get_fpmode, get_fpenv) return the state as an integer value. To implement
 986// these intrinsics via the library functions, we need to use a temporary
 987// variable, for example:
988//
989// %0:_(s32) = G_GET_FPMODE
990//
991// is transformed to:
992//
993// %1:_(p0) = G_FRAME_INDEX %stack.0
994// BL &fegetmode
995// %0:_(s32) = G_LOAD % 1
996//
998LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
1000 LostDebugLocObserver &LocObserver) {
1002 auto &MF = MIRBuilder.getMF();
1003 auto &MRI = *MIRBuilder.getMRI();
1004 auto &Ctx = MF.getFunction().getContext();
1005
 1006 // Create a temporary where the library function will put the read state.
1007 Register Dst = MI.getOperand(0).getReg();
1008 LLT StateTy = MRI.getType(Dst);
1009 TypeSize StateSize = StateTy.getSizeInBytes();
1011 MachinePointerInfo TempPtrInfo;
1012 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1013
 1014 // Create a call to the library function, with the temporary as an argument.
1015 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1016 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1017 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1018 auto Res =
1019 createLibcall(MIRBuilder, RTLibcall,
1021 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1022 LocObserver, nullptr);
1023 if (Res != LegalizerHelper::Legalized)
1024 return Res;
1025
1026 // Create a load from the temporary.
1028 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1029 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1030
1032}
1033
 1034// Similar to `createGetStateLibcall`, this function calls a library function
 1035// using transient space on the stack. In this case the library function reads
 1036// the content of the memory region.
1038LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1040 LostDebugLocObserver &LocObserver) {
1042 auto &MF = MIRBuilder.getMF();
1043 auto &MRI = *MIRBuilder.getMRI();
1044 auto &Ctx = MF.getFunction().getContext();
1045
 1046 // Create a temporary where the library function will get the new state.
1047 Register Src = MI.getOperand(0).getReg();
1048 LLT StateTy = MRI.getType(Src);
1049 TypeSize StateSize = StateTy.getSizeInBytes();
1051 MachinePointerInfo TempPtrInfo;
1052 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1053
1054 // Put the new state into the temporary.
1056 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1057 MIRBuilder.buildStore(Src, Temp, *MMO);
1058
 1059 // Create a call to the library function, with the temporary as an argument.
1060 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1061 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1062 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1063 return createLibcall(MIRBuilder, RTLibcall,
1065 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1066 LocObserver, nullptr);
1067}
1068
1069/// Returns the corresponding libcall for the given Pred and
1070/// the ICMP predicate that should be generated to compare with #0
1071/// after the libcall.
1072static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1074#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1075 do { \
1076 switch (Size) { \
1077 case 32: \
1078 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1079 case 64: \
1080 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1081 case 128: \
1082 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1083 default: \
1084 llvm_unreachable("unexpected size"); \
1085 } \
1086 } while (0)
1087
1088 switch (Pred) {
1089 case CmpInst::FCMP_OEQ:
1091 case CmpInst::FCMP_UNE:
1093 case CmpInst::FCMP_OGE:
1095 case CmpInst::FCMP_OLT:
1097 case CmpInst::FCMP_OLE:
1099 case CmpInst::FCMP_OGT:
1101 case CmpInst::FCMP_UNO:
1103 default:
1104 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1105 }
1106}
1107
1109LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1111 LostDebugLocObserver &LocObserver) {
1112 auto &MF = MIRBuilder.getMF();
1113 auto &Ctx = MF.getFunction().getContext();
1114 const GFCmp *Cmp = cast<GFCmp>(&MI);
1115
1116 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1117 unsigned Size = OpLLT.getSizeInBits();
1118 if ((Size != 32 && Size != 64 && Size != 128) ||
1119 OpLLT != MRI.getType(Cmp->getRHSReg()))
1120 return UnableToLegalize;
1121
1122 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1123
1124 // DstReg type is s32
1125 const Register DstReg = Cmp->getReg(0);
1126 LLT DstTy = MRI.getType(DstReg);
1127 const auto Cond = Cmp->getCond();
1128
1129 // Reference:
1130 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1131 // Generates a libcall followed by ICMP.
1132 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1133 const CmpInst::Predicate ICmpPred,
1134 const DstOp &Res) -> Register {
1135 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1136 constexpr LLT TempLLT = LLT::scalar(32);
1137 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1138 // Generate libcall, holding result in Temp
1139 const auto Status = createLibcall(
1140 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1141 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1142 LocObserver, &MI);
1143 if (!Status)
1144 return {};
1145
1146 // Compare temp with #0 to get the final result.
1147 return MIRBuilder
1148 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1149 .getReg(0);
1150 };
1151
1152 // Simple case if we have a direct mapping from predicate to libcall
1153 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1154 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1155 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1156 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1157 return Legalized;
1158 }
1159 return UnableToLegalize;
1160 }
1161
 1162 // No direct mapping found; generate a combination of libcalls instead.
1163
1164 switch (Cond) {
1165 case CmpInst::FCMP_UEQ: {
1166 // FCMP_UEQ: unordered or equal
1167 // Convert into (FCMP_OEQ || FCMP_UNO).
1168
1169 const auto [OeqLibcall, OeqPred] =
1171 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1172
1173 const auto [UnoLibcall, UnoPred] =
1175 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1176 if (Oeq && Uno)
1177 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1178 else
1179 return UnableToLegalize;
1180
1181 break;
1182 }
1183 case CmpInst::FCMP_ONE: {
1184 // FCMP_ONE: ordered and operands are unequal
1185 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1186
 1187 // We invert the predicate instead of generating a NOT
 1188 // to save one instruction.
 1189 // On AArch64, isel can even select two cmps into a single ccmp.
1190 const auto [OeqLibcall, OeqPred] =
1192 const auto NotOeq =
1193 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1194
1195 const auto [UnoLibcall, UnoPred] =
1197 const auto NotUno =
1198 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1199
1200 if (NotOeq && NotUno)
1201 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1202 else
1203 return UnableToLegalize;
1204
1205 break;
1206 }
1207 case CmpInst::FCMP_ULT:
1208 case CmpInst::FCMP_UGE:
1209 case CmpInst::FCMP_UGT:
1210 case CmpInst::FCMP_ULE:
1211 case CmpInst::FCMP_ORD: {
1212 // Convert into: !(inverse(Pred))
1213 // E.g. FCMP_ULT becomes !FCMP_OGE
1214 // This is equivalent to the following, but saves some instructions.
1215 // MIRBuilder.buildNot(
1216 // PredTy,
1217 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1218 // Op1, Op2));
1219 const auto [InversedLibcall, InversedPred] =
1221 if (!BuildLibcall(InversedLibcall,
1222 CmpInst::getInversePredicate(InversedPred), DstReg))
1223 return UnableToLegalize;
1224 break;
1225 }
1226 default:
1227 return UnableToLegalize;
1228 }
1229
1230 return Legalized;
1231}
1232
 1233// This function is used to legalize operations that set the default FP
 1234// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)`
 1235// is used for that. On most targets supported by glibc, FE_DFL_MODE is
 1236// defined as `((const femode_t *) -1)`, and that assumption is used here. If
 1237// it does not hold for some target, the target must provide custom lowering.
1239LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1241 LostDebugLocObserver &LocObserver) {
1243 auto &MF = MIRBuilder.getMF();
1244 auto &Ctx = MF.getFunction().getContext();
1245
1246 // Create an argument for the library function.
1247 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1248 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1249 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1250 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1251 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1252 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1253 MIRBuilder.buildIntToPtr(Dest, DefValue);
1254
1255 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1256 return createLibcall(MIRBuilder, RTLibcall,
1258 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1259 LocObserver, &MI);
1260}
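// Resulting MIR sketch (editorial addition, assuming a 64-bit pointer target
// and a G_RESET_FPMODE being legalized; register names are illustrative):
//
//   %neg1:_(s64) = G_CONSTANT i64 -1
//   %dfl:_(p0) = G_INTTOPTR %neg1(s64)
//   <call to fesetmode with %dfl>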
1261
1264 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1265
1266 switch (MI.getOpcode()) {
1267 default:
1268 return UnableToLegalize;
1269 case TargetOpcode::G_MUL:
1270 case TargetOpcode::G_SDIV:
1271 case TargetOpcode::G_UDIV:
1272 case TargetOpcode::G_SREM:
1273 case TargetOpcode::G_UREM:
1274 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1275 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1276 unsigned Size = LLTy.getSizeInBits();
1277 Type *HLTy = IntegerType::get(Ctx, Size);
1278 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1279 if (Status != Legalized)
1280 return Status;
1281 break;
1282 }
1283 case TargetOpcode::G_FADD:
1284 case TargetOpcode::G_FSUB:
1285 case TargetOpcode::G_FMUL:
1286 case TargetOpcode::G_FDIV:
1287 case TargetOpcode::G_FMA:
1288 case TargetOpcode::G_FPOW:
1289 case TargetOpcode::G_FREM:
1290 case TargetOpcode::G_FCOS:
1291 case TargetOpcode::G_FSIN:
1292 case TargetOpcode::G_FTAN:
1293 case TargetOpcode::G_FACOS:
1294 case TargetOpcode::G_FASIN:
1295 case TargetOpcode::G_FATAN:
1296 case TargetOpcode::G_FATAN2:
1297 case TargetOpcode::G_FCOSH:
1298 case TargetOpcode::G_FSINH:
1299 case TargetOpcode::G_FTANH:
1300 case TargetOpcode::G_FLOG10:
1301 case TargetOpcode::G_FLOG:
1302 case TargetOpcode::G_FLOG2:
1303 case TargetOpcode::G_FEXP:
1304 case TargetOpcode::G_FEXP2:
1305 case TargetOpcode::G_FEXP10:
1306 case TargetOpcode::G_FCEIL:
1307 case TargetOpcode::G_FFLOOR:
1308 case TargetOpcode::G_FMINNUM:
1309 case TargetOpcode::G_FMAXNUM:
1310 case TargetOpcode::G_FSQRT:
1311 case TargetOpcode::G_FRINT:
1312 case TargetOpcode::G_FNEARBYINT:
1313 case TargetOpcode::G_INTRINSIC_TRUNC:
1314 case TargetOpcode::G_INTRINSIC_ROUND:
1315 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1316 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1317 unsigned Size = LLTy.getSizeInBits();
1318 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1319 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1320 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1321 return UnableToLegalize;
1322 }
1323 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1324 if (Status != Legalized)
1325 return Status;
1326 break;
1327 }
1328 case TargetOpcode::G_FSINCOS: {
1329 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1330 unsigned Size = LLTy.getSizeInBits();
1331 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1332 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1333 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1334 return UnableToLegalize;
1335 }
1336 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1337 }
1338 case TargetOpcode::G_LROUND:
1339 case TargetOpcode::G_LLROUND:
1340 case TargetOpcode::G_INTRINSIC_LRINT:
1341 case TargetOpcode::G_INTRINSIC_LLRINT: {
1342 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1343 unsigned Size = LLTy.getSizeInBits();
1344 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1345 Type *ITy = IntegerType::get(
1346 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1347 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1348 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1349 return UnableToLegalize;
1350 }
1351 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1353 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1354 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1355 if (Status != Legalized)
1356 return Status;
1357 MI.eraseFromParent();
1358 return Legalized;
1359 }
1360 case TargetOpcode::G_FPOWI:
1361 case TargetOpcode::G_FLDEXP: {
1362 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1363 unsigned Size = LLTy.getSizeInBits();
1364 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1365 Type *ITy = IntegerType::get(
1366 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1367 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1368 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1369 return UnableToLegalize;
1370 }
1371 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1373 {MI.getOperand(1).getReg(), HLTy, 0},
1374 {MI.getOperand(2).getReg(), ITy, 1}};
1375 Args[1].Flags[0].setSExt();
1377 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1378 Args, LocObserver, &MI);
1379 if (Status != Legalized)
1380 return Status;
1381 break;
1382 }
1383 case TargetOpcode::G_FPEXT:
1384 case TargetOpcode::G_FPTRUNC: {
1385 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1386 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1387 if (!FromTy || !ToTy)
1388 return UnableToLegalize;
1390 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1391 if (Status != Legalized)
1392 return Status;
1393 break;
1394 }
1395 case TargetOpcode::G_FCMP: {
1396 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1397 if (Status != Legalized)
1398 return Status;
1399 MI.eraseFromParent();
1400 return Status;
1401 }
1402 case TargetOpcode::G_FPTOSI:
1403 case TargetOpcode::G_FPTOUI: {
1404 // FIXME: Support other types
1405 Type *FromTy =
1406 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1407 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1408 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1409 return UnableToLegalize;
1411 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1412 if (Status != Legalized)
1413 return Status;
1414 break;
1415 }
1416 case TargetOpcode::G_SITOFP:
1417 case TargetOpcode::G_UITOFP: {
1418 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1419 Type *ToTy =
1420 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1421 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1422 return UnableToLegalize;
1423 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1425 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1426 LocObserver, TLI, IsSigned);
1427 if (Status != Legalized)
1428 return Status;
1429 break;
1430 }
1431 case TargetOpcode::G_ATOMICRMW_XCHG:
1432 case TargetOpcode::G_ATOMICRMW_ADD:
1433 case TargetOpcode::G_ATOMICRMW_SUB:
1434 case TargetOpcode::G_ATOMICRMW_AND:
1435 case TargetOpcode::G_ATOMICRMW_OR:
1436 case TargetOpcode::G_ATOMICRMW_XOR:
1437 case TargetOpcode::G_ATOMIC_CMPXCHG:
1438 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1440 if (Status != Legalized)
1441 return Status;
1442 break;
1443 }
1444 case TargetOpcode::G_BZERO:
1445 case TargetOpcode::G_MEMCPY:
1446 case TargetOpcode::G_MEMMOVE:
1447 case TargetOpcode::G_MEMSET: {
1448 LegalizeResult Result =
1449 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1450 if (Result != Legalized)
1451 return Result;
1452 MI.eraseFromParent();
1453 return Result;
1454 }
1455 case TargetOpcode::G_GET_FPENV:
1456 case TargetOpcode::G_GET_FPMODE: {
1457 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1458 if (Result != Legalized)
1459 return Result;
1460 break;
1461 }
1462 case TargetOpcode::G_SET_FPENV:
1463 case TargetOpcode::G_SET_FPMODE: {
1464 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1465 if (Result != Legalized)
1466 return Result;
1467 break;
1468 }
1469 case TargetOpcode::G_RESET_FPENV:
1470 case TargetOpcode::G_RESET_FPMODE: {
1471 LegalizeResult Result =
1472 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1473 if (Result != Legalized)
1474 return Result;
1475 break;
1476 }
1477 }
1478
1479 MI.eraseFromParent();
1480 return Legalized;
1481}
1482
1484 unsigned TypeIdx,
1485 LLT NarrowTy) {
1486 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1487 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1488
1489 switch (MI.getOpcode()) {
1490 default:
1491 return UnableToLegalize;
1492 case TargetOpcode::G_IMPLICIT_DEF: {
1493 Register DstReg = MI.getOperand(0).getReg();
1494 LLT DstTy = MRI.getType(DstReg);
1495
1496 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1497 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1498 // FIXME: Although this would also be legal for the general case, it causes
1499 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1500 // combines not being hit). This seems to be a problem related to the
1501 // artifact combiner.
1502 if (SizeOp0 % NarrowSize != 0) {
1503 LLT ImplicitTy = NarrowTy;
1504 if (DstTy.isVector())
1505 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1506
1507 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1508 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1509
1510 MI.eraseFromParent();
1511 return Legalized;
1512 }
1513
1514 int NumParts = SizeOp0 / NarrowSize;
1515
1517 for (int i = 0; i < NumParts; ++i)
1518 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1519
1520 if (DstTy.isVector())
1521 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1522 else
1523 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1524 MI.eraseFromParent();
1525 return Legalized;
1526 }
1527 case TargetOpcode::G_CONSTANT: {
1528 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1529 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1530 unsigned TotalSize = Ty.getSizeInBits();
1531 unsigned NarrowSize = NarrowTy.getSizeInBits();
1532 int NumParts = TotalSize / NarrowSize;
1533
1534 SmallVector<Register, 4> PartRegs;
1535 for (int I = 0; I != NumParts; ++I) {
1536 unsigned Offset = I * NarrowSize;
1537 auto K = MIRBuilder.buildConstant(NarrowTy,
1538 Val.lshr(Offset).trunc(NarrowSize));
1539 PartRegs.push_back(K.getReg(0));
1540 }
1541
1542 LLT LeftoverTy;
1543 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1544 SmallVector<Register, 1> LeftoverRegs;
1545 if (LeftoverBits != 0) {
1546 LeftoverTy = LLT::scalar(LeftoverBits);
1547 auto K = MIRBuilder.buildConstant(
1548 LeftoverTy,
1549 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1550 LeftoverRegs.push_back(K.getReg(0));
1551 }
1552
1553 insertParts(MI.getOperand(0).getReg(),
1554 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1555
1556 MI.eraseFromParent();
1557 return Legalized;
1558 }
1559 case TargetOpcode::G_SEXT:
1560 case TargetOpcode::G_ZEXT:
1561 case TargetOpcode::G_ANYEXT:
1562 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1563 case TargetOpcode::G_TRUNC: {
1564 if (TypeIdx != 1)
1565 return UnableToLegalize;
1566
1567 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1568 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1569 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1570 return UnableToLegalize;
1571 }
1572
1573 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1574 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1575 MI.eraseFromParent();
1576 return Legalized;
1577 }
1578 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1579 case TargetOpcode::G_FREEZE: {
1580 if (TypeIdx != 0)
1581 return UnableToLegalize;
1582
1583 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1584 // Should widen scalar first
1585 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1586 return UnableToLegalize;
1587
1588 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1590 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1591 Parts.push_back(
1592 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1593 .getReg(0));
1594 }
1595
1596 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1597 MI.eraseFromParent();
1598 return Legalized;
1599 }
1600 case TargetOpcode::G_ADD:
1601 case TargetOpcode::G_SUB:
1602 case TargetOpcode::G_SADDO:
1603 case TargetOpcode::G_SSUBO:
1604 case TargetOpcode::G_SADDE:
1605 case TargetOpcode::G_SSUBE:
1606 case TargetOpcode::G_UADDO:
1607 case TargetOpcode::G_USUBO:
1608 case TargetOpcode::G_UADDE:
1609 case TargetOpcode::G_USUBE:
1610 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1611 case TargetOpcode::G_MUL:
1612 case TargetOpcode::G_UMULH:
1613 return narrowScalarMul(MI, NarrowTy);
1614 case TargetOpcode::G_EXTRACT:
1615 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1616 case TargetOpcode::G_INSERT:
1617 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1618 case TargetOpcode::G_LOAD: {
1619 auto &LoadMI = cast<GLoad>(MI);
1620 Register DstReg = LoadMI.getDstReg();
1621 LLT DstTy = MRI.getType(DstReg);
1622 if (DstTy.isVector())
1623 return UnableToLegalize;
1624
1625 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1626 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1627 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1628 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1629 LoadMI.eraseFromParent();
1630 return Legalized;
1631 }
1632
1633 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1634 }
1635 case TargetOpcode::G_ZEXTLOAD:
1636 case TargetOpcode::G_SEXTLOAD: {
1637 auto &LoadMI = cast<GExtLoad>(MI);
1638 Register DstReg = LoadMI.getDstReg();
1639 Register PtrReg = LoadMI.getPointerReg();
1640
1641 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1642 auto &MMO = LoadMI.getMMO();
1643 unsigned MemSize = MMO.getSizeInBits().getValue();
1644
1645 if (MemSize == NarrowSize) {
1646 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1647 } else if (MemSize < NarrowSize) {
1648 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1649 } else if (MemSize > NarrowSize) {
1650 // FIXME: Need to split the load.
1651 return UnableToLegalize;
1652 }
1653
1654 if (isa<GZExtLoad>(LoadMI))
1655 MIRBuilder.buildZExt(DstReg, TmpReg);
1656 else
1657 MIRBuilder.buildSExt(DstReg, TmpReg);
1658
1659 LoadMI.eraseFromParent();
1660 return Legalized;
1661 }
1662 case TargetOpcode::G_STORE: {
1663 auto &StoreMI = cast<GStore>(MI);
1664
1665 Register SrcReg = StoreMI.getValueReg();
1666 LLT SrcTy = MRI.getType(SrcReg);
1667 if (SrcTy.isVector())
1668 return UnableToLegalize;
1669
1670 int NumParts = SizeOp0 / NarrowSize;
1671 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1672 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1673 if (SrcTy.isVector() && LeftoverBits != 0)
1674 return UnableToLegalize;
1675
1676 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1677 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1678 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1679 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1680 StoreMI.eraseFromParent();
1681 return Legalized;
1682 }
1683
1684 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1685 }
1686 case TargetOpcode::G_SELECT:
1687 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1688 case TargetOpcode::G_AND:
1689 case TargetOpcode::G_OR:
1690 case TargetOpcode::G_XOR: {
1691 // Legalize bitwise operation:
1692 // A = BinOp<Ty> B, C
1693 // into:
1694 // B1, ..., BN = G_UNMERGE_VALUES B
1695 // C1, ..., CN = G_UNMERGE_VALUES C
 1696 // A1 = BinOp<Ty/N> B1, C1
1697 // ...
1698 // AN = BinOp<Ty/N> BN, CN
1699 // A = G_MERGE_VALUES A1, ..., AN
1700 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1701 }
1702 case TargetOpcode::G_SHL:
1703 case TargetOpcode::G_LSHR:
1704 case TargetOpcode::G_ASHR:
1705 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1706 case TargetOpcode::G_CTLZ:
1707 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1708 case TargetOpcode::G_CTTZ:
1709 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1710 case TargetOpcode::G_CTPOP:
1711 if (TypeIdx == 1)
1712 switch (MI.getOpcode()) {
1713 case TargetOpcode::G_CTLZ:
1714 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1715 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1716 case TargetOpcode::G_CTTZ:
1717 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1718 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1719 case TargetOpcode::G_CTPOP:
1720 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1721 default:
1722 return UnableToLegalize;
1723 }
1724
1726 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1728 return Legalized;
1729 case TargetOpcode::G_INTTOPTR:
1730 if (TypeIdx != 1)
1731 return UnableToLegalize;
1732
1734 narrowScalarSrc(MI, NarrowTy, 1);
1736 return Legalized;
1737 case TargetOpcode::G_PTRTOINT:
1738 if (TypeIdx != 0)
1739 return UnableToLegalize;
1740
1742 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1744 return Legalized;
1745 case TargetOpcode::G_PHI: {
1746 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1747 // NarrowSize.
1748 if (SizeOp0 % NarrowSize != 0)
1749 return UnableToLegalize;
1750
1751 unsigned NumParts = SizeOp0 / NarrowSize;
1752 SmallVector<Register, 2> DstRegs(NumParts);
1753 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1755 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1756 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1758 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1759 SrcRegs[i / 2], MIRBuilder, MRI);
1760 }
1761 MachineBasicBlock &MBB = *MI.getParent();
1763 for (unsigned i = 0; i < NumParts; ++i) {
1764 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1765 MachineInstrBuilder MIB =
1766 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1767 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1768 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1769 }
1771 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1773 MI.eraseFromParent();
1774 return Legalized;
1775 }
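// A minimal sketch of the G_PHI narrowing above, assuming an s64 PHI
// narrowed with NarrowTy = s32; register and block names are illustrative:
//
//   %r:_(s64) = G_PHI %a(s64), %bb.1, %b(s64), %bb.2
//
// =>
//
//   %a0:_(s32), %a1:_(s32) = G_UNMERGE_VALUES %a   ; built in %bb.1
//   %b0:_(s32), %b1:_(s32) = G_UNMERGE_VALUES %b   ; built in %bb.2
//   %r0:_(s32) = G_PHI %a0, %bb.1, %b0, %bb.2
//   %r1:_(s32) = G_PHI %a1, %bb.1, %b1, %bb.2
//   %r:_(s64) = G_MERGE_VALUES %r0, %r1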
1776 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1777 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1778 if (TypeIdx != 2)
1779 return UnableToLegalize;
1780
1781 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1783 narrowScalarSrc(MI, NarrowTy, OpIdx);
1785 return Legalized;
1786 }
1787 case TargetOpcode::G_ICMP: {
1788 Register LHS = MI.getOperand(2).getReg();
1789 LLT SrcTy = MRI.getType(LHS);
1790 CmpInst::Predicate Pred =
1791 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1792
1793 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1794 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1795 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1796 LHSLeftoverRegs, MIRBuilder, MRI))
1797 return UnableToLegalize;
1798
1799 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1800 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1801 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1802 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1803 return UnableToLegalize;
1804
1805 // We now have the LHS and RHS of the compare split into narrow-type
1806 // registers, plus potentially some leftover type.
1807 Register Dst = MI.getOperand(0).getReg();
1808 LLT ResTy = MRI.getType(Dst);
1809 if (ICmpInst::isEquality(Pred)) {
1810 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1811 // them together. For each equal part, the result should be all 0s. For
1812 // each non-equal part, we'll get at least one 1.
1813 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1814 SmallVector<Register, 4> Xors;
1815 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1816 auto LHS = std::get<0>(LHSAndRHS);
1817 auto RHS = std::get<1>(LHSAndRHS);
1818 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1819 Xors.push_back(Xor);
1820 }
1821
1822 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1823 // to the desired narrow type so that we can OR them together later.
1824 SmallVector<Register, 4> WidenedXors;
1825 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1826 auto LHS = std::get<0>(LHSAndRHS);
1827 auto RHS = std::get<1>(LHSAndRHS);
1828 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1829 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1830 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1831 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1832 llvm::append_range(Xors, WidenedXors);
1833 }
1834
1835 // Now, for each part we broke up, we know if they are equal/not equal
1836 // based off the G_XOR. We can OR these all together and compare against
1837 // 0 to get the result.
1838 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1839 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1840 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1841 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1842 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1843 } else {
1844 Register CmpIn;
1845 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1846 Register CmpOut;
1847 CmpInst::Predicate PartPred;
1848
1849 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1850 PartPred = Pred;
1851 CmpOut = Dst;
1852 } else {
1853 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1854 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1855 }
1856
1857 if (!CmpIn) {
1858 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1859 RHSPartRegs[I]);
1860 } else {
1861 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1862 RHSPartRegs[I]);
1863 auto CmpEq = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, ResTy,
1864 LHSPartRegs[I], RHSPartRegs[I]);
1865 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1866 }
1867
1868 CmpIn = CmpOut;
1869 }
1870
1871 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1872 Register CmpOut;
1873 CmpInst::Predicate PartPred;
1874
1875 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1876 PartPred = Pred;
1877 CmpOut = Dst;
1878 } else {
1879 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1880 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1881 }
1882
1883 if (!CmpIn) {
1884 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1885 RHSLeftoverRegs[I]);
1886 } else {
1887 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1888 RHSLeftoverRegs[I]);
1889 auto CmpEq =
1890 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, ResTy,
1891 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1892 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1893 }
1894
1895 CmpIn = CmpOut;
1896 }
1897 }
1898 MI.eraseFromParent();
1899 return Legalized;
1900 }
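// A minimal sketch of the equality path above, assuming an s64 compare
// narrowed with NarrowTy = s32 and no leftover piece; register names are
// illustrative:
//
//   %eq:_(s1) = G_ICMP intpred(eq), %x(s64), %y(s64)
//
// =>
//
//   %x0:_(s32), %x1:_(s32) = G_UNMERGE_VALUES %x
//   %y0:_(s32), %y1:_(s32) = G_UNMERGE_VALUES %y
//   %xor0:_(s32) = G_XOR %x0, %y0
//   %xor1:_(s32) = G_XOR %x1, %y1
//   %or:_(s32) = G_OR %xor0, %xor1
//   %zero:_(s32) = G_CONSTANT i32 0
//   %eq:_(s1) = G_ICMP intpred(eq), %or, %zero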
1901 case TargetOpcode::G_FCMP:
1902 if (TypeIdx != 0)
1903 return UnableToLegalize;
1904
1906 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1908 return Legalized;
1909
1910 case TargetOpcode::G_SEXT_INREG: {
1911 if (TypeIdx != 0)
1912 return UnableToLegalize;
1913
1914 int64_t SizeInBits = MI.getOperand(2).getImm();
1915
1916 // So long as the new type has more bits than the bits we're extending we
1917 // don't need to break it apart.
1918 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1920 // We don't lose any non-extension bits by truncating the src and
1921 // sign-extending the dst.
1922 MachineOperand &MO1 = MI.getOperand(1);
1923 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1924 MO1.setReg(TruncMIB.getReg(0));
1925
1926 MachineOperand &MO2 = MI.getOperand(0);
1927 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1929 MIRBuilder.buildSExt(MO2, DstExt);
1930 MO2.setReg(DstExt);
1932 return Legalized;
1933 }
1934
1935 // Break it apart. Components below the extension point are unmodified. The
1936 // component containing the extension point becomes a narrower SEXT_INREG.
1937 // Components above it are ashr'd from the component containing the
1938 // extension point.
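// A minimal sketch of that split, assuming G_SEXT_INREG %x(s64), 8 narrowed
// with NarrowTy = s32; register names are illustrative:
//
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
//   %lo_sext:_(s32) = G_SEXT_INREG %lo, 8    ; part containing the extension point
//   %sign:_(s32) = G_ASHR %lo_sext, 31       ; parts above it take the sign bit
//   %res:_(s64) = G_MERGE_VALUES %lo_sext, %sign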
1939 if (SizeOp0 % NarrowSize != 0)
1940 return UnableToLegalize;
1941 int NumParts = SizeOp0 / NarrowSize;
1942
1943 // List the registers where the destination will be scattered.
1944 SmallVector<Register, 2> DstRegs;
1945 // List the registers where the source will be split.
1946 SmallVector<Register, 2> SrcRegs;
1947
1948 // Create all the temporary registers.
1949 for (int i = 0; i < NumParts; ++i) {
1950 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1951
1952 SrcRegs.push_back(SrcReg);
1953 }
1954
1955 // Explode the big arguments into smaller chunks.
1956 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1957
1958 Register AshrCstReg =
1959 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1960 .getReg(0);
1961 Register FullExtensionReg;
1962 Register PartialExtensionReg;
1963
1964 // Do the operation on each small part.
1965 for (int i = 0; i < NumParts; ++i) {
1966 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1967 DstRegs.push_back(SrcRegs[i]);
1968 PartialExtensionReg = DstRegs.back();
1969 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1970 assert(PartialExtensionReg &&
1971 "Expected to visit partial extension before full");
1972 if (FullExtensionReg) {
1973 DstRegs.push_back(FullExtensionReg);
1974 continue;
1975 }
1976 DstRegs.push_back(
1977 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1978 .getReg(0));
1979 FullExtensionReg = DstRegs.back();
1980 } else {
1981 DstRegs.push_back(
1982 MIRBuilder
1983 .buildInstr(
1984 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1985 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1986 .getReg(0));
1987 PartialExtensionReg = DstRegs.back();
1988 }
1989 }
1990
1991 // Gather the destination registers into the final destination.
1992 Register DstReg = MI.getOperand(0).getReg();
1993 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1994 MI.eraseFromParent();
1995 return Legalized;
1996 }
1997 case TargetOpcode::G_BSWAP:
1998 case TargetOpcode::G_BITREVERSE: {
1999 if (SizeOp0 % NarrowSize != 0)
2000 return UnableToLegalize;
2001
2003 SmallVector<Register, 2> SrcRegs, DstRegs;
2004 unsigned NumParts = SizeOp0 / NarrowSize;
2005 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2006 MIRBuilder, MRI);
2007
2008 for (unsigned i = 0; i < NumParts; ++i) {
2009 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2010 {SrcRegs[NumParts - 1 - i]});
2011 DstRegs.push_back(DstPart.getReg(0));
2012 }
2013
2014 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2015
2017 MI.eraseFromParent();
2018 return Legalized;
2019 }
2020 case TargetOpcode::G_PTR_ADD:
2021 case TargetOpcode::G_PTRMASK: {
2022 if (TypeIdx != 1)
2023 return UnableToLegalize;
2025 narrowScalarSrc(MI, NarrowTy, 2);
2027 return Legalized;
2028 }
2029 case TargetOpcode::G_FPTOUI:
2030 case TargetOpcode::G_FPTOSI:
2031 case TargetOpcode::G_FPTOUI_SAT:
2032 case TargetOpcode::G_FPTOSI_SAT:
2033 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2034 case TargetOpcode::G_FPEXT:
2035 if (TypeIdx != 0)
2036 return UnableToLegalize;
2038 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2040 return Legalized;
2041 case TargetOpcode::G_FLDEXP:
2042 case TargetOpcode::G_STRICT_FLDEXP:
2043 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2044 case TargetOpcode::G_VSCALE: {
2045 Register Dst = MI.getOperand(0).getReg();
2046 LLT Ty = MRI.getType(Dst);
2047
2048 // Assume VSCALE(1) fits into a legal integer
2049 const APInt One(NarrowTy.getSizeInBits(), 1);
2050 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2051 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2052 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2053 MIRBuilder.buildMul(Dst, ZExt, C);
2054
2055 MI.eraseFromParent();
2056 return Legalized;
2057 }
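// A minimal sketch of the G_VSCALE narrowing above, assuming an s64
// G_VSCALE 4 narrowed with NarrowTy = s32; register names are illustrative:
//
//   %vs1:_(s32) = G_VSCALE i32 1
//   %ext:_(s64) = G_ZEXT %vs1
//   %c:_(s64) = G_CONSTANT i64 4
//   %dst:_(s64) = G_MUL %ext, %c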
2058 }
2059}
2060
2061 Register LegalizerHelper::coerceToScalar(Register Val) {
2062 LLT Ty = MRI.getType(Val);
2063 if (Ty.isScalar())
2064 return Val;
2065
2066 const DataLayout &DL = MIRBuilder.getDataLayout();
2067 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2068 if (Ty.isPointer()) {
2069 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2070 return Register();
2071 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2072 }
2073
2074 Register NewVal = Val;
2075
2076 assert(Ty.isVector());
2077 if (Ty.isPointerVector())
2078 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2079 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2080}
2081
2082 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2083 unsigned OpIdx, unsigned ExtOpcode) {
2084 MachineOperand &MO = MI.getOperand(OpIdx);
2085 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2086 MO.setReg(ExtB.getReg(0));
2087}
2088
2089 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2090 unsigned OpIdx) {
2091 MachineOperand &MO = MI.getOperand(OpIdx);
2092 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2093 MO.setReg(ExtB.getReg(0));
2094}
2095
2096 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2097 unsigned OpIdx, unsigned TruncOpcode) {
2098 MachineOperand &MO = MI.getOperand(OpIdx);
2099 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2100 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2101 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2102 MO.setReg(DstExt);
2103}
2104
2105 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2106 unsigned OpIdx, unsigned ExtOpcode) {
2107 MachineOperand &MO = MI.getOperand(OpIdx);
2108 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2109 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2110 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2111 MO.setReg(DstTrunc);
2112}
2113
2114 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2115 unsigned OpIdx) {
2116 MachineOperand &MO = MI.getOperand(OpIdx);
2117 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2118 Register Dst = MO.getReg();
2119 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2120 MO.setReg(DstExt);
2121 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2122}
2123
2124 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2125 unsigned OpIdx) {
2126 MachineOperand &MO = MI.getOperand(OpIdx);
2128}
2129
2130 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2131 MachineOperand &Op = MI.getOperand(OpIdx);
2132 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2133}
2134
2135 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2136 MachineOperand &MO = MI.getOperand(OpIdx);
2137 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2138 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2139 MIRBuilder.buildBitcast(MO, CastDst);
2140 MO.setReg(CastDst);
2141}
2142
2143 LegalizerHelper::LegalizeResult
2144 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2145 LLT WideTy) {
2146 if (TypeIdx != 1)
2147 return UnableToLegalize;
2148
2149 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2150 if (DstTy.isVector())
2151 return UnableToLegalize;
2152
2153 LLT SrcTy = MRI.getType(Src1Reg);
2154 const int DstSize = DstTy.getSizeInBits();
2155 const int SrcSize = SrcTy.getSizeInBits();
2156 const int WideSize = WideTy.getSizeInBits();
2157 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2158
2159 unsigned NumOps = MI.getNumOperands();
2160 unsigned NumSrc = MI.getNumOperands() - 1;
2161 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2162
2163 if (WideSize >= DstSize) {
2164 // Directly pack the bits in the target type.
2165 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2166
2167 for (unsigned I = 2; I != NumOps; ++I) {
2168 const unsigned Offset = (I - 1) * PartSize;
2169
2170 Register SrcReg = MI.getOperand(I).getReg();
2171 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2172
2173 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2174
2175 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2176 MRI.createGenericVirtualRegister(WideTy);
2177
2178 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2179 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2180 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2181 ResultReg = NextResult;
2182 }
2183
2184 if (WideSize > DstSize)
2185 MIRBuilder.buildTrunc(DstReg, ResultReg);
2186 else if (DstTy.isPointer())
2187 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2188
2189 MI.eraseFromParent();
2190 return Legalized;
2191 }
2192
2193 // Unmerge the original values to the GCD type, and recombine to the next
2194 // multiple greater than the original type.
2195 //
2196 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2197 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2198 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2199 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2200 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2201 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2202 // %12:_(s12) = G_MERGE_VALUES %10, %11
2203 //
2204 // Padding with undef if necessary:
2205 //
2206 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2207 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2208 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2209 // %7:_(s2) = G_IMPLICIT_DEF
2210 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2211 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2212 // %10:_(s12) = G_MERGE_VALUES %8, %9
2213
2214 const int GCD = std::gcd(SrcSize, WideSize);
2215 LLT GCDTy = LLT::scalar(GCD);
2216
2217 SmallVector<Register, 8> NewMergeRegs;
2218 SmallVector<Register, 8> Unmerges;
2219 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2220
2221 // Decompose the original operands if they don't evenly divide.
2222 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2223 Register SrcReg = MO.getReg();
2224 if (GCD == SrcSize) {
2225 Unmerges.push_back(SrcReg);
2226 } else {
2227 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2228 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2229 Unmerges.push_back(Unmerge.getReg(J));
2230 }
2231 }
2232
2233 // Pad with undef to the next size that is a multiple of the requested size.
2234 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2235 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2236 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2237 Unmerges.push_back(UndefReg);
2238 }
2239
2240 const int PartsPerGCD = WideSize / GCD;
2241
2242 // Build merges of each piece.
2243 ArrayRef<Register> Slicer(Unmerges);
2244 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2245 auto Merge =
2246 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2247 NewMergeRegs.push_back(Merge.getReg(0));
2248 }
2249
2250 // A truncate may be necessary if the requested type doesn't evenly divide the
2251 // original result type.
2252 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2253 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2254 } else {
2255 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2256 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2257 }
2258
2259 MI.eraseFromParent();
2260 return Legalized;
2261}
2262
2263 LegalizerHelper::LegalizeResult
2264 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2265 LLT WideTy) {
2266 if (TypeIdx != 0)
2267 return UnableToLegalize;
2268
2269 int NumDst = MI.getNumOperands() - 1;
2270 Register SrcReg = MI.getOperand(NumDst).getReg();
2271 LLT SrcTy = MRI.getType(SrcReg);
2272 if (SrcTy.isVector())
2273 return UnableToLegalize;
2274
2275 Register Dst0Reg = MI.getOperand(0).getReg();
2276 LLT DstTy = MRI.getType(Dst0Reg);
2277 if (!DstTy.isScalar())
2278 return UnableToLegalize;
2279
2280 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2281 if (SrcTy.isPointer()) {
2282 const DataLayout &DL = MIRBuilder.getDataLayout();
2283 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2284 LLVM_DEBUG(
2285 dbgs() << "Not casting non-integral address space integer\n");
2286 return UnableToLegalize;
2287 }
2288
2289 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2290 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2291 }
2292
2293 // Widen SrcTy to WideTy. This does not affect the result, but since the
2294 // user requested this size, it is probably better handled than SrcTy and
2295 // should reduce the total number of legalization artifacts.
2296 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2297 SrcTy = WideTy;
2298 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2299 }
2300
2301 // There's no unmerge type to target. Directly extract the bits from the
2302 // source type.
2303 unsigned DstSize = DstTy.getSizeInBits();
2304
2305 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2306 for (int I = 1; I != NumDst; ++I) {
2307 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2308 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2309 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2310 }
2311
2312 MI.eraseFromParent();
2313 return Legalized;
2314 }
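// A minimal sketch of the shift-based extraction above, assuming
// %a(s8), %b(s8) = G_UNMERGE_VALUES %x(s16) widened with WideTy = s32;
// register names are illustrative:
//
//   %wide:_(s32) = G_ANYEXT %x
//   %a:_(s8) = G_TRUNC %wide
//   %amt:_(s32) = G_CONSTANT i32 8
//   %shr:_(s32) = G_LSHR %wide, %amt
//   %b:_(s8) = G_TRUNC %shr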
2315
2316 // Extend the source to a wider type.
2317 LLT LCMTy = getLCMType(SrcTy, WideTy);
2318
2319 Register WideSrc = SrcReg;
2320 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2321 // TODO: If this is an integral address space, cast to integer and anyext.
2322 if (SrcTy.isPointer()) {
2323 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2324 return UnableToLegalize;
2325 }
2326
2327 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2328 }
2329
2330 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2331
2332 // Create a sequence of unmerges and merges to the original results. Since we
2333 // may have widened the source, we will need to pad the results with dead defs
2334 // to cover the source register.
2335 // e.g. widen s48 to s64:
2336 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2337 //
2338 // =>
2339 // %4:_(s192) = G_ANYEXT %0:_(s96)
2340 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2341 // ; unpack to GCD type, with extra dead defs
2342 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2343 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2344 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2345 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2346 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2347 const LLT GCDTy = getGCDType(WideTy, DstTy);
2348 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2349 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2350
2351 // Directly unmerge to the destination without going through a GCD type
2352 // if possible
2353 if (PartsPerRemerge == 1) {
2354 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2355
2356 for (int I = 0; I != NumUnmerge; ++I) {
2357 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2358
2359 for (int J = 0; J != PartsPerUnmerge; ++J) {
2360 int Idx = I * PartsPerUnmerge + J;
2361 if (Idx < NumDst)
2362 MIB.addDef(MI.getOperand(Idx).getReg());
2363 else {
2364 // Create dead def for excess components.
2365 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2366 }
2367 }
2368
2369 MIB.addUse(Unmerge.getReg(I));
2370 }
2371 } else {
2372 SmallVector<Register, 16> Parts;
2373 for (int J = 0; J != NumUnmerge; ++J)
2374 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2375
2376 SmallVector<Register, 8> RemergeParts;
2377 for (int I = 0; I != NumDst; ++I) {
2378 for (int J = 0; J < PartsPerRemerge; ++J) {
2379 const int Idx = I * PartsPerRemerge + J;
2380 RemergeParts.emplace_back(Parts[Idx]);
2381 }
2382
2383 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2384 RemergeParts.clear();
2385 }
2386 }
2387
2388 MI.eraseFromParent();
2389 return Legalized;
2390}
2391
2392 LegalizerHelper::LegalizeResult
2393 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2394 LLT WideTy) {
2395 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2396 unsigned Offset = MI.getOperand(2).getImm();
2397
2398 if (TypeIdx == 0) {
2399 if (SrcTy.isVector() || DstTy.isVector())
2400 return UnableToLegalize;
2401
2402 SrcOp Src(SrcReg);
2403 if (SrcTy.isPointer()) {
2404 // Extracts from pointers can be handled only if they are really just
2405 // simple integers.
2406 const DataLayout &DL = MIRBuilder.getDataLayout();
2407 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2408 return UnableToLegalize;
2409
2410 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2411 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2412 SrcTy = SrcAsIntTy;
2413 }
2414
2415 if (DstTy.isPointer())
2416 return UnableToLegalize;
2417
2418 if (Offset == 0) {
2419 // Avoid a shift in the degenerate case.
2420 MIRBuilder.buildTrunc(DstReg,
2421 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2422 MI.eraseFromParent();
2423 return Legalized;
2424 }
2425
2426 // Do a shift in the source type.
2427 LLT ShiftTy = SrcTy;
2428 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2429 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2430 ShiftTy = WideTy;
2431 }
2432
2433 auto LShr = MIRBuilder.buildLShr(
2434 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2435 MIRBuilder.buildTrunc(DstReg, LShr);
2436 MI.eraseFromParent();
2437 return Legalized;
2438 }
2439
2440 if (SrcTy.isScalar()) {
2442 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2444 return Legalized;
2445 }
2446
2447 if (!SrcTy.isVector())
2448 return UnableToLegalize;
2449
2450 if (DstTy != SrcTy.getElementType())
2451 return UnableToLegalize;
2452
2453 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2454 return UnableToLegalize;
2455
2457 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2458
2459 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2460 Offset);
2461 widenScalarDst(MI, WideTy.getScalarType(), 0);
2463 return Legalized;
2464}
2465
2466 LegalizerHelper::LegalizeResult
2467 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2468 LLT WideTy) {
2469 if (TypeIdx != 0 || WideTy.isVector())
2470 return UnableToLegalize;
2472 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2473 widenScalarDst(MI, WideTy);
2475 return Legalized;
2476}
2477
2478 LegalizerHelper::LegalizeResult
2479 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2480 LLT WideTy) {
2481 unsigned Opcode;
2482 unsigned ExtOpcode;
2483 std::optional<Register> CarryIn;
2484 switch (MI.getOpcode()) {
2485 default:
2486 llvm_unreachable("Unexpected opcode!");
2487 case TargetOpcode::G_SADDO:
2488 Opcode = TargetOpcode::G_ADD;
2489 ExtOpcode = TargetOpcode::G_SEXT;
2490 break;
2491 case TargetOpcode::G_SSUBO:
2492 Opcode = TargetOpcode::G_SUB;
2493 ExtOpcode = TargetOpcode::G_SEXT;
2494 break;
2495 case TargetOpcode::G_UADDO:
2496 Opcode = TargetOpcode::G_ADD;
2497 ExtOpcode = TargetOpcode::G_ZEXT;
2498 break;
2499 case TargetOpcode::G_USUBO:
2500 Opcode = TargetOpcode::G_SUB;
2501 ExtOpcode = TargetOpcode::G_ZEXT;
2502 break;
2503 case TargetOpcode::G_SADDE:
2504 Opcode = TargetOpcode::G_UADDE;
2505 ExtOpcode = TargetOpcode::G_SEXT;
2506 CarryIn = MI.getOperand(4).getReg();
2507 break;
2508 case TargetOpcode::G_SSUBE:
2509 Opcode = TargetOpcode::G_USUBE;
2510 ExtOpcode = TargetOpcode::G_SEXT;
2511 CarryIn = MI.getOperand(4).getReg();
2512 break;
2513 case TargetOpcode::G_UADDE:
2514 Opcode = TargetOpcode::G_UADDE;
2515 ExtOpcode = TargetOpcode::G_ZEXT;
2516 CarryIn = MI.getOperand(4).getReg();
2517 break;
2518 case TargetOpcode::G_USUBE:
2519 Opcode = TargetOpcode::G_USUBE;
2520 ExtOpcode = TargetOpcode::G_ZEXT;
2521 CarryIn = MI.getOperand(4).getReg();
2522 break;
2523 }
2524
2525 if (TypeIdx == 1) {
2526 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2527
2529 if (CarryIn)
2530 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2531 widenScalarDst(MI, WideTy, 1);
2532
2534 return Legalized;
2535 }
2536
2537 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2538 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2539 // Do the arithmetic in the larger type.
2540 Register NewOp;
2541 if (CarryIn) {
2542 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2543 NewOp = MIRBuilder
2544 .buildInstr(Opcode, {WideTy, CarryOutTy},
2545 {LHSExt, RHSExt, *CarryIn})
2546 .getReg(0);
2547 } else {
2548 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2549 }
2550 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2551 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2552 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2553 // There is no overflow if the ExtOp is the same as NewOp.
2554 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2555 // Now trunc the NewOp to the original result.
2556 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2557 MI.eraseFromParent();
2558 return Legalized;
2559}
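// A minimal sketch of the overflow check above, assuming G_SADDO on s8
// widened with WideTy = s32; register names are illustrative:
//
//   %l32:_(s32) = G_SEXT %lhs(s8)
//   %r32:_(s32) = G_SEXT %rhs(s8)
//   %sum:_(s32) = G_ADD %l32, %r32
//   %res:_(s8) = G_TRUNC %sum
//   %reext:_(s32) = G_SEXT %res
//   %ovf:_(s1) = G_ICMP intpred(ne), %sum, %reext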
2560
2561 LegalizerHelper::LegalizeResult
2562 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2563 LLT WideTy) {
2564 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2565 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2566 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2567 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2568 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2569 // We can convert this to:
2570 // 1. Any extend iN to iM
2571 // 2. SHL by M-N
2572 // 3. [US][ADD|SUB|SHL]SAT
2573 // 4. L/ASHR by M-N
2574 //
2575 // It may be more efficient to lower this to a min and a max operation in
2576 // the higher precision arithmetic if the promoted operation isn't legal,
2577 // but this decision is up to the target's lowering request.
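// A minimal sketch of those steps, assuming G_SADDSAT on s8 widened with
// WideTy = s32 (so the shift amount is 24); register names are illustrative:
//
//   %a32:_(s32) = G_ANYEXT %a(s8)
//   %b32:_(s32) = G_ANYEXT %b(s8)
//   %k:_(s32) = G_CONSTANT i32 24
//   %ah:_(s32) = G_SHL %a32, %k
//   %bh:_(s32) = G_SHL %b32, %k
//   %sat:_(s32) = G_SADDSAT %ah, %bh
//   %shr:_(s32) = G_ASHR %sat, %k
//   %res:_(s8) = G_TRUNC %shr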
2578 Register DstReg = MI.getOperand(0).getReg();
2579
2580 unsigned NewBits = WideTy.getScalarSizeInBits();
2581 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2582
2583 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2584 // must not left shift the RHS to preserve the shift amount.
2585 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2586 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2587 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2588 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2589 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2590 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2591
2592 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2593 {ShiftL, ShiftR}, MI.getFlags());
2594
2595 // Use a shift that will preserve the number of sign bits when the trunc is
2596 // folded away.
2597 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2598 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2599
2600 MIRBuilder.buildTrunc(DstReg, Result);
2601 MI.eraseFromParent();
2602 return Legalized;
2603}
2604
2605 LegalizerHelper::LegalizeResult
2606 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2607 LLT WideTy) {
2608 if (TypeIdx == 1) {
2610 widenScalarDst(MI, WideTy, 1);
2612 return Legalized;
2613 }
2614
2615 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2616 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2617 LLT SrcTy = MRI.getType(LHS);
2618 LLT OverflowTy = MRI.getType(OriginalOverflow);
2619 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2620
2621 // To determine if the result overflowed in the larger type, we extend the
2622 // input to the larger type, do the multiply (checking if it overflows),
2623 // then also check the high bits of the result to see if overflow happened
2624 // there.
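// A minimal sketch, assuming G_UMULO on s16 widened with WideTy = s24, so
// the widened multiply can itself still overflow; register names are
// illustrative:
//
//   %a24:_(s24) = G_ZEXT %a(s16)
//   %b24:_(s24) = G_ZEXT %b(s16)
//   %mul:_(s24), %wide_ovf:_(s1) = G_UMULO %a24, %b24
//   %res:_(s16) = G_TRUNC %mul
//   %mask:_(s24) = G_CONSTANT i24 65535
//   %low:_(s24) = G_AND %mul, %mask           ; zero-extend of the low 16 bits
//   %high_ovf:_(s1) = G_ICMP intpred(ne), %mul, %low
//   %ovf:_(s1) = G_OR %wide_ovf, %high_ovf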
2625 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2626 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2627 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2628
2629 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2630 // so we don't need to check the overflow result of larger type Mulo.
2631 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2632
2633 unsigned MulOpc =
2634 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2635
2637 if (WideMulCanOverflow)
2638 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2639 {LeftOperand, RightOperand});
2640 else
2641 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2642
2643 auto Mul = Mulo->getOperand(0);
2644 MIRBuilder.buildTrunc(Result, Mul);
2645
2646 MachineInstrBuilder ExtResult;
2647 // Overflow occurred if it occurred in the larger type, or if the high part
2648 // of the result does not zero/sign-extend the low part. Check this second
2649 // possibility first.
2650 if (IsSigned) {
2651 // For signed, overflow occurred when the high part does not sign-extend
2652 // the low part.
2653 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2654 } else {
2655 // Unsigned overflow occurred when the high part does not zero-extend the
2656 // low part.
2657 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2658 }
2659
2660 if (WideMulCanOverflow) {
2661 auto Overflow =
2662 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2663 // Finally check if the multiplication in the larger type itself overflowed.
2664 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2665 } else {
2666 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2667 }
2668 MI.eraseFromParent();
2669 return Legalized;
2670}
2671
2672 LegalizerHelper::LegalizeResult
2673 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2674 unsigned Opcode = MI.getOpcode();
2675 switch (Opcode) {
2676 default:
2677 return UnableToLegalize;
2678 case TargetOpcode::G_ATOMICRMW_XCHG:
2679 case TargetOpcode::G_ATOMICRMW_ADD:
2680 case TargetOpcode::G_ATOMICRMW_SUB:
2681 case TargetOpcode::G_ATOMICRMW_AND:
2682 case TargetOpcode::G_ATOMICRMW_OR:
2683 case TargetOpcode::G_ATOMICRMW_XOR:
2684 case TargetOpcode::G_ATOMICRMW_MIN:
2685 case TargetOpcode::G_ATOMICRMW_MAX:
2686 case TargetOpcode::G_ATOMICRMW_UMIN:
2687 case TargetOpcode::G_ATOMICRMW_UMAX:
2688 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2690 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2691 widenScalarDst(MI, WideTy, 0);
2693 return Legalized;
2694 case TargetOpcode::G_ATOMIC_CMPXCHG:
2695 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2697 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2698 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2699 widenScalarDst(MI, WideTy, 0);
2701 return Legalized;
2702 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2703 if (TypeIdx == 0) {
2705 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2706 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2707 widenScalarDst(MI, WideTy, 0);
2709 return Legalized;
2710 }
2711 assert(TypeIdx == 1 &&
2712 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2714 widenScalarDst(MI, WideTy, 1);
2716 return Legalized;
2717 case TargetOpcode::G_EXTRACT:
2718 return widenScalarExtract(MI, TypeIdx, WideTy);
2719 case TargetOpcode::G_INSERT:
2720 return widenScalarInsert(MI, TypeIdx, WideTy);
2721 case TargetOpcode::G_MERGE_VALUES:
2722 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2723 case TargetOpcode::G_UNMERGE_VALUES:
2724 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2725 case TargetOpcode::G_SADDO:
2726 case TargetOpcode::G_SSUBO:
2727 case TargetOpcode::G_UADDO:
2728 case TargetOpcode::G_USUBO:
2729 case TargetOpcode::G_SADDE:
2730 case TargetOpcode::G_SSUBE:
2731 case TargetOpcode::G_UADDE:
2732 case TargetOpcode::G_USUBE:
2733 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2734 case TargetOpcode::G_UMULO:
2735 case TargetOpcode::G_SMULO:
2736 return widenScalarMulo(MI, TypeIdx, WideTy);
2737 case TargetOpcode::G_SADDSAT:
2738 case TargetOpcode::G_SSUBSAT:
2739 case TargetOpcode::G_SSHLSAT:
2740 case TargetOpcode::G_UADDSAT:
2741 case TargetOpcode::G_USUBSAT:
2742 case TargetOpcode::G_USHLSAT:
2743 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2744 case TargetOpcode::G_CTTZ:
2745 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2746 case TargetOpcode::G_CTLZ:
2747 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2748 case TargetOpcode::G_CTPOP: {
2749 if (TypeIdx == 0) {
2751 widenScalarDst(MI, WideTy, 0);
2753 return Legalized;
2754 }
2755
2756 Register SrcReg = MI.getOperand(1).getReg();
2757
2758 // First extend the input.
2759 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2760 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2761 ? TargetOpcode::G_ANYEXT
2762 : TargetOpcode::G_ZEXT;
2763 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2764 LLT CurTy = MRI.getType(SrcReg);
2765 unsigned NewOpc = Opcode;
2766 if (NewOpc == TargetOpcode::G_CTTZ) {
2767 // The count is the same in the larger type except if the original
2768 // value was zero. This can be handled by setting the bit just off
2769 // the top of the original type.
2770 auto TopBit =
2771 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2772 MIBSrc = MIRBuilder.buildOr(
2773 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2774 // Now we know the operand is non-zero, use the more relaxed opcode.
2775 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2776 }
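// A minimal sketch of the G_CTTZ handling above, assuming an s8 source
// widened with WideTy = s32; register names are illustrative. Setting bit 8
// means an all-zero input still yields 8:
//
//   %src32:_(s32) = G_ANYEXT %src(s8)
//   %bit:_(s32) = G_CONSTANT i32 256
//   %or:_(s32) = G_OR %src32, %bit
//   %cnt:_(s32) = G_CTTZ_ZERO_UNDEF %or
//   %res:_(s8) = G_TRUNC %cnt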
2777
2778 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2779
2780 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2781 // An optimization where the result is the CTLZ after the left shift by
2782 // (Difference in widety and current ty), that is,
2783 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2784 // Result = ctlz MIBSrc
2785 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2786 MIRBuilder.buildConstant(WideTy, SizeDiff));
2787 }
2788
2789 // Perform the operation at the larger size.
2790 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2791 // This is already the correct result for CTPOP and CTTZs
2792 if (Opcode == TargetOpcode::G_CTLZ) {
2793 // The correct result is NewOp - (Difference in widety and current ty).
2794 MIBNewOp = MIRBuilder.buildSub(
2795 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2796 }
2797
2798 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2799 MI.eraseFromParent();
2800 return Legalized;
2801 }
2802 case TargetOpcode::G_BSWAP: {
2804 Register DstReg = MI.getOperand(0).getReg();
2805
2806 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2807 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2808 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2809 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2810
2811 MI.getOperand(0).setReg(DstExt);
2812
2814
2815 LLT Ty = MRI.getType(DstReg);
2816 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2817 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2818 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2819
2820 MIRBuilder.buildTrunc(DstReg, ShrReg);
2822 return Legalized;
2823 }
2824 case TargetOpcode::G_BITREVERSE: {
2826
2827 Register DstReg = MI.getOperand(0).getReg();
2828 LLT Ty = MRI.getType(DstReg);
2829 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2830
2831 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2832 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2833 MI.getOperand(0).setReg(DstExt);
2835
2836 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2837 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2838 MIRBuilder.buildTrunc(DstReg, Shift);
2840 return Legalized;
2841 }
2842 case TargetOpcode::G_FREEZE:
2843 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2845 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2846 widenScalarDst(MI, WideTy);
2848 return Legalized;
2849
2850 case TargetOpcode::G_ABS:
2852 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2853 widenScalarDst(MI, WideTy);
2855 return Legalized;
2856
2857 case TargetOpcode::G_ADD:
2858 case TargetOpcode::G_AND:
2859 case TargetOpcode::G_MUL:
2860 case TargetOpcode::G_OR:
2861 case TargetOpcode::G_XOR:
2862 case TargetOpcode::G_SUB:
2863 case TargetOpcode::G_SHUFFLE_VECTOR:
2864 // Perform operation at larger width (any extension is fine here, high bits
2865 // don't affect the result) and then truncate the result back to the
2866 // original type.
2868 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2869 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2870 widenScalarDst(MI, WideTy);
2872 return Legalized;
2873
2874 case TargetOpcode::G_SBFX:
2875 case TargetOpcode::G_UBFX:
2877
2878 if (TypeIdx == 0) {
2879 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2880 widenScalarDst(MI, WideTy);
2881 } else {
2882 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2883 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2884 }
2885
2887 return Legalized;
2888
2889 case TargetOpcode::G_SHL:
2891
2892 if (TypeIdx == 0) {
2893 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2894 widenScalarDst(MI, WideTy);
2895 } else {
2896 assert(TypeIdx == 1);
2897 // The "number of bits to shift" operand must preserve its value as an
2898 // unsigned integer:
2899 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2900 }
2901
2903 return Legalized;
2904
2905 case TargetOpcode::G_ROTR:
2906 case TargetOpcode::G_ROTL:
2907 if (TypeIdx != 1)
2908 return UnableToLegalize;
2909
2911 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2913 return Legalized;
2914
2915 case TargetOpcode::G_SDIV:
2916 case TargetOpcode::G_SREM:
2917 case TargetOpcode::G_SMIN:
2918 case TargetOpcode::G_SMAX:
2920 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2921 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2922 widenScalarDst(MI, WideTy);
2924 return Legalized;
2925
2926 case TargetOpcode::G_SDIVREM:
2928 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2929 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2930 widenScalarDst(MI, WideTy);
2931 widenScalarDst(MI, WideTy, 1);
2933 return Legalized;
2934
2935 case TargetOpcode::G_ASHR:
2936 case TargetOpcode::G_LSHR:
2938
2939 if (TypeIdx == 0) {
2940 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2941 : TargetOpcode::G_ZEXT;
2942
2943 widenScalarSrc(MI, WideTy, 1, CvtOp);
2944 widenScalarDst(MI, WideTy);
2945 } else {
2946 assert(TypeIdx == 1);
2947 // The "number of bits to shift" operand must preserve its value as an
2948 // unsigned integer:
2949 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2950 }
2951
2953 return Legalized;
2954 case TargetOpcode::G_UDIV:
2955 case TargetOpcode::G_UREM:
2957 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2958 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2959 widenScalarDst(MI, WideTy);
2961 return Legalized;
2962 case TargetOpcode::G_UDIVREM:
2964 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2965 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2966 widenScalarDst(MI, WideTy);
2967 widenScalarDst(MI, WideTy, 1);
2969 return Legalized;
2970 case TargetOpcode::G_UMIN:
2971 case TargetOpcode::G_UMAX: {
2972 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2973
2974 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
2975 unsigned ExtOpc =
2976 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
2977 getApproximateEVTForLLT(WideTy, Ctx))
2978 ? TargetOpcode::G_SEXT
2979 : TargetOpcode::G_ZEXT;
2980
2982 widenScalarSrc(MI, WideTy, 1, ExtOpc);
2983 widenScalarSrc(MI, WideTy, 2, ExtOpc);
2984 widenScalarDst(MI, WideTy);
2986 return Legalized;
2987 }
2988
2989 case TargetOpcode::G_SELECT:
2991 if (TypeIdx == 0) {
2992 // Perform operation at larger width (any extension is fine here, high
2993 // bits don't affect the result) and then truncate the result back to the
2994 // original type.
2995 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2996 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2997 widenScalarDst(MI, WideTy);
2998 } else {
2999 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3000 // Explicit extension is required here since high bits affect the result.
3001 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3002 }
3004 return Legalized;
3005
3006 case TargetOpcode::G_FPTOSI:
3007 case TargetOpcode::G_FPTOUI:
3008 case TargetOpcode::G_INTRINSIC_LRINT:
3009 case TargetOpcode::G_INTRINSIC_LLRINT:
3010 case TargetOpcode::G_IS_FPCLASS:
3012
3013 if (TypeIdx == 0)
3014 widenScalarDst(MI, WideTy);
3015 else
3016 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3017
3019 return Legalized;
3020 case TargetOpcode::G_SITOFP:
3022
3023 if (TypeIdx == 0)
3024 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3025 else
3026 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3027
3029 return Legalized;
3030 case TargetOpcode::G_UITOFP:
3032
3033 if (TypeIdx == 0)
3034 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3035 else
3036 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3037
3039 return Legalized;
3040 case TargetOpcode::G_FPTOSI_SAT:
3041 case TargetOpcode::G_FPTOUI_SAT:
3043
3044 if (TypeIdx == 0) {
3045 Register OldDst = MI.getOperand(0).getReg();
3046 LLT Ty = MRI.getType(OldDst);
3047 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3048 Register NewDst;
3049 MI.getOperand(0).setReg(ExtReg);
3050 uint64_t ShortBits = Ty.getScalarSizeInBits();
3051 uint64_t WideBits = WideTy.getScalarSizeInBits();
3053 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3054 // z = i16 fptosi_sat(a)
3055 // ->
3056 // x = i32 fptosi_sat(a)
3057 // y = smin(x, 32767)
3058 // z = smax(y, -32768)
3059 auto MaxVal = MIRBuilder.buildConstant(
3060 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3061 auto MinVal = MIRBuilder.buildConstant(
3062 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3063 Register MidReg =
3064 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3065 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3066 } else {
3067 // z = i16 fptoui_sat(a)
3068 // ->
3069 // x = i32 fptoui_sat(a)
3070 // y = smin(x, 65535)
3071 auto MaxVal = MIRBuilder.buildConstant(
3072 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3073 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3074 }
3075 MIRBuilder.buildTrunc(OldDst, NewDst);
3076 } else
3077 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3078
3080 return Legalized;
3081 case TargetOpcode::G_LOAD:
3082 case TargetOpcode::G_SEXTLOAD:
3083 case TargetOpcode::G_ZEXTLOAD:
3085 widenScalarDst(MI, WideTy);
3087 return Legalized;
3088
3089 case TargetOpcode::G_STORE: {
3090 if (TypeIdx != 0)
3091 return UnableToLegalize;
3092
3093 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3094 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3095 if (!Ty.isScalar()) {
3096 // We need to widen the vector element type.
3098 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3099 // We also need to adjust the MMO to turn this into a truncating store.
3100 MachineMemOperand &MMO = **MI.memoperands_begin();
3101 MachineFunction &MF = MIRBuilder.getMF();
3102 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3103 MI.setMemRefs(MF, {NewMMO});
3105 return Legalized;
3106 }
3107
3109
3110 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3111 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3112 widenScalarSrc(MI, WideTy, 0, ExtType);
3113
3115 return Legalized;
3116 }
3117 case TargetOpcode::G_CONSTANT: {
3118 MachineOperand &SrcMO = MI.getOperand(1);
3119 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3120 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3121 MRI.getType(MI.getOperand(0).getReg()));
3122 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3123 ExtOpc == TargetOpcode::G_ANYEXT) &&
3124 "Illegal Extend");
3125 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3126 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3127 ? SrcVal.sext(WideTy.getSizeInBits())
3128 : SrcVal.zext(WideTy.getSizeInBits());
3130 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3131
3132 widenScalarDst(MI, WideTy);
3134 return Legalized;
3135 }
3136 case TargetOpcode::G_FCONSTANT: {
3137 // To avoid changing the bits of the constant due to extension to a larger
3138 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3139 MachineOperand &SrcMO = MI.getOperand(1);
3140 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3142 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3143 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3144 MI.eraseFromParent();
3145 return Legalized;
3146 }
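// A minimal sketch of the G_FCONSTANT handling above, assuming an s16 half
// constant 1.0 widened with WideTy = s32; register names are illustrative:
//
//   %dst:_(s16) = G_FCONSTANT half 1.0
//
// =>
//
//   %int:_(s32) = G_CONSTANT i32 15360   ; 0x3C00, the bit pattern of half 1.0
//   %dst:_(s16) = G_TRUNC %int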
3147 case TargetOpcode::G_IMPLICIT_DEF: {
3149 widenScalarDst(MI, WideTy);
3151 return Legalized;
3152 }
3153 case TargetOpcode::G_BRCOND:
3155 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3157 return Legalized;
3158
3159 case TargetOpcode::G_FCMP:
3161 if (TypeIdx == 0)
3162 widenScalarDst(MI, WideTy);
3163 else {
3164 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3165 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3166 }
3168 return Legalized;
3169
3170 case TargetOpcode::G_ICMP:
3172 if (TypeIdx == 0)
3173 widenScalarDst(MI, WideTy);
3174 else {
3175 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3176 CmpInst::Predicate Pred =
3177 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3178
3179 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3180 unsigned ExtOpcode =
3181 (CmpInst::isSigned(Pred) ||
3182 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3183 getApproximateEVTForLLT(WideTy, Ctx)))
3184 ? TargetOpcode::G_SEXT
3185 : TargetOpcode::G_ZEXT;
3186 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3187 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3188 }
3190 return Legalized;
3191
3192 case TargetOpcode::G_PTR_ADD:
3193 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3195 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3197 return Legalized;
3198
3199 case TargetOpcode::G_PHI: {
3200 assert(TypeIdx == 0 && "Expecting only Idx 0");
3201
3203 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3204 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3206 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3207 }
3208
3209 MachineBasicBlock &MBB = *MI.getParent();
3211 widenScalarDst(MI, WideTy);
3213 return Legalized;
3214 }
3215 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3216 if (TypeIdx == 0) {
3217 Register VecReg = MI.getOperand(1).getReg();
3218 LLT VecTy = MRI.getType(VecReg);
3220
3221 widenScalarSrc(
3222 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3223 TargetOpcode::G_ANYEXT);
3224
3225 widenScalarDst(MI, WideTy, 0);
3227 return Legalized;
3228 }
3229
3230 if (TypeIdx != 2)
3231 return UnableToLegalize;
3233 // TODO: Probably should be zext
3234 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3236 return Legalized;
3237 }
3238 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3239 if (TypeIdx == 0) {
3241 const LLT WideEltTy = WideTy.getElementType();
3242
3243 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3244 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3245 widenScalarDst(MI, WideTy, 0);
3247 return Legalized;
3248 }
3249
3250 if (TypeIdx == 1) {
3252
3253 Register VecReg = MI.getOperand(1).getReg();
3254 LLT VecTy = MRI.getType(VecReg);
3255 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3256
3257 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3258 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3259 widenScalarDst(MI, WideVecTy, 0);
3261 return Legalized;
3262 }
3263
3264 if (TypeIdx == 2) {
3266 // TODO: Probably should be zext
3267 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3269 return Legalized;
3270 }
3271
3272 return UnableToLegalize;
3273 }
3274 case TargetOpcode::G_FADD:
3275 case TargetOpcode::G_FMUL:
3276 case TargetOpcode::G_FSUB:
3277 case TargetOpcode::G_FMA:
3278 case TargetOpcode::G_FMAD:
3279 case TargetOpcode::G_FNEG:
3280 case TargetOpcode::G_FABS:
3281 case TargetOpcode::G_FCANONICALIZE:
3282 case TargetOpcode::G_FMINNUM:
3283 case TargetOpcode::G_FMAXNUM:
3284 case TargetOpcode::G_FMINNUM_IEEE:
3285 case TargetOpcode::G_FMAXNUM_IEEE:
3286 case TargetOpcode::G_FMINIMUM:
3287 case TargetOpcode::G_FMAXIMUM:
3288 case TargetOpcode::G_FMINIMUMNUM:
3289 case TargetOpcode::G_FMAXIMUMNUM:
3290 case TargetOpcode::G_FDIV:
3291 case TargetOpcode::G_FREM:
3292 case TargetOpcode::G_FCEIL:
3293 case TargetOpcode::G_FFLOOR:
3294 case TargetOpcode::G_FCOS:
3295 case TargetOpcode::G_FSIN:
3296 case TargetOpcode::G_FTAN:
3297 case TargetOpcode::G_FACOS:
3298 case TargetOpcode::G_FASIN:
3299 case TargetOpcode::G_FATAN:
3300 case TargetOpcode::G_FATAN2:
3301 case TargetOpcode::G_FCOSH:
3302 case TargetOpcode::G_FSINH:
3303 case TargetOpcode::G_FTANH:
3304 case TargetOpcode::G_FLOG10:
3305 case TargetOpcode::G_FLOG:
3306 case TargetOpcode::G_FLOG2:
3307 case TargetOpcode::G_FRINT:
3308 case TargetOpcode::G_FNEARBYINT:
3309 case TargetOpcode::G_FSQRT:
3310 case TargetOpcode::G_FEXP:
3311 case TargetOpcode::G_FEXP2:
3312 case TargetOpcode::G_FEXP10:
3313 case TargetOpcode::G_FPOW:
3314 case TargetOpcode::G_INTRINSIC_TRUNC:
3315 case TargetOpcode::G_INTRINSIC_ROUND:
3316 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3317 assert(TypeIdx == 0);
3319
3320 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3321 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3322
3323 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3325 return Legalized;
3326 case TargetOpcode::G_FPOWI:
3327 case TargetOpcode::G_FLDEXP:
3328 case TargetOpcode::G_STRICT_FLDEXP: {
3329 if (TypeIdx == 0) {
3330 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3331 return UnableToLegalize;
3332
3334 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3335 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3337 return Legalized;
3338 }
3339
3340 if (TypeIdx == 1) {
3341 // For some reason SelectionDAG tries to promote to a libcall without
3342 // actually changing the integer type for promotion.
3344 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3346 return Legalized;
3347 }
3348
3349 return UnableToLegalize;
3350 }
3351 case TargetOpcode::G_FFREXP: {
3353
3354 if (TypeIdx == 0) {
3355 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3356 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3357 } else {
3358 widenScalarDst(MI, WideTy, 1);
3359 }
3360
3362 return Legalized;
3363 }
3364 case TargetOpcode::G_INTTOPTR:
3365 if (TypeIdx != 1)
3366 return UnableToLegalize;
3367
3369 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3371 return Legalized;
3372 case TargetOpcode::G_PTRTOINT:
3373 if (TypeIdx != 0)
3374 return UnableToLegalize;
3375
3377 widenScalarDst(MI, WideTy, 0);
3379 return Legalized;
3380 case TargetOpcode::G_BUILD_VECTOR: {
3382
3383 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3384 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3385 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3386
3387 // Avoid changing the result vector type if the source element type was
3388 // requested.
3389 if (TypeIdx == 1) {
3390 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3391 } else {
3392 widenScalarDst(MI, WideTy, 0);
3393 }
3394
3396 return Legalized;
3397 }
3398 case TargetOpcode::G_SEXT_INREG:
3399 if (TypeIdx != 0)
3400 return UnableToLegalize;
3401
3403 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3404 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3406 return Legalized;
3407 case TargetOpcode::G_PTRMASK: {
3408 if (TypeIdx != 1)
3409 return UnableToLegalize;
3411 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3413 return Legalized;
3414 }
3415 case TargetOpcode::G_VECREDUCE_ADD: {
3416 if (TypeIdx != 1)
3417 return UnableToLegalize;
3419 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3420 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3422 return Legalized;
3423 }
3424 case TargetOpcode::G_VECREDUCE_FADD:
3425 case TargetOpcode::G_VECREDUCE_FMUL:
3426 case TargetOpcode::G_VECREDUCE_FMIN:
3427 case TargetOpcode::G_VECREDUCE_FMAX:
3428 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3429 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3430 if (TypeIdx != 0)
3431 return UnableToLegalize;
3433 Register VecReg = MI.getOperand(1).getReg();
3434 LLT VecTy = MRI.getType(VecReg);
3435 LLT WideVecTy = VecTy.isVector()
3436 ? LLT::vector(VecTy.getElementCount(), WideTy)
3437 : WideTy;
3438 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3439 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3441 return Legalized;
3442 }
3443 case TargetOpcode::G_VSCALE: {
3444 MachineOperand &SrcMO = MI.getOperand(1);
3445 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3446 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3447 // The CImm is always a signed value
3448 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3450 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3451 widenScalarDst(MI, WideTy);
3453 return Legalized;
3454 }
3455 case TargetOpcode::G_SPLAT_VECTOR: {
3456 if (TypeIdx != 1)
3457 return UnableToLegalize;
3458
3460 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3462 return Legalized;
3463 }
3464 case TargetOpcode::G_INSERT_SUBVECTOR: {
3465 if (TypeIdx != 0)
3466 return UnableToLegalize;
3467
3468 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3469 Register BigVec = IS.getBigVec();
3470 Register SubVec = IS.getSubVec();
3471
3472 LLT SubVecTy = MRI.getType(SubVec);
3473 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3474
3475 // Widen the G_INSERT_SUBVECTOR
3476 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3477 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3478 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3479 IS.getIndexImm());
3480
3481 // Truncate back down
3482 auto SplatZero = MIRBuilder.buildSplatVector(
3483 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3484 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3485 SplatZero);
3486
3487 MI.eraseFromParent();
3488
3489 return Legalized;
3490 }
3491 }
3492}
3493
3494 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3495 MachineIRBuilder &B, Register Src, LLT Ty) {
3496 auto Unmerge = B.buildUnmerge(Ty, Src);
3497 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3498 Pieces.push_back(Unmerge.getReg(I));
3499}
3500
3501static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3502 MachineIRBuilder &MIRBuilder) {
3503 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3504 MachineFunction &MF = MIRBuilder.getMF();
3505 const DataLayout &DL = MIRBuilder.getDataLayout();
3506 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3507 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3508 LLT DstLLT = MRI.getType(DstReg);
3509
3510 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3511
3512 auto Addr = MIRBuilder.buildConstantPool(
3513 AddrPtrTy,
3514 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3515
3516 MachineMemOperand *MMO =
3517 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3518 MachineMemOperand::MOLoad, DstLLT, Alignment);
3519
3520 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3521}
3522
3523 LegalizerHelper::LegalizeResult
3524 LegalizerHelper::lowerConstant(MachineInstr &MI) {
3525 const MachineOperand &ConstOperand = MI.getOperand(1);
3526 const Constant *ConstantVal = ConstOperand.getCImm();
3527
3528 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3529 MI.eraseFromParent();
3530
3531 return Legalized;
3532}
3533
3534 LegalizerHelper::LegalizeResult
3535 LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3536 const MachineOperand &ConstOperand = MI.getOperand(1);
3537 const Constant *ConstantVal = ConstOperand.getFPImm();
3538
3539 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3540 MI.eraseFromParent();
3541
3542 return Legalized;
3543}
3544
3545 LegalizerHelper::LegalizeResult
3546 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3547 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3548 if (SrcTy.isVector()) {
3549 LLT SrcEltTy = SrcTy.getElementType();
3550 SmallVector<Register, 8> SrcRegs;
3551
3552 if (DstTy.isVector()) {
3553 int NumDstElt = DstTy.getNumElements();
3554 int NumSrcElt = SrcTy.getNumElements();
3555
3556 LLT DstEltTy = DstTy.getElementType();
3557 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3558 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3559
3560 // If there's an element size mismatch, insert intermediate casts to match
3561 // the result element type.
3562 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3563 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3564 //
3565 // =>
3566 //
3567 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3568 // %4:_(<2 x s8>) = G_BITCAST %2
3569 // %5:_(<2 x s8>) = G_BITCAST %3
3570 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3571 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3572 SrcPartTy = SrcEltTy;
3573 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3574 //
3575 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3576 //
3577 // =>
3578 //
3579 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3580 // %4:_(s16) = G_BITCAST %2
3581 // %5:_(s16) = G_BITCAST %3
3582 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3583 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3584 DstCastTy = DstEltTy;
3585 }
3586
3587 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3588 for (Register &SrcReg : SrcRegs)
3589 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3590 } else
3591 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3592
3593 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3594 MI.eraseFromParent();
3595 return Legalized;
3596 }
3597
3598 if (DstTy.isVector()) {
3599 SmallVector<Register, 8> SrcRegs;
3600 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3601 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3602 MI.eraseFromParent();
3603 return Legalized;
3604 }
3605
3606 return UnableToLegalize;
3607}
3608
3609/// Figure out the bit offset into a register when coercing a vector index for
3610/// the wide element type. This is only for the case when promoting a vector
3611/// to one with larger elements.
3612///
3613///
3614/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3615/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
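///
/// For example, indexing s8 elements inside s32 registers (NewEltSize = 32,
/// OldEltSize = 8) gives Log2EltRatio = 2, so:
///   %offset_idx  = G_AND %idx, 3         ; idx % 4
///   %offset_bits = G_SHL %offset_idx, 3  ; (idx % 4) * 8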
3616static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3617 Register Idx,
3618 unsigned NewEltSize,
3619 unsigned OldEltSize) {
3620 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3621 LLT IdxTy = B.getMRI()->getType(Idx);
3622
3623 // Now figure out the amount we need to shift to get the target bits.
3624 auto OffsetMask = B.buildConstant(
3625 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3626 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3627 return B.buildShl(IdxTy, OffsetIdx,
3628 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3629}
3630
3631/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3632/// is casting to a vector with a smaller element size, perform multiple element
3633/// extracts and merge the results. If this is coercing to a vector with larger
3634/// elements, index the bitcasted vector and extract the target element with bit
3635/// operations. This is intended to force the indexing in the native register
3636/// size for architectures that can dynamically index the register file.
3637LegalizerHelper::LegalizeResult
3638LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3639 LLT CastTy) {
3640 if (TypeIdx != 1)
3641 return UnableToLegalize;
3642
3643 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3644
3645 LLT SrcEltTy = SrcVecTy.getElementType();
3646 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3647 unsigned OldNumElts = SrcVecTy.getNumElements();
3648
3649 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3650 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3651
3652 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3653 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3654 if (NewNumElts > OldNumElts) {
3655 // Decreasing the vector element size
3656 //
3657 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3658 // =>
3659 // v4i32:castx = bitcast x:v2i64
3660 //
3661 // i64 = bitcast
3662 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3663 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3664 //
3665 if (NewNumElts % OldNumElts != 0)
3666 return UnableToLegalize;
3667
3668 // Type of the intermediate result vector.
3669 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3670 LLT MidTy =
3671 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3672
3673 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3674
3675 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3676 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3677
3678 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3679 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3680 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3681 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3682 NewOps[I] = Elt.getReg(0);
3683 }
3684
3685 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3686 MIRBuilder.buildBitcast(Dst, NewVec);
3687 MI.eraseFromParent();
3688 return Legalized;
3689 }
3690
3691 if (NewNumElts < OldNumElts) {
3692 if (NewEltSize % OldEltSize != 0)
3693 return UnableToLegalize;
3694
3695 // This only depends on powers of 2 because we use bit tricks to figure out
3696 // the bit offset we need to shift to get the target element. A general
3697 // expansion could emit division/multiply.
3698 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3699 return UnableToLegalize;
3700
3701 // Increasing the vector element size.
3702 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3703 //
3704 // =>
3705 //
3706 // %cast = G_BITCAST %vec
3707 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3708 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3709 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3710 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3711 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3712 // %elt = G_TRUNC %elt_bits
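  // E.g. extracting an s8 element from a <16 x s8> viewed as a <4 x s32>:
  //   %scaled_idx  = %idx >> 2 selects the containing s32 lane, and
  //   %offset_bits = (%idx & 3) * 8 selects the byte within that lane.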
3713
3714 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3715 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3716
3717 // Divide to get the index in the wider element type.
3718 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3719
3720 Register WideElt = CastVec;
3721 if (CastTy.isVector()) {
3722 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3723 ScaledIdx).getReg(0);
3724 }
3725
3726 // Compute the bit offset into the register of the target element.
3727 Register OffsetBits = getBitcastWiderVectorElementOffset(
3728 MIRBuilder, Idx, NewEltSize, OldEltSize);
3729
3730 // Shift the wide element to get the target element.
3731 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3732 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3733 MI.eraseFromParent();
3734 return Legalized;
3735 }
3736
3737 return UnableToLegalize;
3738}
3739
3740/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits, while
3741/// preserving the other bits in \p TargetReg.
3742///
3743/// (InsertReg << Offset) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << Offset))
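///
/// For example, inserting an s8 value into an s32 target at bit offset 16:
///   mask   = 0xFF << 16 = 0x00FF0000
///   result = (Target & ~0x00FF0000) | (zext(Insert) << 16)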
3744static Register buildBitFieldInsert(MachineIRBuilder &B,
3745 Register TargetReg, Register InsertReg,
3746 Register OffsetBits) {
3747 LLT TargetTy = B.getMRI()->getType(TargetReg);
3748 LLT InsertTy = B.getMRI()->getType(InsertReg);
3749 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3750 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3751
3752 // Produce a bitmask of the value to insert
3753 auto EltMask = B.buildConstant(
3754 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3755 InsertTy.getSizeInBits()));
3756 // Shift it into position
3757 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3758 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3759
3760 // Clear out the bits in the wide element
3761 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3762
3763 // The value to insert has all zeros already, so stick it into the masked
3764 // wide element.
3765 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3766}
3767
3768/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3769/// is increasing the element size, perform the indexing in the target element
3770/// type, and use bit operations to insert at the element position. This is
3771/// intended for architectures that can dynamically index the register file and
3772/// want to force indexing in the native register size.
3773LegalizerHelper::LegalizeResult
3774LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3775 LLT CastTy) {
3776 if (TypeIdx != 0)
3777 return UnableToLegalize;
3778
3779 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3780 MI.getFirst4RegLLTs();
3781 LLT VecTy = DstTy;
3782
3783 LLT VecEltTy = VecTy.getElementType();
3784 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3785 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3786 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3787
3788 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3789 unsigned OldNumElts = VecTy.getNumElements();
3790
3791 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3792 if (NewNumElts < OldNumElts) {
3793 if (NewEltSize % OldEltSize != 0)
3794 return UnableToLegalize;
3795
3796 // This only depends on powers of 2 because we use bit tricks to figure out
3797 // the bit offset we need to shift to get the target element. A general
3798 // expansion could emit division/multiply.
3799 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3800 return UnableToLegalize;
3801
3802 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3803 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3804
3805 // Divide to get the index in the wider element type.
3806 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3807
3808 Register ExtractedElt = CastVec;
3809 if (CastTy.isVector()) {
3810 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3811 ScaledIdx).getReg(0);
3812 }
3813
3814 // Compute the bit offset into the register of the target element.
3815 Register OffsetBits = getBitcastWiderVectorElementOffset(
3816 MIRBuilder, Idx, NewEltSize, OldEltSize);
3817
3818 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3819 Val, OffsetBits);
3820 if (CastTy.isVector()) {
3821 InsertedElt = MIRBuilder.buildInsertVectorElement(
3822 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3823 }
3824
3825 MIRBuilder.buildBitcast(Dst, InsertedElt);
3826 MI.eraseFromParent();
3827 return Legalized;
3828 }
3829
3830 return UnableToLegalize;
3831}
3832
3833// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3834// those whose operands are smaller than a legal size.
3835//
3836// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3837//
3838// ===>
3839//
3840// s32 = G_BITCAST <4 x s8>
3841// s32 = G_BITCAST <4 x s8>
3842// s32 = G_BITCAST <4 x s8>
3843// s32 = G_BITCAST <4 x s8>
3844// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3845// <16 x s8> = G_BITCAST <4 x s32>
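//
// Each <4 x s8> operand occupies 32 bits, so it can be reinterpreted as a
// single scalar element of the wider G_BUILD_VECTOR.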
3846LegalizerHelper::LegalizeResult
3847LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3848 LLT CastTy) {
3849 // Only G_CONCAT_VECTORS is handled here.
3850 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3851 if (!ConcatMI) {
3852 return UnableToLegalize;
3853 }
3854
3855 // Check if bitcast is Legal
3856 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3857 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3858
3859 // Check if the build vector is Legal
3860 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3861 return UnableToLegalize;
3862 }
3863
3864 // Bitcast the sources
3865 SmallVector<Register> BitcastRegs;
3866 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3867 BitcastRegs.push_back(
3868 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3869 .getReg(0));
3870 }
3871
3872 // Build the scalar values into a vector
3873 Register BuildReg =
3874 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3875 MIRBuilder.buildBitcast(DstReg, BuildReg);
3876
3877 MI.eraseFromParent();
3878 return Legalized;
3879}
3880
3881// This bitcasts a shuffle vector to a different type currently of the same
3882// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3883// will be used instead.
3884//
3885// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3886// ===>
3887// <4 x s64> = G_PTRTOINT <4 x p0>
3888// <4 x s64> = G_PTRTOINT <4 x p0>
3889// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3890// <16 x p0> = G_INTTOPTR <16 x s64>
3891LegalizerHelper::LegalizeResult
3892LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3893 LLT CastTy) {
3894 auto ShuffleMI = cast<GShuffleVector>(&MI);
3895 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3896 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3897
3898 // We currently only handle vectors of the same size.
3899 if (TypeIdx != 0 ||
3900 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3901 CastTy.getElementCount() != DstTy.getElementCount())
3902 return UnableToLegalize;
3903
3904 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3905
3906 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3907 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3908 auto Shuf =
3909 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3910 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3911
3912 MI.eraseFromParent();
3913 return Legalized;
3914}
3915
3916/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3917///
3918/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3919///
3920/// ===>
3921///
3922/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3923/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
3924/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
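///
/// Eight i1 elements are packed into each i8 (AdjustAmt = 8 below), which is
/// why the extraction index N is divided by 8.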
3925LegalizerHelper::LegalizeResult
3926LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
3927 LLT CastTy) {
3928 auto ES = cast<GExtractSubvector>(&MI);
3929
3930 if (!CastTy.isVector())
3931 return UnableToLegalize;
3932
3933 if (TypeIdx != 0)
3934 return UnableToLegalize;
3935
3936 Register Dst = ES->getReg(0);
3937 Register Src = ES->getSrcVec();
3938 uint64_t Idx = ES->getIndexImm();
3939
3941
3942 LLT DstTy = MRI.getType(Dst);
3943 LLT SrcTy = MRI.getType(Src);
3944 ElementCount DstTyEC = DstTy.getElementCount();
3945 ElementCount SrcTyEC = SrcTy.getElementCount();
3946 auto DstTyMinElts = DstTyEC.getKnownMinValue();
3947 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
3948
3949 if (DstTy == CastTy)
3950 return Legalized;
3951
3952 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
3953 return UnableToLegalize;
3954
3955 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
3956 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
3957 if (CastEltSize < DstEltSize)
3958 return UnableToLegalize;
3959
3960 auto AdjustAmt = CastEltSize / DstEltSize;
3961 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3962 SrcTyMinElts % AdjustAmt != 0)
3963 return UnableToLegalize;
3964
3965 Idx /= AdjustAmt;
3966 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3967 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
3968 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
3969 MIRBuilder.buildBitcast(Dst, PromotedES);
3970
3971 ES->eraseFromParent();
3972 return Legalized;
3973}
3974
3975/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
3976///
3977/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
3978/// <vscale x 8 x i1>,
3979/// N
3980///
3981/// ===>
3982///
3983/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3984/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
3985/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
3986/// <vscale x 1 x i8>, N / 8
3987/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
3988LegalizerHelper::LegalizeResult
3989LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
3990 LLT CastTy) {
3991 auto ES = cast<GInsertSubvector>(&MI);
3992
3993 if (!CastTy.isVector())
3994 return UnableToLegalize;
3995
3996 if (TypeIdx != 0)
3997 return UnableToLegalize;
3998
3999 Register Dst = ES->getReg(0);
4000 Register BigVec = ES->getBigVec();
4001 Register SubVec = ES->getSubVec();
4002 uint64_t Idx = ES->getIndexImm();
4003
4005
4006 LLT DstTy = MRI.getType(Dst);
4007 LLT BigVecTy = MRI.getType(BigVec);
4008 LLT SubVecTy = MRI.getType(SubVec);
4009
4010 if (DstTy == CastTy)
4011 return Legalized;
4012
4013 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4014 return UnableToLegalize;
4015
4016 ElementCount DstTyEC = DstTy.getElementCount();
4017 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4018 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4019 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4020 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4021 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4022
4023 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4024 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4025 if (CastEltSize < DstEltSize)
4026 return UnableToLegalize;
4027
4028 auto AdjustAmt = CastEltSize / DstEltSize;
4029 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4030 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4031 return UnableToLegalize;
4032
4033 Idx /= AdjustAmt;
4034 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4035 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4036 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4037 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4038 auto PromotedIS =
4039 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4040 MIRBuilder.buildBitcast(Dst, PromotedIS);
4041
4042 ES->eraseFromParent();
4043 return Legalized;
4044}
4045
4046LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4047 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4048 Register DstReg = LoadMI.getDstReg();
4049 Register PtrReg = LoadMI.getPointerReg();
4050 LLT DstTy = MRI.getType(DstReg);
4051 MachineMemOperand &MMO = LoadMI.getMMO();
4052 LLT MemTy = MMO.getMemoryType();
4053 MachineFunction &MF = MIRBuilder.getMF();
4054
4055 unsigned MemSizeInBits = MemTy.getSizeInBits();
4056 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4057
4058 if (MemSizeInBits != MemStoreSizeInBits) {
4059 if (MemTy.isVector())
4060 return UnableToLegalize;
4061
4062 // Promote to a byte-sized load if not loading an integral number of
4063 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4064 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4065 MachineMemOperand *NewMMO =
4066 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4067
4068 Register LoadReg = DstReg;
4069 LLT LoadTy = DstTy;
4070
4071 // If this wasn't already an extending load, we need to widen the result
4072 // register to avoid creating a load with a narrower result than the source.
4073 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4074 LoadTy = WideMemTy;
4075 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4076 }
4077
4078 if (isa<GSExtLoad>(LoadMI)) {
4079 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4080 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4081 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4082 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4083 // The extra bits are guaranteed to be zero, since we stored them that
4084 // way. A zext load from Wide thus automatically gives zext from MemVT.
4085 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4086 } else {
4087 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4088 }
4089
4090 if (DstTy != LoadTy)
4091 MIRBuilder.buildTrunc(DstReg, LoadReg);
4092
4093 LoadMI.eraseFromParent();
4094 return Legalized;
4095 }
4096
4097 // Big endian lowering not implemented.
4098 if (MIRBuilder.getDataLayout().isBigEndian())
4099 return UnableToLegalize;
4100
4101 // This load needs splitting into power of 2 sized loads.
4102 //
4103 // Our strategy here is to generate anyextending loads for the smaller
4104 // types up to next power-2 result type, and then combine the two larger
4105 // result values together, before truncating back down to the non-pow-2
4106 // type.
4107 // E.g. v1 = i24 load =>
4108 // v2 = i32 zextload (2 byte)
4109 // v3 = i32 load (1 byte)
4110 // v4 = i32 shl v3, 16
4111 // v5 = i32 or v4, v2
4112 // v1 = i24 trunc v5
4113 // By doing this we generate the correct truncate which should get
4114 // combined away as an artifact with a matching extend.
4115
4116 uint64_t LargeSplitSize, SmallSplitSize;
4117
4118 if (!isPowerOf2_32(MemSizeInBits)) {
4119 // This load needs splitting into power of 2 sized loads.
4120 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4121 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4122 } else {
4123 // This is already a power of 2, but we still need to split this in half.
4124 //
4125 // Assume we're being asked to decompose an unaligned load.
4126 // TODO: If this requires multiple splits, handle them all at once.
4127 auto &Ctx = MF.getFunction().getContext();
4128 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4129 return UnableToLegalize;
4130
4131 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4132 }
4133
4134 if (MemTy.isVector()) {
4135 // TODO: Handle vector extloads
4136 if (MemTy != DstTy)
4137 return UnableToLegalize;
4138
4139 Align Alignment = LoadMI.getAlign();
4140 // Given an alignment larger than the size of the memory, we can increase
4141 // the size of the load without needing to scalarize it.
4142 if (Alignment.value() * 8 > MemSizeInBits &&
4145 DstTy.getElementType());
4146 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4147 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4149 NewLoad.getReg(0));
4150 LoadMI.eraseFromParent();
4151 return Legalized;
4152 }
4153
4154 // TODO: We can do better than scalarizing the vector and at least split it
4155 // in half.
4156 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4157 }
4158
4159 MachineMemOperand *LargeMMO =
4160 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4161 MachineMemOperand *SmallMMO =
4162 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4163
4164 LLT PtrTy = MRI.getType(PtrReg);
4165 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4166 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4167 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4168 PtrReg, *LargeMMO);
4169
4170 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4171 LargeSplitSize / 8);
4172 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4173 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4174 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4175 SmallPtr, *SmallMMO);
4176
4177 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4178 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4179
4180 if (AnyExtTy == DstTy)
4181 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4182 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4183 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4184 MIRBuilder.buildTrunc(DstReg, {Or});
4185 } else {
4186 assert(DstTy.isPointer() && "expected pointer");
4187 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4188
4189 // FIXME: We currently consider this to be illegal for non-integral address
4190 // spaces, but we still need a way to reinterpret the bits.
4191 MIRBuilder.buildIntToPtr(DstReg, Or);
4192 }
4193
4194 LoadMI.eraseFromParent();
4195 return Legalized;
4196}
4197
4198LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4199 // Lower a non-power of 2 store into multiple pow-2 stores.
4200 // E.g. split an i24 store into an i16 store + i8 store.
4201 // We do this by first extending the stored value to the next largest power
4202 // of 2 type, and then using truncating stores to store the components.
4203 // By doing this, likewise with G_LOAD, generate an extend that can be
4204 // artifact-combined away instead of leaving behind extracts.
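  // E.g. for a little-endian i24 store the emitted sequence is roughly:
  //   %ext = G_ANYEXT %val to s32
  //   %hi  = G_LSHR %ext, 16
  //   G_STORE %ext, %ptr          ; s16 truncating store
  //   G_STORE %hi,  %ptr + 2      ; s8 truncating store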
4205 Register SrcReg = StoreMI.getValueReg();
4206 Register PtrReg = StoreMI.getPointerReg();
4207 LLT SrcTy = MRI.getType(SrcReg);
4208 MachineFunction &MF = MIRBuilder.getMF();
4209 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4210 LLT MemTy = MMO.getMemoryType();
4211
4212 unsigned StoreWidth = MemTy.getSizeInBits();
4213 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4214
4215 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4216 // Promote to a byte-sized store with upper bits zero if not
4217 // storing an integral number of bytes. For example, promote
4218 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4219 LLT WideTy = LLT::scalar(StoreSizeInBits);
4220
4221 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4222 // Avoid creating a store with a narrower source than result.
4223 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4224 SrcTy = WideTy;
4225 }
4226
4227 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4228
4229 MachineMemOperand *NewMMO =
4230 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4231 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4232 StoreMI.eraseFromParent();
4233 return Legalized;
4234 }
4235
4236 if (MemTy.isVector()) {
4237 if (MemTy != SrcTy)
4238 return scalarizeVectorBooleanStore(StoreMI);
4239
4240 // TODO: We can do better than scalarizing the vector and at least split it
4241 // in half.
4242 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4243 }
4244
4245 unsigned MemSizeInBits = MemTy.getSizeInBits();
4246 uint64_t LargeSplitSize, SmallSplitSize;
4247
4248 if (!isPowerOf2_32(MemSizeInBits)) {
4249 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4250 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4251 } else {
4252 auto &Ctx = MF.getFunction().getContext();
4253 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4254 return UnableToLegalize; // Don't know what we're being asked to do.
4255
4256 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4257 }
4258
4259 // Extend to the next pow-2. If this store was itself the result of lowering,
4260 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4261 // that's wider than the stored size.
4262 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4263 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4264
4265 if (SrcTy.isPointer()) {
4266 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4267 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4268 }
4269
4270 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4271
4272 // Obtain the smaller value by shifting away the larger value.
4273 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4274 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4275
4276 // Generate the PtrAdd and truncating stores.
4277 LLT PtrTy = MRI.getType(PtrReg);
4278 auto OffsetCst = MIRBuilder.buildConstant(
4279 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4280 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4281
4282 MachineMemOperand *LargeMMO =
4283 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4284 MachineMemOperand *SmallMMO =
4285 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4286 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4287 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4288 StoreMI.eraseFromParent();
4289 return Legalized;
4290}
4291
4292LegalizerHelper::LegalizeResult
4293LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4294 Register SrcReg = StoreMI.getValueReg();
4295 Register PtrReg = StoreMI.getPointerReg();
4296 LLT SrcTy = MRI.getType(SrcReg);
4297 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4298 LLT MemTy = MMO.getMemoryType();
4299 LLT MemScalarTy = MemTy.getElementType();
4300 MachineFunction &MF = MIRBuilder.getMF();
4301
4302 assert(SrcTy.isVector() && "Expect a vector store type");
4303
4304 if (!MemScalarTy.isByteSized()) {
4305 // We need to build an integer scalar of the vector bit pattern.
4306 // It's not legal for us to add padding when storing a vector.
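    // E.g. a <4 x s1> store packs lane I into bit I (bit 3 - I on big-endian
    // targets) of an s4 integer, and that integer is stored as a whole.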
4307 unsigned NumBits = MemTy.getSizeInBits();
4308 LLT IntTy = LLT::scalar(NumBits);
4309 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4310 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4311
4312 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4313 auto Elt = MIRBuilder.buildExtractVectorElement(
4314 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4315 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4316 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4317 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4318 ? (MemTy.getNumElements() - 1) - I
4319 : I;
4320 auto ShiftAmt = MIRBuilder.buildConstant(
4321 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4322 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4323 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4324 }
4325 auto PtrInfo = MMO.getPointerInfo();
4326 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4327 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4328 StoreMI.eraseFromParent();
4329 return Legalized;
4330 }
4331
4332 // TODO: implement simple scalarization.
4333 return UnableToLegalize;
4334}
4335
4336LegalizerHelper::LegalizeResult
4337LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4338 switch (MI.getOpcode()) {
4339 case TargetOpcode::G_LOAD: {
4340 if (TypeIdx != 0)
4341 return UnableToLegalize;
4342 MachineMemOperand &MMO = **MI.memoperands_begin();
4343
4344 // Not sure how to interpret a bitcast of an extending load.
4345 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4346 return UnableToLegalize;
4347
4349 bitcastDst(MI, CastTy, 0);
4350 MMO.setType(CastTy);
4351 // The range metadata is no longer valid when reinterpreted as a different
4352 // type.
4353 MMO.clearRanges();
4355 return Legalized;
4356 }
4357 case TargetOpcode::G_STORE: {
4358 if (TypeIdx != 0)
4359 return UnableToLegalize;
4360
4361 MachineMemOperand &MMO = **MI.memoperands_begin();
4362
4363 // Not sure how to interpret a bitcast of a truncating store.
4364 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4365 return UnableToLegalize;
4366
4368 bitcastSrc(MI, CastTy, 0);
4369 MMO.setType(CastTy);
4371 return Legalized;
4372 }
4373 case TargetOpcode::G_SELECT: {
4374 if (TypeIdx != 0)
4375 return UnableToLegalize;
4376
4377 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4378 LLVM_DEBUG(
4379 dbgs() << "bitcast action not implemented for vector select\n");
4380 return UnableToLegalize;
4381 }
4382
4384 bitcastSrc(MI, CastTy, 2);
4385 bitcastSrc(MI, CastTy, 3);
4386 bitcastDst(MI, CastTy, 0);
4388 return Legalized;
4389 }
4390 case TargetOpcode::G_AND:
4391 case TargetOpcode::G_OR:
4392 case TargetOpcode::G_XOR: {
4394 bitcastSrc(MI, CastTy, 1);
4395 bitcastSrc(MI, CastTy, 2);
4396 bitcastDst(MI, CastTy, 0);
4398 return Legalized;
4399 }
4400 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4401 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4402 case TargetOpcode::G_INSERT_VECTOR_ELT:
4403 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4404 case TargetOpcode::G_CONCAT_VECTORS:
4405 return bitcastConcatVector(MI, TypeIdx, CastTy);
4406 case TargetOpcode::G_SHUFFLE_VECTOR:
4407 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4408 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4409 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4410 case TargetOpcode::G_INSERT_SUBVECTOR:
4411 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4412 default:
4413 return UnableToLegalize;
4414 }
4415}
4416
4417// Legalize an instruction by changing the opcode in place.
4418void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4420 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4422}
4423
4424LegalizerHelper::LegalizeResult
4425LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4426 using namespace TargetOpcode;
4427
4428 switch(MI.getOpcode()) {
4429 default:
4430 return UnableToLegalize;
4431 case TargetOpcode::G_FCONSTANT:
4432 return lowerFConstant(MI);
4433 case TargetOpcode::G_BITCAST:
4434 return lowerBitcast(MI);
4435 case TargetOpcode::G_SREM:
4436 case TargetOpcode::G_UREM: {
4437 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4438 auto Quot =
4439 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4440 {MI.getOperand(1), MI.getOperand(2)});
4441
4442 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4443 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4444 MI.eraseFromParent();
4445 return Legalized;
4446 }
4447 case TargetOpcode::G_SADDO:
4448 case TargetOpcode::G_SSUBO:
4449 return lowerSADDO_SSUBO(MI);
4450 case TargetOpcode::G_UMULH:
4451 case TargetOpcode::G_SMULH:
4452 return lowerSMULH_UMULH(MI);
4453 case TargetOpcode::G_SMULO:
4454 case TargetOpcode::G_UMULO: {
4455 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4456 // result.
4457 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4458 LLT Ty = MRI.getType(Res);
4459
4460 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4461 ? TargetOpcode::G_SMULH
4462 : TargetOpcode::G_UMULH;
4463
4465 const auto &TII = MIRBuilder.getTII();
4466 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4467 MI.removeOperand(1);
4469
4470 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4471 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4472
4473 // Move insert point forward so we can use the Res register if needed.
4474 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4475
4476 // For *signed* multiply, overflow is detected by checking:
4477 // (hi != (lo >> bitwidth-1))
4478 if (Opcode == TargetOpcode::G_SMULH) {
4479 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4480 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4481 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4482 } else {
4483 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4484 }
4485 return Legalized;
4486 }
4487 case TargetOpcode::G_FNEG: {
4488 auto [Res, SubByReg] = MI.getFirst2Regs();
4489 LLT Ty = MRI.getType(Res);
4490
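    // Negating an IEEE float only flips its sign bit, so XOR with a constant
    // that has just the sign bit set (e.g. 0x80000000 for s32).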
4491 auto SignMask = MIRBuilder.buildConstant(
4492 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4493 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4494 MI.eraseFromParent();
4495 return Legalized;
4496 }
4497 case TargetOpcode::G_FSUB:
4498 case TargetOpcode::G_STRICT_FSUB: {
4499 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4500 LLT Ty = MRI.getType(Res);
4501
4502 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4503 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4504
4505 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4506 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4507 else
4508 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4509
4510 MI.eraseFromParent();
4511 return Legalized;
4512 }
4513 case TargetOpcode::G_FMAD:
4514 return lowerFMad(MI);
4515 case TargetOpcode::G_FFLOOR:
4516 return lowerFFloor(MI);
4517 case TargetOpcode::G_LROUND:
4518 case TargetOpcode::G_LLROUND: {
4519 Register DstReg = MI.getOperand(0).getReg();
4520 Register SrcReg = MI.getOperand(1).getReg();
4521 LLT SrcTy = MRI.getType(SrcReg);
4522 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4523 {SrcReg});
4524 MIRBuilder.buildFPTOSI(DstReg, Round);
4525 MI.eraseFromParent();
4526 return Legalized;
4527 }
4528 case TargetOpcode::G_INTRINSIC_ROUND:
4529 return lowerIntrinsicRound(MI);
4530 case TargetOpcode::G_FRINT: {
4531 // Since round even is the assumed rounding mode for unconstrained FP
4532 // operations, rint and roundeven are the same operation.
4533 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4534 return Legalized;
4535 }
4536 case TargetOpcode::G_INTRINSIC_LRINT:
4537 case TargetOpcode::G_INTRINSIC_LLRINT: {
4538 Register DstReg = MI.getOperand(0).getReg();
4539 Register SrcReg = MI.getOperand(1).getReg();
4540 LLT SrcTy = MRI.getType(SrcReg);
4541 auto Round =
4542 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4543 MIRBuilder.buildFPTOSI(DstReg, Round);
4544 MI.eraseFromParent();
4545 return Legalized;
4546 }
4547 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4548 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4549 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4550 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4551 **MI.memoperands_begin());
4552 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4553 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4554 MI.eraseFromParent();
4555 return Legalized;
4556 }
4557 case TargetOpcode::G_LOAD:
4558 case TargetOpcode::G_SEXTLOAD:
4559 case TargetOpcode::G_ZEXTLOAD:
4560 return lowerLoad(cast<GAnyLoad>(MI));
4561 case TargetOpcode::G_STORE:
4562 return lowerStore(cast<GStore>(MI));
4563 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4564 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4565 case TargetOpcode::G_CTLZ:
4566 case TargetOpcode::G_CTTZ:
4567 case TargetOpcode::G_CTPOP:
4568 return lowerBitCount(MI);
4569 case G_UADDO: {
4570 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4571
4572 Register NewRes = MRI.cloneVirtualRegister(Res);
4573
4574 MIRBuilder.buildAdd(NewRes, LHS, RHS);
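    // An unsigned add overflowed iff the wrapped sum is (unsigned) smaller
    // than either operand; compare it against RHS.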
4575 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4576
4577 MIRBuilder.buildCopy(Res, NewRes);
4578
4579 MI.eraseFromParent();
4580 return Legalized;
4581 }
4582 case G_UADDE: {
4583 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4584 const LLT CondTy = MRI.getType(CarryOut);
4585 const LLT Ty = MRI.getType(Res);
4586
4587 Register NewRes = MRI.cloneVirtualRegister(Res);
4588
4589 // Initial add of the two operands.
4590 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4591
4592 // Initial check for carry.
4593 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4594
4595 // Add the sum and the carry.
4596 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4597 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4598
4599 // Second check for carry. We can only carry if the initial sum is all 1s
4600 // and the carry is set, resulting in a new sum of 0.
4601 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4602 auto ResEqZero =
4603 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4604 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4605 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4606
4607 MIRBuilder.buildCopy(Res, NewRes);
4608
4609 MI.eraseFromParent();
4610 return Legalized;
4611 }
4612 case G_USUBO: {
4613 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4614
4615 MIRBuilder.buildSub(Res, LHS, RHS);
4616 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4617
4618 MI.eraseFromParent();
4619 return Legalized;
4620 }
4621 case G_USUBE: {
4622 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4623 const LLT CondTy = MRI.getType(BorrowOut);
4624 const LLT Ty = MRI.getType(Res);
4625
4626 // Initial subtract of the two operands.
4627 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4628
4629 // Initial check for borrow.
4630 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4631
4632 // Subtract the borrow from the first subtract.
4633 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4634 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4635
4636 // Second check for borrow. We can only borrow if the initial difference is
4637 // 0 and the borrow is set, resulting in a new difference of all 1s.
4638 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4639 auto TmpResEqZero =
4640 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4641 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4642 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4643
4644 MI.eraseFromParent();
4645 return Legalized;
4646 }
4647 case G_UITOFP:
4648 return lowerUITOFP(MI);
4649 case G_SITOFP:
4650 return lowerSITOFP(MI);
4651 case G_FPTOUI:
4652 return lowerFPTOUI(MI);
4653 case G_FPTOSI:
4654 return lowerFPTOSI(MI);
4655 case G_FPTOUI_SAT:
4656 case G_FPTOSI_SAT:
4657 return lowerFPTOINT_SAT(MI);
4658 case G_FPTRUNC:
4659 return lowerFPTRUNC(MI);
4660 case G_FPOWI:
4661 return lowerFPOWI(MI);
4662 case G_SMIN:
4663 case G_SMAX:
4664 case G_UMIN:
4665 case G_UMAX:
4666 return lowerMinMax(MI);
4667 case G_SCMP:
4668 case G_UCMP:
4669 return lowerThreewayCompare(MI);
4670 case G_FCOPYSIGN:
4671 return lowerFCopySign(MI);
4672 case G_FMINNUM:
4673 case G_FMAXNUM:
4674 case G_FMINIMUMNUM:
4675 case G_FMAXIMUMNUM:
4676 return lowerFMinNumMaxNum(MI);
4677 case G_MERGE_VALUES:
4678 return lowerMergeValues(MI);
4679 case G_UNMERGE_VALUES:
4680 return lowerUnmergeValues(MI);
4681 case TargetOpcode::G_SEXT_INREG: {
4682 assert(MI.getOperand(2).isImm() && "Expected immediate");
4683 int64_t SizeInBits = MI.getOperand(2).getImm();
4684
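    // Sign-extend from SizeInBits by shifting the value up to the top of the
    // register and arithmetic-shifting it back down, e.g. for s32 with
    // SizeInBits = 8: ashr(shl(x, 24), 24).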
4685 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4686 LLT DstTy = MRI.getType(DstReg);
4687 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4688
4689 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4690 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4691 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4692 MI.eraseFromParent();
4693 return Legalized;
4694 }
4695 case G_EXTRACT_VECTOR_ELT:
4696 case G_INSERT_VECTOR_ELT:
4697 return lowerExtractInsertVectorElt(MI);
4698 case G_SHUFFLE_VECTOR:
4699 return lowerShuffleVector(MI);
4700 case G_VECTOR_COMPRESS:
4701 return lowerVECTOR_COMPRESS(MI);
4702 case G_DYN_STACKALLOC:
4703 return lowerDynStackAlloc(MI);
4704 case G_STACKSAVE:
4705 return lowerStackSave(MI);
4706 case G_STACKRESTORE:
4707 return lowerStackRestore(MI);
4708 case G_EXTRACT:
4709 return lowerExtract(MI);
4710 case G_INSERT:
4711 return lowerInsert(MI);
4712 case G_BSWAP:
4713 return lowerBswap(MI);
4714 case G_BITREVERSE:
4715 return lowerBitreverse(MI);
4716 case G_READ_REGISTER:
4717 case G_WRITE_REGISTER:
4718 return lowerReadWriteRegister(MI);
4719 case G_UADDSAT:
4720 case G_USUBSAT: {
4721 // Try to make a reasonable guess about which lowering strategy to use. The
4722 // target can override this with custom lowering and calling the
4723 // implementation functions.
4724 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4725 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4726 return lowerAddSubSatToMinMax(MI);
4727 return lowerAddSubSatToAddoSubo(MI);
4728 }
4729 case G_SADDSAT:
4730 case G_SSUBSAT: {
4731 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4732
4733 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4734 // since it's a shorter expansion. However, we would need to figure out the
4735 // preferred boolean type for the carry out for the query.
4736 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4737 return lowerAddSubSatToMinMax(MI);
4738 return lowerAddSubSatToAddoSubo(MI);
4739 }
4740 case G_SSHLSAT:
4741 case G_USHLSAT:
4742 return lowerShlSat(MI);
4743 case G_ABS:
4744 return lowerAbsToAddXor(MI);
4745 case G_FABS:
4746 return lowerFAbs(MI);
4747 case G_SELECT:
4748 return lowerSelect(MI);
4749 case G_IS_FPCLASS:
4750 return lowerISFPCLASS(MI);
4751 case G_SDIVREM:
4752 case G_UDIVREM:
4753 return lowerDIVREM(MI);
4754 case G_FSHL:
4755 case G_FSHR:
4756 return lowerFunnelShift(MI);
4757 case G_ROTL:
4758 case G_ROTR:
4759 return lowerRotate(MI);
4760 case G_MEMSET:
4761 case G_MEMCPY:
4762 case G_MEMMOVE:
4763 return lowerMemCpyFamily(MI);
4764 case G_MEMCPY_INLINE:
4765 return lowerMemcpyInline(MI);
4766 case G_ZEXT:
4767 case G_SEXT:
4768 case G_ANYEXT:
4769 return lowerEXT(MI);
4770 case G_TRUNC:
4771 return lowerTRUNC(MI);
4772 GISEL_VECREDUCE_CASES_NONSEQ
4773 return lowerVectorReduction(MI);
4774 case G_VAARG:
4775 return lowerVAArg(MI);
4776 }
4777}
4778
4779Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4780 Align MinAlign) const {
4781 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4782 // datalayout for the preferred alignment. Also there should be a target hook
4783 // for this to allow targets to reduce the alignment and ignore the
4784 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4785 // the type.
4786 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4787}
4788
4789MachineInstrBuilder
4790LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4791 MachinePointerInfo &PtrInfo) {
4792 MachineFunction &MF = MIRBuilder.getMF();
4793 const DataLayout &DL = MIRBuilder.getDataLayout();
4794 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4795
4796 unsigned AddrSpace = DL.getAllocaAddrSpace();
4797 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4798
4799 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4800 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4801}
4802
4803MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4804 const SrcOp &Val) {
4805 LLT SrcTy = Val.getLLTTy(MRI);
4806 Align StackTypeAlign =
4807 std::max(getStackTemporaryAlignment(SrcTy),
4808 getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4809 MachinePointerInfo PtrInfo;
4810 auto StackTemp =
4811 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4812
4813 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4814 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4815}
4816
4817static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4818 LLT VecTy) {
4819 LLT IdxTy = B.getMRI()->getType(IdxReg);
4820 unsigned NElts = VecTy.getNumElements();
4821
4822 int64_t IdxVal;
4823 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4824 if (IdxVal < VecTy.getNumElements())
4825 return IdxReg;
4826 // If a constant index would be out of bounds, clamp it as well.
4827 }
4828
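  // E.g. for NElts = 4 the index is simply masked with 3; for NElts = 3 it is
  // clamped with umin(%idx, 2).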
4829 if (isPowerOf2_32(NElts)) {
4830 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4831 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4832 }
4833
4834 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4835 .getReg(0);
4836}
4837
4838Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4839 Register Index) {
4840 LLT EltTy = VecTy.getElementType();
4841
4842 // Calculate the element offset and add it to the pointer.
4843 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4844 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4845 "Converting bits to bytes lost precision");
4846
4847 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4848
4849 // Convert index to the correct size for the address space.
4850 const DataLayout &DL = MIRBuilder.getDataLayout();
4851 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4852 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4853 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4854 if (IdxTy != MRI.getType(Index))
4855 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4856
4857 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4858 MIRBuilder.buildConstant(IdxTy, EltSize));
4859
4860 LLT PtrTy = MRI.getType(VecPtr);
4861 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4862}
4863
4864#ifndef NDEBUG
4865/// Check that all vector operands have the same number of elements. Other
4866/// operands should be listed in \p NonVecOpIndices.
4867static bool hasSameNumEltsOnAllVectorOperands(
4868 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4869 std::initializer_list<unsigned> NonVecOpIndices) {
4870 if (MI.getNumMemOperands() != 0)
4871 return false;
4872
4873 LLT VecTy = MRI.getType(MI.getReg(0));
4874 if (!VecTy.isVector())
4875 return false;
4876 unsigned NumElts = VecTy.getNumElements();
4877
4878 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4879 MachineOperand &Op = MI.getOperand(OpIdx);
4880 if (!Op.isReg()) {
4881 if (!is_contained(NonVecOpIndices, OpIdx))
4882 return false;
4883 continue;
4884 }
4885
4886 LLT Ty = MRI.getType(Op.getReg());
4887 if (!Ty.isVector()) {
4888 if (!is_contained(NonVecOpIndices, OpIdx))
4889 return false;
4890 continue;
4891 }
4892
4893 if (Ty.getNumElements() != NumElts)
4894 return false;
4895 }
4896
4897 return true;
4898}
4899#endif
4900
4901/// Fill \p DstOps with DstOps that together cover the same number of elements
4902/// as \p Ty. Each DstOp is a scalar when \p NumElts = 1 and otherwise a vector
4903/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
4904/// \p NumElts, the last DstOp (the leftover) has fewer than \p NumElts elements.
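/// For example, splitting <7 x s32> with \p NumElts = 4 produces a <4 x s32>
/// DstOp followed by a <3 x s32> leftover DstOp.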
4905static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4906 unsigned NumElts) {
4907 LLT LeftoverTy;
4908 assert(Ty.isVector() && "Expected vector type");
4909 LLT EltTy = Ty.getElementType();
4910 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4911 int NumParts, NumLeftover;
4912 std::tie(NumParts, NumLeftover) =
4913 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4914
4915 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4916 for (int i = 0; i < NumParts; ++i) {
4917 DstOps.push_back(NarrowTy);
4918 }
4919
4920 if (LeftoverTy.isValid()) {
4921 assert(NumLeftover == 1 && "expected exactly one leftover");
4922 DstOps.push_back(LeftoverTy);
4923 }
4924}
4925
4926/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4927/// made from \p Op depending on operand type.
4928static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4929 MachineOperand &Op) {
4930 for (unsigned i = 0; i < N; ++i) {
4931 if (Op.isReg())
4932 Ops.push_back(Op.getReg());
4933 else if (Op.isImm())
4934 Ops.push_back(Op.getImm());
4935 else if (Op.isPredicate())
4936 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4937 else
4938 llvm_unreachable("Unsupported type");
4939 }
4940}
4941
4942// Handle splitting vector operations which need to have the same number of
4943// elements in each type index, but each type index may have a different element
4944// type.
4945//
4946// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4947// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4948// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4949//
4950// Also handles some irregular breakdown cases, e.g.
4951// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4952// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4953// s64 = G_SHL s64, s32
4954LegalizerHelper::LegalizeResult
4955LegalizerHelper::fewerElementsVectorMultiEltType(
4956 GenericMachineInstr &MI, unsigned NumElts,
4957 std::initializer_list<unsigned> NonVecOpIndices) {
4958 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4959 "Non-compatible opcode or not specified non-vector operands");
4960 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4961
4962 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4963 unsigned NumDefs = MI.getNumDefs();
4964
4965 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4966 // Build instructions with DstOps to use instruction found by CSE directly.
4967 // CSE copies found instruction into given vreg when building with vreg dest.
4968 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4969 // Output registers will be taken from created instructions.
4970 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4971 for (unsigned i = 0; i < NumDefs; ++i) {
4972 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4973 }
4974
4975 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4976 // Operands listed in NonVecOpIndices will be used as is without splitting;
4977 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4978 // scalar condition (op 1), immediate in sext_inreg (op 2).
4979 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4980 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4981 ++UseIdx, ++UseNo) {
4982 if (is_contained(NonVecOpIndices, UseIdx)) {
4983 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4984 MI.getOperand(UseIdx));
4985 } else {
4986 SmallVector<Register, 8> SplitPieces;
4987 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4988 MRI);
4989 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
4990 }
4991 }
4992
4993 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4994
4995 // Take i-th piece of each input operand split and build sub-vector/scalar
4996 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4997 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4998 SmallVector<DstOp, 2> Defs;
4999 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5000 Defs.push_back(OutputOpsPieces[DstNo][i]);
5001
5002 SmallVector<SrcOp, 3> Uses;
5003 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5004 Uses.push_back(InputOpsPieces[InputNo][i]);
5005
5006 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5007 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5008 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5009 }
5010
5011 // Merge small outputs into MI's output for each def operand.
5012 if (NumLeftovers) {
5013 for (unsigned i = 0; i < NumDefs; ++i)
5014 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5015 } else {
5016 for (unsigned i = 0; i < NumDefs; ++i)
5017 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5018 }
5019
5020 MI.eraseFromParent();
5021 return Legalized;
5022}
5023
5024LegalizerHelper::LegalizeResult
5025LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5026 unsigned NumElts) {
5027 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5028
5029 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5030 unsigned NumDefs = MI.getNumDefs();
5031
5032 SmallVector<DstOp, 8> OutputOpsPieces;
5033 SmallVector<Register, 8> OutputRegs;
5034 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5035
5036 // Instructions that perform register split will be inserted in basic block
5037 // where register is defined (basic block is in the next operand).
5038 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5039 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5040 UseIdx += 2, ++UseNo) {
5041 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5042 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5043 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5044 MIRBuilder, MRI);
5045 }
5046
5047 // Build PHIs with fewer elements.
5048 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5049 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5050 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5051 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5052 Phi.addDef(
5053 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5054 OutputRegs.push_back(Phi.getReg(0));
5055
5056 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5057 Phi.addUse(InputOpsPieces[j][i]);
5058 Phi.add(MI.getOperand(1 + j * 2 + 1));
5059 }
5060 }
5061
5062 // Set the insert point after the existing PHIs
5063 MachineBasicBlock &MBB = *MI.getParent();
5064 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5065
5066 // Merge small outputs into MI's def.
5067 if (NumLeftovers) {
5068 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5069 } else {
5070 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5071 }
5072
5073 MI.eraseFromParent();
5074 return Legalized;
5075}
5076
5077LegalizerHelper::LegalizeResult
5078LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5079 unsigned TypeIdx,
5080 LLT NarrowTy) {
5081 const int NumDst = MI.getNumOperands() - 1;
5082 const Register SrcReg = MI.getOperand(NumDst).getReg();
5083 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5084 LLT SrcTy = MRI.getType(SrcReg);
5085
5086 if (TypeIdx != 1 || NarrowTy == DstTy)
5087 return UnableToLegalize;
5088
5089 // Requires compatible types. Otherwise SrcReg should have been defined by a
5090 // merge-like instruction that would get artifact-combined. Most likely the
5091 // instruction that defines SrcReg has to perform more/fewer-elements
5092 // legalization compatible with NarrowTy.
5093 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5094 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5095
5096 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5097 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5098 return UnableToLegalize;
5099
5100 // This is most likely DstTy (smaller than register size) packed in SrcTy
5101 // (larger than register size), and since the unmerge was not combined it will
5102 // be lowered to bit-sequence extracts from a register. Unpack SrcTy to NarrowTy
5103 // (register size) pieces first, then unpack each NarrowTy piece to DstTy.
5104
5105 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5106 //
5107 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5108 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5109 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5110 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5111 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5112 const int PartsPerUnmerge = NumDst / NumUnmerge;
5113
5114 for (int I = 0; I != NumUnmerge; ++I) {
5115 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5116
5117 for (int J = 0; J != PartsPerUnmerge; ++J)
5118 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5119 MIB.addUse(Unmerge.getReg(I));
5120 }
5121
5122 MI.eraseFromParent();
5123 return Legalized;
5124}
5125
5126LegalizerHelper::LegalizeResult
5127LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5128 LLT NarrowTy) {
5129 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5130 // Requires compatible types. Otherwise the user of DstReg did not perform the
5131 // unmerge that should have been artifact-combined. Most likely the instruction
5132 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5133 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5134 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5135 if (NarrowTy == SrcTy)
5136 return UnableToLegalize;
5137
5138 // This attempts to lower part of an LCMTy merge/unmerge sequence. Its intended
5139 // use is for old MIR tests. Since the switch to more/fewer-elements legalization
5140 // it should no longer be possible to generate MIR like this when starting from
5141 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5142 if (TypeIdx == 1) {
5143 assert(SrcTy.isVector() && "Expected vector types");
5144 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5145 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5146 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5147 return UnableToLegalize;
5148 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5149 //
5150 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5151 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5152 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5153 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5154 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5155 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
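// For instance, with SrcTy = <3 x s32>, NarrowTy = <2 x s32> and
// DstTy = <6 x s32>, the two sources are unmerged into six s32 elements,
// which are regrouped into three <2 x s32> build_vectors and then
// concatenated into the <6 x s32> result, as in the sketch above.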
5156
5157 SmallVector<Register, 8> Elts;
5158 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5159 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5160 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5161 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5162 Elts.push_back(Unmerge.getReg(j));
5163 }
5164
5165 SmallVector<Register, 8> NarrowTyElts;
5166 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5167 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5168 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5169 ++i, Offset += NumNarrowTyElts) {
5170 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5171 NarrowTyElts.push_back(
5172 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5173 }
5174
5175 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5176 MI.eraseFromParent();
5177 return Legalized;
5178 }
5179
5180 assert(TypeIdx == 0 && "Bad type index");
5181 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5182 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5183 return UnableToLegalize;
5184
5185 // This is most likely SrcTy (smaller than register size) packed in DstTy
5186 // (larger than register size), and since the merge was not combined it will be
5187 // lowered to bit-sequence packing into a register. Merge SrcTy into NarrowTy
5188 // (register size) pieces first, then merge each NarrowTy piece into DstTy.
5189
5190 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5191 //
5192 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5193 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5194 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
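// Concretely, for a G_BUILD_VECTOR with DstTy = <4 x s32>, SrcTy = s32 and
// NarrowTy = <2 x s32> this produces:
//
//   %5:_(<2 x s32>) = G_BUILD_VECTOR %1:_(s32), %2
//   %6:_(<2 x s32>) = G_BUILD_VECTOR %3:_(s32), %4
//   %0:_(<4 x s32>) = G_CONCAT_VECTORS %5:_(<2 x s32>), %6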
5195 SmallVector<Register, 8> NarrowTyElts;
5196 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5197 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5198 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5199 for (unsigned i = 0; i < NumParts; ++i) {
5200 SmallVector<Register, 8> Sources;
5201 for (unsigned j = 0; j < NumElts; ++j)
5202 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5203 NarrowTyElts.push_back(
5204 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5205 }
5206
5207 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5208 MI.eraseFromParent();
5209 return Legalized;
5210}
5211
5212 LegalizerHelper::LegalizeResult
5213 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5214 unsigned TypeIdx,
5215 LLT NarrowVecTy) {
5216 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5217 Register InsertVal;
5218 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5219
5220 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5221 if (IsInsert)
5222 InsertVal = MI.getOperand(2).getReg();
5223
5224 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5225 LLT VecTy = MRI.getType(SrcVec);
5226
5227 // If the index is a constant, we can really break this down as you would
5228 // expect, and index into the target size pieces.
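// E.g. extracting element 5 of an <8 x s16> vector with NarrowVecTy =
// <4 x s16> splits the source into two <4 x s16> pieces; the element lives
// in piece 1 (PartIdx = 5 / 4) at the adjusted index 1 (5 - 4 * 1).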
5229 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5230 if (MaybeCst) {
5231 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5232 // Avoid out of bounds indexing the pieces.
5233 if (IdxVal >= VecTy.getNumElements()) {
5234 MIRBuilder.buildUndef(DstReg);
5235 MI.eraseFromParent();
5236 return Legalized;
5237 }
5238
5239 if (!NarrowVecTy.isVector()) {
5240 SmallVector<Register, 8> SplitPieces;
5241 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5242 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5243 if (IsInsert) {
5244 SplitPieces[IdxVal] = InsertVal;
5245 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5246 } else {
5247 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5248 }
5249 } else {
5250 SmallVector<Register, 8> VecParts;
5251 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5252
5253 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5254 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5255 TargetOpcode::G_ANYEXT);
5256
5257 unsigned NewNumElts = NarrowVecTy.getNumElements();
5258
5259 LLT IdxTy = MRI.getType(Idx);
5260 int64_t PartIdx = IdxVal / NewNumElts;
5261 auto NewIdx =
5262 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5263
5264 if (IsInsert) {
5265 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5266
5267 // Use the adjusted index to insert into one of the subvectors.
5268 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5269 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5270 VecParts[PartIdx] = InsertPart.getReg(0);
5271
5272 // Recombine the inserted subvector with the others to reform the result
5273 // vector.
5274 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5275 } else {
5276 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5277 }
5278 }
5279
5280 MI.eraseFromParent();
5281 return Legalized;
5282 }
5283
5284 // With a variable index, we can't perform the operation in a smaller type, so
5285 // we're forced to expand this.
5286 //
5287 // TODO: We could emit a chain of compare/select to figure out which piece to
5288 // index.
5289 return UnableToLegalize;
5290}
5291
5292 LegalizerHelper::LegalizeResult
5293 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5294 LLT NarrowTy) {
5295 // FIXME: Don't know how to handle secondary types yet.
5296 if (TypeIdx != 0)
5297 return UnableToLegalize;
5298
5299 if (!NarrowTy.isByteSized()) {
5300 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5301 return UnableToLegalize;
5302 }
5303
5304 // This implementation doesn't work for atomics. Give up instead of doing
5305 // something invalid.
5306 if (LdStMI.isAtomic())
5307 return UnableToLegalize;
5308
5309 bool IsLoad = isa<GLoad>(LdStMI);
5310 Register ValReg = LdStMI.getReg(0);
5311 Register AddrReg = LdStMI.getPointerReg();
5312 LLT ValTy = MRI.getType(ValReg);
5313
5314 // FIXME: Do we need a distinct NarrowMemory legalize action?
5315 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5316 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5317 return UnableToLegalize;
5318 }
5319
5320 int NumParts = -1;
5321 int NumLeftover = -1;
5322 LLT LeftoverTy;
5323 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5324 if (IsLoad) {
5325 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5326 } else {
5327 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5328 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5329 NumParts = NarrowRegs.size();
5330 NumLeftover = NarrowLeftoverRegs.size();
5331 }
5332 }
5333
5334 if (NumParts == -1)
5335 return UnableToLegalize;
5336
5337 LLT PtrTy = MRI.getType(AddrReg);
5338 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5339
5340 unsigned TotalSize = ValTy.getSizeInBits();
5341
5342 // Split the load/store into PartTy-sized pieces starting at Offset. If this
5343 // is a load, return the new registers in ValRegs. For a store, each element
5344 // of ValRegs should be PartTy. Returns the next offset that needs to be
5345 // handled.
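// For example, narrowing a non-atomic s96 load with NarrowTy = s32 on a
// little-endian target issues three s32 loads at byte offsets 0, 4 and 8;
// on a big-endian target the same pieces are visited at byte offsets 8, 4, 0.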
5346 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5347 auto MMO = LdStMI.getMMO();
5348 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5349 unsigned NumParts, unsigned Offset) -> unsigned {
5350 MachineFunction &MF = MIRBuilder.getMF();
5351 unsigned PartSize = PartTy.getSizeInBits();
5352 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5353 ++Idx) {
5354 unsigned ByteOffset = Offset / 8;
5355 Register NewAddrReg;
5356
5357 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5358 ByteOffset);
5359
5360 MachineMemOperand *NewMMO =
5361 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5362
5363 if (IsLoad) {
5364 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5365 ValRegs.push_back(Dst);
5366 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5367 } else {
5368 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5369 }
5370 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5371 }
5372
5373 return Offset;
5374 };
5375
5376 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5377 unsigned HandledOffset =
5378 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5379
5380 // Handle the rest of the register if this isn't an even type breakdown.
5381 if (LeftoverTy.isValid())
5382 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5383
5384 if (IsLoad) {
5385 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5386 LeftoverTy, NarrowLeftoverRegs);
5387 }
5388
5389 LdStMI.eraseFromParent();
5390 return Legalized;
5391}
5392
5393 LegalizerHelper::LegalizeResult
5394 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5395 LLT NarrowTy) {
5396 using namespace TargetOpcode;
5397 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5398 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5399
5400 switch (MI.getOpcode()) {
5401 case G_IMPLICIT_DEF:
5402 case G_TRUNC:
5403 case G_AND:
5404 case G_OR:
5405 case G_XOR:
5406 case G_ADD:
5407 case G_SUB:
5408 case G_MUL:
5409 case G_PTR_ADD:
5410 case G_SMULH:
5411 case G_UMULH:
5412 case G_FADD:
5413 case G_FMUL:
5414 case G_FSUB:
5415 case G_FNEG:
5416 case G_FABS:
5417 case G_FCANONICALIZE:
5418 case G_FDIV:
5419 case G_FREM:
5420 case G_FMA:
5421 case G_FMAD:
5422 case G_FPOW:
5423 case G_FEXP:
5424 case G_FEXP2:
5425 case G_FEXP10:
5426 case G_FLOG:
5427 case G_FLOG2:
5428 case G_FLOG10:
5429 case G_FLDEXP:
5430 case G_FNEARBYINT:
5431 case G_FCEIL:
5432 case G_FFLOOR:
5433 case G_FRINT:
5434 case G_INTRINSIC_LRINT:
5435 case G_INTRINSIC_LLRINT:
5436 case G_INTRINSIC_ROUND:
5437 case G_INTRINSIC_ROUNDEVEN:
5438 case G_LROUND:
5439 case G_LLROUND:
5440 case G_INTRINSIC_TRUNC:
5441 case G_FCOS:
5442 case G_FSIN:
5443 case G_FTAN:
5444 case G_FACOS:
5445 case G_FASIN:
5446 case G_FATAN:
5447 case G_FATAN2:
5448 case G_FCOSH:
5449 case G_FSINH:
5450 case G_FTANH:
5451 case G_FSQRT:
5452 case G_BSWAP:
5453 case G_BITREVERSE:
5454 case G_SDIV:
5455 case G_UDIV:
5456 case G_SREM:
5457 case G_UREM:
5458 case G_SDIVREM:
5459 case G_UDIVREM:
5460 case G_SMIN:
5461 case G_SMAX:
5462 case G_UMIN:
5463 case G_UMAX:
5464 case G_ABS:
5465 case G_FMINNUM:
5466 case G_FMAXNUM:
5467 case G_FMINNUM_IEEE:
5468 case G_FMAXNUM_IEEE:
5469 case G_FMINIMUM:
5470 case G_FMAXIMUM:
5471 case G_FMINIMUMNUM:
5472 case G_FMAXIMUMNUM:
5473 case G_FSHL:
5474 case G_FSHR:
5475 case G_ROTL:
5476 case G_ROTR:
5477 case G_FREEZE:
5478 case G_SADDSAT:
5479 case G_SSUBSAT:
5480 case G_UADDSAT:
5481 case G_USUBSAT:
5482 case G_UMULO:
5483 case G_SMULO:
5484 case G_SHL:
5485 case G_LSHR:
5486 case G_ASHR:
5487 case G_SSHLSAT:
5488 case G_USHLSAT:
5489 case G_CTLZ:
5490 case G_CTLZ_ZERO_UNDEF:
5491 case G_CTTZ:
5492 case G_CTTZ_ZERO_UNDEF:
5493 case G_CTPOP:
5494 case G_FCOPYSIGN:
5495 case G_ZEXT:
5496 case G_SEXT:
5497 case G_ANYEXT:
5498 case G_FPEXT:
5499 case G_FPTRUNC:
5500 case G_SITOFP:
5501 case G_UITOFP:
5502 case G_FPTOSI:
5503 case G_FPTOUI:
5504 case G_FPTOSI_SAT:
5505 case G_FPTOUI_SAT:
5506 case G_INTTOPTR:
5507 case G_PTRTOINT:
5508 case G_ADDRSPACE_CAST:
5509 case G_UADDO:
5510 case G_USUBO:
5511 case G_UADDE:
5512 case G_USUBE:
5513 case G_SADDO:
5514 case G_SSUBO:
5515 case G_SADDE:
5516 case G_SSUBE:
5517 case G_STRICT_FADD:
5518 case G_STRICT_FSUB:
5519 case G_STRICT_FMUL:
5520 case G_STRICT_FMA:
5521 case G_STRICT_FLDEXP:
5522 case G_FFREXP:
5523 return fewerElementsVectorMultiEltType(GMI, NumElts);
5524 case G_ICMP:
5525 case G_FCMP:
5526 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5527 case G_IS_FPCLASS:
5528 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5529 case G_SELECT:
5530 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5531 return fewerElementsVectorMultiEltType(GMI, NumElts);
5532 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5533 case G_PHI:
5534 return fewerElementsVectorPhi(GMI, NumElts);
5535 case G_UNMERGE_VALUES:
5536 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5537 case G_BUILD_VECTOR:
5538 assert(TypeIdx == 0 && "not a vector type index");
5539 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5540 case G_CONCAT_VECTORS:
5541 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5542 return UnableToLegalize;
5543 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5544 case G_EXTRACT_VECTOR_ELT:
5545 case G_INSERT_VECTOR_ELT:
5546 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5547 case G_LOAD:
5548 case G_STORE:
5549 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5550 case G_SEXT_INREG:
5551 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5552 GISEL_VECREDUCE_CASES_NONSEQ
5553 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5554 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5555 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5556 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5557 case G_SHUFFLE_VECTOR:
5558 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5559 case G_FPOWI:
5560 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5561 case G_BITCAST:
5562 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5563 case G_INTRINSIC_FPTRUNC_ROUND:
5564 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5565 default:
5566 return UnableToLegalize;
5567 }
5568}
5569
5570 LegalizerHelper::LegalizeResult
5571 LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5572 LLT NarrowTy) {
5573 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5574 "Not a bitcast operation");
5575
5576 if (TypeIdx != 0)
5577 return UnableToLegalize;
5578
5579 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5580
5581 unsigned NewElemCount =
5582 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5583 SmallVector<Register> SrcVRegs, BitcastVRegs;
5584 if (NewElemCount == 1) {
5585 LLT SrcNarrowTy = SrcTy.getElementType();
5586
5587 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5588 getUnmergeResults(SrcVRegs, *Unmerge);
5589 } else {
5590 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5591
5592 // Split the Src and Dst Reg into smaller registers
5593 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5594 return UnableToLegalize;
5595 }
5596
5597 // Build new smaller bitcast instructions
5599 // Leftover types are not supported for now, but they will have to be eventually.
5599 for (Register Reg : SrcVRegs)
5600 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5601
5602 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5603 MI.eraseFromParent();
5604 return Legalized;
5605}
5606
5607 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5608 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5609 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5610 if (TypeIdx != 0)
5611 return UnableToLegalize;
5612
5613 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5614 MI.getFirst3RegLLTs();
5615 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5616 // The shuffle should be canonicalized by now.
5617 if (DstTy != Src1Ty)
5618 return UnableToLegalize;
5619 if (DstTy != Src2Ty)
5620 return UnableToLegalize;
5621
5622 if (!isPowerOf2_32(DstTy.getNumElements()))
5623 return UnableToLegalize;
5624
5625 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5626 // Further legalization attempts will be needed to split it further.
5627 NarrowTy =
5628 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5629 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5630
5631 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5632 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5633 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5634 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5635 SplitSrc2Regs[1]};
5636
5637 Register Hi, Lo;
5638
5639 // If Lo or Hi uses elements from at most two of the four input vectors, then
5640 // express it as a vector shuffle of those two inputs. Otherwise extract the
5641 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
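// E.g. splitting an <8 x s32> shuffle into two <4 x s32> halves: a half whose
// mask indices all fall in at most two of the four inputs becomes a new
// G_SHUFFLE_VECTOR of those two inputs; a half that draws from three or four
// of the inputs is instead assembled element-by-element with a G_BUILD_VECTOR.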
5642 SmallVector<int, 16> Ops;
5643 for (unsigned High = 0; High < 2; ++High) {
5644 Register &Output = High ? Hi : Lo;
5645
5646 // Build a shuffle mask for the output, discovering on the fly which
5647 // input vectors to use as shuffle operands (recorded in InputUsed).
5648 // If building a suitable shuffle vector proves too hard, then bail
5649 // out with useBuildVector set.
5650 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5651 unsigned FirstMaskIdx = High * NewElts;
5652 bool UseBuildVector = false;
5653 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5654 // The mask element. This indexes into the input.
5655 int Idx = Mask[FirstMaskIdx + MaskOffset];
5656
5657 // The input vector this mask element indexes into.
5658 unsigned Input = (unsigned)Idx / NewElts;
5659
5660 if (Input >= std::size(Inputs)) {
5661 // The mask element does not index into any input vector.
5662 Ops.push_back(-1);
5663 continue;
5664 }
5665
5666 // Turn the index into an offset from the start of the input vector.
5667 Idx -= Input * NewElts;
5668
5669 // Find or create a shuffle vector operand to hold this input.
5670 unsigned OpNo;
5671 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5672 if (InputUsed[OpNo] == Input) {
5673 // This input vector is already an operand.
5674 break;
5675 } else if (InputUsed[OpNo] == -1U) {
5676 // Create a new operand for this input vector.
5677 InputUsed[OpNo] = Input;
5678 break;
5679 }
5680 }
5681
5682 if (OpNo >= std::size(InputUsed)) {
5683 // More than two input vectors used! Give up on trying to create a
5684 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5685 UseBuildVector = true;
5686 break;
5687 }
5688
5689 // Add the mask index for the new shuffle vector.
5690 Ops.push_back(Idx + OpNo * NewElts);
5691 }
5692
5693 if (UseBuildVector) {
5694 LLT EltTy = NarrowTy.getElementType();
5695 SmallVector<Register, 16> SVOps;
5696
5697 // Extract the input elements by hand.
5698 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5699 // The mask element. This indexes into the input.
5700 int Idx = Mask[FirstMaskIdx + MaskOffset];
5701
5702 // The input vector this mask element indexes into.
5703 unsigned Input = (unsigned)Idx / NewElts;
5704
5705 if (Input >= std::size(Inputs)) {
5706 // The mask element is "undef" or indexes off the end of the input.
5707 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5708 continue;
5709 }
5710
5711 // Turn the index into an offset from the start of the input vector.
5712 Idx -= Input * NewElts;
5713
5714 // Extract the vector element by hand.
5715 SVOps.push_back(MIRBuilder
5716 .buildExtractVectorElement(
5717 EltTy, Inputs[Input],
5718 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5719 .getReg(0));
5720 }
5721
5722 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5723 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5724 } else if (InputUsed[0] == -1U) {
5725 // No input vectors were used! The result is undefined.
5726 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5727 } else {
5728 Register Op0 = Inputs[InputUsed[0]];
5729 // If only one input was used, use an undefined vector for the other.
5730 Register Op1 = InputUsed[1] == -1U
5731 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5732 : Inputs[InputUsed[1]];
5733 // At least one input vector was used. Create a new shuffle vector.
5734 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5735 }
5736
5737 Ops.clear();
5738 }
5739
5740 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5741 MI.eraseFromParent();
5742 return Legalized;
5743}
5744
5745 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5746 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5747 auto &RdxMI = cast<GVecReduce>(MI);
5748
5749 if (TypeIdx != 1)
5750 return UnableToLegalize;
5751
5752 // The semantics of the normal non-sequential reductions allow us to freely
5753 // re-associate the operation.
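// E.g. a G_VECREDUCE_ADD of <8 x s32> can be computed as the sum of the
// partial reductions of its two <4 x s32> halves, in any order.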
5754 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5755
5756 if (NarrowTy.isVector() &&
5757 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5758 return UnableToLegalize;
5759
5760 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5761 SmallVector<Register> SplitSrcs;
5762 // If NarrowTy is a scalar then we're being asked to scalarize.
5763 const unsigned NumParts =
5764 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5765 : SrcTy.getNumElements();
5766
5767 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5768 if (NarrowTy.isScalar()) {
5769 if (DstTy != NarrowTy)
5770 return UnableToLegalize; // FIXME: handle implicit extensions.
5771
5772 if (isPowerOf2_32(NumParts)) {
5773 // Generate a tree of scalar operations to reduce the critical path.
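// E.g. with four scalar parts s0..s3 this emits ScalarOpc(s0, s1) and
// ScalarOpc(s2, s3) in one round and then combines the two partial results,
// rather than a serial chain through all four parts.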
5774 SmallVector<Register> PartialResults;
5775 unsigned NumPartsLeft = NumParts;
5776 while (NumPartsLeft > 1) {
5777 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5778 PartialResults.emplace_back(
5779 MIRBuilder
5780 .buildInstr(ScalarOpc, {NarrowTy},
5781 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5782 .getReg(0));
5783 }
5784 SplitSrcs = PartialResults;
5785 PartialResults.clear();
5786 NumPartsLeft = SplitSrcs.size();
5787 }
5788 assert(SplitSrcs.size() == 1);
5789 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5790 MI.eraseFromParent();
5791 return Legalized;
5792 }
5793 // If we can't generate a tree, then just do sequential operations.
5794 Register Acc = SplitSrcs[0];
5795 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5796 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5797 .getReg(0);
5798 MIRBuilder.buildCopy(DstReg, Acc);
5799 MI.eraseFromParent();
5800 return Legalized;
5801 }
5802 SmallVector<Register> PartialReductions;
5803 for (unsigned Part = 0; Part < NumParts; ++Part) {
5804 PartialReductions.push_back(
5805 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5806 .getReg(0));
5807 }
5808
5809 // If the types involved are powers of 2, we can generate intermediate vector
5810 // ops, before generating a final reduction operation.
5811 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5812 isPowerOf2_32(NarrowTy.getNumElements())) {
5813 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5814 }
5815
5816 Register Acc = PartialReductions[0];
5817 for (unsigned Part = 1; Part < NumParts; ++Part) {
5818 if (Part == NumParts - 1) {
5819 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5820 {Acc, PartialReductions[Part]});
5821 } else {
5822 Acc = MIRBuilder
5823 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5824 .getReg(0);
5825 }
5826 }
5827 MI.eraseFromParent();
5828 return Legalized;
5829}
5830
5831 LegalizerHelper::LegalizeResult
5832 LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5833 unsigned int TypeIdx,
5834 LLT NarrowTy) {
5835 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5836 MI.getFirst3RegLLTs();
5837 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5838 DstTy != NarrowTy)
5839 return UnableToLegalize;
5840
5841 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5842 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5843 "Unexpected vecreduce opcode");
5844 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5845 ? TargetOpcode::G_FADD
5846 : TargetOpcode::G_FMUL;
5847
5848 SmallVector<Register> SplitSrcs;
5849 unsigned NumParts = SrcTy.getNumElements();
5850 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5851 Register Acc = ScalarReg;
5852 for (unsigned i = 0; i < NumParts; i++)
5853 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5854 .getReg(0);
5855
5856 MIRBuilder.buildCopy(DstReg, Acc);
5857 MI.eraseFromParent();
5858 return Legalized;
5859}
5860
5861 LegalizerHelper::LegalizeResult
5862 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5863 LLT SrcTy, LLT NarrowTy,
5864 unsigned ScalarOpc) {
5865 SmallVector<Register> SplitSrcs;
5866 // Split the sources into NarrowTy size pieces.
5867 extractParts(SrcReg, NarrowTy,
5868 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5869 MIRBuilder, MRI);
5870 // We're going to do a tree reduction using vector operations until we have
5871 // one NarrowTy size value left.
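// E.g. a G_VECREDUCE_ADD of <16 x s32> with NarrowTy = <4 x s32> starts from
// four <4 x s32> pieces, adds them pairwise down to a single <4 x s32>, and
// only then performs the final (narrow) vector reduction.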
5872 while (SplitSrcs.size() > 1) {
5873 SmallVector<Register> PartialRdxs;
5874 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5875 Register LHS = SplitSrcs[Idx];
5876 Register RHS = SplitSrcs[Idx + 1];
5877 // Create the intermediate vector op.
5878 Register Res =
5879 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5880 PartialRdxs.push_back(Res);
5881 }
5882 SplitSrcs = std::move(PartialRdxs);
5883 }
5884 // Finally generate the requested NarrowTy based reduction.
5886 MI.getOperand(1).setReg(SplitSrcs[0]);
5888 return Legalized;
5889}
5890
5891 LegalizerHelper::LegalizeResult
5892 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5893 const LLT HalfTy, const LLT AmtTy) {
5894
5895 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5896 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5897 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5898
5899 if (Amt.isZero()) {
5900 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5901 MI.eraseFromParent();
5902 return Legalized;
5903 }
5904
5905 LLT NVT = HalfTy;
5906 unsigned NVTBits = HalfTy.getSizeInBits();
5907 unsigned VTBits = 2 * NVTBits;
5908
5909 SrcOp Lo(Register(0)), Hi(Register(0));
5910 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5911 if (Amt.ugt(VTBits)) {
5912 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5913 } else if (Amt.ugt(NVTBits)) {
5914 Lo = MIRBuilder.buildConstant(NVT, 0);
5915 Hi = MIRBuilder.buildShl(NVT, InL,
5916 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5917 } else if (Amt == NVTBits) {
5918 Lo = MIRBuilder.buildConstant(NVT, 0);
5919 Hi = InL;
5920 } else {
5921 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5922 auto OrLHS =
5923 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5924 auto OrRHS = MIRBuilder.buildLShr(
5925 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5926 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5927 }
5928 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5929 if (Amt.ugt(VTBits)) {
5930 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5931 } else if (Amt.ugt(NVTBits)) {
5932 Lo = MIRBuilder.buildLShr(NVT, InH,
5933 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5934 Hi = MIRBuilder.buildConstant(NVT, 0);
5935 } else if (Amt == NVTBits) {
5936 Lo = InH;
5937 Hi = MIRBuilder.buildConstant(NVT, 0);
5938 } else {
5939 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5940
5941 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5942 auto OrRHS = MIRBuilder.buildShl(
5943 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5944
5945 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5946 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5947 }
5948 } else {
5949 if (Amt.ugt(VTBits)) {
5950 Lo = Hi = MIRBuilder.buildAShr(
5951 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5952 } else if (Amt.ugt(NVTBits)) {
5953 Lo = MIRBuilder.buildAShr(NVT, InH,
5954 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5955 Hi = MIRBuilder.buildAShr(NVT, InH,
5956 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5957 } else if (Amt == NVTBits) {
5958 Lo = InH;
5959 Hi = MIRBuilder.buildAShr(NVT, InH,
5960 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5961 } else {
5962 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5963
5964 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5965 auto OrRHS = MIRBuilder.buildShl(
5966 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5967
5968 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5969 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5970 }
5971 }
5972
5973 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5974 MI.eraseFromParent();
5975
5976 return Legalized;
5977}
5978
5979// TODO: Optimize if constant shift amount.
5980 LegalizerHelper::LegalizeResult
5981 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5982 LLT RequestedTy) {
5983 if (TypeIdx == 1) {
5985 narrowScalarSrc(MI, RequestedTy, 2);
5987 return Legalized;
5988 }
5989
5990 Register DstReg = MI.getOperand(0).getReg();
5991 LLT DstTy = MRI.getType(DstReg);
5992 if (DstTy.isVector())
5993 return UnableToLegalize;
5994
5995 Register Amt = MI.getOperand(2).getReg();
5996 LLT ShiftAmtTy = MRI.getType(Amt);
5997 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5998 if (DstEltSize % 2 != 0)
5999 return UnableToLegalize;
6000
6001 // Ignore the input type. We can only go to exactly half the size of the
6002 // input. If that isn't small enough, the resulting pieces will be further
6003 // legalized.
6004 const unsigned NewBitSize = DstEltSize / 2;
6005 const LLT HalfTy = LLT::scalar(NewBitSize);
6006 const LLT CondTy = LLT::scalar(1);
6007
6008 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6009 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6010 ShiftAmtTy);
6011 }
6012
6013 // TODO: Expand with known bits.
6014
6015 // Handle the fully general expansion by an unknown amount.
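// E.g. for a G_SHL of an s64 value split into s32 halves, the "short" case
// (Amt < 32) computes Lo = InL << Amt and Hi = (InH << Amt) | (InL >> (32 - Amt)),
// while the "long" case (Amt >= 32) gives Lo = 0 and Hi = InL << (Amt - 32);
// selects on IsShort/IsZero pick between them at runtime.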
6016 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6017
6018 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6019 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6020 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6021
6022 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6023 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6024
6025 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6026 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6027 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6028
6029 Register ResultRegs[2];
6030 switch (MI.getOpcode()) {
6031 case TargetOpcode::G_SHL: {
6032 // Short: ShAmt < NewBitSize
6033 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6034
6035 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6036 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6037 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6038
6039 // Long: ShAmt >= NewBitSize
6040 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6041 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6042
6043 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6044 auto Hi = MIRBuilder.buildSelect(
6045 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6046
6047 ResultRegs[0] = Lo.getReg(0);
6048 ResultRegs[1] = Hi.getReg(0);
6049 break;
6050 }
6051 case TargetOpcode::G_LSHR:
6052 case TargetOpcode::G_ASHR: {
6053 // Short: ShAmt < NewBitSize
6054 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6055
6056 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6057 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6058 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6059
6060 // Long: ShAmt >= NewBitSize
6061 MachineInstrBuilder HiL;
6062 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6063 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6064 } else {
6065 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6066 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6067 }
6068 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6069 {InH, AmtExcess}); // Lo from Hi part.
6070
6071 auto Lo = MIRBuilder.buildSelect(
6072 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6073
6074 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6075
6076 ResultRegs[0] = Lo.getReg(0);
6077 ResultRegs[1] = Hi.getReg(0);
6078 break;
6079 }
6080 default:
6081 llvm_unreachable("not a shift");
6082 }
6083
6084 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6085 MI.eraseFromParent();
6086 return Legalized;
6087}
6088
6089 LegalizerHelper::LegalizeResult
6090 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6091 LLT MoreTy) {
6092 assert(TypeIdx == 0 && "Expecting only Idx 0");
6093
6095 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6096 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6098 moreElementsVectorSrc(MI, MoreTy, I);
6099 }
6100
6101 MachineBasicBlock &MBB = *MI.getParent();
6103 moreElementsVectorDst(MI, MoreTy, 0);
6105 return Legalized;
6106}
6107
6108MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6109 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6110 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6111
6112 switch (Opcode) {
6113 default:
6115 "getNeutralElementForVecReduce called with invalid opcode!");
6116 case TargetOpcode::G_VECREDUCE_ADD:
6117 case TargetOpcode::G_VECREDUCE_OR:
6118 case TargetOpcode::G_VECREDUCE_XOR:
6119 case TargetOpcode::G_VECREDUCE_UMAX:
6120 return MIRBuilder.buildConstant(Ty, 0);
6121 case TargetOpcode::G_VECREDUCE_MUL:
6122 return MIRBuilder.buildConstant(Ty, 1);
6123 case TargetOpcode::G_VECREDUCE_AND:
6124 case TargetOpcode::G_VECREDUCE_UMIN:
6125 return MIRBuilder.buildConstant(Ty, APInt::getAllOnes(Ty.getSizeInBits()));
6127 case TargetOpcode::G_VECREDUCE_SMAX:
6128 return MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6130 case TargetOpcode::G_VECREDUCE_SMIN:
6131 return MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6133 case TargetOpcode::G_VECREDUCE_FADD:
6134 return MIRBuilder.buildFConstant(Ty, -0.0);
6135 case TargetOpcode::G_VECREDUCE_FMUL:
6136 return MIRBuilder.buildFConstant(Ty, 1.0);
6137 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6138 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6139 assert(false && "getNeutralElementForVecReduce unimplemented for "
6140 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6141 }
6142 llvm_unreachable("switch expected to return!");
6143}
6144
6145 LegalizerHelper::LegalizeResult
6146 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6147 LLT MoreTy) {
6148 unsigned Opc = MI.getOpcode();
6149 switch (Opc) {
6150 case TargetOpcode::G_IMPLICIT_DEF:
6151 case TargetOpcode::G_LOAD: {
6152 if (TypeIdx != 0)
6153 return UnableToLegalize;
6155 moreElementsVectorDst(MI, MoreTy, 0);
6157 return Legalized;
6158 }
6159 case TargetOpcode::G_STORE:
6160 if (TypeIdx != 0)
6161 return UnableToLegalize;
6163 moreElementsVectorSrc(MI, MoreTy, 0);
6165 return Legalized;
6166 case TargetOpcode::G_AND:
6167 case TargetOpcode::G_OR:
6168 case TargetOpcode::G_XOR:
6169 case TargetOpcode::G_ADD:
6170 case TargetOpcode::G_SUB:
6171 case TargetOpcode::G_MUL:
6172 case TargetOpcode::G_FADD:
6173 case TargetOpcode::G_FSUB:
6174 case TargetOpcode::G_FMUL:
6175 case TargetOpcode::G_FDIV:
6176 case TargetOpcode::G_FCOPYSIGN:
6177 case TargetOpcode::G_UADDSAT:
6178 case TargetOpcode::G_USUBSAT:
6179 case TargetOpcode::G_SADDSAT:
6180 case TargetOpcode::G_SSUBSAT:
6181 case TargetOpcode::G_SMIN:
6182 case TargetOpcode::G_SMAX:
6183 case TargetOpcode::G_UMIN:
6184 case TargetOpcode::G_UMAX:
6185 case TargetOpcode::G_FMINNUM:
6186 case TargetOpcode::G_FMAXNUM:
6187 case TargetOpcode::G_FMINNUM_IEEE:
6188 case TargetOpcode::G_FMAXNUM_IEEE:
6189 case TargetOpcode::G_FMINIMUM:
6190 case TargetOpcode::G_FMAXIMUM:
6191 case TargetOpcode::G_FMINIMUMNUM:
6192 case TargetOpcode::G_FMAXIMUMNUM:
6193 case TargetOpcode::G_STRICT_FADD:
6194 case TargetOpcode::G_STRICT_FSUB:
6195 case TargetOpcode::G_STRICT_FMUL:
6196 case TargetOpcode::G_SHL:
6197 case TargetOpcode::G_ASHR:
6198 case TargetOpcode::G_LSHR: {
6200 moreElementsVectorSrc(MI, MoreTy, 1);
6201 moreElementsVectorSrc(MI, MoreTy, 2);
6202 moreElementsVectorDst(MI, MoreTy, 0);
6204 return Legalized;
6205 }
6206 case TargetOpcode::G_FMA:
6207 case TargetOpcode::G_STRICT_FMA:
6208 case TargetOpcode::G_FSHR:
6209 case TargetOpcode::G_FSHL: {
6211 moreElementsVectorSrc(MI, MoreTy, 1);
6212 moreElementsVectorSrc(MI, MoreTy, 2);
6213 moreElementsVectorSrc(MI, MoreTy, 3);
6214 moreElementsVectorDst(MI, MoreTy, 0);
6216 return Legalized;
6217 }
6218 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6219 case TargetOpcode::G_EXTRACT:
6220 if (TypeIdx != 1)
6221 return UnableToLegalize;
6223 moreElementsVectorSrc(MI, MoreTy, 1);
6225 return Legalized;
6226 case TargetOpcode::G_INSERT:
6227 case TargetOpcode::G_INSERT_VECTOR_ELT:
6228 case TargetOpcode::G_FREEZE:
6229 case TargetOpcode::G_FNEG:
6230 case TargetOpcode::G_FABS:
6231 case TargetOpcode::G_FSQRT:
6232 case TargetOpcode::G_FCEIL:
6233 case TargetOpcode::G_FFLOOR:
6234 case TargetOpcode::G_FNEARBYINT:
6235 case TargetOpcode::G_FRINT:
6236 case TargetOpcode::G_INTRINSIC_ROUND:
6237 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6238 case TargetOpcode::G_INTRINSIC_TRUNC:
6239 case TargetOpcode::G_BITREVERSE:
6240 case TargetOpcode::G_BSWAP:
6241 case TargetOpcode::G_FCANONICALIZE:
6242 case TargetOpcode::G_SEXT_INREG:
6243 case TargetOpcode::G_ABS:
6244 case TargetOpcode::G_CTLZ:
6245 case TargetOpcode::G_CTPOP:
6246 if (TypeIdx != 0)
6247 return UnableToLegalize;
6249 moreElementsVectorSrc(MI, MoreTy, 1);
6250 moreElementsVectorDst(MI, MoreTy, 0);
6252 return Legalized;
6253 case TargetOpcode::G_SELECT: {
6254 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6255 if (TypeIdx == 1) {
6256 if (!CondTy.isScalar() ||
6257 DstTy.getElementCount() != MoreTy.getElementCount())
6258 return UnableToLegalize;
6259
6260 // This is turning a scalar select of vectors into a vector
6261 // select. Broadcast the select condition.
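// E.g. a G_SELECT with an s1 condition choosing between <4 x s32> values
// becomes a vector select whose <4 x s1> condition is built by splatting
// the original bit (MoreTy being the widened condition type here).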
6262 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6264 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6266 return Legalized;
6267 }
6268
6269 if (CondTy.isVector())
6270 return UnableToLegalize;
6271
6273 moreElementsVectorSrc(MI, MoreTy, 2);
6274 moreElementsVectorSrc(MI, MoreTy, 3);
6275 moreElementsVectorDst(MI, MoreTy, 0);
6277 return Legalized;
6278 }
6279 case TargetOpcode::G_UNMERGE_VALUES:
6280 return UnableToLegalize;
6281 case TargetOpcode::G_PHI:
6282 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6283 case TargetOpcode::G_SHUFFLE_VECTOR:
6284 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6285 case TargetOpcode::G_BUILD_VECTOR: {
6286 SmallVector<SrcOp, 8> Elts;
6287 for (auto Op : MI.uses()) {
6288 Elts.push_back(Op.getReg());
6289 }
6290
6291 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6292 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6293 }
6294
6295 MIRBuilder.buildDeleteTrailingVectorElements(
6296 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6297 MI.eraseFromParent();
6298 return Legalized;
6299 }
6300 case TargetOpcode::G_SEXT:
6301 case TargetOpcode::G_ZEXT:
6302 case TargetOpcode::G_ANYEXT:
6303 case TargetOpcode::G_TRUNC:
6304 case TargetOpcode::G_FPTRUNC:
6305 case TargetOpcode::G_FPEXT:
6306 case TargetOpcode::G_FPTOSI:
6307 case TargetOpcode::G_FPTOUI:
6308 case TargetOpcode::G_FPTOSI_SAT:
6309 case TargetOpcode::G_FPTOUI_SAT:
6310 case TargetOpcode::G_SITOFP:
6311 case TargetOpcode::G_UITOFP: {
6313 LLT SrcExtTy;
6314 LLT DstExtTy;
6315 if (TypeIdx == 0) {
6316 DstExtTy = MoreTy;
6317 SrcExtTy = LLT::fixed_vector(
6318 MoreTy.getNumElements(),
6319 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6320 } else {
6321 DstExtTy = LLT::fixed_vector(
6322 MoreTy.getNumElements(),
6323 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6324 SrcExtTy = MoreTy;
6325 }
6326 moreElementsVectorSrc(MI, SrcExtTy, 1);
6327 moreElementsVectorDst(MI, DstExtTy, 0);
6329 return Legalized;
6330 }
6331 case TargetOpcode::G_ICMP:
6332 case TargetOpcode::G_FCMP: {
6333 if (TypeIdx != 1)
6334 return UnableToLegalize;
6335
6337 moreElementsVectorSrc(MI, MoreTy, 2);
6338 moreElementsVectorSrc(MI, MoreTy, 3);
6339 LLT CondTy = LLT::fixed_vector(
6340 MoreTy.getNumElements(),
6341 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6342 moreElementsVectorDst(MI, CondTy, 0);
6344 return Legalized;
6345 }
6346 case TargetOpcode::G_BITCAST: {
6347 if (TypeIdx != 0)
6348 return UnableToLegalize;
6349
6350 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6351 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6352
6353 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6354 if (coefficient % DstTy.getNumElements() != 0)
6355 return UnableToLegalize;
6356
6357 coefficient = coefficient / DstTy.getNumElements();
6358
6359 LLT NewTy = SrcTy.changeElementCount(
6360 ElementCount::get(coefficient, MoreTy.isScalable()));
6362 moreElementsVectorSrc(MI, NewTy, 1);
6363 moreElementsVectorDst(MI, MoreTy, 0);
6365 return Legalized;
6366 }
6367 case TargetOpcode::G_VECREDUCE_FADD:
6368 case TargetOpcode::G_VECREDUCE_FMUL:
6369 case TargetOpcode::G_VECREDUCE_ADD:
6370 case TargetOpcode::G_VECREDUCE_MUL:
6371 case TargetOpcode::G_VECREDUCE_AND:
6372 case TargetOpcode::G_VECREDUCE_OR:
6373 case TargetOpcode::G_VECREDUCE_XOR:
6374 case TargetOpcode::G_VECREDUCE_SMAX:
6375 case TargetOpcode::G_VECREDUCE_SMIN:
6376 case TargetOpcode::G_VECREDUCE_UMAX:
6377 case TargetOpcode::G_VECREDUCE_UMIN: {
6378 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6379 MachineOperand &MO = MI.getOperand(1);
6380 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6381 auto NeutralElement = getNeutralElementForVecReduce(
6382 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6383
6385 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6386 i != e; i++) {
6387 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6388 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6389 NeutralElement, Idx);
6390 }
6391
6393 MO.setReg(NewVec.getReg(0));
6395 return Legalized;
6396 }
6397
6398 default:
6399 return UnableToLegalize;
6400 }
6401}
6402
6403 LegalizerHelper::LegalizeResult
6404 LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6405 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6406 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6407 unsigned MaskNumElts = Mask.size();
6408 unsigned SrcNumElts = SrcTy.getNumElements();
6409 LLT DestEltTy = DstTy.getElementType();
6410
6411 if (MaskNumElts == SrcNumElts)
6412 return Legalized;
6413
6414 if (MaskNumElts < SrcNumElts) {
6415 // Extend mask to match new destination vector size with
6416 // undef values.
6417 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6418 llvm::copy(Mask, NewMask.begin());
6419
6420 moreElementsVectorDst(MI, SrcTy, 0);
6422 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6423 MI.getOperand(1).getReg(),
6424 MI.getOperand(2).getReg(), NewMask);
6425 MI.eraseFromParent();
6426
6427 return Legalized;
6428 }
6429
6430 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6431 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6432 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6433
6434 // Create new source vectors by concatenating the initial
6435 // source vectors with undefined vectors of the same size.
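// E.g. with <2 x s32> sources and a 4-element mask, each source is padded to
// <4 x s32> by concatenating it with an undef vector; mask entries that
// referred to the second source (indices 2-3) are then remapped to 4-5.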
6436 auto Undef = MIRBuilder.buildUndef(SrcTy);
6437 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6438 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6439 MOps1[0] = MI.getOperand(1).getReg();
6440 MOps2[0] = MI.getOperand(2).getReg();
6441
6442 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6443 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6444
6445 // Readjust mask for new input vector length.
6446 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6447 for (unsigned I = 0; I != MaskNumElts; ++I) {
6448 int Idx = Mask[I];
6449 if (Idx >= static_cast<int>(SrcNumElts))
6450 Idx += PaddedMaskNumElts - SrcNumElts;
6451 MappedOps[I] = Idx;
6452 }
6453
6454 // If we got more elements than required, extract subvector.
6455 if (MaskNumElts != PaddedMaskNumElts) {
6456 auto Shuffle =
6457 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6458
6459 SmallVector<Register, 16> Elts(MaskNumElts);
6460 for (unsigned I = 0; I < MaskNumElts; ++I) {
6461 Elts[I] =
6463 .getReg(0);
6464 }
6465 MIRBuilder.buildBuildVector(DstReg, Elts);
6466 } else {
6467 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6468 }
6469
6470 MI.eraseFromParent();
6471 return Legalized;
6472}
6473
6474 LegalizerHelper::LegalizeResult
6475 LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6476 unsigned int TypeIdx, LLT MoreTy) {
6477 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6478 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6479 unsigned NumElts = DstTy.getNumElements();
6480 unsigned WidenNumElts = MoreTy.getNumElements();
6481
6482 if (DstTy.isVector() && Src1Ty.isVector() &&
6483 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6484 return equalizeVectorShuffleLengths(MI);
6485 }
6486
6487 if (TypeIdx != 0)
6488 return UnableToLegalize;
6489
6490 // Expect a canonicalized shuffle.
6491 if (DstTy != Src1Ty || DstTy != Src2Ty)
6492 return UnableToLegalize;
6493
6494 moreElementsVectorSrc(MI, MoreTy, 1);
6495 moreElementsVectorSrc(MI, MoreTy, 2);
6496
6497 // Adjust mask based on new input vector length.
6498 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6499 for (unsigned I = 0; I != NumElts; ++I) {
6500 int Idx = Mask[I];
6501 if (Idx < static_cast<int>(NumElts))
6502 NewMask[I] = Idx;
6503 else
6504 NewMask[I] = Idx - NumElts + WidenNumElts;
6505 }
6506 moreElementsVectorDst(MI, MoreTy, 0);
6508 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6509 MI.getOperand(1).getReg(),
6510 MI.getOperand(2).getReg(), NewMask);
6511 MI.eraseFromParent();
6512 return Legalized;
6513}
6514
6515void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6516 ArrayRef<Register> Src1Regs,
6517 ArrayRef<Register> Src2Regs,
6518 LLT NarrowTy) {
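// Schoolbook long multiplication on NarrowTy-sized digits: DstRegs[0] is the
// low part of Src1[0]*Src2[0]; each higher part sums the low halves of the
// cross products for that column, the high halves (G_UMULH) of the previous
// column, and the carries accumulated so far. E.g. for two-part operands
// (a1:a0) * (b1:b0) with a two-part result:
//   Dst0 = lo(a0*b0)
//   Dst1 = lo(a1*b0) + lo(a0*b1) + hi(a0*b0)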
6520 unsigned SrcParts = Src1Regs.size();
6521 unsigned DstParts = DstRegs.size();
6522
6523 unsigned DstIdx = 0; // Low bits of the result.
6524 Register FactorSum =
6525 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6526 DstRegs[DstIdx] = FactorSum;
6527
6528 Register CarrySumPrevDstIdx;
6530
6531 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6532 // Collect low parts of muls for DstIdx.
6533 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6534 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6536 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6537 Factors.push_back(Mul.getReg(0));
6538 }
6539 // Collect high parts of muls from previous DstIdx.
6540 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6541 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6542 MachineInstrBuilder Umulh =
6543 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6544 Factors.push_back(Umulh.getReg(0));
6545 }
6546 // Add CarrySum from additions calculated for previous DstIdx.
6547 if (DstIdx != 1) {
6548 Factors.push_back(CarrySumPrevDstIdx);
6549 }
6550
6551 Register CarrySum;
6552 // Add all factors and accumulate all carries into CarrySum.
6553 if (DstIdx != DstParts - 1) {
6554 MachineInstrBuilder Uaddo =
6555 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6556 FactorSum = Uaddo.getReg(0);
6557 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6558 for (unsigned i = 2; i < Factors.size(); ++i) {
6559 MachineInstrBuilder Uaddo =
6560 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6561 FactorSum = Uaddo.getReg(0);
6562 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
6563 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6564 }
6565 } else {
6566 // Since value for the next index is not calculated, neither is CarrySum.
6567 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6568 for (unsigned i = 2; i < Factors.size(); ++i)
6569 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6570 }
6571
6572 CarrySumPrevDstIdx = CarrySum;
6573 DstRegs[DstIdx] = FactorSum;
6574 Factors.clear();
6575 }
6576}
6577
6578 LegalizerHelper::LegalizeResult
6579 LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
6580 LLT NarrowTy) {
6581 if (TypeIdx != 0)
6582 return UnableToLegalize;
6583
6584 Register DstReg = MI.getOperand(0).getReg();
6585 LLT DstType = MRI.getType(DstReg);
6586 // FIXME: add support for vector types
6587 if (DstType.isVector())
6588 return UnableToLegalize;
6589
6590 unsigned Opcode = MI.getOpcode();
6591 unsigned OpO, OpE, OpF;
6592 switch (Opcode) {
6593 case TargetOpcode::G_SADDO:
6594 case TargetOpcode::G_SADDE:
6595 case TargetOpcode::G_UADDO:
6596 case TargetOpcode::G_UADDE:
6597 case TargetOpcode::G_ADD:
6598 OpO = TargetOpcode::G_UADDO;
6599 OpE = TargetOpcode::G_UADDE;
6600 OpF = TargetOpcode::G_UADDE;
6601 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
6602 OpF = TargetOpcode::G_SADDE;
6603 break;
6604 case TargetOpcode::G_SSUBO:
6605 case TargetOpcode::G_SSUBE:
6606 case TargetOpcode::G_USUBO:
6607 case TargetOpcode::G_USUBE:
6608 case TargetOpcode::G_SUB:
6609 OpO = TargetOpcode::G_USUBO;
6610 OpE = TargetOpcode::G_USUBE;
6611 OpF = TargetOpcode::G_USUBE;
6612 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
6613 OpF = TargetOpcode::G_SSUBE;
6614 break;
6615 default:
6616 llvm_unreachable("Unexpected add/sub opcode!");
6617 }
6618
6619 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
6620 unsigned NumDefs = MI.getNumExplicitDefs();
6621 Register Src1 = MI.getOperand(NumDefs).getReg();
6622 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
6623 Register CarryDst, CarryIn;
6624 if (NumDefs == 2)
6625 CarryDst = MI.getOperand(1).getReg();
6626 if (MI.getNumOperands() == NumDefs + 3)
6627 CarryIn = MI.getOperand(NumDefs + 2).getReg();
6628
6629 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6630 LLT LeftoverTy, DummyTy;
6631 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
6632 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
6633 MIRBuilder, MRI);
6634 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
6635 MRI);
6636
6637 int NarrowParts = Src1Regs.size();
6638 Src1Regs.append(Src1Left);
6639 Src2Regs.append(Src2Left);
6640 DstRegs.reserve(Src1Regs.size());
6641
6642 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
6643 Register DstReg =
6644 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
6645 Register CarryOut;
6646 // Forward the final carry-out to the destination register
6647 if (i == e - 1 && CarryDst)
6648 CarryOut = CarryDst;
6649 else
6650 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
6651
6652 if (!CarryIn) {
6653 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
6654 {Src1Regs[i], Src2Regs[i]});
6655 } else if (i == e - 1) {
6656 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
6657 {Src1Regs[i], Src2Regs[i], CarryIn});
6658 } else {
6659 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
6660 {Src1Regs[i], Src2Regs[i], CarryIn});
6661 }
6662
6663 DstRegs.push_back(DstReg);
6664 CarryIn = CarryOut;
6665 }
6666 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
6667 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
6668 ArrayRef(DstRegs).drop_front(NarrowParts));
6669
6670 MI.eraseFromParent();
6671 return Legalized;
6672}
6673
6674 LegalizerHelper::LegalizeResult
6675 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
6676 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
6677
6678 LLT Ty = MRI.getType(DstReg);
6679 if (Ty.isVector())
6680 return UnableToLegalize;
6681
6682 unsigned Size = Ty.getSizeInBits();
6683 unsigned NarrowSize = NarrowTy.getSizeInBits();
6684 if (Size % NarrowSize != 0)
6685 return UnableToLegalize;
6686
6687 unsigned NumParts = Size / NarrowSize;
6688 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
6689 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
6690
6691 SmallVector<Register, 2> Src1Parts, Src2Parts;
6692 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
6693 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
6694 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
6695 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6696
6697 // Take only high half of registers if this is high mul.
6698 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
6699 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6700 MI.eraseFromParent();
6701 return Legalized;
6702}
6703
6704 LegalizerHelper::LegalizeResult
6705 LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
6706 LLT NarrowTy) {
6707 if (TypeIdx != 0)
6708 return UnableToLegalize;
6709
6710 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
6711
6712 Register Src = MI.getOperand(1).getReg();
6713 LLT SrcTy = MRI.getType(Src);
6714
6715 // If all finite floats fit into the narrowed integer type, we can just swap
6716 // out the result type. This is practically only useful for conversions from
6717 // half to at least 16-bits, so just handle the one case.
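// The largest finite half (fp16) value is 65504, which fits in an unsigned
// 16-bit integer and in a signed 17-bit integer, hence the bounds checked
// below.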
6718 if (SrcTy.getScalarType() != LLT::scalar(16) ||
6719 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
6720 return UnableToLegalize;
6721
6723 narrowScalarDst(MI, NarrowTy, 0,
6724 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6726 return Legalized;
6727}
6728
6729 LegalizerHelper::LegalizeResult
6730 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
6731 LLT NarrowTy) {
6732 if (TypeIdx != 1)
6733 return UnableToLegalize;
6734
6735 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6736
6737 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6738 // FIXME: add support for when SizeOp1 isn't an exact multiple of
6739 // NarrowSize.
6740 if (SizeOp1 % NarrowSize != 0)
6741 return UnableToLegalize;
6742 int NumParts = SizeOp1 / NarrowSize;
6743
6744 SmallVector<Register, 2> SrcRegs, DstRegs;
6745 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6746 MIRBuilder, MRI);
6747
6748 Register OpReg = MI.getOperand(0).getReg();
6749 uint64_t OpStart = MI.getOperand(2).getImm();
6750 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6751 for (int i = 0; i < NumParts; ++i) {
6752 unsigned SrcStart = i * NarrowSize;
6753
6754 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6755 // No part of the extract uses this subregister, ignore it.
6756 continue;
6757 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6758 // The entire subregister is extracted, forward the value.
6759 DstRegs.push_back(SrcRegs[i]);
6760 continue;
6761 }
6762
6763 // OpSegStart is where this destination segment would start in OpReg if it
6764 // extended infinitely in both directions.
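// E.g. extracting an s16 at bit offset 24 from an s64 split into two s32
// parts takes 8 bits at offset 24 from part 0 and 8 bits at offset 0 from
// part 1, and the two pieces are then merged into the s16 result.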
6765 int64_t ExtractOffset;
6766 uint64_t SegSize;
6767 if (OpStart < SrcStart) {
6768 ExtractOffset = 0;
6769 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6770 } else {
6771 ExtractOffset = OpStart - SrcStart;
6772 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6773 }
6774
6775 Register SegReg = SrcRegs[i];
6776 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6777 // A genuine extract is needed.
6778 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6779 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
6780 }
6781
6782 DstRegs.push_back(SegReg);
6783 }
6784
6785 Register DstReg = MI.getOperand(0).getReg();
6786 if (MRI.getType(DstReg).isVector())
6787 MIRBuilder.buildBuildVector(DstReg, DstRegs);
6788 else if (DstRegs.size() > 1)
6789 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6790 else
6791 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
6792 MI.eraseFromParent();
6793 return Legalized;
6794}
6795
6796 LegalizerHelper::LegalizeResult
6797 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
6798 LLT NarrowTy) {
6799 // FIXME: Don't know how to handle secondary types yet.
6800 if (TypeIdx != 0)
6801 return UnableToLegalize;
6802
6803 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
6804 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6805 LLT LeftoverTy;
6806 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6807 LeftoverRegs, MIRBuilder, MRI);
6808
6809 SrcRegs.append(LeftoverRegs);
6810
6811 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6812 Register OpReg = MI.getOperand(2).getReg();
6813 uint64_t OpStart = MI.getOperand(3).getImm();
6814 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6815 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6816 unsigned DstStart = I * NarrowSize;
6817
6818 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6819 // The entire subregister is defined by this insert, forward the new
6820 // value.
6821 DstRegs.push_back(OpReg);
6822 continue;
6823 }
6824
6825 Register SrcReg = SrcRegs[I];
6826 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6827 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6828 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6829 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6830 }
6831
6832 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6833 // No part of the insert affects this subregister, forward the original.
6834 DstRegs.push_back(SrcReg);
6835 continue;
6836 }
6837
6838 // OpSegStart is where this destination segment would start in OpReg if it
6839 // extended infinitely in both directions.
6840 int64_t ExtractOffset, InsertOffset;
6841 uint64_t SegSize;
6842 if (OpStart < DstStart) {
6843 InsertOffset = 0;
6844 ExtractOffset = DstStart - OpStart;
6845 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6846 } else {
6847 InsertOffset = OpStart - DstStart;
6848 ExtractOffset = 0;
6849 SegSize =
6850 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6851 }
6852
6853 Register SegReg = OpReg;
6854 if (ExtractOffset != 0 || SegSize != OpSize) {
6855 // A genuine extract is needed.
6856 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6857 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6858 }
6859
6860 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6861 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6862 DstRegs.push_back(DstReg);
6863 }
6864
6865 uint64_t WideSize = DstRegs.size() * NarrowSize;
6866 Register DstReg = MI.getOperand(0).getReg();
6867 if (WideSize > RegTy.getSizeInBits()) {
6868 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6869 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6870 MIRBuilder.buildTrunc(DstReg, MergeReg);
6871 } else
6872 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6873
6874 MI.eraseFromParent();
6875 return Legalized;
6876}
6877
6880 LLT NarrowTy) {
6881 Register DstReg = MI.getOperand(0).getReg();
6882 LLT DstTy = MRI.getType(DstReg);
6883
6884 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6885
6886 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6887 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6888 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6889 LLT LeftoverTy;
6890 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6891 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6892 return UnableToLegalize;
6893
6894 LLT Unused;
6895 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6896 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6897 llvm_unreachable("inconsistent extractParts result");
6898
6899 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6900 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6901 {Src0Regs[I], Src1Regs[I]});
6902 DstRegs.push_back(Inst.getReg(0));
6903 }
6904
6905 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6906 auto Inst = MIRBuilder.buildInstr(
6907 MI.getOpcode(),
6908 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6909 DstLeftoverRegs.push_back(Inst.getReg(0));
6910 }
6911
6912 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6913 LeftoverTy, DstLeftoverRegs);
6914
6915 MI.eraseFromParent();
6916 return Legalized;
6917}
6918
6921 LLT NarrowTy) {
6922 if (TypeIdx != 0)
6923 return UnableToLegalize;
6924
6925 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6926
6927 LLT DstTy = MRI.getType(DstReg);
6928 if (DstTy.isVector())
6929 return UnableToLegalize;
6930
6932 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6933 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6934 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6935
6936 MI.eraseFromParent();
6937 return Legalized;
6938}
6939
6942 LLT NarrowTy) {
6943 if (TypeIdx != 0)
6944 return UnableToLegalize;
6945
6946 Register CondReg = MI.getOperand(1).getReg();
6947 LLT CondTy = MRI.getType(CondReg);
6948 if (CondTy.isVector()) // TODO: Handle vselect
6949 return UnableToLegalize;
6950
6951 Register DstReg = MI.getOperand(0).getReg();
6952 LLT DstTy = MRI.getType(DstReg);
6953
6954 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6955 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6956 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6957 LLT LeftoverTy;
6958 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6959 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6960 return UnableToLegalize;
6961
6962 LLT Unused;
6963 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6964 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6965 llvm_unreachable("inconsistent extractParts result");
6966
6967 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6968 auto Select = MIRBuilder.buildSelect(NarrowTy,
6969 CondReg, Src1Regs[I], Src2Regs[I]);
6970 DstRegs.push_back(Select.getReg(0));
6971 }
6972
6973 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6974 auto Select = MIRBuilder.buildSelect(
6975 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6976 DstLeftoverRegs.push_back(Select.getReg(0));
6977 }
6978
6979 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6980 LeftoverTy, DstLeftoverRegs);
6981
6982 MI.eraseFromParent();
6983 return Legalized;
6984}
6985
6988 LLT NarrowTy) {
6989 if (TypeIdx != 1)
6990 return UnableToLegalize;
6991
6992 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6993 unsigned NarrowSize = NarrowTy.getSizeInBits();
6994
6995 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6996 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6997
6998 MachineIRBuilder &B = MIRBuilder;
6999 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7000 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
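// Illustrative example (editorial note, not in the original source): narrowing
// a 64-bit G_CTLZ into 32-bit halves with Src = 0x0000000000001000 gives
// Hi = 0, so the result is 32 + ctlz32(0x1000) = 32 + 19 = 51, matching the
// 64-bit leading-zero count (the highest set bit is bit 12, and 63 - 12 = 51).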
7001 auto C_0 = B.buildConstant(NarrowTy, 0);
7002 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7003 UnmergeSrc.getReg(1), C_0);
7004 auto LoCTLZ = IsUndef ?
7005 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7006 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7007 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7008 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7009 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7010 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7011
7012 MI.eraseFromParent();
7013 return Legalized;
7014 }
7015
7016 return UnableToLegalize;
7017}
7018
7021 LLT NarrowTy) {
7022 if (TypeIdx != 1)
7023 return UnableToLegalize;
7024
7025 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7026 unsigned NarrowSize = NarrowTy.getSizeInBits();
7027
7028 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7029 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7030
7031 MachineIRBuilder &B = MIRBuilder;
7032 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7033 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
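// Illustrative example (editorial note, not in the original source): for a
// 64-bit G_CTTZ split into 32-bit halves with Src = 0x0000100000000000,
// Lo = 0, so the result is cttz32(0x1000) + 32 = 12 + 32 = 44, the trailing
// zero count of the full 64-bit value.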
7034 auto C_0 = B.buildConstant(NarrowTy, 0);
7035 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7036 UnmergeSrc.getReg(0), C_0);
7037 auto HiCTTZ = IsUndef ?
7038 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7039 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7040 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7041 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7042 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7043 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7044
7045 MI.eraseFromParent();
7046 return Legalized;
7047 }
7048
7049 return UnableToLegalize;
7050}
7051
7054 LLT NarrowTy) {
7055 if (TypeIdx != 1)
7056 return UnableToLegalize;
7057
7058 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7059 unsigned NarrowSize = NarrowTy.getSizeInBits();
7060
7061 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7062 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7063
7064 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7065 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7066 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7067
7068 MI.eraseFromParent();
7069 return Legalized;
7070 }
7071
7072 return UnableToLegalize;
7073}
7074
7077 LLT NarrowTy) {
7078 if (TypeIdx != 1)
7079 return UnableToLegalize;
7080
7081 MachineIRBuilder &B = MIRBuilder;
7082 Register ExpReg = MI.getOperand(2).getReg();
7083 LLT ExpTy = MRI.getType(ExpReg);
7084
7085 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7086
7087 // Clamp the exponent to the range of the target type.
7088 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7089 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7090 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7091 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7092
7093 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7095 MI.getOperand(2).setReg(Trunc.getReg(0));
7097 return Legalized;
7098}
7099
7102 unsigned Opc = MI.getOpcode();
7103 const auto &TII = MIRBuilder.getTII();
7104 auto isSupported = [this](const LegalityQuery &Q) {
7105 auto QAction = LI.getAction(Q).Action;
7106 return QAction == Legal || QAction == Libcall || QAction == Custom;
7107 };
7108 switch (Opc) {
7109 default:
7110 return UnableToLegalize;
7111 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7112 // This trivially expands to CTLZ.
7114 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7116 return Legalized;
7117 }
7118 case TargetOpcode::G_CTLZ: {
7119 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7120 unsigned Len = SrcTy.getSizeInBits();
7121
7122 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7123 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7124 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7125 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7126 auto ICmp = MIRBuilder.buildICmp(
7127 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7128 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7129 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7130 MI.eraseFromParent();
7131 return Legalized;
7132 }
7133 // for now, we do this:
7134 // NewLen = NextPowerOf2(Len);
7135 // x = x | (x >> 1);
7136 // x = x | (x >> 2);
7137 // ...
7138 // x = x | (x >>16);
7139 // x = x | (x >>32); // for 64-bit input
7140 // Up to NewLen/2
7141 // return Len - popcount(x);
7142 //
7143 // Ref: "Hacker's Delight" by Henry Warren
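// Worked example (editorial note, not part of the source comment): with
// Len = 32 and x = 0x00100000, the or/shift cascade smears the highest set
// bit downward, giving x = 0x001FFFFF; popcount(x) = 21, so the result is
// 32 - 21 = 11, the number of leading zeros of the original value.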
7144 Register Op = SrcReg;
7145 unsigned NewLen = PowerOf2Ceil(Len);
7146 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7147 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7148 auto MIBOp = MIRBuilder.buildOr(
7149 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7150 Op = MIBOp.getReg(0);
7151 }
7152 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7153 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7154 MIBPop);
7155 MI.eraseFromParent();
7156 return Legalized;
7157 }
7158 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7159 // This trivially expands to CTTZ.
7161 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7163 return Legalized;
7164 }
7165 case TargetOpcode::G_CTTZ: {
7166 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7167
7168 unsigned Len = SrcTy.getSizeInBits();
7169 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7170 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7171 // zero.
7172 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7173 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7174 auto ICmp = MIRBuilder.buildICmp(
7175 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7176 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7177 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7178 MI.eraseFromParent();
7179 return Legalized;
7180 }
7181 // for now, we use: { return popcount(~x & (x - 1)); }
7182 // unless the target has ctlz but not ctpop, in which case we use:
7183 // { return 32 - nlz(~x & (x-1)); }
7184 // Ref: "Hacker's Delight" by Henry Warren
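// Worked example (editorial note, not part of the source comment): for
// x = 40 = 0b101000, x - 1 = 0b100111 and ~x & (x - 1) = 0b000111, i.e. a
// mask of the trailing-zero positions; popcount of that mask is 3 = cttz(40).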
7185 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7186 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7187 auto MIBTmp = MIRBuilder.buildAnd(
7188 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7189 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7190 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7191 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7192 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7193 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7194 MI.eraseFromParent();
7195 return Legalized;
7196 }
7198 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7199 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7201 return Legalized;
7202 }
7203 case TargetOpcode::G_CTPOP: {
7204 Register SrcReg = MI.getOperand(1).getReg();
7205 LLT Ty = MRI.getType(SrcReg);
7206 unsigned Size = Ty.getSizeInBits();
7207 MachineIRBuilder &B = MIRBuilder;
7208
7209 // Count set bits in blocks of 2 bits. Default approach would be
7210 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7211 // We use the following formula instead:
7212 // B2Count = val - { (val >> 1) & 0x55555555 }
7213 // since it gives the same result in blocks of 2 with one instruction fewer.
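// Per 2-bit block with bits b1:b0 this computes (2*b1 + b0) - b1 = b1 + b0,
// the number of set bits; e.g. the block 0b11 becomes 3 - 1 = 2 (editorial
// illustration, not part of the original comment).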
7214 auto C_1 = B.buildConstant(Ty, 1);
7215 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7216 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7217 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7218 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7219 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7220
7221 // To get the count in blocks of 4, add the values from adjacent blocks of 2.
7222 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7223 auto C_2 = B.buildConstant(Ty, 2);
7224 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7225 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7226 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7227 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7228 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7229 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7230
7231 // For the count in blocks of 8 bits we don't have to mask the high 4 bits before
7232 // the addition, since the count value sits in the range {0,...,8} and 4 bits are
7233 // enough to hold it. After the addition the high 4 bits still hold the count of
7234 // set bits in the high 4-bit block; set them to zero to get the 8-bit result.
7235 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7236 auto C_4 = B.buildConstant(Ty, 4);
7237 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7238 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7239 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7240 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7241 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7242
7243 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7244 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7245 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
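// Editorial illustration (not in the original source): for Size = 32 the
// constant is 0x01010101, so B8Count * MulMask accumulates the four byte
// counts into the most significant byte, and the shift by Size - 8 = 24
// below moves that sum down into the low byte.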
7246 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7247
7248 // Shift count result from 8 high bits to low bits.
7249 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7250
7251 auto IsMulSupported = [this](const LLT Ty) {
7252 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7253 return Action == Legal || Action == WidenScalar || Action == Custom;
7254 };
7255 if (IsMulSupported(Ty)) {
7256 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7257 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7258 } else {
7259 auto ResTmp = B8Count;
7260 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7261 auto ShiftC = B.buildConstant(Ty, Shift);
7262 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7263 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7264 }
7265 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7266 }
7267 MI.eraseFromParent();
7268 return Legalized;
7269 }
7270 }
7271}
7272
7273// Check that (every element of) Reg is undef or not an exact multiple of BW.
7275 Register Reg, unsigned BW) {
7276 return matchUnaryPredicate(
7277 MRI, Reg,
7278 [=](const Constant *C) {
7279 // Null constant here means an undef.
7280 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7281 return !CI || CI->getValue().urem(BW) != 0;
7282 },
7283 /*AllowUndefs*/ true);
7284}
7285
7288 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7289 LLT Ty = MRI.getType(Dst);
7290 LLT ShTy = MRI.getType(Z);
7291
7292 unsigned BW = Ty.getScalarSizeInBits();
7293
7294 if (!isPowerOf2_32(BW))
7295 return UnableToLegalize;
7296
7297 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7298 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7299
7300 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7301 // fshl X, Y, Z -> fshr X, Y, -Z
7302 // fshr X, Y, Z -> fshl X, Y, -Z
7303 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7304 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7305 } else {
7306 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7307 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7308 auto One = MIRBuilder.buildConstant(ShTy, 1);
7309 if (IsFSHL) {
7310 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7311 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7312 } else {
7313 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7314 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7315 }
7316
7317 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7318 }
7319
7320 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7321 MI.eraseFromParent();
7322 return Legalized;
7323}
7324
7327 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7328 LLT Ty = MRI.getType(Dst);
7329 LLT ShTy = MRI.getType(Z);
7330
7331 const unsigned BW = Ty.getScalarSizeInBits();
7332 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7333
7334 Register ShX, ShY;
7335 Register ShAmt, InvShAmt;
7336
7337 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7338 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7339 // fshl: X << C | Y >> (BW - C)
7340 // fshr: X << (BW - C) | Y >> C
7341 // where C = Z % BW is not zero
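// Editorial example (not in the original source): with BW = 8, X = 0xAB,
// Y = 0xCD and Z = 4, fshl produces (0xAB << 4) | (0xCD >> 4)
// = 0xB0 | 0x0C = 0xBC.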
7342 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7343 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7344 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7345 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7346 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7347 } else {
7348 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7349 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
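// Editorial note (not in the original source): splitting off a constant
// shift by 1 keeps the variable shift amount at most BW - 1, so a shift by
// BW is never emitted; e.g. when Z % BW == 0, fshl yields
// X | (Y >> 1 >> (BW - 1)) = X, and fshr yields (X << 1 << (BW - 1)) | Y = Y.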
7350 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7351 if (isPowerOf2_32(BW)) {
7352 // Z % BW -> Z & (BW - 1)
7353 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7354 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7355 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7356 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7357 } else {
7358 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7359 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7360 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7361 }
7362
7363 auto One = MIRBuilder.buildConstant(ShTy, 1);
7364 if (IsFSHL) {
7365 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7366 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7367 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7368 } else {
7369 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7370 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7371 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7372 }
7373 }
7374
7376 MI.eraseFromParent();
7377 return Legalized;
7378}
7379
7382 // These operations approximately do the following (while avoiding undefined
7383 // shifts by BW):
7384 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7385 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7386 Register Dst = MI.getOperand(0).getReg();
7387 LLT Ty = MRI.getType(Dst);
7388 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7389
7390 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7391 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7392
7393 // TODO: Use smarter heuristic that accounts for vector legalization.
7394 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7395 return lowerFunnelShiftAsShifts(MI);
7396
7397 // This only works for powers of 2, fallback to shifts if it fails.
7398 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7399 if (Result == UnableToLegalize)
7400 return lowerFunnelShiftAsShifts(MI);
7401 return Result;
7402}
7403
7405 auto [Dst, Src] = MI.getFirst2Regs();
7406 LLT DstTy = MRI.getType(Dst);
7407 LLT SrcTy = MRI.getType(Src);
7408
7409 uint32_t DstTySize = DstTy.getSizeInBits();
7410 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7411 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7412
7413 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7414 !isPowerOf2_32(SrcTyScalarSize))
7415 return UnableToLegalize;
7416
7417 // The step between extends is too large; split it by creating an intermediate
7418 // extend instruction.
7419 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7420 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7421 // If the destination type is illegal, split it into multiple statements
7422 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7423 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7424 // Unmerge the vector
7425 LLT EltTy = MidTy.changeElementCount(
7426 MidTy.getElementCount().divideCoefficientBy(2));
7427 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7428
7429 // ZExt the vectors
7430 LLT ZExtResTy = DstTy.changeElementCount(
7431 DstTy.getElementCount().divideCoefficientBy(2));
7432 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7433 {UnmergeSrc.getReg(0)});
7434 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7435 {UnmergeSrc.getReg(1)});
7436
7437 // Merge the ending vectors
7438 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7439
7440 MI.eraseFromParent();
7441 return Legalized;
7442 }
7443 return UnableToLegalize;
7444}
7445
7447 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7449 // Similar to how operand splitting is done in SelectionDAG, we can handle
7450 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7451 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7452 // %lo16(<4 x s16>) = G_TRUNC %inlo
7453 // %hi16(<4 x s16>) = G_TRUNC %inhi
7454 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7455 // %res(<8 x s8>) = G_TRUNC %in16
7456
7457 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7458
7459 Register DstReg = MI.getOperand(0).getReg();
7460 Register SrcReg = MI.getOperand(1).getReg();
7461 LLT DstTy = MRI.getType(DstReg);
7462 LLT SrcTy = MRI.getType(SrcReg);
7463
7464 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7466 isPowerOf2_32(SrcTy.getNumElements()) &&
7468 // Split input type.
7469 LLT SplitSrcTy = SrcTy.changeElementCount(
7470 SrcTy.getElementCount().divideCoefficientBy(2));
7471
7472 // First, split the source into two smaller vectors.
7473 SmallVector<Register, 2> SplitSrcs;
7474 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7475
7476 // Truncate the splits into intermediate narrower elements.
7477 LLT InterTy;
7478 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7479 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7480 else
7481 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7482 for (Register &Src : SplitSrcs)
7483 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7484
7485 // Combine the new truncates into one vector
7486 auto Merge = MIRBuilder.buildMergeLikeInstr(
7487 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7488
7489 // Truncate the new vector to the final result type
7490 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7491 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7492 else
7493 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7494
7495 MI.eraseFromParent();
7496
7497 return Legalized;
7498 }
7499 return UnableToLegalize;
7500}
7501
7504 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7505 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7506 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7507 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7508 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7509 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7510 MI.eraseFromParent();
7511 return Legalized;
7512}
7513
7515 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7516
7517 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7518 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7519
7521
7522 // If a rotate in the other direction is supported, use it.
7523 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7524 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
7525 isPowerOf2_32(EltSizeInBits))
7526 return lowerRotateWithReverseRotate(MI);
7527
7528 // If a funnel shift is supported, use it.
7529 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7530 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7531 bool IsFShLegal = false;
7532 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7533 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7534 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
7535 Register R3) {
7536 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
7537 MI.eraseFromParent();
7538 return Legalized;
7539 };
7540 // If a funnel shift in the other direction is supported, use it.
7541 if (IsFShLegal) {
7542 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7543 } else if (isPowerOf2_32(EltSizeInBits)) {
7544 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
7545 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7546 }
7547 }
7548
7549 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7550 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7551 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
7552 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
7553 Register ShVal;
7554 Register RevShiftVal;
7555 if (isPowerOf2_32(EltSizeInBits)) {
7556 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7557 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
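// Editorial example (not in the original source): for an 8-bit rotl of
// x = 0x96 by c = 3, this gives (0x96 << (3 & 7)) | (0x96 >> (-3 & 7))
// = 0xB0 | 0x04 = 0xB4.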
7558 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7559 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7560 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7561 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7562 RevShiftVal =
7563 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
7564 } else {
7565 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7566 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7567 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
7568 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
7569 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7570 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7571 auto One = MIRBuilder.buildConstant(AmtTy, 1);
7572 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7573 RevShiftVal =
7574 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
7575 }
7576 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
7577 MI.eraseFromParent();
7578 return Legalized;
7579}
7580
7581// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
7582// representation.
7585 auto [Dst, Src] = MI.getFirst2Regs();
7586 const LLT S64 = LLT::scalar(64);
7587 const LLT S32 = LLT::scalar(32);
7588 const LLT S1 = LLT::scalar(1);
7589
7590 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7591
7592 // unsigned cul2f(ulong u) {
7593 // uint lz = clz(u);
7594 // uint e = (u != 0) ? 127U + 63U - lz : 0;
7595 // u = (u << lz) & 0x7fffffffffffffffUL;
7596 // ulong t = u & 0xffffffffffUL;
7597 // uint v = (e << 23) | (uint)(u >> 40);
7598 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
7599 // return as_float(v + r);
7600 // }
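// Editorial example (not in the original source): for u = 1, lz = 63 and
// e = 127 + 63 - 63 = 127; the shifted-and-masked u and t are both 0, so
// v = 127 << 23 = 0x3F800000 (the bit pattern of 1.0f) and r = 0.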
7601
7602 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
7603 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
7604
7605 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
7606
7607 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
7608 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
7609
7610 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
7611 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
7612
7613 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
7614 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
7615
7616 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
7617
7618 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
7619 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
7620
7621 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
7622 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
7623 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
7624
7625 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
7626 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
7627 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
7628 auto One = MIRBuilder.buildConstant(S32, 1);
7629
7630 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
7631 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
7632 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
7633 MIRBuilder.buildAdd(Dst, V, R);
7634
7635 MI.eraseFromParent();
7636 return Legalized;
7637}
7638
7639// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
7640// operations and G_SITOFP
7643 auto [Dst, Src] = MI.getFirst2Regs();
7644 const LLT S64 = LLT::scalar(64);
7645 const LLT S32 = LLT::scalar(32);
7646 const LLT S1 = LLT::scalar(1);
7647
7648 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7649
7650 // For i64 < INT_MAX we simply reuse SITOFP.
7651 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
7652 // saved before division, convert to float by SITOFP, multiply the result
7653 // by 2.
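// Editorial example (not in the original source): for Src = 2^63 (which
// SITOFP alone would treat as negative), Halved = 2^62, the saved low bit
// is 0, SITOFP converts 2^62 exactly, and the final FADD doubles it back
// to 2^63.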
7654 auto One = MIRBuilder.buildConstant(S64, 1);
7655 auto Zero = MIRBuilder.buildConstant(S64, 0);
7656 // Result if Src < INT_MAX
7657 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
7658 // Result if Src >= INT_MAX
7659 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
7660 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
7661 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
7662 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
7663 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
7664 // Check if the original value is larger than INT_MAX by comparing with
7665 // zero to pick one of the two conversions.
7666 auto IsLarge =
7667 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, Src, Zero);
7668 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
7669
7670 MI.eraseFromParent();
7671 return Legalized;
7672}
7673
7674// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
7675// IEEE double representation.
7678 auto [Dst, Src] = MI.getFirst2Regs();
7679 const LLT S64 = LLT::scalar(64);
7680 const LLT S32 = LLT::scalar(32);
7681
7682 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
7683
7684 // We create the double value from two 32-bit parts whose exponents differ by 32.
7685 // Note that + and - are float operations that adjust the implicit leading
7686 // one, the bases 2^52 and 2^84 are for illustrative purposes.
7687 //
7688 // X = 2^52 * 1.0...LowBits
7689 // Y = 2^84 * 1.0...HighBits
7690 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
7691 // = - 2^52 * 1.0...HighBits
7692 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
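// Editorial example (not in the original source): for Src = 1, LowBitsFP
// has the bit pattern 0x4330000000000001 = 2^52 + 1 and HighBitsFP is
// exactly 2^84; Scratch = 2^84 - (2^84 + 2^52) = -2^52, and the final add
// gives -2^52 + (2^52 + 1) = 1.0.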
7693 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
7694 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
7695 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
7696 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
7697 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
7698
7699 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
7700 LowBits = MIRBuilder.buildZExt(S64, LowBits);
7701 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
7702 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
7703 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
7704 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
7705 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
7706
7707 MI.eraseFromParent();
7708 return Legalized;
7709}
7710
7711/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
7712/// convert fpround f64->f16 without double-rounding, so we manually perform the
7713/// lowering here where we know it is valid.
7716 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
7717 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
7718 ? MIRBuilder.buildUITOFP(SrcTy, Src)
7719 : MIRBuilder.buildSITOFP(SrcTy, Src);
7720 LLT S32Ty = SrcTy.changeElementSize(32);
7721 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
7722 MIRBuilder.buildFPTrunc(Dst, M2);
7723 MI.eraseFromParent();
7725}
7726
7728 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7729
7730 if (SrcTy == LLT::scalar(1)) {
7731 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
7732 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7733 MIRBuilder.buildSelect(Dst, Src, True, False);
7734 MI.eraseFromParent();
7735 return Legalized;
7736 }
7737
7738 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
7739 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
7740
7741 if (SrcTy != LLT::scalar(64))
7742 return UnableToLegalize;
7743
7744 if (DstTy == LLT::scalar(32))
7745 // TODO: SelectionDAG has several alternative expansions to port which may
7746 // be more reasonable depending on the available instructions. We also need
7747 // a more advanced mechanism to choose an optimal version depending on
7748 // target features such as sitofp or CTLZ availability.
7749 return lowerU64ToF32WithSITOFP(MI);
7750
7751 if (DstTy == LLT::scalar(64))
7752 return lowerU64ToF64BitFloatOps(MI);
7753
7754 return UnableToLegalize;
7755}
7756
7758 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7759
7760 const LLT S64 = LLT::scalar(64);
7761 const LLT S32 = LLT::scalar(32);
7762 const LLT S1 = LLT::scalar(1);
7763
7764 if (SrcTy == S1) {
7765 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
7766 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7767 MIRBuilder.buildSelect(Dst, Src, True, False);
7768 MI.eraseFromParent();
7769 return Legalized;
7770 }
7771
7772 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
7773 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
7774
7775 if (SrcTy != S64)
7776 return UnableToLegalize;
7777
7778 if (DstTy == S32) {
7779 // signed cl2f(long l) {
7780 // long s = l >> 63;
7781 // float r = cul2f((l + s) ^ s);
7782 // return s ? -r : r;
7783 // }
7784 Register L = Src;
7785 auto SignBit = MIRBuilder.buildConstant(S64, 63);
7786 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
7787
7788 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
7789 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
7790 auto R = MIRBuilder.buildUITOFP(S32, Xor);
7791
7792 auto RNeg = MIRBuilder.buildFNeg(S32, R);
7793 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
7794 MIRBuilder.buildConstant(S64, 0));
7795 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
7796 MI.eraseFromParent();
7797 return Legalized;
7798 }
7799
7800 return UnableToLegalize;
7801}
7802
7804 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7805 const LLT S64 = LLT::scalar(64);
7806 const LLT S32 = LLT::scalar(32);
7807
7808 if (SrcTy != S64 && SrcTy != S32)
7809 return UnableToLegalize;
7810 if (DstTy != S32 && DstTy != S64)
7811 return UnableToLegalize;
7812
7813 // FPTOSI gives same result as FPTOUI for positive signed integers.
7814 // FPTOUI needs to deal with fp values that convert to unsigned integers
7815 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
7816
7817 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
7818 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
7820 APInt::getZero(SrcTy.getSizeInBits()));
7821 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
7822
7823 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
7824
7825 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
7826 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
7827 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
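// Editorial example (not in the original source): converting f32 -> u32
// with Src exactly 2^31: Src is not ULT the threshold, so the second path
// is taken; FSub = 0.0, FPTOSI yields 0, and XOR with the sign mask
// 0x80000000 reconstructs 2^31.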
7828 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
7829 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
7830 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
7831 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
7832
7833 const LLT S1 = LLT::scalar(1);
7834
7835 MachineInstrBuilder FCMP =
7836 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
7837 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
7838
7839 MI.eraseFromParent();
7840 return Legalized;
7841}
7842
7844 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7845 const LLT S64 = LLT::scalar(64);
7846 const LLT S32 = LLT::scalar(32);
7847
7848 // FIXME: Only f32 to i64 conversions are supported.
7849 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
7850 return UnableToLegalize;
7851
7852 // Expand f32 -> i64 conversion
7853 // This algorithm comes from compiler-rt's implementation of fixsfdi:
7854 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
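// Editorial example (not in the original source): for Src = 8.0f
// (0x41000000), the biased exponent is 130, so Exponent = 3 and
// R = 0x00800000; since Exponent <= 23 the right-shift path is chosen,
// R >> (23 - 3) = 8, the sign is 0, and the result is 8.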
7855
7856 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
7857
7858 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
7859 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
7860
7861 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
7862 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
7863
7864 auto SignMask = MIRBuilder.buildConstant(SrcTy,
7865 APInt::getSignMask(SrcEltBits));
7866 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
7867 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
7868 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
7869 Sign = MIRBuilder.buildSExt(DstTy, Sign);
7870
7871 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
7872 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
7873 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
7874
7875 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
7876 R = MIRBuilder.buildZExt(DstTy, R);
7877
7878 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
7879 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
7880 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
7881 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
7882
7883 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
7884 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
7885
7886 const LLT S1 = LLT::scalar(1);
7887 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
7888 S1, Exponent, ExponentLoBit);
7889
7890 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
7891
7892 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
7893 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
7894
7895 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
7896
7897 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7898 S1, Exponent, ZeroSrcTy);
7899
7900 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7901 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7902
7903 MI.eraseFromParent();
7904 return Legalized;
7905}
7906
7909 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7910
7911 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
7912 unsigned SatWidth = DstTy.getScalarSizeInBits();
7913
7914 // Determine minimum and maximum integer values and their corresponding
7915 // floating-point values.
7916 APInt MinInt, MaxInt;
7917 if (IsSigned) {
7918 MinInt = APInt::getSignedMinValue(SatWidth);
7919 MaxInt = APInt::getSignedMaxValue(SatWidth);
7920 } else {
7921 MinInt = APInt::getMinValue(SatWidth);
7922 MaxInt = APInt::getMaxValue(SatWidth);
7923 }
7924
7925 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
7926 APFloat MinFloat(Semantics);
7927 APFloat MaxFloat(Semantics);
7928
7929 APFloat::opStatus MinStatus =
7930 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
7931 APFloat::opStatus MaxStatus =
7932 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
7933 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
7934 !(MaxStatus & APFloat::opStatus::opInexact);
7935
7936 // If the integer bounds are exactly representable as floats, emit a
7937 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
7938 // and selects.
7939 if (AreExactFloatBounds) {
7940 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
7941 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
7943 SrcTy.changeElementSize(1), Src, MaxC);
7944 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
7945 // Clamp by MaxFloat from above. NaN cannot occur.
7946 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
7947 auto MinP =
7950 auto Min =
7951 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
7952 // Convert clamped value to integer. In the unsigned case we're done,
7953 // because we mapped NaN to MinFloat, which will cast to zero.
7954 if (!IsSigned) {
7955 MIRBuilder.buildFPTOUI(Dst, Min);
7956 MI.eraseFromParent();
7957 return Legalized;
7958 }
7959
7960 // Otherwise, select 0 if Src is NaN.
7961 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
7963 DstTy.changeElementSize(1), Src, Src);
7964 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
7965 FpToInt);
7966 MI.eraseFromParent();
7967 return Legalized;
7968 }
7969
7970 // Result of direct conversion. The assumption here is that the operation is
7971 // non-trapping and it's fine to apply it to an out-of-range value if we
7972 // select it away later.
7973 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
7974 : MIRBuilder.buildFPTOUI(DstTy, Src);
7975
7976 // If Src ULT MinFloat, select MinInt. In particular, this also selects
7977 // MinInt if Src is NaN.
7978 auto ULT =
7980 MIRBuilder.buildFConstant(SrcTy, MinFloat));
7981 auto Max = MIRBuilder.buildSelect(
7982 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
7983 // If Src OGT MaxFloat, select MaxInt.
7984 auto OGT =
7986 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
7987
7988 // In the unsigned case we are done, because we mapped NaN to MinInt, which
7989 // is already zero.
7990 if (!IsSigned) {
7991 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
7992 Max);
7993 MI.eraseFromParent();
7994 return Legalized;
7995 }
7996
7997 // Otherwise, select 0 if Src is NaN.
7998 auto Min = MIRBuilder.buildSelect(
7999 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8001 DstTy.changeElementSize(1), Src, Src);
8002 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8003 MI.eraseFromParent();
8004 return Legalized;
8005}
8006
8007// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8010 const LLT S1 = LLT::scalar(1);
8011 const LLT S32 = LLT::scalar(32);
8012
8013 auto [Dst, Src] = MI.getFirst2Regs();
8014 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8015 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8016
8017 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8018 return UnableToLegalize;
8019
8020 if (MI.getFlag(MachineInstr::FmAfn)) {
8021 unsigned Flags = MI.getFlags();
8022 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8023 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8024 MI.eraseFromParent();
8025 return Legalized;
8026 }
8027
8028 const unsigned ExpMask = 0x7ff;
8029 const unsigned ExpBiasf64 = 1023;
8030 const unsigned ExpBiasf16 = 15;
8031
8032 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8033 Register U = Unmerge.getReg(0);
8034 Register UH = Unmerge.getReg(1);
8035
8036 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8038
8039 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8040 // add the f16 bias (15) to get the biased exponent for the f16 format.
8041 E = MIRBuilder.buildAdd(
8042 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8043
8046
8047 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8048 MIRBuilder.buildConstant(S32, 0x1ff));
8049 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8050
8051 auto Zero = MIRBuilder.buildConstant(S32, 0);
8052 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8053 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8054 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8055
8056 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8057 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8058 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8059 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8060
8061 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8062 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8063
8064 // N = M | (E << 12);
8065 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8066 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8067
8068 // B = clamp(1-E, 0, 13);
8069 auto One = MIRBuilder.buildConstant(S32, 1);
8070 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8071 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8073
8074 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8075 MIRBuilder.buildConstant(S32, 0x1000));
8076
8077 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8078 auto D0 = MIRBuilder.buildShl(S32, D, B);
8079
8080 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8081 D0, SigSetHigh);
8082 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8083 D = MIRBuilder.buildOr(S32, D, D1);
8084
8085 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8086 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8087
8088 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8090
8091 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8093 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8094
8095 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8097 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8098
8099 V1 = MIRBuilder.buildOr(S32, V0, V1);
8100 V = MIRBuilder.buildAdd(S32, V, V1);
8101
8102 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8103 E, MIRBuilder.buildConstant(S32, 30));
8104 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8105 MIRBuilder.buildConstant(S32, 0x7c00), V);
8106
8107 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8108 E, MIRBuilder.buildConstant(S32, 1039));
8109 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8110
8111 // Extract the sign bit.
8112 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8113 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8114
8115 // Insert the sign bit
8116 V = MIRBuilder.buildOr(S32, Sign, V);
8117
8118 MIRBuilder.buildTrunc(Dst, V);
8119 MI.eraseFromParent();
8120 return Legalized;
8121}
8122
8125 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8126 const LLT S64 = LLT::scalar(64);
8127 const LLT S16 = LLT::scalar(16);
8128
8129 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8131
8132 return UnableToLegalize;
8133}
8134
8136 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8137 LLT Ty = MRI.getType(Dst);
8138
8139 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8140 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8141 MI.eraseFromParent();
8142 return Legalized;
8143}
8144
8146 switch (Opc) {
8147 case TargetOpcode::G_SMIN:
8148 return CmpInst::ICMP_SLT;
8149 case TargetOpcode::G_SMAX:
8150 return CmpInst::ICMP_SGT;
8151 case TargetOpcode::G_UMIN:
8152 return CmpInst::ICMP_ULT;
8153 case TargetOpcode::G_UMAX:
8154 return CmpInst::ICMP_UGT;
8155 default:
8156 llvm_unreachable("not in integer min/max");
8157 }
8158}
8159
8161 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8162
8163 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8164 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8165
8166 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8167 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8168
8169 MI.eraseFromParent();
8170 return Legalized;
8171}
8172
8175 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8176
8177 Register Dst = Cmp->getReg(0);
8178 LLT DstTy = MRI.getType(Dst);
8179 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8180 LLT CmpTy = DstTy.changeElementSize(1);
8181
8182 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8185 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8188
8189 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8190 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8191 Cmp->getRHSReg());
8192 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8193 Cmp->getRHSReg());
8194
8195 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8196 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8199 auto One = MIRBuilder.buildConstant(DstTy, 1);
8200 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8201
8202 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8203 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8204 } else {
8206 std::swap(IsGT, IsLT);
8207 // Extend boolean results to DstTy, which is at least i2, before subtracting
8208 // them.
8209 unsigned BoolExtOp =
8210 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8211 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8212 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8213 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8214 }
8215
8216 MI.eraseFromParent();
8217 return Legalized;
8218}
8219
8222 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8223 const int Src0Size = Src0Ty.getScalarSizeInBits();
8224 const int Src1Size = Src1Ty.getScalarSizeInBits();
8225
8226 auto SignBitMask = MIRBuilder.buildConstant(
8227 Src0Ty, APInt::getSignMask(Src0Size));
8228
8229 auto NotSignBitMask = MIRBuilder.buildConstant(
8230 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8231
8232 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8233 Register And1;
8234 if (Src0Ty == Src1Ty) {
8235 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8236 } else if (Src0Size > Src1Size) {
8237 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8238 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8239 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8240 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8241 } else {
8242 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8243 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8244 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8245 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8246 }
8247
8248 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8249 // constants are a nan and -0.0, but the final result should preserve
8250 // everything.
8251 unsigned Flags = MI.getFlags();
8252
8253 // We masked the sign bit and the not-sign bit, so these are disjoint.
8254 Flags |= MachineInstr::Disjoint;
8255
8256 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8257
8258 MI.eraseFromParent();
8259 return Legalized;
8260}
8261
8264 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8265 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8266 // depend on fminnum/fmaxnum.
8267
8268 unsigned NewOp;
8269 switch (MI.getOpcode()) {
8270 case TargetOpcode::G_FMINNUM:
8271 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8272 break;
8273 case TargetOpcode::G_FMINIMUMNUM:
8274 NewOp = TargetOpcode::G_FMINNUM;
8275 break;
8276 case TargetOpcode::G_FMAXNUM:
8277 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8278 break;
8279 case TargetOpcode::G_FMAXIMUMNUM:
8280 NewOp = TargetOpcode::G_FMAXNUM;
8281 break;
8282 default:
8283 llvm_unreachable("unexpected min/max opcode");
8284 }
8285
8286 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8287 LLT Ty = MRI.getType(Dst);
8288
8289 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8290 // Insert canonicalizes if it's possible we need to quiet to get correct
8291 // sNaN behavior.
8292
8293 // Note this must be done here, and not as an optimization combine in the
8294 // absence of a dedicated quiet-snan instruction as we're using an
8295 // omni-purpose G_FCANONICALIZE.
8296 if (!isKnownNeverSNaN(Src0, MRI))
8297 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8298
8299 if (!isKnownNeverSNaN(Src1, MRI))
8300 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8301 }
8302
8303 // If there are no nans, it's safe to simply replace this with the non-IEEE
8304 // version.
8305 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8306 MI.eraseFromParent();
8307 return Legalized;
8308}
8309
8311 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8312 Register DstReg = MI.getOperand(0).getReg();
8313 LLT Ty = MRI.getType(DstReg);
8314 unsigned Flags = MI.getFlags();
8315
8316 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8317 Flags);
8318 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8319 MI.eraseFromParent();
8320 return Legalized;
8321}
8322
8325 auto [DstReg, X] = MI.getFirst2Regs();
8326 const unsigned Flags = MI.getFlags();
8327 const LLT Ty = MRI.getType(DstReg);
8328 const LLT CondTy = Ty.changeElementSize(1);
8329
8330 // round(x) =>
8331 // t = trunc(x);
8332 // d = fabs(x - t);
8333 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8334 // return t + o;
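// Editorial example (not in the original source): round(-2.5) gives
// t = -2.0, d = 0.5, o = copysign(1.0, -2.5) = -1.0, so the result is
// -3.0 (ties are rounded away from zero).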
8335
8336 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8337
8338 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8339 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8340
8341 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8342 auto Cmp =
8343 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8344
8345 // Could emit G_UITOFP instead
8346 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8347 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8348 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8349 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8350
8351 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8352
8353 MI.eraseFromParent();
8354 return Legalized;
8355}
8356
8358 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8359 unsigned Flags = MI.getFlags();
8360 LLT Ty = MRI.getType(DstReg);
8361 const LLT CondTy = Ty.changeElementSize(1);
8362
8363 // result = trunc(src);
8364 // if (src < 0.0 && src != result)
8365 // result += -1.0.
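// Editorial note (not in the original source): the i1 condition below is
// converted with G_SITOFP, so "true" becomes -1.0; e.g. floor(-1.5) yields
// trunc = -1.0 plus -1.0 = -2.0, while floor(1.5) adds 0.0 and stays 1.0.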
8366
8367 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8368 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8369
8370 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8371 SrcReg, Zero, Flags);
8372 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8373 SrcReg, Trunc, Flags);
8374 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8375 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8376
8377 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8378 MI.eraseFromParent();
8379 return Legalized;
8380}
8381
8384 const unsigned NumOps = MI.getNumOperands();
8385 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8386 unsigned PartSize = Src0Ty.getSizeInBits();
8387
8388 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8389 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8390
8391 for (unsigned I = 2; I != NumOps; ++I) {
8392 const unsigned Offset = (I - 1) * PartSize;
8393
8394 Register SrcReg = MI.getOperand(I).getReg();
8395 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8396
8397 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8398 MRI.createGenericVirtualRegister(WideTy);
8399
8400 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8401 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8402 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8403 ResultReg = NextResult;
8404 }
8405
8406 if (DstTy.isPointer()) {
8408 DstTy.getAddressSpace())) {
8409 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8410 return UnableToLegalize;
8411 }
8412
8413 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8414 }
8415
8416 MI.eraseFromParent();
8417 return Legalized;
8418}
8419
8422 const unsigned NumDst = MI.getNumOperands() - 1;
8423 Register SrcReg = MI.getOperand(NumDst).getReg();
8424 Register Dst0Reg = MI.getOperand(0).getReg();
8425 LLT DstTy = MRI.getType(Dst0Reg);
8426 if (DstTy.isPointer())
8427 return UnableToLegalize; // TODO
8428
8429 SrcReg = coerceToScalar(SrcReg);
8430 if (!SrcReg)
8431 return UnableToLegalize;
8432
8433 // Expand scalarizing unmerge as bitcast to integer and shift.
8434 LLT IntTy = MRI.getType(SrcReg);
8435
8436 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8437
8438 const unsigned DstSize = DstTy.getSizeInBits();
8439 unsigned Offset = DstSize;
8440 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8441 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8442 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8443 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8444 }
8445
8446 MI.eraseFromParent();
8447 return Legalized;
8448}
8449
8450/// Lower a vector extract or insert by writing the vector to a stack temporary
8451/// and reloading the element or vector.
8452///
8453/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8454/// =>
8455/// %stack_temp = G_FRAME_INDEX
8456/// G_STORE %vec, %stack_temp
8457/// %idx = clamp(%idx, %vec.getNumElements())
8458/// %element_ptr = G_PTR_ADD %stack_temp, %idx
8459/// %dst = G_LOAD %element_ptr
8460 LegalizerHelper::LegalizeResult
8461 LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
8462 Register DstReg = MI.getOperand(0).getReg();
8463 Register SrcVec = MI.getOperand(1).getReg();
8464 Register InsertVal;
8465 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8466 InsertVal = MI.getOperand(2).getReg();
8467
8468 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
8469
8470 LLT VecTy = MRI.getType(SrcVec);
8471 LLT EltTy = VecTy.getElementType();
8472 unsigned NumElts = VecTy.getNumElements();
8473
8474 int64_t IdxVal;
8475 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
8476 SmallVector<Register, 8> SrcRegs;
8477 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
8478
8479 if (InsertVal) {
8480 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
8481 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
8482 } else {
8483 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
8484 }
8485
8486 MI.eraseFromParent();
8487 return Legalized;
8488 }
8489
8490 if (!EltTy.isByteSized()) { // Not implemented.
8491 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8492 return UnableToLegalize;
8493 }
8494
8495 unsigned EltBytes = EltTy.getSizeInBytes();
8496 Align VecAlign = getStackTemporaryAlignment(VecTy);
8497 Align EltAlign;
8498
8499 MachinePointerInfo PtrInfo;
8500 auto StackTemp = createStackTemporary(
8501 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
8502 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
8503
8504 // Get the pointer to the element, and be sure not to hit undefined behavior
8505 // if the index is out of bounds.
8506 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
8507
8508 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
8509 int64_t Offset = IdxVal * EltBytes;
8510 PtrInfo = PtrInfo.getWithOffset(Offset);
8511 EltAlign = commonAlignment(VecAlign, Offset);
8512 } else {
8513 // We lose information with a variable offset.
8514 EltAlign = getStackTemporaryAlignment(EltTy);
8515 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
8516 }
8517
8518 if (InsertVal) {
8519 // Write the inserted element
8520 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
8521
8522 // Reload the whole vector.
8523 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
8524 } else {
8525 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
8526 }
8527
8528 MI.eraseFromParent();
8529 return Legalized;
8530}
8531
8532 LegalizerHelper::LegalizeResult
8533 LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
8534 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8535 MI.getFirst3RegLLTs();
8536 LLT IdxTy = LLT::scalar(32);
8537
8538 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
8539 Register Undef;
8540 SmallVector<Register, 32> BuildVec;
8541 LLT EltTy = DstTy.getScalarType();
8542
8543 for (int Idx : Mask) {
8544 if (Idx < 0) {
8545 if (!Undef.isValid())
8546 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
8547 BuildVec.push_back(Undef);
8548 continue;
8549 }
8550
8551 if (Src0Ty.isScalar()) {
8552 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
8553 } else {
8554 int NumElts = Src0Ty.getNumElements();
8555 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
8556 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
8557 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
8558 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
8559 BuildVec.push_back(Extract.getReg(0));
8560 }
8561 }
8562
8563 if (DstTy.isVector())
8564 MIRBuilder.buildBuildVector(DstReg, BuildVec);
8565 else
8566 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
8567 MI.eraseFromParent();
8568 return Legalized;
8569}
8570
8571 LegalizerHelper::LegalizeResult
8572 LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
8573 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
8574 MI.getFirst4RegLLTs();
8575
8576 if (VecTy.isScalableVector())
8577 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
8578
8579 Align VecAlign = getStackTemporaryAlignment(VecTy);
8580 MachinePointerInfo PtrInfo;
8581 Register StackPtr =
8582 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
8583 PtrInfo)
8584 .getReg(0);
8585 MachinePointerInfo ValPtrInfo =
8586 MachinePointerInfo::getUnknownStack(*MI.getMF());
8587
8588 LLT IdxTy = LLT::scalar(32);
8589 LLT ValTy = VecTy.getElementType();
8590 Align ValAlign = getStackTemporaryAlignment(ValTy);
8591
8592 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
8593
8594 bool HasPassthru =
8595 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
8596
8597 if (HasPassthru)
8598 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
8599
8600 Register LastWriteVal;
8601 std::optional<APInt> PassthruSplatVal =
8602 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
8603
8604 if (PassthruSplatVal.has_value()) {
8605 LastWriteVal =
8606 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
8607 } else if (HasPassthru) {
8608 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
8609 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
8610 {LLT::scalar(32)}, {Popcount});
8611
8612 Register LastElmtPtr =
8613 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
8614 LastWriteVal =
8615 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
8616 .getReg(0);
8617 }
8618
8619 unsigned NumElmts = VecTy.getNumElements();
8620 for (unsigned I = 0; I < NumElmts; ++I) {
8621 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
8622 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
8623 Register ElmtPtr =
8624 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
8625 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
8626
8627 LLT MaskITy = MaskTy.getElementType();
8628 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
8629 if (MaskITy.getSizeInBits() > 1)
8630 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
8631
8632 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
8633 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
8634
8635 if (HasPassthru && I == NumElmts - 1) {
8636 auto EndOfVector =
8637 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
8638 auto AllLanesSelected = MIRBuilder.buildICmp(
8639 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
8640 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
8641 {OutPos, EndOfVector});
8642 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
8643
8644 LastWriteVal =
8645 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
8646 .getReg(0);
8647 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
8648 }
8649 }
8650
8651 // TODO: Use StackPtr's FrameIndex alignment.
8652 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
8653
8654 MI.eraseFromParent();
8655 return Legalized;
8656}
8657
8658 Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
8659 Register AllocSize,
8660 Align Alignment,
8661 LLT PtrTy) {
8662 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
8663
8664 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
8665 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
8666
8667 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
8668 // have to generate an extra instruction to negate the alloc and then use
8669 // G_PTR_ADD to add the negative offset.
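 // For example, with a 16-byte alignment the mask is ~15; clearing the low
 // bits of (SP - AllocSize) rounds the new SP down to a 16-byte boundary,
 // which keeps the whole allocation inside the reserved area on a
 // downward-growing stack.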
8670 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
8671 if (Alignment > Align(1)) {
8672 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
8673 AlignMask.negate();
8674 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
8675 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
8676 }
8677
8678 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
8679}
8680
8681 LegalizerHelper::LegalizeResult
8682 LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
8683 const auto &MF = *MI.getMF();
8684 const auto &TFI = *MF.getSubtarget().getFrameLowering();
8685 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
8686 return UnableToLegalize;
8687
8688 Register Dst = MI.getOperand(0).getReg();
8689 Register AllocSize = MI.getOperand(1).getReg();
8690 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
8691
8692 LLT PtrTy = MRI.getType(Dst);
8694 Register SPTmp =
8695 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
8696
8697 MIRBuilder.buildCopy(SPReg, SPTmp);
8698 MIRBuilder.buildCopy(Dst, SPTmp);
8699
8700 MI.eraseFromParent();
8701 return Legalized;
8702}
8703
8704 LegalizerHelper::LegalizeResult
8705 LegalizerHelper::lowerStackSave(MachineInstr &MI) {
8706 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
8707 if (!StackPtr)
8708 return UnableToLegalize;
8709
8710 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
8711 MI.eraseFromParent();
8712 return Legalized;
8713}
8714
8715 LegalizerHelper::LegalizeResult
8716 LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
8717 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
8718 if (!StackPtr)
8719 return UnableToLegalize;
8720
8721 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
8722 MI.eraseFromParent();
8723 return Legalized;
8724}
8725
8726 LegalizerHelper::LegalizeResult
8727 LegalizerHelper::lowerExtract(MachineInstr &MI) {
8728 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8729 unsigned Offset = MI.getOperand(2).getImm();
8730
8731 // Extract sub-vector or one element
8732 if (SrcTy.isVector()) {
8733 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
8734 unsigned DstSize = DstTy.getSizeInBits();
8735
8736 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
8737 (Offset + DstSize <= SrcTy.getSizeInBits())) {
8738 // Unmerge and allow access to each Src element for the artifact combiner.
8739 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
8740
8741 // Take the element(s) we need to extract and copy them (merging if needed).
8742 SmallVector<Register, 8> SubVectorElts;
8743 for (unsigned Idx = Offset / SrcEltSize;
8744 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
8745 SubVectorElts.push_back(Unmerge.getReg(Idx));
8746 }
8747 if (SubVectorElts.size() == 1)
8748 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
8749 else
8750 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
8751
8752 MI.eraseFromParent();
8753 return Legalized;
8754 }
8755 }
8756
8757 if (DstTy.isScalar() &&
8758 (SrcTy.isScalar() ||
8759 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
8760 LLT SrcIntTy = SrcTy;
8761 if (!SrcTy.isScalar()) {
8762 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
8763 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
8764 }
8765
8766 if (Offset == 0)
8767 MIRBuilder.buildTrunc(DstReg, SrcReg);
8768 else {
8769 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
8770 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
8771 MIRBuilder.buildTrunc(DstReg, Shr);
8772 }
8773
8774 MI.eraseFromParent();
8775 return Legalized;
8776 }
8777
8778 return UnableToLegalize;
8779}
8780
8781 LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
8782 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
8783 uint64_t Offset = MI.getOperand(3).getImm();
8784
8785 LLT DstTy = MRI.getType(Src);
8786 LLT InsertTy = MRI.getType(InsertSrc);
8787
8788 // Insert sub-vector or one element
8789 if (DstTy.isVector() && !InsertTy.isPointer()) {
8790 LLT EltTy = DstTy.getElementType();
8791 unsigned EltSize = EltTy.getSizeInBits();
8792 unsigned InsertSize = InsertTy.getSizeInBits();
8793
8794 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
8795 (Offset + InsertSize <= DstTy.getSizeInBits())) {
8796 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
8797 SmallVector<Register, 8> DstElts;
8798 unsigned Idx = 0;
8799 // Elements from Src that come before the insert offset.
8800 for (; Idx < Offset / EltSize; ++Idx) {
8801 DstElts.push_back(UnmergeSrc.getReg(Idx));
8802 }
8803
8804 // Replace elements in Src with elements from InsertSrc
8805 if (InsertTy.getSizeInBits() > EltSize) {
8806 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
8807 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
8808 ++Idx, ++i) {
8809 DstElts.push_back(UnmergeInsertSrc.getReg(i));
8810 }
8811 } else {
8812 DstElts.push_back(InsertSrc);
8813 ++Idx;
8814 }
8815
8816 // Remaining elements from Src after insert
8817 for (; Idx < DstTy.getNumElements(); ++Idx) {
8818 DstElts.push_back(UnmergeSrc.getReg(Idx));
8819 }
8820
8821 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
8822 MI.eraseFromParent();
8823 return Legalized;
8824 }
8825 }
8826
8827 if (InsertTy.isVector() ||
8828 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
8829 return UnableToLegalize;
8830
8831 const DataLayout &DL = MIRBuilder.getDataLayout();
8832 if ((DstTy.isPointer() &&
8833 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
8834 (InsertTy.isPointer() &&
8835 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
8836 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
8837 return UnableToLegalize;
8838 }
8839
8840 LLT IntDstTy = DstTy;
8841
8842 if (!DstTy.isScalar()) {
8843 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
8844 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
8845 }
8846
8847 if (!InsertTy.isScalar()) {
8848 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
8849 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
8850 }
8851
8852 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
8853 if (Offset != 0) {
8854 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
8855 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
8856 }
8857
8859 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
8860
8861 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
8862 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
8863 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
8864
8865 MIRBuilder.buildCast(Dst, Or);
8866 MI.eraseFromParent();
8867 return Legalized;
8868}
8869
8870 LegalizerHelper::LegalizeResult
8871 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
8872 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
8873 MI.getFirst4RegLLTs();
8874 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
8875
8876 LLT Ty = Dst0Ty;
8877 LLT BoolTy = Dst1Ty;
8878
8879 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
8880
8881 if (IsAdd)
8882 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
8883 else
8884 MIRBuilder.buildSub(NewDst0, LHS, RHS);
8885
8886 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
8887
8888 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8889
8890 // For an addition, the result should be less than one of the operands (LHS)
8891 // if and only if the other operand (RHS) is negative, otherwise there will
8892 // be overflow.
8893 // For a subtraction, the result should be less than one of the operands
8894 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
8895 // otherwise there will be overflow.
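 // Worked example (s8 addition): 100 + 50 wraps to -106; the result is less
 // than LHS (true) while RHS is not negative (false), so the XOR reports
 // overflow. For 100 + (-50) = 50 both comparisons are true and the XOR is 0.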
8896 auto ResultLowerThanLHS =
8897 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
8898 auto ConditionRHS = MIRBuilder.buildICmp(
8899 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
8900
8901 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
8902
8903 MIRBuilder.buildCopy(Dst0, NewDst0);
8904 MI.eraseFromParent();
8905
8906 return Legalized;
8907}
8908
8909 LegalizerHelper::LegalizeResult
8910 LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
8911 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8912 LLT Ty = MRI.getType(Res);
8913 bool IsSigned;
8914 bool IsAdd;
8915 unsigned BaseOp;
8916 switch (MI.getOpcode()) {
8917 default:
8918 llvm_unreachable("unexpected addsat/subsat opcode");
8919 case TargetOpcode::G_UADDSAT:
8920 IsSigned = false;
8921 IsAdd = true;
8922 BaseOp = TargetOpcode::G_ADD;
8923 break;
8924 case TargetOpcode::G_SADDSAT:
8925 IsSigned = true;
8926 IsAdd = true;
8927 BaseOp = TargetOpcode::G_ADD;
8928 break;
8929 case TargetOpcode::G_USUBSAT:
8930 IsSigned = false;
8931 IsAdd = false;
8932 BaseOp = TargetOpcode::G_SUB;
8933 break;
8934 case TargetOpcode::G_SSUBSAT:
8935 IsSigned = true;
8936 IsAdd = false;
8937 BaseOp = TargetOpcode::G_SUB;
8938 break;
8939 }
8940
8941 if (IsSigned) {
8942 // sadd.sat(a, b) ->
8943 // hi = 0x7fffffff - smax(a, 0)
8944 // lo = 0x80000000 - smin(a, 0)
8945 // a + smin(smax(lo, b), hi)
8946 // ssub.sat(a, b) ->
8947 // lo = smax(a, -1) - 0x7fffffff
8948 // hi = smin(a, -1) - 0x80000000
8949 // a - smin(smax(lo, b), hi)
8950 // TODO: AMDGPU can use a "median of 3" instruction here:
8951 // a +/- med3(lo, b, hi)
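 // Illustrative s8 example: sadd.sat(100, 100) gives hi = 127 - 100 = 27 and
 // lo = -128 - 0 = -128; clamping b = 100 into [lo, hi] yields 27, and
 // 100 + 27 = 127, the saturated result.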
8952 uint64_t NumBits = Ty.getScalarSizeInBits();
8953 auto MaxVal =
8954 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
8955 auto MinVal =
8956 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
8957 MachineInstrBuilder Hi, Lo;
8958 if (IsAdd) {
8959 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8960 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
8961 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
8962 } else {
8963 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
8964 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
8965 MaxVal);
8966 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
8967 MinVal);
8968 }
8969 auto RHSClamped =
8970 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
8971 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
8972 } else {
8973 // uadd.sat(a, b) -> a + umin(~a, b)
8974 // usub.sat(a, b) -> a - umin(a, b)
8975 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
8976 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
8977 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
8978 }
8979
8980 MI.eraseFromParent();
8981 return Legalized;
8982}
8983
8984 LegalizerHelper::LegalizeResult
8985 LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
8986 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8987 LLT Ty = MRI.getType(Res);
8988 LLT BoolTy = Ty.changeElementSize(1);
8989 bool IsSigned;
8990 bool IsAdd;
8991 unsigned OverflowOp;
8992 switch (MI.getOpcode()) {
8993 default:
8994 llvm_unreachable("unexpected addsat/subsat opcode");
8995 case TargetOpcode::G_UADDSAT:
8996 IsSigned = false;
8997 IsAdd = true;
8998 OverflowOp = TargetOpcode::G_UADDO;
8999 break;
9000 case TargetOpcode::G_SADDSAT:
9001 IsSigned = true;
9002 IsAdd = true;
9003 OverflowOp = TargetOpcode::G_SADDO;
9004 break;
9005 case TargetOpcode::G_USUBSAT:
9006 IsSigned = false;
9007 IsAdd = false;
9008 OverflowOp = TargetOpcode::G_USUBO;
9009 break;
9010 case TargetOpcode::G_SSUBSAT:
9011 IsSigned = true;
9012 IsAdd = false;
9013 OverflowOp = TargetOpcode::G_SSUBO;
9014 break;
9015 }
9016
9017 auto OverflowRes =
9018 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9019 Register Tmp = OverflowRes.getReg(0);
9020 Register Ov = OverflowRes.getReg(1);
9021 MachineInstrBuilder Clamp;
9022 if (IsSigned) {
9023 // sadd.sat(a, b) ->
9024 // {tmp, ov} = saddo(a, b)
9025 // ov ? (tmp >>s 31) + 0x80000000 : r
9026 // ssub.sat(a, b) ->
9027 // {tmp, ov} = ssubo(a, b)
9028 // ov ? (tmp >>s 31) + 0x80000000 : r
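 // The clamp picks the saturation bound branchlessly: on overflow tmp has the
 // wrong sign, so (tmp >>s (BW-1)) is all-ones for a positive overflow and
 // zero for a negative one; adding signed-min then yields MAX or MIN
 // respectively (e.g. s8: 100 + 100 gives tmp = -56, clamp = -1 + (-128) = 127).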
9029 uint64_t NumBits = Ty.getScalarSizeInBits();
9030 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9031 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9032 auto MinVal =
9033 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9034 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9035 } else {
9036 // uadd.sat(a, b) ->
9037 // {tmp, ov} = uaddo(a, b)
9038 // ov ? 0xffffffff : tmp
9039 // usub.sat(a, b) ->
9040 // {tmp, ov} = usubo(a, b)
9041 // ov ? 0 : tmp
9042 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9043 }
9044 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9045
9046 MI.eraseFromParent();
9047 return Legalized;
9048}
9049
9050 LegalizerHelper::LegalizeResult
9051 LegalizerHelper::lowerShlSat(MachineInstr &MI) {
9052 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9053 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9054 "Expected shlsat opcode!");
9055 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9056 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9057 LLT Ty = MRI.getType(Res);
9058 LLT BoolTy = Ty.changeElementSize(1);
9059
9060 unsigned BW = Ty.getScalarSizeInBits();
9061 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9062 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9063 : MIRBuilder.buildLShr(Ty, Result, RHS);
9064
9065 MachineInstrBuilder SatVal;
9066 if (IsSigned) {
9067 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9068 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9069 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9070 MIRBuilder.buildConstant(Ty, 0));
9071 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9072 } else {
9073 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9074 }
9075 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9076 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9077
9078 MI.eraseFromParent();
9079 return Legalized;
9080}
9081
9082 LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
9083 auto [Dst, Src] = MI.getFirst2Regs();
9084 const LLT Ty = MRI.getType(Src);
9085 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9086 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9087
9088 // Swap most and least significant byte, set remaining bytes in Res to zero.
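 // E.g. for an s32 value 0xAABBCCDD this first step produces 0xDD0000AA, and
 // the loop below then moves the two middle bytes into place, giving
 // 0xDDCCBBAA.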
9089 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9090 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9091 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9092 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9093
9094 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9095 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9096 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9097 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9098 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9099 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9100 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9101 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9102 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9103 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9104 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9105 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9106 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9107 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9108 }
9109 Res.getInstr()->getOperand(0).setReg(Dst);
9110
9111 MI.eraseFromParent();
9112 return Legalized;
9113}
9114
9115//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9117 MachineInstrBuilder Src, const APInt &Mask) {
9118 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9119 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9120 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9121 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9122 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9123 return B.buildOr(Dst, LHS, RHS);
9124}
9125
9126 LegalizerHelper::LegalizeResult
9127 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
9128 auto [Dst, Src] = MI.getFirst2Regs();
9129 const LLT SrcTy = MRI.getType(Src);
9130 unsigned Size = SrcTy.getScalarSizeInBits();
9131 unsigned VSize = SrcTy.getSizeInBits();
9132
9133 if (Size >= 8) {
9134 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9135 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9136 {LLT::fixed_vector(VSize / 8, 8),
9137 LLT::fixed_vector(VSize / 8, 8)}}))) {
9138 // If bitreverse is legal for i8 vector of the same size, then cast
9139 // to i8 vector type.
9140 // e.g. v4s32 -> v16s8
9141 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9142 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9143 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9144 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9145 MIRBuilder.buildBitcast(Dst, RBIT);
9146 } else {
9147 MachineInstrBuilder BSWAP =
9148 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9149
9150 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9151 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9152 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
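 // E.g. within each byte, 0x2F (0010|1111) becomes 0xF2 (1111|0010) after
 // this step.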
9153 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9154 APInt::getSplat(Size, APInt(8, 0xF0)));
9155
9156 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9157 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9158 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9159 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9160 APInt::getSplat(Size, APInt(8, 0xCC)));
9161
9162 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9163 // 6|7
9164 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9165 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
9166 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9167 }
9168 } else {
9169 // Expand bitreverse for types smaller than 8 bits.
9170 MachineInstrBuilder Tmp;
9171 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9172 MachineInstrBuilder Tmp2;
9173 if (I < J) {
9174 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9175 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9176 } else {
9177 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9178 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9179 }
9180
9181 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9182 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9183 if (I == 0)
9184 Tmp = Tmp2;
9185 else
9186 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9187 }
9188 MIRBuilder.buildCopy(Dst, Tmp);
9189 }
9190
9191 MI.eraseFromParent();
9192 return Legalized;
9193}
9194
9195 LegalizerHelper::LegalizeResult
9196 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
9197 MachineFunction &MF = MIRBuilder.getMF();
9198
9199 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9200 int NameOpIdx = IsRead ? 1 : 0;
9201 int ValRegIndex = IsRead ? 0 : 1;
9202
9203 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9204 const LLT Ty = MRI.getType(ValReg);
9205 const MDString *RegStr = cast<MDString>(
9206 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9207
9208 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9209 if (!PhysReg) {
9210 const Function &Fn = MF.getFunction();
9211 Fn.getContext().diagnose(DiagnosticInfoGenericWithLoc(
9212 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9213 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9214 Fn, MI.getDebugLoc()));
9215 if (IsRead)
9216 MIRBuilder.buildUndef(ValReg);
9217
9218 MI.eraseFromParent();
9219 return Legalized;
9220 }
9221
9222 if (IsRead)
9223 MIRBuilder.buildCopy(ValReg, PhysReg);
9224 else
9225 MIRBuilder.buildCopy(PhysReg, ValReg);
9226
9227 MI.eraseFromParent();
9228 return Legalized;
9229}
9230
9231 LegalizerHelper::LegalizeResult
9232 LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9233 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9234 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9235 Register Result = MI.getOperand(0).getReg();
9236 LLT OrigTy = MRI.getType(Result);
9237 auto SizeInBits = OrigTy.getScalarSizeInBits();
9238 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9239
9240 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9241 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9242 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9243 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9244
9245 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9246 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9247 MIRBuilder.buildTrunc(Result, Shifted);
9248
9249 MI.eraseFromParent();
9250 return Legalized;
9251}
9252
9253 LegalizerHelper::LegalizeResult
9254 LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
9255 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9256 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9257
9258 if (Mask == fcNone) {
9259 MIRBuilder.buildConstant(DstReg, 0);
9260 MI.eraseFromParent();
9261 return Legalized;
9262 }
9263 if (Mask == fcAllFlags) {
9264 MIRBuilder.buildConstant(DstReg, 1);
9265 MI.eraseFromParent();
9266 return Legalized;
9267 }
9268
9269 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9270 // version
9271
9272 unsigned BitSize = SrcTy.getScalarSizeInBits();
9273 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9274
9275 LLT IntTy = LLT::scalar(BitSize);
9276 if (SrcTy.isVector())
9277 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9278 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9279
9280 // Various masks.
9281 APInt SignBit = APInt::getSignMask(BitSize);
9282 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9283 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9284 APInt ExpMask = Inf;
9285 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9286 APInt QNaNBitMask =
9287 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9288 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9289
9290 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9291 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9292 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9293 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9294 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9295
9296 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9297 auto Sign =
9298 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9299
9300 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9301 // Clang doesn't support capture of structured bindings:
9302 LLT DstTyCopy = DstTy;
9303 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9304 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9305 };
9306
9307 // Tests that involve more than one class should be processed first.
9308 if ((Mask & fcFinite) == fcFinite) {
9309 // finite(V) ==> abs(V) u< exp_mask
9310 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9311 ExpMaskC));
9312 Mask &= ~fcFinite;
9313 } else if ((Mask & fcFinite) == fcPosFinite) {
9314 // finite(V) && V > 0 ==> V u< exp_mask
9315 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9316 ExpMaskC));
9317 Mask &= ~fcPosFinite;
9318 } else if ((Mask & fcFinite) == fcNegFinite) {
9319 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9320 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9321 ExpMaskC);
9322 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9323 appendToRes(And);
9324 Mask &= ~fcNegFinite;
9325 }
9326
9327 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9328 // fcZero | fcSubnormal => test all exponent bits are 0
9329 // TODO: Handle sign bit specific cases
9330 // TODO: Handle inverted case
9331 if (PartialCheck == (fcZero | fcSubnormal)) {
9332 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9333 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9334 ExpBits, ZeroC));
9335 Mask &= ~PartialCheck;
9336 }
9337 }
9338
9339 // Check for individual classes.
9340 if (FPClassTest PartialCheck = Mask & fcZero) {
9341 if (PartialCheck == fcPosZero)
9342 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9343 AsInt, ZeroC));
9344 else if (PartialCheck == fcZero)
9345 appendToRes(
9346 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9347 else // fcNegZero
9348 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9349 AsInt, SignBitC));
9350 }
9351
9352 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9353 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9354 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
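 // E.g. for f32, AllOneMantissa is 0x007FFFFF, so the check accepts exactly
 // the values whose exponent field is zero and whose mantissa is non-zero.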
9355 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9356 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9357 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9358 auto SubnormalRes =
9359 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9360 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9361 if (PartialCheck == fcNegSubnormal)
9362 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9363 appendToRes(SubnormalRes);
9364 }
9365
9366 if (FPClassTest PartialCheck = Mask & fcInf) {
9367 if (PartialCheck == fcPosInf)
9368 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9369 AsInt, InfC));
9370 else if (PartialCheck == fcInf)
9371 appendToRes(
9372 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9373 else { // fcNegInf
9374 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9375 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9376 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9377 AsInt, NegInfC));
9378 }
9379 }
9380
9381 if (FPClassTest PartialCheck = Mask & fcNan) {
9382 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9383 if (PartialCheck == fcNan) {
9384 // isnan(V) ==> abs(V) u> int(inf)
9385 appendToRes(
9386 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9387 } else if (PartialCheck == fcQNan) {
9388 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9389 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9390 InfWithQnanBitC));
9391 } else { // fcSNan
9392 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9393 // abs(V) u< (unsigned(Inf) | quiet_bit)
9394 auto IsNan =
9395 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9396 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9397 Abs, InfWithQnanBitC);
9398 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9399 }
9400 }
9401
9402 if (FPClassTest PartialCheck = Mask & fcNormal) {
9403 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9404 // (max_exp-1))
9405 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9406 auto ExpMinusOne = MIRBuilder.buildSub(
9407 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9408 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9409 auto NormalRes =
9410 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
9411 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9412 if (PartialCheck == fcNegNormal)
9413 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9414 else if (PartialCheck == fcPosNormal) {
9415 auto PosSign = MIRBuilder.buildXor(
9416 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
9417 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
9418 }
9419 appendToRes(NormalRes);
9420 }
9421
9422 MIRBuilder.buildCopy(DstReg, Res);
9423 MI.eraseFromParent();
9424 return Legalized;
9425}
9426
9427 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
9428 // Implement G_SELECT in terms of XOR, AND, OR.
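 // i.e. dst = (op1 & mask) | (op2 & ~mask), where mask is the condition
 // sign-extended to the element width (all-ones selects op1, all-zeros op2).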
9429 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9430 MI.getFirst4RegLLTs();
9431
9432 bool IsEltPtr = DstTy.isPointerOrPointerVector();
9433 if (IsEltPtr) {
9434 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9435 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9436 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9437 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9438 DstTy = NewTy;
9439 }
9440
9441 if (MaskTy.isScalar()) {
9442 // Turn the scalar condition into a vector condition mask if needed.
9443
9444 Register MaskElt = MaskReg;
9445
9446 // The condition was potentially zero extended before, but we want a sign
9447 // extended boolean.
9448 if (MaskTy != LLT::scalar(1))
9449 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
9450
9451 // Continue the sign extension (or truncate) to match the data type.
9452 MaskElt =
9453 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
9454
9455 if (DstTy.isVector()) {
9456 // Generate a vector splat idiom.
9457 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9458 MaskReg = ShufSplat.getReg(0);
9459 } else {
9460 MaskReg = MaskElt;
9461 }
9462 MaskTy = DstTy;
9463 } else if (!DstTy.isVector()) {
9464 // Cannot handle the case that mask is a vector and dst is a scalar.
9465 return UnableToLegalize;
9466 }
9467
9468 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9469 return UnableToLegalize;
9470 }
9471
9472 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9473 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9474 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
9475 if (IsEltPtr) {
9476 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9477 MIRBuilder.buildIntToPtr(DstReg, Or);
9478 } else {
9479 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9480 }
9481 MI.eraseFromParent();
9482 return Legalized;
9483}
9484
9485 LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
9486 // Split DIVREM into individual instructions.
9487 unsigned Opcode = MI.getOpcode();
9488
9489 MIRBuilder.buildInstr(
9488
9490 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9491 : TargetOpcode::G_UDIV,
9492 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9493 MIRBuilder.buildInstr(
9494 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9495 : TargetOpcode::G_UREM,
9496 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9497 MI.eraseFromParent();
9498 return Legalized;
9499}
9500
9501 LegalizerHelper::LegalizeResult
9502 LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
9503 // Expand %res = G_ABS %a into:
9504 // %v1 = G_ASHR %a, scalar_size-1
9505 // %v2 = G_ADD %a, %v1
9506 // %res = G_XOR %v2, %v1
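 // E.g. for %a = -5 (s32): %v1 = -1, %v2 = -6, and -6 ^ -1 = 5; for %a >= 0,
 // %v1 = 0 and the value passes through unchanged.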
9507 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
9508 Register OpReg = MI.getOperand(1).getReg();
9509 auto ShiftAmt =
9510 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
9511 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
9512 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
9513 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
9514 MI.eraseFromParent();
9515 return Legalized;
9516}
9517
9518 LegalizerHelper::LegalizeResult
9519 LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
9520 // Expand %res = G_ABS %a into:
9521 // %v1 = G_CONSTANT 0
9522 // %v2 = G_SUB %v1, %a
9523 // %res = G_SMAX %a, %v2
9524 Register SrcReg = MI.getOperand(1).getReg();
9525 LLT Ty = MRI.getType(SrcReg);
9526 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9527 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
9528 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
9529 MI.eraseFromParent();
9530 return Legalized;
9531}
9532
9533 LegalizerHelper::LegalizeResult
9534 LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
9535 Register SrcReg = MI.getOperand(1).getReg();
9536 Register DestReg = MI.getOperand(0).getReg();
9537 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
9538 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
9539 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
9540 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
9541 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
9542 MI.eraseFromParent();
9543 return Legalized;
9544}
9545
9546 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
9547 Register SrcReg = MI.getOperand(1).getReg();
9548 Register DstReg = MI.getOperand(0).getReg();
9549
9550 LLT Ty = MRI.getType(DstReg);
9551
9552 // Reset sign bit
9553 MIRBuilder.buildAnd(
9554 DstReg, SrcReg,
9555 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(
9556 Ty.getScalarSizeInBits())));
9557
9558 MI.eraseFromParent();
9559 return Legalized;
9560}
9561
9562 LegalizerHelper::LegalizeResult
9563 LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
9564 Register SrcReg = MI.getOperand(1).getReg();
9565 LLT SrcTy = MRI.getType(SrcReg);
9566 LLT DstTy = MRI.getType(SrcReg);
9567
9568 // The source could be a scalar if the IR type was <1 x sN>.
9569 if (SrcTy.isScalar()) {
9570 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
9571 return UnableToLegalize; // FIXME: handle extension.
9572 // This can be just a plain copy.
9574 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
9576 return Legalized;
9577 }
9578 return UnableToLegalize;
9579}
9580
9581 LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
9582 MachineFunction &MF = *MI.getMF();
9583 const DataLayout &DL = MIRBuilder.getDataLayout();
9584 LLVMContext &Ctx = MF.getFunction().getContext();
9585 Register ListPtr = MI.getOperand(1).getReg();
9586 LLT PtrTy = MRI.getType(ListPtr);
9587
9588 // ListPtr is a pointer to the head of the list. Get the address
9589 // of the head of the list.
9590 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
9591 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
9592 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
9593 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
9594
9595 const Align A(MI.getOperand(2).getImm());
9596 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
9597 if (A > TLI.getMinStackArgumentAlignment()) {
9598 Register AlignAmt =
9599 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
9600 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
9601 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
9602 VAList = AndDst.getReg(0);
9603 }
9604
9605 // Increment the pointer, VAList, to the next vaarg
9606 // The list should be bumped by the size of the element in the current head
9607 // of the list.
9608 Register Dst = MI.getOperand(0).getReg();
9609 LLT LLTTy = MRI.getType(Dst);
9610 Type *Ty = getTypeForLLT(LLTTy, Ctx);
9611 auto IncAmt =
9612 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
9613 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
9614
9615 // Store the incremented VAList to the legalized pointer
9616 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
9617 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
9618 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
9619 // Load the actual argument out of the pointer VAList
9620 Align EltAlignment = DL.getABITypeAlign(Ty);
9621 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
9622 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
9623 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
9624
9625 MI.eraseFromParent();
9626 return Legalized;
9627}
9628
9629 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
9630 // On Darwin, -Os means optimize for size without hurting performance, so
9631 // only really optimize for size when -Oz (MinSize) is used.
9632 if (MF.getTarget().getTargetTriple().isOSDarwin())
9633 return MF.getFunction().hasMinSize();
9634 return MF.getFunction().hasOptSize();
9635}
9636
9637// Returns a list of types to use for memory op lowering in MemOps. A partial
9638// port of findOptimalMemOpLowering in TargetLowering.
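// For example, a 13-byte copy whose preferred type is s64 typically breaks
// down into s64 + s32 + s8 operations, or into two (possibly overlapping)
// s64 accesses when misaligned accesses are cheap and overlap is allowed.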
9639static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
9640 unsigned Limit, const MemOp &Op,
9641 unsigned DstAS, unsigned SrcAS,
9642 const AttributeList &FuncAttributes,
9643 const TargetLowering &TLI) {
9644 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
9645 return false;
9646
9647 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
9648
9649 if (Ty == LLT()) {
9650 // Use the largest scalar type whose alignment constraints are satisfied.
9651 // We only need to check DstAlign here as SrcAlign is always greater than or
9652 // equal to DstAlign (or zero).
9653 Ty = LLT::scalar(64);
9654 if (Op.isFixedDstAlign())
9655 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
9656 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
9657 Ty = LLT::scalar(Ty.getSizeInBytes());
9658 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
9659 // FIXME: check for the largest legal type we can load/store to.
9660 }
9661
9662 unsigned NumMemOps = 0;
9663 uint64_t Size = Op.size();
9664 while (Size) {
9665 unsigned TySize = Ty.getSizeInBytes();
9666 while (TySize > Size) {
9667 // For now, only use non-vector load / store's for the left-over pieces.
9668 LLT NewTy = Ty;
9669 // FIXME: check for mem op safety and legality of the types. Not all of
9670 // SDAGisms map cleanly to GISel concepts.
9671 if (NewTy.isVector())
9672 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
9673 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
9674 unsigned NewTySize = NewTy.getSizeInBytes();
9675 assert(NewTySize > 0 && "Could not find appropriate type");
9676
9677 // If the new LLT cannot cover all of the remaining bits, then consider
9678 // issuing a (or a pair of) unaligned and overlapping load / store.
9679 unsigned Fast;
9680 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
9681 MVT VT = getMVTForLLT(Ty);
9682 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
9683 TLI.allowsMisalignedMemoryAccesses(
9684 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
9685 MachineMemOperand::MONone, &Fast) &&
9686 Fast)
9687 TySize = Size;
9688 else {
9689 Ty = NewTy;
9690 TySize = NewTySize;
9691 }
9692 }
9693
9694 if (++NumMemOps > Limit)
9695 return false;
9696
9697 MemOps.push_back(Ty);
9698 Size -= TySize;
9699 }
9700
9701 return true;
9702}
9703
9704// Get a vectorized representation of the memset value operand, GISel edition.
9705 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
9706 MachineRegisterInfo &MRI = *MIB.getMRI();
9707 unsigned NumBits = Ty.getScalarSizeInBits();
9708 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
9709 if (!Ty.isVector() && ValVRegAndVal) {
9710 APInt Scalar = ValVRegAndVal->Value.trunc(8);
9711 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
9712 return MIB.buildConstant(Ty, SplatVal).getReg(0);
9713 }
9714
9715 // Extend the byte value to the larger type, and then multiply by a magic
9716 // value 0x010101... in order to replicate it across every byte.
9717 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
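 // E.g. a byte value 0xAB splatted into an s32 store becomes
 // 0xAB * 0x01010101 = 0xABABABAB.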
9718 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
9719 return MIB.buildConstant(Ty, 0).getReg(0);
9720 }
9721
9722 LLT ExtType = Ty.getScalarType();
9723 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
9724 if (NumBits > 8) {
9725 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
9726 auto MagicMI = MIB.buildConstant(ExtType, Magic);
9727 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
9728 }
9729
9730 // For vector types create a G_BUILD_VECTOR.
9731 if (Ty.isVector())
9732 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
9733
9734 return Val;
9735}
9736
9738LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
9739 uint64_t KnownLen, Align Alignment,
9740 bool IsVolatile) {
9741 auto &MF = *MI.getParent()->getParent();
9742 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9743 auto &DL = MF.getDataLayout();
9744 LLVMContext &C = MF.getFunction().getContext();
9745
9746 assert(KnownLen != 0 && "Have a zero length memset length!");
9747
9748 bool DstAlignCanChange = false;
9749 MachineFrameInfo &MFI = MF.getFrameInfo();
9750 bool OptSize = shouldLowerMemFuncForSize(MF);
9751
9752 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9753 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9754 DstAlignCanChange = true;
9755
9756 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
9757 std::vector<LLT> MemOps;
9758
9759 const auto &DstMMO = **MI.memoperands_begin();
9760 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9761
9762 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
9763 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
9764
9765 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
9766 MemOp::Set(KnownLen, DstAlignCanChange,
9767 Alignment,
9768 /*IsZeroMemset=*/IsZeroVal,
9769 /*IsVolatile=*/IsVolatile),
9770 DstPtrInfo.getAddrSpace(), ~0u,
9771 MF.getFunction().getAttributes(), TLI))
9772 return UnableToLegalize;
9773
9774 if (DstAlignCanChange) {
9775 // Get an estimate of the type from the LLT.
9776 Type *IRTy = getTypeForLLT(MemOps[0], C);
9777 Align NewAlign = DL.getABITypeAlign(IRTy);
9778 if (NewAlign > Alignment) {
9779 Alignment = NewAlign;
9780 unsigned FI = FIDef->getOperand(1).getIndex();
9781 // Give the stack frame object a larger alignment if needed.
9782 if (MFI.getObjectAlign(FI) < Alignment)
9783 MFI.setObjectAlignment(FI, Alignment);
9784 }
9785 }
9786
9787 MachineIRBuilder MIB(MI);
9788 // Find the largest store and generate the bit pattern for it.
9789 LLT LargestTy = MemOps[0];
9790 for (unsigned i = 1; i < MemOps.size(); i++)
9791 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
9792 LargestTy = MemOps[i];
9793
9794 // The memset stored value is always defined as an s8, so in order to make it
9795 // work with larger store types we need to repeat the bit pattern across the
9796 // wider type.
9797 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
9798
9799 if (!MemSetValue)
9800 return UnableToLegalize;
9801
9802 // Generate the stores. For each store type in the list, we generate the
9803 // matching store of that type to the destination address.
9804 LLT PtrTy = MRI.getType(Dst);
9805 unsigned DstOff = 0;
9806 unsigned Size = KnownLen;
9807 for (unsigned I = 0; I < MemOps.size(); I++) {
9808 LLT Ty = MemOps[I];
9809 unsigned TySize = Ty.getSizeInBytes();
9810 if (TySize > Size) {
9811 // Issuing an unaligned load / store pair that overlaps with the previous
9812 // pair. Adjust the offset accordingly.
9813 assert(I == MemOps.size() - 1 && I != 0);
9814 DstOff -= TySize - Size;
9815 }
9816
9817 // If this store is smaller than the largest store see whether we can get
9818 // the smaller value for free with a truncate.
9819 Register Value = MemSetValue;
9820 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
9821 MVT VT = getMVTForLLT(Ty);
9822 MVT LargestVT = getMVTForLLT(LargestTy);
9823 if (!LargestTy.isVector() && !Ty.isVector() &&
9824 TLI.isTruncateFree(LargestVT, VT))
9825 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
9826 else
9827 Value = getMemsetValue(Val, Ty, MIB);
9828 if (!Value)
9829 return UnableToLegalize;
9830 }
9831
9832 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
9833
9834 Register Ptr = Dst;
9835 if (DstOff != 0) {
9836 auto Offset =
9837 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
9838 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
9839 }
9840
9841 MIB.buildStore(Value, Ptr, *StoreMMO);
9842 DstOff += Ty.getSizeInBytes();
9843 Size -= TySize;
9844 }
9845
9846 MI.eraseFromParent();
9847 return Legalized;
9848}
9849
9851LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
9852 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9853
9854 auto [Dst, Src, Len] = MI.getFirst3Regs();
9855
9856 const auto *MMOIt = MI.memoperands_begin();
9857 const MachineMemOperand *MemOp = *MMOIt;
9858 bool IsVolatile = MemOp->isVolatile();
9859
9860 // See if this is a constant length copy
9861 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9862 // FIXME: support dynamically sized G_MEMCPY_INLINE
9863 assert(LenVRegAndVal &&
9864 "inline memcpy with dynamic size is not yet supported");
9865 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9866 if (KnownLen == 0) {
9867 MI.eraseFromParent();
9868 return Legalized;
9869 }
9870
9871 const auto &DstMMO = **MI.memoperands_begin();
9872 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9873 Align DstAlign = DstMMO.getBaseAlign();
9874 Align SrcAlign = SrcMMO.getBaseAlign();
9875
9876 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9877 IsVolatile);
9878}
9879
9881LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
9882 uint64_t KnownLen, Align DstAlign,
9883 Align SrcAlign, bool IsVolatile) {
9884 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9885 return lowerMemcpy(MI, Dst, Src, KnownLen,
9886 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
9887 IsVolatile);
9888}
9889
9891LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
9892 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
9893 Align SrcAlign, bool IsVolatile) {
9894 auto &MF = *MI.getParent()->getParent();
9895 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9896 auto &DL = MF.getDataLayout();
9897 LLVMContext &C = MF.getFunction().getContext();
9898
9899 assert(KnownLen != 0 && "Have a zero length memcpy length!");
9900
9901 bool DstAlignCanChange = false;
9902 MachineFrameInfo &MFI = MF.getFrameInfo();
9903 Align Alignment = std::min(DstAlign, SrcAlign);
9904
9905 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9906 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9907 DstAlignCanChange = true;
9908
9909 // FIXME: infer better src pointer alignment like SelectionDAG does here.
9910 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
9911 // if the memcpy is in a tail call position.
9912
9913 std::vector<LLT> MemOps;
9914
9915 const auto &DstMMO = **MI.memoperands_begin();
9916 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9917 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9918 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9919
9920 if (!findGISelOptimalMemOpLowering(
9921 MemOps, Limit,
9922 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9923 IsVolatile),
9924 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9925 MF.getFunction().getAttributes(), TLI))
9926 return UnableToLegalize;
9927
9928 if (DstAlignCanChange) {
9929 // Get an estimate of the type from the LLT.
9930 Type *IRTy = getTypeForLLT(MemOps[0], C);
9931 Align NewAlign = DL.getABITypeAlign(IRTy);
9932
9933 // Don't promote to an alignment that would require dynamic stack
9934 // realignment.
9935 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9936 if (!TRI->hasStackRealignment(MF))
9937 if (MaybeAlign StackAlign = DL.getStackAlignment())
9938 NewAlign = std::min(NewAlign, *StackAlign);
9939
9940 if (NewAlign > Alignment) {
9941 Alignment = NewAlign;
9942 unsigned FI = FIDef->getOperand(1).getIndex();
9943 // Give the stack frame object a larger alignment if needed.
9944 if (MFI.getObjectAlign(FI) < Alignment)
9945 MFI.setObjectAlignment(FI, Alignment);
9946 }
9947 }
9948
9949 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
9950
9951 MachineIRBuilder MIB(MI);
9952 // Now we need to emit a pair of load and stores for each of the types we've
9953 // collected. I.e. for each type, generate a load from the source pointer of
9954 // that type width, and then generate a corresponding store to the dest buffer
9955 // of that value loaded. This can result in a sequence of loads and stores of
9956 // mixed types, depending on what the target specifies as good types to use.
9957 unsigned CurrOffset = 0;
9958 unsigned Size = KnownLen;
9959 for (auto CopyTy : MemOps) {
9960 // Issuing an unaligned load / store pair that overlaps with the previous
9961 // pair. Adjust the offset accordingly.
9962 if (CopyTy.getSizeInBytes() > Size)
9963 CurrOffset -= CopyTy.getSizeInBytes() - Size;
9964
9965 // Construct MMOs for the accesses.
9966 auto *LoadMMO =
9967 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9968 auto *StoreMMO =
9969 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9970
9971 // Create the load.
9972 Register LoadPtr = Src;
9973 Register Offset;
9974 if (CurrOffset != 0) {
9975 LLT SrcTy = MRI.getType(Src);
9976 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
9977 .getReg(0);
9978 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
9979 }
9980 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
9981
9982 // Create the store.
9983 Register StorePtr = Dst;
9984 if (CurrOffset != 0) {
9985 LLT DstTy = MRI.getType(Dst);
9986 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
9987 }
9988 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
9989 CurrOffset += CopyTy.getSizeInBytes();
9990 Size -= CopyTy.getSizeInBytes();
9991 }
9992
9993 MI.eraseFromParent();
9994 return Legalized;
9995}
9996
9998LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
9999 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10000 bool IsVolatile) {
10001 auto &MF = *MI.getParent()->getParent();
10002 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10003 auto &DL = MF.getDataLayout();
10004 LLVMContext &C = MF.getFunction().getContext();
10005
10006 assert(KnownLen != 0 && "Have a zero length memmove length!");
10007
10008 bool DstAlignCanChange = false;
10009 MachineFrameInfo &MFI = MF.getFrameInfo();
10010 bool OptSize = shouldLowerMemFuncForSize(MF);
10011 Align Alignment = std::min(DstAlign, SrcAlign);
10012
10013 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10014 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10015 DstAlignCanChange = true;
10016
10017 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10018 std::vector<LLT> MemOps;
10019
10020 const auto &DstMMO = **MI.memoperands_begin();
10021 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10022 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10023 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10024
10025 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10026 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10027 // same thing here.
10028 if (!findGISelOptimalMemOpLowering(
10029 MemOps, Limit,
10030 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10031 /*IsVolatile*/ true),
10032 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10033 MF.getFunction().getAttributes(), TLI))
10034 return UnableToLegalize;
10035
10036 if (DstAlignCanChange) {
10037 // Get an estimate of the type from the LLT.
10038 Type *IRTy = getTypeForLLT(MemOps[0], C);
10039 Align NewAlign = DL.getABITypeAlign(IRTy);
10040
10041 // Don't promote to an alignment that would require dynamic stack
10042 // realignment.
10043 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10044 if (!TRI->hasStackRealignment(MF))
10045 if (MaybeAlign StackAlign = DL.getStackAlignment())
10046 NewAlign = std::min(NewAlign, *StackAlign);
10047
10048 if (NewAlign > Alignment) {
10049 Alignment = NewAlign;
10050 unsigned FI = FIDef->getOperand(1).getIndex();
10051 // Give the stack frame object a larger alignment if needed.
10052 if (MFI.getObjectAlign(FI) < Alignment)
10053 MFI.setObjectAlignment(FI, Alignment);
10054 }
10055 }
10056
10057 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10058
10059 MachineIRBuilder MIB(MI);
10060 // Memmove requires that we perform all of the loads before issuing the stores.
10061 // Apart from that, this loop is pretty much doing the same thing as the
10062 // memcpy codegen function.
10063 unsigned CurrOffset = 0;
10064 SmallVector<Register, 16> LoadVals;
10065 for (auto CopyTy : MemOps) {
10066 // Construct MMO for the load.
10067 auto *LoadMMO =
10068 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10069
10070 // Create the load.
10071 Register LoadPtr = Src;
10072 if (CurrOffset != 0) {
10073 LLT SrcTy = MRI.getType(Src);
10074 auto Offset =
10075 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10076 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10077 }
10078 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10079 CurrOffset += CopyTy.getSizeInBytes();
10080 }
10081
10082 CurrOffset = 0;
10083 for (unsigned I = 0; I < MemOps.size(); ++I) {
10084 LLT CopyTy = MemOps[I];
10085 // Now store the values loaded.
10086 auto *StoreMMO =
10087 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10088
10089 Register StorePtr = Dst;
10090 if (CurrOffset != 0) {
10091 LLT DstTy = MRI.getType(Dst);
10092 auto Offset =
10093 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10094 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10095 }
10096 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10097 CurrOffset += CopyTy.getSizeInBytes();
10098 }
10099 MI.eraseFromParent();
10100 return Legalized;
10101}
10102
10103 LegalizerHelper::LegalizeResult
10104 LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
10105 const unsigned Opc = MI.getOpcode();
10106 // This combine is fairly complex so it's not written with a separate
10107 // matcher function.
10108 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10109 Opc == TargetOpcode::G_MEMSET) &&
10110 "Expected memcpy like instruction");
10111
10112 auto MMOIt = MI.memoperands_begin();
10113 const MachineMemOperand *MemOp = *MMOIt;
10114
10115 Align DstAlign = MemOp->getBaseAlign();
10116 Align SrcAlign;
10117 auto [Dst, Src, Len] = MI.getFirst3Regs();
10118
10119 if (Opc != TargetOpcode::G_MEMSET) {
10120 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10121 MemOp = *(++MMOIt);
10122 SrcAlign = MemOp->getBaseAlign();
10123 }
10124
10125 // See if this is a constant length copy
10126 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10127 if (!LenVRegAndVal)
10128 return UnableToLegalize;
10129 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10130
10131 if (KnownLen == 0) {
10132 MI.eraseFromParent();
10133 return Legalized;
10134 }
10135
10136 if (MaxLen && KnownLen > MaxLen)
10137 return UnableToLegalize;
10138
10139 bool IsVolatile = MemOp->isVolatile();
10140 if (Opc == TargetOpcode::G_MEMCPY) {
10141 auto &MF = *MI.getParent()->getParent();
10142 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10143 bool OptSize = shouldLowerMemFuncForSize(MF);
10144 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10145 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10146 IsVolatile);
10147 }
10148 if (Opc == TargetOpcode::G_MEMMOVE)
10149 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10150 if (Opc == TargetOpcode::G_MEMSET)
10151 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10152 return UnableToLegalize;
10153}
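// ---------------------------------------------------------------------------
// Illustrative sketch (not part of LegalizerHelper.cpp): one way a backend
// could route small, constant-length G_MEMCPY / G_MEMMOVE / G_MEMSET
// instructions into the inline load/store expansion implemented by
// lowerMemCpyFamily() above. "MyTargetLegalizerInfo" and the 32-byte MaxLen
// threshold are hypothetical placeholders; real targets pick their own names
// and limits and typically handle the cases this expansion rejects in some
// other way (for example by emitting a libcall).

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"

namespace {
class MyTargetLegalizerInfo : public llvm::LegalizerInfo {
public:
  bool legalizeCustom(llvm::LegalizerHelper &Helper, llvm::MachineInstr &MI,
                      llvm::LostDebugLocObserver &LocObserver) const override {
    switch (MI.getOpcode()) {
    case llvm::TargetOpcode::G_MEMCPY:
    case llvm::TargetOpcode::G_MEMMOVE:
    case llvm::TargetOpcode::G_MEMSET:
      // Expand inline only when the constant length is at most 32 bytes.
      // A real target would deal with rejected cases (unknown or oversized
      // lengths) instead of returning false, which the legalizer treats as
      // a failure to legalize.
      return Helper.lowerMemCpyFamily(MI, /*MaxLen=*/32) ==
             llvm::LegalizerHelper::Legalized;
    default:
      return false;
    }
  }
};
} // end anonymous namespace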
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1347
APInt bitcastToAPInt() const
Definition: APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1098
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1666
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:216
void negate()
Negate this APInt in place.
Definition: APInt.h:1468
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:873
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
iterator begin() const
Definition: ArrayRef.h:135
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:142
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1137
LLVM_ABI AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
LLVM_ABI AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:860
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:681
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:692
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:686
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:689
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:685
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:687
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
@ ICMP_NE
not equal
Definition: InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:706
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:694
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:691
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:688
bool isSigned() const
Definition: InstrTypes.h:932
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:791
const APFloat & getValueAPF() const
Definition: Constants.h:320
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:154
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:371
bool isBigEndian() const
Definition: DataLayout.h:199
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:312
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:318
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:359
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:214
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Represent a G_FCMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:319
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:265
constexpr bool isScalar() const
Definition: LowLevelType.h:147
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:212
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:65
constexpr bool isPointerVector() const
Definition: LowLevelType.h:153
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:43
constexpr bool isValid() const
Definition: LowLevelType.h:146
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:160
constexpr bool isVector() const
Definition: LowLevelType.h:149
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:58
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:171
constexpr bool isByteSized() const
Definition: LowLevelType.h:261
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:191
constexpr bool isPointer() const
Definition: LowLevelType.h:150
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:278
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:219
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:271
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:101
constexpr bool isPointerOrPointerVector() const
Definition: LowLevelType.h:154
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:228
constexpr LLT getScalarType() const
Definition: LowLevelType.h:206
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:201
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:125
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:64
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:71
A single uniqued string.
Definition: Metadata.h:720
LLVM_ABI StringRef getString() const
Definition: Metadata.cpp:617
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:247
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildFPTOUI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOUI Src0.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildBSwap(const DstOp &Dst, const SrcOp &Src0)
Build and insert Dst = G_BSWAP Src0.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildObjectPtrOffset(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert an instruction with appropriate flags for addressing some offset of an object,...
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
std::optional< MachineInstrBuilder > materializeObjectPtrOffset(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert an instruction with appropriate flags for addressing some offset of an object,...
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP PredOp0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:938
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:590
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:798
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:78
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:148
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:414
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
LLT getVectorIdxLLT(const DataLayout &DL) const
Returns the type to be used for the index operand of: G_INSERT_VECTOR_ELT, G_EXTRACT_VECTOR_ELT,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition: Triple.h:608
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:255
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:66
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:84
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:58
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:75
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:53
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:88
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:72
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
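A minimal usage sketch of these matching combinators; the register and variable names are illustrative, not part of the documented interface.
// Check whether Reg is (transitively) defined by an integer constant.
APInt ShiftAmt;
if (mi_match(Reg, MRI, m_ICst(ShiftAmt))) {
  // ShiftAmt now holds the constant value; fold using it.
}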
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
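A hedged sketch of how a legalizer might query this table for an f64 to f32 truncation; the surrounding control flow is illustrative.
RTLIB::Libcall LC = RTLIB::getFPROUND(MVT::f64, MVT::f32);
if (LC == RTLIB::UNKNOWN_LIBCALL)
  return UnableToLegalize; // no runtime helper exists for this type pair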
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Offset
Definition: DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:860
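A small sketch, assuming two equally sized part-register vectors (names illustrative), that walks them in lockstep.
for (auto [DstPart, SrcPart] : zip(DstRegs, SrcRegs))
  MIRBuilder.buildCopy(DstPart, SrcPart);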
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:2029
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:651
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for an N-bit signed integer.
Definition: MathExtras.h:232
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1563
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1620
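A sketch of using this predicate matcher to require that every constant element is non-zero; the register name and lambda are illustrative.
bool AllNonZero = matchUnaryPredicate(MRI, DenomReg, [](const Constant *C) {
  const auto *CI = dyn_cast_or_null<ConstantInt>(C);
  return CI && !CI->isZero();
});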
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:390
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition: Utils.cpp:1189
unsigned M1(unsigned Val)
Definition: VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition: MathExtras.h:362
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
Definition: SPIRVUtils.cpp:976
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
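A hedged sketch of lowering a unary f32 operation to an external call named "sinf"; DstReg, SrcReg, Ctx, and MI are assumed to be in scope and the libcall name is illustrative.
Type *FloatTy = Type::getFloatTy(Ctx);
CallLowering::ArgInfo Result(DstReg, FloatTy, 0); // return value
CallLowering::ArgInfo Arg(SrcReg, FloatTy, 0);    // single operand
LegalizerHelper::LegalizeResult Status =
    createLibcall(MIRBuilder, "sinf", Result, {Arg}, CallingConv::C,
                  LocObserver, &MI);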
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition: Utils.cpp:506
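A minimal sketch of splitting a wide virtual register into NarrowTy-sized pieces; SrcReg, NarrowTy, and NumParts are illustrative and NumParts is assumed to evenly cover the source type.
SmallVector<Register, 4> SrcParts;
extractParts(SrcReg, NarrowTy, NumParts, SrcParts, MIRBuilder, MRI);
// SrcParts now holds NumParts registers of type NarrowTy.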
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
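A one-line worked example (values illustrative): rounding a 10-byte object up to an 8-byte boundary.
uint64_t Padded = alignTo(10, Align(8)); // Padded == 16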
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1854
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for an N-bit signed integer.
Definition: MathExtras.h:241
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:433
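A sketch of the common pattern of folding only when an operand is a known constant; AmtReg is illustrative.
if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI)) {
  const APInt &Amt = VRegAndVal->Value;
  // ... fold using the known constant Amt ...
}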
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:352
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1277
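A small sketch pairing getGCDType with getLCMType, as is common when splitting an operation; the concrete scalar sizes are illustrative.
LLT WideTy = getLCMType(LLT::scalar(96), LLT::scalar(32)); // s96
LLT PartTy = getGCDType(LLT::scalar(96), LLT::scalar(32)); // s32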
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:280
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:609
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:378
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:308
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:267
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
SmallVector< ISD::ArgFlagsTy, 4 > Flags
Definition: CallLowering.h:52
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
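A sketch of describing a stack-slot access 8 bytes into a frame index; MF and FI are assumed to be in scope and are illustrative.
MachinePointerInfo PtrInfo =
    MachinePointerInfo::getFixedStack(MF, FI).getWithOffset(8);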
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
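A hedged sketch of characterizing a 32-byte zero-memset with a fixed 16-byte destination alignment; the concrete numbers are illustrative.
MemOp Op = MemOp::Set(/*Size=*/32, /*DstAlignCanChange=*/false,
                      /*DstAlign=*/Align(16), /*IsZeroMemset=*/true,
                      /*IsVolatile=*/false);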