1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
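///
/// For example, breaking an s70 \p OrigTy into s32 pieces yields {2, 1} with
/// \p LeftoverTy set to s6, and breaking a v8s16 \p OrigTy into v3s16 pieces
/// yields {2, 1} with \p LeftoverTy set to v2s16.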
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
83
84static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
105LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106                                 GISelChangeObserver &Observer,
107                                 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113                                 GISelChangeObserver &Observer,
114                                 MachineIRBuilder &B, GISelValueTracking *VT)
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
117
118LegalizerHelper::LegalizeResult
119LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
120                                   LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
122
123 MIRBuilder.setInstrAndDebugLoc(MI);
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
156 : UnableToLegalize;
157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
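/// Reassemble \p DstReg from narrow pieces: \p PartRegs holds the \p PartTy
/// sized parts and \p LeftoverRegs any tail piece of type \p LeftoverTy (e.g.
/// an s88 value rebuilt from one s64 part plus an s24 leftover). With no
/// leftover this is a plain merge/build/concat; otherwise the pieces are
/// re-split into a common GCD type and re-merged to the destination.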
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
183 // Merge sub-vectors with different numbers of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs);
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 32> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
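/// Build NarrowTy-sized merges out of the GCDTy-sized pieces in \p VRegs so
/// that they cover getLCMType(DstTy, NarrowTy). Missing high pieces are padded
/// according to \p PadStrategy: G_ZEXT pads with zero, G_ANYEXT with undef,
/// and G_SEXT with copies of the sign bit of the last source piece. On return
/// \p VRegs holds the NarrowTy-sized parts and the LCM type is returned.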
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_LROUND:
415 RTLIBCASE(LROUND_F);
416 case TargetOpcode::G_LLROUND:
417 RTLIBCASE(LLROUND_F);
418 case TargetOpcode::G_MUL:
419 RTLIBCASE_INT(MUL_I);
420 case TargetOpcode::G_SDIV:
421 RTLIBCASE_INT(SDIV_I);
422 case TargetOpcode::G_UDIV:
423 RTLIBCASE_INT(UDIV_I);
424 case TargetOpcode::G_SREM:
425 RTLIBCASE_INT(SREM_I);
426 case TargetOpcode::G_UREM:
427 RTLIBCASE_INT(UREM_I);
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
429 RTLIBCASE_INT(CTLZ_I);
430 case TargetOpcode::G_FADD:
431 RTLIBCASE(ADD_F);
432 case TargetOpcode::G_FSUB:
433 RTLIBCASE(SUB_F);
434 case TargetOpcode::G_FMUL:
435 RTLIBCASE(MUL_F);
436 case TargetOpcode::G_FDIV:
437 RTLIBCASE(DIV_F);
438 case TargetOpcode::G_FEXP:
439 RTLIBCASE(EXP_F);
440 case TargetOpcode::G_FEXP2:
441 RTLIBCASE(EXP2_F);
442 case TargetOpcode::G_FEXP10:
443 RTLIBCASE(EXP10_F);
444 case TargetOpcode::G_FREM:
445 RTLIBCASE(REM_F);
446 case TargetOpcode::G_FPOW:
447 RTLIBCASE(POW_F);
448 case TargetOpcode::G_FPOWI:
449 RTLIBCASE(POWI_F);
450 case TargetOpcode::G_FMA:
451 RTLIBCASE(FMA_F);
452 case TargetOpcode::G_FSIN:
453 RTLIBCASE(SIN_F);
454 case TargetOpcode::G_FCOS:
455 RTLIBCASE(COS_F);
456 case TargetOpcode::G_FTAN:
457 RTLIBCASE(TAN_F);
458 case TargetOpcode::G_FASIN:
459 RTLIBCASE(ASIN_F);
460 case TargetOpcode::G_FACOS:
461 RTLIBCASE(ACOS_F);
462 case TargetOpcode::G_FATAN:
463 RTLIBCASE(ATAN_F);
464 case TargetOpcode::G_FATAN2:
465 RTLIBCASE(ATAN2_F);
466 case TargetOpcode::G_FSINH:
467 RTLIBCASE(SINH_F);
468 case TargetOpcode::G_FCOSH:
469 RTLIBCASE(COSH_F);
470 case TargetOpcode::G_FTANH:
471 RTLIBCASE(TANH_F);
472 case TargetOpcode::G_FSINCOS:
473 RTLIBCASE(SINCOS_F);
474 case TargetOpcode::G_FLOG10:
475 RTLIBCASE(LOG10_F);
476 case TargetOpcode::G_FLOG:
477 RTLIBCASE(LOG_F);
478 case TargetOpcode::G_FLOG2:
479 RTLIBCASE(LOG2_F);
480 case TargetOpcode::G_FLDEXP:
481 RTLIBCASE(LDEXP_F);
482 case TargetOpcode::G_FCEIL:
483 RTLIBCASE(CEIL_F);
484 case TargetOpcode::G_FFLOOR:
485 RTLIBCASE(FLOOR_F);
486 case TargetOpcode::G_FMINNUM:
487 RTLIBCASE(FMIN_F);
488 case TargetOpcode::G_FMAXNUM:
489 RTLIBCASE(FMAX_F);
490 case TargetOpcode::G_FMINIMUMNUM:
491 RTLIBCASE(FMINIMUM_NUM_F);
492 case TargetOpcode::G_FMAXIMUMNUM:
493 RTLIBCASE(FMAXIMUM_NUM_F);
494 case TargetOpcode::G_FSQRT:
495 RTLIBCASE(SQRT_F);
496 case TargetOpcode::G_FRINT:
497 RTLIBCASE(RINT_F);
498 case TargetOpcode::G_FNEARBYINT:
499 RTLIBCASE(NEARBYINT_F);
500 case TargetOpcode::G_INTRINSIC_TRUNC:
501 RTLIBCASE(TRUNC_F);
502 case TargetOpcode::G_INTRINSIC_ROUND:
503 RTLIBCASE(ROUND_F);
504 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
505 RTLIBCASE(ROUNDEVEN_F);
506 case TargetOpcode::G_INTRINSIC_LRINT:
507 RTLIBCASE(LRINT_F);
508 case TargetOpcode::G_INTRINSIC_LLRINT:
509 RTLIBCASE(LLRINT_F);
510 }
511 llvm_unreachable("Unknown libcall function");
512#undef RTLIBCASE_INT
513#undef RTLIBCASE
514}
515
516/// True if an instruction is in tail position in its caller. Intended for
517/// legalizing libcalls as tail calls when possible.
518static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
519 MachineInstr &MI,
520 const TargetInstrInfo &TII,
521 MachineRegisterInfo &MRI) {
522 MachineBasicBlock &MBB = *MI.getParent();
523 const Function &F = MBB.getParent()->getFunction();
524
525 // Conservatively require the attributes of the call to match those of
526 // the return. Ignore NoAlias and NonNull because they don't affect the
527 // call sequence.
528 AttributeList CallerAttrs = F.getAttributes();
529 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
530 .removeAttribute(Attribute::NoAlias)
531 .removeAttribute(Attribute::NonNull)
532 .hasAttributes())
533 return false;
534
535 // It's not safe to eliminate the sign / zero extension of the return value.
536 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
537 CallerAttrs.hasRetAttr(Attribute::SExt))
538 return false;
539
540 // Only tail call if the following instruction is a standard return or if we
541 // have a `thisreturn` callee, and a sequence like:
542 //
543 // G_MEMCPY %0, %1, %2
544 // $x0 = COPY %0
545 // RET_ReallyLR implicit $x0
546 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
547 if (Next != MBB.instr_end() && Next->isCopy()) {
548 if (MI.getOpcode() == TargetOpcode::G_BZERO)
549 return false;
550
551 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
552 // memcpy/etc routines return the same parameter. For others it will be the
553 // returned value.
554 Register VReg = MI.getOperand(0).getReg();
555 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
556 return false;
557
558 Register PReg = Next->getOperand(0).getReg();
559 if (!PReg.isPhysical())
560 return false;
561
562 auto Ret = next_nodbg(Next, MBB.instr_end());
563 if (Ret == MBB.instr_end() || !Ret->isReturn())
564 return false;
565
566 if (Ret->getNumImplicitOperands() != 1)
567 return false;
568
569 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
570 return false;
571
572 // Skip over the COPY that we just validated.
573 Next = Ret;
574 }
575
576 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
577 return false;
578
579 return true;
580}
581
582LegalizerHelper::LegalizeResult
583llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
584 const CallLowering::ArgInfo &Result,
585 ArrayRef<CallLowering::ArgInfo> Args,
586 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
587 MachineInstr *MI) {
588 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
589
590 CallLowering::CallLoweringInfo Info;
591 Info.CallConv = CC;
592 Info.Callee = MachineOperand::CreateES(Name);
593 Info.OrigRet = Result;
594 if (MI)
595 Info.IsTailCall =
596 (Result.Ty->isVoidTy() ||
597 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
598 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
599 *MIRBuilder.getMRI());
600
601 llvm::append_range(Info.OrigArgs, Args);
602 if (!CLI.lowerCall(MIRBuilder, Info))
603 return LegalizerHelper::UnableToLegalize;
604
605 if (MI && Info.LoweredTailCall) {
606 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
607
608 // Check debug locations before removing the return.
609 LocObserver.checkpoint(true);
610
611 // We must have a return following the call (or debug insts) to get past
612 // isLibCallInTailPosition.
613 do {
614 MachineInstr *Next = MI->getNextNode();
615 assert(Next &&
616 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
617 "Expected instr following MI to be return or debug inst?");
618 // We lowered a tail call, so the call is now the return from the block.
619 // Delete the old return.
620 Next->eraseFromParent();
621 } while (MI->getNextNode());
622
623 // We expect to lose the debug location from the return.
624 LocObserver.checkpoint(false);
625 }
626 return LegalizerHelper::Legalized;
627}
628
629LegalizerHelper::LegalizeResult
630llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
631 const CallLowering::ArgInfo &Result,
632 ArrayRef<CallLowering::ArgInfo> Args,
633 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
634 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
635 const char *Name = TLI.getLibcallName(Libcall);
636 if (!Name)
637 return LegalizerHelper::UnableToLegalize;
638 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
639 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
640}
641
642// Useful for libcalls where all operands have the same type.
643static LegalizerHelper::LegalizeResult
644simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
645 Type *OpType, LostDebugLocObserver &LocObserver) {
646 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
647
648 // FIXME: What does the original arg index mean here?
649 SmallVector<CallLowering::ArgInfo, 3> Args;
650 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
651 Args.push_back({MO.getReg(), OpType, 0});
652 return createLibcall(MIRBuilder, Libcall,
653 {MI.getOperand(0).getReg(), OpType, 0}, Args,
654 LocObserver, &MI);
655}
656
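// Lower G_FSINCOS to a single sincos-style libcall: the callee writes the sin
// and cos results through two stack temporaries, which are then loaded back
// into the instruction's destination registers.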
657LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
658 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
659 LostDebugLocObserver &LocObserver) {
660 MachineFunction &MF = *MI.getMF();
661 MachineRegisterInfo &MRI = MF.getRegInfo();
662
663 Register DstSin = MI.getOperand(0).getReg();
664 Register DstCos = MI.getOperand(1).getReg();
665 Register Src = MI.getOperand(2).getReg();
666 LLT DstTy = MRI.getType(DstSin);
667
668 int MemSize = DstTy.getSizeInBytes();
669 Align Alignment = getStackTemporaryAlignment(DstTy);
670 const DataLayout &DL = MIRBuilder.getDataLayout();
671 unsigned AddrSpace = DL.getAllocaAddrSpace();
672 MachinePointerInfo PtrInfo;
673
674 Register StackPtrSin =
675 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
676 .getReg(0);
677 Register StackPtrCos =
678 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
679 .getReg(0);
680
681 auto &Ctx = MF.getFunction().getContext();
682 auto LibcallResult =
683 createLibcall(MIRBuilder, getRTLibDesc(MI.getOpcode(), Size),
684 {{0}, Type::getVoidTy(Ctx), 0},
685 {{Src, OpType, 0},
686 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
687 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
688 LocObserver, &MI);
689
690 if (LibcallResult != LegalizeResult::Legalized)
691 return LibcallResult;
692
693 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
694 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
695 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
696 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
697
698 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
699 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
700 MI.eraseFromParent();
701
702 return Legalized;
703}
704
705LegalizerHelper::LegalizeResult
706llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
707 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
708 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
709
710 SmallVector<CallLowering::ArgInfo, 3> Args;
711 // Add all the args, except for the last which is an imm denoting 'tail'.
712 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
713 Register Reg = MI.getOperand(i).getReg();
714
715 // Need to derive an IR type for call lowering.
716 LLT OpLLT = MRI.getType(Reg);
717 Type *OpTy = nullptr;
718 if (OpLLT.isPointer())
719 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
720 else
721 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
722 Args.push_back({Reg, OpTy, 0});
723 }
724
725 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
726 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
727 RTLIB::Libcall RTLibcall;
728 unsigned Opc = MI.getOpcode();
729 const char *Name;
730 switch (Opc) {
731 case TargetOpcode::G_BZERO:
732 RTLibcall = RTLIB::BZERO;
733 Name = TLI.getLibcallName(RTLibcall);
734 break;
735 case TargetOpcode::G_MEMCPY:
736 RTLibcall = RTLIB::MEMCPY;
737 Name = TLI.getMemcpyName();
738 Args[0].Flags[0].setReturned();
739 break;
740 case TargetOpcode::G_MEMMOVE:
741 RTLibcall = RTLIB::MEMMOVE;
742 Name = TLI.getLibcallName(RTLibcall);
743 Args[0].Flags[0].setReturned();
744 break;
745 case TargetOpcode::G_MEMSET:
746 RTLibcall = RTLIB::MEMSET;
747 Name = TLI.getLibcallName(RTLibcall);
748 Args[0].Flags[0].setReturned();
749 break;
750 default:
751 llvm_unreachable("unsupported opcode");
752 }
753
754 // Unsupported libcall on the target.
755 if (!Name) {
756 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
757 << MIRBuilder.getTII().getName(Opc) << "\n");
758 return LegalizerHelper::UnableToLegalize;
759 }
760
761 CallLowering::CallLoweringInfo Info;
762 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
763 Info.Callee = MachineOperand::CreateES(Name);
764 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
765 Info.IsTailCall =
766 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
767 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
768
769 llvm::append_range(Info.OrigArgs, Args);
770 if (!CLI.lowerCall(MIRBuilder, Info))
771 return LegalizerHelper::UnableToLegalize;
772
773 if (Info.LoweredTailCall) {
774 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
775
776 // Check debug locations before removing the return.
777 LocObserver.checkpoint(true);
778
779 // We must have a return following the call (or debug insts) to get past
780 // isLibCallInTailPosition.
781 do {
782 MachineInstr *Next = MI.getNextNode();
783 assert(Next &&
784 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
785 "Expected instr following MI to be return or debug inst?");
786 // We lowered a tail call, so the call is now the return from the block.
787 // Delete the old return.
788 Next->eraseFromParent();
789 } while (MI.getNextNode());
790
791 // We expect to lose the debug location from the return.
792 LocObserver.checkpoint(false);
793 }
794
795 return LegalizerHelper::Legalized;
796}
797
798static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
799 unsigned Opc = MI.getOpcode();
800 auto &AtomicMI = cast<GMemOperation>(MI);
801 auto &MMO = AtomicMI.getMMO();
802 auto Ordering = MMO.getMergedOrdering();
803 LLT MemType = MMO.getMemoryType();
804 uint64_t MemSize = MemType.getSizeInBytes();
805 if (MemType.isVector())
806 return RTLIB::UNKNOWN_LIBCALL;
807
808#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
809#define LCALL5(A) \
810 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
811 switch (Opc) {
812 case TargetOpcode::G_ATOMIC_CMPXCHG:
813 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
814 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
815 return getOutlineAtomicHelper(LC, Ordering, MemSize);
816 }
817 case TargetOpcode::G_ATOMICRMW_XCHG: {
818 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
819 return getOutlineAtomicHelper(LC, Ordering, MemSize);
820 }
821 case TargetOpcode::G_ATOMICRMW_ADD:
822 case TargetOpcode::G_ATOMICRMW_SUB: {
823 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
824 return getOutlineAtomicHelper(LC, Ordering, MemSize);
825 }
826 case TargetOpcode::G_ATOMICRMW_AND: {
827 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
828 return getOutlineAtomicHelper(LC, Ordering, MemSize);
829 }
830 case TargetOpcode::G_ATOMICRMW_OR: {
831 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
832 return getOutlineAtomicHelper(LC, Ordering, MemSize);
833 }
834 case TargetOpcode::G_ATOMICRMW_XOR: {
835 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
836 return getOutlineAtomicHelper(LC, Ordering, MemSize);
837 }
838 default:
839 return RTLIB::UNKNOWN_LIBCALL;
840 }
841#undef LCALLS
842#undef LCALL5
843}
844
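// Lower an atomic instruction to an outline-atomics libcall (e.g. the
// __aarch64_cas*/__aarch64_ldadd* family), chosen by memory size and ordering.
// Note that G_ATOMICRMW_AND is emitted as LDCLR on the complemented value and
// G_ATOMICRMW_SUB as LDADD on the negated value.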
845static LegalizerHelper::LegalizeResult
846createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
847 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
848
849 Type *RetTy;
850 SmallVector<Register> RetRegs;
851 SmallVector<CallLowering::ArgInfo, 3> Args;
852 unsigned Opc = MI.getOpcode();
853 switch (Opc) {
854 case TargetOpcode::G_ATOMIC_CMPXCHG:
855 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
856 Register Success;
857 LLT SuccessLLT;
858 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
859 MI.getFirst4RegLLTs();
860 RetRegs.push_back(Ret);
861 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
862 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
863 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
864 NewLLT) = MI.getFirst5RegLLTs();
865 RetRegs.push_back(Success);
866 RetTy = StructType::get(
867 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
868 }
869 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
870 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
871 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
872 break;
873 }
874 case TargetOpcode::G_ATOMICRMW_XCHG:
875 case TargetOpcode::G_ATOMICRMW_ADD:
876 case TargetOpcode::G_ATOMICRMW_SUB:
877 case TargetOpcode::G_ATOMICRMW_AND:
878 case TargetOpcode::G_ATOMICRMW_OR:
879 case TargetOpcode::G_ATOMICRMW_XOR: {
880 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
881 RetRegs.push_back(Ret);
882 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
883 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
884 Val =
885 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
886 .getReg(0);
887 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
888 Val =
889 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
890 .getReg(0);
891 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
892 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
893 break;
894 }
895 default:
896 llvm_unreachable("unsupported opcode");
897 }
898
899 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
900 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
901 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
902 const char *Name = TLI.getLibcallName(RTLibcall);
903
904 // Unsupported libcall on the target.
905 if (!Name) {
906 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
907 << MIRBuilder.getTII().getName(Opc) << "\n");
908 return LegalizerHelper::UnableToLegalize;
909 }
910
911 CallLowering::CallLoweringInfo Info;
912 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
913 Info.Callee = MachineOperand::CreateES(Name);
914 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
915
916 llvm::append_range(Info.OrigArgs, Args);
917 if (!CLI.lowerCall(MIRBuilder, Info))
918 return LegalizerHelper::UnableToLegalize;
919
920 return LegalizerHelper::Legalized;
921}
922
923static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
924 Type *FromType) {
925 auto ToMVT = MVT::getVT(ToType);
926 auto FromMVT = MVT::getVT(FromType);
927
928 switch (Opcode) {
929 case TargetOpcode::G_FPEXT:
930 return RTLIB::getFPEXT(FromMVT, ToMVT);
931 case TargetOpcode::G_FPTRUNC:
932 return RTLIB::getFPROUND(FromMVT, ToMVT);
933 case TargetOpcode::G_FPTOSI:
934 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
935 case TargetOpcode::G_FPTOUI:
936 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
937 case TargetOpcode::G_SITOFP:
938 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
939 case TargetOpcode::G_UITOFP:
940 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
941 }
942 llvm_unreachable("Unsupported libcall function");
943}
944
945static LegalizerHelper::LegalizeResult
946conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
947 Type *FromType, LostDebugLocObserver &LocObserver,
948 const TargetLowering &TLI, bool IsSigned = false) {
949 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
950 if (FromType->isIntegerTy()) {
951 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
952 Arg.Flags[0].setSExt();
953 else
954 Arg.Flags[0].setZExt();
955 }
956
957 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
958 return createLibcall(MIRBuilder, Libcall,
959 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
960 &MI);
961}
962
963static RTLIB::Libcall
964getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
965 RTLIB::Libcall RTLibcall;
966 switch (MI.getOpcode()) {
967 case TargetOpcode::G_GET_FPENV:
968 RTLibcall = RTLIB::FEGETENV;
969 break;
970 case TargetOpcode::G_SET_FPENV:
971 case TargetOpcode::G_RESET_FPENV:
972 RTLibcall = RTLIB::FESETENV;
973 break;
974 case TargetOpcode::G_GET_FPMODE:
975 RTLibcall = RTLIB::FEGETMODE;
976 break;
977 case TargetOpcode::G_SET_FPMODE:
978 case TargetOpcode::G_RESET_FPMODE:
979 RTLibcall = RTLIB::FESETMODE;
980 break;
981 default:
982 llvm_unreachable("Unexpected opcode");
983 }
984 return RTLibcall;
985}
986
987// Some library functions that read FP state (fegetmode, fegetenv) write the
988// state into a region in memory. IR intrinsics that do the same operations
989// (get_fpmode, get_fpenv) return the state as an integer value. To implement these
990// intrinsics via the library functions, we need to use a temporary variable,
991// for example:
992//
993// %0:_(s32) = G_GET_FPMODE
994//
995// is transformed to:
996//
997// %1:_(p0) = G_FRAME_INDEX %stack.0
998// BL &fegetmode
999// %0:_(s32) = G_LOAD %1
1000//
1001LegalizerHelper::LegalizeResult
1002LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
1003 MachineInstr &MI,
1004 LostDebugLocObserver &LocObserver) {
1005 const DataLayout &DL = MIRBuilder.getDataLayout();
1006 auto &MF = MIRBuilder.getMF();
1007 auto &MRI = *MIRBuilder.getMRI();
1008 auto &Ctx = MF.getFunction().getContext();
1009
1010 // Create a temporary where the library function will put the read state.
1011 Register Dst = MI.getOperand(0).getReg();
1012 LLT StateTy = MRI.getType(Dst);
1013 TypeSize StateSize = StateTy.getSizeInBytes();
1014 Align TempAlign = getStackTemporaryAlignment(StateTy);
1015 MachinePointerInfo TempPtrInfo;
1016 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1017
1018 // Create a call to library function, with the temporary as an argument.
1019 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1020 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1021 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1022 auto Res =
1023 createLibcall(MIRBuilder, RTLibcall,
1024 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1025 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1026 LocObserver, nullptr);
1027 if (Res != LegalizerHelper::Legalized)
1028 return Res;
1029
1030 // Create a load from the temporary.
1031 MachineMemOperand *MMO = MF.getMachineMemOperand(
1032 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1033 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1034
1035 return LegalizerHelper::Legalized;
1036}
1037
1038// Similar to `createGetStateLibcall`, this function calls a library function
1039// using transient space on the stack. In this case the library function reads
1040// the content of the memory region.
1041LegalizerHelper::LegalizeResult
1042LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1043 MachineInstr &MI,
1044 LostDebugLocObserver &LocObserver) {
1045 const DataLayout &DL = MIRBuilder.getDataLayout();
1046 auto &MF = MIRBuilder.getMF();
1047 auto &MRI = *MIRBuilder.getMRI();
1048 auto &Ctx = MF.getFunction().getContext();
1049
1050 // Create a temporary where the library function will get the new state.
1051 Register Src = MI.getOperand(0).getReg();
1052 LLT StateTy = MRI.getType(Src);
1053 TypeSize StateSize = StateTy.getSizeInBytes();
1054 Align TempAlign = getStackTemporaryAlignment(StateTy);
1055 MachinePointerInfo TempPtrInfo;
1056 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1057
1058 // Put the new state into the temporary.
1059 MachineMemOperand *MMO = MF.getMachineMemOperand(
1060 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1061 MIRBuilder.buildStore(Src, Temp, *MMO);
1062
1063 // Create a call to library function, with the temporary as an argument.
1064 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1065 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1066 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1067 return createLibcall(MIRBuilder, RTLibcall,
1068 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1069 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1070 LocObserver, nullptr);
1071}
1072
1073/// Returns the corresponding libcall for the given Pred and
1074/// the ICMP predicate that should be generated to compare with #0
1075/// after the libcall.
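///
/// For example, an f32 FCMP_OLT is lowered to the __ltsf2 soft-float routine
/// on most targets, and the libcall's i32 result is then compared against 0
/// with ICMP_SLT to produce the final predicate value.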
1076static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1077getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1078#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1079 do { \
1080 switch (Size) { \
1081 case 32: \
1082 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1083 case 64: \
1084 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1085 case 128: \
1086 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1087 default: \
1088 llvm_unreachable("unexpected size"); \
1089 } \
1090 } while (0)
1091
1092 switch (Pred) {
1093 case CmpInst::FCMP_OEQ:
1094 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1095 case CmpInst::FCMP_UNE:
1096 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1097 case CmpInst::FCMP_OGE:
1098 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1099 case CmpInst::FCMP_OLT:
1100 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1101 case CmpInst::FCMP_OLE:
1102 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1103 case CmpInst::FCMP_OGT:
1104 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1105 case CmpInst::FCMP_UNO:
1106 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1107 default:
1108 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1109 }
1110}
1111
1113LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1115 LostDebugLocObserver &LocObserver) {
1116 auto &MF = MIRBuilder.getMF();
1117 auto &Ctx = MF.getFunction().getContext();
1118 const GFCmp *Cmp = cast<GFCmp>(&MI);
1119
1120 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1121 unsigned Size = OpLLT.getSizeInBits();
1122 if ((Size != 32 && Size != 64 && Size != 128) ||
1123 OpLLT != MRI.getType(Cmp->getRHSReg()))
1124 return UnableToLegalize;
1125
1126 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1127
1128 // DstReg type is s32
1129 const Register DstReg = Cmp->getReg(0);
1130 LLT DstTy = MRI.getType(DstReg);
1131 const auto Cond = Cmp->getCond();
1132
1133 // Reference:
1134 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1135 // Generates a libcall followed by ICMP.
1136 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1137 const CmpInst::Predicate ICmpPred,
1138 const DstOp &Res) -> Register {
1139 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1140 constexpr LLT TempLLT = LLT::scalar(32);
1141 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1142 // Generate libcall, holding result in Temp
1143 const auto Status = createLibcall(
1144 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1145 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1146 LocObserver, &MI);
1147 if (!Status)
1148 return {};
1149
1150 // Compare temp with #0 to get the final result.
1151 return MIRBuilder
1152 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1153 .getReg(0);
1154 };
1155
1156 // Simple case if we have a direct mapping from predicate to libcall
1157 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1158 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1159 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1160 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1161 return Legalized;
1162 }
1163 return UnableToLegalize;
1164 }
1165
1166 // No direct mapping found, should be generated as combination of libcalls.
1167
1168 switch (Cond) {
1169 case CmpInst::FCMP_UEQ: {
1170 // FCMP_UEQ: unordered or equal
1171 // Convert into (FCMP_OEQ || FCMP_UNO).
1172
1173 const auto [OeqLibcall, OeqPred] =
1174 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1175 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1176
1177 const auto [UnoLibcall, UnoPred] =
1178 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1179 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1180 if (Oeq && Uno)
1181 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1182 else
1183 return UnableToLegalize;
1184
1185 break;
1186 }
1187 case CmpInst::FCMP_ONE: {
1188 // FCMP_ONE: ordered and operands are unequal
1189 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1190
1191 // We invert the predicate instead of generating a NOT
1192 // to save one instruction.
1193 // On AArch64, isel can even select the two compares into a single ccmp.
1194 const auto [OeqLibcall, OeqPred] =
1195 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1196 const auto NotOeq =
1197 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1198
1199 const auto [UnoLibcall, UnoPred] =
1200 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1201 const auto NotUno =
1202 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1203
1204 if (NotOeq && NotUno)
1205 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1206 else
1207 return UnableToLegalize;
1208
1209 break;
1210 }
1211 case CmpInst::FCMP_ULT:
1212 case CmpInst::FCMP_UGE:
1213 case CmpInst::FCMP_UGT:
1214 case CmpInst::FCMP_ULE:
1215 case CmpInst::FCMP_ORD: {
1216 // Convert into: !(inverse(Pred))
1217 // E.g. FCMP_ULT becomes !FCMP_OGE
1218 // This is equivalent to the following, but saves some instructions.
1219 // MIRBuilder.buildNot(
1220 // PredTy,
1221 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1222 // Op1, Op2));
1223 const auto [InversedLibcall, InversedPred] =
1224 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1225 if (!BuildLibcall(InversedLibcall,
1226 CmpInst::getInversePredicate(InversedPred), DstReg))
1227 return UnableToLegalize;
1228 break;
1229 }
1230 default:
1231 return UnableToLegalize;
1232 }
1233
1234 return Legalized;
1235}
1236
1237// This function is used to legalize operations that set the default
1238// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
1239// used for that. On most targets supported by glibc, FE_DFL_MODE is defined as
1240// `((const femode_t *) -1)`, and that assumption is used here. If it does not
1241// hold for some target, the target must provide custom lowering.
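//
// The emitted sequence is roughly:
//
// %val:_(s64) = G_CONSTANT i64 -1
// %ptr:_(p0) = G_INTTOPTR %val
// BL &fesetmode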
1242LegalizerHelper::LegalizeResult
1243LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1244 MachineInstr &MI,
1245 LostDebugLocObserver &LocObserver) {
1246 const DataLayout &DL = MIRBuilder.getDataLayout();
1247 auto &MF = MIRBuilder.getMF();
1248 auto &Ctx = MF.getFunction().getContext();
1249
1250 // Create an argument for the library function.
1251 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1252 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1253 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1254 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1255 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1256 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1257 MIRBuilder.buildIntToPtr(Dest, DefValue);
1258
1259 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1260 return createLibcall(MIRBuilder, RTLibcall,
1261 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1262 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1263 LocObserver, &MI);
1264}
1265
1266LegalizerHelper::LegalizeResult
1267LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1268 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1269
1270 switch (MI.getOpcode()) {
1271 default:
1272 return UnableToLegalize;
1273 case TargetOpcode::G_MUL:
1274 case TargetOpcode::G_SDIV:
1275 case TargetOpcode::G_UDIV:
1276 case TargetOpcode::G_SREM:
1277 case TargetOpcode::G_UREM:
1278 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1279 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1280 unsigned Size = LLTy.getSizeInBits();
1281 Type *HLTy = IntegerType::get(Ctx, Size);
1282 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1283 if (Status != Legalized)
1284 return Status;
1285 break;
1286 }
1287 case TargetOpcode::G_FADD:
1288 case TargetOpcode::G_FSUB:
1289 case TargetOpcode::G_FMUL:
1290 case TargetOpcode::G_FDIV:
1291 case TargetOpcode::G_FMA:
1292 case TargetOpcode::G_FPOW:
1293 case TargetOpcode::G_FREM:
1294 case TargetOpcode::G_FCOS:
1295 case TargetOpcode::G_FSIN:
1296 case TargetOpcode::G_FTAN:
1297 case TargetOpcode::G_FACOS:
1298 case TargetOpcode::G_FASIN:
1299 case TargetOpcode::G_FATAN:
1300 case TargetOpcode::G_FATAN2:
1301 case TargetOpcode::G_FCOSH:
1302 case TargetOpcode::G_FSINH:
1303 case TargetOpcode::G_FTANH:
1304 case TargetOpcode::G_FLOG10:
1305 case TargetOpcode::G_FLOG:
1306 case TargetOpcode::G_FLOG2:
1307 case TargetOpcode::G_FEXP:
1308 case TargetOpcode::G_FEXP2:
1309 case TargetOpcode::G_FEXP10:
1310 case TargetOpcode::G_FCEIL:
1311 case TargetOpcode::G_FFLOOR:
1312 case TargetOpcode::G_FMINNUM:
1313 case TargetOpcode::G_FMAXNUM:
1314 case TargetOpcode::G_FMINIMUMNUM:
1315 case TargetOpcode::G_FMAXIMUMNUM:
1316 case TargetOpcode::G_FSQRT:
1317 case TargetOpcode::G_FRINT:
1318 case TargetOpcode::G_FNEARBYINT:
1319 case TargetOpcode::G_INTRINSIC_TRUNC:
1320 case TargetOpcode::G_INTRINSIC_ROUND:
1321 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1322 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1323 unsigned Size = LLTy.getSizeInBits();
1324 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1325 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1326 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1327 return UnableToLegalize;
1328 }
1329 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1330 if (Status != Legalized)
1331 return Status;
1332 break;
1333 }
1334 case TargetOpcode::G_FSINCOS: {
1335 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1336 unsigned Size = LLTy.getSizeInBits();
1337 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1338 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1339 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1340 return UnableToLegalize;
1341 }
1342 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1343 }
1344 case TargetOpcode::G_LROUND:
1345 case TargetOpcode::G_LLROUND:
1346 case TargetOpcode::G_INTRINSIC_LRINT:
1347 case TargetOpcode::G_INTRINSIC_LLRINT: {
1348 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1349 unsigned Size = LLTy.getSizeInBits();
1350 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1351 Type *ITy = IntegerType::get(
1352 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1353 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1354 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1355 return UnableToLegalize;
1356 }
1357 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1358 LegalizeResult Status =
1359 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1360 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1361 if (Status != Legalized)
1362 return Status;
1363 MI.eraseFromParent();
1364 return Legalized;
1365 }
1366 case TargetOpcode::G_FPOWI:
1367 case TargetOpcode::G_FLDEXP: {
1368 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1369 unsigned Size = LLTy.getSizeInBits();
1370 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1371 Type *ITy = IntegerType::get(
1372 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1373 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1374 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1375 return UnableToLegalize;
1376 }
1377 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1378 SmallVector<CallLowering::ArgInfo, 2> Args = {
1379 {MI.getOperand(1).getReg(), HLTy, 0},
1380 {MI.getOperand(2).getReg(), ITy, 1}};
1381 Args[1].Flags[0].setSExt();
1382 LegalizeResult Status =
1383 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1384 Args, LocObserver, &MI);
1385 if (Status != Legalized)
1386 return Status;
1387 break;
1388 }
1389 case TargetOpcode::G_FPEXT:
1390 case TargetOpcode::G_FPTRUNC: {
1391 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1392 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1393 if (!FromTy || !ToTy)
1394 return UnableToLegalize;
1395 LegalizeResult Status =
1396 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1397 if (Status != Legalized)
1398 return Status;
1399 break;
1400 }
1401 case TargetOpcode::G_FCMP: {
1402 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1403 if (Status != Legalized)
1404 return Status;
1405 MI.eraseFromParent();
1406 return Status;
1407 }
1408 case TargetOpcode::G_FPTOSI:
1409 case TargetOpcode::G_FPTOUI: {
1410 // FIXME: Support other types
1411 Type *FromTy =
1412 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1413 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1414 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1415 return UnableToLegalize;
1416 LegalizeResult Status = conversionLibcall(
1417 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1418 if (Status != Legalized)
1419 return Status;
1420 break;
1421 }
1422 case TargetOpcode::G_SITOFP:
1423 case TargetOpcode::G_UITOFP: {
1424 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1425 Type *ToTy =
1426 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1427 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1428 return UnableToLegalize;
1429 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1430 LegalizeResult Status =
1431 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1432 LocObserver, TLI, IsSigned);
1433 if (Status != Legalized)
1434 return Status;
1435 break;
1436 }
1437 case TargetOpcode::G_ATOMICRMW_XCHG:
1438 case TargetOpcode::G_ATOMICRMW_ADD:
1439 case TargetOpcode::G_ATOMICRMW_SUB:
1440 case TargetOpcode::G_ATOMICRMW_AND:
1441 case TargetOpcode::G_ATOMICRMW_OR:
1442 case TargetOpcode::G_ATOMICRMW_XOR:
1443 case TargetOpcode::G_ATOMIC_CMPXCHG:
1444 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1445 auto Status = createAtomicLibcall(MIRBuilder, MI);
1446 if (Status != Legalized)
1447 return Status;
1448 break;
1449 }
1450 case TargetOpcode::G_BZERO:
1451 case TargetOpcode::G_MEMCPY:
1452 case TargetOpcode::G_MEMMOVE:
1453 case TargetOpcode::G_MEMSET: {
1454 LegalizeResult Result =
1455 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1456 if (Result != Legalized)
1457 return Result;
1458 MI.eraseFromParent();
1459 return Result;
1460 }
1461 case TargetOpcode::G_GET_FPENV:
1462 case TargetOpcode::G_GET_FPMODE: {
1463 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1464 if (Result != Legalized)
1465 return Result;
1466 break;
1467 }
1468 case TargetOpcode::G_SET_FPENV:
1469 case TargetOpcode::G_SET_FPMODE: {
1470 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1471 if (Result != Legalized)
1472 return Result;
1473 break;
1474 }
1475 case TargetOpcode::G_RESET_FPENV:
1476 case TargetOpcode::G_RESET_FPMODE: {
1477 LegalizeResult Result =
1478 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1479 if (Result != Legalized)
1480 return Result;
1481 break;
1482 }
1483 }
1484
1485 MI.eraseFromParent();
1486 return Legalized;
1487}
1488
1489LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1490 unsigned TypeIdx,
1491 LLT NarrowTy) {
1492 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1493 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1494
1495 switch (MI.getOpcode()) {
1496 default:
1497 return UnableToLegalize;
1498 case TargetOpcode::G_IMPLICIT_DEF: {
1499 Register DstReg = MI.getOperand(0).getReg();
1500 LLT DstTy = MRI.getType(DstReg);
1501
1502 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1503 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1504 // FIXME: Although this would also be legal for the general case, it causes
1505 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1506 // combines not being hit). This seems to be a problem related to the
1507 // artifact combiner.
1508 if (SizeOp0 % NarrowSize != 0) {
1509 LLT ImplicitTy = NarrowTy;
1510 if (DstTy.isVector())
1511 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1512
1513 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1514 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1515
1516 MI.eraseFromParent();
1517 return Legalized;
1518 }
1519
1520 int NumParts = SizeOp0 / NarrowSize;
1521
1522 SmallVector<Register, 8> DstRegs;
1523 for (int i = 0; i < NumParts; ++i)
1524 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1525
1526 if (DstTy.isVector())
1527 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1528 else
1529 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1530 MI.eraseFromParent();
1531 return Legalized;
1532 }
1533 case TargetOpcode::G_CONSTANT: {
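// Split the constant into NarrowTy-sized pieces (plus a leftover piece when
// the width is not an exact multiple); e.g. an s96 constant narrowed to s32
// becomes three s32 G_CONSTANTs holding the shifted-and-truncated chunks,
// re-merged into the original result.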
1534 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1535 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1536 unsigned TotalSize = Ty.getSizeInBits();
1537 unsigned NarrowSize = NarrowTy.getSizeInBits();
1538 int NumParts = TotalSize / NarrowSize;
1539
1540 SmallVector<Register, 4> PartRegs;
1541 for (int I = 0; I != NumParts; ++I) {
1542 unsigned Offset = I * NarrowSize;
1543 auto K = MIRBuilder.buildConstant(NarrowTy,
1544 Val.lshr(Offset).trunc(NarrowSize));
1545 PartRegs.push_back(K.getReg(0));
1546 }
1547
1548 LLT LeftoverTy;
1549 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1550 SmallVector<Register, 1> LeftoverRegs;
1551 if (LeftoverBits != 0) {
1552 LeftoverTy = LLT::scalar(LeftoverBits);
1553 auto K = MIRBuilder.buildConstant(
1554 LeftoverTy,
1555 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1556 LeftoverRegs.push_back(K.getReg(0));
1557 }
1558
1559 insertParts(MI.getOperand(0).getReg(),
1560 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1561
1562 MI.eraseFromParent();
1563 return Legalized;
1564 }
1565 case TargetOpcode::G_SEXT:
1566 case TargetOpcode::G_ZEXT:
1567 case TargetOpcode::G_ANYEXT:
1568 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1569 case TargetOpcode::G_TRUNC: {
1570 if (TypeIdx != 1)
1571 return UnableToLegalize;
1572
1573 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1574 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1575 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1576 return UnableToLegalize;
1577 }
1578
1579 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1580 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1581 MI.eraseFromParent();
1582 return Legalized;
1583 }
1584 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1585 case TargetOpcode::G_FREEZE: {
1586 if (TypeIdx != 0)
1587 return UnableToLegalize;
1588
1589 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1590 // Should widen scalar first
1591 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1592 return UnableToLegalize;
1593
1594 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1595 SmallVector<Register, 8> Parts;
1596 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1597 Parts.push_back(
1598 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1599 .getReg(0));
1600 }
1601
1602 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1603 MI.eraseFromParent();
1604 return Legalized;
1605 }
1606 case TargetOpcode::G_ADD:
1607 case TargetOpcode::G_SUB:
1608 case TargetOpcode::G_SADDO:
1609 case TargetOpcode::G_SSUBO:
1610 case TargetOpcode::G_SADDE:
1611 case TargetOpcode::G_SSUBE:
1612 case TargetOpcode::G_UADDO:
1613 case TargetOpcode::G_USUBO:
1614 case TargetOpcode::G_UADDE:
1615 case TargetOpcode::G_USUBE:
1616 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1617 case TargetOpcode::G_MUL:
1618 case TargetOpcode::G_UMULH:
1619 return narrowScalarMul(MI, NarrowTy);
1620 case TargetOpcode::G_EXTRACT:
1621 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1622 case TargetOpcode::G_INSERT:
1623 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1624 case TargetOpcode::G_LOAD: {
1625 auto &LoadMI = cast<GLoad>(MI);
1626 Register DstReg = LoadMI.getDstReg();
1627 LLT DstTy = MRI.getType(DstReg);
1628 if (DstTy.isVector())
1629 return UnableToLegalize;
1630
1631 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1632 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1633 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1634 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1635 LoadMI.eraseFromParent();
1636 return Legalized;
1637 }
1638
1639 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1640 }
1641 case TargetOpcode::G_ZEXTLOAD:
1642 case TargetOpcode::G_SEXTLOAD: {
1643 auto &LoadMI = cast<GExtLoad>(MI);
1644 Register DstReg = LoadMI.getDstReg();
1645 Register PtrReg = LoadMI.getPointerReg();
1646
1647 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1648 auto &MMO = LoadMI.getMMO();
1649 unsigned MemSize = MMO.getSizeInBits().getValue();
1650
1651 if (MemSize == NarrowSize) {
1652 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1653 } else if (MemSize < NarrowSize) {
1654 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1655 } else if (MemSize > NarrowSize) {
1656 // FIXME: Need to split the load.
1657 return UnableToLegalize;
1658 }
1659
1660 if (isa<GZExtLoad>(LoadMI))
1661 MIRBuilder.buildZExt(DstReg, TmpReg);
1662 else
1663 MIRBuilder.buildSExt(DstReg, TmpReg);
1664
1665 LoadMI.eraseFromParent();
1666 return Legalized;
1667 }
1668 case TargetOpcode::G_STORE: {
1669 auto &StoreMI = cast<GStore>(MI);
1670
1671 Register SrcReg = StoreMI.getValueReg();
1672 LLT SrcTy = MRI.getType(SrcReg);
1673 if (SrcTy.isVector())
1674 return UnableToLegalize;
1675
1676 int NumParts = SizeOp0 / NarrowSize;
1677 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1678 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1679 if (SrcTy.isVector() && LeftoverBits != 0)
1680 return UnableToLegalize;
1681
1682 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1683 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1684 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1685 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1686 StoreMI.eraseFromParent();
1687 return Legalized;
1688 }
1689
1690 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1691 }
1692 case TargetOpcode::G_SELECT:
1693 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1694 case TargetOpcode::G_AND:
1695 case TargetOpcode::G_OR:
1696 case TargetOpcode::G_XOR: {
1697 // Legalize bitwise operation:
1698 // A = BinOp<Ty> B, C
1699 // into:
1700 // B1, ..., BN = G_UNMERGE_VALUES B
1701 // C1, ..., CN = G_UNMERGE_VALUES C
1702 // A1 = BinOp<Ty/N> B1, C1
1703 // ...
1704 // AN = BinOp<Ty/N> BN, CN
1705 // A = G_MERGE_VALUES A1, ..., AN
1706 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1707 }
1708 case TargetOpcode::G_SHL:
1709 case TargetOpcode::G_LSHR:
1710 case TargetOpcode::G_ASHR:
1711 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1712 case TargetOpcode::G_CTLZ:
1713 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1714 case TargetOpcode::G_CTTZ:
1715 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1716 case TargetOpcode::G_CTPOP:
1717 if (TypeIdx == 1)
1718 switch (MI.getOpcode()) {
1719 case TargetOpcode::G_CTLZ:
1720 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1721 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1722 case TargetOpcode::G_CTTZ:
1723 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1724 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1725 case TargetOpcode::G_CTPOP:
1726 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1727 default:
1728 return UnableToLegalize;
1729 }
1730
1731 Observer.changingInstr(MI);
1732 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1733 Observer.changedInstr(MI);
1734 return Legalized;
1735 case TargetOpcode::G_INTTOPTR:
1736 if (TypeIdx != 1)
1737 return UnableToLegalize;
1738
1739 Observer.changingInstr(MI);
1740 narrowScalarSrc(MI, NarrowTy, 1);
1741 Observer.changedInstr(MI);
1742 return Legalized;
1743 case TargetOpcode::G_PTRTOINT:
1744 if (TypeIdx != 0)
1745 return UnableToLegalize;
1746
1747 Observer.changingInstr(MI);
1748 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1749 Observer.changedInstr(MI);
1750 return Legalized;
1751 case TargetOpcode::G_PHI: {
1752 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1753 // NarrowSize.
1754 if (SizeOp0 % NarrowSize != 0)
1755 return UnableToLegalize;
1756
1757 unsigned NumParts = SizeOp0 / NarrowSize;
1758 SmallVector<Register, 2> DstRegs(NumParts);
1759 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1760 Observer.changingInstr(MI);
1761 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1762 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1763 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1764 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1765 SrcRegs[i / 2], MIRBuilder, MRI);
1766 }
1767 MachineBasicBlock &MBB = *MI.getParent();
1768 MIRBuilder.setInsertPt(MBB, MI);
1769 for (unsigned i = 0; i < NumParts; ++i) {
1770 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1771 MachineInstrBuilder MIB =
1772 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1773 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1774 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1775 }
1776 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1777 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1778 Observer.changedInstr(MI);
1779 MI.eraseFromParent();
1780 return Legalized;
1781 }
1782 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1783 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1784 if (TypeIdx != 2)
1785 return UnableToLegalize;
1786
1787 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1788 Observer.changingInstr(MI);
1789 narrowScalarSrc(MI, NarrowTy, OpIdx);
1790 Observer.changedInstr(MI);
1791 return Legalized;
1792 }
1793 case TargetOpcode::G_ICMP: {
1794 Register LHS = MI.getOperand(2).getReg();
1795 LLT SrcTy = MRI.getType(LHS);
1796 CmpInst::Predicate Pred =
1797 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1798
1799 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1800 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1801 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1802 LHSLeftoverRegs, MIRBuilder, MRI))
1803 return UnableToLegalize;
1804
1805 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1806 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1807 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1808 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1809 return UnableToLegalize;
1810
1811 // We now have the LHS and RHS of the compare split into narrow-type
1812 // registers, plus potentially some leftover type.
1813 Register Dst = MI.getOperand(0).getReg();
1814 LLT ResTy = MRI.getType(Dst);
1815 if (ICmpInst::isEquality(Pred)) {
1816 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1817 // them together. For each equal part, the result should be all 0s. For
1818 // each non-equal part, we'll get at least one 1.
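// For example (illustrative), an s64 equality compare narrowed to s32:
//   %xlo:_(s32) = G_XOR %lhs_lo, %rhs_lo
//   %xhi:_(s32) = G_XOR %lhs_hi, %rhs_hi
//   %or:_(s32) = G_OR %xlo, %xhi
//   %dst:_(s1) = G_ICMP intpred(eq), %or(s32), %zero
// The inputs are equal iff every XOR, and therefore the OR, is zero.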
1819 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1820 SmallVector<Register, 4> Xors;
1821 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1822 auto LHS = std::get<0>(LHSAndRHS);
1823 auto RHS = std::get<1>(LHSAndRHS);
1824 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1825 Xors.push_back(Xor);
1826 }
1827
1828 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1829 // to the desired narrow type so that we can OR them together later.
1830 SmallVector<Register, 4> WidenedXors;
1831 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1832 auto LHS = std::get<0>(LHSAndRHS);
1833 auto RHS = std::get<1>(LHSAndRHS);
1834 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1835 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1836 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1837 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1838 llvm::append_range(Xors, WidenedXors);
1839 }
1840
1841 // Now, for each part we broke up, we know if they are equal/not equal
1842 // based off the G_XOR. We can OR these all together and compare against
1843 // 0 to get the result.
1844 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1845 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1846 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1847 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1848 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1849 } else {
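// For the relational (non-equality) predicates, compare the pieces from
// least to most significant. Every piece except the final, most significant
// one uses the unsigned form of the predicate; after the first piece, each
// step selects the result accumulated from the lower pieces when the current
// pieces of LHS and RHS are equal, and the current piece's compare otherwise.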
1850 Register CmpIn;
1851 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1852 Register CmpOut;
1853 CmpInst::Predicate PartPred;
1854
1855 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1856 PartPred = Pred;
1857 CmpOut = Dst;
1858 } else {
1859 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1860 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1861 }
1862
1863 if (!CmpIn) {
1864 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1865 RHSPartRegs[I]);
1866 } else {
1867 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1868 RHSPartRegs[I]);
1869 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1870 LHSPartRegs[I], RHSPartRegs[I]);
1871 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1872 }
1873
1874 CmpIn = CmpOut;
1875 }
1876
1877 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1878 Register CmpOut;
1879 CmpInst::Predicate PartPred;
1880
1881 if (I == E - 1) {
1882 PartPred = Pred;
1883 CmpOut = Dst;
1884 } else {
1885 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1886 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1887 }
1888
1889 if (!CmpIn) {
1890 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1891 RHSLeftoverRegs[I]);
1892 } else {
1893 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1894 RHSLeftoverRegs[I]);
1895 auto CmpEq =
1896 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1897 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1898 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1899 }
1900
1901 CmpIn = CmpOut;
1902 }
1903 }
1904 MI.eraseFromParent();
1905 return Legalized;
1906 }
1907 case TargetOpcode::G_FCMP:
1908 if (TypeIdx != 0)
1909 return UnableToLegalize;
1910
1911 Observer.changingInstr(MI);
1912 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1913 Observer.changedInstr(MI);
1914 return Legalized;
1915
1916 case TargetOpcode::G_SEXT_INREG: {
1917 if (TypeIdx != 0)
1918 return UnableToLegalize;
1919
1920 int64_t SizeInBits = MI.getOperand(2).getImm();
1921
1922 // So long as the new type has more bits than the bits we're extending, we
1923 // don't need to break it apart.
1924 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1925 Observer.changingInstr(MI);
1926 // We don't lose any non-extension bits by truncating the src and
1927 // sign-extending the dst.
1928 MachineOperand &MO1 = MI.getOperand(1);
1929 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1930 MO1.setReg(TruncMIB.getReg(0));
1931
1932 MachineOperand &MO2 = MI.getOperand(0);
1933 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1934 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1935 MIRBuilder.buildSExt(MO2, DstExt);
1936 MO2.setReg(DstExt);
1937 Observer.changedInstr(MI);
1938 return Legalized;
1939 }
1940
1941 // Break it apart. Components below the extension point are unmodified. The
1942 // component containing the extension point becomes a narrower SEXT_INREG.
1943 // Components above it are ashr'd from the component containing the
1944 // extension point.
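// For example (illustrative), narrowing to s32:
//   %dst:_(s96) = G_SEXT_INREG %src:_(s96), 40
// unmerges %src into %s0, %s1, %s2 and becomes:
//   %d1:_(s32) = G_SEXT_INREG %s1, 8   ; bit 39 is the new sign bit
//   %d2:_(s32) = G_ASHR %d1, 31        ; parts above are all sign bits
//   %dst:_(s96) = G_MERGE_VALUES %s0, %d1, %d2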
1945 if (SizeOp0 % NarrowSize != 0)
1946 return UnableToLegalize;
1947 int NumParts = SizeOp0 / NarrowSize;
1948
1949 // List the registers where the destination will be scattered.
1950 SmallVector<Register, 2> DstRegs;
1951 // List the registers where the source will be split.
1952 SmallVector<Register, 2> SrcRegs;
1953
1954 // Create all the temporary registers.
1955 for (int i = 0; i < NumParts; ++i) {
1956 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1957
1958 SrcRegs.push_back(SrcReg);
1959 }
1960
1961 // Explode the big arguments into smaller chunks.
1962 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1963
1964 Register AshrCstReg =
1965 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1966 .getReg(0);
1967 Register FullExtensionReg;
1968 Register PartialExtensionReg;
1969
1970 // Do the operation on each small part.
1971 for (int i = 0; i < NumParts; ++i) {
1972 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1973 DstRegs.push_back(SrcRegs[i]);
1974 PartialExtensionReg = DstRegs.back();
1975 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1976 assert(PartialExtensionReg &&
1977 "Expected to visit partial extension before full");
1978 if (FullExtensionReg) {
1979 DstRegs.push_back(FullExtensionReg);
1980 continue;
1981 }
1982 DstRegs.push_back(
1983 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1984 .getReg(0));
1985 FullExtensionReg = DstRegs.back();
1986 } else {
1987 DstRegs.push_back(
1988 MIRBuilder
1989 .buildInstr(
1990 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1991 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1992 .getReg(0));
1993 PartialExtensionReg = DstRegs.back();
1994 }
1995 }
1996
1997 // Gather the destination registers into the final destination.
1998 Register DstReg = MI.getOperand(0).getReg();
1999 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2000 MI.eraseFromParent();
2001 return Legalized;
2002 }
2003 case TargetOpcode::G_BSWAP:
2004 case TargetOpcode::G_BITREVERSE: {
2005 if (SizeOp0 % NarrowSize != 0)
2006 return UnableToLegalize;
2007
2008 Observer.changingInstr(MI);
2009 SmallVector<Register, 2> SrcRegs, DstRegs;
2010 unsigned NumParts = SizeOp0 / NarrowSize;
2011 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2012 MIRBuilder, MRI);
2013
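// Byte-swapping (or bit-reversing) the wide value is the same as applying
// the operation to each narrow piece and emitting the pieces in reverse
// order. For example (illustrative), an s64 G_BSWAP narrowed to s32:
//   %lo:_(s32) = G_BSWAP %src_hi
//   %hi:_(s32) = G_BSWAP %src_lo
//   %dst:_(s64) = G_MERGE_VALUES %lo, %hi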
2014 for (unsigned i = 0; i < NumParts; ++i) {
2015 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2016 {SrcRegs[NumParts - 1 - i]});
2017 DstRegs.push_back(DstPart.getReg(0));
2018 }
2019
2020 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2021
2022 Observer.changedInstr(MI);
2023 MI.eraseFromParent();
2024 return Legalized;
2025 }
2026 case TargetOpcode::G_PTR_ADD:
2027 case TargetOpcode::G_PTRMASK: {
2028 if (TypeIdx != 1)
2029 return UnableToLegalize;
2030 Observer.changingInstr(MI);
2031 narrowScalarSrc(MI, NarrowTy, 2);
2032 Observer.changedInstr(MI);
2033 return Legalized;
2034 }
2035 case TargetOpcode::G_FPTOUI:
2036 case TargetOpcode::G_FPTOSI:
2037 case TargetOpcode::G_FPTOUI_SAT:
2038 case TargetOpcode::G_FPTOSI_SAT:
2039 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2040 case TargetOpcode::G_FPEXT:
2041 if (TypeIdx != 0)
2042 return UnableToLegalize;
2043 Observer.changingInstr(MI);
2044 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2045 Observer.changedInstr(MI);
2046 return Legalized;
2047 case TargetOpcode::G_FLDEXP:
2048 case TargetOpcode::G_STRICT_FLDEXP:
2049 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2050 case TargetOpcode::G_VSCALE: {
2051 Register Dst = MI.getOperand(0).getReg();
2052 LLT Ty = MRI.getType(Dst);
2053
2054 // Assume VSCALE(1) fits into a legal integer
2055 const APInt One(NarrowTy.getSizeInBits(), 1);
2056 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2057 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2058 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2059 MIRBuilder.buildMul(Dst, ZExt, C);
2060
2061 MI.eraseFromParent();
2062 return Legalized;
2063 }
2064 }
2065}
2066
2067 Register LegalizerHelper::coerceToScalar(Register Val) {
2068 LLT Ty = MRI.getType(Val);
2069 if (Ty.isScalar())
2070 return Val;
2071
2072 const DataLayout &DL = MIRBuilder.getDataLayout();
2073 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2074 if (Ty.isPointer()) {
2075 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2076 return Register();
2077 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2078 }
2079
2080 Register NewVal = Val;
2081
2082 assert(Ty.isVector());
2083 if (Ty.isPointerVector())
2084 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2085 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2086}
2087
2088 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2089 unsigned OpIdx, unsigned ExtOpcode) {
2090 MachineOperand &MO = MI.getOperand(OpIdx);
2091 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2092 MO.setReg(ExtB.getReg(0));
2093}
2094
2095 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2096 unsigned OpIdx) {
2097 MachineOperand &MO = MI.getOperand(OpIdx);
2098 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2099 MO.setReg(ExtB.getReg(0));
2100}
2101
2102 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2103 unsigned OpIdx, unsigned TruncOpcode) {
2104 MachineOperand &MO = MI.getOperand(OpIdx);
2105 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2106 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2107 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2108 MO.setReg(DstExt);
2109}
2110
2111 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2112 unsigned OpIdx, unsigned ExtOpcode) {
2113 MachineOperand &MO = MI.getOperand(OpIdx);
2114 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2115 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2116 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2117 MO.setReg(DstTrunc);
2118}
2119
2120 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2121 unsigned OpIdx) {
2122 MachineOperand &MO = MI.getOperand(OpIdx);
2123 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2124 Register Dst = MO.getReg();
2125 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2126 MO.setReg(DstExt);
2127 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2128}
2129
2130 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2131 unsigned OpIdx) {
2132 MachineOperand &MO = MI.getOperand(OpIdx);
2133 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2134}
2135
2136 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2137 MachineOperand &Op = MI.getOperand(OpIdx);
2138 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2139}
2140
2141 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2142 MachineOperand &MO = MI.getOperand(OpIdx);
2143 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2144 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2145 MIRBuilder.buildBitcast(MO, CastDst);
2146 MO.setReg(CastDst);
2147}
2148
2149 LegalizerHelper::LegalizeResult
2150 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2151 LLT WideTy) {
2152 if (TypeIdx != 1)
2153 return UnableToLegalize;
2154
2155 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2156 if (DstTy.isVector())
2157 return UnableToLegalize;
2158
2159 LLT SrcTy = MRI.getType(Src1Reg);
2160 const int DstSize = DstTy.getSizeInBits();
2161 const int SrcSize = SrcTy.getSizeInBits();
2162 const int WideSize = WideTy.getSizeInBits();
2163 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2164
2165 unsigned NumOps = MI.getNumOperands();
2166 unsigned NumSrc = MI.getNumOperands() - 1;
2167 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2168
2169 if (WideSize >= DstSize) {
2170 // Directly pack the bits in the target type.
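// For example (illustrative), with WideTy == s64,
//   %dst:_(s32) = G_MERGE_VALUES %a:_(s8), %b:_(s8), %c:_(s8), %e:_(s8)
// packs as zext(%a) | (zext(%b) << 8) | (zext(%c) << 16) | (zext(%e) << 24)
// in s64, and the result is truncated back to s32 below.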
2171 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2172
2173 for (unsigned I = 2; I != NumOps; ++I) {
2174 const unsigned Offset = (I - 1) * PartSize;
2175
2176 Register SrcReg = MI.getOperand(I).getReg();
2177 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2178
2179 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2180
2181 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2182 MRI.createGenericVirtualRegister(WideTy);
2183
2184 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2185 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2186 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2187 ResultReg = NextResult;
2188 }
2189
2190 if (WideSize > DstSize)
2191 MIRBuilder.buildTrunc(DstReg, ResultReg);
2192 else if (DstTy.isPointer())
2193 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2194
2195 MI.eraseFromParent();
2196 return Legalized;
2197 }
2198
2199 // Unmerge the original values to the GCD type, and recombine to the next
2200 // multiple greater than the original type.
2201 //
2202 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2203 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2204 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2205 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2206 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2207 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2208 // %12:_(s12) = G_MERGE_VALUES %10, %11
2209 //
2210 // Padding with undef if necessary:
2211 //
2212 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2213 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2214 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2215 // %7:_(s2) = G_IMPLICIT_DEF
2216 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2217 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2218 // %10:_(s12) = G_MERGE_VALUES %8, %9
2219
2220 const int GCD = std::gcd(SrcSize, WideSize);
2221 LLT GCDTy = LLT::scalar(GCD);
2222
2223 SmallVector<Register, 8> NewMergeRegs;
2224 SmallVector<Register, 8> Unmerges;
2225 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2226
2227 // Decompose the original operands if they don't evenly divide.
2228 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2229 Register SrcReg = MO.getReg();
2230 if (GCD == SrcSize) {
2231 Unmerges.push_back(SrcReg);
2232 } else {
2233 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2234 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2235 Unmerges.push_back(Unmerge.getReg(J));
2236 }
2237 }
2238
2239 // Pad with undef to the next size that is a multiple of the requested size.
2240 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2241 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2242 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2243 Unmerges.push_back(UndefReg);
2244 }
2245
2246 const int PartsPerGCD = WideSize / GCD;
2247
2248 // Build merges of each piece.
2249 ArrayRef<Register> Slicer(Unmerges);
2250 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2251 auto Merge =
2252 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2253 NewMergeRegs.push_back(Merge.getReg(0));
2254 }
2255
2256 // A truncate may be necessary if the requested type doesn't evenly divide the
2257 // original result type.
2258 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2259 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2260 } else {
2261 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2262 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2263 }
2264
2265 MI.eraseFromParent();
2266 return Legalized;
2267}
2268
2269 LegalizerHelper::LegalizeResult
2270 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2271 LLT WideTy) {
2272 if (TypeIdx != 0)
2273 return UnableToLegalize;
2274
2275 int NumDst = MI.getNumOperands() - 1;
2276 Register SrcReg = MI.getOperand(NumDst).getReg();
2277 LLT SrcTy = MRI.getType(SrcReg);
2278 if (SrcTy.isVector())
2279 return UnableToLegalize;
2280
2281 Register Dst0Reg = MI.getOperand(0).getReg();
2282 LLT DstTy = MRI.getType(Dst0Reg);
2283 if (!DstTy.isScalar())
2284 return UnableToLegalize;
2285
2286 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2287 if (SrcTy.isPointer()) {
2288 const DataLayout &DL = MIRBuilder.getDataLayout();
2289 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2290 LLVM_DEBUG(
2291 dbgs() << "Not casting non-integral address space integer\n");
2292 return UnableToLegalize;
2293 }
2294
2295 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2296 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2297 }
2298
2299 // Widen SrcTy to WideTy. This does not affect the result, but since the
2300 // user requested this size, it is probably better handled than SrcTy and
2301 // should reduce the total number of legalization artifacts.
2302 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2303 SrcTy = WideTy;
2304 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2305 }
2306
2307 // There's no unmerge type to target. Directly extract the bits from the
2308 // source type.
2309 unsigned DstSize = DstTy.getSizeInBits();
2310
2311 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2312 for (int I = 1; I != NumDst; ++I) {
2313 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2314 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2315 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2316 }
2317
2318 MI.eraseFromParent();
2319 return Legalized;
2320 }
2321
2322 // Extend the source to a wider type.
2323 LLT LCMTy = getLCMType(SrcTy, WideTy);
2324
2325 Register WideSrc = SrcReg;
2326 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2327 // TODO: If this is an integral address space, cast to integer and anyext.
2328 if (SrcTy.isPointer()) {
2329 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2330 return UnableToLegalize;
2331 }
2332
2333 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2334 }
2335
2336 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2337
2338 // Create a sequence of unmerges and merges to the original results. Since we
2339 // may have widened the source, we will need to pad the results with dead defs
2340 // to cover the source register.
2341 // e.g. widen s48 to s64:
2342 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2343 //
2344 // =>
2345 // %4:_(s192) = G_ANYEXT %0:_(s96)
2346 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2347 // ; unpack to GCD type, with extra dead defs
2348 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2349 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2350 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2351 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2352 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2353 const LLT GCDTy = getGCDType(WideTy, DstTy);
2354 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2355 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2356
2357 // Directly unmerge to the destination without going through a GCD type
2358 // if possible
2359 if (PartsPerRemerge == 1) {
2360 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2361
2362 for (int I = 0; I != NumUnmerge; ++I) {
2363 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2364
2365 for (int J = 0; J != PartsPerUnmerge; ++J) {
2366 int Idx = I * PartsPerUnmerge + J;
2367 if (Idx < NumDst)
2368 MIB.addDef(MI.getOperand(Idx).getReg());
2369 else {
2370 // Create dead def for excess components.
2371 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2372 }
2373 }
2374
2375 MIB.addUse(Unmerge.getReg(I));
2376 }
2377 } else {
2378 SmallVector<Register, 16> Parts;
2379 for (int J = 0; J != NumUnmerge; ++J)
2380 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2381
2382 SmallVector<Register, 8> RemergeParts;
2383 for (int I = 0; I != NumDst; ++I) {
2384 for (int J = 0; J < PartsPerRemerge; ++J) {
2385 const int Idx = I * PartsPerRemerge + J;
2386 RemergeParts.emplace_back(Parts[Idx]);
2387 }
2388
2389 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2390 RemergeParts.clear();
2391 }
2392 }
2393
2394 MI.eraseFromParent();
2395 return Legalized;
2396}
2397
2398 LegalizerHelper::LegalizeResult
2399 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2400 LLT WideTy) {
2401 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2402 unsigned Offset = MI.getOperand(2).getImm();
2403
2404 if (TypeIdx == 0) {
2405 if (SrcTy.isVector() || DstTy.isVector())
2406 return UnableToLegalize;
2407
2408 SrcOp Src(SrcReg);
2409 if (SrcTy.isPointer()) {
2410 // Extracts from pointers can be handled only if they are really just
2411 // simple integers.
2412 const DataLayout &DL = MIRBuilder.getDataLayout();
2413 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2414 return UnableToLegalize;
2415
2416 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2417 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2418 SrcTy = SrcAsIntTy;
2419 }
2420
2421 if (DstTy.isPointer())
2422 return UnableToLegalize;
2423
2424 if (Offset == 0) {
2425 // Avoid a shift in the degenerate case.
2426 MIRBuilder.buildTrunc(DstReg,
2427 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2428 MI.eraseFromParent();
2429 return Legalized;
2430 }
2431
2432 // Do a shift in the source type.
2433 LLT ShiftTy = SrcTy;
2434 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2435 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2436 ShiftTy = WideTy;
2437 }
2438
2439 auto LShr = MIRBuilder.buildLShr(
2440 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2441 MIRBuilder.buildTrunc(DstReg, LShr);
2442 MI.eraseFromParent();
2443 return Legalized;
2444 }
2445
2446 if (SrcTy.isScalar()) {
2447 Observer.changingInstr(MI);
2448 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2449 Observer.changedInstr(MI);
2450 return Legalized;
2451 }
2452
2453 if (!SrcTy.isVector())
2454 return UnableToLegalize;
2455
2456 if (DstTy != SrcTy.getElementType())
2457 return UnableToLegalize;
2458
2459 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2460 return UnableToLegalize;
2461
2462 Observer.changingInstr(MI);
2463 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2464
2465 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2466 Offset);
2467 widenScalarDst(MI, WideTy.getScalarType(), 0);
2468 Observer.changedInstr(MI);
2469 return Legalized;
2470}
2471
2472 LegalizerHelper::LegalizeResult
2473 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2474 LLT WideTy) {
2475 if (TypeIdx != 0 || WideTy.isVector())
2476 return UnableToLegalize;
2477 Observer.changingInstr(MI);
2478 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2479 widenScalarDst(MI, WideTy);
2480 Observer.changedInstr(MI);
2481 return Legalized;
2482}
2483
2484 LegalizerHelper::LegalizeResult
2485 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2486 LLT WideTy) {
2487 unsigned Opcode;
2488 unsigned ExtOpcode;
2489 std::optional<Register> CarryIn;
2490 switch (MI.getOpcode()) {
2491 default:
2492 llvm_unreachable("Unexpected opcode!");
2493 case TargetOpcode::G_SADDO:
2494 Opcode = TargetOpcode::G_ADD;
2495 ExtOpcode = TargetOpcode::G_SEXT;
2496 break;
2497 case TargetOpcode::G_SSUBO:
2498 Opcode = TargetOpcode::G_SUB;
2499 ExtOpcode = TargetOpcode::G_SEXT;
2500 break;
2501 case TargetOpcode::G_UADDO:
2502 Opcode = TargetOpcode::G_ADD;
2503 ExtOpcode = TargetOpcode::G_ZEXT;
2504 break;
2505 case TargetOpcode::G_USUBO:
2506 Opcode = TargetOpcode::G_SUB;
2507 ExtOpcode = TargetOpcode::G_ZEXT;
2508 break;
2509 case TargetOpcode::G_SADDE:
2510 Opcode = TargetOpcode::G_UADDE;
2511 ExtOpcode = TargetOpcode::G_SEXT;
2512 CarryIn = MI.getOperand(4).getReg();
2513 break;
2514 case TargetOpcode::G_SSUBE:
2515 Opcode = TargetOpcode::G_USUBE;
2516 ExtOpcode = TargetOpcode::G_SEXT;
2517 CarryIn = MI.getOperand(4).getReg();
2518 break;
2519 case TargetOpcode::G_UADDE:
2520 Opcode = TargetOpcode::G_UADDE;
2521 ExtOpcode = TargetOpcode::G_ZEXT;
2522 CarryIn = MI.getOperand(4).getReg();
2523 break;
2524 case TargetOpcode::G_USUBE:
2525 Opcode = TargetOpcode::G_USUBE;
2526 ExtOpcode = TargetOpcode::G_ZEXT;
2527 CarryIn = MI.getOperand(4).getReg();
2528 break;
2529 }
2530
2531 if (TypeIdx == 1) {
2532 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2533
2534 Observer.changingInstr(MI);
2535 if (CarryIn)
2536 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2537 widenScalarDst(MI, WideTy, 1);
2538
2539 Observer.changedInstr(MI);
2540 return Legalized;
2541 }
2542
2543 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2544 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2545 // Do the arithmetic in the larger type.
2546 Register NewOp;
2547 if (CarryIn) {
2548 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2549 NewOp = MIRBuilder
2550 .buildInstr(Opcode, {WideTy, CarryOutTy},
2551 {LHSExt, RHSExt, *CarryIn})
2552 .getReg(0);
2553 } else {
2554 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2555 }
2556 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2557 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2558 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2559 // There is no overflow if the ExtOp is the same as NewOp.
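// For example (illustrative), G_SADDO on s8 widened to s32: 100 + 100 gives
// 200 in the wide type, and sext(trunc(200) to s8) is -56 != 200, so the
// overflow flag is set; 50 + 50 = 100 survives the round trip, so it is not.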
2560 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2561 // Now trunc the NewOp to the original result.
2562 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2563 MI.eraseFromParent();
2564 return Legalized;
2565}
2566
2567 LegalizerHelper::LegalizeResult
2568 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2569 LLT WideTy) {
2570 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2571 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2572 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2573 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2574 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2575 // We can convert this to:
2576 // 1. Any extend iN to iM
2577 // 2. SHL by M-N
2578 // 3. [US][ADD|SUB|SHL]SAT
2579 // 4. L/ASHR by M-N
2580 //
2581 // It may be more efficient to lower this to a min and a max operation in
2582 // the higher precision arithmetic if the promoted operation isn't legal,
2583 // but this decision is up to the target's lowering request.
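// For example (illustrative), an s8 G_SADDSAT widened to s32 shifts both
// operands left by 24 so the s8 sign bit lands in the s32 sign bit; the s32
// saturating add then clamps exactly when the s8 one would, and the
// arithmetic shift right by 24 recovers the saturated value before the
// final truncate.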
2584 Register DstReg = MI.getOperand(0).getReg();
2585
2586 unsigned NewBits = WideTy.getScalarSizeInBits();
2587 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2588
2589 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2590 // must not left shift the RHS to preserve the shift amount.
2591 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2592 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2593 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2594 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2595 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2596 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2597
2598 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2599 {ShiftL, ShiftR}, MI.getFlags());
2600
2601 // Use a shift that will preserve the number of sign bits when the trunc is
2602 // folded away.
2603 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2604 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2605
2606 MIRBuilder.buildTrunc(DstReg, Result);
2607 MI.eraseFromParent();
2608 return Legalized;
2609}
2610
2611 LegalizerHelper::LegalizeResult
2612 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2613 LLT WideTy) {
2614 if (TypeIdx == 1) {
2615 Observer.changingInstr(MI);
2616 widenScalarDst(MI, WideTy, 1);
2617 Observer.changedInstr(MI);
2618 return Legalized;
2619 }
2620
2621 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2622 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2623 LLT SrcTy = MRI.getType(LHS);
2624 LLT OverflowTy = MRI.getType(OriginalOverflow);
2625 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2626
2627 // To determine if the result overflowed in the larger type, we extend the
2628 // input to the larger type, do the multiply (checking if it overflows),
2629 // then also check the high bits of the result to see if overflow happened
2630 // there.
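// For example (illustrative), G_UMULO on s8 widened to s16: 200 * 2 = 400
// (0x0190); the low byte is 0x90 = 144 and zext(144) != 400, so overflow is
// reported. If WideTy is at least twice the original width, the wide multiply
// itself can never overflow and only this high-bits check is needed.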
2631 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2632 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2633 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2634
2635 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2636 // so we don't need to check the overflow result of larger type Mulo.
2637 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2638
2639 unsigned MulOpc =
2640 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2641
2642 MachineInstrBuilder Mulo;
2643 if (WideMulCanOverflow)
2644 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2645 {LeftOperand, RightOperand});
2646 else
2647 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2648
2649 auto Mul = Mulo->getOperand(0);
2650 MIRBuilder.buildTrunc(Result, Mul);
2651
2652 MachineInstrBuilder ExtResult;
2653 // Overflow occurred if it occurred in the larger type, or if the high part
2654 // of the result does not zero/sign-extend the low part. Check this second
2655 // possibility first.
2656 if (IsSigned) {
2657 // For signed, overflow occurred when the high part does not sign-extend
2658 // the low part.
2659 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2660 } else {
2661 // Unsigned overflow occurred when the high part does not zero-extend the
2662 // low part.
2663 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2664 }
2665
2666 if (WideMulCanOverflow) {
2667 auto Overflow =
2668 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2669 // Finally check if the multiplication in the larger type itself overflowed.
2670 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2671 } else {
2672 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2673 }
2674 MI.eraseFromParent();
2675 return Legalized;
2676}
2677
2678 LegalizerHelper::LegalizeResult
2679 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2680 unsigned Opcode = MI.getOpcode();
2681 switch (Opcode) {
2682 default:
2683 return UnableToLegalize;
2684 case TargetOpcode::G_ATOMICRMW_XCHG:
2685 case TargetOpcode::G_ATOMICRMW_ADD:
2686 case TargetOpcode::G_ATOMICRMW_SUB:
2687 case TargetOpcode::G_ATOMICRMW_AND:
2688 case TargetOpcode::G_ATOMICRMW_OR:
2689 case TargetOpcode::G_ATOMICRMW_XOR:
2690 case TargetOpcode::G_ATOMICRMW_MIN:
2691 case TargetOpcode::G_ATOMICRMW_MAX:
2692 case TargetOpcode::G_ATOMICRMW_UMIN:
2693 case TargetOpcode::G_ATOMICRMW_UMAX:
2694 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2695 Observer.changingInstr(MI);
2696 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2697 widenScalarDst(MI, WideTy, 0);
2698 Observer.changedInstr(MI);
2699 return Legalized;
2700 case TargetOpcode::G_ATOMIC_CMPXCHG:
2701 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2702 Observer.changingInstr(MI);
2703 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2704 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2705 widenScalarDst(MI, WideTy, 0);
2706 Observer.changedInstr(MI);
2707 return Legalized;
2708 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2709 if (TypeIdx == 0) {
2710 Observer.changingInstr(MI);
2711 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2712 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2713 widenScalarDst(MI, WideTy, 0);
2714 Observer.changedInstr(MI);
2715 return Legalized;
2716 }
2717 assert(TypeIdx == 1 &&
2718 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2719 Observer.changingInstr(MI);
2720 widenScalarDst(MI, WideTy, 1);
2721 Observer.changedInstr(MI);
2722 return Legalized;
2723 case TargetOpcode::G_EXTRACT:
2724 return widenScalarExtract(MI, TypeIdx, WideTy);
2725 case TargetOpcode::G_INSERT:
2726 return widenScalarInsert(MI, TypeIdx, WideTy);
2727 case TargetOpcode::G_MERGE_VALUES:
2728 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2729 case TargetOpcode::G_UNMERGE_VALUES:
2730 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2731 case TargetOpcode::G_SADDO:
2732 case TargetOpcode::G_SSUBO:
2733 case TargetOpcode::G_UADDO:
2734 case TargetOpcode::G_USUBO:
2735 case TargetOpcode::G_SADDE:
2736 case TargetOpcode::G_SSUBE:
2737 case TargetOpcode::G_UADDE:
2738 case TargetOpcode::G_USUBE:
2739 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2740 case TargetOpcode::G_UMULO:
2741 case TargetOpcode::G_SMULO:
2742 return widenScalarMulo(MI, TypeIdx, WideTy);
2743 case TargetOpcode::G_SADDSAT:
2744 case TargetOpcode::G_SSUBSAT:
2745 case TargetOpcode::G_SSHLSAT:
2746 case TargetOpcode::G_UADDSAT:
2747 case TargetOpcode::G_USUBSAT:
2748 case TargetOpcode::G_USHLSAT:
2749 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2750 case TargetOpcode::G_CTTZ:
2751 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2752 case TargetOpcode::G_CTLZ:
2753 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2754 case TargetOpcode::G_CTPOP: {
2755 if (TypeIdx == 0) {
2756 Observer.changingInstr(MI);
2757 widenScalarDst(MI, WideTy, 0);
2758 Observer.changedInstr(MI);
2759 return Legalized;
2760 }
2761
2762 Register SrcReg = MI.getOperand(1).getReg();
2763
2764 // First extend the input.
2765 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2766 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2767 ? TargetOpcode::G_ANYEXT
2768 : TargetOpcode::G_ZEXT;
2769 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2770 LLT CurTy = MRI.getType(SrcReg);
2771 unsigned NewOpc = Opcode;
2772 if (NewOpc == TargetOpcode::G_CTTZ) {
2773 // The count is the same in the larger type except if the original
2774 // value was zero. This can be handled by setting the bit just off
2775 // the top of the original type.
2776 auto TopBit =
2777 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2778 MIBSrc = MIRBuilder.buildOr(
2779 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2780 // Now we know the operand is non-zero, use the more relaxed opcode.
2781 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2782 }
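// For example (illustrative), G_CTTZ on s8 widened to s32: OR-ing in bit 8
// leaves the trailing-zero count of any non-zero s8 value unchanged, while a
// zero input now produces 8, which is G_CTTZ's result for zero at the
// original width.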
2783
2784 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2785
2786 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2787 // An optimization where the result is the CTLZ after the left shift by
2788 // (difference in width between WideTy and CurTy), that is,
2789 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2790 // Result = ctlz MIBSrc
2791 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2792 MIRBuilder.buildConstant(WideTy, SizeDiff));
2793 }
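// For example (illustrative), G_CTLZ_ZERO_UNDEF on s8 widened to s32: after
// shifting the extended value left by 24, its leading bit is as far from the
// top of the s32 as it was from the top of the s8, so the wide count already
// equals the narrow one and needs no correction.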
2794
2795 // Perform the operation at the larger size.
2796 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2797 // This is already the correct result for CTPOP and CTTZs
2798 if (Opcode == TargetOpcode::G_CTLZ) {
2799 // The correct result is NewOp - (difference in width between WideTy and CurTy).
2800 MIBNewOp = MIRBuilder.buildSub(
2801 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2802 }
2803
2804 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2805 MI.eraseFromParent();
2806 return Legalized;
2807 }
2808 case TargetOpcode::G_BSWAP: {
2809 Observer.changingInstr(MI);
2810 Register DstReg = MI.getOperand(0).getReg();
2811
2812 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2813 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2814 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2815 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2816
2817 MI.getOperand(0).setReg(DstExt);
2818
2819 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2820
2821 LLT Ty = MRI.getType(DstReg);
2822 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2823 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2824 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2825
2826 MIRBuilder.buildTrunc(DstReg, ShrReg);
2827 Observer.changedInstr(MI);
2828 return Legalized;
2829 }
2830 case TargetOpcode::G_BITREVERSE: {
2831 Observer.changingInstr(MI);
2832
2833 Register DstReg = MI.getOperand(0).getReg();
2834 LLT Ty = MRI.getType(DstReg);
2835 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2836
2837 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2838 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2839 MI.getOperand(0).setReg(DstExt);
2840 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2841
2842 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2843 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2844 MIRBuilder.buildTrunc(DstReg, Shift);
2845 Observer.changedInstr(MI);
2846 return Legalized;
2847 }
2848 case TargetOpcode::G_FREEZE:
2849 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2850 Observer.changingInstr(MI);
2851 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2852 widenScalarDst(MI, WideTy);
2853 Observer.changedInstr(MI);
2854 return Legalized;
2855
2856 case TargetOpcode::G_ABS:
2857 Observer.changingInstr(MI);
2858 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2859 widenScalarDst(MI, WideTy);
2860 Observer.changedInstr(MI);
2861 return Legalized;
2862
2863 case TargetOpcode::G_ADD:
2864 case TargetOpcode::G_AND:
2865 case TargetOpcode::G_MUL:
2866 case TargetOpcode::G_OR:
2867 case TargetOpcode::G_XOR:
2868 case TargetOpcode::G_SUB:
2869 case TargetOpcode::G_SHUFFLE_VECTOR:
2870 // Perform operation at larger width (any extension is fine here, high bits
2871 // don't affect the result) and then truncate the result back to the
2872 // original type.
2873 Observer.changingInstr(MI);
2874 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2875 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2876 widenScalarDst(MI, WideTy);
2877 Observer.changedInstr(MI);
2878 return Legalized;
2879
2880 case TargetOpcode::G_SBFX:
2881 case TargetOpcode::G_UBFX:
2882 Observer.changingInstr(MI);
2883
2884 if (TypeIdx == 0) {
2885 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2886 widenScalarDst(MI, WideTy);
2887 } else {
2888 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2889 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2890 }
2891
2892 Observer.changedInstr(MI);
2893 return Legalized;
2894
2895 case TargetOpcode::G_SHL:
2896 Observer.changingInstr(MI);
2897
2898 if (TypeIdx == 0) {
2899 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2900 widenScalarDst(MI, WideTy);
2901 } else {
2902 assert(TypeIdx == 1);
2903 // The "number of bits to shift" operand must preserve its value as an
2904 // unsigned integer:
2905 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2906 }
2907
2908 Observer.changedInstr(MI);
2909 return Legalized;
2910
2911 case TargetOpcode::G_ROTR:
2912 case TargetOpcode::G_ROTL:
2913 if (TypeIdx != 1)
2914 return UnableToLegalize;
2915
2916 Observer.changingInstr(MI);
2917 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2918 Observer.changedInstr(MI);
2919 return Legalized;
2920
2921 case TargetOpcode::G_SDIV:
2922 case TargetOpcode::G_SREM:
2923 case TargetOpcode::G_SMIN:
2924 case TargetOpcode::G_SMAX:
2925 case TargetOpcode::G_ABDS:
2926 Observer.changingInstr(MI);
2927 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2928 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2929 widenScalarDst(MI, WideTy);
2930 Observer.changedInstr(MI);
2931 return Legalized;
2932
2933 case TargetOpcode::G_SDIVREM:
2934 Observer.changingInstr(MI);
2935 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2936 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2937 widenScalarDst(MI, WideTy);
2938 widenScalarDst(MI, WideTy, 1);
2939 Observer.changedInstr(MI);
2940 return Legalized;
2941
2942 case TargetOpcode::G_ASHR:
2943 case TargetOpcode::G_LSHR:
2944 Observer.changingInstr(MI);
2945
2946 if (TypeIdx == 0) {
2947 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2948 : TargetOpcode::G_ZEXT;
2949
2950 widenScalarSrc(MI, WideTy, 1, CvtOp);
2951 widenScalarDst(MI, WideTy);
2952 } else {
2953 assert(TypeIdx == 1);
2954 // The "number of bits to shift" operand must preserve its value as an
2955 // unsigned integer:
2956 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2957 }
2958
2959 Observer.changedInstr(MI);
2960 return Legalized;
2961 case TargetOpcode::G_UDIV:
2962 case TargetOpcode::G_UREM:
2963 case TargetOpcode::G_ABDU:
2964 Observer.changingInstr(MI);
2965 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2966 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2967 widenScalarDst(MI, WideTy);
2968 Observer.changedInstr(MI);
2969 return Legalized;
2970 case TargetOpcode::G_UDIVREM:
2971 Observer.changingInstr(MI);
2972 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2973 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2974 widenScalarDst(MI, WideTy);
2975 widenScalarDst(MI, WideTy, 1);
2976 Observer.changedInstr(MI);
2977 return Legalized;
2978 case TargetOpcode::G_UMIN:
2979 case TargetOpcode::G_UMAX: {
2980 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2981
2982 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
2983 unsigned ExtOpc =
2984 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
2985 getApproximateEVTForLLT(WideTy, Ctx))
2986 ? TargetOpcode::G_SEXT
2987 : TargetOpcode::G_ZEXT;
2988
2989 Observer.changingInstr(MI);
2990 widenScalarSrc(MI, WideTy, 1, ExtOpc);
2991 widenScalarSrc(MI, WideTy, 2, ExtOpc);
2992 widenScalarDst(MI, WideTy);
2993 Observer.changedInstr(MI);
2994 return Legalized;
2995 }
2996
2997 case TargetOpcode::G_SELECT:
2998 Observer.changingInstr(MI);
2999 if (TypeIdx == 0) {
3000 // Perform operation at larger width (any extension is fine here, high
3001 // bits don't affect the result) and then truncate the result back to the
3002 // original type.
3003 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3004 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3005 widenScalarDst(MI, WideTy);
3006 } else {
3007 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3008 // Explicit extension is required here since high bits affect the result.
3009 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3010 }
3011 Observer.changedInstr(MI);
3012 return Legalized;
3013
3014 case TargetOpcode::G_FPTOSI:
3015 case TargetOpcode::G_FPTOUI:
3016 case TargetOpcode::G_INTRINSIC_LRINT:
3017 case TargetOpcode::G_INTRINSIC_LLRINT:
3018 case TargetOpcode::G_IS_FPCLASS:
3019 Observer.changingInstr(MI);
3020
3021 if (TypeIdx == 0)
3022 widenScalarDst(MI, WideTy);
3023 else
3024 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3025
3026 Observer.changedInstr(MI);
3027 return Legalized;
3028 case TargetOpcode::G_SITOFP:
3029 Observer.changingInstr(MI);
3030
3031 if (TypeIdx == 0)
3032 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3033 else
3034 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3035
3036 Observer.changedInstr(MI);
3037 return Legalized;
3038 case TargetOpcode::G_UITOFP:
3039 Observer.changingInstr(MI);
3040
3041 if (TypeIdx == 0)
3042 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3043 else
3044 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3045
3046 Observer.changedInstr(MI);
3047 return Legalized;
3048 case TargetOpcode::G_FPTOSI_SAT:
3049 case TargetOpcode::G_FPTOUI_SAT:
3050 Observer.changingInstr(MI);
3051
3052 if (TypeIdx == 0) {
3053 Register OldDst = MI.getOperand(0).getReg();
3054 LLT Ty = MRI.getType(OldDst);
3055 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3056 Register NewDst;
3057 MI.getOperand(0).setReg(ExtReg);
3058 uint64_t ShortBits = Ty.getScalarSizeInBits();
3059 uint64_t WideBits = WideTy.getScalarSizeInBits();
3060 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3061 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3062 // z = i16 fptosi_sat(a)
3063 // ->
3064 // x = i32 fptosi_sat(a)
3065 // y = smin(x, 32767)
3066 // z = smax(y, -32768)
3067 auto MaxVal = MIRBuilder.buildConstant(
3068 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3069 auto MinVal = MIRBuilder.buildConstant(
3070 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3071 Register MidReg =
3072 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3073 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3074 } else {
3075 // z = i16 fptoui_sat(a)
3076 // ->
3077 // x = i32 fptoui_sat(a)
3078 // y = smin(x, 65535)
3079 auto MaxVal = MIRBuilder.buildConstant(
3080 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3081 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3082 }
3083 MIRBuilder.buildTrunc(OldDst, NewDst);
3084 } else
3085 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3086
3087 Observer.changedInstr(MI);
3088 return Legalized;
3089 case TargetOpcode::G_LOAD:
3090 case TargetOpcode::G_SEXTLOAD:
3091 case TargetOpcode::G_ZEXTLOAD:
3092 Observer.changingInstr(MI);
3093 widenScalarDst(MI, WideTy);
3094 Observer.changedInstr(MI);
3095 return Legalized;
3096
3097 case TargetOpcode::G_STORE: {
3098 if (TypeIdx != 0)
3099 return UnableToLegalize;
3100
3101 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3102 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3103 if (!Ty.isScalar()) {
3104 // We need to widen the vector element type.
3105 Observer.changingInstr(MI);
3106 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3107 // We also need to adjust the MMO to turn this into a truncating store.
3108 MachineMemOperand &MMO = **MI.memoperands_begin();
3109 MachineFunction &MF = MIRBuilder.getMF();
3110 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3111 MI.setMemRefs(MF, {NewMMO});
3112 Observer.changedInstr(MI);
3113 return Legalized;
3114 }
3115
3116 Observer.changingInstr(MI);
3117
3118 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3119 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3120 widenScalarSrc(MI, WideTy, 0, ExtType);
3121
3122 Observer.changedInstr(MI);
3123 return Legalized;
3124 }
3125 case TargetOpcode::G_CONSTANT: {
3126 MachineOperand &SrcMO = MI.getOperand(1);
3127 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3128 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3129 MRI.getType(MI.getOperand(0).getReg()));
3130 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3131 ExtOpc == TargetOpcode::G_ANYEXT) &&
3132 "Illegal Extend");
3133 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3134 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3135 ? SrcVal.sext(WideTy.getSizeInBits())
3136 : SrcVal.zext(WideTy.getSizeInBits());
3137 Observer.changingInstr(MI);
3138 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3139
3140 widenScalarDst(MI, WideTy);
3141 Observer.changedInstr(MI);
3142 return Legalized;
3143 }
3144 case TargetOpcode::G_FCONSTANT: {
3145 // To avoid changing the bits of the constant due to extension to a larger
3146 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3147 MachineOperand &SrcMO = MI.getOperand(1);
3148 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3149 MIRBuilder.setInstrAndDebugLoc(MI);
3150 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3151 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3152 MI.eraseFromParent();
3153 return Legalized;
3154 }
3155 case TargetOpcode::G_IMPLICIT_DEF: {
3156 Observer.changingInstr(MI);
3157 widenScalarDst(MI, WideTy);
3158 Observer.changedInstr(MI);
3159 return Legalized;
3160 }
3161 case TargetOpcode::G_BRCOND:
3162 Observer.changingInstr(MI);
3163 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3164 Observer.changedInstr(MI);
3165 return Legalized;
3166
3167 case TargetOpcode::G_FCMP:
3168 Observer.changingInstr(MI);
3169 if (TypeIdx == 0)
3170 widenScalarDst(MI, WideTy);
3171 else {
3172 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3173 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3174 }
3175 Observer.changedInstr(MI);
3176 return Legalized;
3177
3178 case TargetOpcode::G_ICMP:
3179 Observer.changingInstr(MI);
3180 if (TypeIdx == 0)
3181 widenScalarDst(MI, WideTy);
3182 else {
3183 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3184 CmpInst::Predicate Pred =
3185 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3186
3187 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3188 unsigned ExtOpcode =
3189 (CmpInst::isSigned(Pred) ||
3190 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3191 getApproximateEVTForLLT(WideTy, Ctx)))
3192 ? TargetOpcode::G_SEXT
3193 : TargetOpcode::G_ZEXT;
3194 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3195 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3196 }
3197 Observer.changedInstr(MI);
3198 return Legalized;
3199
3200 case TargetOpcode::G_PTR_ADD:
3201 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3202 Observer.changingInstr(MI);
3203 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3204 Observer.changedInstr(MI);
3205 return Legalized;
3206
3207 case TargetOpcode::G_PHI: {
3208 assert(TypeIdx == 0 && "Expecting only Idx 0");
3209
3210 Observer.changingInstr(MI);
3211 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3212 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3213 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3214 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3215 }
3216
3217 MachineBasicBlock &MBB = *MI.getParent();
3218 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3219 widenScalarDst(MI, WideTy);
3220 Observer.changedInstr(MI);
3221 return Legalized;
3222 }
3223 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3224 if (TypeIdx == 0) {
3225 Register VecReg = MI.getOperand(1).getReg();
3226 LLT VecTy = MRI.getType(VecReg);
3227 Observer.changingInstr(MI);
3228
3229 widenScalarSrc(
3230 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3231 TargetOpcode::G_ANYEXT);
3232
3233 widenScalarDst(MI, WideTy, 0);
3234 Observer.changedInstr(MI);
3235 return Legalized;
3236 }
3237
3238 if (TypeIdx != 2)
3239 return UnableToLegalize;
3240 Observer.changingInstr(MI);
3241 // TODO: Probably should be zext
3242 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3243 Observer.changedInstr(MI);
3244 return Legalized;
3245 }
3246 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3247 if (TypeIdx == 0) {
3248 Observer.changingInstr(MI);
3249 const LLT WideEltTy = WideTy.getElementType();
3250
3251 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3252 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3253 widenScalarDst(MI, WideTy, 0);
3254 Observer.changedInstr(MI);
3255 return Legalized;
3256 }
3257
3258 if (TypeIdx == 1) {
3259 Observer.changingInstr(MI);
3260
3261 Register VecReg = MI.getOperand(1).getReg();
3262 LLT VecTy = MRI.getType(VecReg);
3263 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3264
3265 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3266 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3267 widenScalarDst(MI, WideVecTy, 0);
3268 Observer.changedInstr(MI);
3269 return Legalized;
3270 }
3271
3272 if (TypeIdx == 2) {
3273 Observer.changingInstr(MI);
3274 // TODO: Probably should be zext
3275 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3276 Observer.changedInstr(MI);
3277 return Legalized;
3278 }
3279
3280 return UnableToLegalize;
3281 }
3282 case TargetOpcode::G_FADD:
3283 case TargetOpcode::G_FMUL:
3284 case TargetOpcode::G_FSUB:
3285 case TargetOpcode::G_FMA:
3286 case TargetOpcode::G_FMAD:
3287 case TargetOpcode::G_FNEG:
3288 case TargetOpcode::G_FABS:
3289 case TargetOpcode::G_FCANONICALIZE:
3290 case TargetOpcode::G_FMINNUM:
3291 case TargetOpcode::G_FMAXNUM:
3292 case TargetOpcode::G_FMINNUM_IEEE:
3293 case TargetOpcode::G_FMAXNUM_IEEE:
3294 case TargetOpcode::G_FMINIMUM:
3295 case TargetOpcode::G_FMAXIMUM:
3296 case TargetOpcode::G_FMINIMUMNUM:
3297 case TargetOpcode::G_FMAXIMUMNUM:
3298 case TargetOpcode::G_FDIV:
3299 case TargetOpcode::G_FREM:
3300 case TargetOpcode::G_FCEIL:
3301 case TargetOpcode::G_FFLOOR:
3302 case TargetOpcode::G_FCOS:
3303 case TargetOpcode::G_FSIN:
3304 case TargetOpcode::G_FTAN:
3305 case TargetOpcode::G_FACOS:
3306 case TargetOpcode::G_FASIN:
3307 case TargetOpcode::G_FATAN:
3308 case TargetOpcode::G_FATAN2:
3309 case TargetOpcode::G_FCOSH:
3310 case TargetOpcode::G_FSINH:
3311 case TargetOpcode::G_FTANH:
3312 case TargetOpcode::G_FLOG10:
3313 case TargetOpcode::G_FLOG:
3314 case TargetOpcode::G_FLOG2:
3315 case TargetOpcode::G_FRINT:
3316 case TargetOpcode::G_FNEARBYINT:
3317 case TargetOpcode::G_FSQRT:
3318 case TargetOpcode::G_FEXP:
3319 case TargetOpcode::G_FEXP2:
3320 case TargetOpcode::G_FEXP10:
3321 case TargetOpcode::G_FPOW:
3322 case TargetOpcode::G_INTRINSIC_TRUNC:
3323 case TargetOpcode::G_INTRINSIC_ROUND:
3324 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3325 assert(TypeIdx == 0);
3326 Observer.changingInstr(MI);
3327
3328 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3329 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3330
3331 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3332 Observer.changedInstr(MI);
3333 return Legalized;
3334 case TargetOpcode::G_FPOWI:
3335 case TargetOpcode::G_FLDEXP:
3336 case TargetOpcode::G_STRICT_FLDEXP: {
3337 if (TypeIdx == 0) {
3338 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3339 return UnableToLegalize;
3340
3341 Observer.changingInstr(MI);
3342 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3343 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3344 Observer.changedInstr(MI);
3345 return Legalized;
3346 }
3347
3348 if (TypeIdx == 1) {
3349 // For some reason SelectionDAG tries to promote to a libcall without
3350 // actually changing the integer type for promotion.
3351 Observer.changingInstr(MI);
3352 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3353 Observer.changedInstr(MI);
3354 return Legalized;
3355 }
3356
3357 return UnableToLegalize;
3358 }
3359 case TargetOpcode::G_FFREXP: {
3360 Observer.changingInstr(MI);
3361
3362 if (TypeIdx == 0) {
3363 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3364 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3365 } else {
3366 widenScalarDst(MI, WideTy, 1);
3367 }
3368
3369 Observer.changedInstr(MI);
3370 return Legalized;
3371 }
3372 case TargetOpcode::G_INTTOPTR:
3373 if (TypeIdx != 1)
3374 return UnableToLegalize;
3375
3376 Observer.changingInstr(MI);
3377 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3378 Observer.changedInstr(MI);
3379 return Legalized;
3380 case TargetOpcode::G_PTRTOINT:
3381 if (TypeIdx != 0)
3382 return UnableToLegalize;
3383
3384 Observer.changingInstr(MI);
3385 widenScalarDst(MI, WideTy, 0);
3386 Observer.changedInstr(MI);
3387 return Legalized;
3388 case TargetOpcode::G_BUILD_VECTOR: {
3389 Observer.changingInstr(MI);
3390
3391 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3392 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3393 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3394
3395 // Avoid changing the result vector type if the source element type was
3396 // requested.
3397 if (TypeIdx == 1) {
3398 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3399 } else {
3400 widenScalarDst(MI, WideTy, 0);
3401 }
3402
3403 Observer.changedInstr(MI);
3404 return Legalized;
3405 }
3406 case TargetOpcode::G_SEXT_INREG:
3407 if (TypeIdx != 0)
3408 return UnableToLegalize;
3409
3410 Observer.changingInstr(MI);
3411 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3412 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3413 Observer.changedInstr(MI);
3414 return Legalized;
3415 case TargetOpcode::G_PTRMASK: {
3416 if (TypeIdx != 1)
3417 return UnableToLegalize;
3418 Observer.changingInstr(MI);
3419 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3420 Observer.changedInstr(MI);
3421 return Legalized;
3422 }
3423 case TargetOpcode::G_VECREDUCE_ADD: {
3424 if (TypeIdx != 1)
3425 return UnableToLegalize;
3426 Observer.changingInstr(MI);
3427 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3428 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3429 Observer.changedInstr(MI);
3430 return Legalized;
3431 }
3432 case TargetOpcode::G_VECREDUCE_FADD:
3433 case TargetOpcode::G_VECREDUCE_FMUL:
3434 case TargetOpcode::G_VECREDUCE_FMIN:
3435 case TargetOpcode::G_VECREDUCE_FMAX:
3436 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3437 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3438 if (TypeIdx != 0)
3439 return UnableToLegalize;
3440 Observer.changingInstr(MI);
3441 Register VecReg = MI.getOperand(1).getReg();
3442 LLT VecTy = MRI.getType(VecReg);
3443 LLT WideVecTy = VecTy.isVector()
3444 ? LLT::vector(VecTy.getElementCount(), WideTy)
3445 : WideTy;
3446 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3447 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3448 Observer.changedInstr(MI);
3449 return Legalized;
3450 }
3451 case TargetOpcode::G_VSCALE: {
3452 MachineOperand &SrcMO = MI.getOperand(1);
3453 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3454 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3455 // The CImm is always a signed value
3456 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3457 Observer.changingInstr(MI);
3458 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3459 widenScalarDst(MI, WideTy);
3460 Observer.changedInstr(MI);
3461 return Legalized;
3462 }
3463 case TargetOpcode::G_SPLAT_VECTOR: {
3464 if (TypeIdx != 1)
3465 return UnableToLegalize;
3466
3467 Observer.changingInstr(MI);
3468 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3469 Observer.changedInstr(MI);
3470 return Legalized;
3471 }
3472 case TargetOpcode::G_INSERT_SUBVECTOR: {
3473 if (TypeIdx != 0)
3474 return UnableToLegalize;
3475
3476 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3477 Register BigVec = IS.getBigVec();
3478 Register SubVec = IS.getSubVec();
3479
3480 LLT SubVecTy = MRI.getType(SubVec);
3481 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3482
3483 // Widen the G_INSERT_SUBVECTOR
3484 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3485 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3486 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3487 IS.getIndexImm());
3488
3489 // Truncate back down
3490 auto SplatZero = MIRBuilder.buildSplatVector(
3491 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3492 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3493 SplatZero);
3494
3495 MI.eraseFromParent();
3496
3497 return Legalized;
3498 }
3499 }
3500}
3501
3502 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3503 MachineIRBuilder &B, Register Src, LLT Ty) {
3504 auto Unmerge = B.buildUnmerge(Ty, Src);
3505 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3506 Pieces.push_back(Unmerge.getReg(I));
3507}
3508
3509static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3510 MachineIRBuilder &MIRBuilder) {
3511 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3512 MachineFunction &MF = MIRBuilder.getMF();
3513 const DataLayout &DL = MIRBuilder.getDataLayout();
3514 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3515 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3516 LLT DstLLT = MRI.getType(DstReg);
3517
3518 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3519
3520 auto Addr = MIRBuilder.buildConstantPool(
3521 AddrPtrTy,
3522 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3523
3524 MachineMemOperand *MMO =
3525 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3526 MachineMemOperand::MOLoad, DstLLT, Alignment);
3527
3528 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3529}
3530
3531 LegalizerHelper::LegalizeResult
3532 LegalizerHelper::lowerConstant(MachineInstr &MI) {
3533 const MachineOperand &ConstOperand = MI.getOperand(1);
3534 const Constant *ConstantVal = ConstOperand.getCImm();
3535
3536 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3537 MI.eraseFromParent();
3538
3539 return Legalized;
3540}
3541
3542 LegalizerHelper::LegalizeResult
3543 LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3544 const MachineOperand &ConstOperand = MI.getOperand(1);
3545 const Constant *ConstantVal = ConstOperand.getFPImm();
3546
3547 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3548 MI.eraseFromParent();
3549
3550 return Legalized;
3551}
3552
3553 LegalizerHelper::LegalizeResult
3554 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3555 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3556 if (SrcTy.isVector()) {
3557 LLT SrcEltTy = SrcTy.getElementType();
3558 SmallVector<Register, 8> SrcRegs;
3559
3560 if (DstTy.isVector()) {
3561 int NumDstElt = DstTy.getNumElements();
3562 int NumSrcElt = SrcTy.getNumElements();
3563
3564 LLT DstEltTy = DstTy.getElementType();
3565 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3566 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3567
3568 // If there's an element size mismatch, insert intermediate casts to match
3569 // the result element type.
3570 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3571 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3572 //
3573 // =>
3574 //
3575 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3576 // %4:_(<2 x s8>) = G_BITCAST %2
3577 // %5:_(<2 x s8>) = G_BITCAST %3
3578 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3579 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3580 SrcPartTy = SrcEltTy;
3581 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3582 //
3583 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3584 //
3585 // =>
3586 //
3587 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3588 // %4:_(s16) = G_BITCAST %2
3589 // %5:_(s16) = G_BITCAST %3
3590 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3591 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3592 DstCastTy = DstEltTy;
3593 }
3594
3595 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3596 for (Register &SrcReg : SrcRegs)
3597 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3598 } else
3599 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3600
3601 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3602 MI.eraseFromParent();
3603 return Legalized;
3604 }
3605
3606 if (DstTy.isVector()) {
3607 SmallVector<Register, 8> SrcRegs;
3608 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3609 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3610 MI.eraseFromParent();
3611 return Legalized;
3612 }
3613
3614 return UnableToLegalize;
3615}
3616
3617/// Figure out the bit offset into a register when coercing a vector index for
3618 /// the wide element type. This is only for the case when promoting a vector
3619 /// to one with larger elements.
3620 ///
3621 ///
3622/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3623/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
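/// For example (illustrative values), when an <8 x s8> vector is viewed as
/// <2 x s32>, NewEltSize / OldEltSize = 4, so for %idx = 5:
///   %offset_idx  = 5 & 3 = 1
///   %offset_bits = 1 << Log2(8) = 8
/// i.e. the requested s8 element starts 8 bits into its containing s32 element.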
3624 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3625 Register Idx,
3626 unsigned NewEltSize,
3627 unsigned OldEltSize) {
3628 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3629 LLT IdxTy = B.getMRI()->getType(Idx);
3630
3631 // Now figure out the amount we need to shift to get the target bits.
3632 auto OffsetMask = B.buildConstant(
3633 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3634 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3635 return B.buildShl(IdxTy, OffsetIdx,
3636 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3637}
3638
3639/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3640/// is casting to a vector with a smaller element size, perform multiple element
3641/// extracts and merge the results. If this is coercing to a vector with larger
3642/// elements, index the bitcasted vector and extract the target element with bit
3643/// operations. This is intended to force the indexing in the native register
3644/// size for architectures that can dynamically index the register file.
3645 LegalizerHelper::LegalizeResult
3646 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3647 LLT CastTy) {
3648 if (TypeIdx != 1)
3649 return UnableToLegalize;
3650
3651 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3652
3653 LLT SrcEltTy = SrcVecTy.getElementType();
3654 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3655 unsigned OldNumElts = SrcVecTy.getNumElements();
3656
3657 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3658 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3659
3660 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3661 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3662 if (NewNumElts > OldNumElts) {
3663 // Decreasing the vector element size
3664 //
3665 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3666 // =>
3667 // v4i32:castx = bitcast x:v2i64
3668 //
3669 // i64 = bitcast
3670 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3671 // (i32 (extract_vector_elt castx, (2 * y + 1))))
3672 //
3673 if (NewNumElts % OldNumElts != 0)
3674 return UnableToLegalize;
3675
3676 // Type of the intermediate result vector.
3677 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3678 LLT MidTy =
3679 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3680
3681 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3682
3683 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3684 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3685
3686 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3687 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3688 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3689 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3690 NewOps[I] = Elt.getReg(0);
3691 }
3692
3693 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3694 MIRBuilder.buildBitcast(Dst, NewVec);
3695 MI.eraseFromParent();
3696 return Legalized;
3697 }
3698
3699 if (NewNumElts < OldNumElts) {
3700 if (NewEltSize % OldEltSize != 0)
3701 return UnableToLegalize;
3702
3703 // This only depends on powers of 2 because we use bit tricks to figure out
3704 // the bit offset we need to shift to get the target element. A general
3705 // expansion could emit division/multiply.
3706 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3707 return UnableToLegalize;
3708
3709 // Increasing the vector element size.
3710 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3711 //
3712 // =>
3713 //
3714 // %cast = G_BITCAST %vec
3715 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3716 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3717 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3718 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3719 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3720 // %elt = G_TRUNC %elt_bits
3721
3722 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3723 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3724
3725 // Divide to get the index in the wider element type.
3726 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3727
3728 Register WideElt = CastVec;
3729 if (CastTy.isVector()) {
3730 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3731 ScaledIdx).getReg(0);
3732 }
3733
3734 // Compute the bit offset into the register of the target element.
3735 Register OffsetBits = getBitcastWiderVectorElementOffset(
3736 MIRBuilder, Idx, NewEltSize, OldEltSize);
3737
3738 // Shift the wide element to get the target element.
3739 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3740 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3741 MI.eraseFromParent();
3742 return Legalized;
3743 }
3744
3745 return UnableToLegalize;
3746}
3747
3748 /// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits,
3749 /// while preserving the other bits in \p TargetReg.
3750 ///
3751 /// (InsertReg << Offset) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << Offset))
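/// As an illustrative instance with an s32 TargetReg, an s8 InsertReg and
/// Offset = 8: the mask built below is 0xFF << 8 = 0xFF00, so the result is
/// (TargetReg & ~0xFF00) | (zext(InsertReg) << 8).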
3752 static Register buildBitFieldInsert(MachineIRBuilder &B,
3753 Register TargetReg, Register InsertReg,
3754 Register OffsetBits) {
3755 LLT TargetTy = B.getMRI()->getType(TargetReg);
3756 LLT InsertTy = B.getMRI()->getType(InsertReg);
3757 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3758 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3759
3760 // Produce a bitmask of the value to insert
3761 auto EltMask = B.buildConstant(
3762 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3763 InsertTy.getSizeInBits()));
3764 // Shift it into position
3765 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3766 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3767
3768 // Clear out the bits in the wide element
3769 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3770
3771 // The value to insert has all zeros already, so stick it into the masked
3772 // wide element.
3773 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3774}
3775
3776/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3777/// is increasing the element size, perform the indexing in the target element
3778/// type, and use bit operations to insert at the element position. This is
3779/// intended for architectures that can dynamically index the register file and
3780/// want to force indexing in the native register size.
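/// A rough sketch of the expansion (illustrative types and value names),
/// e.g. inserting an s8 element into <16 x s8> while indexing as <4 x s32>:
///   %cast:_(<4 x s32>)   = G_BITCAST %vec
///   %scaled:_(s32)       = G_LSHR %idx, 2
///   %wide:_(s32)         = G_EXTRACT_VECTOR_ELT %cast, %scaled
///   %newwide:_(s32)      = bit-field insert of %val at the bit offset of %idx
///   %newvec:_(<4 x s32>) = G_INSERT_VECTOR_ELT %cast, %newwide, %scaled
///   %dst:_(<16 x s8>)    = G_BITCAST %newvec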
3781 LegalizerHelper::LegalizeResult
3782 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3783 LLT CastTy) {
3784 if (TypeIdx != 0)
3785 return UnableToLegalize;
3786
3787 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3788 MI.getFirst4RegLLTs();
3789 LLT VecTy = DstTy;
3790
3791 LLT VecEltTy = VecTy.getElementType();
3792 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3793 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3794 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3795
3796 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3797 unsigned OldNumElts = VecTy.getNumElements();
3798
3799 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3800 if (NewNumElts < OldNumElts) {
3801 if (NewEltSize % OldEltSize != 0)
3802 return UnableToLegalize;
3803
3804 // This only depends on powers of 2 because we use bit tricks to figure out
3805 // the bit offset we need to shift to get the target element. A general
3806 // expansion could emit division/multiply.
3807 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3808 return UnableToLegalize;
3809
3810 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3811 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3812
3813 // Divide to get the index in the wider element type.
3814 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3815
3816 Register ExtractedElt = CastVec;
3817 if (CastTy.isVector()) {
3818 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3819 ScaledIdx).getReg(0);
3820 }
3821
3822 // Compute the bit offset into the register of the target element.
3823 Register OffsetBits = getBitcastWiderVectorElementOffset(
3824 MIRBuilder, Idx, NewEltSize, OldEltSize);
3825
3826 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3827 Val, OffsetBits);
3828 if (CastTy.isVector()) {
3829 InsertedElt = MIRBuilder.buildInsertVectorElement(
3830 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3831 }
3832
3833 MIRBuilder.buildBitcast(Dst, InsertedElt);
3834 MI.eraseFromParent();
3835 return Legalized;
3836 }
3837
3838 return UnableToLegalize;
3839}
3840
3841// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3842 // those whose source operands are smaller than a legal vector type.
3843//
3844// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3845//
3846// ===>
3847//
3848// s32 = G_BITCAST <4 x s8>
3849// s32 = G_BITCAST <4 x s8>
3850// s32 = G_BITCAST <4 x s8>
3851// s32 = G_BITCAST <4 x s8>
3852// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3853// <16 x s8> = G_BITCAST <4 x s32>
3854 LegalizerHelper::LegalizeResult
3855 LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3856 LLT CastTy) {
3857 // Convert it to CONCAT instruction
3858 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3859 if (!ConcatMI) {
3860 return UnableToLegalize;
3861 }
3862
3863 // Check if bitcast is Legal
3864 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3865 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3866
3867 // Check if the build vector is Legal
3868 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3869 return UnableToLegalize;
3870 }
3871
3872 // Bitcast the sources
3873 SmallVector<Register> BitcastRegs;
3874 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3875 BitcastRegs.push_back(
3876 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3877 .getReg(0));
3878 }
3879
3880 // Build the scalar values into a vector
3881 Register BuildReg =
3882 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3883 MIRBuilder.buildBitcast(DstReg, BuildReg);
3884
3885 MI.eraseFromParent();
3886 return Legalized;
3887}
3888
3889 // This bitcasts a shuffle vector to a different type, currently only one with
3890 // the same element size. Mostly used to legalize ptr vectors, where
3891 // ptrtoint/inttoptr will be used instead.
3892//
3893 // <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3894// ===>
3895// <4 x s64> = G_PTRTOINT <4 x p0>
3896// <4 x s64> = G_PTRTOINT <4 x p0>
3897 // <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3898// <16 x p0> = G_INTTOPTR <16 x s64>
3899 LegalizerHelper::LegalizeResult
3900 LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3901 LLT CastTy) {
3902 auto ShuffleMI = cast<GShuffleVector>(&MI);
3903 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3904 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3905
3906 // We currently only handle vectors of the same size.
3907 if (TypeIdx != 0 ||
3908 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3909 CastTy.getElementCount() != DstTy.getElementCount())
3910 return UnableToLegalize;
3911
3912 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3913
3914 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3915 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3916 auto Shuf =
3917 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3918 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3919
3920 MI.eraseFromParent();
3921 return Legalized;
3922}
3923
3924/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3925///
3926/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3927///
3928/// ===>
3929///
3930 /// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3931 /// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
3932/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
3933 LegalizerHelper::LegalizeResult
3934 LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
3935 LLT CastTy) {
3936 auto ES = cast<GExtractSubvector>(&MI);
3937
3938 if (!CastTy.isVector())
3939 return UnableToLegalize;
3940
3941 if (TypeIdx != 0)
3942 return UnableToLegalize;
3943
3944 Register Dst = ES->getReg(0);
3945 Register Src = ES->getSrcVec();
3946 uint64_t Idx = ES->getIndexImm();
3947
3948 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3949
3950 LLT DstTy = MRI.getType(Dst);
3951 LLT SrcTy = MRI.getType(Src);
3952 ElementCount DstTyEC = DstTy.getElementCount();
3953 ElementCount SrcTyEC = SrcTy.getElementCount();
3954 auto DstTyMinElts = DstTyEC.getKnownMinValue();
3955 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
3956
3957 if (DstTy == CastTy)
3958 return Legalized;
3959
3960 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
3961 return UnableToLegalize;
3962
3963 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
3964 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
3965 if (CastEltSize < DstEltSize)
3966 return UnableToLegalize;
3967
3968 auto AdjustAmt = CastEltSize / DstEltSize;
3969 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3970 SrcTyMinElts % AdjustAmt != 0)
3971 return UnableToLegalize;
3972
3973 Idx /= AdjustAmt;
3974 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3975 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
3976 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
3977 MIRBuilder.buildBitcast(Dst, PromotedES);
3978
3979 ES->eraseFromParent();
3980 return Legalized;
3981}
3982
3983/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
3984///
3985/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
3986/// <vscale x 8 x i1>,
3987/// N
3988///
3989/// ===>
3990///
3991/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3992/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
3993/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
3994/// <vscale x 1 x i8>, N / 8
3995/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
3996 LegalizerHelper::LegalizeResult
3997 LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
3998 LLT CastTy) {
3999 auto ES = cast<GInsertSubvector>(&MI);
4000
4001 if (!CastTy.isVector())
4002 return UnableToLegalize;
4003
4004 if (TypeIdx != 0)
4005 return UnableToLegalize;
4006
4007 Register Dst = ES->getReg(0);
4008 Register BigVec = ES->getBigVec();
4009 Register SubVec = ES->getSubVec();
4010 uint64_t Idx = ES->getIndexImm();
4011
4012 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4013
4014 LLT DstTy = MRI.getType(Dst);
4015 LLT BigVecTy = MRI.getType(BigVec);
4016 LLT SubVecTy = MRI.getType(SubVec);
4017
4018 if (DstTy == CastTy)
4019 return Legalized;
4020
4021 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4022 return UnableToLegalize;
4023
4024 ElementCount DstTyEC = DstTy.getElementCount();
4025 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4026 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4027 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4028 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4029 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4030
4031 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4032 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4033 if (CastEltSize < DstEltSize)
4034 return UnableToLegalize;
4035
4036 auto AdjustAmt = CastEltSize / DstEltSize;
4037 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4038 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4039 return UnableToLegalize;
4040
4041 Idx /= AdjustAmt;
4042 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4043 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4044 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4045 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4046 auto PromotedIS =
4047 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4048 MIRBuilder.buildBitcast(Dst, PromotedIS);
4049
4050 ES->eraseFromParent();
4051 return Legalized;
4052}
4053
4054 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4055 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4056 Register DstReg = LoadMI.getDstReg();
4057 Register PtrReg = LoadMI.getPointerReg();
4058 LLT DstTy = MRI.getType(DstReg);
4059 MachineMemOperand &MMO = LoadMI.getMMO();
4060 LLT MemTy = MMO.getMemoryType();
4061 MachineFunction &MF = MIRBuilder.getMF();
4062
4063 unsigned MemSizeInBits = MemTy.getSizeInBits();
4064 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4065
4066 if (MemSizeInBits != MemStoreSizeInBits) {
4067 if (MemTy.isVector())
4068 return UnableToLegalize;
4069
4070 // Promote to a byte-sized load if not loading an integral number of
4071 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4072 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4073 MachineMemOperand *NewMMO =
4074 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4075
4076 Register LoadReg = DstReg;
4077 LLT LoadTy = DstTy;
4078
4079 // If this wasn't already an extending load, we need to widen the result
4080 // register to avoid creating a load with a narrower result than the source.
4081 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4082 LoadTy = WideMemTy;
4083 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4084 }
4085
4086 if (isa<GSExtLoad>(LoadMI)) {
4087 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4088 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4089 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4090 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4091 // The extra bits are guaranteed to be zero, since we stored them that
4092 // way. A zext load from Wide thus automatically gives zext from MemVT.
4093 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4094 } else {
4095 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4096 }
4097
4098 if (DstTy != LoadTy)
4099 MIRBuilder.buildTrunc(DstReg, LoadReg);
4100
4101 LoadMI.eraseFromParent();
4102 return Legalized;
4103 }
4104
4105 // Big endian lowering not implemented.
4106 if (MIRBuilder.getDataLayout().isBigEndian())
4107 return UnableToLegalize;
4108
4109 // This load needs splitting into power of 2 sized loads.
4110 //
4111 // Our strategy here is to generate anyextending loads for the smaller
4112 // types up to next power-2 result type, and then combine the two larger
4113 // result values together, before truncating back down to the non-pow-2
4114 // type.
4115 // E.g. v1 = i24 load =>
4116 // v2 = i32 zextload (2 byte)
4117 // v3 = i32 load (1 byte)
4118 // v4 = i32 shl v3, 16
4119 // v5 = i32 or v4, v2
4120 // v1 = i24 trunc v5
4121 // By doing this we generate the correct truncate which should get
4122 // combined away as an artifact with a matching extend.
4123
4124 uint64_t LargeSplitSize, SmallSplitSize;
4125
4126 if (!isPowerOf2_32(MemSizeInBits)) {
4127 // This load needs splitting into power of 2 sized loads.
4128 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4129 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4130 } else {
4131 // This is already a power of 2, but we still need to split this in half.
4132 //
4133 // Assume we're being asked to decompose an unaligned load.
4134 // TODO: If this requires multiple splits, handle them all at once.
4135 auto &Ctx = MF.getFunction().getContext();
4136 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4137 return UnableToLegalize;
4138
4139 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4140 }
4141
4142 if (MemTy.isVector()) {
4143 // TODO: Handle vector extloads
4144 if (MemTy != DstTy)
4145 return UnableToLegalize;
4146
4147 Align Alignment = LoadMI.getAlign();
4148 // Given an alignment larger than the size of the memory, we can increase
4149 // the size of the load without needing to scalarize it.
4150 if (Alignment.value() * 8 > MemSizeInBits &&
4153 DstTy.getElementType());
4154 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4155 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4156 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4157 NewLoad.getReg(0));
4158 LoadMI.eraseFromParent();
4159 return Legalized;
4160 }
4161
4162 // TODO: We can do better than scalarizing the vector and at least split it
4163 // in half.
4164 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4165 }
4166
4167 MachineMemOperand *LargeMMO =
4168 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4169 MachineMemOperand *SmallMMO =
4170 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4171
4172 LLT PtrTy = MRI.getType(PtrReg);
4173 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4174 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4175 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4176 PtrReg, *LargeMMO);
4177
4178 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4179 LargeSplitSize / 8);
4180 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4181 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4182 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4183 SmallPtr, *SmallMMO);
4184
4185 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4186 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4187
4188 if (AnyExtTy == DstTy)
4189 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4190 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4191 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4192 MIRBuilder.buildTrunc(DstReg, {Or});
4193 } else {
4194 assert(DstTy.isPointer() && "expected pointer");
4195 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4196
4197 // FIXME: We currently consider this to be illegal for non-integral address
4198 // spaces, but we still need a way to reinterpret the bits.
4199 MIRBuilder.buildIntToPtr(DstReg, Or);
4200 }
4201
4202 LoadMI.eraseFromParent();
4203 return Legalized;
4204}
4205
4206 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4207 // Lower a non-power of 2 store into multiple pow-2 stores.
4208 // E.g. split an i24 store into an i16 store + i8 store.
4209 // We do this by first extending the stored value to the next largest power
4210 // of 2 type, and then using truncating stores to store the components.
4211 // By doing this, likewise with G_LOAD, generate an extend that can be
4212 // artifact-combined away instead of leaving behind extracts.
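// E.g. for an s24 store of %val (layout shown for a little-endian target):
//   %ext:_(s32) = G_ANYEXT %val
//   G_STORE %ext, %ptr           ; 2-byte (s16) truncating store
//   %hi:_(s32)  = G_LSHR %ext, 16
//   G_STORE %hi, %ptr + 2        ; 1-byte (s8) truncating store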
4213 Register SrcReg = StoreMI.getValueReg();
4214 Register PtrReg = StoreMI.getPointerReg();
4215 LLT SrcTy = MRI.getType(SrcReg);
4216 MachineFunction &MF = MIRBuilder.getMF();
4217 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4218 LLT MemTy = MMO.getMemoryType();
4219
4220 unsigned StoreWidth = MemTy.getSizeInBits();
4221 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4222
4223 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4224 // Promote to a byte-sized store with upper bits zero if not
4225 // storing an integral number of bytes. For example, promote
4226 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4227 LLT WideTy = LLT::scalar(StoreSizeInBits);
4228
4229 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4230 // Avoid creating a store with a narrower source than result.
4231 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4232 SrcTy = WideTy;
4233 }
4234
4235 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4236
4237 MachineMemOperand *NewMMO =
4238 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4239 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4240 StoreMI.eraseFromParent();
4241 return Legalized;
4242 }
4243
4244 if (MemTy.isVector()) {
4245 if (MemTy != SrcTy)
4246 return scalarizeVectorBooleanStore(StoreMI);
4247
4248 // TODO: We can do better than scalarizing the vector and at least split it
4249 // in half.
4250 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4251 }
4252
4253 unsigned MemSizeInBits = MemTy.getSizeInBits();
4254 uint64_t LargeSplitSize, SmallSplitSize;
4255
4256 if (!isPowerOf2_32(MemSizeInBits)) {
4257 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4258 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4259 } else {
4260 auto &Ctx = MF.getFunction().getContext();
4261 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4262 return UnableToLegalize; // Don't know what we're being asked to do.
4263
4264 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4265 }
4266
4267 // Extend to the next pow-2. If this store was itself the result of lowering,
4268 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4269 // that's wider than the stored size.
4270 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4271 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4272
4273 if (SrcTy.isPointer()) {
4274 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4275 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4276 }
4277
4278 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4279
4280 // Obtain the smaller value by shifting away the larger value.
4281 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4282 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4283
4284 // Generate the PtrAdd and truncating stores.
4285 LLT PtrTy = MRI.getType(PtrReg);
4286 auto OffsetCst = MIRBuilder.buildConstant(
4287 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4288 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4289
4290 MachineMemOperand *LargeMMO =
4291 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4292 MachineMemOperand *SmallMMO =
4293 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4294 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4295 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4296 StoreMI.eraseFromParent();
4297 return Legalized;
4298}
4299
4300 LegalizerHelper::LegalizeResult
4301 LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4302 Register SrcReg = StoreMI.getValueReg();
4303 Register PtrReg = StoreMI.getPointerReg();
4304 LLT SrcTy = MRI.getType(SrcReg);
4305 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4306 LLT MemTy = MMO.getMemoryType();
4307 LLT MemScalarTy = MemTy.getElementType();
4308 MachineFunction &MF = MIRBuilder.getMF();
4309
4310 assert(SrcTy.isVector() && "Expect a vector store type");
4311
4312 if (!MemScalarTy.isByteSized()) {
4313 // We need to build an integer scalar of the vector bit pattern.
4314 // It's not legal for us to add padding when storing a vector.
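// E.g. a <4 x s1> store becomes a single s4 integer store: each element is
// truncated to s1, zero-extended to s4, shifted into its bit position
// (reversed on big-endian targets) and OR'd into the packed value.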
4315 unsigned NumBits = MemTy.getSizeInBits();
4316 LLT IntTy = LLT::scalar(NumBits);
4317 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4318 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4319
4320 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4321 auto Elt = MIRBuilder.buildExtractVectorElement(
4322 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4323 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4324 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4325 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4326 ? (MemTy.getNumElements() - 1) - I
4327 : I;
4328 auto ShiftAmt = MIRBuilder.buildConstant(
4329 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4330 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4331 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4332 }
4333 auto PtrInfo = MMO.getPointerInfo();
4334 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4335 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4336 StoreMI.eraseFromParent();
4337 return Legalized;
4338 }
4339
4340 // TODO: implement simple scalarization.
4341 return UnableToLegalize;
4342}
4343
4344 LegalizerHelper::LegalizeResult
4345 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4346 switch (MI.getOpcode()) {
4347 case TargetOpcode::G_LOAD: {
4348 if (TypeIdx != 0)
4349 return UnableToLegalize;
4350 MachineMemOperand &MMO = **MI.memoperands_begin();
4351
4352 // Not sure how to interpret a bitcast of an extending load.
4353 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4354 return UnableToLegalize;
4355
4356 Observer.changingInstr(MI);
4357 bitcastDst(MI, CastTy, 0);
4358 MMO.setType(CastTy);
4359 // The range metadata is no longer valid when reinterpreted as a different
4360 // type.
4361 MMO.clearRanges();
4362 Observer.changedInstr(MI);
4363 return Legalized;
4364 }
4365 case TargetOpcode::G_STORE: {
4366 if (TypeIdx != 0)
4367 return UnableToLegalize;
4368
4369 MachineMemOperand &MMO = **MI.memoperands_begin();
4370
4371 // Not sure how to interpret a bitcast of a truncating store.
4372 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4373 return UnableToLegalize;
4374
4375 Observer.changingInstr(MI);
4376 bitcastSrc(MI, CastTy, 0);
4377 MMO.setType(CastTy);
4378 Observer.changedInstr(MI);
4379 return Legalized;
4380 }
4381 case TargetOpcode::G_SELECT: {
4382 if (TypeIdx != 0)
4383 return UnableToLegalize;
4384
4385 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4386 LLVM_DEBUG(
4387 dbgs() << "bitcast action not implemented for vector select\n");
4388 return UnableToLegalize;
4389 }
4390
4391 Observer.changingInstr(MI);
4392 bitcastSrc(MI, CastTy, 2);
4393 bitcastSrc(MI, CastTy, 3);
4394 bitcastDst(MI, CastTy, 0);
4395 Observer.changedInstr(MI);
4396 return Legalized;
4397 }
4398 case TargetOpcode::G_AND:
4399 case TargetOpcode::G_OR:
4400 case TargetOpcode::G_XOR: {
4401 Observer.changingInstr(MI);
4402 bitcastSrc(MI, CastTy, 1);
4403 bitcastSrc(MI, CastTy, 2);
4404 bitcastDst(MI, CastTy, 0);
4405 Observer.changedInstr(MI);
4406 return Legalized;
4407 }
4408 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4409 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4410 case TargetOpcode::G_INSERT_VECTOR_ELT:
4411 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4412 case TargetOpcode::G_CONCAT_VECTORS:
4413 return bitcastConcatVector(MI, TypeIdx, CastTy);
4414 case TargetOpcode::G_SHUFFLE_VECTOR:
4415 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4416 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4417 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4418 case TargetOpcode::G_INSERT_SUBVECTOR:
4419 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4420 default:
4421 return UnableToLegalize;
4422 }
4423}
4424
4425// Legalize an instruction by changing the opcode in place.
4426void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4427 Observer.changingInstr(MI);
4428 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4429 Observer.changedInstr(MI);
4430 }
4431
4432 LegalizerHelper::LegalizeResult
4433 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4434 using namespace TargetOpcode;
4435
4436 switch(MI.getOpcode()) {
4437 default:
4438 return UnableToLegalize;
4439 case TargetOpcode::G_FCONSTANT:
4440 return lowerFConstant(MI);
4441 case TargetOpcode::G_BITCAST:
4442 return lowerBitcast(MI);
4443 case TargetOpcode::G_SREM:
4444 case TargetOpcode::G_UREM: {
4445 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4446 auto Quot =
4447 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4448 {MI.getOperand(1), MI.getOperand(2)});
4449
4450 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4451 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4452 MI.eraseFromParent();
4453 return Legalized;
4454 }
4455 case TargetOpcode::G_SADDO:
4456 case TargetOpcode::G_SSUBO:
4457 return lowerSADDO_SSUBO(MI);
4458 case TargetOpcode::G_SADDE:
4459 return lowerSADDE(MI);
4460 case TargetOpcode::G_UMULH:
4461 case TargetOpcode::G_SMULH:
4462 return lowerSMULH_UMULH(MI);
4463 case TargetOpcode::G_SMULO:
4464 case TargetOpcode::G_UMULO: {
4465 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4466 // result.
4467 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4468 LLT Ty = MRI.getType(Res);
4469
4470 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4471 ? TargetOpcode::G_SMULH
4472 : TargetOpcode::G_UMULH;
4473
4474 Observer.changingInstr(MI);
4475 const auto &TII = MIRBuilder.getTII();
4476 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4477 MI.removeOperand(1);
4478 Observer.changedInstr(MI);
4479
4480 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4481 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4482
4483 // Move insert point forward so we can use the Res register if needed.
4484 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4485
4486 // For *signed* multiply, overflow is detected by checking:
4487 // (hi != (lo >> bitwidth-1))
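    // E.g. for s8: 100 * 2 = 200 wraps to lo = -56 while G_SMULH gives hi = 0;
    // since 0 != (-56 >> 7) = -1, the overflow flag is set.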
4488 if (Opcode == TargetOpcode::G_SMULH) {
4489 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4490 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4491 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4492 } else {
4493 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4494 }
4495 return Legalized;
4496 }
4497 case TargetOpcode::G_FNEG: {
4498 auto [Res, SubByReg] = MI.getFirst2Regs();
4499 LLT Ty = MRI.getType(Res);
4500
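    // G_FNEG is lowered to an XOR with the sign-bit mask of the type.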
4501 auto SignMask = MIRBuilder.buildConstant(
4502 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4503 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4504 MI.eraseFromParent();
4505 return Legalized;
4506 }
4507 case TargetOpcode::G_FSUB:
4508 case TargetOpcode::G_STRICT_FSUB: {
4509 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4510 LLT Ty = MRI.getType(Res);
4511
4512 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4513 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4514
4515 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4516 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4517 else
4518 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4519
4520 MI.eraseFromParent();
4521 return Legalized;
4522 }
4523 case TargetOpcode::G_FMAD:
4524 return lowerFMad(MI);
4525 case TargetOpcode::G_FFLOOR:
4526 return lowerFFloor(MI);
4527 case TargetOpcode::G_LROUND:
4528 case TargetOpcode::G_LLROUND: {
4529 Register DstReg = MI.getOperand(0).getReg();
4530 Register SrcReg = MI.getOperand(1).getReg();
4531 LLT SrcTy = MRI.getType(SrcReg);
4532 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4533 {SrcReg});
4534 MIRBuilder.buildFPTOSI(DstReg, Round);
4535 MI.eraseFromParent();
4536 return Legalized;
4537 }
4538 case TargetOpcode::G_INTRINSIC_ROUND:
4539 return lowerIntrinsicRound(MI);
4540 case TargetOpcode::G_FRINT: {
4541 // Since round even is the assumed rounding mode for unconstrained FP
4542 // operations, rint and roundeven are the same operation.
4543 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4544 return Legalized;
4545 }
4546 case TargetOpcode::G_INTRINSIC_LRINT:
4547 case TargetOpcode::G_INTRINSIC_LLRINT: {
4548 Register DstReg = MI.getOperand(0).getReg();
4549 Register SrcReg = MI.getOperand(1).getReg();
4550 LLT SrcTy = MRI.getType(SrcReg);
4551 auto Round =
4552 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4553 MIRBuilder.buildFPTOSI(DstReg, Round);
4554 MI.eraseFromParent();
4555 return Legalized;
4556 }
4557 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4558 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4559 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4560 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4561 **MI.memoperands_begin());
4562 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4563 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4564 MI.eraseFromParent();
4565 return Legalized;
4566 }
4567 case TargetOpcode::G_LOAD:
4568 case TargetOpcode::G_SEXTLOAD:
4569 case TargetOpcode::G_ZEXTLOAD:
4570 return lowerLoad(cast<GAnyLoad>(MI));
4571 case TargetOpcode::G_STORE:
4572 return lowerStore(cast<GStore>(MI));
4573 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4574 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4575 case TargetOpcode::G_CTLZ:
4576 case TargetOpcode::G_CTTZ:
4577 case TargetOpcode::G_CTPOP:
4578 return lowerBitCount(MI);
4579 case G_UADDO: {
4580 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4581
4582 Register NewRes = MRI.cloneVirtualRegister(Res);
4583
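    // Lower to a plain add; unsigned overflow occurred iff the wrapped sum is
    // smaller than an operand (the RHS here).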
4584 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4585 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4586
4587 MIRBuilder.buildCopy(Res, NewRes);
4588
4589 MI.eraseFromParent();
4590 return Legalized;
4591 }
4592 case G_UADDE: {
4593 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4594 const LLT CondTy = MRI.getType(CarryOut);
4595 const LLT Ty = MRI.getType(Res);
4596
4597 Register NewRes = MRI.cloneVirtualRegister(Res);
4598
4599 // Initial add of the two operands.
4600 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4601
4602 // Initial check for carry.
4603 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4604
4605 // Add the sum and the carry.
4606 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4607 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4608
4609 // Second check for carry. We can only carry if the initial sum is all 1s
4610 // and the carry is set, resulting in a new sum of 0.
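    // E.g. for s8: 0xFF + 0x00 with CarryIn = 1 gives TmpRes = 0xFF (no carry
    // from the first add) and NewRes = 0x00, so this second check sets CarryOut.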
4611 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4612 auto ResEqZero =
4613 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4614 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4615 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4616
4617 MIRBuilder.buildCopy(Res, NewRes);
4618
4619 MI.eraseFromParent();
4620 return Legalized;
4621 }
4622 case G_USUBO: {
4623 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4624
4625 MIRBuilder.buildSub(Res, LHS, RHS);
4626 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4627
4628 MI.eraseFromParent();
4629 return Legalized;
4630 }
4631 case G_USUBE: {
4632 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4633 const LLT CondTy = MRI.getType(BorrowOut);
4634 const LLT Ty = MRI.getType(Res);
4635
4636 // Initial subtract of the two operands.
4637 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4638
4639 // Initial check for borrow.
4640 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4641
4642 // Subtract the borrow from the first subtract.
4643 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4644 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4645
4646 // Second check for borrow. We can only borrow if the initial difference is
4647 // 0 and the borrow is set, resulting in a new difference of all 1s.
4648 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4649 auto TmpResEqZero =
4650 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4651 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4652 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4653
4654 MI.eraseFromParent();
4655 return Legalized;
4656 }
4657 case G_UITOFP:
4658 return lowerUITOFP(MI);
4659 case G_SITOFP:
4660 return lowerSITOFP(MI);
4661 case G_FPTOUI:
4662 return lowerFPTOUI(MI);
4663 case G_FPTOSI:
4664 return lowerFPTOSI(MI);
4665 case G_FPTOUI_SAT:
4666 case G_FPTOSI_SAT:
4667 return lowerFPTOINT_SAT(MI);
4668 case G_FPTRUNC:
4669 return lowerFPTRUNC(MI);
4670 case G_FPOWI:
4671 return lowerFPOWI(MI);
4672 case G_SMIN:
4673 case G_SMAX:
4674 case G_UMIN:
4675 case G_UMAX:
4676 return lowerMinMax(MI);
4677 case G_SCMP:
4678 case G_UCMP:
4679 return lowerThreewayCompare(MI);
4680 case G_FCOPYSIGN:
4681 return lowerFCopySign(MI);
4682 case G_FMINNUM:
4683 case G_FMAXNUM:
4684 case G_FMINIMUMNUM:
4685 case G_FMAXIMUMNUM:
4686 return lowerFMinNumMaxNum(MI);
4687 case G_MERGE_VALUES:
4688 return lowerMergeValues(MI);
4689 case G_UNMERGE_VALUES:
4690 return lowerUnmergeValues(MI);
4691 case TargetOpcode::G_SEXT_INREG: {
4692 assert(MI.getOperand(2).isImm() && "Expected immediate");
4693 int64_t SizeInBits = MI.getOperand(2).getImm();
4694
4695 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4696 LLT DstTy = MRI.getType(DstReg);
4697 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4698
4699 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4700 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4701 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4702 MI.eraseFromParent();
4703 return Legalized;
4704 }
4705 case G_EXTRACT_VECTOR_ELT:
4706 case G_INSERT_VECTOR_ELT:
4707 return lowerExtractInsertVectorElt(MI);
4708 case G_SHUFFLE_VECTOR:
4709 return lowerShuffleVector(MI);
4710 case G_VECTOR_COMPRESS:
4711 return lowerVECTOR_COMPRESS(MI);
4712 case G_DYN_STACKALLOC:
4713 return lowerDynStackAlloc(MI);
4714 case G_STACKSAVE:
4715 return lowerStackSave(MI);
4716 case G_STACKRESTORE:
4717 return lowerStackRestore(MI);
4718 case G_EXTRACT:
4719 return lowerExtract(MI);
4720 case G_INSERT:
4721 return lowerInsert(MI);
4722 case G_BSWAP:
4723 return lowerBswap(MI);
4724 case G_BITREVERSE:
4725 return lowerBitreverse(MI);
4726 case G_READ_REGISTER:
4727 case G_WRITE_REGISTER:
4728 return lowerReadWriteRegister(MI);
4729 case G_UADDSAT:
4730 case G_USUBSAT: {
4731 // Try to make a reasonable guess about which lowering strategy to use. The
4732 // target can override this with custom lowering and calling the
4733 // implementation functions.
4734 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4735 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4736 return lowerAddSubSatToMinMax(MI);
4737 return lowerAddSubSatToAddoSubo(MI);
4738 }
4739 case G_SADDSAT:
4740 case G_SSUBSAT: {
4741 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4742
4743 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4744 // since it's a shorter expansion. However, we would need to figure out the
4745 // preferred boolean type for the carry out for the query.
4746 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4747 return lowerAddSubSatToMinMax(MI);
4748 return lowerAddSubSatToAddoSubo(MI);
4749 }
4750 case G_SSHLSAT:
4751 case G_USHLSAT:
4752 return lowerShlSat(MI);
4753 case G_ABS:
4754 return lowerAbsToAddXor(MI);
4755 case G_ABDS:
4756 case G_ABDU: {
4757 bool IsSigned = MI.getOpcode() == G_ABDS;
4758 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4759 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4760 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4761 return lowerAbsDiffToMinMax(MI);
4762 }
4763 return lowerAbsDiffToSelect(MI);
4764 }
4765 case G_FABS:
4766 return lowerFAbs(MI);
4767 case G_SELECT:
4768 return lowerSelect(MI);
4769 case G_IS_FPCLASS:
4770 return lowerISFPCLASS(MI);
4771 case G_SDIVREM:
4772 case G_UDIVREM:
4773 return lowerDIVREM(MI);
4774 case G_FSHL:
4775 case G_FSHR:
4776 return lowerFunnelShift(MI);
4777 case G_ROTL:
4778 case G_ROTR:
4779 return lowerRotate(MI);
4780 case G_MEMSET:
4781 case G_MEMCPY:
4782 case G_MEMMOVE:
4783 return lowerMemCpyFamily(MI);
4784 case G_MEMCPY_INLINE:
4785 return lowerMemcpyInline(MI);
4786 case G_ZEXT:
4787 case G_SEXT:
4788 case G_ANYEXT:
4789 return lowerEXT(MI);
4790 case G_TRUNC:
4791 return lowerTRUNC(MI);
4792 GISEL_VECREDUCE_CASES_NONSEQ
4793 return lowerVectorReduction(MI);
4794 case G_VAARG:
4795 return lowerVAArg(MI);
4796 case G_ATOMICRMW_SUB: {
4797 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4798 const LLT ValTy = MRI.getType(Val);
4799 MachineMemOperand *MMO = *MI.memoperands_begin();
4800
4801 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4802 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4803 MI.eraseFromParent();
4804 return Legalized;
4805 }
4806 }
4807}
4808
4809 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4810 Align MinAlign) const {
4811 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4812 // datalayout for the preferred alignment. Also there should be a target hook
4813 // for this to allow targets to reduce the alignment and ignore the
4814 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4815 // the type.
4816 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4817}
4818
4819 MachineInstrBuilder
4820 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4821 MachinePointerInfo &PtrInfo) {
4822 MachineFunction &MF = MIRBuilder.getMF();
4823 const DataLayout &DL = MIRBuilder.getDataLayout();
4824 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4825
4826 unsigned AddrSpace = DL.getAllocaAddrSpace();
4827 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4828
4829 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4830 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4831}
4832
4833 MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4834 const SrcOp &Val) {
4835 LLT SrcTy = Val.getLLTTy(MRI);
4836 Align StackTypeAlign =
4837 std::max(getStackTemporaryAlignment(SrcTy),
4838 getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4839 MachinePointerInfo PtrInfo;
4840 auto StackTemp =
4841 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4842
4843 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4844 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4845}
4846
4847 static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4848 LLT VecTy) {
4849 LLT IdxTy = B.getMRI()->getType(IdxReg);
4850 unsigned NElts = VecTy.getNumElements();
4851
4852 int64_t IdxVal;
4853 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4854 if (IdxVal < VecTy.getNumElements())
4855 return IdxReg;
4856 // If a constant index would be out of bounds, clamp it as well.
4857 }
4858
4859 if (isPowerOf2_32(NElts)) {
4860 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4861 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4862 }
4863
4864 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4865 .getReg(0);
4866}
4867
4868 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4869 Register Index) {
4870 LLT EltTy = VecTy.getElementType();
4871
4872 // Calculate the element offset and add it to the pointer.
4873 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4874 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4875 "Converting bits to bytes lost precision");
4876
4877 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4878
4879 // Convert index to the correct size for the address space.
4880 const DataLayout &DL = MIRBuilder.getDataLayout();
4881 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4882 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4883 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4884 if (IdxTy != MRI.getType(Index))
4885 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4886
4887 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4888 MIRBuilder.buildConstant(IdxTy, EltSize));
4889
4890 LLT PtrTy = MRI.getType(VecPtr);
4891 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4892}
4893
4894#ifndef NDEBUG
4895 /// Check that all vector operands have the same number of elements. Other
4896 /// operands should be listed in \p NonVecOpIndices.
4897 static bool hasSameNumEltsOnAllVectorOperands(
4898 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4899 std::initializer_list<unsigned> NonVecOpIndices) {
4900 if (MI.getNumMemOperands() != 0)
4901 return false;
4902
4903 LLT VecTy = MRI.getType(MI.getReg(0));
4904 if (!VecTy.isVector())
4905 return false;
4906 unsigned NumElts = VecTy.getNumElements();
4907
4908 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4909 MachineOperand &Op = MI.getOperand(OpIdx);
4910 if (!Op.isReg()) {
4911 if (!is_contained(NonVecOpIndices, OpIdx))
4912 return false;
4913 continue;
4914 }
4915
4916 LLT Ty = MRI.getType(Op.getReg());
4917 if (!Ty.isVector()) {
4918 if (!is_contained(NonVecOpIndices, OpIdx))
4919 return false;
4920 continue;
4921 }
4922
4923 if (Ty.getNumElements() != NumElts)
4924 return false;
4925 }
4926
4927 return true;
4928}
4929#endif
4930
4931 /// Fill \p DstOps with DstOps that, combined, have the same number of elements
4932 /// as \p Ty. These DstOps are scalars when \p NumElts = 1, and vectors with
4933 /// \p NumElts elements otherwise. When Ty.getNumElements() is not a multiple of
4934 /// \p NumElts, the last DstOp (the leftover) has fewer than \p NumElts elements.
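/// For example, Ty = <7 x s32> with \p NumElts = 2 yields three <2 x s32>
/// DstOps plus one s32 leftover DstOp.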
4935static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4936 unsigned NumElts) {
4937 LLT LeftoverTy;
4938 assert(Ty.isVector() && "Expected vector type");
4939 LLT EltTy = Ty.getElementType();
4940 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4941 int NumParts, NumLeftover;
4942 std::tie(NumParts, NumLeftover) =
4943 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4944
4945 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4946 for (int i = 0; i < NumParts; ++i) {
4947 DstOps.push_back(NarrowTy);
4948 }
4949
4950 if (LeftoverTy.isValid()) {
4951 assert(NumLeftover == 1 && "expected exactly one leftover");
4952 DstOps.push_back(LeftoverTy);
4953 }
4954}
4955
4956/// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
4957/// created from \p Op according to its operand type.
4958 static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4959 MachineOperand &Op) {
4960 for (unsigned i = 0; i < N; ++i) {
4961 if (Op.isReg())
4962 Ops.push_back(Op.getReg());
4963 else if (Op.isImm())
4964 Ops.push_back(Op.getImm());
4965 else if (Op.isPredicate())
4966 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4967 else
4968 llvm_unreachable("Unsupported type");
4969 }
4970}
4971
4972// Handle splitting vector operations which need to have the same number of
4973// elements in each type index, but each type index may have a different element
4974// type.
4975//
4976// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4977// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4978// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4979//
4980// Also handles some irregular breakdown cases, e.g.
4981// <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4982// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4983// s64 = G_SHL s64, s32
4984 LegalizerHelper::LegalizeResult
4985 LegalizerHelper::fewerElementsVectorMultiEltType(
4986 GenericMachineInstr &MI, unsigned NumElts,
4987 std::initializer_list<unsigned> NonVecOpIndices) {
4988 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4989 "Non-compatible opcode or not specified non-vector operands");
4990 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4991
4992 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4993 unsigned NumDefs = MI.getNumDefs();
4994
4995 // Create DstOps (sub-vectors with NumElts elements + leftover) for each output.
4996 // Build instructions with DstOps so that an instruction found by CSE is used
4997 // directly; when building with a vreg destination, CSE copies into that vreg.
4998 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4999 // Output registers will be taken from created instructions.
5000 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5001 for (unsigned i = 0; i < NumDefs; ++i) {
5002 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5003 }
5004
5005 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5006 // Operands listed in NonVecOpIndices will be used as is without splitting;
5007 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5008 // scalar condition (op 1), immediate in sext_inreg (op 2).
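  // For example, when splitting <4 x s1> = G_ICMP intpred(eq), <4 x s32>,
  // <4 x s32> with NumElts = 2, the predicate (op 1) is broadcast to both
  // halves while each vector input is split into <2 x s32> pieces.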
5009 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5010 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5011 ++UseIdx, ++UseNo) {
5012 if (is_contained(NonVecOpIndices, UseIdx)) {
5013 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5014 MI.getOperand(UseIdx));
5015 } else {
5016 SmallVector<Register, 8> SplitPieces;
5017 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5018 MRI);
5019 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5020 }
5021 }
5022
5023 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5024
5025 // Take i-th piece of each input operand split and build sub-vector/scalar
5026 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5027 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5028 SmallVector<DstOp, 8> Defs;
5029 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5030 Defs.push_back(OutputOpsPieces[DstNo][i]);
5031
5032 SmallVector<SrcOp, 8> Uses;
5033 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5034 Uses.push_back(InputOpsPieces[InputNo][i]);
5035
5036 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5037 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5038 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5039 }
5040
5041 // Merge small outputs into MI's output for each def operand.
5042 if (NumLeftovers) {
5043 for (unsigned i = 0; i < NumDefs; ++i)
5044 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5045 } else {
5046 for (unsigned i = 0; i < NumDefs; ++i)
5047 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5048 }
5049
5050 MI.eraseFromParent();
5051 return Legalized;
5052}
5053
5054 LegalizerHelper::LegalizeResult
5055 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5056 unsigned NumElts) {
5057 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5058
5059 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5060 unsigned NumDefs = MI.getNumDefs();
5061
5062 SmallVector<DstOp, 8> OutputOpsPieces;
5063 SmallVector<Register, 8> OutputRegs;
5064 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5065
5066 // Instructions that split the register are inserted in the basic block where
5067 // the register is defined (that basic block is given by the next operand).
5068 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5069 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5070 UseIdx += 2, ++UseNo) {
5071 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5072 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5073 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5074 MIRBuilder, MRI);
5075 }
5076
5077 // Build PHIs with fewer elements.
5078 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5079 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5080 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5081 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5082 Phi.addDef(
5083 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5084 OutputRegs.push_back(Phi.getReg(0));
5085
5086 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5087 Phi.addUse(InputOpsPieces[j][i]);
5088 Phi.add(MI.getOperand(1 + j * 2 + 1));
5089 }
5090 }
5091
5092 // Set the insert point after the existing PHIs
5093 MachineBasicBlock &MBB = *MI.getParent();
5094 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5095
5096 // Merge small outputs into MI's def.
5097 if (NumLeftovers) {
5098 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5099 } else {
5100 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5101 }
5102
5103 MI.eraseFromParent();
5104 return Legalized;
5105}
5106
5107 LegalizerHelper::LegalizeResult
5108 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5109 unsigned TypeIdx,
5110 LLT NarrowTy) {
5111 const int NumDst = MI.getNumOperands() - 1;
5112 const Register SrcReg = MI.getOperand(NumDst).getReg();
5113 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5114 LLT SrcTy = MRI.getType(SrcReg);
5115
5116 if (TypeIdx != 1 || NarrowTy == DstTy)
5117 return UnableToLegalize;
5118
5119 // Requires compatible types. Otherwise SrcReg should have been defined by a
5120 // merge-like instruction that would get artifact-combined. Most likely the
5121 // instruction that defines SrcReg has to perform more/fewer-elements
5122 // legalization compatible with NarrowTy.
5123 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5124 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5125
5126 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5127 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5128 return UnableToLegalize;
5129
5130 // This is most likely DstTy (smaller than register size) packed in SrcTy
5131 // (larger than register size); since the unmerge was not combined, it will be
5132 // lowered to bit-sequence extracts from a register. Unpack SrcTy into NarrowTy
5133 // (register-size) pieces first, then unpack each NarrowTy piece into DstTy.
5134
5135 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5136 //
5137 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5138 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5139 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5140 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5141 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5142 const int PartsPerUnmerge = NumDst / NumUnmerge;
5143
5144 for (int I = 0; I != NumUnmerge; ++I) {
5145 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5146
5147 for (int J = 0; J != PartsPerUnmerge; ++J)
5148 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5149 MIB.addUse(Unmerge.getReg(I));
5150 }
5151
5152 MI.eraseFromParent();
5153 return Legalized;
5154}
5155
5156 LegalizerHelper::LegalizeResult
5157 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5158 LLT NarrowTy) {
5159 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5160 // Requires compatible types. Otherwise the user of DstReg did not perform the
5161 // unmerge that should have been artifact-combined. Most likely the instruction
5162 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5163 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5164 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5165 if (NarrowTy == SrcTy)
5166 return UnableToLegalize;
5167
5168 // This attempts to lower part of an LCMTy merge/unmerge sequence. Its intended
5169 // use is for old MIR tests. Since the changes to more/fewer-elements
5170 // legalization, it should no longer be possible to generate MIR like this from
5171 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5172 if (TypeIdx == 1) {
5173 assert(SrcTy.isVector() && "Expected vector types");
5174 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5175 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5176 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5177 return UnableToLegalize;
5178 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5179 //
5180 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5181 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5182 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5183 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5184 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5185 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5186
5187 SmallVector<Register, 8> Elts;
5188 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5189 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5190 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5191 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5192 Elts.push_back(Unmerge.getReg(j));
5193 }
5194
5195 SmallVector<Register, 8> NarrowTyElts;
5196 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5197 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5198 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5199 ++i, Offset += NumNarrowTyElts) {
5200 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5201 NarrowTyElts.push_back(
5202 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5203 }
5204
5205 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5206 MI.eraseFromParent();
5207 return Legalized;
5208 }
5209
5210 assert(TypeIdx == 0 && "Bad type index");
5211 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5212 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5213 return UnableToLegalize;
5214
5215 // This is most likely SrcTy (smaller than register size) packed in DstTy
5216 // (larger than register size); since the merge was not combined, it will be
5217 // lowered to bit-sequence packing into a register. Merge SrcTy into NarrowTy
5218 // (register-size) pieces first, then merge each NarrowTy piece into DstTy.
5219
5220 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5221 //
5222 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5223 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5224 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5225 SmallVector<Register, 8> NarrowTyElts;
5226 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5227 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5228 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5229 for (unsigned i = 0; i < NumParts; ++i) {
5230 SmallVector<Register, 8> Sources;
5231 for (unsigned j = 0; j < NumElts; ++j)
5232 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5233 NarrowTyElts.push_back(
5234 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5235 }
5236
5237 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5238 MI.eraseFromParent();
5239 return Legalized;
5240}
5241
5242 LegalizerHelper::LegalizeResult
5243 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5244 unsigned TypeIdx,
5245 LLT NarrowVecTy) {
5246 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5247 Register InsertVal;
5248 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5249
5250 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5251 if (IsInsert)
5252 InsertVal = MI.getOperand(2).getReg();
5253
5254 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5255 LLT VecTy = MRI.getType(SrcVec);
5256
5257 // If the index is a constant, we can really break this down as you would
5258 // expect, and index into the target size pieces.
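  // For example, extracting element 5 from an <8 x s16> vector split into
  // <4 x s16> pieces reads element 1 of piece 1 (PartIdx = 5 / 4 = 1,
  // NewIdx = 5 - 4 * 1 = 1).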
5259 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5260 if (MaybeCst) {
5261 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5262 // Avoid out of bounds indexing the pieces.
5263 if (IdxVal >= VecTy.getNumElements()) {
5264 MIRBuilder.buildUndef(DstReg);
5265 MI.eraseFromParent();
5266 return Legalized;
5267 }
5268
5269 if (!NarrowVecTy.isVector()) {
5270 SmallVector<Register, 8> SplitPieces;
5271 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5272 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5273 if (IsInsert) {
5274 SplitPieces[IdxVal] = InsertVal;
5275 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5276 } else {
5277 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5278 }
5279 } else {
5280 SmallVector<Register, 8> VecParts;
5281 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5282
5283 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5284 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5285 TargetOpcode::G_ANYEXT);
5286
5287 unsigned NewNumElts = NarrowVecTy.getNumElements();
5288
5289 LLT IdxTy = MRI.getType(Idx);
5290 int64_t PartIdx = IdxVal / NewNumElts;
5291 auto NewIdx =
5292 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5293
5294 if (IsInsert) {
5295 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5296
5297 // Use the adjusted index to insert into one of the subvectors.
5298 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5299 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5300 VecParts[PartIdx] = InsertPart.getReg(0);
5301
5302 // Recombine the inserted subvector with the others to reform the result
5303 // vector.
5304 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5305 } else {
5306 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5307 }
5308 }
5309
5310 MI.eraseFromParent();
5311 return Legalized;
5312 }
5313
5314 // With a variable index, we can't perform the operation in a smaller type, so
5315 // we're forced to expand this.
5316 //
5317 // TODO: We could emit a chain of compare/select to figure out which piece to
5318 // index.
5319 return lowerExtractInsertVectorElt(MI);
5320}
5321
5322 LegalizerHelper::LegalizeResult
5323 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5324 LLT NarrowTy) {
5325 // FIXME: Don't know how to handle secondary types yet.
5326 if (TypeIdx != 0)
5327 return UnableToLegalize;
5328
5329 if (!NarrowTy.isByteSized()) {
5330 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5331 return UnableToLegalize;
5332 }
5333
5334 // This implementation doesn't work for atomics. Give up instead of doing
5335 // something invalid.
5336 if (LdStMI.isAtomic())
5337 return UnableToLegalize;
5338
5339 bool IsLoad = isa<GLoad>(LdStMI);
5340 Register ValReg = LdStMI.getReg(0);
5341 Register AddrReg = LdStMI.getPointerReg();
5342 LLT ValTy = MRI.getType(ValReg);
5343
5344 // FIXME: Do we need a distinct NarrowMemory legalize action?
5345 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5346 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5347 return UnableToLegalize;
5348 }
5349
5350 int NumParts = -1;
5351 int NumLeftover = -1;
5352 LLT LeftoverTy;
5353 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5354 if (IsLoad) {
5355 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5356 } else {
5357 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5358 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5359 NumParts = NarrowRegs.size();
5360 NumLeftover = NarrowLeftoverRegs.size();
5361 }
5362 }
5363
5364 if (NumParts == -1)
5365 return UnableToLegalize;
5366
5367 LLT PtrTy = MRI.getType(AddrReg);
5368 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5369
5370 unsigned TotalSize = ValTy.getSizeInBits();
5371
5372 // Split the load/store into PartTy-sized pieces starting at Offset. If this
5373 // is a load, return the new registers in ValRegs. For a store, each element
5374 // of ValRegs should have type PartTy. Returns the next offset that needs to
5375 // be handled.
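  // For example, narrowing an s96 load with NarrowTy = s32 produces three s32
  // loads at byte offsets 0, 4 and 8 on a little-endian target.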
5376 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5377 auto MMO = LdStMI.getMMO();
5378 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5379 unsigned NumParts, unsigned Offset) -> unsigned {
5380 MachineFunction &MF = MIRBuilder.getMF();
5381 unsigned PartSize = PartTy.getSizeInBits();
5382 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5383 ++Idx) {
5384 unsigned ByteOffset = Offset / 8;
5385 Register NewAddrReg;
5386
5387 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5388 ByteOffset);
5389
5390 MachineMemOperand *NewMMO =
5391 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5392
5393 if (IsLoad) {
5394 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5395 ValRegs.push_back(Dst);
5396 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5397 } else {
5398 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5399 }
5400 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5401 }
5402
5403 return Offset;
5404 };
5405
5406 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5407 unsigned HandledOffset =
5408 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5409
5410 // Handle the rest of the register if this isn't an even type breakdown.
5411 if (LeftoverTy.isValid())
5412 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5413
5414 if (IsLoad) {
5415 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5416 LeftoverTy, NarrowLeftoverRegs);
5417 }
5418
5419 LdStMI.eraseFromParent();
5420 return Legalized;
5421}
5422
5423 LegalizerHelper::LegalizeResult
5424 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5425 LLT NarrowTy) {
5426 using namespace TargetOpcode;
5427 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5428 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5429
5430 switch (MI.getOpcode()) {
5431 case G_IMPLICIT_DEF:
5432 case G_TRUNC:
5433 case G_AND:
5434 case G_OR:
5435 case G_XOR:
5436 case G_ADD:
5437 case G_SUB:
5438 case G_MUL:
5439 case G_PTR_ADD:
5440 case G_SMULH:
5441 case G_UMULH:
5442 case G_FADD:
5443 case G_FMUL:
5444 case G_FSUB:
5445 case G_FNEG:
5446 case G_FABS:
5447 case G_FCANONICALIZE:
5448 case G_FDIV:
5449 case G_FREM:
5450 case G_FMA:
5451 case G_FMAD:
5452 case G_FPOW:
5453 case G_FEXP:
5454 case G_FEXP2:
5455 case G_FEXP10:
5456 case G_FLOG:
5457 case G_FLOG2:
5458 case G_FLOG10:
5459 case G_FLDEXP:
5460 case G_FNEARBYINT:
5461 case G_FCEIL:
5462 case G_FFLOOR:
5463 case G_FRINT:
5464 case G_INTRINSIC_LRINT:
5465 case G_INTRINSIC_LLRINT:
5466 case G_INTRINSIC_ROUND:
5467 case G_INTRINSIC_ROUNDEVEN:
5468 case G_LROUND:
5469 case G_LLROUND:
5470 case G_INTRINSIC_TRUNC:
5471 case G_FCOS:
5472 case G_FSIN:
5473 case G_FTAN:
5474 case G_FACOS:
5475 case G_FASIN:
5476 case G_FATAN:
5477 case G_FATAN2:
5478 case G_FCOSH:
5479 case G_FSINH:
5480 case G_FTANH:
5481 case G_FSQRT:
5482 case G_BSWAP:
5483 case G_BITREVERSE:
5484 case G_SDIV:
5485 case G_UDIV:
5486 case G_SREM:
5487 case G_UREM:
5488 case G_SDIVREM:
5489 case G_UDIVREM:
5490 case G_SMIN:
5491 case G_SMAX:
5492 case G_UMIN:
5493 case G_UMAX:
5494 case G_ABS:
5495 case G_FMINNUM:
5496 case G_FMAXNUM:
5497 case G_FMINNUM_IEEE:
5498 case G_FMAXNUM_IEEE:
5499 case G_FMINIMUM:
5500 case G_FMAXIMUM:
5501 case G_FMINIMUMNUM:
5502 case G_FMAXIMUMNUM:
5503 case G_FSHL:
5504 case G_FSHR:
5505 case G_ROTL:
5506 case G_ROTR:
5507 case G_FREEZE:
5508 case G_SADDSAT:
5509 case G_SSUBSAT:
5510 case G_UADDSAT:
5511 case G_USUBSAT:
5512 case G_UMULO:
5513 case G_SMULO:
5514 case G_SHL:
5515 case G_LSHR:
5516 case G_ASHR:
5517 case G_SSHLSAT:
5518 case G_USHLSAT:
5519 case G_CTLZ:
5520 case G_CTLZ_ZERO_UNDEF:
5521 case G_CTTZ:
5522 case G_CTTZ_ZERO_UNDEF:
5523 case G_CTPOP:
5524 case G_FCOPYSIGN:
5525 case G_ZEXT:
5526 case G_SEXT:
5527 case G_ANYEXT:
5528 case G_FPEXT:
5529 case G_FPTRUNC:
5530 case G_SITOFP:
5531 case G_UITOFP:
5532 case G_FPTOSI:
5533 case G_FPTOUI:
5534 case G_FPTOSI_SAT:
5535 case G_FPTOUI_SAT:
5536 case G_INTTOPTR:
5537 case G_PTRTOINT:
5538 case G_ADDRSPACE_CAST:
5539 case G_UADDO:
5540 case G_USUBO:
5541 case G_UADDE:
5542 case G_USUBE:
5543 case G_SADDO:
5544 case G_SSUBO:
5545 case G_SADDE:
5546 case G_SSUBE:
5547 case G_STRICT_FADD:
5548 case G_STRICT_FSUB:
5549 case G_STRICT_FMUL:
5550 case G_STRICT_FMA:
5551 case G_STRICT_FLDEXP:
5552 case G_FFREXP:
5553 return fewerElementsVectorMultiEltType(GMI, NumElts);
5554 case G_ICMP:
5555 case G_FCMP:
5556 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5557 case G_IS_FPCLASS:
5558 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5559 case G_SELECT:
5560 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5561 return fewerElementsVectorMultiEltType(GMI, NumElts);
5562 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5563 case G_PHI:
5564 return fewerElementsVectorPhi(GMI, NumElts);
5565 case G_UNMERGE_VALUES:
5566 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5567 case G_BUILD_VECTOR:
5568 assert(TypeIdx == 0 && "not a vector type index");
5569 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5570 case G_CONCAT_VECTORS:
5571 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5572 return UnableToLegalize;
5573 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5574 case G_EXTRACT_VECTOR_ELT:
5575 case G_INSERT_VECTOR_ELT:
5576 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5577 case G_LOAD:
5578 case G_STORE:
5579 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5580 case G_SEXT_INREG:
5581 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5582 GET_VECREDUCE_CASES_NONSEQ
5583 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5584 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5585 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5586 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5587 case G_SHUFFLE_VECTOR:
5588 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5589 case G_FPOWI:
5590 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5591 case G_BITCAST:
5592 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5593 case G_INTRINSIC_FPTRUNC_ROUND:
5594 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5595 default:
5596 return UnableToLegalize;
5597 }
5598}
5599
5600 LegalizerHelper::LegalizeResult
5601 LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
5602 LLT NarrowTy) {
5603 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5604 "Not a bitcast operation");
5605
5606 if (TypeIdx != 0)
5607 return UnableToLegalize;
5608
5609 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5610
5611 unsigned NewElemCount =
5612 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5613 SmallVector<Register> SrcVRegs, BitcastVRegs;
5614 if (NewElemCount == 1) {
5615 LLT SrcNarrowTy = SrcTy.getElementType();
5616
5617 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5618 getUnmergeResults(SrcVRegs, *Unmerge);
5619 } else {
5620 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5621
5622 // Split the Src and Dst Reg into smaller registers
5623 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5624 return UnableToLegalize;
5625 }
5626
5627 // Build new smaller bitcast instructions
5629 // Leftover types are not supported for now, but will need to be.
5629 for (Register Reg : SrcVRegs)
5630 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5631
5632 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5633 MI.eraseFromParent();
5634 return Legalized;
5635}
5636
5637 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5638 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5639 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5640 if (TypeIdx != 0)
5641 return UnableToLegalize;
5642
5643 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5644 MI.getFirst3RegLLTs();
5645 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5646 // The shuffle should be canonicalized by now.
5647 if (DstTy != Src1Ty)
5648 return UnableToLegalize;
5649 if (DstTy != Src2Ty)
5650 return UnableToLegalize;
5651
5652 if (!isPowerOf2_32(DstTy.getNumElements()))
5653 return UnableToLegalize;
5654
5655 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5656 // Further legalization attempts will be needed to split further.
5657 NarrowTy =
5658 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5659 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5660
5661 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5662 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5663 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5664 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5665 SplitSrc2Regs[1]};
5666
5667 Register Hi, Lo;
5668
5669 // If Lo or Hi uses elements from at most two of the four input vectors, then
5670 // express it as a vector shuffle of those two inputs. Otherwise extract the
5671 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5672 SmallVector<int, 16> Ops;
5673 for (unsigned High = 0; High < 2; ++High) {
5674 Register &Output = High ? Hi : Lo;
5675
5676 // Build a shuffle mask for the output, discovering on the fly which
5677 // input vectors to use as shuffle operands (recorded in InputUsed).
5678 // If building a suitable shuffle vector proves too hard, then bail
5679 // out with useBuildVector set.
5680 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5681 unsigned FirstMaskIdx = High * NewElts;
5682 bool UseBuildVector = false;
5683 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5684 // The mask element. This indexes into the input.
5685 int Idx = Mask[FirstMaskIdx + MaskOffset];
5686
5687 // The input vector this mask element indexes into.
5688 unsigned Input = (unsigned)Idx / NewElts;
5689
5690 if (Input >= std::size(Inputs)) {
5691 // The mask element does not index into any input vector.
5692 Ops.push_back(-1);
5693 continue;
5694 }
5695
5696 // Turn the index into an offset from the start of the input vector.
5697 Idx -= Input * NewElts;
5698
5699 // Find or create a shuffle vector operand to hold this input.
5700 unsigned OpNo;
5701 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5702 if (InputUsed[OpNo] == Input) {
5703 // This input vector is already an operand.
5704 break;
5705 } else if (InputUsed[OpNo] == -1U) {
5706 // Create a new operand for this input vector.
5707 InputUsed[OpNo] = Input;
5708 break;
5709 }
5710 }
5711
5712 if (OpNo >= std::size(InputUsed)) {
5713 // More than two input vectors used! Give up on trying to create a
5714 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5715 UseBuildVector = true;
5716 break;
5717 }
5718
5719 // Add the mask index for the new shuffle vector.
5720 Ops.push_back(Idx + OpNo * NewElts);
5721 }
5722
5723 if (UseBuildVector) {
5724 LLT EltTy = NarrowTy.getElementType();
5725 SmallVector<Register, 16> SVOps;
5726
5727 // Extract the input elements by hand.
5728 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5729 // The mask element. This indexes into the input.
5730 int Idx = Mask[FirstMaskIdx + MaskOffset];
5731
5732 // The input vector this mask element indexes into.
5733 unsigned Input = (unsigned)Idx / NewElts;
5734
5735 if (Input >= std::size(Inputs)) {
5736 // The mask element is "undef" or indexes off the end of the input.
5737 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5738 continue;
5739 }
5740
5741 // Turn the index into an offset from the start of the input vector.
5742 Idx -= Input * NewElts;
5743
5744 // Extract the vector element by hand.
5745 SVOps.push_back(MIRBuilder
5746 .buildExtractVectorElement(
5747 EltTy, Inputs[Input],
5748 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5749 .getReg(0));
5750 }
5751
5752 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5753 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5754 } else if (InputUsed[0] == -1U) {
5755 // No input vectors were used! The result is undefined.
5756 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5757 } else {
5758 Register Op0 = Inputs[InputUsed[0]];
5759 // If only one input was used, use an undefined vector for the other.
5760 Register Op1 = InputUsed[1] == -1U
5761 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5762 : Inputs[InputUsed[1]];
5763 // At least one input vector was used. Create a new shuffle vector.
5764 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5765 }
5766
5767 Ops.clear();
5768 }
5769
5770 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5771 MI.eraseFromParent();
5772 return Legalized;
5773}
5774
5775 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5776 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5777 auto &RdxMI = cast<GVecReduce>(MI);
5778
5779 if (TypeIdx != 1)
5780 return UnableToLegalize;
5781
5782 // The semantics of the normal non-sequential reductions allow us to freely
5783 // re-associate the operation.
5784 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5785
5786 if (NarrowTy.isVector() &&
5787 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5788 return UnableToLegalize;
5789
5790 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5791 SmallVector<Register> SplitSrcs;
5792 // If NarrowTy is a scalar then we're being asked to scalarize.
5793 const unsigned NumParts =
5794 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5795 : SrcTy.getNumElements();
5796
5797 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5798 if (NarrowTy.isScalar()) {
5799 if (DstTy != NarrowTy)
5800 return UnableToLegalize; // FIXME: handle implicit extensions.
5801
5802 if (isPowerOf2_32(NumParts)) {
5803 // Generate a tree of scalar operations to reduce the critical path.
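    // For example, 8 scalar parts are combined pairwise over three rounds:
    // 8 -> 4 -> 2 -> 1.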
5804 SmallVector<Register> PartialResults;
5805 unsigned NumPartsLeft = NumParts;
5806 while (NumPartsLeft > 1) {
5807 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5808 PartialResults.emplace_back(
5809 MIRBuilder
5810 .buildInstr(ScalarOpc, {NarrowTy},
5811 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5812 .getReg(0));
5813 }
5814 SplitSrcs = PartialResults;
5815 PartialResults.clear();
5816 NumPartsLeft = SplitSrcs.size();
5817 }
5818 assert(SplitSrcs.size() == 1);
5819 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5820 MI.eraseFromParent();
5821 return Legalized;
5822 }
5823 // If we can't generate a tree, then just do sequential operations.
5824 Register Acc = SplitSrcs[0];
5825 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5826 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5827 .getReg(0);
5828 MIRBuilder.buildCopy(DstReg, Acc);
5829 MI.eraseFromParent();
5830 return Legalized;
5831 }
5832 SmallVector<Register> PartialReductions;
5833 for (unsigned Part = 0; Part < NumParts; ++Part) {
5834 PartialReductions.push_back(
5835 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5836 .getReg(0));
5837 }
5838
5839 // If the types involved are powers of 2, we can generate intermediate vector
5840 // ops, before generating a final reduction operation.
5841 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5842 isPowerOf2_32(NarrowTy.getNumElements())) {
5843 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5844 }
5845
5846 Register Acc = PartialReductions[0];
5847 for (unsigned Part = 1; Part < NumParts; ++Part) {
5848 if (Part == NumParts - 1) {
5849 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5850 {Acc, PartialReductions[Part]});
5851 } else {
5852 Acc = MIRBuilder
5853 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5854 .getReg(0);
5855 }
5856 }
5857 MI.eraseFromParent();
5858 return Legalized;
5859}
5860
5861 LegalizerHelper::LegalizeResult
5862 LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5863 unsigned int TypeIdx,
5864 LLT NarrowTy) {
5865 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5866 MI.getFirst3RegLLTs();
5867 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5868 DstTy != NarrowTy)
5869 return UnableToLegalize;
5870
5871 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5872 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5873 "Unexpected vecreduce opcode");
5874 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5875 ? TargetOpcode::G_FADD
5876 : TargetOpcode::G_FMUL;
5877
5878 SmallVector<Register> SplitSrcs;
5879 unsigned NumParts = SrcTy.getNumElements();
5880 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5881 Register Acc = ScalarReg;
5882 for (unsigned i = 0; i < NumParts; i++)
5883 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5884 .getReg(0);
5885
5886 MIRBuilder.buildCopy(DstReg, Acc);
5887 MI.eraseFromParent();
5888 return Legalized;
5889}
5890
5891 LegalizerHelper::LegalizeResult
5892 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5893 LLT SrcTy, LLT NarrowTy,
5894 unsigned ScalarOpc) {
5895 SmallVector<Register> SplitSrcs;
5896 // Split the sources into NarrowTy size pieces.
5897 extractParts(SrcReg, NarrowTy,
5898 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5899 MIRBuilder, MRI);
5900 // We're going to do a tree reduction using vector operations until we have
5901 // one NarrowTy size value left.
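  // For example, reducing <8 x s32> with NarrowTy = <2 x s32> starts from four
  // <2 x s32> pieces and combines them pairwise (4 -> 2 -> 1) before the final
  // reduction of the remaining <2 x s32> value.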
5902 while (SplitSrcs.size() > 1) {
5903 SmallVector<Register> PartialRdxs;
5904 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5905 Register LHS = SplitSrcs[Idx];
5906 Register RHS = SplitSrcs[Idx + 1];
5907 // Create the intermediate vector op.
5908 Register Res =
5909 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5910 PartialRdxs.push_back(Res);
5911 }
5912 SplitSrcs = std::move(PartialRdxs);
5913 }
5914 // Finally generate the requested NarrowTy based reduction.
5915 Observer.changingInstr(MI);
5916 MI.getOperand(1).setReg(SplitSrcs[0]);
5917 Observer.changedInstr(MI);
5918 return Legalized;
5919}
5920
5921 LegalizerHelper::LegalizeResult
5922 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5923 const LLT HalfTy, const LLT AmtTy) {
5924
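  // Split the wide value into two halves and build the result from constant
  // shifts of those halves. For example, an s64 G_SHL by 40 with s32 halves
  // produces Lo = 0 and Hi = InL << (40 - 32) = InL << 8.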
5925 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5926 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5927 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5928
5929 if (Amt.isZero()) {
5930 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5931 MI.eraseFromParent();
5932 return Legalized;
5933 }
5934
5935 LLT NVT = HalfTy;
5936 unsigned NVTBits = HalfTy.getSizeInBits();
5937 unsigned VTBits = 2 * NVTBits;
5938
5939 SrcOp Lo(Register(0)), Hi(Register(0));
5940 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5941 if (Amt.ugt(VTBits)) {
5942 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5943 } else if (Amt.ugt(NVTBits)) {
5944 Lo = MIRBuilder.buildConstant(NVT, 0);
5945 Hi = MIRBuilder.buildShl(NVT, InL,
5946 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5947 } else if (Amt == NVTBits) {
5948 Lo = MIRBuilder.buildConstant(NVT, 0);
5949 Hi = InL;
5950 } else {
5951 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5952 auto OrLHS =
5953 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5954 auto OrRHS = MIRBuilder.buildLShr(
5955 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5956 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5957 }
5958 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5959 if (Amt.ugt(VTBits)) {
5960 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5961 } else if (Amt.ugt(NVTBits)) {
5962 Lo = MIRBuilder.buildLShr(NVT, InH,
5963 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5964 Hi = MIRBuilder.buildConstant(NVT, 0);
5965 } else if (Amt == NVTBits) {
5966 Lo = InH;
5967 Hi = MIRBuilder.buildConstant(NVT, 0);
5968 } else {
5969 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5970
5971 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5972 auto OrRHS = MIRBuilder.buildShl(
5973 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5974
5975 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5976 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5977 }
5978 } else {
5979 if (Amt.ugt(VTBits)) {
5980 Hi = Lo = MIRBuilder.buildAShr(
5981 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5982 } else if (Amt.ugt(NVTBits)) {
5983 Lo = MIRBuilder.buildAShr(NVT, InH,
5984 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5985 Hi = MIRBuilder.buildAShr(NVT, InH,
5986 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5987 } else if (Amt == NVTBits) {
5988 Lo = InH;
5989 Hi = MIRBuilder.buildAShr(NVT, InH,
5990 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5991 } else {
5992 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5993
5994 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5995 auto OrRHS = MIRBuilder.buildShl(
5996 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5997
5998 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5999 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6000 }
6001 }
6002
6003 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6004 MI.eraseFromParent();
6005
6006 return Legalized;
6007}
6008
6009 LegalizerHelper::LegalizeResult
6010 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6011 LLT RequestedTy) {
6012 if (TypeIdx == 1) {
6013 Observer.changingInstr(MI);
6014 narrowScalarSrc(MI, RequestedTy, 2);
6015 Observer.changedInstr(MI);
6016 return Legalized;
6017 }
6018
6019 Register DstReg = MI.getOperand(0).getReg();
6020 LLT DstTy = MRI.getType(DstReg);
6021 if (DstTy.isVector())
6022 return UnableToLegalize;
6023
6024 Register Amt = MI.getOperand(2).getReg();
6025 LLT ShiftAmtTy = MRI.getType(Amt);
6026 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6027 if (DstEltSize % 2 != 0)
6028 return UnableToLegalize;
6029
6030 // Check if we should use multi-way splitting instead of recursive binary
6031 // splitting.
6032 //
6033 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6034 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6035 // and dependency chains created by the usual binary splitting approach
6036 // (128->64->32).
6037 //
6038 // The >= 8 parts threshold ensures we only use this optimization when binary
6039 // splitting would require multiple recursive passes, avoiding overhead for
6040 // simple 2-way splits where the binary approach is sufficient.
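  // For example, narrowing an s256 shift to s32 yields 8 parts and takes the
  // multiway path, while an s128 shift to s32 yields only 4 parts and falls
  // back to binary splitting.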
6041 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6042 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6043 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6044 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6045 // steps).
6046 if (NumParts >= 8)
6047 return narrowScalarShiftMultiway(MI, RequestedTy);
6048 }
6049
6050 // Fall back to binary splitting:
6051 // Ignore the input type. We can only go to exactly half the size of the
6052 // input. If that isn't small enough, the resulting pieces will be further
6053 // legalized.
6054 const unsigned NewBitSize = DstEltSize / 2;
6055 const LLT HalfTy = LLT::scalar(NewBitSize);
6056 const LLT CondTy = LLT::scalar(1);
6057
6058 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6059 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6060 ShiftAmtTy);
6061 }
6062
6063 // TODO: Expand with known bits.
6064
6065 // Handle the fully general expansion by an unknown amount.
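  // For example, for an s64 G_SHL with s32 halves and a variable amount Amt:
  //   0 < Amt < 32: Lo = InL << Amt, Hi = (InL >> (32 - Amt)) | (InH << Amt)
  //   Amt >= 32:    Lo = 0,          Hi = InL << (Amt - 32)
  // with the cases chosen below via the IsShort/IsZero selects.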
6066 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6067
6068 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6069 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6070 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6071
6072 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6073 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6074
6075 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6076 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6077 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6078
6079 Register ResultRegs[2];
6080 switch (MI.getOpcode()) {
6081 case TargetOpcode::G_SHL: {
6082 // Short: ShAmt < NewBitSize
6083 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6084
6085 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6086 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6087 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6088
6089 // Long: ShAmt >= NewBitSize
6090 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6091 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6092
6093 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6094 auto Hi = MIRBuilder.buildSelect(
6095 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6096
6097 ResultRegs[0] = Lo.getReg(0);
6098 ResultRegs[1] = Hi.getReg(0);
6099 break;
6100 }
6101 case TargetOpcode::G_LSHR:
6102 case TargetOpcode::G_ASHR: {
6103 // Short: ShAmt < NewBitSize
6104 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6105
6106 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6107 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6108 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6109
6110 // Long: ShAmt >= NewBitSize
6111 MachineInstrBuilder HiL;
6112 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6113 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6114 } else {
6115 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6116 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6117 }
6118 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6119 {InH, AmtExcess}); // Lo from Hi part.
6120
6121 auto Lo = MIRBuilder.buildSelect(
6122 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6123
6124 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6125
6126 ResultRegs[0] = Lo.getReg(0);
6127 ResultRegs[1] = Hi.getReg(0);
6128 break;
6129 }
6130 default:
6131 llvm_unreachable("not a shift");
6132 }
6133
6134 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6135 MI.eraseFromParent();
6136 return Legalized;
6137}
6138
6139 Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6140 unsigned PartIdx,
6141 unsigned NumParts,
6142 ArrayRef<Register> SrcParts,
6143 const ShiftParams &Params,
6144 LLT TargetTy, LLT ShiftAmtTy) {
6145 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6146 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6147 assert(WordShiftConst && BitShiftConst && "Expected constants");
6148
6149 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6150 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6151 const bool NeedsInterWordShift = ShiftBits != 0;
6152
6153 switch (Opcode) {
6154 case TargetOpcode::G_SHL: {
6155 // Data moves from lower indices to higher indices
6156 // If this part would come from a source beyond our range, it's zero
6157 if (PartIdx < ShiftWords)
6158 return Params.Zero;
6159
6160 unsigned SrcIdx = PartIdx - ShiftWords;
6161 if (!NeedsInterWordShift)
6162 return SrcParts[SrcIdx];
6163
6164 // Combine shifted main part with carry from previous part
6165 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6166 if (SrcIdx > 0) {
6167 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6168 Params.InvBitShift);
6169 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6170 }
6171 return Hi.getReg(0);
6172 }
6173
6174 case TargetOpcode::G_LSHR: {
6175 unsigned SrcIdx = PartIdx + ShiftWords;
6176 if (SrcIdx >= NumParts)
6177 return Params.Zero;
6178 if (!NeedsInterWordShift)
6179 return SrcParts[SrcIdx];
6180
6181 // Combine shifted main part with carry from next part
6182 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6183 if (SrcIdx + 1 < NumParts) {
6184 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6185 Params.InvBitShift);
6186 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6187 }
6188 return Lo.getReg(0);
6189 }
6190
6191 case TargetOpcode::G_ASHR: {
6192 // Like LSHR but preserves sign bit
6193 unsigned SrcIdx = PartIdx + ShiftWords;
6194 if (SrcIdx >= NumParts)
6195 return Params.SignBit;
6196 if (!NeedsInterWordShift)
6197 return SrcParts[SrcIdx];
6198
6199 // Only the original MSB part uses arithmetic shift to preserve sign. All
6200 // other parts use logical shift since they're just moving data bits.
6201 auto Lo =
6202 (SrcIdx == NumParts - 1)
6203 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6204 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6205 Register HiSrc =
6206 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6207 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6208 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6209 }
6210
6211 default:
6212 llvm_unreachable("not a shift");
6213 }
6214}
6215
6216 Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6217 Register MainOperand,
6218 Register ShiftAmt,
6219 LLT TargetTy,
6220 Register CarryOperand) {
6221 // This helper generates a single output part for variable shifts by combining
6222 // the main operand (shifted by BitShift) with carry bits from an adjacent
6223 // part.
6224
6225 // For G_ASHR, individual parts don't have their own sign bit, only the
6226 // complete value does. So we use LSHR for the main operand shift in ASHR
6227 // context.
6228 unsigned MainOpcode =
6229 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6230
6231 // Perform the primary shift on the main operand
6232 Register MainShifted =
6233 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6234 .getReg(0);
6235
6236 // No carry operand available
6237 if (!CarryOperand.isValid())
6238 return MainShifted;
6239
6240 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6241 // so carry bits aren't needed.
6242 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6243 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6244 LLT BoolTy = LLT::scalar(1);
6245 auto IsZeroBitShift =
6246 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6247
6248 // Extract bits from the adjacent part that will "carry over" into this part.
6249 // The carry direction is opposite to the main shift direction, so we can
6250 // align the two shifted values before combining them with OR.
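  // For example, for G_SHL of a 32-bit part with BitShift = 5, the output is
  // (MainOperand << 5) | (CarryOperand >> 27).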
6251
6252 // Determine the carry shift opcode (opposite direction)
6253 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6254 : TargetOpcode::G_SHL;
6255
6256 // Calculate inverse shift amount: BitWidth - ShiftAmt
6257 auto TargetBitsConst =
6258 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6259 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6260
6261 // Shift the carry operand
6262 Register CarryBits =
6263 MIRBuilder
6264 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6265 .getReg(0);
6266
6267 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6268 // TargetBits which would be poison for the individual carry shift operation).
6269 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6270 Register SafeCarryBits =
6271 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6272 .getReg(0);
6273
6274 // Combine the main shifted part with the carry bits
6275 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6276}
6277
6278 LegalizerHelper::LegalizeResult
6279 LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6280 const APInt &Amt,
6281 LLT TargetTy,
6282 LLT ShiftAmtTy) {
6283 // Any wide shift can be decomposed into WordShift + BitShift components.
6284 // When the shift amount is a known constant, directly compute the
6285 // decomposition values and generate constant registers.
6286 Register DstReg = MI.getOperand(0).getReg();
6287 Register SrcReg = MI.getOperand(1).getReg();
6288 LLT DstTy = MRI.getType(DstReg);
6289
6290 const unsigned DstBits = DstTy.getScalarSizeInBits();
6291 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6292 const unsigned NumParts = DstBits / TargetBits;
6293
6294 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6295
6296 // When the shift amount is known at compile time, we just calculate which
6297 // source parts contribute to each output part.
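  // For example, a 128-bit G_LSHR by 50 split into 32-bit parts has
  // WordShift = 1 and BitShift = 18, so output part 0 becomes
  // (SrcParts[1] >> 18) | (SrcParts[2] << 14).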
6298
6299 SmallVector<Register, 8> SrcParts;
6300 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6301
6302 if (Amt.isZero()) {
6303 // No shift needed, just copy
6304 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6305 MI.eraseFromParent();
6306 return Legalized;
6307 }
6308
6309 ShiftParams Params;
6310 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6311 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6312
6313 // Generate constants and values needed by all shift types
6314 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6315 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6316 Params.InvBitShift =
6317 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6318 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6319
6320 // For ASHR, we need the sign-extended value to fill shifted-out positions
6321 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6322 Params.SignBit =
6323 MIRBuilder
6324 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6325 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6326 .getReg(0);
6327
6328 SmallVector<Register, 8> DstParts(NumParts);
6329 for (unsigned I = 0; I < NumParts; ++I)
6330 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6331 Params, TargetTy, ShiftAmtTy);
6332
6333 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6334 MI.eraseFromParent();
6335 return Legalized;
6336}
6337
6338 LegalizerHelper::LegalizeResult
6339 LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6340 Register DstReg = MI.getOperand(0).getReg();
6341 Register SrcReg = MI.getOperand(1).getReg();
6342 Register AmtReg = MI.getOperand(2).getReg();
6343 LLT DstTy = MRI.getType(DstReg);
6344 LLT ShiftAmtTy = MRI.getType(AmtReg);
6345
6346 const unsigned DstBits = DstTy.getScalarSizeInBits();
6347 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6348 const unsigned NumParts = DstBits / TargetBits;
6349
6350 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6351 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6352
6353 // If the shift amount is known at compile time, we can use direct indexing
6354 // instead of generating select chains in the general case.
6355 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6356 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6357 ShiftAmtTy);
6358
6359 // For runtime-variable shift amounts, we must generate a more complex
6360 // sequence that handles all possible shift values using select chains.
6361
6362 // Split the input into target-sized pieces
6363 SmallVector<Register, 8> SrcParts;
6364 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6365
6366 // Shifting by zero should be a no-op.
6367 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6368 LLT BoolTy = LLT::scalar(1);
6369 auto IsZeroShift =
6370 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6371
6372 // Any wide shift can be decomposed into two components:
6373 // 1. WordShift: number of complete target-sized words to shift
6374 // 2. BitShift: number of bits to shift within each word
6375 //
6376 // Example: 128-bit >> 50 with 32-bit target:
6377 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6378 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6379 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6380 auto TargetBitsLog2Const =
6381 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6382 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6383
6384 Register WordShift =
6385 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6386 Register BitShift =
6387 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6388
6389 // Fill values:
6390 // - SHL/LSHR: fill with zeros
6391 // - ASHR: fill with sign-extended MSB
6392 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6393
6394 Register FillValue;
6395 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6396 auto TargetBitsMinusOneConst =
6397 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6398 FillValue = MIRBuilder
6399 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6400 TargetBitsMinusOneConst)
6401 .getReg(0);
6402 } else {
6403 FillValue = ZeroReg;
6404 }
6405
6406 SmallVector<Register, 8> DstParts(NumParts);
6407
6408 // For each output part, generate a select chain that chooses the correct
6409 // result based on the runtime WordShift value. This handles all possible
6410 // word shift amounts by pre-calculating what each would produce.
6411 for (unsigned I = 0; I < NumParts; ++I) {
6412 // Initialize with appropriate default value for this shift type
6413 Register InBoundsResult = FillValue;
6414
6415 // clang-format off
6416 // Build a branchless select chain by pre-computing results for all possible
6417 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6418 //
6419 // K=0: select(WordShift==0, result0, FillValue)
6420 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6421 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6422 // clang-format on
6423 for (unsigned K = 0; K < NumParts; ++K) {
6424 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6425 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6426 WordShift, WordShiftKConst);
6427
6428 // Calculate source indices for this word shift
6429 //
6430 // For 4-part 128-bit value with K=1 word shift:
6431 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6432 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6433 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6434 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6435 int MainSrcIdx;
6436 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6437
6438 switch (MI.getOpcode()) {
6439 case TargetOpcode::G_SHL:
6440 MainSrcIdx = (int)I - (int)K;
6441 CarrySrcIdx = MainSrcIdx - 1;
6442 break;
6443 case TargetOpcode::G_LSHR:
6444 case TargetOpcode::G_ASHR:
6445 MainSrcIdx = (int)I + (int)K;
6446 CarrySrcIdx = MainSrcIdx + 1;
6447 break;
6448 default:
6449 llvm_unreachable("Not a shift");
6450 }
6451
6452 // Check bounds and build the result for this word shift
6453 Register ResultForK;
6454 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6455 Register MainOp = SrcParts[MainSrcIdx];
6456 Register CarryOp;
6457
6458 // Determine carry operand with bounds checking
6459 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6460 CarryOp = SrcParts[CarrySrcIdx];
6461 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6462 CarrySrcIdx >= (int)NumParts)
6463 CarryOp = FillValue; // Use sign extension
6464
6465 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6466 TargetTy, CarryOp);
6467 } else {
6468 // Out of bounds - use fill value for this k
6469 ResultForK = FillValue;
6470 }
6471
6472 // Select this result if WordShift equals k
6473 InBoundsResult =
6474 MIRBuilder
6475 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6476 .getReg(0);
6477 }
6478
6479 // Handle zero-shift special case: if shift is 0, use original input
6480 DstParts[I] =
6481 MIRBuilder
6482 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6483 .getReg(0);
6484 }
6485
6486 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6487 MI.eraseFromParent();
6488 return Legalized;
6489}
6490
6491 LegalizerHelper::LegalizeResult
6492 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6493 LLT MoreTy) {
6494 assert(TypeIdx == 0 && "Expecting only Idx 0");
6495
6496 Observer.changingInstr(MI);
6497 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6498 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6499 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6500 moreElementsVectorSrc(MI, MoreTy, I);
6501 }
6502
6503 MachineBasicBlock &MBB = *MI.getParent();
6504 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6505 moreElementsVectorDst(MI, MoreTy, 0);
6506 Observer.changedInstr(MI);
6507 return Legalized;
6508}
6509
6510MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6511 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6512 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6513
6514 switch (Opcode) {
6515 default:
6517 "getNeutralElementForVecReduce called with invalid opcode!");
6518 case TargetOpcode::G_VECREDUCE_ADD:
6519 case TargetOpcode::G_VECREDUCE_OR:
6520 case TargetOpcode::G_VECREDUCE_XOR:
6521 case TargetOpcode::G_VECREDUCE_UMAX:
6522 return MIRBuilder.buildConstant(Ty, 0);
6523 case TargetOpcode::G_VECREDUCE_MUL:
6524 return MIRBuilder.buildConstant(Ty, 1);
6525 case TargetOpcode::G_VECREDUCE_AND:
6526 case TargetOpcode::G_VECREDUCE_UMIN:
6527 return MIRBuilder.buildConstant(
6528 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6529 case TargetOpcode::G_VECREDUCE_SMAX:
6530 return MIRBuilder.buildConstant(
6531 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6532 case TargetOpcode::G_VECREDUCE_SMIN:
6533 return MIRBuilder.buildConstant(
6534 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6535 case TargetOpcode::G_VECREDUCE_FADD:
6536 return MIRBuilder.buildFConstant(Ty, -0.0);
6537 case TargetOpcode::G_VECREDUCE_FMUL:
6538 return MIRBuilder.buildFConstant(Ty, 1.0);
6539 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6540 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6541 assert(false && "getNeutralElementForVecReduce unimplemented for "
6542 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6543 }
6544 llvm_unreachable("switch expected to return!");
6545}
6546
6547LegalizerHelper::LegalizeResult
6548LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6549 LLT MoreTy) {
6550 unsigned Opc = MI.getOpcode();
6551 switch (Opc) {
6552 case TargetOpcode::G_IMPLICIT_DEF:
6553 case TargetOpcode::G_LOAD: {
6554 if (TypeIdx != 0)
6555 return UnableToLegalize;
6556 Observer.changingInstr(MI);
6557 moreElementsVectorDst(MI, MoreTy, 0);
6558 Observer.changedInstr(MI);
6559 return Legalized;
6560 }
6561 case TargetOpcode::G_STORE:
6562 if (TypeIdx != 0)
6563 return UnableToLegalize;
6564 Observer.changingInstr(MI);
6565 moreElementsVectorSrc(MI, MoreTy, 0);
6566 Observer.changedInstr(MI);
6567 return Legalized;
6568 case TargetOpcode::G_AND:
6569 case TargetOpcode::G_OR:
6570 case TargetOpcode::G_XOR:
6571 case TargetOpcode::G_ADD:
6572 case TargetOpcode::G_SUB:
6573 case TargetOpcode::G_MUL:
6574 case TargetOpcode::G_FADD:
6575 case TargetOpcode::G_FSUB:
6576 case TargetOpcode::G_FMUL:
6577 case TargetOpcode::G_FDIV:
6578 case TargetOpcode::G_FCOPYSIGN:
6579 case TargetOpcode::G_UADDSAT:
6580 case TargetOpcode::G_USUBSAT:
6581 case TargetOpcode::G_SADDSAT:
6582 case TargetOpcode::G_SSUBSAT:
6583 case TargetOpcode::G_SMIN:
6584 case TargetOpcode::G_SMAX:
6585 case TargetOpcode::G_UMIN:
6586 case TargetOpcode::G_UMAX:
6587 case TargetOpcode::G_FMINNUM:
6588 case TargetOpcode::G_FMAXNUM:
6589 case TargetOpcode::G_FMINNUM_IEEE:
6590 case TargetOpcode::G_FMAXNUM_IEEE:
6591 case TargetOpcode::G_FMINIMUM:
6592 case TargetOpcode::G_FMAXIMUM:
6593 case TargetOpcode::G_FMINIMUMNUM:
6594 case TargetOpcode::G_FMAXIMUMNUM:
6595 case TargetOpcode::G_STRICT_FADD:
6596 case TargetOpcode::G_STRICT_FSUB:
6597 case TargetOpcode::G_STRICT_FMUL:
6598 case TargetOpcode::G_SHL:
6599 case TargetOpcode::G_ASHR:
6600 case TargetOpcode::G_LSHR: {
6601 Observer.changingInstr(MI);
6602 moreElementsVectorSrc(MI, MoreTy, 1);
6603 moreElementsVectorSrc(MI, MoreTy, 2);
6604 moreElementsVectorDst(MI, MoreTy, 0);
6605 Observer.changedInstr(MI);
6606 return Legalized;
6607 }
6608 case TargetOpcode::G_FMA:
6609 case TargetOpcode::G_STRICT_FMA:
6610 case TargetOpcode::G_FSHR:
6611 case TargetOpcode::G_FSHL: {
6612 Observer.changingInstr(MI);
6613 moreElementsVectorSrc(MI, MoreTy, 1);
6614 moreElementsVectorSrc(MI, MoreTy, 2);
6615 moreElementsVectorSrc(MI, MoreTy, 3);
6616 moreElementsVectorDst(MI, MoreTy, 0);
6617 Observer.changedInstr(MI);
6618 return Legalized;
6619 }
6620 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6621 case TargetOpcode::G_EXTRACT:
6622 if (TypeIdx != 1)
6623 return UnableToLegalize;
6624 Observer.changingInstr(MI);
6625 moreElementsVectorSrc(MI, MoreTy, 1);
6626 Observer.changedInstr(MI);
6627 return Legalized;
6628 case TargetOpcode::G_INSERT:
6629 case TargetOpcode::G_INSERT_VECTOR_ELT:
6630 case TargetOpcode::G_FREEZE:
6631 case TargetOpcode::G_FNEG:
6632 case TargetOpcode::G_FABS:
6633 case TargetOpcode::G_FSQRT:
6634 case TargetOpcode::G_FCEIL:
6635 case TargetOpcode::G_FFLOOR:
6636 case TargetOpcode::G_FNEARBYINT:
6637 case TargetOpcode::G_FRINT:
6638 case TargetOpcode::G_INTRINSIC_ROUND:
6639 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6640 case TargetOpcode::G_INTRINSIC_TRUNC:
6641 case TargetOpcode::G_BITREVERSE:
6642 case TargetOpcode::G_BSWAP:
6643 case TargetOpcode::G_FCANONICALIZE:
6644 case TargetOpcode::G_SEXT_INREG:
6645 case TargetOpcode::G_ABS:
6646 case TargetOpcode::G_CTLZ:
6647 case TargetOpcode::G_CTPOP:
6648 if (TypeIdx != 0)
6649 return UnableToLegalize;
6650 Observer.changingInstr(MI);
6651 moreElementsVectorSrc(MI, MoreTy, 1);
6652 moreElementsVectorDst(MI, MoreTy, 0);
6653 Observer.changedInstr(MI);
6654 return Legalized;
6655 case TargetOpcode::G_SELECT: {
6656 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6657 if (TypeIdx == 1) {
6658 if (!CondTy.isScalar() ||
6659 DstTy.getElementCount() != MoreTy.getElementCount())
6660 return UnableToLegalize;
6661
6662 // This is turning a scalar select of vectors into a vector
6663 // select. Broadcast the select condition.
6664 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6665 Observer.changingInstr(MI);
6666 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6667 Observer.changedInstr(MI);
6668 return Legalized;
6669 }
6670
6671 if (CondTy.isVector())
6672 return UnableToLegalize;
6673
6674 Observer.changingInstr(MI);
6675 moreElementsVectorSrc(MI, MoreTy, 2);
6676 moreElementsVectorSrc(MI, MoreTy, 3);
6677 moreElementsVectorDst(MI, MoreTy, 0);
6678 Observer.changedInstr(MI);
6679 return Legalized;
6680 }
6681 case TargetOpcode::G_UNMERGE_VALUES:
6682 return UnableToLegalize;
6683 case TargetOpcode::G_PHI:
6684 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6685 case TargetOpcode::G_SHUFFLE_VECTOR:
6686 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6687 case TargetOpcode::G_BUILD_VECTOR: {
6688 SmallVector<SrcOp, 8> Elts;
6689 for (auto Op : MI.uses()) {
6690 Elts.push_back(Op.getReg());
6691 }
6692
6693 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6694 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6695 }
6696
6697 MIRBuilder.buildDeleteTrailingVectorElements(
6698 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6699 MI.eraseFromParent();
6700 return Legalized;
6701 }
6702 case TargetOpcode::G_SEXT:
6703 case TargetOpcode::G_ZEXT:
6704 case TargetOpcode::G_ANYEXT:
6705 case TargetOpcode::G_TRUNC:
6706 case TargetOpcode::G_FPTRUNC:
6707 case TargetOpcode::G_FPEXT:
6708 case TargetOpcode::G_FPTOSI:
6709 case TargetOpcode::G_FPTOUI:
6710 case TargetOpcode::G_FPTOSI_SAT:
6711 case TargetOpcode::G_FPTOUI_SAT:
6712 case TargetOpcode::G_SITOFP:
6713 case TargetOpcode::G_UITOFP: {
6714 Observer.changingInstr(MI);
6715 LLT SrcExtTy;
6716 LLT DstExtTy;
6717 if (TypeIdx == 0) {
6718 DstExtTy = MoreTy;
6719 SrcExtTy = LLT::fixed_vector(
6720 MoreTy.getNumElements(),
6721 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6722 } else {
6723 DstExtTy = LLT::fixed_vector(
6724 MoreTy.getNumElements(),
6725 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6726 SrcExtTy = MoreTy;
6727 }
6728 moreElementsVectorSrc(MI, SrcExtTy, 1);
6729 moreElementsVectorDst(MI, DstExtTy, 0);
6730 Observer.changedInstr(MI);
6731 return Legalized;
6732 }
6733 case TargetOpcode::G_ICMP:
6734 case TargetOpcode::G_FCMP: {
6735 if (TypeIdx != 1)
6736 return UnableToLegalize;
6737
6738 Observer.changingInstr(MI);
6739 moreElementsVectorSrc(MI, MoreTy, 2);
6740 moreElementsVectorSrc(MI, MoreTy, 3);
6741 LLT CondTy = LLT::fixed_vector(
6742 MoreTy.getNumElements(),
6743 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6744 moreElementsVectorDst(MI, CondTy, 0);
6745 Observer.changedInstr(MI);
6746 return Legalized;
6747 }
6748 case TargetOpcode::G_BITCAST: {
6749 if (TypeIdx != 0)
6750 return UnableToLegalize;
6751
6752 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6753 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6754
6755 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6756 if (coefficient % DstTy.getNumElements() != 0)
6757 return UnableToLegalize;
6758
6759 coefficient = coefficient / DstTy.getNumElements();
6760
6761 LLT NewTy = SrcTy.changeElementCount(
6762 ElementCount::get(coefficient, MoreTy.isScalable()));
6763 Observer.changingInstr(MI);
6764 moreElementsVectorSrc(MI, NewTy, 1);
6765 moreElementsVectorDst(MI, MoreTy, 0);
6766 Observer.changedInstr(MI);
6767 return Legalized;
6768 }
6769 case TargetOpcode::G_VECREDUCE_FADD:
6770 case TargetOpcode::G_VECREDUCE_FMUL:
6771 case TargetOpcode::G_VECREDUCE_ADD:
6772 case TargetOpcode::G_VECREDUCE_MUL:
6773 case TargetOpcode::G_VECREDUCE_AND:
6774 case TargetOpcode::G_VECREDUCE_OR:
6775 case TargetOpcode::G_VECREDUCE_XOR:
6776 case TargetOpcode::G_VECREDUCE_SMAX:
6777 case TargetOpcode::G_VECREDUCE_SMIN:
6778 case TargetOpcode::G_VECREDUCE_UMAX:
6779 case TargetOpcode::G_VECREDUCE_UMIN: {
6780 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6781 MachineOperand &MO = MI.getOperand(1);
6782 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6783 auto NeutralElement = getNeutralElementForVecReduce(
6784 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6785
6786 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6787 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6788 i != e; i++) {
6789 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6790 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6791 NeutralElement, Idx);
6792 }
6793
6794 Observer.changingInstr(MI);
6795 MO.setReg(NewVec.getReg(0));
6796 Observer.changedInstr(MI);
6797 return Legalized;
6798 }
6799
6800 default:
6801 return UnableToLegalize;
6802 }
6803}
6804
6805LegalizerHelper::LegalizeResult
6806LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6807 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6808 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6809 unsigned MaskNumElts = Mask.size();
6810 unsigned SrcNumElts = SrcTy.getNumElements();
6811 LLT DestEltTy = DstTy.getElementType();
6812
6813 if (MaskNumElts == SrcNumElts)
6814 return Legalized;
6815
6816 if (MaskNumElts < SrcNumElts) {
6817 // Extend mask to match new destination vector size with
6818 // undef values.
6819 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6820 llvm::copy(Mask, NewMask.begin());
6821
6822 moreElementsVectorDst(MI, SrcTy, 0);
6823 MIRBuilder.setInstrAndDebugLoc(MI);
6824 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6825 MI.getOperand(1).getReg(),
6826 MI.getOperand(2).getReg(), NewMask);
6827 MI.eraseFromParent();
6828
6829 return Legalized;
6830 }
6831
6832 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6833 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6834 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6835
6836 // Create new source vectors by concatenating the initial
6837 // source vectors with undefined vectors of the same size.
6838 auto Undef = MIRBuilder.buildUndef(SrcTy);
6839 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6840 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6841 MOps1[0] = MI.getOperand(1).getReg();
6842 MOps2[0] = MI.getOperand(2).getReg();
6843
6844 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6845 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6846
6847 // Readjust mask for new input vector length.
6848 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6849 for (unsigned I = 0; I != MaskNumElts; ++I) {
6850 int Idx = Mask[I];
6851 if (Idx >= static_cast<int>(SrcNumElts))
6852 Idx += PaddedMaskNumElts - SrcNumElts;
6853 MappedOps[I] = Idx;
6854 }
6855
6856 // If we got more elements than required, extract subvector.
6857 if (MaskNumElts != PaddedMaskNumElts) {
6858 auto Shuffle =
6859 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6860
6861 SmallVector<Register, 16> Elts(MaskNumElts);
6862 for (unsigned I = 0; I < MaskNumElts; ++I) {
6863 Elts[I] =
6864 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6865 .getReg(0);
6866 }
6867 MIRBuilder.buildBuildVector(DstReg, Elts);
6868 } else {
6869 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6870 }
6871
6872 MI.eraseFromParent();
6873 return Legalized;
6874}
6875
6876LegalizerHelper::LegalizeResult
6877LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6878 unsigned int TypeIdx, LLT MoreTy) {
6879 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6880 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6881 unsigned NumElts = DstTy.getNumElements();
6882 unsigned WidenNumElts = MoreTy.getNumElements();
6883
6884 if (DstTy.isVector() && Src1Ty.isVector() &&
6885 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6886 return equalizeVectorShuffleLengths(MI);
6887 }
6888
6889 if (TypeIdx != 0)
6890 return UnableToLegalize;
6891
6892 // Expect a canonicalized shuffle.
6893 if (DstTy != Src1Ty || DstTy != Src2Ty)
6894 return UnableToLegalize;
6895
6896 moreElementsVectorSrc(MI, MoreTy, 1);
6897 moreElementsVectorSrc(MI, MoreTy, 2);
6898
6899 // Adjust mask based on new input vector length.
6900 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6901 for (unsigned I = 0; I != NumElts; ++I) {
6902 int Idx = Mask[I];
6903 if (Idx < static_cast<int>(NumElts))
6904 NewMask[I] = Idx;
6905 else
6906 NewMask[I] = Idx - NumElts + WidenNumElts;
6907 }
6908 moreElementsVectorDst(MI, MoreTy, 0);
6909 MIRBuilder.setInstrAndDebugLoc(MI);
6910 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6911 MI.getOperand(1).getReg(),
6912 MI.getOperand(2).getReg(), NewMask);
6913 MI.eraseFromParent();
6914 return Legalized;
6915}
6916
6917void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6918 ArrayRef<Register> Src1Regs,
6919 ArrayRef<Register> Src2Regs,
6920 LLT NarrowTy) {
6921 MachineIRBuilder &B = MIRBuilder;
6922 unsigned SrcParts = Src1Regs.size();
6923 unsigned DstParts = DstRegs.size();
6924
6925 unsigned DstIdx = 0; // Low bits of the result.
6926 Register FactorSum =
6927 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6928 DstRegs[DstIdx] = FactorSum;
6929
6930 Register CarrySumPrevDstIdx;
6931 SmallVector<Register, 8> Factors;
6932
6933 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6934 // Collect low parts of muls for DstIdx.
6935 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6936 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6937 MachineInstrBuilder Mul =
6938 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6939 Factors.push_back(Mul.getReg(0));
6940 }
6941 // Collect high parts of muls from previous DstIdx.
6942 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6943 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6944 MachineInstrBuilder Umulh =
6945 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6946 Factors.push_back(Umulh.getReg(0));
6947 }
6948 // Add CarrySum from additions calculated for previous DstIdx.
6949 if (DstIdx != 1) {
6950 Factors.push_back(CarrySumPrevDstIdx);
6951 }
6952
6953 Register CarrySum;
6954 // Add all factors and accumulate all carries into CarrySum.
6955 if (DstIdx != DstParts - 1) {
6956 MachineInstrBuilder Uaddo =
6957 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6958 FactorSum = Uaddo.getReg(0);
6959 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6960 for (unsigned i = 2; i < Factors.size(); ++i) {
6961 MachineInstrBuilder Uaddo =
6962 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6963 FactorSum = Uaddo.getReg(0);
6964 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
6965 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6966 }
6967 } else {
6968 // Since value for the next index is not calculated, neither is CarrySum.
6969 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6970 for (unsigned i = 2; i < Factors.size(); ++i)
6971 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6972 }
6973
6974 CarrySumPrevDstIdx = CarrySum;
6975 DstRegs[DstIdx] = FactorSum;
6976 Factors.clear();
6977 }
6978}
6979
6980LegalizerHelper::LegalizeResult
6981LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
6982 LLT NarrowTy) {
6983 if (TypeIdx != 0)
6984 return UnableToLegalize;
6985
6986 Register DstReg = MI.getOperand(0).getReg();
6987 LLT DstType = MRI.getType(DstReg);
6988 // FIXME: add support for vector types
6989 if (DstType.isVector())
6990 return UnableToLegalize;
6991
6992 unsigned Opcode = MI.getOpcode();
6993 unsigned OpO, OpE, OpF;
6994 switch (Opcode) {
6995 case TargetOpcode::G_SADDO:
6996 case TargetOpcode::G_SADDE:
6997 case TargetOpcode::G_UADDO:
6998 case TargetOpcode::G_UADDE:
6999 case TargetOpcode::G_ADD:
7000 OpO = TargetOpcode::G_UADDO;
7001 OpE = TargetOpcode::G_UADDE;
7002 OpF = TargetOpcode::G_UADDE;
7003 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7004 OpF = TargetOpcode::G_SADDE;
7005 break;
7006 case TargetOpcode::G_SSUBO:
7007 case TargetOpcode::G_SSUBE:
7008 case TargetOpcode::G_USUBO:
7009 case TargetOpcode::G_USUBE:
7010 case TargetOpcode::G_SUB:
7011 OpO = TargetOpcode::G_USUBO;
7012 OpE = TargetOpcode::G_USUBE;
7013 OpF = TargetOpcode::G_USUBE;
7014 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7015 OpF = TargetOpcode::G_SSUBE;
7016 break;
7017 default:
7018 llvm_unreachable("Unexpected add/sub opcode!");
7019 }
7020
7021 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7022 unsigned NumDefs = MI.getNumExplicitDefs();
7023 Register Src1 = MI.getOperand(NumDefs).getReg();
7024 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7025 Register CarryDst, CarryIn;
7026 if (NumDefs == 2)
7027 CarryDst = MI.getOperand(1).getReg();
7028 if (MI.getNumOperands() == NumDefs + 3)
7029 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7030
7031 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7032 LLT LeftoverTy, DummyTy;
7033 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7034 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7035 MIRBuilder, MRI);
7036 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7037 MRI);
7038
7039 int NarrowParts = Src1Regs.size();
7040 Src1Regs.append(Src1Left);
7041 Src2Regs.append(Src2Left);
7042 DstRegs.reserve(Src1Regs.size());
7043
7044 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7045 Register DstReg =
7046 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7047 Register CarryOut;
7048 // Forward the final carry-out to the destination register
7049 if (i == e - 1 && CarryDst)
7050 CarryOut = CarryDst;
7051 else
7052 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7053
7054 if (!CarryIn) {
7055 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7056 {Src1Regs[i], Src2Regs[i]});
7057 } else if (i == e - 1) {
7058 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7059 {Src1Regs[i], Src2Regs[i], CarryIn});
7060 } else {
7061 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7062 {Src1Regs[i], Src2Regs[i], CarryIn});
7063 }
7064
7065 DstRegs.push_back(DstReg);
7066 CarryIn = CarryOut;
7067 }
7068 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7069 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7070 ArrayRef(DstRegs).drop_front(NarrowParts));
7071
7072 MI.eraseFromParent();
7073 return Legalized;
7074}
7075
7076LegalizerHelper::LegalizeResult
7077LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7078 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7079
7080 LLT Ty = MRI.getType(DstReg);
7081 if (Ty.isVector())
7082 return UnableToLegalize;
7083
7084 unsigned Size = Ty.getSizeInBits();
7085 unsigned NarrowSize = NarrowTy.getSizeInBits();
7086 if (Size % NarrowSize != 0)
7087 return UnableToLegalize;
7088
7089 unsigned NumParts = Size / NarrowSize;
7090 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7091 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7092
7093 SmallVector<Register, 2> Src1Parts, Src2Parts;
7094 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7095 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7096 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7097 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7098
7099 // Take only high half of registers if this is high mul.
7100 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7101 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7102 MI.eraseFromParent();
7103 return Legalized;
7104}
7105
7106LegalizerHelper::LegalizeResult
7107LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7108 LLT NarrowTy) {
7109 if (TypeIdx != 0)
7110 return UnableToLegalize;
7111
7112 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7113
7114 Register Src = MI.getOperand(1).getReg();
7115 LLT SrcTy = MRI.getType(Src);
7116
7117 // If all finite floats fit into the narrowed integer type, we can just swap
7118 // out the result type. This is practically only useful for conversions from
7119 // half to at least 16-bits, so just handle the one case.
7120 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7121 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7122 return UnableToLegalize;
7123
7124 Observer.changingInstr(MI);
7125 narrowScalarDst(MI, NarrowTy, 0,
7126 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7127 Observer.changedInstr(MI);
7128 return Legalized;
7129}
7130
7131LegalizerHelper::LegalizeResult
7132LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7133 LLT NarrowTy) {
7134 if (TypeIdx != 1)
7135 return UnableToLegalize;
7136
7137 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7138
7139 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7140 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7141 // NarrowSize.
7142 if (SizeOp1 % NarrowSize != 0)
7143 return UnableToLegalize;
7144 int NumParts = SizeOp1 / NarrowSize;
7145
7146 SmallVector<Register, 2> SrcRegs, DstRegs;
7147 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7148 MIRBuilder, MRI);
7149
7150 Register OpReg = MI.getOperand(0).getReg();
7151 uint64_t OpStart = MI.getOperand(2).getImm();
7152 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7153 for (int i = 0; i < NumParts; ++i) {
7154 unsigned SrcStart = i * NarrowSize;
7155
7156 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7157 // No part of the extract uses this subregister, ignore it.
7158 continue;
7159 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7160 // The entire subregister is extracted, forward the value.
7161 DstRegs.push_back(SrcRegs[i]);
7162 continue;
7163 }
7164
7165 // OpSegStart is where this destination segment would start in OpReg if it
7166 // extended infinitely in both directions.
7167 int64_t ExtractOffset;
7168 uint64_t SegSize;
7169 if (OpStart < SrcStart) {
7170 ExtractOffset = 0;
7171 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7172 } else {
7173 ExtractOffset = OpStart - SrcStart;
7174 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7175 }
7176
7177 Register SegReg = SrcRegs[i];
7178 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7179 // A genuine extract is needed.
7180 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7181 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7182 }
7183
7184 DstRegs.push_back(SegReg);
7185 }
7186
7187 Register DstReg = MI.getOperand(0).getReg();
7188 if (MRI.getType(DstReg).isVector())
7189 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7190 else if (DstRegs.size() > 1)
7191 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7192 else
7193 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7194 MI.eraseFromParent();
7195 return Legalized;
7196}
7197
7198LegalizerHelper::LegalizeResult
7199LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7200 LLT NarrowTy) {
7201 // FIXME: Don't know how to handle secondary types yet.
7202 if (TypeIdx != 0)
7203 return UnableToLegalize;
7204
7205 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7206 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7207 LLT LeftoverTy;
7208 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7209 LeftoverRegs, MIRBuilder, MRI);
7210
7211 SrcRegs.append(LeftoverRegs);
7212
7213 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7214 Register OpReg = MI.getOperand(2).getReg();
7215 uint64_t OpStart = MI.getOperand(3).getImm();
7216 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7217 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7218 unsigned DstStart = I * NarrowSize;
7219
7220 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7221 // The entire subregister is defined by this insert, forward the new
7222 // value.
7223 DstRegs.push_back(OpReg);
7224 continue;
7225 }
7226
7227 Register SrcReg = SrcRegs[I];
7228 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7229 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7230 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7231 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7232 }
7233
7234 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7235 // No part of the insert affects this subregister, forward the original.
7236 DstRegs.push_back(SrcReg);
7237 continue;
7238 }
7239
7240 // OpSegStart is where this destination segment would start in OpReg if it
7241 // extended infinitely in both directions.
7242 int64_t ExtractOffset, InsertOffset;
7243 uint64_t SegSize;
7244 if (OpStart < DstStart) {
7245 InsertOffset = 0;
7246 ExtractOffset = DstStart - OpStart;
7247 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7248 } else {
7249 InsertOffset = OpStart - DstStart;
7250 ExtractOffset = 0;
7251 SegSize =
7252 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7253 }
7254
7255 Register SegReg = OpReg;
7256 if (ExtractOffset != 0 || SegSize != OpSize) {
7257 // A genuine extract is needed.
7258 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7259 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7260 }
7261
7262 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7263 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7264 DstRegs.push_back(DstReg);
7265 }
7266
7267 uint64_t WideSize = DstRegs.size() * NarrowSize;
7268 Register DstReg = MI.getOperand(0).getReg();
7269 if (WideSize > RegTy.getSizeInBits()) {
7270 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7271 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7272 MIRBuilder.buildTrunc(DstReg, MergeReg);
7273 } else
7274 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7275
7276 MI.eraseFromParent();
7277 return Legalized;
7278}
7279
7280LegalizerHelper::LegalizeResult
7281LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7282 LLT NarrowTy) {
7283 Register DstReg = MI.getOperand(0).getReg();
7284 LLT DstTy = MRI.getType(DstReg);
7285
7286 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7287
7288 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7289 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7290 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7291 LLT LeftoverTy;
7292 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7293 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7294 return UnableToLegalize;
7295
7296 LLT Unused;
7297 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7298 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7299 llvm_unreachable("inconsistent extractParts result");
7300
7301 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7302 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7303 {Src0Regs[I], Src1Regs[I]});
7304 DstRegs.push_back(Inst.getReg(0));
7305 }
7306
7307 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7308 auto Inst = MIRBuilder.buildInstr(
7309 MI.getOpcode(),
7310 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7311 DstLeftoverRegs.push_back(Inst.getReg(0));
7312 }
7313
7314 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7315 LeftoverTy, DstLeftoverRegs);
7316
7317 MI.eraseFromParent();
7318 return Legalized;
7319}
7320
7321LegalizerHelper::LegalizeResult
7322LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7323 LLT NarrowTy) {
7324 if (TypeIdx != 0)
7325 return UnableToLegalize;
7326
7327 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7328
7329 LLT DstTy = MRI.getType(DstReg);
7330 if (DstTy.isVector())
7331 return UnableToLegalize;
7332
7333 SmallVector<Register, 8> Parts;
7334 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7335 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7336 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7337
7338 MI.eraseFromParent();
7339 return Legalized;
7340}
7341
7342LegalizerHelper::LegalizeResult
7343LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7344 LLT NarrowTy) {
7345 if (TypeIdx != 0)
7346 return UnableToLegalize;
7347
7348 Register CondReg = MI.getOperand(1).getReg();
7349 LLT CondTy = MRI.getType(CondReg);
7350 if (CondTy.isVector()) // TODO: Handle vselect
7351 return UnableToLegalize;
7352
7353 Register DstReg = MI.getOperand(0).getReg();
7354 LLT DstTy = MRI.getType(DstReg);
7355
7356 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7357 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7358 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7359 LLT LeftoverTy;
7360 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7361 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7362 return UnableToLegalize;
7363
7364 LLT Unused;
7365 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7366 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7367 llvm_unreachable("inconsistent extractParts result");
7368
7369 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7370 auto Select = MIRBuilder.buildSelect(NarrowTy,
7371 CondReg, Src1Regs[I], Src2Regs[I]);
7372 DstRegs.push_back(Select.getReg(0));
7373 }
7374
7375 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7376 auto Select = MIRBuilder.buildSelect(
7377 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7378 DstLeftoverRegs.push_back(Select.getReg(0));
7379 }
7380
7381 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7382 LeftoverTy, DstLeftoverRegs);
7383
7384 MI.eraseFromParent();
7385 return Legalized;
7386}
7387
7388LegalizerHelper::LegalizeResult
7389LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7390 LLT NarrowTy) {
7391 if (TypeIdx != 1)
7392 return UnableToLegalize;
7393
7394 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7395 unsigned NarrowSize = NarrowTy.getSizeInBits();
7396
7397 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7398 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7399
7400 MachineIRBuilder &B = MIRBuilder;
7401 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7402 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
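 // E.g. for a 64-bit source split into 32-bit halves, 0x0000000000001234 has
 // Hi == 0, so the result is 32 + ctlz32(0x00001234) = 32 + 19 = 51.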
7403 auto C_0 = B.buildConstant(NarrowTy, 0);
7404 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7405 UnmergeSrc.getReg(1), C_0);
7406 auto LoCTLZ = IsUndef ?
7407 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7408 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7409 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7410 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7411 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7412 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7413
7414 MI.eraseFromParent();
7415 return Legalized;
7416 }
7417
7418 return UnableToLegalize;
7419}
7420
7421LegalizerHelper::LegalizeResult
7422LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7423 LLT NarrowTy) {
7424 if (TypeIdx != 1)
7425 return UnableToLegalize;
7426
7427 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7428 unsigned NarrowSize = NarrowTy.getSizeInBits();
7429
7430 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7431 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7432
7433 MachineIRBuilder &B = MIRBuilder;
7434 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7435 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7436 auto C_0 = B.buildConstant(NarrowTy, 0);
7437 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7438 UnmergeSrc.getReg(0), C_0);
7439 auto HiCTTZ = IsUndef ?
7440 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7441 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7442 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7443 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7444 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7445 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7446
7447 MI.eraseFromParent();
7448 return Legalized;
7449 }
7450
7451 return UnableToLegalize;
7452}
7453
7454LegalizerHelper::LegalizeResult
7455LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7456 LLT NarrowTy) {
7457 if (TypeIdx != 1)
7458 return UnableToLegalize;
7459
7460 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7461 unsigned NarrowSize = NarrowTy.getSizeInBits();
7462
7463 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7464 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7465
7466 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7467 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7468 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7469
7470 MI.eraseFromParent();
7471 return Legalized;
7472 }
7473
7474 return UnableToLegalize;
7475}
7476
7477LegalizerHelper::LegalizeResult
7478LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7479 LLT NarrowTy) {
7480 if (TypeIdx != 1)
7481 return UnableToLegalize;
7482
7483 MachineIRBuilder &B = MIRBuilder;
7484 Register ExpReg = MI.getOperand(2).getReg();
7485 LLT ExpTy = MRI.getType(ExpReg);
7486
7487 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7488
7489 // Clamp the exponent to the range of the target type.
7490 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7491 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7492 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7493 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7494
7495 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7496 Observer.changingInstr(MI);
7497 MI.getOperand(2).setReg(Trunc.getReg(0));
7498 Observer.changedInstr(MI);
7499 return Legalized;
7500}
7501
7502LegalizerHelper::LegalizeResult
7503LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7504 unsigned Opc = MI.getOpcode();
7505 const auto &TII = MIRBuilder.getTII();
7506 auto isSupported = [this](const LegalityQuery &Q) {
7507 auto QAction = LI.getAction(Q).Action;
7508 return QAction == Legal || QAction == Libcall || QAction == Custom;
7509 };
7510 switch (Opc) {
7511 default:
7512 return UnableToLegalize;
7513 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7514 // This trivially expands to CTLZ.
7515 Observer.changingInstr(MI);
7516 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7517 Observer.changedInstr(MI);
7518 return Legalized;
7519 }
7520 case TargetOpcode::G_CTLZ: {
7521 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7522 unsigned Len = SrcTy.getSizeInBits();
7523
7524 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7525 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7526 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7527 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7528 auto ICmp = MIRBuilder.buildICmp(
7529 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7530 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7531 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7532 MI.eraseFromParent();
7533 return Legalized;
7534 }
7535 // for now, we do this:
7536 // NewLen = NextPowerOf2(Len);
7537 // x = x | (x >> 1);
7538 // x = x | (x >> 2);
7539 // ...
7540 // x = x | (x >>16);
7541 // x = x | (x >>32); // for 64-bit input
7542 // Up to NewLen/2
7543 // return Len - popcount(x);
7544 //
7545 // Ref: "Hacker's Delight" by Henry Warren
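 // E.g. for an 8-bit source x = 0b00010110: the or-shift chain smears the
 // highest set bit downward, giving x = 0b00011111, so
 // ctlz = 8 - popcount(0b00011111) = 8 - 5 = 3.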
7546 Register Op = SrcReg;
7547 unsigned NewLen = PowerOf2Ceil(Len);
7548 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7549 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7550 auto MIBOp = MIRBuilder.buildOr(
7551 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7552 Op = MIBOp.getReg(0);
7553 }
7554 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7555 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7556 MIBPop);
7557 MI.eraseFromParent();
7558 return Legalized;
7559 }
7560 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7561 // This trivially expands to CTTZ.
7562 Observer.changingInstr(MI);
7563 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7564 Observer.changedInstr(MI);
7565 return Legalized;
7566 }
7567 case TargetOpcode::G_CTTZ: {
7568 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7569
7570 unsigned Len = SrcTy.getSizeInBits();
7571 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7572 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7573 // zero.
7574 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7575 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7576 auto ICmp = MIRBuilder.buildICmp(
7577 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7578 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7579 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7580 MI.eraseFromParent();
7581 return Legalized;
7582 }
7583 // for now, we use: { return popcount(~x & (x - 1)); }
7584 // unless the target has ctlz but not ctpop, in which case we use:
7585 // { return 32 - nlz(~x & (x-1)); }
7586 // Ref: "Hacker's Delight" by Henry Warren
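 // E.g. for an 8-bit source x = 0b01101000: ~x & (x - 1) = 0b00000111, which
 // has exactly cttz(x) = 3 bits set, so its popcount gives the answer.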
7587 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7588 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7589 auto MIBTmp = MIRBuilder.buildAnd(
7590 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7591 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7592 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7593 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7594 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7595 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7596 MI.eraseFromParent();
7597 return Legalized;
7598 }
7599 Observer.changingInstr(MI);
7600 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7601 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7602 Observer.changedInstr(MI);
7603 return Legalized;
7604 }
7605 case TargetOpcode::G_CTPOP: {
7606 Register SrcReg = MI.getOperand(1).getReg();
7607 LLT Ty = MRI.getType(SrcReg);
7608 unsigned Size = Ty.getSizeInBits();
7609 MachineIRBuilder &B = MIRBuilder;
7610
7611 // Count set bits in blocks of 2 bits. Default approach would be
7612 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7613 // We use the following formula instead:
7614 // B2Count = val - { (val >> 1) & 0x55555555 }
7615 // since it gives same result in blocks of 2 with one instruction less.
7616 auto C_1 = B.buildConstant(Ty, 1);
7617 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7618 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7619 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7620 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7621 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7622
7623 // In order to get count in blocks of 4 add values from adjacent block of 2.
7624 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7625 auto C_2 = B.buildConstant(Ty, 2);
7626 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7627 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7628 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7629 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7630 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7631 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7632
7633 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7634 // addition since count value sits in range {0,...,8} and 4 bits are enough
7635 // to hold such binary values. After addition high 4 bits still hold count
7636 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7637 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7638 auto C_4 = B.buildConstant(Ty, 4);
7639 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7640 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7641 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7642 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7643 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7644
7645 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7646 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7647 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
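 // As a reference, for a 32-bit value the whole expansion corresponds to the
 // classic scalar sequence (shown only as an illustration of the steps built
 // here, not as code emitted by this function):
 //   V = V - ((V >> 1) & 0x55555555);                 // blocks of 2
 //   V = (V & 0x33333333) + ((V >> 2) & 0x33333333);  // blocks of 4
 //   V = (V + (V >> 4)) & 0x0F0F0F0F;                 // blocks of 8
 //   return (V * 0x01010101) >> 24;                   // sum bytes, Size - 8 = 24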
7648 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7649
7650 // Shift count result from 8 high bits to low bits.
7651 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7652
7653 auto IsMulSupported = [this](const LLT Ty) {
7654 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7655 return Action == Legal || Action == WidenScalar || Action == Custom;
7656 };
7657 if (IsMulSupported(Ty)) {
7658 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7659 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7660 } else {
7661 auto ResTmp = B8Count;
7662 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7663 auto ShiftC = B.buildConstant(Ty, Shift);
7664 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7665 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7666 }
7667 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7668 }
7669 MI.eraseFromParent();
7670 return Legalized;
7671 }
7672 }
7673}
7674
7675// Check that (every element of) Reg is undef or not an exact multiple of BW.
7676static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7677 Register Reg, unsigned BW) {
7678 return matchUnaryPredicate(
7679 MRI, Reg,
7680 [=](const Constant *C) {
7681 // Null constant here means an undef.
7682 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7683 return !CI || CI->getValue().urem(BW) != 0;
7684 },
7685 /*AllowUndefs*/ true);
7686}
7687
7688LegalizerHelper::LegalizeResult
7689LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7690 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7691 LLT Ty = MRI.getType(Dst);
7692 LLT ShTy = MRI.getType(Z);
7693
7694 unsigned BW = Ty.getScalarSizeInBits();
7695
7696 if (!isPowerOf2_32(BW))
7697 return UnableToLegalize;
7698
7699 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7700 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7701
7702 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7703 // fshl X, Y, Z -> fshr X, Y, -Z
7704 // fshr X, Y, Z -> fshl X, Y, -Z
7705 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7706 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7707 } else {
7708 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7709 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7710 auto One = MIRBuilder.buildConstant(ShTy, 1);
7711 if (IsFSHL) {
7712 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7713 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7714 } else {
7715 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7716 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7717 }
7718
7719 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7720 }
7721
7722 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7723 MI.eraseFromParent();
7724 return Legalized;
7725}
7726
7727LegalizerHelper::LegalizeResult
7728LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7729 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7730 LLT Ty = MRI.getType(Dst);
7731 LLT ShTy = MRI.getType(Z);
7732
7733 const unsigned BW = Ty.getScalarSizeInBits();
7734 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7735
7736 Register ShX, ShY;
7737 Register ShAmt, InvShAmt;
7738
7739 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7740 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7741 // fshl: X << C | Y >> (BW - C)
7742 // fshr: X << (BW - C) | Y >> C
7743 // where C = Z % BW is not zero
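 // E.g. with BW = 32 and Z = 8: fshl -> (X << 8) | (Y >> 24) and
 // fshr -> (X << 24) | (Y >> 8).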
7744 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7745 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7746 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7747 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7748 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7749 } else {
7750 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7751 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7752 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7753 if (isPowerOf2_32(BW)) {
7754 // Z % BW -> Z & (BW - 1)
7755 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7756 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7757 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7758 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7759 } else {
7760 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7761 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7762 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7763 }
7764
7765 auto One = MIRBuilder.buildConstant(ShTy, 1);
7766 if (IsFSHL) {
7767 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7768 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7769 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7770 } else {
7771 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7772 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7773 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7774 }
7775 }
7776
7777 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7778 MI.eraseFromParent();
7779 return Legalized;
7780}
7781
7782LegalizerHelper::LegalizeResult
7783LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7784 // These operations approximately do the following (while avoiding undefined
7785 // shifts by BW):
7786 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7787 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
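 // Conceptually this shifts the concatenation X:Y and keeps one register worth
 // of bits, e.g. for BW = 8, X = 0xAB, Y = 0xCD, Z = 3:
 // G_FSHL = 0x5E (high byte of 0xABCD << 3), G_FSHR = 0x79 (low byte of 0xABCD >> 3).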
7788 Register Dst = MI.getOperand(0).getReg();
7789 LLT Ty = MRI.getType(Dst);
7790 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7791
7792 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7793 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7794
7795 // TODO: Use smarter heuristic that accounts for vector legalization.
7796 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7797 return lowerFunnelShiftAsShifts(MI);
7798
7799 // This only works for powers of 2, fallback to shifts if it fails.
7800 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7801 if (Result == UnableToLegalize)
7802 return lowerFunnelShiftAsShifts(MI);
7803 return Result;
7804}
7805
7806LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7807 auto [Dst, Src] = MI.getFirst2Regs();
7808 LLT DstTy = MRI.getType(Dst);
7809 LLT SrcTy = MRI.getType(Src);
7810
7811 uint32_t DstTySize = DstTy.getSizeInBits();
7812 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7813 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7814
7815 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7816 !isPowerOf2_32(SrcTyScalarSize))
7817 return UnableToLegalize;
7818
7819 // The step between extends is too large; split it by creating an intermediate
7820 // extend instruction.
7821 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7822 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7823 // If the destination type is illegal, split it into multiple statements
7824 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7825 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7826 // Unmerge the vector
7827 LLT EltTy = MidTy.changeElementCount(
7828 MidTy.getElementCount().divideCoefficientBy(2));
7829 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7830
7831 // ZExt the vectors
7832 LLT ZExtResTy = DstTy.changeElementCount(
7833 DstTy.getElementCount().divideCoefficientBy(2));
7834 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7835 {UnmergeSrc.getReg(0)});
7836 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7837 {UnmergeSrc.getReg(1)});
7838
7839 // Merge the ending vectors
7840 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7841
7842 MI.eraseFromParent();
7843 return Legalized;
7844 }
7845 return UnableToLegalize;
7846}
7847
7848LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7849 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7850 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7851 // Similar to how operand splitting is done in SelectionDAG, we can handle
7852 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7853 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7854 // %lo16(<4 x s16>) = G_TRUNC %inlo
7855 // %hi16(<4 x s16>) = G_TRUNC %inhi
7856 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7857 // %res(<8 x s8>) = G_TRUNC %in16
7858
7859 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7860
7861 Register DstReg = MI.getOperand(0).getReg();
7862 Register SrcReg = MI.getOperand(1).getReg();
7863 LLT DstTy = MRI.getType(DstReg);
7864 LLT SrcTy = MRI.getType(SrcReg);
7865
7866 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7867 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7868 isPowerOf2_32(SrcTy.getNumElements()) &&
7869 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7870 // Split input type.
7871 LLT SplitSrcTy = SrcTy.changeElementCount(
7872 SrcTy.getElementCount().divideCoefficientBy(2));
7873
7874 // First, split the source into two smaller vectors.
7875 SmallVector<Register, 2> SplitSrcs;
7876 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7877
7878 // Truncate the splits into intermediate narrower elements.
7879 LLT InterTy;
7880 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7881 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7882 else
7883 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7884 for (Register &Src : SplitSrcs)
7885 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7886
7887 // Combine the new truncates into one vector
7888 auto Merge = MIRBuilder.buildMergeLikeInstr(
7889 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7890
7891 // Truncate the new vector to the final result type
7892 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7893 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7894 else
7895 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7896
7897 MI.eraseFromParent();
7898
7899 return Legalized;
7900 }
7901 return UnableToLegalize;
7902}
7903
7904LegalizerHelper::LegalizeResult
7905LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
7906 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7907 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7908 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7909 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7910 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7911 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7912 MI.eraseFromParent();
7913 return Legalized;
7914}
7915
7916LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
7917 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7918
7919 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7920 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7921
7922 MIRBuilder.setInstrAndDebugLoc(MI);
7923
7924 // If a rotate in the other direction is supported, use it.
7925 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7926 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
7927 isPowerOf2_32(EltSizeInBits))
7928 return lowerRotateWithReverseRotate(MI);
7929
7930 // If a funnel shift is supported, use it.
7931 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7932 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7933 bool IsFShLegal = false;
7934 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7935 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7936 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
7937 Register R3) {
7938 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
7939 MI.eraseFromParent();
7940 return Legalized;
7941 };
7942 // If a funnel shift in the other direction is supported, use it.
7943 if (IsFShLegal) {
7944 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7945 } else if (isPowerOf2_32(EltSizeInBits)) {
7946 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
7947 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7948 }
7949 }
7950
7951 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7952 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7953 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
7954 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
7955 Register ShVal;
7956 Register RevShiftVal;
7957 if (isPowerOf2_32(EltSizeInBits)) {
7958 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7959 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
7960 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7961 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7962 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7963 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7964 RevShiftVal =
7965 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
7966 } else {
7967 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7968 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7969 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
7970 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
7971 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7972 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7973 auto One = MIRBuilder.buildConstant(AmtTy, 1);
7974 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7975 RevShiftVal =
7976 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
7977 }
7978 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
7979 MI.eraseFromParent();
7980 return Legalized;
7981}
7982
7983// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
7984// representation.
7985LegalizerHelper::LegalizeResult
7986LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
7987 auto [Dst, Src] = MI.getFirst2Regs();
7988 const LLT S64 = LLT::scalar(64);
7989 const LLT S32 = LLT::scalar(32);
7990 const LLT S1 = LLT::scalar(1);
7991
7992 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7993
7994 // unsigned cul2f(ulong u) {
7995 // uint lz = clz(u);
7996 // uint e = (u != 0) ? 127U + 63U - lz : 0;
7997 // u = (u << lz) & 0x7fffffffffffffffUL;
7998 // ulong t = u & 0xffffffffffUL;
7999 // uint v = (e << 23) | (uint)(u >> 40);
8000 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8001 // return as_float(v + r);
8002 // }
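 // E.g. u = 1 << 40: lz = 23, e = 127 + 63 - 23 = 167 (the IEEE-754 single
 // precision exponent field for 2^40), the shifted mantissa bits and t are
 // both 0, so r = 0 and the result is exactly 2^40 as a float.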
8003
8004 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8005 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8006
8007 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8008
8009 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8010 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8011
8012 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8013 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8014
8015 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8016 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8017
8018 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8019
8020 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8021 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8022
8023 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8024 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8025 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8026
8027 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8028 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8029 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8030 auto One = MIRBuilder.buildConstant(S32, 1);
8031
8032 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8033 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8034 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8035 MIRBuilder.buildAdd(Dst, V, R);
8036
8037 MI.eraseFromParent();
8038 return Legalized;
8039}
8040
8041// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8042// operations and G_SITOFP
8043LegalizerHelper::LegalizeResult
8044LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
8045 auto [Dst, Src] = MI.getFirst2Regs();
8046 const LLT S64 = LLT::scalar(64);
8047 const LLT S32 = LLT::scalar(32);
8048 const LLT S1 = LLT::scalar(1);
8049
8050 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8051
8052 // For i64 < INT_MAX we simply reuse SITOFP.
8053 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8054 // saved before division, convert to float by SITOFP, multiply the result
8055 // by 2.
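 // E.g. Src = (1 << 63) + 1: Halved = 1 << 62, LowerBit = 1, so RoundedHalved
 // = (1 << 62) + 1; the kept low bit acts as a sticky bit so SITOFP still
 // rounds correctly, and doubling yields the correctly rounded 2^63.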
8056 auto One = MIRBuilder.buildConstant(S64, 1);
8057 auto Zero = MIRBuilder.buildConstant(S64, 0);
8058 // Result if Src < INT_MAX
8059 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8060 // Result if Src >= INT_MAX
8061 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8062 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8063 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8064 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8065 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8066 // Check if the original value is larger than INT_MAX by comparing with
8067 // zero to pick one of the two conversions.
8068 auto IsLarge =
8069 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8070 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8071
8072 MI.eraseFromParent();
8073 return Legalized;
8074}
8075
8076// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8077// IEEE double representation.
8078LegalizerHelper::LegalizeResult
8079LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
8080 auto [Dst, Src] = MI.getFirst2Regs();
8081 const LLT S64 = LLT::scalar(64);
8082 const LLT S32 = LLT::scalar(32);
8083
8084 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8085
8086 // We create double value from 32 bit parts with 32 exponent difference.
8087 // Note that + and - are float operations that adjust the implicit leading
8088 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8089 //
8090 // X = 2^52 * 1.0...LowBits
8091 // Y = 2^84 * 1.0...HighBits
8092 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8093 // = - 2^52 * 1.0...HighBits
8094 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
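 // E.g. Src = 0x0000000100000002 (2^32 + 2): LowBitsFP carries the bit pattern
 // of 2^52 + 2 and HighBitsFP that of 2^84 + 2^32, so Scratch = 2^32 - 2^52 and
 // the final sum is exactly 2^32 + 2.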
8095 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8096 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8097 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8098 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8099 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8100
8101 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8102 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8103 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8104 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8105 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8106 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8107 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8108
8109 MI.eraseFromParent();
8110 return Legalized;
8111}
8112
8113/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8114/// convert fpround f64->f16 without double-rounding, so we manually perform the
8115/// lowering here where we know it is valid.
8116static LegalizerHelper::LegalizeResult
8117loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8118 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8119 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8120 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8121 : MIRBuilder.buildSITOFP(SrcTy, Src);
8122 LLT S32Ty = SrcTy.changeElementSize(32);
8123 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8124 MIRBuilder.buildFPTrunc(Dst, M2);
8125 MI.eraseFromParent();
8126 return LegalizerHelper::Legalized;
8127}
8128
8129LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
8130 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8131
8132 if (SrcTy == LLT::scalar(1)) {
8133 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8134 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8135 MIRBuilder.buildSelect(Dst, Src, True, False);
8136 MI.eraseFromParent();
8137 return Legalized;
8138 }
8139
8140 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8141 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8142
8143 if (SrcTy != LLT::scalar(64))
8144 return UnableToLegalize;
8145
8146 if (DstTy == LLT::scalar(32))
8147 // TODO: SelectionDAG has several alternative expansions to port which may
8148 // be more reasonable depending on the available instructions. We also need
8149 // a more advanced mechanism to choose an optimal version depending on
8150 // target features such as sitofp or CTLZ availability.
8151 return lowerU64ToF32WithSITOFP(MI);
8152
8153 if (DstTy == LLT::scalar(64))
8154 return lowerU64ToF64BitFloatOps(MI);
8155
8156 return UnableToLegalize;
8157}
8158
8159LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
8160 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8161
8162 const LLT S64 = LLT::scalar(64);
8163 const LLT S32 = LLT::scalar(32);
8164 const LLT S1 = LLT::scalar(1);
8165
8166 if (SrcTy == S1) {
8167 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8168 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8169 MIRBuilder.buildSelect(Dst, Src, True, False);
8170 MI.eraseFromParent();
8171 return Legalized;
8172 }
8173
8174 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8175 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8176
8177 if (SrcTy != S64)
8178 return UnableToLegalize;
8179
8180 if (DstTy == S32) {
8181 // signed cl2f(long l) {
8182 // long s = l >> 63;
8183 // float r = cul2f((l + s) ^ s);
8184 // return s ? -r : r;
8185 // }
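    // (l + s) ^ s is the usual branch-free absolute value: s is 0 for
    // non-negative l and all-ones otherwise, so the expression either leaves l
    // alone or two's-complement negates it. Even l == INT64_MIN works out,
    // since it maps to 2^63, which the unsigned conversion below handles.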
8186 Register L = Src;
8187 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8188 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8189
8190 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8191 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8192 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8193
8194 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8195 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8196 MIRBuilder.buildConstant(S64, 0));
8197 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8198 MI.eraseFromParent();
8199 return Legalized;
8200 }
8201
8202 return UnableToLegalize;
8203}
8204
8205LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
8206 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8207 const LLT S64 = LLT::scalar(64);
8208 const LLT S32 = LLT::scalar(32);
8209
8210 if (SrcTy != S64 && SrcTy != S32)
8211 return UnableToLegalize;
8212 if (DstTy != S32 && DstTy != S64)
8213 return UnableToLegalize;
8214
8215 // FPTOSI gives the same result as FPTOUI for positive signed integers.
8216 // FPTOUI needs to deal with fp values that convert to unsigned integers
8217 // greater than or equal to 2^31 for float or 2^63 for double; call this 2^Exp.
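  // For example, converting the f32 value 3.0e9 to u32: it is >= 2^31, so the
  // final select picks the adjusted path, FPTOSI(3.0e9 - 2^31) = 852516352,
  // which XORed with the sign mask 0x80000000 yields 3000000000.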
8218
8219 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8220 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8221 : APFloat::IEEEdouble(),
8222 APInt::getZero(SrcTy.getSizeInBits()));
8223 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8224
8225 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8226
8227 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8228 // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
8229 // (Value - 2^Exp) and add 2^Exp back by setting the highest bit in the result.
8230 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8231 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8232 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8233 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8234
8235 const LLT S1 = LLT::scalar(1);
8236
8237 MachineInstrBuilder FCMP =
8238 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8239 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8240
8241 MI.eraseFromParent();
8242 return Legalized;
8243}
8244
8245LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
8246 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8247 const LLT S64 = LLT::scalar(64);
8248 const LLT S32 = LLT::scalar(32);
8249
8250 // FIXME: Only f32 to i64 conversions are supported.
8251 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8252 return UnableToLegalize;
8253
8254 // Expand f32 -> i64 conversion
8255 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8256 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
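  // For example, Src = 100.0f (0x42C80000): the exponent field is 133, so
  // Exponent = 6; the mantissa with the implicit bit restored is 0x00C80000.
  // Since Exponent < 23 the mantissa is shifted right by 23 - 6 = 17, giving
  // 100, and the sign fixup below is a no-op for positive inputs.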
8257
8258 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8259
8260 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8261 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8262
8263 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8264 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8265
8266 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8267 APInt::getSignMask(SrcEltBits));
8268 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8269 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8270 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8271 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8272
8273 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8274 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8275 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8276
8277 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8278 R = MIRBuilder.buildZExt(DstTy, R);
8279
8280 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8281 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8282 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8283 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8284
8285 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8286 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8287
8288 const LLT S1 = LLT::scalar(1);
8289 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8290 S1, Exponent, ExponentLoBit);
8291
8292 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8293
8294 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8295 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8296
8297 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8298
8299 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8300 S1, Exponent, ZeroSrcTy);
8301
8302 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8303 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8304
8305 MI.eraseFromParent();
8306 return Legalized;
8307}
8308
8309LegalizerHelper::LegalizeResult
8310LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
8311 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8312
8313 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8314 unsigned SatWidth = DstTy.getScalarSizeInBits();
8315
8316 // Determine minimum and maximum integer values and their corresponding
8317 // floating-point values.
8318 APInt MinInt, MaxInt;
8319 if (IsSigned) {
8320 MinInt = APInt::getSignedMinValue(SatWidth);
8321 MaxInt = APInt::getSignedMaxValue(SatWidth);
8322 } else {
8323 MinInt = APInt::getMinValue(SatWidth);
8324 MaxInt = APInt::getMaxValue(SatWidth);
8325 }
8326
8327 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8328 APFloat MinFloat(Semantics);
8329 APFloat MaxFloat(Semantics);
8330
8331 APFloat::opStatus MinStatus =
8332 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8333 APFloat::opStatus MaxStatus =
8334 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8335 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8336 !(MaxStatus & APFloat::opStatus::opInexact);
8337
8338 // If the integer bounds are exactly representable as floats, emit a
8339 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8340 // and selects.
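  // For example, f32 -> s8 has exact bounds (-128.0 and 127.0 are both exactly
  // representable), so the min/max path is taken; f32 -> s32 is not exact
  // (2147483647 rounds toward zero to 2147483520), so the compare-and-select
  // path further below is used instead.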
8341 if (AreExactFloatBounds) {
8342 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8343 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8344 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8345 SrcTy.changeElementSize(1), Src, MaxC);
8346 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8347 // Clamp by MaxFloat from above. NaN cannot occur.
8348 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8349 auto MinP =
8350 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8351 MinC, MachineInstr::FmNoNans);
8352 auto Min =
8353 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8354 // Convert clamped value to integer. In the unsigned case we're done,
8355 // because we mapped NaN to MinFloat, which will cast to zero.
8356 if (!IsSigned) {
8357 MIRBuilder.buildFPTOUI(Dst, Min);
8358 MI.eraseFromParent();
8359 return Legalized;
8360 }
8361
8362 // Otherwise, select 0 if Src is NaN.
8363 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8364 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8365 DstTy.changeElementSize(1), Src, Src);
8366 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8367 FpToInt);
8368 MI.eraseFromParent();
8369 return Legalized;
8370 }
8371
8372 // Result of direct conversion. The assumption here is that the operation is
8373 // non-trapping and it's fine to apply it to an out-of-range value if we
8374 // select it away later.
8375 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8376 : MIRBuilder.buildFPTOUI(DstTy, Src);
8377
8378 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8379 // MinInt if Src is NaN.
8380 auto ULT =
8381 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8382 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8383 auto Max = MIRBuilder.buildSelect(
8384 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8385 // If Src OGT MaxFloat, select MaxInt.
8386 auto OGT =
8387 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8388 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8389
8390 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8391 // is already zero.
8392 if (!IsSigned) {
8393 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8394 Max);
8395 MI.eraseFromParent();
8396 return Legalized;
8397 }
8398
8399 // Otherwise, select 0 if Src is NaN.
8400 auto Min = MIRBuilder.buildSelect(
8401 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8402 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8403 DstTy.changeElementSize(1), Src, Src);
8404 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8405 MI.eraseFromParent();
8406 return Legalized;
8407}
8408
8409// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8410LegalizerHelper::LegalizeResult
8411LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
8412 const LLT S1 = LLT::scalar(1);
8413 const LLT S32 = LLT::scalar(32);
8414
8415 auto [Dst, Src] = MI.getFirst2Regs();
8416 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8417 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8418
8419 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8420 return UnableToLegalize;
8421
8422 if (MI.getFlag(MachineInstr::FmAfn)) {
8423 unsigned Flags = MI.getFlags();
8424 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8425 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8426 MI.eraseFromParent();
8427 return Legalized;
8428 }
8429
8430 const unsigned ExpMask = 0x7ff;
8431 const unsigned ExpBiasf64 = 1023;
8432 const unsigned ExpBiasf16 = 15;
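  // f64 is 1 sign, 11 exponent (bias 1023) and 52 mantissa bits; f16 is 1 sign,
  // 5 exponent (bias 15) and 10 mantissa bits, which is why the code below
  // works on the high 32 bits (UH) plus a sticky summary of the low bits (U).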
8433
8434 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8435 Register U = Unmerge.getReg(0);
8436 Register UH = Unmerge.getReg(1);
8437
8438 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8439 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8440
8441 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8442 // add the f16 bias (15) to get the biased exponent for the f16 format.
8443 E = MIRBuilder.buildAdd(
8444 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8445
8446 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8447 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8448
8449 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8450 MIRBuilder.buildConstant(S32, 0x1ff));
8451 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8452
8453 auto Zero = MIRBuilder.buildConstant(S32, 0);
8454 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8455 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8456 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8457
8458 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8459 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8460 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8461 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8462
8463 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8464 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8465
8466 // N = M | (E << 12);
8467 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8468 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8469
8470 // B = clamp(1-E, 0, 13);
8471 auto One = MIRBuilder.buildConstant(S32, 1);
8472 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8473 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8474 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8475
8476 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8477 MIRBuilder.buildConstant(S32, 0x1000));
8478
8479 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8480 auto D0 = MIRBuilder.buildShl(S32, D, B);
8481
8482 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8483 D0, SigSetHigh);
8484 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8485 D = MIRBuilder.buildOr(S32, D, D1);
8486
8487 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8488 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8489
8490 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8491 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8492
8493 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8494 MIRBuilder.buildConstant(S32, 3));
8495 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8496
8497 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8498 MIRBuilder.buildConstant(S32, 5));
8499 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8500
8501 V1 = MIRBuilder.buildOr(S32, V0, V1);
8502 V = MIRBuilder.buildAdd(S32, V, V1);
8503
8504 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8505 E, MIRBuilder.buildConstant(S32, 30));
8506 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8507 MIRBuilder.buildConstant(S32, 0x7c00), V);
8508
8509 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8510 E, MIRBuilder.buildConstant(S32, 1039));
8511 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8512
8513 // Extract the sign bit.
8514 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8515 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8516
8517 // Insert the sign bit
8518 V = MIRBuilder.buildOr(S32, Sign, V);
8519
8520 MIRBuilder.buildTrunc(Dst, V);
8521 MI.eraseFromParent();
8522 return Legalized;
8523}
8524
8525LegalizerHelper::LegalizeResult
8526LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
8527 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8528 const LLT S64 = LLT::scalar(64);
8529 const LLT S16 = LLT::scalar(16);
8530
8531 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8532 return lowerFPTRUNC_F64_TO_F16(MI);
8533
8534 return UnableToLegalize;
8535}
8536
8538 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8539 LLT Ty = MRI.getType(Dst);
8540
8541 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8542 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8543 MI.eraseFromParent();
8544 return Legalized;
8545}
8546
8547static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8548 switch (Opc) {
8549 case TargetOpcode::G_SMIN:
8550 return CmpInst::ICMP_SLT;
8551 case TargetOpcode::G_SMAX:
8552 return CmpInst::ICMP_SGT;
8553 case TargetOpcode::G_UMIN:
8554 return CmpInst::ICMP_ULT;
8555 case TargetOpcode::G_UMAX:
8556 return CmpInst::ICMP_UGT;
8557 default:
8558 llvm_unreachable("not in integer min/max");
8559 }
8560}
8561
8562LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
8563 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8564
8565 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8566 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8567
8568 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8569 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8570
8571 MI.eraseFromParent();
8572 return Legalized;
8573}
8574
8575LegalizerHelper::LegalizeResult
8576LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
8577 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8578
8579 Register Dst = Cmp->getReg(0);
8580 LLT DstTy = MRI.getType(Dst);
8581 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8582 LLT CmpTy = DstTy.changeElementSize(1);
8583
8584 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8585 ? CmpInst::ICMP_SLT
8586 : CmpInst::ICMP_ULT;
8587 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8588 ? CmpInst::ICMP_SGT
8589 : CmpInst::ICMP_UGT;
8590
8591 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8592 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8593 Cmp->getRHSReg());
8594 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8595 Cmp->getRHSReg());
8596
8597 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8598 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8599 if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) ||
8601 auto One = MIRBuilder.buildConstant(DstTy, 1);
8602 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8603
8604 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8605 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8606 } else {
8608 std::swap(IsGT, IsLT);
8609 // Extend boolean results to DstTy, which is at least i2, before subtracting
8610 // them.
8611 unsigned BoolExtOp =
8612 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8613 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8614 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8615 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8616 }
8617
8618 MI.eraseFromParent();
8619 return Legalized;
8620}
8621
8622LegalizerHelper::LegalizeResult
8623LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
8624 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8625 const int Src0Size = Src0Ty.getScalarSizeInBits();
8626 const int Src1Size = Src1Ty.getScalarSizeInBits();
8627
8628 auto SignBitMask = MIRBuilder.buildConstant(
8629 Src0Ty, APInt::getSignMask(Src0Size));
8630
8631 auto NotSignBitMask = MIRBuilder.buildConstant(
8632 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8633
8634 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8635 Register And1;
8636 if (Src0Ty == Src1Ty) {
8637 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8638 } else if (Src0Size > Src1Size) {
8639 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8640 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8641 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8642 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8643 } else {
8644 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8645 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8646 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8647 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8648 }
8649
8650 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8651 // constants are a nan and -0.0, but the final result should preserve
8652 // everything.
8653 unsigned Flags = MI.getFlags();
8654
8655 // We masked the sign bit and the not-sign bit, so these are disjoint.
8656 Flags |= MachineInstr::Disjoint;
8657
8658 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8659
8660 MI.eraseFromParent();
8661 return Legalized;
8662}
8663
8664LegalizerHelper::LegalizeResult
8665LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
8666 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8667 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8668 // depend on fminnum/fmaxnum.
8669
8670 unsigned NewOp;
8671 switch (MI.getOpcode()) {
8672 case TargetOpcode::G_FMINNUM:
8673 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8674 break;
8675 case TargetOpcode::G_FMINIMUMNUM:
8676 NewOp = TargetOpcode::G_FMINNUM;
8677 break;
8678 case TargetOpcode::G_FMAXNUM:
8679 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8680 break;
8681 case TargetOpcode::G_FMAXIMUMNUM:
8682 NewOp = TargetOpcode::G_FMAXNUM;
8683 break;
8684 default:
8685 llvm_unreachable("unexpected min/max opcode");
8686 }
8687
8688 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8689 LLT Ty = MRI.getType(Dst);
8690
8691 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8692 // Insert canonicalizes if it's possible we need to quiet to get correct
8693 // sNaN behavior.
8694
8695 // Note this must be done here, and not as an optimization combine, in the
8696 // absence of a dedicated quiet-snan instruction as we're using an
8697 // omni-purpose G_FCANONICALIZE.
8698 if (!isKnownNeverSNaN(Src0, MRI))
8699 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8700
8701 if (!isKnownNeverSNaN(Src1, MRI))
8702 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8703 }
8704
8705 // If there are no nans, it's safe to simply replace this with the non-IEEE
8706 // version.
8707 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8708 MI.eraseFromParent();
8709 return Legalized;
8710}
8711
8712LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
8713 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8714 Register DstReg = MI.getOperand(0).getReg();
8715 LLT Ty = MRI.getType(DstReg);
8716 unsigned Flags = MI.getFlags();
8717
8718 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8719 Flags);
8720 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8721 MI.eraseFromParent();
8722 return Legalized;
8723}
8724
8725LegalizerHelper::LegalizeResult
8726LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
8727 auto [DstReg, X] = MI.getFirst2Regs();
8728 const unsigned Flags = MI.getFlags();
8729 const LLT Ty = MRI.getType(DstReg);
8730 const LLT CondTy = Ty.changeElementSize(1);
8731
8732 // round(x) =>
8733 // t = trunc(x);
8734 // d = fabs(x - t);
8735 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8736 // return t + o;
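  //
  // For example, round(2.5): t = 2.0, d = 0.5, o = +1.0 => 3.0; round(-2.5):
  // t = -2.0, d = 0.5, o = -1.0 => -3.0, i.e. ties are rounded away from zero
  // as G_INTRINSIC_ROUND requires.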
8737
8738 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8739
8740 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8741 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8742
8743 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8744 auto Cmp =
8745 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8746
8747 // Could emit G_UITOFP instead
8748 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8749 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8750 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8751 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8752
8753 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8754
8755 MI.eraseFromParent();
8756 return Legalized;
8757}
8758
8759LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
8760 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8761 unsigned Flags = MI.getFlags();
8762 LLT Ty = MRI.getType(DstReg);
8763 const LLT CondTy = Ty.changeElementSize(1);
8764
8765 // result = trunc(src);
8766 // if (src < 0.0 && src != result)
8767 // result += -1.0.
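  //
  // For example, src = -2.25: trunc gives -2.0, the compare-and-AND condition
  // holds, and SITOFP of the i1 true value is -1.0, so the result is -3.0.
  // For src = 2.25 the condition is false and the result stays 2.0.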
8768
8769 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8770 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8771
8772 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8773 SrcReg, Zero, Flags);
8774 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8775 SrcReg, Trunc, Flags);
8776 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8777 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8778
8779 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8780 MI.eraseFromParent();
8781 return Legalized;
8782}
8783
8784LegalizerHelper::LegalizeResult
8785LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
8786 const unsigned NumOps = MI.getNumOperands();
8787 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8788 unsigned PartSize = Src0Ty.getSizeInBits();
8789
8790 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8791 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8792
8793 for (unsigned I = 2; I != NumOps; ++I) {
8794 const unsigned Offset = (I - 1) * PartSize;
8795
8796 Register SrcReg = MI.getOperand(I).getReg();
8797 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8798
8799 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8800 MRI.createGenericVirtualRegister(WideTy);
8801
8802 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8803 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8804 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8805 ResultReg = NextResult;
8806 }
8807
8808 if (DstTy.isPointer()) {
8809 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8810 DstTy.getAddressSpace())) {
8811 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8812 return UnableToLegalize;
8813 }
8814
8815 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8816 }
8817
8818 MI.eraseFromParent();
8819 return Legalized;
8820}
8821
8822LegalizerHelper::LegalizeResult
8823LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
8824 const unsigned NumDst = MI.getNumOperands() - 1;
8825 Register SrcReg = MI.getOperand(NumDst).getReg();
8826 Register Dst0Reg = MI.getOperand(0).getReg();
8827 LLT DstTy = MRI.getType(Dst0Reg);
8828 if (DstTy.isPointer())
8829 return UnableToLegalize; // TODO
8830
8831 SrcReg = coerceToScalar(SrcReg);
8832 if (!SrcReg)
8833 return UnableToLegalize;
8834
8835 // Expand scalarizing unmerge as bitcast to integer and shift.
8836 LLT IntTy = MRI.getType(SrcReg);
8837
8838 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8839
8840 const unsigned DstSize = DstTy.getSizeInBits();
8841 unsigned Offset = DstSize;
8842 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8843 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8844 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8845 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8846 }
8847
8848 MI.eraseFromParent();
8849 return Legalized;
8850}
8851
8852/// Lower a vector extract or insert by writing the vector to a stack temporary
8853/// and reloading the element or vector.
8854///
8855/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8856/// =>
8857/// %stack_temp = G_FRAME_INDEX
8858/// G_STORE %vec, %stack_temp
8859/// %idx = clamp(%idx, %vec.getNumElements())
8860/// %element_ptr = G_PTR_ADD %stack_temp, %idx
8861/// %dst = G_LOAD %element_ptr
8862LegalizerHelper::LegalizeResult
8863LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
8864 Register DstReg = MI.getOperand(0).getReg();
8865 Register SrcVec = MI.getOperand(1).getReg();
8866 Register InsertVal;
8867 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8868 InsertVal = MI.getOperand(2).getReg();
8869
8870 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
8871
8872 LLT VecTy = MRI.getType(SrcVec);
8873 LLT EltTy = VecTy.getElementType();
8874 unsigned NumElts = VecTy.getNumElements();
8875
8876 int64_t IdxVal;
8877 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
8878 SmallVector<Register, 8> SrcRegs;
8879 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
8880
8881 if (InsertVal) {
8882 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
8883 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
8884 } else {
8885 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
8886 }
8887
8888 MI.eraseFromParent();
8889 return Legalized;
8890 }
8891
8892 if (!EltTy.isByteSized()) { // Not implemented.
8893 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8894 return UnableToLegalize;
8895 }
8896
8897 unsigned EltBytes = EltTy.getSizeInBytes();
8898 Align VecAlign = getStackTemporaryAlignment(VecTy);
8899 Align EltAlign;
8900
8901 MachinePointerInfo PtrInfo;
8902 auto StackTemp = createStackTemporary(
8903 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
8904 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
8905
8906 // Get the pointer to the element, and be sure not to hit undefined behavior
8907 // if the index is out of bounds.
8908 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
8909
8910 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
8911 int64_t Offset = IdxVal * EltBytes;
8912 PtrInfo = PtrInfo.getWithOffset(Offset);
8913 EltAlign = commonAlignment(VecAlign, Offset);
8914 } else {
8915 // We lose information with a variable offset.
8916 EltAlign = getStackTemporaryAlignment(EltTy);
8917 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
8918 }
8919
8920 if (InsertVal) {
8921 // Write the inserted element
8922 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
8923
8924 // Reload the whole vector.
8925 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
8926 } else {
8927 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
8928 }
8929
8930 MI.eraseFromParent();
8931 return Legalized;
8932}
8933
8934LegalizerHelper::LegalizeResult
8935LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
8936 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8937 MI.getFirst3RegLLTs();
8938 LLT IdxTy = LLT::scalar(32);
8939
8940 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
8941 Register Undef;
8942 SmallVector<Register, 32> BuildVec;
8943 LLT EltTy = DstTy.getScalarType();
8944
8945 for (int Idx : Mask) {
8946 if (Idx < 0) {
8947 if (!Undef.isValid())
8948 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
8949 BuildVec.push_back(Undef);
8950 continue;
8951 }
8952
8953 if (Src0Ty.isScalar()) {
8954 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
8955 } else {
8956 int NumElts = Src0Ty.getNumElements();
8957 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
8958 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
8959 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
8960 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
8961 BuildVec.push_back(Extract.getReg(0));
8962 }
8963 }
8964
8965 if (DstTy.isVector())
8966 MIRBuilder.buildBuildVector(DstReg, BuildVec);
8967 else
8968 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
8969 MI.eraseFromParent();
8970 return Legalized;
8971}
8972
8973LegalizerHelper::LegalizeResult
8974LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
8975 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
8976 MI.getFirst4RegLLTs();
8977
8978 if (VecTy.isScalableVector())
8979 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
8980
8981 Align VecAlign = getStackTemporaryAlignment(VecTy);
8982 MachinePointerInfo PtrInfo;
8983 Register StackPtr =
8984 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
8985 PtrInfo)
8986 .getReg(0);
8987 MachinePointerInfo ValPtrInfo =
8988 MachinePointerInfo::getUnknownStack(*MI.getMF());
8989
8990 LLT IdxTy = LLT::scalar(32);
8991 LLT ValTy = VecTy.getElementType();
8992 Align ValAlign = getStackTemporaryAlignment(ValTy);
8993
8994 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
8995
8996 bool HasPassthru =
8997 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
8998
8999 if (HasPassthru)
9000 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9001
9002 Register LastWriteVal;
9003 std::optional<APInt> PassthruSplatVal =
9004 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9005
9006 if (PassthruSplatVal.has_value()) {
9007 LastWriteVal =
9008 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9009 } else if (HasPassthru) {
9010 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9011 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9012 {LLT::scalar(32)}, {Popcount});
9013
9014 Register LastElmtPtr =
9015 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9016 LastWriteVal =
9017 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9018 .getReg(0);
9019 }
9020
9021 unsigned NumElmts = VecTy.getNumElements();
9022 for (unsigned I = 0; I < NumElmts; ++I) {
9023 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9024 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9025 Register ElmtPtr =
9026 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9027 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9028
9029 LLT MaskITy = MaskTy.getElementType();
9030 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9031 if (MaskITy.getSizeInBits() > 1)
9032 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9033
9034 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9035 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9036
9037 if (HasPassthru && I == NumElmts - 1) {
9038 auto EndOfVector =
9039 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9040 auto AllLanesSelected = MIRBuilder.buildICmp(
9041 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9042 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9043 {OutPos, EndOfVector});
9044 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9045
9046 LastWriteVal =
9047 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9048 .getReg(0);
9049 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9050 }
9051 }
9052
9053 // TODO: Use StackPtr's FrameIndex alignment.
9054 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9055
9056 MI.eraseFromParent();
9057 return Legalized;
9058}
9059
9060Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9061 Register AllocSize,
9062 Align Alignment,
9063 LLT PtrTy) {
9064 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9065
9066 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9067 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9068
9069 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9070 // have to generate an extra instruction to negate the alloc and then use
9071 // G_PTR_ADD to add the negative offset.
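  // With Alignment = 16 on a 64-bit target, AlignMask below is
  // 0xFFFFFFFFFFFFFFF0; ANDing the decremented stack pointer with it rounds
  // the new SP down to the requested 16-byte boundary (the stack is assumed
  // to grow down, which the caller checks).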
9072 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9073 if (Alignment > Align(1)) {
9074 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9075 AlignMask.negate();
9076 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9077 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9078 }
9079
9080 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9081}
9082
9083LegalizerHelper::LegalizeResult
9084LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
9085 const auto &MF = *MI.getMF();
9086 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9087 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9088 return UnableToLegalize;
9089
9090 Register Dst = MI.getOperand(0).getReg();
9091 Register AllocSize = MI.getOperand(1).getReg();
9092 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9093
9094 LLT PtrTy = MRI.getType(Dst);
9095 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9096 Register SPTmp =
9097 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9098
9099 MIRBuilder.buildCopy(SPReg, SPTmp);
9100 MIRBuilder.buildCopy(Dst, SPTmp);
9101
9102 MI.eraseFromParent();
9103 return Legalized;
9104}
9105
9106LegalizerHelper::LegalizeResult
9107LegalizerHelper::lowerStackSave(MachineInstr &MI) {
9108 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9109 if (!StackPtr)
9110 return UnableToLegalize;
9111
9112 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9113 MI.eraseFromParent();
9114 return Legalized;
9115}
9116
9117LegalizerHelper::LegalizeResult
9118LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
9119 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9120 if (!StackPtr)
9121 return UnableToLegalize;
9122
9123 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9124 MI.eraseFromParent();
9125 return Legalized;
9126}
9127
9128LegalizerHelper::LegalizeResult
9129LegalizerHelper::lowerExtract(MachineInstr &MI) {
9130 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9131 unsigned Offset = MI.getOperand(2).getImm();
9132
9133 // Extract sub-vector or one element
9134 if (SrcTy.isVector()) {
9135 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9136 unsigned DstSize = DstTy.getSizeInBits();
9137
9138 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9139 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9140 // Unmerge and allow access to each Src element for the artifact combiner.
9141 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9142
9143 // Take element(s) we need to extract and copy it (merge them).
9144 SmallVector<Register, 8> SubVectorElts;
9145 for (unsigned Idx = Offset / SrcEltSize;
9146 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9147 SubVectorElts.push_back(Unmerge.getReg(Idx));
9148 }
9149 if (SubVectorElts.size() == 1)
9150 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9151 else
9152 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9153
9154 MI.eraseFromParent();
9155 return Legalized;
9156 }
9157 }
9158
9159 if (DstTy.isScalar() &&
9160 (SrcTy.isScalar() ||
9161 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9162 LLT SrcIntTy = SrcTy;
9163 if (!SrcTy.isScalar()) {
9164 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9165 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9166 }
9167
9168 if (Offset == 0)
9169 MIRBuilder.buildTrunc(DstReg, SrcReg);
9170 else {
9171 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9172 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9173 MIRBuilder.buildTrunc(DstReg, Shr);
9174 }
9175
9176 MI.eraseFromParent();
9177 return Legalized;
9178 }
9179
9180 return UnableToLegalize;
9181}
9182
9183LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
9184 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9185 uint64_t Offset = MI.getOperand(3).getImm();
9186
9187 LLT DstTy = MRI.getType(Src);
9188 LLT InsertTy = MRI.getType(InsertSrc);
9189
9190 // Insert sub-vector or one element
9191 if (DstTy.isVector() && !InsertTy.isPointer()) {
9192 LLT EltTy = DstTy.getElementType();
9193 unsigned EltSize = EltTy.getSizeInBits();
9194 unsigned InsertSize = InsertTy.getSizeInBits();
9195
9196 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9197 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9198 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9199 SmallVector<Register, 8> DstElts;
9200 unsigned Idx = 0;
9201 // Elements from Src before insert start Offset
9202 for (; Idx < Offset / EltSize; ++Idx) {
9203 DstElts.push_back(UnmergeSrc.getReg(Idx));
9204 }
9205
9206 // Replace elements in Src with elements from InsertSrc
9207 if (InsertTy.getSizeInBits() > EltSize) {
9208 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9209 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9210 ++Idx, ++i) {
9211 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9212 }
9213 } else {
9214 DstElts.push_back(InsertSrc);
9215 ++Idx;
9216 }
9217
9218 // Remaining elements from Src after insert
9219 for (; Idx < DstTy.getNumElements(); ++Idx) {
9220 DstElts.push_back(UnmergeSrc.getReg(Idx));
9221 }
9222
9223 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9224 MI.eraseFromParent();
9225 return Legalized;
9226 }
9227 }
9228
9229 if (InsertTy.isVector() ||
9230 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9231 return UnableToLegalize;
9232
9233 const DataLayout &DL = MIRBuilder.getDataLayout();
9234 if ((DstTy.isPointer() &&
9235 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9236 (InsertTy.isPointer() &&
9237 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9238 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9239 return UnableToLegalize;
9240 }
9241
9242 LLT IntDstTy = DstTy;
9243
9244 if (!DstTy.isScalar()) {
9245 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9246 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9247 }
9248
9249 if (!InsertTy.isScalar()) {
9250 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9251 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9252 }
9253
9254 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9255 if (Offset != 0) {
9256 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9257 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9258 }
9259
9260 APInt MaskVal = APInt::getBitsSetWithWrap(
9261 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9262
9263 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9264 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9265 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9266
9267 MIRBuilder.buildCast(Dst, Or);
9268 MI.eraseFromParent();
9269 return Legalized;
9270}
9271
9272LegalizerHelper::LegalizeResult
9273LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
9274 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9275 MI.getFirst4RegLLTs();
9276 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9277
9278 LLT Ty = Dst0Ty;
9279 LLT BoolTy = Dst1Ty;
9280
9281 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9282
9283 if (IsAdd)
9284 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9285 else
9286 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9287
9288 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9289
9290 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9291
9292 // For an addition, the result should be less than one of the operands (LHS)
9293 // if and only if the other operand (RHS) is negative, otherwise there will
9294 // be overflow.
9295 // For a subtraction, the result should be less than one of the operands
9296 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9297 // otherwise there will be overflow.
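  //
  // For example, with s8 operands 100 and 50 the add wraps to -106: the result
  // is less than LHS while RHS is not negative, so the two compares disagree
  // and the XOR below reports overflow.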
9298 auto ResultLowerThanLHS =
9299 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9300 auto ConditionRHS = MIRBuilder.buildICmp(
9301 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9302
9303 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9304
9305 MIRBuilder.buildCopy(Dst0, NewDst0);
9306 MI.eraseFromParent();
9307
9308 return Legalized;
9309}
9310
9312 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9313 const LLT Ty = MRI.getType(Res);
9314
9315 // sum = LHS + RHS + zext(CarryIn)
9316 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9317 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9318 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9319 MIRBuilder.buildCopy(Res, Sum);
9320
9321 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
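  // Signed overflow can only happen when LHS and RHS have the same sign and
  // the sum's sign differs from it; in exactly that case both XORs have their
  // sign bits set, so the AND is negative.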
9322 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9323 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9324 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9325
9326 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9327 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9328
9329 MI.eraseFromParent();
9330 return Legalized;
9331}
9332
9333LegalizerHelper::LegalizeResult
9334LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
9335 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9336 LLT Ty = MRI.getType(Res);
9337 bool IsSigned;
9338 bool IsAdd;
9339 unsigned BaseOp;
9340 switch (MI.getOpcode()) {
9341 default:
9342 llvm_unreachable("unexpected addsat/subsat opcode");
9343 case TargetOpcode::G_UADDSAT:
9344 IsSigned = false;
9345 IsAdd = true;
9346 BaseOp = TargetOpcode::G_ADD;
9347 break;
9348 case TargetOpcode::G_SADDSAT:
9349 IsSigned = true;
9350 IsAdd = true;
9351 BaseOp = TargetOpcode::G_ADD;
9352 break;
9353 case TargetOpcode::G_USUBSAT:
9354 IsSigned = false;
9355 IsAdd = false;
9356 BaseOp = TargetOpcode::G_SUB;
9357 break;
9358 case TargetOpcode::G_SSUBSAT:
9359 IsSigned = true;
9360 IsAdd = false;
9361 BaseOp = TargetOpcode::G_SUB;
9362 break;
9363 }
9364
9365 if (IsSigned) {
9366 // sadd.sat(a, b) ->
9367 // hi = 0x7fffffff - smax(a, 0)
9368 // lo = 0x80000000 - smin(a, 0)
9369 // a + smin(smax(lo, b), hi)
9370 // ssub.sat(a, b) ->
9371 // lo = smax(a, -1) - 0x7fffffff
9372 // hi = smin(a, -1) - 0x80000000
9373 // a - smin(smax(lo, b), hi)
9374 // TODO: AMDGPU can use a "median of 3" instruction here:
9375 // a +/- med3(lo, b, hi)
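    // For example, sadd.sat on s8 with a = 100, b = 50: Hi = 127 - 100 = 27,
    // Lo = -128 - 0 = -128, so b is clamped to 27 and the add yields the
    // saturated value 100 + 27 = 127.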
9376 uint64_t NumBits = Ty.getScalarSizeInBits();
9377 auto MaxVal =
9378 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9379 auto MinVal =
9380 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9381 MachineInstrBuilder Hi, Lo;
9382 if (IsAdd) {
9383 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9384 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9385 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9386 } else {
9387 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9388 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9389 MaxVal);
9390 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9391 MinVal);
9392 }
9393 auto RHSClamped =
9394 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9395 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9396 } else {
9397 // uadd.sat(a, b) -> a + umin(~a, b)
9398 // usub.sat(a, b) -> a - umin(a, b)
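    // For example, on u8: uadd.sat(200, 100) computes umin(~200, 100) =
    // umin(55, 100) = 55 and 200 + 55 = 255; usub.sat(50, 100) computes
    // umin(50, 100) = 50 and 50 - 50 = 0.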
9399 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9400 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9401 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9402 }
9403
9404 MI.eraseFromParent();
9405 return Legalized;
9406}
9407
9408LegalizerHelper::LegalizeResult
9409LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
9410 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9411 LLT Ty = MRI.getType(Res);
9412 LLT BoolTy = Ty.changeElementSize(1);
9413 bool IsSigned;
9414 bool IsAdd;
9415 unsigned OverflowOp;
9416 switch (MI.getOpcode()) {
9417 default:
9418 llvm_unreachable("unexpected addsat/subsat opcode");
9419 case TargetOpcode::G_UADDSAT:
9420 IsSigned = false;
9421 IsAdd = true;
9422 OverflowOp = TargetOpcode::G_UADDO;
9423 break;
9424 case TargetOpcode::G_SADDSAT:
9425 IsSigned = true;
9426 IsAdd = true;
9427 OverflowOp = TargetOpcode::G_SADDO;
9428 break;
9429 case TargetOpcode::G_USUBSAT:
9430 IsSigned = false;
9431 IsAdd = false;
9432 OverflowOp = TargetOpcode::G_USUBO;
9433 break;
9434 case TargetOpcode::G_SSUBSAT:
9435 IsSigned = true;
9436 IsAdd = false;
9437 OverflowOp = TargetOpcode::G_SSUBO;
9438 break;
9439 }
9440
9441 auto OverflowRes =
9442 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9443 Register Tmp = OverflowRes.getReg(0);
9444 Register Ov = OverflowRes.getReg(1);
9445 MachineInstrBuilder Clamp;
9446 if (IsSigned) {
9447 // sadd.sat(a, b) ->
9448 // {tmp, ov} = saddo(a, b)
9449 // ov ? (tmp >>s 31) + 0x80000000 : r
9450 // ssub.sat(a, b) ->
9451 // {tmp, ov} = ssubo(a, b)
9452 // ov ? (tmp >>s 31) + 0x80000000 : r
9453 uint64_t NumBits = Ty.getScalarSizeInBits();
9454 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9455 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9456 auto MinVal =
9457 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9458 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9459 } else {
9460 // uadd.sat(a, b) ->
9461 // {tmp, ov} = uaddo(a, b)
9462 // ov ? 0xffffffff : tmp
9463 // usub.sat(a, b) ->
9464 // {tmp, ov} = usubo(a, b)
9465 // ov ? 0 : tmp
9466 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9467 }
9468 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9469
9470 MI.eraseFromParent();
9471 return Legalized;
9472}
9473
9474LegalizerHelper::LegalizeResult
9475LegalizerHelper::lowerShlSat(MachineInstr &MI) {
9476 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9477 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9478 "Expected shlsat opcode!");
9479 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9480 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9481 LLT Ty = MRI.getType(Res);
9482 LLT BoolTy = Ty.changeElementSize(1);
9483
9484 unsigned BW = Ty.getScalarSizeInBits();
9485 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9486 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9487 : MIRBuilder.buildLShr(Ty, Result, RHS);
9488
9489 MachineInstrBuilder SatVal;
9490 if (IsSigned) {
9491 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9492 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9493 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9494 MIRBuilder.buildConstant(Ty, 0));
9495 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9496 } else {
9497 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9498 }
9499 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9500 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9501
9502 MI.eraseFromParent();
9503 return Legalized;
9504}
9505
9506LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
9507 auto [Dst, Src] = MI.getFirst2Regs();
9508 const LLT Ty = MRI.getType(Src);
9509 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9510 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9511
9512 // Swap most and least significant byte, set remaining bytes in Res to zero.
9513 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9514 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9515 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9516 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9517
9518 // Set i-th high/low byte in Res to i-th low/high byte from Src.
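  // For example, for a 32-bit 0xAABBCCDD the code above already produced
  // 0xDD0000AA; the single loop iteration (i = 1) masks out 0xCC00, shifts it
  // left by 8, masks 0xBB00 out of Src >> 8, and ORs both in, giving
  // 0xDDCCBBAA.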
9519 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9520 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9521 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9522 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9523 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9524 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9525 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9526 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9527 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9528 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9529 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9530 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9531 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9532 }
9533 Res.getInstr()->getOperand(0).setReg(Dst);
9534
9535 MI.eraseFromParent();
9536 return Legalized;
9537}
9538
9539//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9540static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
9541 MachineInstrBuilder Src, const APInt &Mask) {
9542 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9543 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9544 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9545 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9546 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9547 return B.buildOr(Dst, LHS, RHS);
9548}
9549
9550LegalizerHelper::LegalizeResult
9551LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
9552 auto [Dst, Src] = MI.getFirst2Regs();
9553 const LLT SrcTy = MRI.getType(Src);
9554 unsigned Size = SrcTy.getScalarSizeInBits();
9555 unsigned VSize = SrcTy.getSizeInBits();
9556
9557 if (Size >= 8) {
9558 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9559 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9560 {LLT::fixed_vector(VSize / 8, 8),
9561 LLT::fixed_vector(VSize / 8, 8)}}))) {
9562 // If bitreverse is legal for i8 vector of the same size, then cast
9563 // to i8 vector type.
9564 // e.g. v4s32 -> v16s8
9565 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9566 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9567 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9568 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9569 MIRBuilder.buildBitcast(Dst, RBIT);
9570 } else {
9571 MachineInstrBuilder BSWAP =
9572 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9573
9574 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9575 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9576 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9577 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9578 APInt::getSplat(Size, APInt(8, 0xF0)));
9579
9580 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9581 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9582 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9583 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9584 APInt::getSplat(Size, APInt(8, 0xCC)));
9585
9586 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9587 // 6|7
9588 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9589 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
9590 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9591 }
9592 } else {
9593 // Expand bitreverse for types smaller than 8 bits.
9594 MachineInstrBuilder Tmp;
9595 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9596 MachineInstrBuilder Tmp2;
9597 if (I < J) {
9598 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9599 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9600 } else {
9601 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9602 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9603 }
9604
9605 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9606 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9607 if (I == 0)
9608 Tmp = Tmp2;
9609 else
9610 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9611 }
9612 MIRBuilder.buildCopy(Dst, Tmp);
9613 }
9614
9615 MI.eraseFromParent();
9616 return Legalized;
9617}
9618
9619LegalizerHelper::LegalizeResult
9620LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
9621 MachineFunction &MF = MIRBuilder.getMF();
9622
9623 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9624 int NameOpIdx = IsRead ? 1 : 0;
9625 int ValRegIndex = IsRead ? 0 : 1;
9626
9627 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9628 const LLT Ty = MRI.getType(ValReg);
9629 const MDString *RegStr = cast<MDString>(
9630 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9631
9632 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9633 if (!PhysReg) {
9634 const Function &Fn = MF.getFunction();
9636 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9637 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9638 Fn, MI.getDebugLoc()));
9639 if (IsRead)
9640 MIRBuilder.buildUndef(ValReg);
9641
9642 MI.eraseFromParent();
9643 return Legalized;
9644 }
9645
9646 if (IsRead)
9647 MIRBuilder.buildCopy(ValReg, PhysReg);
9648 else
9649 MIRBuilder.buildCopy(PhysReg, ValReg);
9650
9651 MI.eraseFromParent();
9652 return Legalized;
9653}
9654
9655LegalizerHelper::LegalizeResult
9656LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9657 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9658 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9659 Register Result = MI.getOperand(0).getReg();
9660 LLT OrigTy = MRI.getType(Result);
9661 auto SizeInBits = OrigTy.getScalarSizeInBits();
9662 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9663
9664 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9665 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9666 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9667 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9668
9669 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9670 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9671 MIRBuilder.buildTrunc(Result, Shifted);
9672
9673 MI.eraseFromParent();
9674 return Legalized;
9675}
9676
9677LegalizerHelper::LegalizeResult
9678LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
9679 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9680 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9681
9682 if (Mask == fcNone) {
9683 MIRBuilder.buildConstant(DstReg, 0);
9684 MI.eraseFromParent();
9685 return Legalized;
9686 }
9687 if (Mask == fcAllFlags) {
9688 MIRBuilder.buildConstant(DstReg, 1);
9689 MI.eraseFromParent();
9690 return Legalized;
9691 }
9692
9693 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9694 // version
9695
9696 unsigned BitSize = SrcTy.getScalarSizeInBits();
9697 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9698
9699 LLT IntTy = LLT::scalar(BitSize);
9700 if (SrcTy.isVector())
9701 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9702 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9703
9704 // Various masks.
9705 APInt SignBit = APInt::getSignMask(BitSize);
9706 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9707 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9708 APInt ExpMask = Inf;
9709 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9710 APInt QNaNBitMask =
9711 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9712 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9713
9714 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9715 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9716 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9717 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9718 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9719
9720 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9721 auto Sign =
9722 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9723
9724 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9725 // Clang doesn't support capture of structured bindings:
9726 LLT DstTyCopy = DstTy;
9727 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9728 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9729 };
9730
9731 // Tests that involve more than one class should be processed first.
9732 if ((Mask & fcFinite) == fcFinite) {
9733 // finite(V) ==> abs(V) u< exp_mask
9734 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9735 ExpMaskC));
9736 Mask &= ~fcFinite;
9737 } else if ((Mask & fcFinite) == fcPosFinite) {
9738 // finite(V) && V > 0 ==> V u< exp_mask
9739 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9740 ExpMaskC));
9741 Mask &= ~fcPosFinite;
9742 } else if ((Mask & fcFinite) == fcNegFinite) {
9743 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9744 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9745 ExpMaskC);
9746 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9747 appendToRes(And);
9748 Mask &= ~fcNegFinite;
9749 }
9750
9751 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9752 // fcZero | fcSubnormal => test all exponent bits are 0
9753 // TODO: Handle sign bit specific cases
9754 // TODO: Handle inverted case
9755 if (PartialCheck == (fcZero | fcSubnormal)) {
9756 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9757 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9758 ExpBits, ZeroC));
9759 Mask &= ~PartialCheck;
9760 }
9761 }
9762
9763 // Check for individual classes.
9764 if (FPClassTest PartialCheck = Mask & fcZero) {
9765 if (PartialCheck == fcPosZero)
9766 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9767 AsInt, ZeroC));
9768 else if (PartialCheck == fcZero)
9769 appendToRes(
9770 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9771 else // fcNegZero
9772 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9773 AsInt, SignBitC));
9774 }
9775
9776 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9777 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9778 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9779 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9780 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9781 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9782 auto SubnormalRes =
9783 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9784 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9785 if (PartialCheck == fcNegSubnormal)
9786 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9787 appendToRes(SubnormalRes);
9788 }
9789
9790 if (FPClassTest PartialCheck = Mask & fcInf) {
9791 if (PartialCheck == fcPosInf)
9792 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9793 AsInt, InfC));
9794 else if (PartialCheck == fcInf)
9795 appendToRes(
9796 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9797 else { // fcNegInf
9798 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9799 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9800 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9801 AsInt, NegInfC));
9802 }
9803 }
9804
9805 if (FPClassTest PartialCheck = Mask & fcNan) {
9806 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9807 if (PartialCheck == fcNan) {
9808 // isnan(V) ==> abs(V) u> int(inf)
9809 appendToRes(
9810 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9811 } else if (PartialCheck == fcQNan) {
9812 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9813 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9814 InfWithQnanBitC));
9815 } else { // fcSNan
9816 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9817 // abs(V) u< (unsigned(Inf) | quiet_bit)
9818 auto IsNan =
9819 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9820 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9821 Abs, InfWithQnanBitC);
9822 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9823 }
9824 }
9825
9826 if (FPClassTest PartialCheck = Mask & fcNormal) {
9827 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9828 // (max_exp-1))
9829 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9830 auto ExpMinusOne = MIRBuilder.buildSub(
9831 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9832 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9833 auto NormalRes =
9834 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
9835 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9836 if (PartialCheck == fcNegNormal)
9837 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9838 else if (PartialCheck == fcPosNormal) {
9839 auto PosSign = MIRBuilder.buildXor(
9840 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
9841 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
9842 }
9843 appendToRes(NormalRes);
9844 }
9845
9846 MIRBuilder.buildCopy(DstReg, Res);
9847 MI.eraseFromParent();
9848 return Legalized;
9849}
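// [Editorial illustration -- not part of LegalizerHelper.cpp] For IEEE f32 the
// constants built above are: ValueMask = 0x7fffffff, Inf/ExpMask = 0x7f800000,
// AllOneMantissa = 0x007fffff, QNaNBitMask = 0x00400000. A standalone sketch
// of the fcNan and fcSubnormal tests on raw bits, assuming only <cstdint>:
//
//   bool isNanBits(uint32_t Bits) {       // abs(V) u> Inf
//     return (Bits & 0x7fffffffu) > 0x7f800000u;
//   }
//   bool isSubnormalBits(uint32_t Bits) { // unsigned(abs(V) - 1) u< mantissa
//     return ((Bits & 0x7fffffffu) - 1u) < 0x007fffffu;
//   }
//
// e.g. 0x7fc00000 (a quiet NaN) passes isNanBits; 0x00000001 (the smallest
// positive subnormal) passes isSubnormalBits, while +0.0 does not because
// 0 - 1 wraps to 0xffffffff.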
9850
9851LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
9852 // Implement G_SELECT in terms of XOR, AND, OR.
9853 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9854 MI.getFirst4RegLLTs();
9855
9856 bool IsEltPtr = DstTy.isPointerOrPointerVector();
9857 if (IsEltPtr) {
9858 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9859 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9860 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9861 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9862 DstTy = NewTy;
9863 }
9864
9865 if (MaskTy.isScalar()) {
9866 // Turn the scalar condition into a vector condition mask if needed.
9867
9868 Register MaskElt = MaskReg;
9869
9870 // The condition was potentially zero extended before, but we want a sign
9871 // extended boolean.
9872 if (MaskTy != LLT::scalar(1))
9873 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
9874
9875 // Continue the sign extension (or truncate) to match the data type.
9876 MaskElt =
9877 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
9878
9879 if (DstTy.isVector()) {
9880 // Generate a vector splat idiom.
9881 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9882 MaskReg = ShufSplat.getReg(0);
9883 } else {
9884 MaskReg = MaskElt;
9885 }
9886 MaskTy = DstTy;
9887 } else if (!DstTy.isVector()) {
9888 // Cannot handle the case that mask is a vector and dst is a scalar.
9889 return UnableToLegalize;
9890 }
9891
9892 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9893 return UnableToLegalize;
9894 }
9895
9896 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9897 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9898 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
9899 if (IsEltPtr) {
9900 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9901 MIRBuilder.buildIntToPtr(DstReg, Or);
9902 } else {
9903 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9904 }
9905 MI.eraseFromParent();
9906 return Legalized;
9907}
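// [Editorial illustration -- not part of LegalizerHelper.cpp] Once the
// condition is sign-extended to an all-ones / all-zeros mask of the data
// width, the select is pure bitwise logic. A standalone sketch for one 32-bit
// lane, assuming only <cstdint>:
//
//   uint32_t selectLane(bool Cond, uint32_t A, uint32_t B) {
//     uint32_t Mask = Cond ? 0xffffffffu : 0u; // sign-extended i1 condition
//     return (A & Mask) | (B & ~Mask);         // G_AND, G_AND (NotMask), G_OR
//   }
//
// With Cond true every bit of A survives and B is zeroed out, and vice versa.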
9908
9909LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
9910 // Split DIVREM into individual instructions.
9911 unsigned Opcode = MI.getOpcode();
9912
9913 MIRBuilder.buildInstr(
9914 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9915 : TargetOpcode::G_UDIV,
9916 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9917 MIRBuilder.buildInstr(
9918 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9919 : TargetOpcode::G_UREM,
9920 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9921 MI.eraseFromParent();
9922 return Legalized;
9923}
9924
9925LegalizerHelper::LegalizeResult
9926LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
9927 // Expand %res = G_ABS %a into:
9928 // %v1 = G_ASHR %a, scalar_size-1
9929 // %v2 = G_ADD %a, %v1
9930 // %res = G_XOR %v2, %v1
9931 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
9932 Register OpReg = MI.getOperand(1).getReg();
9933 auto ShiftAmt =
9934 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
9935 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
9936 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
9937 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
9938 MI.eraseFromParent();
9939 return Legalized;
9940}
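// [Editorial illustration -- not part of LegalizerHelper.cpp] This is the
// classic branchless abs. A standalone sketch for s32, assuming wrapping
// two's-complement arithmetic (as the generic MIR ops provide) and <cstdint>:
//
//   int32_t absAddXor(int32_t A) {
//     int32_t Sign = A >> 31;   // G_ASHR: 0 for non-negative, -1 for negative
//     return (A + Sign) ^ Sign; // G_ADD, then G_XOR
//   }
//
// e.g. A = -5: Sign = -1, A + Sign = -6, -6 ^ -1 = 5. For A >= 0 both the add
// and the xor are no-ops.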
9941
9942LegalizerHelper::LegalizeResult
9943LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
9944 // Expand %res = G_ABS %a into:
9945 // %v1 = G_CONSTANT 0
9946 // %v2 = G_SUB %v1, %a
9947 // %res = G_SMAX %a, %v2
9948 Register SrcReg = MI.getOperand(1).getReg();
9949 LLT Ty = MRI.getType(SrcReg);
9950 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9951 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
9952 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
9953 MI.eraseFromParent();
9954 return Legalized;
9955}
9956
9957LegalizerHelper::LegalizeResult
9958LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
9959 Register SrcReg = MI.getOperand(1).getReg();
9960 Register DestReg = MI.getOperand(0).getReg();
9961 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
9962 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
9963 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
9964 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
9965 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
9966 MI.eraseFromParent();
9967 return Legalized;
9968}
9969
9970LegalizerHelper::LegalizeResult
9971LegalizerHelper::lowerAbsDiffToSelect(MachineInstr &MI) {
9972 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
9973 MI.getOpcode() == TargetOpcode::G_ABDU) &&
9974 "Expected G_ABDS or G_ABDU instruction");
9975
9976 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
9977 LLT Ty = MRI.getType(LHS);
9978
9979 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9980 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9981 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
9982 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
9983 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
9984 ? CmpInst::ICMP_SGT
9985 : CmpInst::ICMP_UGT;
9986 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
9987 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
9988
9989 MI.eraseFromParent();
9990 return Legalized;
9991}
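// [Editorial illustration -- not part of LegalizerHelper.cpp] The select form
// above relies on wrapping subtraction: whichever of the two subs "underflows"
// is simply not selected. A standalone sketch of the unsigned case, assuming
// only <cstdint>:
//
//   uint32_t abdu32(uint32_t L, uint32_t R) {
//     uint32_t LSubR = L - R, RSubL = R - L; // both G_SUBs are always emitted
//     return L > R ? LSubR : RSubL;          // G_ICMP ugt + G_SELECT
//   }
//
// e.g. abdu32(3, 10): LSubR wraps to 0xfffffff9, RSubL = 7, and the compare
// selects RSubL. The signed case only swaps the predicate to sgt.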
9992
9993LegalizerHelper::LegalizeResult
9994LegalizerHelper::lowerAbsDiffToMinMax(MachineInstr &MI) {
9995 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
9996 MI.getOpcode() == TargetOpcode::G_ABDU) &&
9997 "Expected G_ABDS or G_ABDU instruction");
9998
9999 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10000 LLT Ty = MRI.getType(LHS);
10001
10002 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10003 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10004 Register MaxReg, MinReg;
10005 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10006 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10007 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10008 } else {
10009 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10010 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10011 }
10012 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10013
10014 MI.eraseFromParent();
10015 return Legalized;
10016}
10017
10018LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
10019 Register SrcReg = MI.getOperand(1).getReg();
10020 Register DstReg = MI.getOperand(0).getReg();
10021
10022 LLT Ty = MRI.getType(DstReg);
10023
10024 // Reset sign bit
10025 MIRBuilder.buildAnd(
10026 DstReg, SrcReg,
10027 MIRBuilder.buildConstant(
10028 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10029
10030 MI.eraseFromParent();
10031 return Legalized;
10032}
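// [Editorial illustration -- not part of LegalizerHelper.cpp] fabs is a bit
// mask on the IEEE encoding: keep everything except the sign bit. A standalone
// sketch for f32, assuming <cstdint> and <cstring>:
//
//   float fabsBits(float X) {
//     uint32_t Bits;
//     std::memcpy(&Bits, &X, sizeof(Bits));
//     Bits &= 0x7fffffffu;                 // APInt::getSignedMaxValue(32)
//     std::memcpy(&X, &Bits, sizeof(Bits));
//     return X;
//   }
//
// This also maps -0.0 to +0.0 and negative NaN encodings to their positive
// counterparts, matching the plain G_AND above.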
10033
10034LegalizerHelper::LegalizeResult
10035LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
10036 Register SrcReg = MI.getOperand(1).getReg();
10037 LLT SrcTy = MRI.getType(SrcReg);
10038 LLT DstTy = MRI.getType(SrcReg);
10039
10040 // The source could be a scalar if the IR type was <1 x sN>.
10041 if (SrcTy.isScalar()) {
10042 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10043 return UnableToLegalize; // FIXME: handle extension.
10044 // This can be just a plain copy.
10045 Observer.changingInstr(MI);
10046 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10047 Observer.changedInstr(MI);
10048 return Legalized;
10049 }
10050 return UnableToLegalize;
10051}
10052
10053LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
10054 MachineFunction &MF = *MI.getMF();
10055 const DataLayout &DL = MIRBuilder.getDataLayout();
10056 LLVMContext &Ctx = MF.getFunction().getContext();
10057 Register ListPtr = MI.getOperand(1).getReg();
10058 LLT PtrTy = MRI.getType(ListPtr);
10059
10060 // ListPtr is a pointer to the head of the list. Get the address
10061 // of the head of the list.
10062 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10063 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10064 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10065 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10066
10067 const Align A(MI.getOperand(2).getImm());
10068 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10069 if (A > TLI.getMinStackArgumentAlignment()) {
10070 Register AlignAmt =
10071 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10072 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10073 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10074 VAList = AndDst.getReg(0);
10075 }
10076
10077 // Increment the pointer, VAList, to the next vaarg
10078 // The list should be bumped by the size of the element in the current head of
10079 // list.
10080 Register Dst = MI.getOperand(0).getReg();
10081 LLT LLTTy = MRI.getType(Dst);
10082 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10083 auto IncAmt =
10084 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10085 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10086
10087 // Store the incremented VAList to the legalized pointer
10088 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10089 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10090 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10091 // Load the actual argument out of the pointer VAList
10092 Align EltAlignment = DL.getABITypeAlign(Ty);
10093 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10094 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10095 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10096
10097 MI.eraseFromParent();
10098 return Legalized;
10099}
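// [Editorial illustration -- not part of LegalizerHelper.cpp] The G_VAARG
// lowering above is the usual bump-pointer va_arg: load the list pointer,
// round it up if extra alignment was requested, store back the pointer
// advanced by the argument's allocation size, then load the argument from the
// aligned address. A standalone sketch of the address arithmetic, assuming
// Align is a power of two and <cstdint>/<cstddef> are available:
//
//   void *vaargSlot(void **ListPtr, size_t Size, uintptr_t Align) {
//     uintptr_t P = reinterpret_cast<uintptr_t>(*ListPtr);
//     P = (P + Align - 1) & ~(Align - 1);            // ptradd + mask low bits
//     *ListPtr = reinterpret_cast<void *>(P + Size); // store incremented list
//     return reinterpret_cast<void *>(P);            // slot the value is read from
//   }
//
// The real code only performs the rounding when the requested alignment
// exceeds TLI.getMinStackArgumentAlignment().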
10100
10101static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10102 // On Darwin, -Os means optimize for size without hurting performance, so
10103 // only really optimize for size when -Oz (MinSize) is used.
10104 if (MF.getTarget().getTargetTriple().isOSDarwin())
10105 return MF.getFunction().hasMinSize();
10106 return MF.getFunction().hasOptSize();
10107}
10108
10109// Returns a list of types to use for memory op lowering in MemOps. A partial
10110// port of findOptimalMemOpLowering in TargetLowering.
10111static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10112 unsigned Limit, const MemOp &Op,
10113 unsigned DstAS, unsigned SrcAS,
10114 const AttributeList &FuncAttributes,
10115 const TargetLowering &TLI) {
10116 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10117 return false;
10118
10119 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10120
10121 if (Ty == LLT()) {
10122 // Use the largest scalar type whose alignment constraints are satisfied.
10123 // We only need to check DstAlign here as SrcAlign is always greater or
10124 // equal to DstAlign (or zero).
10125 Ty = LLT::scalar(64);
10126 if (Op.isFixedDstAlign())
10127 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10128 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10129 Ty = LLT::scalar(Ty.getSizeInBytes());
10130 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10131 // FIXME: check for the largest legal type we can load/store to.
10132 }
10133
10134 unsigned NumMemOps = 0;
10135 uint64_t Size = Op.size();
10136 while (Size) {
10137 unsigned TySize = Ty.getSizeInBytes();
10138 while (TySize > Size) {
10139 // For now, only use non-vector loads / stores for the left-over pieces.
10140 LLT NewTy = Ty;
10141 // FIXME: check for mem op safety and legality of the types. Not all of
10142 // SDAGisms map cleanly to GISel concepts.
10143 if (NewTy.isVector())
10144 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10145 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10146 unsigned NewTySize = NewTy.getSizeInBytes();
10147 assert(NewTySize > 0 && "Could not find appropriate type");
10148
10149 // If the new LLT cannot cover all of the remaining bits, then consider
10150 // issuing a (or a pair of) unaligned and overlapping load / store.
10151 unsigned Fast;
10152 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10153 MVT VT = getMVTForLLT(Ty);
10154 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10155 TLI.allowsMisalignedMemoryAccesses(
10156 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10157 MachineMemOperand::MONone, &Fast) &&
10158 Fast)
10159 TySize = Size;
10160 else {
10161 Ty = NewTy;
10162 TySize = NewTySize;
10163 }
10164 }
10165
10166 if (++NumMemOps > Limit)
10167 return false;
10168
10169 MemOps.push_back(Ty);
10170 Size -= TySize;
10171 }
10172
10173 return true;
10174}
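// [Editorial illustration -- not part of LegalizerHelper.cpp] A worked example
// of the loop above, assuming the target picked Ty = s64: an 11-byte operation
// takes one s64 for the first 8 bytes. For the remaining 3 bytes the type
// shrinks through the bit_floor step (s64 -> s32 -> s16 -> s8). If overlapping
// unaligned accesses are allowed the search stops at s32 and the result is
// [s64, s32], with the second access later shifted back so it covers bytes
// 7..10; otherwise the list becomes [s64, s16, s8].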
10175
10176// Get a vectorized representation of the memset value operand, GISel edition.
10177static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10178 MachineRegisterInfo &MRI = *MIB.getMRI();
10179 unsigned NumBits = Ty.getScalarSizeInBits();
10180 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10181 if (!Ty.isVector() && ValVRegAndVal) {
10182 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10183 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10184 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10185 }
10186
10187 // Extend the byte value to the larger type, and then multiply by a magic
10188 // value 0x010101... in order to replicate it across every byte.
10189 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10190 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10191 return MIB.buildConstant(Ty, 0).getReg(0);
10192 }
10193
10194 LLT ExtType = Ty.getScalarType();
10195 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10196 if (NumBits > 8) {
10197 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10198 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10199 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10200 }
10201
10202 // For vector types create a G_BUILD_VECTOR.
10203 if (Ty.isVector())
10204 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10205
10206 return Val;
10207}
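// [Editorial illustration -- not part of LegalizerHelper.cpp] The 0x0101...
// multiply above replicates a single byte across a wider scalar. A standalone
// sketch for s32, assuming only <cstdint>:
//
//   uint32_t splatByte32(uint8_t V) {
//     return uint32_t(V) * 0x01010101u; // G_ZEXT + G_MUL with the magic splat
//   }
//
// e.g. splatByte32(0xAB) == 0xABABABAB. When the value is a known constant the
// code above instead folds this with APInt::getSplat at compile time.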
10208
10209LegalizerHelper::LegalizeResult
10210LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10211 uint64_t KnownLen, Align Alignment,
10212 bool IsVolatile) {
10213 auto &MF = *MI.getParent()->getParent();
10214 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10215 auto &DL = MF.getDataLayout();
10216 LLVMContext &C = MF.getFunction().getContext();
10217
10218 assert(KnownLen != 0 && "Have a zero length memset length!");
10219
10220 bool DstAlignCanChange = false;
10221 MachineFrameInfo &MFI = MF.getFrameInfo();
10222 bool OptSize = shouldLowerMemFuncForSize(MF);
10223
10224 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10225 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10226 DstAlignCanChange = true;
10227
10228 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10229 std::vector<LLT> MemOps;
10230
10231 const auto &DstMMO = **MI.memoperands_begin();
10232 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10233
10234 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10235 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10236
10237 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10238 MemOp::Set(KnownLen, DstAlignCanChange,
10239 Alignment,
10240 /*IsZeroMemset=*/IsZeroVal,
10241 /*IsVolatile=*/IsVolatile),
10242 DstPtrInfo.getAddrSpace(), ~0u,
10243 MF.getFunction().getAttributes(), TLI))
10244 return UnableToLegalize;
10245
10246 if (DstAlignCanChange) {
10247 // Get an estimate of the type from the LLT.
10248 Type *IRTy = getTypeForLLT(MemOps[0], C);
10249 Align NewAlign = DL.getABITypeAlign(IRTy);
10250 if (NewAlign > Alignment) {
10251 Alignment = NewAlign;
10252 unsigned FI = FIDef->getOperand(1).getIndex();
10253 // Give the stack frame object a larger alignment if needed.
10254 if (MFI.getObjectAlign(FI) < Alignment)
10255 MFI.setObjectAlignment(FI, Alignment);
10256 }
10257 }
10258
10259 MachineIRBuilder MIB(MI);
10260 // Find the largest store and generate the bit pattern for it.
10261 LLT LargestTy = MemOps[0];
10262 for (unsigned i = 1; i < MemOps.size(); i++)
10263 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10264 LargestTy = MemOps[i];
10265
10266 // The memset stored value is always defined as an s8, so in order to make it
10267 // work with larger store types we need to repeat the bit pattern across the
10268 // wider type.
10269 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10270
10271 if (!MemSetValue)
10272 return UnableToLegalize;
10273
10274 // Generate the stores. For each store type in the list, we generate the
10275 // matching store of that type to the destination address.
10276 LLT PtrTy = MRI.getType(Dst);
10277 unsigned DstOff = 0;
10278 unsigned Size = KnownLen;
10279 for (unsigned I = 0; I < MemOps.size(); I++) {
10280 LLT Ty = MemOps[I];
10281 unsigned TySize = Ty.getSizeInBytes();
10282 if (TySize > Size) {
10283 // Issuing an unaligned load / store pair that overlaps with the previous
10284 // pair. Adjust the offset accordingly.
10285 assert(I == MemOps.size() - 1 && I != 0);
10286 DstOff -= TySize - Size;
10287 }
10288
10289 // If this store is smaller than the largest store see whether we can get
10290 // the smaller value for free with a truncate.
10291 Register Value = MemSetValue;
10292 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10293 MVT VT = getMVTForLLT(Ty);
10294 MVT LargestVT = getMVTForLLT(LargestTy);
10295 if (!LargestTy.isVector() && !Ty.isVector() &&
10296 TLI.isTruncateFree(LargestVT, VT))
10297 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10298 else
10299 Value = getMemsetValue(Val, Ty, MIB);
10300 if (!Value)
10301 return UnableToLegalize;
10302 }
10303
10304 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10305
10306 Register Ptr = Dst;
10307 if (DstOff != 0) {
10308 auto Offset =
10309 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10310 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10311 }
10312
10313 MIB.buildStore(Value, Ptr, *StoreMMO);
10314 DstOff += Ty.getSizeInBytes();
10315 Size -= TySize;
10316 }
10317
10318 MI.eraseFromParent();
10319 return Legalized;
10320}
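// [Editorial illustration -- not part of LegalizerHelper.cpp] Putting the two
// halves together for a hypothetical memset(p, 0xAB, 11) where the chosen
// MemOps are [s64, s32]: the s64 store writes the splatted pattern at offset
// 0, and the trailing s32 store is pulled back by one byte (TySize - Size = 1)
// so it covers bytes 7..10. The overlap is harmless because every byte of the
// pattern is identical.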
10321
10322LegalizerHelper::LegalizeResult
10323LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10324 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10325
10326 auto [Dst, Src, Len] = MI.getFirst3Regs();
10327
10328 const auto *MMOIt = MI.memoperands_begin();
10329 const MachineMemOperand *MemOp = *MMOIt;
10330 bool IsVolatile = MemOp->isVolatile();
10331
10332 // See if this is a constant length copy
10333 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10334 // FIXME: support dynamically sized G_MEMCPY_INLINE
10335 assert(LenVRegAndVal &&
10336 "inline memcpy with dynamic size is not yet supported");
10337 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10338 if (KnownLen == 0) {
10339 MI.eraseFromParent();
10340 return Legalized;
10341 }
10342
10343 const auto &DstMMO = **MI.memoperands_begin();
10344 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10345 Align DstAlign = DstMMO.getBaseAlign();
10346 Align SrcAlign = SrcMMO.getBaseAlign();
10347
10348 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10349 IsVolatile);
10350}
10351
10352LegalizerHelper::LegalizeResult
10353LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10354 uint64_t KnownLen, Align DstAlign,
10355 Align SrcAlign, bool IsVolatile) {
10356 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10357 return lowerMemcpy(MI, Dst, Src, KnownLen,
10358 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10359 IsVolatile);
10360}
10361
10362LegalizerHelper::LegalizeResult
10363LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10364 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10365 Align SrcAlign, bool IsVolatile) {
10366 auto &MF = *MI.getParent()->getParent();
10367 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10368 auto &DL = MF.getDataLayout();
10369 LLVMContext &C = MF.getFunction().getContext();
10370
10371 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10372
10373 bool DstAlignCanChange = false;
10374 MachineFrameInfo &MFI = MF.getFrameInfo();
10375 Align Alignment = std::min(DstAlign, SrcAlign);
10376
10377 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10378 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10379 DstAlignCanChange = true;
10380
10381 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10382 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10383 // if the memcpy is in a tail call position.
10384
10385 std::vector<LLT> MemOps;
10386
10387 const auto &DstMMO = **MI.memoperands_begin();
10388 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10389 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10390 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10391
10392 if (!findGISelOptimalMemOpLowering(
10393 MemOps, Limit,
10394 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10395 IsVolatile),
10396 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10397 MF.getFunction().getAttributes(), TLI))
10398 return UnableToLegalize;
10399
10400 if (DstAlignCanChange) {
10401 // Get an estimate of the type from the LLT.
10402 Type *IRTy = getTypeForLLT(MemOps[0], C);
10403 Align NewAlign = DL.getABITypeAlign(IRTy);
10404
10405 // Don't promote to an alignment that would require dynamic stack
10406 // realignment.
10407 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10408 if (!TRI->hasStackRealignment(MF))
10409 if (MaybeAlign StackAlign = DL.getStackAlignment())
10410 NewAlign = std::min(NewAlign, *StackAlign);
10411
10412 if (NewAlign > Alignment) {
10413 Alignment = NewAlign;
10414 unsigned FI = FIDef->getOperand(1).getIndex();
10415 // Give the stack frame object a larger alignment if needed.
10416 if (MFI.getObjectAlign(FI) < Alignment)
10417 MFI.setObjectAlignment(FI, Alignment);
10418 }
10419 }
10420
10421 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10422
10423 MachineIRBuilder MIB(MI);
10424 // Now we need to emit a pair of load and stores for each of the types we've
10425 // collected. I.e. for each type, generate a load from the source pointer of
10426 // that type width, and then generate a corresponding store to the dest buffer
10427 // of that value loaded. This can result in a sequence of loads and stores of
10428 // mixed types, depending on what the target specifies as good types to use.
10429 unsigned CurrOffset = 0;
10430 unsigned Size = KnownLen;
10431 for (auto CopyTy : MemOps) {
10432 // Issuing an unaligned load / store pair that overlaps with the previous
10433 // pair. Adjust the offset accordingly.
10434 if (CopyTy.getSizeInBytes() > Size)
10435 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10436
10437 // Construct MMOs for the accesses.
10438 auto *LoadMMO =
10439 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10440 auto *StoreMMO =
10441 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10442
10443 // Create the load.
10444 Register LoadPtr = Src;
10445 Register Offset;
10446 if (CurrOffset != 0) {
10447 LLT SrcTy = MRI.getType(Src);
10448 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10449 .getReg(0);
10450 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10451 }
10452 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10453
10454 // Create the store.
10455 Register StorePtr = Dst;
10456 if (CurrOffset != 0) {
10457 LLT DstTy = MRI.getType(Dst);
10458 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10459 }
10460 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10461 CurrOffset += CopyTy.getSizeInBytes();
10462 Size -= CopyTy.getSizeInBytes();
10463 }
10464
10465 MI.eraseFromParent();
10466 return Legalized;
10467}
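// [Editorial illustration -- not part of LegalizerHelper.cpp] For the same
// hypothetical 11-byte copy with MemOps = [s64, s32], this loop emits an s64
// load/store pair at offset 0 and an s32 pair whose offset is adjusted from 8
// back to 7, so the final access re-copies byte 7 instead of running past the
// end of the buffers. Unlike the memmove lowering below, each load is
// immediately followed by its store.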
10468
10469LegalizerHelper::LegalizeResult
10470LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10471 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10472 bool IsVolatile) {
10473 auto &MF = *MI.getParent()->getParent();
10474 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10475 auto &DL = MF.getDataLayout();
10476 LLVMContext &C = MF.getFunction().getContext();
10477
10478 assert(KnownLen != 0 && "Have a zero length memmove length!");
10479
10480 bool DstAlignCanChange = false;
10481 MachineFrameInfo &MFI = MF.getFrameInfo();
10482 bool OptSize = shouldLowerMemFuncForSize(MF);
10483 Align Alignment = std::min(DstAlign, SrcAlign);
10484
10485 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10486 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10487 DstAlignCanChange = true;
10488
10489 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10490 std::vector<LLT> MemOps;
10491
10492 const auto &DstMMO = **MI.memoperands_begin();
10493 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10494 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10495 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10496
10497 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10498 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10499 // same thing here.
10500 if (!findGISelOptimalMemOpLowering(
10501 MemOps, Limit,
10502 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10503 /*IsVolatile*/ true),
10504 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10505 MF.getFunction().getAttributes(), TLI))
10506 return UnableToLegalize;
10507
10508 if (DstAlignCanChange) {
10509 // Get an estimate of the type from the LLT.
10510 Type *IRTy = getTypeForLLT(MemOps[0], C);
10511 Align NewAlign = DL.getABITypeAlign(IRTy);
10512
10513 // Don't promote to an alignment that would require dynamic stack
10514 // realignment.
10515 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10516 if (!TRI->hasStackRealignment(MF))
10517 if (MaybeAlign StackAlign = DL.getStackAlignment())
10518 NewAlign = std::min(NewAlign, *StackAlign);
10519
10520 if (NewAlign > Alignment) {
10521 Alignment = NewAlign;
10522 unsigned FI = FIDef->getOperand(1).getIndex();
10523 // Give the stack frame object a larger alignment if needed.
10524 if (MFI.getObjectAlign(FI) < Alignment)
10525 MFI.setObjectAlignment(FI, Alignment);
10526 }
10527 }
10528
10529 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10530
10531 MachineIRBuilder MIB(MI);
10532 // Memmove requires that we perform the loads first before issuing the stores.
10533 // Apart from that, this loop is pretty much doing the same thing as the
10534 // memcpy codegen function.
10535 unsigned CurrOffset = 0;
10536 SmallVector<Register, 16> LoadVals;
10537 for (auto CopyTy : MemOps) {
10538 // Construct MMO for the load.
10539 auto *LoadMMO =
10540 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10541
10542 // Create the load.
10543 Register LoadPtr = Src;
10544 if (CurrOffset != 0) {
10545 LLT SrcTy = MRI.getType(Src);
10546 auto Offset =
10547 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10548 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10549 }
10550 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10551 CurrOffset += CopyTy.getSizeInBytes();
10552 }
10553
10554 CurrOffset = 0;
10555 for (unsigned I = 0; I < MemOps.size(); ++I) {
10556 LLT CopyTy = MemOps[I];
10557 // Now store the values loaded.
10558 auto *StoreMMO =
10559 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10560
10561 Register StorePtr = Dst;
10562 if (CurrOffset != 0) {
10563 LLT DstTy = MRI.getType(Dst);
10564 auto Offset =
10565 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10566 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10567 }
10568 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10569 CurrOffset += CopyTy.getSizeInBytes();
10570 }
10571 MI.eraseFromParent();
10572 return Legalized;
10573}
10574
10575LegalizerHelper::LegalizeResult
10576LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
10577 const unsigned Opc = MI.getOpcode();
10578 // This combine is fairly complex so it's not written with a separate
10579 // matcher function.
10580 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10581 Opc == TargetOpcode::G_MEMSET) &&
10582 "Expected memcpy like instruction");
10583
10584 auto MMOIt = MI.memoperands_begin();
10585 const MachineMemOperand *MemOp = *MMOIt;
10586
10587 Align DstAlign = MemOp->getBaseAlign();
10588 Align SrcAlign;
10589 auto [Dst, Src, Len] = MI.getFirst3Regs();
10590
10591 if (Opc != TargetOpcode::G_MEMSET) {
10592 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10593 MemOp = *(++MMOIt);
10594 SrcAlign = MemOp->getBaseAlign();
10595 }
10596
10597 // See if this is a constant length copy
10598 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10599 if (!LenVRegAndVal)
10600 return UnableToLegalize;
10601 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10602
10603 if (KnownLen == 0) {
10604 MI.eraseFromParent();
10605 return Legalized;
10606 }
10607
10608 if (MaxLen && KnownLen > MaxLen)
10609 return UnableToLegalize;
10610
10611 bool IsVolatile = MemOp->isVolatile();
10612 if (Opc == TargetOpcode::G_MEMCPY) {
10613 auto &MF = *MI.getParent()->getParent();
10614 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10615 bool OptSize = shouldLowerMemFuncForSize(MF);
10616 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10617 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10618 IsVolatile);
10619 }
10620 if (Opc == TargetOpcode::G_MEMMOVE)
10621 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10622 if (Opc == TargetOpcode::G_MEMSET)
10623 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10624 return UnableToLegalize;
10625}
unsigned const MachineRegisterInfo * MRI
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
APInt bitcastToAPInt() const
Definition APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1098
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
void negate()
Negate this APInt in place.
Definition APInt.h:1468
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:681
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:692
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:686
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:689
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:685
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:687
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:694
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:691
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:688
bool isSigned() const
Definition InstrTypes.h:932
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791
const APFloat & getValueAPF() const
Definition Constants.h:320
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:199
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
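A rough sketch of the spill-then-reload pattern this entry describes, written as if it were a member of LegalizerHelper (so the file's usual includes are assumed); the helper name, slot sizing, and memory-operand flags below are assumptions for illustration, not the in-tree implementation.
// Store Val to a fresh stack slot, then load it back with Res's type.
MachineInstrBuilder sketchStackStoreLoad(const DstOp &Res, const SrcOp &Val) {
  MachineFunction &MF = MIRBuilder.getMF();
  LLT SrcTy = Val.getLLTTy(*MIRBuilder.getMRI());
  LLT DstTy = Res.getLLTTy(*MIRBuilder.getMRI());
  // Reserve a slot large and aligned enough for both types.
  Align SlotAlign = getStackTemporaryAlignment(SrcTy, getStackTemporaryAlignment(DstTy));
  uint64_t SrcBytes = SrcTy.getSizeInBytes().getKnownMinValue();
  uint64_t DstBytes = DstTy.getSizeInBytes().getKnownMinValue();
  MachinePointerInfo PtrInfo;
  Register StackPtr =
      createStackTemporary(TypeSize::getFixed(SrcBytes > DstBytes ? SrcBytes : DstBytes),
                           SlotAlign, PtrInfo)
          .getReg(0);
  // Spill with the source type, reload with the destination type.
  auto *StoreMMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, SrcTy, SlotAlign);
  MIRBuilder.buildStore(Val, StackPtr, *StoreMMO);
  auto *LoadMMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, DstTy, SlotAlign);
  return MIRBuilder.buildLoad(Res, StackPtr, *LoadMMO);
}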
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
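Conceptually, the pointer this returns is the base address plus the index scaled by the element's store size. A simplified, self-contained model of that arithmetic (plain C++, hypothetical names, ignoring details such as index clamping and non-byte-sized elements):
#include <cstdint>
// Byte address of element `Index` of an in-memory vector that starts at BasePtr,
// assuming byte-sized elements and an in-range index.
uint64_t vectorElementAddress(uint64_t BasePtr, uint64_t Index, unsigned EltBits) {
  return BasePtr + Index * (EltBits / 8);
}
// Example: element 3 of a <4 x s32> vector stored at BasePtr sits at BasePtr + 3 * 4.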
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:64
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:71
A single uniqued string.
Definition Metadata.h:720
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:617
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
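A minimal sketch of the size-based choice this builder makes (a hypothetical free function for illustration, not the MachineIRBuilder implementation):
MachineInstrBuilder zextOrTruncSketch(MachineIRBuilder &B, const DstOp &Res, const SrcOp &Op) {
  unsigned DstSize = Res.getLLTTy(*B.getMRI()).getSizeInBits().getKnownMinValue();
  unsigned SrcSize = Op.getLLTTy(*B.getMRI()).getSizeInBits().getKnownMinValue();
  if (DstSize > SrcSize)
    return B.buildZExt(Res, Op);  // widen with zero-extension
  if (DstSize < SrcSize)
    return B.buildTrunc(Res, Op); // narrow with truncation
  return B.buildCopy(Res, Op);    // same size: plain copy
}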
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:414
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:611
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:283
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:310
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:823
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2033
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:651
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1665
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:232
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2118
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:293
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1565
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1622
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1160
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1189
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:368
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:506
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1817
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:241
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:352
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1277
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:280
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:609
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
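A tiny worked example contrasting the two power-of-two helpers above, assuming only the semantics stated in their entries (PowerOf2Ceil rounds up and is a no-op on an exact power of two; NextPowerOf2 is strictly greater):
#include <cassert>
#include "llvm/Support/MathExtras.h"
int main() {
  assert(llvm::PowerOf2Ceil(5) == 8);
  assert(llvm::PowerOf2Ceil(8) == 8);  // already a power of two: unchanged
  assert(llvm::NextPowerOf2(5) == 8);
  assert(llvm::NextPowerOf2(8) == 16); // strictly greater than the input
  return 0;
}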
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)