LLVM 22.0.0git
CombinerHelper.cpp
Go to the documentation of this file.
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
41#include <cmath>
42#include <optional>
43#include <tuple>
44
45#define DEBUG_TYPE "gi-combiner"
46
47using namespace llvm;
48using namespace MIPatternMatch;
49
50// Option to allow testing of the combiner while no targets know about indexed
51// addressing.
52static cl::opt<bool>
53 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
54 cl::desc("Force all indexed operations to be "
55 "legal for the GlobalISel combiner"));
56
58 MachineIRBuilder &B, bool IsPreLegalize,
61 const LegalizerInfo *LI)
62 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
63 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
64 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
65 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
66 (void)this->VT;
67}
68
71}
72
74 return Builder.getMF();
75}
76
79}
80
82
/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  // Little endian: value byte I is stored at memory offset I.
  return I;
}
91
92/// Determines the LogBase2 value for a non-null input value using the
93/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
95 auto &MRI = *MIB.getMRI();
96 LLT Ty = MRI.getType(V);
97 auto Ctlz = MIB.buildCTLZ(Ty, V);
98 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
99 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
100}
101
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  // Big endian: value byte I is mirrored from the last byte of the type.
  const unsigned LastByte = ByteWidth - 1;
  return LastByte - I;
}
110
111/// Given a map from byte offsets in memory to indices in a load/store,
112/// determine if that map corresponds to a little or big endian byte pattern.
113///
114/// \param MemOffset2Idx maps memory offsets to address offsets.
115/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
116///
117/// \returns true if the map corresponds to a big endian byte pattern, false if
118/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
119///
120/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
121/// are as follows:
122///
123/// AddrOffset Little endian Big endian
124/// 0 0 3
125/// 1 1 2
126/// 2 2 1
127/// 3 3 0
128static std::optional<bool>
130 int64_t LowestIdx) {
131 // Need at least two byte positions to decide on endianness.
132 unsigned Width = MemOffset2Idx.size();
133 if (Width < 2)
134 return std::nullopt;
135 bool BigEndian = true, LittleEndian = true;
136 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
137 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
138 if (MemOffsetAndIdx == MemOffset2Idx.end())
139 return std::nullopt;
140 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
141 assert(Idx >= 0 && "Expected non-negative byte offset?");
142 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
143 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
144 if (!BigEndian && !LittleEndian)
145 return std::nullopt;
146 }
147
148 assert((BigEndian != LittleEndian) &&
149 "Pattern cannot be both big and little endian!");
150 return BigEndian;
151}
152
154
155bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
156 assert(LI && "Must have LegalizerInfo to query isLegal!");
157 return LI->getAction(Query).Action == LegalizeActions::Legal;
158}
159
161 const LegalityQuery &Query) const {
162 return isPreLegalize() || isLegal(Query);
163}
164
166 return isLegal(Query) ||
168}
169
171 if (!Ty.isVector())
172 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
173 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
174 if (isPreLegalize())
175 return true;
176 LLT EltTy = Ty.getElementType();
177 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
178 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
179}
180
182 Register ToReg) const {
184
185 if (MRI.constrainRegAttrs(ToReg, FromReg))
186 MRI.replaceRegWith(FromReg, ToReg);
187 else
188 Builder.buildCopy(FromReg, ToReg);
189
191}
192
194 MachineOperand &FromRegOp,
195 Register ToReg) const {
196 assert(FromRegOp.getParent() && "Expected an operand in an MI");
197 Observer.changingInstr(*FromRegOp.getParent());
198
199 FromRegOp.setReg(ToReg);
200
201 Observer.changedInstr(*FromRegOp.getParent());
202}
203
205 unsigned ToOpcode) const {
206 Observer.changingInstr(FromMI);
207
208 FromMI.setDesc(Builder.getTII().get(ToOpcode));
209
210 Observer.changedInstr(FromMI);
211}
212
214 return RBI->getRegBank(Reg, MRI, *TRI);
215}
216
218 const RegisterBank *RegBank) const {
219 if (RegBank)
220 MRI.setRegBank(Reg, *RegBank);
221}
222
224 if (matchCombineCopy(MI)) {
226 return true;
227 }
228 return false;
229}
231 if (MI.getOpcode() != TargetOpcode::COPY)
232 return false;
233 Register DstReg = MI.getOperand(0).getReg();
234 Register SrcReg = MI.getOperand(1).getReg();
235 return canReplaceReg(DstReg, SrcReg, MRI);
236}
238 Register DstReg = MI.getOperand(0).getReg();
239 Register SrcReg = MI.getOperand(1).getReg();
240 replaceRegWith(MRI, DstReg, SrcReg);
241 MI.eraseFromParent();
242}
243
245 MachineInstr &MI, BuildFnTy &MatchInfo) const {
246 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
247 Register DstOp = MI.getOperand(0).getReg();
248 Register OrigOp = MI.getOperand(1).getReg();
249
250 if (!MRI.hasOneNonDBGUse(OrigOp))
251 return false;
252
253 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
254 // Even if only a single operand of the PHI is not guaranteed non-poison,
255 // moving freeze() backwards across a PHI can cause optimization issues for
256 // other users of that operand.
257 //
258 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
259 // the source register is unprofitable because it makes the freeze() more
260 // strict than is necessary (it would affect the whole register instead of
261 // just the subreg being frozen).
262 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
263 return false;
264
265 if (canCreateUndefOrPoison(OrigOp, MRI,
266 /*ConsiderFlagsAndMetadata=*/false))
267 return false;
268
269 std::optional<MachineOperand> MaybePoisonOperand;
270 for (MachineOperand &Operand : OrigDef->uses()) {
271 if (!Operand.isReg())
272 return false;
273
274 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
275 continue;
276
277 if (!MaybePoisonOperand)
278 MaybePoisonOperand = Operand;
279 else {
280 // We have more than one maybe-poison operand. Moving the freeze is
281 // unsafe.
282 return false;
283 }
284 }
285
286 // Eliminate freeze if all operands are guaranteed non-poison.
287 if (!MaybePoisonOperand) {
288 MatchInfo = [=](MachineIRBuilder &B) {
289 Observer.changingInstr(*OrigDef);
290 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
291 Observer.changedInstr(*OrigDef);
292 B.buildCopy(DstOp, OrigOp);
293 };
294 return true;
295 }
296
297 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
298 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
299
300 MatchInfo = [=](MachineIRBuilder &B) mutable {
301 Observer.changingInstr(*OrigDef);
302 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
303 Observer.changedInstr(*OrigDef);
304 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
305 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
307 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
308 Freeze.getReg(0));
309 replaceRegWith(MRI, DstOp, OrigOp);
310 };
311 return true;
312}
313
316 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
317 "Invalid instruction");
318 bool IsUndef = true;
319 MachineInstr *Undef = nullptr;
320
321 // Walk over all the operands of concat vectors and check if they are
322 // build_vector themselves or undef.
323 // Then collect their operands in Ops.
324 for (const MachineOperand &MO : MI.uses()) {
325 Register Reg = MO.getReg();
326 MachineInstr *Def = MRI.getVRegDef(Reg);
327 assert(Def && "Operand not defined");
328 if (!MRI.hasOneNonDBGUse(Reg))
329 return false;
330 switch (Def->getOpcode()) {
331 case TargetOpcode::G_BUILD_VECTOR:
332 IsUndef = false;
333 // Remember the operands of the build_vector to fold
334 // them into the yet-to-build flattened concat vectors.
335 for (const MachineOperand &BuildVecMO : Def->uses())
336 Ops.push_back(BuildVecMO.getReg());
337 break;
338 case TargetOpcode::G_IMPLICIT_DEF: {
339 LLT OpType = MRI.getType(Reg);
340 // Keep one undef value for all the undef operands.
341 if (!Undef) {
342 Builder.setInsertPt(*MI.getParent(), MI);
343 Undef = Builder.buildUndef(OpType.getScalarType());
344 }
345 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
346 OpType.getScalarType() &&
347 "All undefs should have the same type");
348 // Break the undef vector in as many scalar elements as needed
349 // for the flattening.
350 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
351 EltIdx != EltEnd; ++EltIdx)
352 Ops.push_back(Undef->getOperand(0).getReg());
353 break;
354 }
355 default:
356 return false;
357 }
358 }
359
360 // Check if the combine is illegal
361 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
363 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
364 return false;
365 }
366
367 if (IsUndef)
368 Ops.clear();
369
370 return true;
371}
374 // We determined that the concat_vectors can be flatten.
375 // Generate the flattened build_vector.
376 Register DstReg = MI.getOperand(0).getReg();
377 Builder.setInsertPt(*MI.getParent(), MI);
378 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
379
380 // Note: IsUndef is sort of redundant. We could have determine it by
381 // checking that at all Ops are undef. Alternatively, we could have
382 // generate a build_vector of undefs and rely on another combine to
383 // clean that up. For now, given we already gather this information
384 // in matchCombineConcatVectors, just save compile time and issue the
385 // right thing.
386 if (Ops.empty())
387 Builder.buildUndef(NewDstReg);
388 else
389 Builder.buildBuildVector(NewDstReg, Ops);
390 replaceRegWith(MRI, DstReg, NewDstReg);
391 MI.eraseFromParent();
392}
393
395 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
396 "Invalid instruction");
397 auto &Shuffle = cast<GShuffleVector>(MI);
398
399 Register SrcVec1 = Shuffle.getSrc1Reg();
400 Register SrcVec2 = Shuffle.getSrc2Reg();
401
402 LLT SrcVec1Type = MRI.getType(SrcVec1);
403 LLT SrcVec2Type = MRI.getType(SrcVec2);
404 return SrcVec1Type.isVector() && SrcVec2Type.isVector();
405}
406
408 auto &Shuffle = cast<GShuffleVector>(MI);
409
410 Register SrcVec1 = Shuffle.getSrc1Reg();
411 Register SrcVec2 = Shuffle.getSrc2Reg();
412 LLT EltTy = MRI.getType(SrcVec1).getElementType();
413 int Width = MRI.getType(SrcVec1).getNumElements();
414
415 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
416 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
417
418 SmallVector<Register> Extracts;
419 // Select only applicable elements from unmerged values.
420 for (int Val : Shuffle.getMask()) {
421 if (Val == -1)
422 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
423 else if (Val < Width)
424 Extracts.push_back(Unmerge1.getReg(Val));
425 else
426 Extracts.push_back(Unmerge2.getReg(Val - Width));
427 }
428 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
429 if (Extracts.size() == 1)
430 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
431 else
432 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
433 MI.eraseFromParent();
434}
435
438 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
439 auto ConcatMI1 =
440 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
441 auto ConcatMI2 =
442 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
443 if (!ConcatMI1 || !ConcatMI2)
444 return false;
445
446 // Check that the sources of the Concat instructions have the same type
447 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
448 MRI.getType(ConcatMI2->getSourceReg(0)))
449 return false;
450
451 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
452 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
453 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
454 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
455 // Check if the index takes a whole source register from G_CONCAT_VECTORS
456 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
457 if (Mask[i] == -1) {
458 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
459 if (i + j >= Mask.size())
460 return false;
461 if (Mask[i + j] != -1)
462 return false;
463 }
465 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
466 return false;
467 Ops.push_back(0);
468 } else if (Mask[i] % ConcatSrcNumElt == 0) {
469 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
470 if (i + j >= Mask.size())
471 return false;
472 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
473 return false;
474 }
475 // Retrieve the source register from its respective G_CONCAT_VECTORS
476 // instruction
477 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
478 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
479 } else {
480 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
481 ConcatMI1->getNumSources()));
482 }
483 } else {
484 return false;
485 }
486 }
487
489 {TargetOpcode::G_CONCAT_VECTORS,
490 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
491 return false;
492
493 return !Ops.empty();
494}
495
498 LLT SrcTy;
499 for (Register &Reg : Ops) {
500 if (Reg != 0)
501 SrcTy = MRI.getType(Reg);
502 }
503 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
504
505 Register UndefReg = 0;
506
507 for (Register &Reg : Ops) {
508 if (Reg == 0) {
509 if (UndefReg == 0)
510 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
511 Reg = UndefReg;
512 }
513 }
514
515 if (Ops.size() > 1)
516 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
517 else
518 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
519 MI.eraseFromParent();
520}
521
524 if (matchCombineShuffleVector(MI, Ops)) {
526 return true;
527 }
528 return false;
529}
530
533 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
534 "Invalid instruction kind");
535 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
536 Register Src1 = MI.getOperand(1).getReg();
537 LLT SrcType = MRI.getType(Src1);
538 // As bizarre as it may look, shuffle vector can actually produce
539 // scalar! This is because at the IR level a <1 x ty> shuffle
540 // vector is perfectly valid.
541 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
542 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
543
544 // If the resulting vector is smaller than the size of the source
545 // vectors being concatenated, we won't be able to replace the
546 // shuffle vector into a concat_vectors.
547 //
548 // Note: We may still be able to produce a concat_vectors fed by
549 // extract_vector_elt and so on. It is less clear that would
550 // be better though, so don't bother for now.
551 //
552 // If the destination is a scalar, the size of the sources doesn't
553 // matter. we will lower the shuffle to a plain copy. This will
554 // work only if the source and destination have the same size. But
555 // that's covered by the next condition.
556 //
557 // TODO: If the size between the source and destination don't match
558 // we could still emit an extract vector element in that case.
559 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
560 return false;
561
562 // Check that the shuffle mask can be broken evenly between the
563 // different sources.
564 if (DstNumElts % SrcNumElts != 0)
565 return false;
566
567 // Mask length is a multiple of the source vector length.
568 // Check if the shuffle is some kind of concatenation of the input
569 // vectors.
570 unsigned NumConcat = DstNumElts / SrcNumElts;
571 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
572 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
573 for (unsigned i = 0; i != DstNumElts; ++i) {
574 int Idx = Mask[i];
575 // Undef value.
576 if (Idx < 0)
577 continue;
578 // Ensure the indices in each SrcType sized piece are sequential and that
579 // the same source is used for the whole piece.
580 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
581 (ConcatSrcs[i / SrcNumElts] >= 0 &&
582 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
583 return false;
584 // Remember which source this index came from.
585 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
586 }
587
588 // The shuffle is concatenating multiple vectors together.
589 // Collect the different operands for that.
590 Register UndefReg;
591 Register Src2 = MI.getOperand(2).getReg();
592 for (auto Src : ConcatSrcs) {
593 if (Src < 0) {
594 if (!UndefReg) {
595 Builder.setInsertPt(*MI.getParent(), MI);
596 UndefReg = Builder.buildUndef(SrcType).getReg(0);
597 }
598 Ops.push_back(UndefReg);
599 } else if (Src == 0)
600 Ops.push_back(Src1);
601 else
602 Ops.push_back(Src2);
603 }
604 return true;
605}
606
608 MachineInstr &MI, const ArrayRef<Register> Ops) const {
609 Register DstReg = MI.getOperand(0).getReg();
610 Builder.setInsertPt(*MI.getParent(), MI);
611 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
612
613 if (Ops.size() == 1)
614 Builder.buildCopy(NewDstReg, Ops[0]);
615 else
616 Builder.buildMergeLikeInstr(NewDstReg, Ops);
617
618 replaceRegWith(MRI, DstReg, NewDstReg);
619 MI.eraseFromParent();
620}
621
623 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
624 "Invalid instruction kind");
625
626 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
627 return Mask.size() == 1;
628}
629
631 Register DstReg = MI.getOperand(0).getReg();
632 Builder.setInsertPt(*MI.getParent(), MI);
633
634 int I = MI.getOperand(3).getShuffleMask()[0];
635 Register Src1 = MI.getOperand(1).getReg();
636 LLT Src1Ty = MRI.getType(Src1);
637 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
638 Register SrcReg;
639 if (I >= Src1NumElts) {
640 SrcReg = MI.getOperand(2).getReg();
641 I -= Src1NumElts;
642 } else if (I >= 0)
643 SrcReg = Src1;
644
645 if (I < 0)
646 Builder.buildUndef(DstReg);
647 else if (!MRI.getType(SrcReg).isVector())
648 Builder.buildCopy(DstReg, SrcReg);
649 else
651
652 MI.eraseFromParent();
653}
654
655namespace {
656
657/// Select a preference between two uses. CurrentUse is the current preference
658/// while *ForCandidate is attributes of the candidate under consideration.
659PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
660 PreferredTuple &CurrentUse,
661 const LLT TyForCandidate,
662 unsigned OpcodeForCandidate,
663 MachineInstr *MIForCandidate) {
664 if (!CurrentUse.Ty.isValid()) {
665 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
666 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
667 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
668 return CurrentUse;
669 }
670
671 // We permit the extend to hoist through basic blocks but this is only
672 // sensible if the target has extending loads. If you end up lowering back
673 // into a load and extend during the legalizer then the end result is
674 // hoisting the extend up to the load.
675
676 // Prefer defined extensions to undefined extensions as these are more
677 // likely to reduce the number of instructions.
678 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
679 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
680 return CurrentUse;
681 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
682 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
683 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
684
685 // Prefer sign extensions to zero extensions as sign-extensions tend to be
686 // more expensive. Don't do this if the load is already a zero-extend load
687 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
688 // later.
689 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
690 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
691 OpcodeForCandidate == TargetOpcode::G_ZEXT)
692 return CurrentUse;
693 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
694 OpcodeForCandidate == TargetOpcode::G_SEXT)
695 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
696 }
697
698 // This is potentially target specific. We've chosen the largest type
699 // because G_TRUNC is usually free. One potential catch with this is that
700 // some targets have a reduced number of larger registers than smaller
701 // registers and this choice potentially increases the live-range for the
702 // larger value.
703 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
704 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
705 }
706 return CurrentUse;
707}
708
709/// Find a suitable place to insert some instructions and insert them. This
710/// function accounts for special cases like inserting before a PHI node.
711/// The current strategy for inserting before PHI's is to duplicate the
712/// instructions for each predecessor. However, while that's ok for G_TRUNC
713/// on most targets since it generally requires no code, other targets/cases may
714/// want to try harder to find a dominating block.
715static void InsertInsnsWithoutSideEffectsBeforeUse(
718 MachineOperand &UseMO)>
719 Inserter) {
720 MachineInstr &UseMI = *UseMO.getParent();
721
722 MachineBasicBlock *InsertBB = UseMI.getParent();
723
724 // If the use is a PHI then we want the predecessor block instead.
725 if (UseMI.isPHI()) {
726 MachineOperand *PredBB = std::next(&UseMO);
727 InsertBB = PredBB->getMBB();
728 }
729
730 // If the block is the same block as the def then we want to insert just after
731 // the def instead of at the start of the block.
732 if (InsertBB == DefMI.getParent()) {
734 Inserter(InsertBB, std::next(InsertPt), UseMO);
735 return;
736 }
737
738 // Otherwise we want the start of the BB
739 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
740}
741} // end anonymous namespace
742
744 PreferredTuple Preferred;
745 if (matchCombineExtendingLoads(MI, Preferred)) {
746 applyCombineExtendingLoads(MI, Preferred);
747 return true;
748 }
749 return false;
750}
751
752static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
753 unsigned CandidateLoadOpc;
754 switch (ExtOpc) {
755 case TargetOpcode::G_ANYEXT:
756 CandidateLoadOpc = TargetOpcode::G_LOAD;
757 break;
758 case TargetOpcode::G_SEXT:
759 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
760 break;
761 case TargetOpcode::G_ZEXT:
762 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
763 break;
764 default:
765 llvm_unreachable("Unexpected extend opc");
766 }
767 return CandidateLoadOpc;
768}
769
771 MachineInstr &MI, PreferredTuple &Preferred) const {
772 // We match the loads and follow the uses to the extend instead of matching
773 // the extends and following the def to the load. This is because the load
774 // must remain in the same position for correctness (unless we also add code
775 // to find a safe place to sink it) whereas the extend is freely movable.
776 // It also prevents us from duplicating the load for the volatile case or just
777 // for performance.
778 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
779 if (!LoadMI)
780 return false;
781
782 Register LoadReg = LoadMI->getDstReg();
783
784 LLT LoadValueTy = MRI.getType(LoadReg);
785 if (!LoadValueTy.isScalar())
786 return false;
787
788 // Most architectures are going to legalize <s8 loads into at least a 1 byte
789 // load, and the MMOs can only describe memory accesses in multiples of bytes.
790 // If we try to perform extload combining on those, we can end up with
791 // %a(s8) = extload %ptr (load 1 byte from %ptr)
792 // ... which is an illegal extload instruction.
793 if (LoadValueTy.getSizeInBits() < 8)
794 return false;
795
796 // For non power-of-2 types, they will very likely be legalized into multiple
797 // loads. Don't bother trying to match them into extending loads.
798 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
799 return false;
800
801 // Find the preferred type aside from the any-extends (unless it's the only
802 // one) and non-extending ops. We'll emit an extending load to that type and
803 // and emit a variant of (extend (trunc X)) for the others according to the
804 // relative type sizes. At the same time, pick an extend to use based on the
805 // extend involved in the chosen type.
806 unsigned PreferredOpcode =
807 isa<GLoad>(&MI)
808 ? TargetOpcode::G_ANYEXT
809 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
810 Preferred = {LLT(), PreferredOpcode, nullptr};
811 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
812 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
813 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
814 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
815 const auto &MMO = LoadMI->getMMO();
816 // Don't do anything for atomics.
817 if (MMO.isAtomic())
818 continue;
819 // Check for legality.
820 if (!isPreLegalize()) {
821 LegalityQuery::MemDesc MMDesc(MMO);
822 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
823 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
824 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
825 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
826 .Action != LegalizeActions::Legal)
827 continue;
828 }
829 Preferred = ChoosePreferredUse(MI, Preferred,
830 MRI.getType(UseMI.getOperand(0).getReg()),
831 UseMI.getOpcode(), &UseMI);
832 }
833 }
834
835 // There were no extends
836 if (!Preferred.MI)
837 return false;
838 // It should be impossible to chose an extend without selecting a different
839 // type since by definition the result of an extend is larger.
840 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
841
842 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
843 return true;
844}
845
847 MachineInstr &MI, PreferredTuple &Preferred) const {
848 // Rewrite the load to the chosen extending load.
849 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
850
851 // Inserter to insert a truncate back to the original type at a given point
852 // with some basic CSE to limit truncate duplication to one per BB.
854 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
855 MachineBasicBlock::iterator InsertBefore,
856 MachineOperand &UseMO) {
857 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
858 if (PreviouslyEmitted) {
860 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
862 return;
863 }
864
865 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
866 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
867 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
868 EmittedInsns[InsertIntoBB] = NewMI;
869 replaceRegOpWith(MRI, UseMO, NewDstReg);
870 };
871
873 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
874 MI.setDesc(Builder.getTII().get(LoadOpc));
875
876 // Rewrite all the uses to fix up the types.
877 auto &LoadValue = MI.getOperand(0);
880
881 for (auto *UseMO : Uses) {
882 MachineInstr *UseMI = UseMO->getParent();
883
884 // If the extend is compatible with the preferred extend then we should fix
885 // up the type and extend so that it uses the preferred use.
886 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
887 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
888 Register UseDstReg = UseMI->getOperand(0).getReg();
889 MachineOperand &UseSrcMO = UseMI->getOperand(1);
890 const LLT UseDstTy = MRI.getType(UseDstReg);
891 if (UseDstReg != ChosenDstReg) {
892 if (Preferred.Ty == UseDstTy) {
893 // If the use has the same type as the preferred use, then merge
894 // the vregs and erase the extend. For example:
895 // %1:_(s8) = G_LOAD ...
896 // %2:_(s32) = G_SEXT %1(s8)
897 // %3:_(s32) = G_ANYEXT %1(s8)
898 // ... = ... %3(s32)
899 // rewrites to:
900 // %2:_(s32) = G_SEXTLOAD ...
901 // ... = ... %2(s32)
902 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
904 UseMO->getParent()->eraseFromParent();
905 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
906 // If the preferred size is smaller, then keep the extend but extend
907 // from the result of the extending load. For example:
908 // %1:_(s8) = G_LOAD ...
909 // %2:_(s32) = G_SEXT %1(s8)
910 // %3:_(s64) = G_ANYEXT %1(s8)
911 // ... = ... %3(s64)
912 /// rewrites to:
913 // %2:_(s32) = G_SEXTLOAD ...
914 // %3:_(s64) = G_ANYEXT %2:_(s32)
915 // ... = ... %3(s64)
916 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
917 } else {
918 // If the preferred size is large, then insert a truncate. For
919 // example:
920 // %1:_(s8) = G_LOAD ...
921 // %2:_(s64) = G_SEXT %1(s8)
922 // %3:_(s32) = G_ZEXT %1(s8)
923 // ... = ... %3(s32)
924 /// rewrites to:
925 // %2:_(s64) = G_SEXTLOAD ...
926 // %4:_(s8) = G_TRUNC %2:_(s32)
927 // %3:_(s64) = G_ZEXT %2:_(s8)
928 // ... = ... %3(s64)
929 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
930 InsertTruncAt);
931 }
932 continue;
933 }
934 // The use is (one of) the uses of the preferred use we chose earlier.
935 // We're going to update the load to def this value later so just erase
936 // the old extend.
938 UseMO->getParent()->eraseFromParent();
939 continue;
940 }
941
942 // The use isn't an extend. Truncate back to the type we originally loaded.
943 // This is free on many targets.
944 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
945 }
946
947 MI.getOperand(0).setReg(ChosenDstReg);
949}
950
// Match (G_AND (G_*LOAD %ptr), low-bit mask) and fold it into a narrower
// G_ZEXTLOAD when the mask keeps no more bits than the in-memory type holds.
// NOTE(review): the signature head (original line 951, presumably
// CombinerHelper::matchCombineLoadWithAndMask) was dropped by the extraction.
952 BuildFnTy &MatchInfo) const {
953 assert(MI.getOpcode() == TargetOpcode::G_AND);
954
955 // If we have the following code:
956 // %mask = G_CONSTANT 255
957 // %ld = G_LOAD %ptr, (load s16)
958 // %and = G_AND %ld, %mask
959 //
960 // Try to fold it into
961 // %ld = G_ZEXTLOAD %ptr, (load s8)
962
963 Register Dst = MI.getOperand(0).getReg();
// Vector G_AND is not handled by this combine.
964 if (MRI.getType(Dst).isVector())
965 return false;
966
967 auto MaybeMask =
968 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
969 if (!MaybeMask)
970 return false;
971
972 APInt MaskVal = MaybeMask->Value;
973
// Only contiguous low-bit masks (2^k - 1) can be expressed as a zext.
974 if (!MaskVal.isMask())
975 return false;
976
977 Register SrcReg = MI.getOperand(1).getReg();
978 // Don't use getOpcodeDef() here since intermediate instructions may have
979 // multiple users.
980 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
981 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
982 return false;
983
984 Register LoadReg = LoadMI->getDstReg();
985 LLT RegTy = MRI.getType(LoadReg);
986 Register PtrReg = LoadMI->getPointerReg();
987 unsigned RegSize = RegTy.getSizeInBits();
988 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
989 unsigned MaskSizeBits = MaskVal.countr_one();
990
991 // The mask may not be larger than the in-memory type, as it might cover sign
992 // extended bits
993 if (MaskSizeBits > LoadSizeBits.getValue())
994 return false;
995
996 // If the mask covers the whole destination register, there's nothing to
997 // extend
998 if (MaskSizeBits >= RegSize)
999 return false;
1000
1001 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
1002 // at least byte loads. Avoid creating such loads here
1003 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
1004 return false;
1005
1006 const MachineMemOperand &MMO = LoadMI->getMMO();
1007 LegalityQuery::MemDesc MemDesc(MMO);
1008
1009 // Don't modify the memory access size if this is atomic/volatile, but we can
1010 // still adjust the opcode to indicate the high bit behavior.
1011 if (LoadMI->isSimple())
1012 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
1013 else if (LoadSizeBits.getValue() > MaskSizeBits ||
1014 LoadSizeBits.getValue() == RegSize)
1015 return false;
1016
1017 // TODO: Could check if it's legal with the reduced or original memory size.
// NOTE(review): original line 1018 was dropped by the extraction — presumably
// the head of the legality call (cf. the intact parallel query at line 1136,
// `if (!isLegalOrBeforeLegalizer(`). Restore from upstream before compiling.
1019 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
1020 return false;
1021
// Deferred rewrite: rebuild the load as a G_ZEXTLOAD defining the G_AND's dst.
1022 MatchInfo = [=](MachineIRBuilder &B) {
1023 B.setInstrAndDebugLoc(*LoadMI);
1024 auto &MF = B.getMF();
1025 auto PtrInfo = MMO.getPointerInfo();
1026 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
1027 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
1028 LoadMI->eraseFromParent();
1029 };
1030 return true;
1031}
1032
// Returns true if DefMI comes before UseMI (or is UseMI itself) within their
// common basic block, decided by a linear scan from the block start.
// NOTE(review): the signature head (original line 1033) was dropped by the
// extraction — presumably `bool CombinerHelper::isPredecessor(...DefMI,`.
1034 const MachineInstr &UseMI) const {
1035 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1036 "shouldn't consider debug uses");
1037 assert(DefMI.getParent() == UseMI.getParent());
1038 if (&DefMI == &UseMI)
1039 return true;
1040 const MachineBasicBlock &MBB = *DefMI.getParent();
// Whichever of the two instructions the scan reaches first is the predecessor.
1041 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
1042 return &MI == &DefMI || &MI == &UseMI;
1043 });
1044 if (DefOrUse == MBB.end())
1045 llvm_unreachable("Block must contain both DefMI and UseMI!");
1046 return &*DefOrUse == &DefMI;
1047}
1048
// Returns true if DefMI dominates UseMI. Uses the MachineDominatorTree when
// available; otherwise can only answer within a single block via a linear
// ordering check (cross-block queries conservatively return false).
// NOTE(review): the signature head (original line 1049) was dropped by the
// extraction — presumably `bool CombinerHelper::dominates(...DefMI,`.
1050 const MachineInstr &UseMI) const {
1051 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1052 "shouldn't consider debug uses");
1053 if (MDT)
1054 return MDT->dominates(&DefMI, &UseMI);
1055 else if (DefMI.getParent() != UseMI.getParent())
1056 return false;
1057
1058 return isPredecessor(DefMI, UseMI);
1059}
1060
// Match G_SEXT_INREG fed (possibly through a G_TRUNC) by a G_SEXTLOAD of the
// same bit width; the in-register extend is then redundant.
// NOTE(review): the signature line (original 1061) was dropped by the
// extraction.
1062 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1063 Register SrcReg = MI.getOperand(1).getReg();
1064 Register LoadUser = SrcReg;
1065
1066 if (MRI.getType(SrcReg).isVector())
1067 return false;
1068
// Look through a truncate so we compare against the actual load result.
1069 Register TruncSrc;
1070 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1071 LoadUser = TruncSrc;
1072
1073 uint64_t SizeInBits = MI.getOperand(2).getImm();
1074 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1075 // need any extend at all, just a truncate.
1076 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1077 // If truncating more than the original extended value, abort.
1078 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1079 if (TruncSrc &&
1080 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1081 return false;
1082 if (LoadSizeBits == SizeInBits)
1083 return true;
1084 }
1085 return false;
1086}
1087
// Replace the redundant G_SEXT_INREG with a plain copy of its input.
// NOTE(review): the signature line (original 1088) was dropped by the
// extraction.
1089 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1090 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1091 MI.eraseFromParent();
1092}
1093
// Match G_SEXT_INREG of a one-use G_LOAD so it can be folded into a (possibly
// narrower) G_SEXTLOAD; MatchInfo receives (load dst reg, new size in bits).
// NOTE(review): the signature head (original line 1094) was dropped by the
// extraction.
1095 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1096 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1097
1098 Register DstReg = MI.getOperand(0).getReg();
1099 LLT RegTy = MRI.getType(DstReg);
1100
1101 // Only supports scalars for now.
1102 if (RegTy.isVector())
1103 return false;
1104
1105 Register SrcReg = MI.getOperand(1).getReg();
1106 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1107 if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
1108 return false;
1109
1110 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1111
1112 // If the sign extend extends from a narrower width than the load's width,
1113 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1114 // Avoid widening the load at all.
1115 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1116
1117 // Don't generate G_SEXTLOADs with a < 1 byte width.
1118 if (NewSizeBits < 8)
1119 return false;
1120 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1121 // anyway for most targets.
1122 if (!isPowerOf2_32(NewSizeBits))
1123 return false;
1124
1125 const MachineMemOperand &MMO = LoadDef->getMMO();
1126 LegalityQuery::MemDesc MMDesc(MMO);
1127
1128 // Don't modify the memory access size if this is atomic/volatile, but we can
1129 // still adjust the opcode to indicate the high bit behavior.
1130 if (LoadDef->isSimple())
1131 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1132 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1133 return false;
1134
1135 // TODO: Could check if it's legal with the reduced or original memory size.
1136 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1137 {MRI.getType(LoadDef->getDstReg()),
1138 MRI.getType(LoadDef->getPointerReg())},
1139 {MMDesc}}))
1140 return false;
1141
1142 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1143 return true;
1144}
1145
// Rewrite the matched (G_LOAD + G_SEXT_INREG) pair into a single G_SEXTLOAD
// using the narrowed size computed by matchSextInRegOfLoad.
// NOTE(review): the signature head (original line 1146) was dropped by the
// extraction.
1147 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1148 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1149 Register LoadReg;
1150 unsigned ScalarSizeBits;
1151 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1152 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1153
1154 // If we have the following:
1155 // %ld = G_LOAD %ptr, (load 2)
1156 // %ext = G_SEXT_INREG %ld, 8
1157 // ==>
1158 // %ld = G_SEXTLOAD %ptr (load 1)
1159
1160 auto &MMO = LoadDef->getMMO();
1161 Builder.setInstrAndDebugLoc(*LoadDef);
1162 auto &MF = Builder.getMF();
1163 auto PtrInfo = MMO.getPointerInfo();
// The new MMO uses the narrowed byte size; the G_SEXTLOAD defines the
// G_SEXT_INREG's destination directly.
1164 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1165 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1166 LoadDef->getPointerReg(), *NewMMO);
1167 MI.eraseFromParent();
1168
1169 // Not all loads can be deleted, so make sure the old one is removed.
1170 LoadDef->eraseFromParent();
1171}
1172
1173/// Return true if 'MI' is a load or a store that may be fold it's address
1174/// operand into the load / store addressing mode.
// NOTE(review): original lines 1175-1177 were dropped by the extraction —
// presumably the static function signature (canFoldInAddressingMode) and the
// declaration of the `AM` addressing-mode struct used below. Restore from
// upstream before compiling.
1178 auto *MF = MI->getMF();
1179 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1180 if (!Addr)
1181 return false;
1182
// Classify the address as [reg + imm] when the offset is constant, otherwise
// as [reg + reg], and ask the target whether that mode is legal.
1183 AM.HasBaseReg = true;
1184 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1185 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1186 else
1187 AM.Scale = 1; // [reg +/- reg]
1188
1189 return TLI.isLegalAddressingMode(
1190 MF->getDataLayout(), AM,
1191 getTypeForLLT(MI->getMMO().getMemoryType(),
1192 MF->getFunction().getContext()),
1193 MI->getMMO().getAddrSpace());
1194}
1195
1196static unsigned getIndexedOpc(unsigned LdStOpc) {
1197 switch (LdStOpc) {
1198 case TargetOpcode::G_LOAD:
1199 return TargetOpcode::G_INDEXED_LOAD;
1200 case TargetOpcode::G_STORE:
1201 return TargetOpcode::G_INDEXED_STORE;
1202 case TargetOpcode::G_ZEXTLOAD:
1203 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1204 case TargetOpcode::G_SEXTLOAD:
1205 return TargetOpcode::G_INDEXED_SEXTLOAD;
1206 default:
1207 llvm_unreachable("Unexpected opcode");
1208 }
1209}
1210
// Query the LegalizerInfo whether the G_INDEXED_* form of this load/store is
// legal for its value/pointer types and memory description.
1211bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1212 // Check for legality.
1213 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1214 LLT Ty = MRI.getType(LdSt.getReg(0));
1215 LLT MemTy = LdSt.getMMO().getMemoryType();
// NOTE(review): original lines 1216 and 1218 were dropped by the extraction —
// presumably the head/tail of the `MemDescrs` SmallVector construction that
// this brace-init line belongs to. Restore from upstream before compiling.
1217 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1219 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
// Indexed stores take (new ptr def, value, ptr); indexed loads produce
// (value, new ptr) — build the operand type list accordingly.
1220 SmallVector<LLT> OpTys;
1221 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1222 OpTys = {PtrTy, Ty, Ty};
1223 else
1224 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1225
1226 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1227 return isLegal(Q);
1228}
1229
// Command-line cap on how many uses of a base pointer findPostIndexCandidate
// inspects before giving up (compile-time guard).
// NOTE(review): the declaration head (original line 1230) was dropped by the
// extraction — presumably `static cl::opt<unsigned> PostIndexUseThreshold(`.
1231 "post-index-use-threshold", cl::Hidden, cl::init(32),
1232 cl::desc("Number of uses of a base pointer to check before it is no longer "
1233 "considered for post-indexing."));
1234
// Look for a G_PTR_ADD user of this load/store's pointer whose offset can be
// folded into a post-indexed access; on success fills Addr (the ptr-add's
// result), Base, Offset, and whether the offset constant must be cloned.
1235bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
// NOTE(review): original line 1236 (the middle of the signature — presumably
// `Register &Base, Register &Offset,`) was dropped by the extraction.
1237 bool &RematOffset) const {
1238 // We're looking for the following pattern, for either load or store:
1239 // %baseptr:_(p0) = ...
1240 // G_STORE %val(s64), %baseptr(p0)
1241 // %offset:_(s64) = G_CONSTANT i64 -256
1242 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1243 const auto &TLI = getTargetLowering();
1244
1245 Register Ptr = LdSt.getPointerReg();
1246 // If the store is the only use, don't bother.
1247 if (MRI.hasOneNonDBGUse(Ptr))
1248 return false;
1249
1250 if (!isIndexedLoadStoreLegal(LdSt))
1251 return false;
1252
// Frame-index bases are better served by frame-offset folding.
1253 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1254 return false;
1255
1256 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1257 auto *PtrDef = MRI.getVRegDef(Ptr);
1258
1259 unsigned NumUsesChecked = 0;
1260 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1261 if (++NumUsesChecked > PostIndexUseThreshold)
1262 return false; // Try to avoid exploding compile time.
1263
1264 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1265 // The use itself might be dead. This can happen during combines if DCE
1266 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1267 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1268 continue;
1269
1270 // Check the user of this isn't the store, otherwise we'd be generate a
1271 // indexed store defining its own use.
1272 if (StoredValDef == &Use)
1273 continue;
1274
1275 Offset = PtrAdd->getOffsetReg();
1276 if (!ForceLegalIndexing &&
1277 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1278 /*IsPre*/ false, MRI))
1279 continue;
1280
1281 // Make sure the offset calculation is before the potentially indexed op.
1282 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1283 RematOffset = false;
1284 if (!dominates(*OffsetDef, LdSt)) {
1285 // If the offset however is just a G_CONSTANT, we can always just
1286 // rematerialize it where we need it.
1287 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1288 continue;
1289 RematOffset = true;
1290 }
1291
// Profitability/legality scan over every other user of the base pointer.
1292 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1293 if (&BasePtrUse == PtrDef)
1294 continue;
1295
1296 // If the user is a later load/store that can be post-indexed, then don't
1297 // combine this one.
1298 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1299 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1300 dominates(LdSt, *BasePtrLdSt) &&
1301 isIndexedLoadStoreLegal(*BasePtrLdSt))
1302 return false;
1303
1304 // Now we're looking for the key G_PTR_ADD instruction, which contains
1305 // the offset add that we want to fold.
1306 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1307 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1308 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1309 // If the use is in a different block, then we may produce worse code
1310 // due to the extra register pressure.
1311 if (BaseUseUse.getParent() != LdSt.getParent())
1312 return false;
1313
1314 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1315 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1316 return false;
1317 }
1318 if (!dominates(LdSt, BasePtrUse))
1319 return false; // All use must be dominated by the load/store.
1320 }
1321 }
1322
1323 Addr = PtrAdd->getReg(0);
1324 Base = PtrAdd->getBaseReg();
1325 return true;
1326 }
1327
1328 return false;
1329}
1330
// Look for a pre-index candidate: the load/store's pointer is itself a
// G_PTR_ADD whose base+offset can be folded into a pre-indexed access.
1331bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1332 Register &Base,
1333 Register &Offset) const {
1334 auto &MF = *LdSt.getParent()->getParent();
1335 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1336
1337 Addr = LdSt.getPointerReg();
// NOTE(review): original lines 1338-1339 were dropped by the extraction —
// presumably the mi_match of a G_PTR_ADD that binds Base and Offset, whose
// failure this `return false` belongs to. Restore from upstream.
1340 return false;
1341
1342 if (!ForceLegalIndexing &&
1343 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1344 return false;
1345
1346 if (!isIndexedLoadStoreLegal(LdSt))
1347 return false;
1348
// NOTE(review): original line 1349 was dropped by the extraction — presumably
// the definition of `BaseDef` (e.g. via MRI.getVRegDef(Base)).
1350 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1351 return false;
1352
1353 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1354 // Would require a copy.
1355 if (Base == St->getValueReg())
1356 return false;
1357
1358 // We're expecting one use of Addr in MI, but it could also be the
1359 // value stored, which isn't actually dominated by the instruction.
1360 if (St->getValueReg() == Addr)
1361 return false;
1362 }
1363
1364 // Avoid increasing cross-block register pressure.
1365 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1366 if (AddrUse.getParent() != LdSt.getParent())
1367 return false;
1368
1369 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1370 // That might allow us to end base's liveness here by adjusting the constant.
1371 bool RealUse = false;
1372 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1373 if (!dominates(LdSt, AddrUse))
1374 return false; // All use must be dominated by the load/store.
1375
1376 // If Ptr may be folded in addressing mode of other use, then it's
1377 // not profitable to do this transformation.
1378 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1379 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1380 RealUse = true;
1381 } else {
1382 RealUse = true;
1383 }
1384 }
1385 return RealUse;
1386}
1387
// Match G_EXTRACT_VECTOR_ELT of a vector that comes straight from a simple
// G_LOAD and, when profitable/legal, narrow it to a scalar load of just the
// extracted element.
// NOTE(review): the signature head (original line 1388, presumably
// CombinerHelper::matchCombineExtractedVectorLoad) was dropped by the
// extraction.
1389 MachineInstr &MI, BuildFnTy &MatchInfo) const {
1390 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1391
1392 // Check if there is a load that defines the vector being extracted from.
1393 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1394 if (!LoadMI)
1395 return false;
1396
1397 Register Vector = MI.getOperand(1).getReg();
1398 LLT VecEltTy = MRI.getType(Vector).getElementType();
1399
1400 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1401
1402 // Checking whether we should reduce the load width.
// NOTE(review): original line 1403 was dropped by the extraction — the
// condition whose failure this `return false` belongs to. Restore from
// upstream.
1404 return false;
1405
1406 // Check if the defining load is simple.
1407 if (!LoadMI->isSimple())
1408 return false;
1409
1410 // If the vector element type is not a multiple of a byte then we are unable
1411 // to correctly compute an address to load only the extracted element as a
1412 // scalar.
1413 if (!VecEltTy.isByteSized())
1414 return false;
1415
1416 // Check for load fold barriers between the extraction and the load.
1417 if (MI.getParent() != LoadMI->getParent())
1418 return false;
1419 const unsigned MaxIter = 20;
1420 unsigned Iter = 0;
1421 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1422 if (II->isLoadFoldBarrier())
1423 return false;
1424 if (Iter++ == MaxIter)
1425 return false;
1426 }
1427
1428 // Check if the new load that we are going to create is legal
1429 // if we are in the post-legalization phase.
1430 MachineMemOperand MMO = LoadMI->getMMO();
1431 Align Alignment = MMO.getAlign();
1432 MachinePointerInfo PtrInfo;
// NOTE(review): original line 1433 was dropped by the extraction — presumably
// the declaration of `Offset` assigned below.
1434
1435 // Finding the appropriate PtrInfo if offset is a known constant.
1436 // This is required to create the memory operand for the narrowed load.
1437 // This machine memory operand object helps us infer about legality
1438 // before we proceed to combine the instruction.
// NOTE(review): querying the *vector* register for a constant looks
// suspicious — a constant extract index would be MI.getOperand(2) (`Index`
// below). Verify against upstream.
1439 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1440 int Elt = CVal->getZExtValue();
1441 // FIXME: should be (ABI size)*Elt.
1442 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1443 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1444 } else {
1445 // Discard the pointer info except the address space because the memory
1446 // operand can't represent this new access since the offset is variable.
1447 Offset = VecEltTy.getSizeInBits() / 8;
// NOTE(review): original line 1448 was dropped by the extraction — presumably
// the PtrInfo assignment for the variable-offset case.
1449 }
1450
1451 Alignment = commonAlignment(Alignment, Offset);
1452
1453 Register VecPtr = LoadMI->getPointerReg();
1454 LLT PtrTy = MRI.getType(VecPtr);
1455
1456 MachineFunction &MF = *MI.getMF();
1457 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1458
1459 LegalityQuery::MemDesc MMDesc(*NewMMO);
1460
// NOTE(review): original line 1461 was dropped by the extraction — presumably
// `if (!isLegalOrBeforeLegalizer(` heading this query (cf. line 1136).
1462 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1463 return false;
1464
1465 // Load must be allowed and fast on the target.
// NOTE(review): original line 1466 was dropped by the extraction — presumably
// the definition of `C` (the LLVMContext) used below.
1467 auto &DL = MF.getDataLayout();
1468 unsigned Fast = 0;
1469 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1470 &Fast) ||
1471 !Fast)
1472 return false;
1473
1474 Register Result = MI.getOperand(0).getReg();
1475 Register Index = MI.getOperand(2).getReg();
1476
// Deferred rewrite: compute the element address and load it as a scalar.
1477 MatchInfo = [=](MachineIRBuilder &B) {
1478 GISelObserverWrapper DummyObserver;
1479 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1480 //// Get pointer to the vector element.
1481 Register finalPtr = Helper.getVectorElementPointer(
1482 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1483 Index);
1484 // New G_LOAD instruction.
1485 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1486 // Remove original GLOAD instruction.
1487 LoadMI->eraseFromParent();
1488 };
1489
1490 return true;
1491}
1492
// Try to turn a non-atomic load/store into a pre- or post-indexed form,
// preferring pre-indexing; fills MatchInfo on success.
// NOTE(review): the signature head (original line 1493) was dropped by the
// extraction.
1494 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1495 auto &LdSt = cast<GLoadStore>(MI);
1496
1497 if (LdSt.isAtomic())
1498 return false;
1499
1500 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1501 MatchInfo.Offset);
1502 if (!MatchInfo.IsPre &&
1503 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1504 MatchInfo.Offset, MatchInfo.RematOffset))
1505 return false;
1506
1507 return true;
1508}
1509
// Build the G_INDEXED_* instruction from the matched candidate, then erase
// both the original access and the now-folded G_PTR_ADD.
// NOTE(review): the signature head (original line 1510) was dropped by the
// extraction.
1511 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1512 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1513 unsigned Opcode = MI.getOpcode();
1514 bool IsStore = Opcode == TargetOpcode::G_STORE;
1515 unsigned NewOpcode = getIndexedOpc(Opcode);
1516
1517 // If the offset constant didn't happen to dominate the load/store, we can
1518 // just clone it as needed.
1519 if (MatchInfo.RematOffset) {
1520 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1521 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1522 *OldCst->getOperand(1).getCImm());
1523 MatchInfo.Offset = NewCst.getReg(0);
1524 }
1525
// Stores define only the updated address; loads define value + address.
1526 auto MIB = Builder.buildInstr(NewOpcode);
1527 if (IsStore) {
1528 MIB.addDef(MatchInfo.Addr);
1529 MIB.addUse(MI.getOperand(0).getReg());
1530 } else {
1531 MIB.addDef(MI.getOperand(0).getReg());
1532 MIB.addDef(MatchInfo.Addr);
1533 }
1534
1535 MIB.addUse(MatchInfo.Base);
1536 MIB.addUse(MatchInfo.Offset);
1537 MIB.addImm(MatchInfo.IsPre);
1538 MIB->cloneMemRefs(*MI.getMF(), MI);
1539 MI.eraseFromParent();
1540 AddrDef.eraseFromParent();
1541
1542 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1543}
1544
// Match a G_[SU]DIV / G_[SU]REM pair on the same operands in the same block
// so they can be merged into a single G_[SU]DIVREM; OtherMI receives the
// partner instruction.
// NOTE(review): the signature head (original line 1545) was dropped by the
// extraction.
1546 MachineInstr *&OtherMI) const {
1547 unsigned Opcode = MI.getOpcode();
1548 bool IsDiv, IsSigned;
1549
1550 switch (Opcode) {
1551 default:
1552 llvm_unreachable("Unexpected opcode!");
1553 case TargetOpcode::G_SDIV:
1554 case TargetOpcode::G_UDIV: {
1555 IsDiv = true;
1556 IsSigned = Opcode == TargetOpcode::G_SDIV;
1557 break;
1558 }
1559 case TargetOpcode::G_SREM:
1560 case TargetOpcode::G_UREM: {
1561 IsDiv = false;
1562 IsSigned = Opcode == TargetOpcode::G_SREM;
1563 break;
1564 }
1565 }
1566
1567 Register Src1 = MI.getOperand(1).getReg();
1568 unsigned DivOpcode, RemOpcode, DivremOpcode;
1569 if (IsSigned) {
1570 DivOpcode = TargetOpcode::G_SDIV;
1571 RemOpcode = TargetOpcode::G_SREM;
1572 DivremOpcode = TargetOpcode::G_SDIVREM;
1573 } else {
1574 DivOpcode = TargetOpcode::G_UDIV;
1575 RemOpcode = TargetOpcode::G_UREM;
1576 DivremOpcode = TargetOpcode::G_UDIVREM;
1577 }
1578
1579 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1580 return false;
1581
1582 // Combine:
1583 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1584 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1585 // into:
1586 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1587
1588 // Combine:
1589 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1590 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1591 // into:
1592 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1593
// Scan the other users of the first operand for the complementary op with
// matching operands.
1594 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1595 if (MI.getParent() == UseMI.getParent() &&
1596 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1597 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1598 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1599 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1600 OtherMI = &UseMI;
1601 return true;
1602 }
1603 }
1604
1605 return false;
1606}
1607
// Replace the matched div/rem pair with one G_[SU]DIVREM inserted at the
// earlier of the two instructions, then erase both originals.
// NOTE(review): the signature head (original line 1608) was dropped by the
// extraction.
1609 MachineInstr *&OtherMI) const {
1610 unsigned Opcode = MI.getOpcode();
1611 assert(OtherMI && "OtherMI shouldn't be empty.");
1612
1613 Register DestDivReg, DestRemReg;
1614 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1615 DestDivReg = MI.getOperand(0).getReg();
1616 DestRemReg = OtherMI->getOperand(0).getReg();
1617 } else {
1618 DestDivReg = OtherMI->getOperand(0).getReg();
1619 DestRemReg = MI.getOperand(0).getReg();
1620 }
1621
1622 bool IsSigned =
1623 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1624
1625 // Check which instruction is first in the block so we don't break def-use
1626 // deps by "moving" the instruction incorrectly. Also keep track of which
1627 // instruction is first so we pick it's operands, avoiding use-before-def
1628 // bugs.
1629 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1630 Builder.setInstrAndDebugLoc(*FirstInst);
1631
1632 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1633 : TargetOpcode::G_UDIVREM,
1634 {DestDivReg, DestRemReg},
1635 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1636 MI.eraseFromParent();
1637 OtherMI->eraseFromParent();
1638}
1639
// Match a (G_BRCOND; G_BR) terminator pair where inverting the condition lets
// the conditional branch target the far block and the near block fall through
// — generally better for branch predictors.
// NOTE(review): the signature head (original line 1640) was dropped by the
// extraction.
1641 MachineInstr &MI, MachineInstr *&BrCond) const {
1642 assert(MI.getOpcode() == TargetOpcode::G_BR);
1643
1644 // Try to match the following:
1645 // bb1:
1646 // G_BRCOND %c1, %bb2
1647 // G_BR %bb3
1648 // bb2:
1649 // ...
1650 // bb3:
1651
1652 // The above pattern does not have a fall through to the successor bb2, always
1653 // resulting in a branch no matter which path is taken. Here we try to find
1654 // and replace that pattern with conditional branch to bb3 and otherwise
1655 // fallthrough to bb2. This is generally better for branch predictors.
1656
1657 MachineBasicBlock *MBB = MI.getParent();
// NOTE(review): original line 1658 was dropped by the extraction — presumably
// the declaration of the `BrIt` iterator pointing at MI.
1659 if (BrIt == MBB->begin())
1660 return false;
1661 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1662
1663 BrCond = &*std::prev(BrIt);
1664 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1665 return false;
1666
1667 // Check that the next block is the conditional branch target. Also make sure
1668 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1669 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1670 return BrCondTarget != MI.getOperand(0).getMBB() &&
1671 MBB->isLayoutSuccessor(BrCondTarget);
1672}
1673
// Invert the branch condition (XOR with true) and swap the two branch
// targets so the previously-branched-to block becomes the fallthrough.
// NOTE(review): the signature head (original line 1674) was dropped by the
// extraction.
1675 MachineInstr &MI, MachineInstr *&BrCond) const {
1676 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
// NOTE(review): original line 1677 was dropped by the extraction — presumably
// positioning the Builder at BrCond before emitting the constant/XOR below.
1678 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1679 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1680 // this to i1 only since we might not know for sure what kind of
1681 // compare generated the condition value.
1682 auto True = Builder.buildConstant(
1683 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1684 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1685
1686 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
// NOTE(review): original lines 1687 and 1689 were dropped by the extraction —
// presumably Observer changing/changed notifications around this G_BR edit.
1688 MI.getOperand(0).setMBB(FallthroughBB);
1690
1691 // Change the conditional branch to use the inverted condition and
1692 // new target block.
1693 Observer.changingInstr(*BrCond);
1694 BrCond->getOperand(0).setReg(Xor.getReg(0));
1695 BrCond->getOperand(1).setMBB(BrTarget);
1696 Observer.changedInstr(*BrCond);
1697}
1698
// Lower a memcpy-like intrinsic inline via LegalizerHelper; returns whether
// lowering succeeded.
// NOTE(review): the signature head (original line 1699) and line 1704 (the
// right-hand side of the comparison, presumably
// LegalizerHelper::LegalizeResult::Legalized) were dropped by the extraction.
1700 MachineIRBuilder HelperBuilder(MI);
1701 GISelObserverWrapper DummyObserver;
1702 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1703 return Helper.lowerMemcpyInline(MI) ==
1705}
1706
// Lower a mem* family intrinsic (bounded by MaxLen) via LegalizerHelper;
// returns whether lowering succeeded.
// NOTE(review): the signature head (original line 1707) and line 1713 (the
// right-hand side of the comparison) were dropped by the extraction.
1708 unsigned MaxLen) const {
1709 MachineIRBuilder HelperBuilder(MI);
1710 GISelObserverWrapper DummyObserver;
1711 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1712 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1714}
1715
// Constant-fold a unary FP generic instruction (FNEG/FABS/FPTRUNC/FSQRT/
// FLOG2) applied to the APFloat constant Val, returning the folded value in
// the appropriate semantics.
// NOTE(review): the signature head (original line 1716, presumably
// `static APFloat constantFoldFpUnary(const MachineInstr &MI,`) was dropped
// by the extraction.
1717 const MachineRegisterInfo &MRI,
1718 const APFloat &Val) {
1719 APFloat Result(Val);
1720 switch (MI.getOpcode()) {
1721 default:
1722 llvm_unreachable("Unexpected opcode!");
1723 case TargetOpcode::G_FNEG: {
1724 Result.changeSign();
1725 return Result;
1726 }
1727 case TargetOpcode::G_FABS: {
1728 Result.clearSign();
1729 return Result;
1730 }
1731 case TargetOpcode::G_FPTRUNC: {
1732 bool Unused;
1733 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
// NOTE(review): original line 1734 was dropped by the extraction — presumably
// the head of a Result.convert(...) call targeting DstTy's semantics.
1735 &Unused);
1736 return Result;
1737 }
1738 case TargetOpcode::G_FSQRT: {
1739 bool Unused;
// NOTE(review): original line 1740 was dropped by the extraction — presumably
// a Result.convert(...) to double semantics before convertToDouble below.
1741 &Unused);
1742 Result = APFloat(sqrt(Result.convertToDouble()));
1743 break;
1744 }
1745 case TargetOpcode::G_FLOG2: {
1746 bool Unused;
// NOTE(review): original line 1747 was dropped by the extraction — presumably
// a Result.convert(...) to double semantics before convertToDouble below.
1748 &Unused);
1749 Result = APFloat(log2(Result.convertToDouble()));
1750 break;
1751 }
1752 }
1753 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1754 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1755 // `G_FLOG2` reach here.
1756 bool Unused;
1757 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1758 return Result;
1759}
1760
// Replace the FP unary instruction with a G_FCONSTANT of its folded value.
// NOTE(review): the signature head (original line 1761) was dropped by the
// extraction.
1762 MachineInstr &MI, const ConstantFP *Cst) const {
1763 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1764 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1765 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1766 MI.eraseFromParent();
1767}
1768
// Match two chained G_PTR_ADDs with constant offsets and fold them into one
// G_PTR_ADD with the summed immediate, unless that would turn a legal
// addressing mode into an illegal one.
// NOTE(review): the signature head (original line 1769, presumably
// CombinerHelper::matchPtrAddImmedChain) was dropped by the extraction.
1770 PtrAddChain &MatchInfo) const {
1771 // We're trying to match the following pattern:
1772 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1773 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1774 // -->
1775 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1776
1777 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1778 return false;
1779
1780 Register Add2 = MI.getOperand(1).getReg();
1781 Register Imm1 = MI.getOperand(2).getReg();
1782 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1783 if (!MaybeImmVal)
1784 return false;
1785
1786 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1787 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1788 return false;
1789
1790 Register Base = Add2Def->getOperand(1).getReg();
1791 Register Imm2 = Add2Def->getOperand(2).getReg();
1792 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1793 if (!MaybeImm2Val)
1794 return false;
1795
1796 // Check if the new combined immediate forms an illegal addressing mode.
1797 // Do not combine if it was legal before but would get illegal.
1798 // To do so, we need to find a load/store user of the pointer to get
1799 // the access type.
1800 Type *AccessTy = nullptr;
1801 auto &MF = *MI.getMF();
1802 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1803 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1804 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1805 MF.getFunction().getContext());
1806 break;
1807 }
1808 }
// NOTE(review): original line 1809 was dropped by the extraction — presumably
// the declaration of the `AMNew` addressing-mode struct used below.
1810 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1811 AMNew.BaseOffs = CombinedImm.getSExtValue();
1812 if (AccessTy) {
1813 AMNew.HasBaseReg = true;
// NOTE(review): original line 1814 was dropped by the extraction — presumably
// the declaration of the `AMOld` addressing-mode struct used below.
1815 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1816 AMOld.HasBaseReg = true;
1817 unsigned AS = MRI.getType(Add2).getAddressSpace();
1818 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1819 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1820 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1821 return false;
1822 }
1823
1824 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1825 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1826 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1827 // largest signed integer that fits into the index type, which is the maximum
1828 // size of allocated objects according to the IR Language Reference.
1829 unsigned PtrAddFlags = MI.getFlags();
1830 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1831 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1832 bool IsInBounds =
1833 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1834 unsigned Flags = 0;
1835 if (IsNoUWrap)
// NOTE(review): original lines 1836 and 1838-1839 were dropped by the
// extraction — presumably the statements OR-ing the corresponding MIFlags
// into `Flags` for the nuw and inbounds cases.
1837 if (IsInBounds) {
1840 }
1841
1842 // Pass the combined immediate to the apply function.
1843 MatchInfo.Imm = AMNew.BaseOffs;
1844 MatchInfo.Base = Base;
1845 MatchInfo.Bank = getRegBank(Imm2);
1846 MatchInfo.Flags = Flags;
1847 return true;
1848}
1849
// Rewrite the root G_PTR_ADD in place: new base, new summed constant offset
// (placed in the matched register bank), and the combined wrap flags.
// NOTE(review): the signature head (original line 1850) was dropped by the
// extraction.
1851 PtrAddChain &MatchInfo) const {
1852 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1853 MachineIRBuilder MIB(MI);
1854 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1855 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1856 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
// NOTE(review): original lines 1857 and 1861 were dropped by the extraction —
// presumably Observer changing/changed notifications bracketing this edit.
1858 MI.getOperand(1).setReg(MatchInfo.Base);
1859 MI.getOperand(2).setReg(NewOffset.getReg(0));
1860 MI.setFlags(MatchInfo.Flags);
1862}
1863
// Match two chained shifts of the same opcode with constant amounts so they
// can be merged into one shift by the summed amount.
// NOTE(review): the signature head (original line 1864, presumably
// CombinerHelper::matchShiftImmedChain) was dropped by the extraction.
1865 RegisterImmPair &MatchInfo) const {
1866 // We're trying to match the following pattern with any of
1867 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1868 // %t1 = SHIFT %base, G_CONSTANT imm1
1869 // %root = SHIFT %t1, G_CONSTANT imm2
1870 // -->
1871 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1872
1873 unsigned Opcode = MI.getOpcode();
1874 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1875 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1876 Opcode == TargetOpcode::G_USHLSAT) &&
1877 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1878
1879 Register Shl2 = MI.getOperand(1).getReg();
1880 Register Imm1 = MI.getOperand(2).getReg();
1881 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1882 if (!MaybeImmVal)
1883 return false;
1884
// The inner shift must be the exact same opcode for the sums to compose.
1885 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1886 if (Shl2Def->getOpcode() != Opcode)
1887 return false;
1888
1889 Register Base = Shl2Def->getOperand(1).getReg();
1890 Register Imm2 = Shl2Def->getOperand(2).getReg();
1891 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1892 if (!MaybeImm2Val)
1893 return false;
1894
1895 // Pass the combined immediate to the apply function.
1896 MatchInfo.Imm =
1897 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1898 MatchInfo.Reg = Base;
1899
1900 // There is no simple replacement for a saturating unsigned left shift that
1901 // exceeds the scalar size.
1902 if (Opcode == TargetOpcode::G_USHLSAT &&
1903 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1904 return false;
1905
1906 return true;
1907}
1908
// Rewrite the root shift to use the original base and the summed constant;
// over-wide logical shifts become a constant zero, over-wide arithmetic /
// saturating-signed shifts clamp to scalar-size - 1.
// NOTE(review): the signature head (original line 1909) was dropped by the
// extraction.
1910 RegisterImmPair &MatchInfo) const {
1911 unsigned Opcode = MI.getOpcode();
1912 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1913 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1914 Opcode == TargetOpcode::G_USHLSAT) &&
1915 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1916
1917 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1918 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1919 auto Imm = MatchInfo.Imm;
1920
1921 if (Imm >= ScalarSizeInBits) {
1922 // Any logical shift that exceeds scalar size will produce zero.
1923 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1924 Builder.buildConstant(MI.getOperand(0), 0);
1925 MI.eraseFromParent();
1926 return;
1927 }
1928 // Arithmetic shift and saturating signed left shift have no effect beyond
1929 // scalar size.
1930 Imm = ScalarSizeInBits - 1;
1931 }
1932
1933 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1934 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
// NOTE(review): original lines 1935 and 1938 were dropped by the extraction —
// presumably Observer changing/changed notifications bracketing this edit.
1936 MI.getOperand(1).setReg(MatchInfo.Reg);
1937 MI.getOperand(2).setReg(NewImm);
1939}
1940
// CombinerHelper::matchShiftOfShiftedLogic (signature line 1941 omitted by the
// rendered listing). Matches shift(logic(shift(X, C0), Y), C1) so it can be
// rewritten as logic(shift(X, C0+C1), shift(Y, C1)); fills in MatchInfo.
1942 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1943 // We're trying to match the following pattern with any of
1944 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1945 // with any of G_AND/G_OR/G_XOR logic instructions.
1946 // %t1 = SHIFT %X, G_CONSTANT C0
1947 // %t2 = LOGIC %t1, %Y
1948 // %root = SHIFT %t2, G_CONSTANT C1
1949 // -->
1950 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1951 // %t4 = SHIFT %Y, G_CONSTANT C1
1952 // %root = LOGIC %t3, %t4
1953 unsigned ShiftOpcode = MI.getOpcode();
1954 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1955 ShiftOpcode == TargetOpcode::G_ASHR ||
1956 ShiftOpcode == TargetOpcode::G_LSHR ||
1957 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1958 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1959 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1960
1961 // Match a one-use bitwise logic op.
1962 Register LogicDest = MI.getOperand(1).getReg();
1963 if (!MRI.hasOneNonDBGUse(LogicDest))
1964 return false;
1965
1966 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1967 unsigned LogicOpcode = LogicMI->getOpcode();
1968 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1969 LogicOpcode != TargetOpcode::G_XOR)
1970 return false;
1971
1972 // Find a matching one-use shift by constant.
1973 const Register C1 = MI.getOperand(2).getReg();
1974 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
// A zero outer shift amount would make the transform a no-op, so reject it.
1975 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1976 return false;
1977
1978 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1979
// Helper: true iff *MI is the same shift opcode, has one non-debug use, and
// its amount operand is a constant (returned through ShiftVal).
1980 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1981 // Shift should match previous one and should be a one-use.
1982 if (MI->getOpcode() != ShiftOpcode ||
1983 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1984 return false;
1985
1986 // Must be a constant.
1987 auto MaybeImmVal =
1988 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1989 if (!MaybeImmVal)
1990 return false;
1991
1992 ShiftVal = MaybeImmVal->Value.getSExtValue();
1993 return true;
1994 };
1995
1996 // Logic ops are commutative, so check each operand for a match.
1997 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1998 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1999 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
2000 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
2001 uint64_t C0Val;
2002
2003 if (matchFirstShift(LogicMIOp1, C0Val)) {
2004 MatchInfo.LogicNonShiftReg = LogicMIReg2;
2005 MatchInfo.Shift2 = LogicMIOp1;
2006 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
2007 MatchInfo.LogicNonShiftReg = LogicMIReg1;
2008 MatchInfo.Shift2 = LogicMIOp2;
2009 } else
2010 return false;
2011
2012 MatchInfo.ValSum = C0Val + C1Val;
2013
2014 // The fold is not valid if the sum of the shift values exceeds bitwidth.
2015 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
2016 return false;
2017
2018 MatchInfo.Logic = LogicMI;
2019 return true;
2020}
2021
// CombinerHelper::applyShiftOfShiftedLogic (signature line 2022 omitted by
// the rendered listing). Rewrites shift(logic(shift(X,C0),Y),C1) into
// logic(shift(X,C0+C1), shift(Y,C1)) using the data collected in MatchInfo,
// then erases the now-dead logic op and the root shift.
2023 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2024 unsigned Opcode = MI.getOpcode();
2025 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2026 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2027 Opcode == TargetOpcode::G_SSHLSAT) &&
2028 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2029
2030 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2031 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2032
// Combined shift amount C0+C1, built in the type of the shift-amount operand.
2033 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2034
2035 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2036 Register Shift1 =
2037 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2038
2039 // If LogicNonShiftReg is the same to Shift1Base, and shift1 const is the same
2040 // to MatchInfo.Shift2 const, CSEMIRBuilder will reuse the old shift1 when
2041 // build shift2. So, if we erase MatchInfo.Shift2 at the end, actually we
2042 // remove old shift1. And it will cause crash later. So erase it earlier to
2043 // avoid the crash.
2044 MatchInfo.Shift2->eraseFromParent();
2045
2046 Register Shift2Const = MI.getOperand(2).getReg();
2047 Register Shift2 = Builder
2048 .buildInstr(Opcode, {DestType},
2049 {MatchInfo.LogicNonShiftReg, Shift2Const})
2050 .getReg(0);
2051
2052 Register Dest = MI.getOperand(0).getReg();
2053 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2054
2055 // This was one use so it's safe to remove it.
2056 MatchInfo.Logic->eraseFromParent();
2057
2058 MI.eraseFromParent();
2059}
2060
// CombinerHelper::matchCommuteShift (signature line 2061 omitted by the
// rendered listing). Matches (shl (add/or x, c1), c2) and prepares a rewrite
// to (add/or (shl x, c2), c1 << c2), subject to target preference.
2062 BuildFnTy &MatchInfo) const {
2063 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2064 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2065 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2066 auto &Shl = cast<GenericMachineInstr>(MI);
2067 Register DstReg = Shl.getReg(0);
2068 Register SrcReg = Shl.getReg(1);
2069 Register ShiftReg = Shl.getReg(2);
2070 Register X, C1;
2071
2072 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2073 return false;
2074
// NOTE(review): line 2076 of the original (the first alternative of this
// mi_match pattern, presumably the one-use G_ADD arm) is missing from this
// listing -- confirm the full pattern against the original source.
2075 if (!mi_match(SrcReg, MRI,
2077 m_GOr(m_Reg(X), m_Reg(C1))))))
2078 return false;
2079
2080 APInt C1Val, C2Val;
2081 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2082 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2083 return false;
2084
2085 auto *SrcDef = MRI.getVRegDef(SrcReg);
2086 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2087 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2088 LLT SrcTy = MRI.getType(SrcReg);
// Deferred rewrite: shift both operands, then rebuild the original add/or.
2089 MatchInfo = [=](MachineIRBuilder &B) {
2090 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2091 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2092 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2093 };
2094 return true;
2095}
2096
// CombinerHelper::matchCombineMulToShl (signature line 2097 omitted by the
// rendered listing). Matches G_MUL by an exact power-of-two constant; on
// success ShiftVal holds log2 of the multiplier.
2098 unsigned &ShiftVal) const {
2099 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2100 auto MaybeImmVal =
2101 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2102 if (!MaybeImmVal)
2103 return false;
2104
// exactLogBase2() returns -1 (as unsigned here) when the constant is not an
// exact power of two; the cast back to int32_t detects that sentinel.
2105 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2106 return (static_cast<int32_t>(ShiftVal) != -1);
2107}
2108
// CombinerHelper::applyCombineMulToShl (signature line 2109 omitted by the
// rendered listing). Converts the matched G_MUL in place into G_SHL by
// ShiftVal.
// NOTE(review): lines 2115 and 2119-2120 are missing from this listing --
// presumably Observer notifications and flag adjustment for the
// shift-by-(width-1) case; confirm against the original source.
2110 unsigned &ShiftVal) const {
2111 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2112 MachineIRBuilder MIB(MI);
2113 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2114 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2116 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2117 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2118 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2121}
2122
// CombinerHelper::matchCombineSubToAdd (signature line 2123 omitted by the
// rendered listing). Matches G_SUB with a constant RHS and prepares an
// in-place rewrite to G_ADD of the negated constant.
// NOTE(review): lines 2132, 2139 and 2142/2144-2145 are missing from this
// listing -- presumably the constant-RHS precondition and Observer/flag
// handling; confirm against the original source.
2124 BuildFnTy &MatchInfo) const {
2125 GSub &Sub = cast<GSub>(MI);
2126
2127 LLT Ty = MRI.getType(Sub.getReg(0));
2128
2129 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2130 return false;
2131
2133 return false;
2134
2135 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2136
// Deferred rewrite: mutate the G_SUB into G_ADD with the negated immediate.
2137 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2138 auto NegCst = B.buildConstant(Ty, -Imm);
2140 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2141 MI.getOperand(2).setReg(NegCst.getReg(0));
// -INT_MIN overflows, so the min-signed-value case needs special handling
// (the exact action is on the dropped lines).
2143 if (Imm.isMinSignedValue())
2146 };
2147 return true;
2148}
2149
2150// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
// CombinerHelper::matchCombineShlOfExtend (signature line 2151 omitted by the
// rendered listing). Fills MatchData with the extend's source and the shift
// amount when the narrow shift provably cannot overflow.
2152 RegisterImmPair &MatchData) const {
2153 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2154 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2155 return false;
2156
2157 Register LHS = MI.getOperand(1).getReg();
2158
2159 Register ExtSrc;
2160 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2161 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2162 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2163 return false;
2164
2165 Register RHS = MI.getOperand(2).getReg();
2166 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2167 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2168 if (!MaybeShiftAmtVal)
2169 return false;
2170
2171 if (LI) {
2172 LLT SrcTy = MRI.getType(ExtSrc);
2173
2174 // We only really care about the legality with the shifted value. We can
2175 // pick any type the constant shift amount, so ask the target what to
2176 // use. Otherwise we would have to guess and hope it is reported as legal.
2177 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2178 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2179 return false;
2180 }
2181
2182 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2183 MatchData.Reg = ExtSrc;
2184 MatchData.Imm = ShiftAmt;
2185
// Safe only when the shifted-out range is known zero in the narrow value and
// the amount stays inside the narrow type.
2186 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2187 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2188 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2189}
2190
// CombinerHelper::applyCombineShlOfExtend (signature line 2191 omitted by the
// rendered listing). Builds the narrow shift in the extend-source type and
// zero-extends the result into the original destination.
2192 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2193 Register ExtSrcReg = MatchData.Reg;
2194 int64_t ShiftAmtVal = MatchData.Imm;
2195
2196 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2197 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2198 auto NarrowShift =
2199 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2200 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2201 MI.eraseFromParent();
2202}
2203
// CombinerHelper::matchCombineMergeUnmerge (signature line 2204 omitted by
// the rendered listing). Matches merge(unmerge(x)) where every merge source
// is the corresponding unmerge result, in order; MatchInfo gets x.
2205 Register &MatchInfo) const {
2206 GMerge &Merge = cast<GMerge>(MI);
2207 SmallVector<Register, 16> MergedValues;
2208 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2209 MergedValues.emplace_back(Merge.getSourceReg(I));
2210
2211 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2212 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2213 return false;
2214
2215 for (unsigned I = 0; I < MergedValues.size(); ++I)
2216 if (MergedValues[I] != Unmerge->getReg(I))
2217 return false;
2218
2219 MatchInfo = Unmerge->getSourceReg();
2220 return true;
2221}
2222
// Static helper (signature line 2223 omitted by the rendered listing;
// presumably `static Register peekThroughBitcast(Register Reg, ...)`).
// Walks through any chain of G_BITCASTs and returns the ultimate source.
2224 const MachineRegisterInfo &MRI) {
2225 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2226 ;
2227
2228 return Reg;
2229}
2230
// CombinerHelper::matchCombineUnmergeMergeWithCompatibleTypes (signature on
// lines 2231-2232, omitted by the rendered listing). Matches
// unmerge(merge-like(a, b, ...)) -- possibly through a bitcast -- when the
// element types match or at least have equal sizes, collecting the merge
// sources into Operands.
2233 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2234 "Expected an unmerge");
2235 auto &Unmerge = cast<GUnmerge>(MI);
2236 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2237
2238 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2239 if (!SrcInstr)
2240 return false;
2241
2242 // Check the source type of the merge.
2243 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2244 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2245 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2246 if (SrcMergeTy != Dst0Ty && !SameSize)
2247 return false;
2248 // They are the same now (modulo a bitcast).
2249 // We can collect all the src registers.
2250 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2251 Operands.push_back(SrcInstr->getSourceReg(Idx));
2252 return true;
2253}
2254
// CombinerHelper::applyCombineUnmergeMergeWithCompatibleTypes (signature on
// lines 2255-2256, omitted by the rendered listing). Replaces each unmerge
// def with the corresponding collected merge source, inserting a cast (and,
// post-RBS, a bank-fixing copy) when types or register banks differ.
2257 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2258 "Expected an unmerge");
2259 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2260 "Not enough operands to replace all defs");
2261 unsigned NumElems = MI.getNumOperands() - 1;
2262
2263 LLT SrcTy = MRI.getType(Operands[0]);
2264 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2265 bool CanReuseInputDirectly = DstTy == SrcTy;
2266 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2267 Register DstReg = MI.getOperand(Idx).getReg();
2268 Register SrcReg = Operands[Idx];
2269
2270 // This combine may run after RegBankSelect, so we need to be aware of
2271 // register banks.
2272 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2273 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2274 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2275 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2276 }
2277
2278 if (CanReuseInputDirectly)
2279 replaceRegWith(MRI, DstReg, SrcReg);
2280 else
2281 Builder.buildCast(DstReg, SrcReg);
2282 }
2283 MI.eraseFromParent();
2284}
2285
// CombinerHelper::matchCombineUnmergeConstant (signature line 2286 omitted by
// the rendered listing). Matches an unmerge of a G_CONSTANT/G_FCONSTANT and
// splits the wide value into per-destination little-endian chunks in Csts.
2287 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2288 unsigned SrcIdx = MI.getNumOperands() - 1;
2289 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2290 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2291 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2292 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2293 return false;
2294 // Break down the big constant in smaller ones.
2295 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2296 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2297 ? CstVal.getCImm()->getValue()
2298 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2299
2300 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2301 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2302 // Unmerge a constant.
// Low chunk first: truncate, then shift the remaining bits down.
2303 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2304 Csts.emplace_back(Val.trunc(ShiftAmt));
2305 Val = Val.lshr(ShiftAmt);
2306 }
2307
2308 return true;
2309}
2310
// CombinerHelper::applyCombineUnmergeConstant (signature line 2311 omitted by
// the rendered listing). Materializes each pre-split chunk as a constant into
// the corresponding unmerge destination, then erases the unmerge.
2312 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2313 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2314 "Expected an unmerge");
2315 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2316 "Not enough operands to replace all defs");
2317 unsigned NumElems = MI.getNumOperands() - 1;
2318 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2319 Register DstReg = MI.getOperand(Idx).getReg();
2320 Builder.buildConstant(DstReg, Csts[Idx]);
2321 }
2322
2323 MI.eraseFromParent();
2324}
2325
// CombinerHelper::matchCombineUnmergeUndef (signature on lines 2326-2327,
// omitted by the rendered listing). If the unmerge source is G_IMPLICIT_DEF,
// prepares a rewrite that makes every destination undef.
2328 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2329 unsigned SrcIdx = MI.getNumOperands() - 1;
2330 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2331 MatchInfo = [&MI](MachineIRBuilder &B) {
2332 unsigned NumElems = MI.getNumOperands() - 1;
2333 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2334 Register DstReg = MI.getOperand(Idx).getReg();
2335 B.buildUndef(DstReg);
2336 }
2337 };
// The closure is built unconditionally; the match itself is this check.
2338 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2339}
2340
// CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc (signature line
// 2341 omitted by the rendered listing). Matches a scalar unmerge where only
// the first destination is live, so it can become a single G_TRUNC.
2342 MachineInstr &MI) const {
2343 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2344 "Expected an unmerge");
// Vector lanes can't be handled by a plain truncate of the source.
2345 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2346 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2347 return false;
2348 // Check that all the lanes are dead except the first one.
2349 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2350 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2351 return false;
2352 }
2353 return true;
2354}
2355
// CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc (signature line
// 2356 omitted by the rendered listing). Replaces the unmerge with a truncate
// of the source into the sole live (first) destination.
2357 MachineInstr &MI) const {
2358 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2359 Register Dst0Reg = MI.getOperand(0).getReg();
2360 Builder.buildTrunc(Dst0Reg, SrcReg);
2361 MI.eraseFromParent();
2362}
2363
// CombinerHelper::matchCombineUnmergeZExtToZExt (signature line 2364 omitted
// by the rendered listing). Matches a scalar unmerge whose source is a
// G_ZEXT small enough that all the extended bits land in the first
// destination (remaining lanes are all zero).
2365 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2366 "Expected an unmerge");
2367 Register Dst0Reg = MI.getOperand(0).getReg();
2368 LLT Dst0Ty = MRI.getType(Dst0Reg);
2369 // G_ZEXT on vector applies to each lane, so it will
2370 // affect all destinations. Therefore we won't be able
2371 // to simplify the unmerge to just the first definition.
2372 if (Dst0Ty.isVector())
2373 return false;
2374 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2375 LLT SrcTy = MRI.getType(SrcReg);
2376 if (SrcTy.isVector())
2377 return false;
2378
2379 Register ZExtSrcReg;
2380 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2381 return false;
2382
2383 // Finally we can replace the first definition with
2384 // a zext of the source if the definition is big enough to hold
2385 // all of ZExtSrc bits.
2386 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2387 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2388}
2389
// CombinerHelper::applyCombineUnmergeZExtToZExt (signature line 2390 omitted
// by the rendered listing). Rewrites the first destination as a zext (or a
// direct replacement on exact size match) and all remaining lanes as zero.
2391 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2392 "Expected an unmerge");
2393
2394 Register Dst0Reg = MI.getOperand(0).getReg();
2395
2396 MachineInstr *ZExtInstr =
2397 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2398 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2399 "Expecting a G_ZEXT");
2400
2401 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2402 LLT Dst0Ty = MRI.getType(Dst0Reg);
2403 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2404
2405 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2406 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2407 } else {
2408 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2409 "ZExt src doesn't fit in destination");
2410 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2411 }
2412
// Build the zero constant lazily -- only if there are extra lanes -- and
// share it across all of them.
2413 Register ZeroReg;
2414 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2415 if (!ZeroReg)
2416 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2417 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2418 }
2419 MI.eraseFromParent();
2420}
2421
// CombinerHelper::matchCombineShiftToUnmerge (signature line 2422 omitted by
// the rendered listing). Matches a scalar shift wider than TargetShiftSize
// whose constant amount is in [Size/2, Size), so it can be narrowed via
// unmerge/merge; ShiftVal receives the amount.
2423 unsigned TargetShiftSize,
2424 unsigned &ShiftVal) const {
2425 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2426 MI.getOpcode() == TargetOpcode::G_LSHR ||
2427 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2428
2429 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2430 if (Ty.isVector()) // TODO:
2431 return false;
2432
2433 // Don't narrow further than the requested size.
2434 unsigned Size = Ty.getSizeInBits();
2435 if (Size <= TargetShiftSize)
2436 return false;
2437
2438 auto MaybeImmVal =
2439 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2440 if (!MaybeImmVal)
2441 return false;
2442
2443 ShiftVal = MaybeImmVal->Value.getSExtValue();
2444 return ShiftVal >= Size / 2 && ShiftVal < Size;
2445}
2446
// CombinerHelper::applyCombineShiftToUnmerge (signature line 2447 omitted by
// the rendered listing). Splits the wide value into two halves and rebuilds
// the shift on the live half: the shifted-out half becomes zero (logical
// shifts) or a sign-replication (arithmetic shift).
2448 MachineInstr &MI, const unsigned &ShiftVal) const {
2449 Register DstReg = MI.getOperand(0).getReg();
2450 Register SrcReg = MI.getOperand(1).getReg();
2451 LLT Ty = MRI.getType(SrcReg);
2452 unsigned Size = Ty.getSizeInBits();
2453 unsigned HalfSize = Size / 2;
2454 assert(ShiftVal >= HalfSize);
2455
2456 LLT HalfTy = LLT::scalar(HalfSize);
2457
2458 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2459 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2460
2461 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2462 Register Narrowed = Unmerge.getReg(1);
2463
2464 // dst = G_LSHR s64:x, C for C >= 32
2465 // =>
2466 // lo, hi = G_UNMERGE_VALUES x
2467 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2468
2469 if (NarrowShiftAmt != 0) {
2470 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2471 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2472 }
2473
2474 auto Zero = Builder.buildConstant(HalfTy, 0);
2475 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2476 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2477 Register Narrowed = Unmerge.getReg(0);
2478 // dst = G_SHL s64:x, C for C >= 32
2479 // =>
2480 // lo, hi = G_UNMERGE_VALUES x
2481 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2482 if (NarrowShiftAmt != 0) {
2483 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2484 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2485 }
2486
2487 auto Zero = Builder.buildConstant(HalfTy, 0);
2488 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2489 } else {
2490 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
// Hi = sign bits of the upper half, reused by every G_ASHR sub-case below.
2491 auto Hi = Builder.buildAShr(
2492 HalfTy, Unmerge.getReg(1),
2493 Builder.buildConstant(HalfTy, HalfSize - 1));
2494
2495 if (ShiftVal == HalfSize) {
2496 // (G_ASHR i64:x, 32) ->
2497 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2498 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2499 } else if (ShiftVal == Size - 1) {
2500 // Don't need a second shift.
2501 // (G_ASHR i64:x, 63) ->
2502 // %narrowed = (G_ASHR hi_32(x), 31)
2503 // G_MERGE_VALUES %narrowed, %narrowed
2504 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2505 } else {
2506 auto Lo = Builder.buildAShr(
2507 HalfTy, Unmerge.getReg(1),
2508 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2509
2510 // (G_ASHR i64:x, C) ->, for C >= 32
2511 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2512 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2513 }
2514 }
2515
2516 MI.eraseFromParent();
2517}
2518
// CombinerHelper::tryCombineShiftToUnmerge (signature line 2519 omitted by
// the rendered listing). Convenience wrapper: match then apply; returns
// whether the combine fired.
2520 MachineInstr &MI, unsigned TargetShiftAmount) const {
2521 unsigned ShiftAmt;
2522 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2523 applyCombineShiftToUnmerge(MI, ShiftAmt);
2524 return true;
2525 }
2526
2527 return false;
2528}
2529
// CombinerHelper::matchCombineI2PToP2I (signature line 2530 omitted by the
// rendered listing). Matches inttoptr(ptrtoint(x)) where x's type equals the
// destination pointer type; Reg receives x.
2531 Register &Reg) const {
2532 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2533 Register DstReg = MI.getOperand(0).getReg();
2534 LLT DstTy = MRI.getType(DstReg);
2535 Register SrcReg = MI.getOperand(1).getReg();
2536 return mi_match(SrcReg, MRI,
2537 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2538}
2539
// CombinerHelper::applyCombineI2PToP2I (signature line 2540 omitted by the
// rendered listing). Replaces the round-tripped pointer with a plain copy.
2541 Register &Reg) const {
2542 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2543 Register DstReg = MI.getOperand(0).getReg();
2544 Builder.buildCopy(DstReg, Reg);
2545 MI.eraseFromParent();
2546}
2547
// CombinerHelper::applyCombineP2IToI2P (signature line 2548 omitted by the
// rendered listing). Folds ptrtoint(inttoptr(x)) into a zext-or-trunc of x
// (G_INTTOPTR is defined to zero-extend, so this preserves the value).
2549 Register &Reg) const {
2550 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2551 Register DstReg = MI.getOperand(0).getReg();
2552 Builder.buildZExtOrTrunc(DstReg, Reg);
2553 MI.eraseFromParent();
2554}
2555
// CombinerHelper::matchCombineAddP2IToPtrAdd (signature line 2556 omitted by
// the rendered listing). Matches G_ADD where one operand is ptrtoint(p) with
// a same-width pointer, so the add can become G_PTR_ADD. PtrReg.second
// records whether the pointer was the RHS (operands must be commuted).
2557 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2558 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2559 Register LHS = MI.getOperand(1).getReg();
2560 Register RHS = MI.getOperand(2).getReg();
2561 LLT IntTy = MRI.getType(LHS);
2562
2563 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2564 // instruction.
2565 PtrReg.second = false;
2566 for (Register SrcReg : {LHS, RHS}) {
2567 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2568 // Don't handle cases where the integer is implicitly converted to the
2569 // pointer width.
2570 LLT PtrTy = MRI.getType(PtrReg.first);
2571 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2572 return true;
2573 }
2574
// First iteration checked LHS; flag that a RHS match needs a commute.
2575 PtrReg.second = true;
2576 }
2577
2578 return false;
2579}
2580
// CombinerHelper::applyCombineAddP2IToPtrAdd (signature line 2581 omitted by
// the rendered listing). Rebuilds the add as ptrtoint(ptr_add(p, offset)),
// commuting the operands first if the pointer was on the RHS.
2582 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2583 Register Dst = MI.getOperand(0).getReg();
2584 Register LHS = MI.getOperand(1).getReg();
2585 Register RHS = MI.getOperand(2).getReg();
2586
2587 const bool DoCommute = PtrReg.second;
2588 if (DoCommute)
2589 std::swap(LHS, RHS);
// Use the pointer itself (pre-ptrtoint), not the matched integer operand.
2590 LHS = PtrReg.first;
2591
2592 LLT PtrTy = MRI.getType(LHS);
2593
2594 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2595 Builder.buildPtrToInt(Dst, PtrAdd);
2596 MI.eraseFromParent();
2597}
2598
// CombinerHelper::matchCombineConstPtrAddToI2P (signature line 2599 omitted
// by the rendered listing). Matches ptr_add(inttoptr(C1), C2) and computes
// the folded pointer constant into NewCst.
// NOTE(review): line 2604 is also missing from this listing -- confirm
// against the original source.
2600 APInt &NewCst) const {
2601 auto &PtrAdd = cast<GPtrAdd>(MI);
2602 Register LHS = PtrAdd.getBaseReg();
2603 Register RHS = PtrAdd.getOffsetReg();
2605
2606 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2607 APInt Cst;
2608 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2609 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2610 // G_INTTOPTR uses zero-extension
2611 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
// The offset, by contrast, is a signed quantity: sign-extend before adding.
2612 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2613 return true;
2614 }
2615 }
2616
2617 return false;
2618}
2619
// CombinerHelper::applyCombineConstPtrAddToI2P (signature line 2620 omitted
// by the rendered listing). Replaces the constant ptr_add with the folded
// constant.
2621 APInt &NewCst) const {
2622 auto &PtrAdd = cast<GPtrAdd>(MI);
2623 Register Dst = PtrAdd.getReg(0);
2624
2625 Builder.buildConstant(Dst, NewCst);
2626 PtrAdd.eraseFromParent();
2627}
2628
// CombinerHelper::matchCombineAnyExtTrunc (signature line 2629 omitted by the
// rendered listing). Matches anyext(trunc(x)) with matching types so the
// whole pair can be replaced by x.
2630 Register &Reg) const {
2631 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2632 Register DstReg = MI.getOperand(0).getReg();
2633 Register SrcReg = MI.getOperand(1).getReg();
// Look through COPYs between the anyext and the trunc.
2634 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2635 if (OriginalSrcReg.isValid())
2636 SrcReg = OriginalSrcReg;
2637 LLT DstTy = MRI.getType(DstReg);
2638 return mi_match(SrcReg, MRI,
2639 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2640 canReplaceReg(DstReg, Reg, MRI);
2641}
2642
// CombinerHelper::matchCombineZextTrunc (signature line 2643 omitted by the
// rendered listing). Matches zext(trunc(x)) where known-bits prove x's high
// bits are already zero, so the pair can be replaced by x.
2644 Register &Reg) const {
2645 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2646 Register DstReg = MI.getOperand(0).getReg();
2647 Register SrcReg = MI.getOperand(1).getReg();
2648 LLT DstTy = MRI.getType(DstReg);
2649 if (mi_match(SrcReg, MRI,
2650 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2651 canReplaceReg(DstReg, Reg, MRI)) {
2652 unsigned DstSize = DstTy.getScalarSizeInBits();
2653 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
// The bits the zext would force to zero must already be known zero in x.
2654 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2655 }
2656 return false;
2657}
2658
// Static helper (signature line 2659 omitted by the rendered listing;
// presumably `static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT
// TruncTy)`). Picks an intermediate type for narrowing a right shift that
// feeds a trunc; returning ShiftTy unchanged means "don't combine".
2660 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2661 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2662
2663 // ShiftTy > 32 > TruncTy -> 32
2664 if (ShiftSize > 32 && TruncSize < 32)
2665 return ShiftTy.changeElementSize(32);
2666
2667 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2668 // Some targets like it, some don't, some only like it under certain
2669 // conditions/processor versions, etc.
2670 // A TL hook might be needed for this.
2671
2672 // Don't combine
2673 return ShiftTy;
2674}
2675
// CombinerHelper::matchCombineTruncOfShift (signature line 2676 omitted by
// the rendered listing). Matches trunc(shift x, amt) where the shift can be
// performed in a narrower type without losing bits; MatchInfo gets the shift
// instruction and the chosen narrow type.
// NOTE(review): line 2728 is missing from this listing -- presumably the
// start of the isLegalOrBeforeLegalizer() legality query whose argument list
// is visible below; confirm against the original source.
2677 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2678 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2679 Register DstReg = MI.getOperand(0).getReg();
2680 Register SrcReg = MI.getOperand(1).getReg();
2681
2682 if (!MRI.hasOneNonDBGUse(SrcReg))
2683 return false;
2684
2685 LLT SrcTy = MRI.getType(SrcReg);
2686 LLT DstTy = MRI.getType(DstReg);
2687
2688 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2689 const auto &TL = getTargetLowering();
2690
2691 LLT NewShiftTy;
2692 switch (SrcMI->getOpcode()) {
2693 default:
2694 return false;
2695 case TargetOpcode::G_SHL: {
2696 NewShiftTy = DstTy;
2697
2698 // Make sure new shift amount is legal.
2699 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2700 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2701 return false;
2702 break;
2703 }
2704 case TargetOpcode::G_LSHR:
2705 case TargetOpcode::G_ASHR: {
2706 // For right shifts, we conservatively do not do the transform if the TRUNC
2707 // has any STORE users. The reason is that if we change the type of the
2708 // shift, we may break the truncstore combine.
2709 //
2710 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2711 for (auto &User : MRI.use_instructions(DstReg))
2712 if (User.getOpcode() == TargetOpcode::G_STORE)
2713 return false;
2714
2715 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2716 if (NewShiftTy == SrcTy)
2717 return false;
2718
2719 // Make sure we won't lose information by truncating the high bits.
2720 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2721 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2722 DstTy.getScalarSizeInBits()))
2723 return false;
2724 break;
2725 }
2726 }
2727
2729 {SrcMI->getOpcode(),
2730 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2731 return false;
2732
2733 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2734 return true;
2735}
2736
// CombinerHelper::applyCombineTruncOfShift (signature line 2737 omitted by
// the rendered listing). Re-emits the shift in the narrow type: truncate the
// shift source, shift, then truncate (or directly reuse) the result.
2738 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2739 MachineInstr *ShiftMI = MatchInfo.first;
2740 LLT NewShiftTy = MatchInfo.second;
2741
2742 Register Dst = MI.getOperand(0).getReg();
2743 LLT DstTy = MRI.getType(Dst);
2744
2745 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2746 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2747 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2748
2749 Register NewShift =
2750 Builder
2751 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2752 .getReg(0);
2753
2754 if (NewShiftTy == DstTy)
2755 replaceRegWith(MRI, Dst, NewShift)
2756 else
2757 Builder.buildTrunc(Dst, NewShift);
2758
2759 eraseInst(MI);
2760}
2761
// CombinerHelper::matchAnyExplicitUseIsUndef (signature line 2762 omitted by
// the rendered listing). True if any explicit register use of MI is defined
// by G_IMPLICIT_DEF.
2763 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2764 return MO.isReg() &&
2765 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2766 });
2767}
2768
// CombinerHelper::matchAllExplicitUsesAreUndef (signature line 2769 omitted
// by the rendered listing). True if every explicit use is either a
// non-register operand or defined by G_IMPLICIT_DEF.
2770 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2771 return !MO.isReg() ||
2772 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2773 });
2774}
2775
// CombinerHelper::matchUndefShuffleVectorMask (signature line 2776 omitted by
// the rendered listing). True if every shuffle mask element is undef (< 0).
2777 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2778 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2779 return all_of(Mask, [](int Elt) { return Elt < 0; });
2780}
2781
// CombinerHelper::matchUndefStore (signature line 2783 omitted by the
// rendered listing). True if the stored value (operand 0) is undef.
2784 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2785 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2786 MRI);
2787}
2787
// CombinerHelper::matchUndefSelectCmp (signature line 2788 omitted by the
// rendered listing). True if the select's condition (operand 1) is undef.
2789 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2790 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2791 MRI);
2792}
2793
// CombinerHelper::matchInsertExtractVecEltOutOfBounds (signature line 2794
// omitted by the rendered listing). True if a fixed-width insert/extract
// element uses a constant index past the end of the vector.
2795 MachineInstr &MI) const {
2796 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2797 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2798 "Expected an insert/extract element op");
2799 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
// Scalable vectors have no compile-time element count to compare against.
2800 if (VecTy.isScalableVector())
2801 return false;
2802
// Index operand position differs: extract has it at 2, insert at 3.
2803 unsigned IdxIdx =
2804 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2805 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2806 if (!Idx)
2807 return false;
2808 return Idx->getZExtValue() >= VecTy.getNumElements();
2809}
2810
// CombinerHelper::matchConstantSelectCmp (signature line 2811 omitted by the
// rendered listing). If the select condition is constant, OpIdx is set to the
// operand index of the chosen value (2 = true value, 3 = false value).
// NOTE(review): line 2815 (the expression initializing Cst, presumably a
// constant/splat query on the condition register) is missing from this
// listing -- confirm against the original source.
2812 unsigned &OpIdx) const {
2813 GSelect &SelMI = cast<GSelect>(MI);
2814 auto Cst =
2816 if (!Cst)
2817 return false;
2818 OpIdx = Cst->isZero() ? 3 : 2;
2819 return true;
2820}
2821
2822void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2823
// CombinerHelper::matchEqualDefs (signature line 2824 omitted by the rendered
// listing). Conservatively decides whether two register operands are
// guaranteed to hold the same value, accounting for multi-def instructions,
// loads/stores, and physical-register copies.
2825 const MachineOperand &MOP2) const {
2826 if (!MOP1.isReg() || !MOP2.isReg())
2827 return false;
2828 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2829 if (!InstAndDef1)
2830 return false;
2831 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2832 if (!InstAndDef2)
2833 return false;
2834 MachineInstr *I1 = InstAndDef1->MI;
2835 MachineInstr *I2 = InstAndDef2->MI;
2836
2837 // Handle a case like this:
2838 //
2839 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2840 //
2841 // Even though %0 and %1 are produced by the same instruction they are not
2842 // the same values.
2843 if (I1 == I2)
2844 return MOP1.getReg() == MOP2.getReg();
2845
2846 // If we have an instruction which loads or stores, we can't guarantee that
2847 // it is identical.
2848 //
2849 // For example, we may have
2850 //
2851 // %x1 = G_LOAD %addr (load N from @somewhere)
2852 // ...
2853 // call @foo
2854 // ...
2855 // %x2 = G_LOAD %addr (load N from @somewhere)
2856 // ...
2857 // %or = G_OR %x1, %x2
2858 //
2859 // It's possible that @foo will modify whatever lives at the address we're
2860 // loading from. To be safe, let's just assume that all loads and stores
2861 // are different (unless we have something which is guaranteed to not
2862 // change.)
2863 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2864 return false;
2865
2866 // If both instructions are loads or stores, they are equal only if both
2867 // are dereferenceable invariant loads with the same number of bits.
2868 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2869 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2870 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2871 if (!LS1 || !LS2)
2872 return false;
2873
2874 if (!I2->isDereferenceableInvariantLoad() ||
2875 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2876 return false;
2877 }
2878
2879 // Check for physical registers on the instructions first to avoid cases
2880 // like this:
2881 //
2882 // %a = COPY $physreg
2883 // ...
2884 // SOMETHING implicit-def $physreg
2885 // ...
2886 // %b = COPY $physreg
2887 //
2888 // These copies are not equivalent.
2889 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2890 return MO.isReg() && MO.getReg().isPhysical();
2891 })) {
2892 // Check if we have a case like this:
2893 //
2894 // %a = COPY $physreg
2895 // %b = COPY %a
2896 //
2897 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2898 // From that, we know that they must have the same value, since they must
2899 // have come from the same COPY.
2900 return I1->isIdenticalTo(*I2);
2901 }
2902
2903 // We don't have any physical registers, so we don't necessarily need the
2904 // same vreg defs.
2905 //
2906 // On the off-chance that there's some target instruction feeding into the
2907 // instruction, let's use produceSameValue instead of isIdenticalTo.
2908 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2909 // Handle instructions with multiple defs that produce same values. Values
2910 // are same for operands with same index.
2911 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2912 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2913 // I1 and I2 are different instructions but produce same values,
2914 // %1 and %6 are same, %1 and %7 are not the same value.
2915 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2916 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2917 }
2918 return false;
2919}
2920
2922 int64_t C) const {
2923 if (!MOP.isReg())
2924 return false;
2925 auto *MI = MRI.getVRegDef(MOP.getReg());
2926 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2927 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2928 MaybeCst->getSExtValue() == C;
2929}
2930
2932 double C) const {
2933 if (!MOP.isReg())
2934 return false;
2935 std::optional<FPValueAndVReg> MaybeCst;
2936 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2937 return false;
2938
2939 return MaybeCst->Value.isExactlyValue(C);
2940}
2941
2943 unsigned OpIdx) const {
2944 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2945 Register OldReg = MI.getOperand(0).getReg();
2946 Register Replacement = MI.getOperand(OpIdx).getReg();
2947 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2948 replaceRegWith(MRI, OldReg, Replacement);
2949 MI.eraseFromParent();
2950}
2951
2953 Register Replacement) const {
2954 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2955 Register OldReg = MI.getOperand(0).getReg();
2956 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2957 replaceRegWith(MRI, OldReg, Replacement);
2958 MI.eraseFromParent();
2959}
2960
2962 unsigned ConstIdx) const {
// Returns true when operand \p ConstIdx of \p MI is a compile-time constant
// that is >= the bit width of MI's destination type, i.e. an out-of-range
// shift amount.
2963 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2964 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2965
2966 // Get the shift amount as a constant (with look-through, per the helper).
2967 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2968 if (!VRegAndVal)
2969 return false;
2970
2971 // Return true if shift amount >= Bitwidth (unsigned comparison).
2972 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2973 }
2974
2976 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2977 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2978 "This is not a funnel shift operation");
2979
2980 Register ConstReg = MI.getOperand(3).getReg();
2981 LLT ConstTy = MRI.getType(ConstReg);
2982 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2983
2984 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2985 assert((VRegAndVal) && "Value is not a constant");
2986
2987 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
2988 APInt NewConst = VRegAndVal->Value.urem(
2989 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2990
2991 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2993 MI.getOpcode(), {MI.getOperand(0)},
2994 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2995
2996 MI.eraseFromParent();
2997}
2998
3000 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3001 // Match (cond ? x : x)
3002 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3003 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3004 MRI);
3005}
3006
// Match a two-source operation whose operands are provably the same value
// (via matchEqualDefs) and whose result register may legally be replaced by
// that shared operand.
3008 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3009 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3010 MRI);
3011 }
3012
3014 unsigned OpIdx) const {
3015 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
3016 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
3017 MRI);
3018}
3019
3021 unsigned OpIdx) const {
// Operand \p OpIdx is "undef" iff it is a register whose def (as found by
// getOpcodeDef) is a G_IMPLICIT_DEF.
3022 MachineOperand &MO = MI.getOperand(OpIdx);
3023 return MO.isReg() &&
3024 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3025 }
3026
3028 unsigned OpIdx) const {
3029 MachineOperand &MO = MI.getOperand(OpIdx);
3030 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT);
3031}
3032
3034 double C) const {
3035 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3036 Builder.buildFConstant(MI.getOperand(0), C);
3037 MI.eraseFromParent();
3038}
3039
3041 int64_t C) const {
3042 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3043 Builder.buildConstant(MI.getOperand(0), C);
3044 MI.eraseFromParent();
3045}
3046
3048 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3049 Builder.buildConstant(MI.getOperand(0), C);
3050 MI.eraseFromParent();
3051}
3052
3054 ConstantFP *CFP) const {
3055 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3056 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3057 MI.eraseFromParent();
3058}
3059
3061 assert(MI.getNumDefs() == 1 && "Expected only one def?");
// Rewrite MI's single def as a fresh G_IMPLICIT_DEF, then delete MI.
3062 Builder.buildUndef(MI.getOperand(0));
3063 MI.eraseFromParent();
3064 }
3065
3067 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3068 Register LHS = MI.getOperand(1).getReg();
3069 Register RHS = MI.getOperand(2).getReg();
3070 Register &NewLHS = std::get<0>(MatchInfo);
3071 Register &NewRHS = std::get<1>(MatchInfo);
3072
3073 // Helper lambda to check for opportunities for
3074 // ((0-A) + B) -> B - A
3075 // (A + (0-B)) -> A - B
3076 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3077 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3078 return false;
3079 NewLHS = MaybeNewLHS;
3080 return true;
3081 };
3082
3083 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3084}
3085
3087 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3088 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3089 "Invalid opcode");
3090 Register DstReg = MI.getOperand(0).getReg();
3091 LLT DstTy = MRI.getType(DstReg);
3092 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3093
3094 if (DstTy.isScalableVector())
3095 return false;
3096
3097 unsigned NumElts = DstTy.getNumElements();
3098 // If this MI is part of a sequence of insert_vec_elts, then
3099 // don't do the combine in the middle of the sequence.
3100 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3101 TargetOpcode::G_INSERT_VECTOR_ELT)
3102 return false;
3103 MachineInstr *CurrInst = &MI;
3104 MachineInstr *TmpInst;
3105 int64_t IntImm;
3106 Register TmpReg;
3107 MatchInfo.resize(NumElts);
3108 while (mi_match(
3109 CurrInst->getOperand(0).getReg(), MRI,
3110 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3111 if (IntImm >= NumElts || IntImm < 0)
3112 return false;
3113 if (!MatchInfo[IntImm])
3114 MatchInfo[IntImm] = TmpReg;
3115 CurrInst = TmpInst;
3116 }
3117 // Variable index.
3118 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3119 return false;
3120 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3121 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3122 if (!MatchInfo[I - 1].isValid())
3123 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3124 }
3125 return true;
3126 }
3127 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3128 // overwritten, bail out.
3129 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3130 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3131}
3132
3134 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3135 Register UndefReg;
3136 auto GetUndef = [&]() {
3137 if (UndefReg)
3138 return UndefReg;
3139 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3140 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3141 return UndefReg;
3142 };
3143 for (Register &Reg : MatchInfo) {
3144 if (!Reg)
3145 Reg = GetUndef();
3146 }
3147 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3148 MI.eraseFromParent();
3149}
3150
3152 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3153 Register SubLHS, SubRHS;
3154 std::tie(SubLHS, SubRHS) = MatchInfo;
3155 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3156 MI.eraseFromParent();
3157}
3158
3160 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3161 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3162 //
3163 // Creates the new hand + logic instruction (but does not insert them.)
3164 //
3165 // On success, MatchInfo is populated with the new instructions. These are
3166 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3167 unsigned LogicOpcode = MI.getOpcode();
3168 assert(LogicOpcode == TargetOpcode::G_AND ||
3169 LogicOpcode == TargetOpcode::G_OR ||
3170 LogicOpcode == TargetOpcode::G_XOR);
3171 MachineIRBuilder MIB(MI);
3172 Register Dst = MI.getOperand(0).getReg();
3173 Register LHSReg = MI.getOperand(1).getReg();
3174 Register RHSReg = MI.getOperand(2).getReg();
3175
3176 // Don't recompute anything.
3177 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3178 return false;
3179
3180 // Make sure we have (hand x, ...), (hand y, ...)
3181 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3182 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3183 if (!LeftHandInst || !RightHandInst)
3184 return false;
3185 unsigned HandOpcode = LeftHandInst->getOpcode();
3186 if (HandOpcode != RightHandInst->getOpcode())
3187 return false;
3188 if (LeftHandInst->getNumOperands() < 2 ||
3189 !LeftHandInst->getOperand(1).isReg() ||
3190 RightHandInst->getNumOperands() < 2 ||
3191 !RightHandInst->getOperand(1).isReg())
3192 return false;
3193
3194 // Make sure the types match up, and if we're doing this post-legalization,
3195 // we end up with legal types.
3196 Register X = LeftHandInst->getOperand(1).getReg();
3197 Register Y = RightHandInst->getOperand(1).getReg();
3198 LLT XTy = MRI.getType(X);
3199 LLT YTy = MRI.getType(Y);
3200 if (!XTy.isValid() || XTy != YTy)
3201 return false;
3202
3203 // Optional extra source register.
3204 Register ExtraHandOpSrcReg;
3205 switch (HandOpcode) {
3206 default:
3207 return false;
3208 case TargetOpcode::G_ANYEXT:
3209 case TargetOpcode::G_SEXT:
3210 case TargetOpcode::G_ZEXT: {
3211 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3212 break;
3213 }
3214 case TargetOpcode::G_TRUNC: {
3215 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3216 const MachineFunction *MF = MI.getMF();
3217 LLVMContext &Ctx = MF->getFunction().getContext();
3218
3219 LLT DstTy = MRI.getType(Dst);
3220 const TargetLowering &TLI = getTargetLowering();
3221
3222 // Be extra careful sinking truncate. If it's free, there's no benefit in
3223 // widening a binop.
3224 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3225 return false;
3226 break;
3227 }
3228 case TargetOpcode::G_AND:
3229 case TargetOpcode::G_ASHR:
3230 case TargetOpcode::G_LSHR:
3231 case TargetOpcode::G_SHL: {
3232 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3233 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3234 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3235 return false;
3236 ExtraHandOpSrcReg = ZOp.getReg();
3237 break;
3238 }
3239 }
3240
3241 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3242 return false;
3243
3244 // Record the steps to build the new instructions.
3245 //
3246 // Steps to build (logic x, y)
3247 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3248 OperandBuildSteps LogicBuildSteps = {
3249 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3250 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3251 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3252 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3253
3254 // Steps to build hand (logic x, y), ...z
3255 OperandBuildSteps HandBuildSteps = {
3256 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3257 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3258 if (ExtraHandOpSrcReg.isValid())
3259 HandBuildSteps.push_back(
3260 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3261 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3262
3263 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3264 return true;
3265}
3266
3268 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3269 assert(MatchInfo.InstrsToBuild.size() &&
3270 "Expected at least one instr to build?");
3271 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3272 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3273 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3274 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3275 for (auto &OperandFn : InstrToBuild.OperandFns)
3276 OperandFn(Instr);
3277 }
3278 MI.eraseFromParent();
3279}
3280
3282 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3283 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3284 int64_t ShlCst, AshrCst;
3285 Register Src;
3286 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3287 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3288 m_ICstOrSplat(AshrCst))))
3289 return false;
3290 if (ShlCst != AshrCst)
3291 return false;
3293 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3294 return false;
3295 MatchInfo = std::make_tuple(Src, ShlCst);
3296 return true;
3297}
3298
3300 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3301 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3302 Register Src;
3303 int64_t ShiftAmt;
3304 std::tie(Src, ShiftAmt) = MatchInfo;
3305 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3306 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3307 MI.eraseFromParent();
3308}
3309
3310/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3313 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3314 assert(MI.getOpcode() == TargetOpcode::G_AND);
3315
3316 Register Dst = MI.getOperand(0).getReg();
3317 LLT Ty = MRI.getType(Dst);
3318
3319 Register R;
3320 int64_t C1;
3321 int64_t C2;
3322 if (!mi_match(
3323 Dst, MRI,
3324 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3325 return false;
3326
3327 MatchInfo = [=](MachineIRBuilder &B) {
3328 if (C1 & C2) {
3329 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3330 return;
3331 }
3332 auto Zero = B.buildConstant(Ty, 0);
3333 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3334 };
3335 return true;
3336}
3337
3339 Register &Replacement) const {
3340 // Given
3341 //
3342 // %y:_(sN) = G_SOMETHING
3343 // %x:_(sN) = G_SOMETHING
3344 // %res:_(sN) = G_AND %x, %y
3345 //
3346 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3347 //
3348 // Patterns like this can appear as a result of legalization. E.g.
3349 //
3350 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3351 // %one:_(s32) = G_CONSTANT i32 1
3352 // %and:_(s32) = G_AND %cmp, %one
3353 //
3354 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3355 assert(MI.getOpcode() == TargetOpcode::G_AND);
3356 if (!VT)
3357 return false;
3358
3359 Register AndDst = MI.getOperand(0).getReg();
3360 Register LHS = MI.getOperand(1).getReg();
3361 Register RHS = MI.getOperand(2).getReg();
3362
3363 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3364 // we can't do anything. If we do, then it depends on whether we have
3365 // KnownBits on the LHS.
3366 KnownBits RHSBits = VT->getKnownBits(RHS);
3367 if (RHSBits.isUnknown())
3368 return false;
3369
3370 KnownBits LHSBits = VT->getKnownBits(LHS);
3371
3372 // Check that x & Mask == x.
3373 // x & 1 == x, always
3374 // x & 0 == x, only if x is also 0
3375 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3376 //
3377 // Check if we can replace AndDst with the LHS of the G_AND
3378 if (canReplaceReg(AndDst, LHS, MRI) &&
3379 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3380 Replacement = LHS;
3381 return true;
3382 }
3383
3384 // Check if we can replace AndDst with the RHS of the G_AND
3385 if (canReplaceReg(AndDst, RHS, MRI) &&
3386 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3387 Replacement = RHS;
3388 return true;
3389 }
3390
3391 return false;
3392}
3393
3395 Register &Replacement) const {
3396 // Given
3397 //
3398 // %y:_(sN) = G_SOMETHING
3399 // %x:_(sN) = G_SOMETHING
3400 // %res:_(sN) = G_OR %x, %y
3401 //
3402 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3403 assert(MI.getOpcode() == TargetOpcode::G_OR);
// Without value-tracking info we cannot reason about the operands' bits.
3404 if (!VT)
3405 return false;
3406
3407 Register OrDst = MI.getOperand(0).getReg();
3408 Register LHS = MI.getOperand(1).getReg();
3409 Register RHS = MI.getOperand(2).getReg();
3410
3411 KnownBits LHSBits = VT->getKnownBits(LHS);
3412 KnownBits RHSBits = VT->getKnownBits(RHS);
3413
3414 // Check that x | Mask == x.
3415 // x | 0 == x, always
3416 // x | 1 == x, only if x is also 1
3417 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3418 //
3419 // Check if we can replace OrDst with the LHS of the G_OR
// Every bit position must be known-one in LHS or known-zero in RHS.
3420 if (canReplaceReg(OrDst, LHS, MRI) &&
3421 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3422 Replacement = LHS;
3423 return true;
3424 }
3425
3426 // Check if we can replace OrDst with the RHS of the G_OR
// Symmetric case: known-zero in LHS or known-one in RHS everywhere.
3427 if (canReplaceReg(OrDst, RHS, MRI) &&
3428 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3429 Replacement = RHS;
3430 return true;
3431 }
3432
3433 return false;
3434 }
3435
3437 // If the input is already sign extended, just drop the extension.
3438 Register Src = MI.getOperand(1).getReg();
3439 unsigned ExtBits = MI.getOperand(2).getImm();
3440 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
// G_SEXT_INREG from ExtBits is a no-op when all bits above bit ExtBits-1
// already replicate the sign bit; that holds exactly when Src has at least
// TypeSize - ExtBits + 1 known sign bits (the +1 covers bit ExtBits-1
// itself, which must equal the propagated sign bit).
3441 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3442 }
3443
3444static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3445 int64_t Cst, bool IsVector, bool IsFP) {
3446 // For i1, Cst will always be -1 regardless of boolean contents.
3447 return (ScalarSizeBits == 1 && Cst == -1) ||
3448 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3449}
3450
3451// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3452// using vector truncates instead
3453//
3454// EXAMPLE:
3455// %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
3456// %T_a(i16) = G_TRUNC %a(i32)
3457// %T_b(i16) = G_TRUNC %b(i32)
3458// %Undef(i16) = G_IMPLICIT_DEF(i16)
3459// %dst(v4i16) = G_BUILD_VECTORS %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
3460//
3461// ===>
3462// %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
3463// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
3464// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3465//
3466// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3468 Register &MatchInfo) const {
3469 auto BuildMI = cast<GBuildVector>(&MI);
3470 unsigned NumOperands = BuildMI->getNumSources();
3471 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3472
3473 // Check the G_BUILD_VECTOR sources
3474 unsigned I;
3475 MachineInstr *UnmergeMI = nullptr;
3476
3477 // Check all source TRUNCs come from the same UNMERGE instruction
3478 for (I = 0; I < NumOperands; ++I) {
3479 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3480 auto SrcMIOpc = SrcMI->getOpcode();
3481
3482 // Check if the G_TRUNC instructions all come from the same MI
3483 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3484 if (!UnmergeMI) {
3485 UnmergeMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3486 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3487 return false;
3488 } else {
3489 auto UnmergeSrcMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3490 if (UnmergeMI != UnmergeSrcMI)
3491 return false;
3492 }
3493 } else {
3494 break;
3495 }
3496 }
3497 if (I < 2)
3498 return false;
3499
3500 // Check the remaining source elements are only G_IMPLICIT_DEF
3501 for (; I < NumOperands; ++I) {
3502 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3503 auto SrcMIOpc = SrcMI->getOpcode();
3504
3505 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3506 return false;
3507 }
3508
3509 // Check the size of unmerge source
3510 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3511 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3512 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3513 return false;
3514
3515 // Check the unmerge source and destination element types match
3516 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3517 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3518 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3519 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3520 return false;
3521
3522 // Only generate legal instructions post-legalizer
3523 if (!IsPreLegalize) {
3524 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3525
3526 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3527 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3528 return false;
3529
3530 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3531 return false;
3532 }
3533
3534 return true;
3535}
3536
3538 Register &MatchInfo) const {
3539 Register MidReg;
3540 auto BuildMI = cast<GBuildVector>(&MI);
3541 Register DstReg = BuildMI->getReg(0);
3542 LLT DstTy = MRI.getType(DstReg);
3543 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3544 unsigned DstTyNumElt = DstTy.getNumElements();
3545 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3546
3547 // No need to pad vector if only G_TRUNC is needed
3548 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3549 MidReg = MatchInfo;
3550 } else {
3551 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3552 SmallVector<Register> ConcatRegs = {MatchInfo};
3553 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3554 ConcatRegs.push_back(UndefReg);
3555
3556 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3557 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3558 }
3559
3560 Builder.buildTrunc(DstReg, MidReg);
3561 MI.eraseFromParent();
3562}
3563
3565 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3566 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3567 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3568 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3569 Register XorSrc;
3570 Register CstReg;
3571 // We match xor(src, true) here.
3572 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3573 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3574 return false;
3575
3576 if (!MRI.hasOneNonDBGUse(XorSrc))
3577 return false;
3578
3579 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3580 // and ORs. The suffix of RegsToNegate starting from index I is used a work
3581 // list of tree nodes to visit.
3582 RegsToNegate.push_back(XorSrc);
3583 // Remember whether the comparisons are all integer or all floating point.
3584 bool IsInt = false;
3585 bool IsFP = false;
3586 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3587 Register Reg = RegsToNegate[I];
3588 if (!MRI.hasOneNonDBGUse(Reg))
3589 return false;
3590 MachineInstr *Def = MRI.getVRegDef(Reg);
3591 switch (Def->getOpcode()) {
3592 default:
3593 // Don't match if the tree contains anything other than ANDs, ORs and
3594 // comparisons.
3595 return false;
3596 case TargetOpcode::G_ICMP:
3597 if (IsFP)
3598 return false;
3599 IsInt = true;
3600 // When we apply the combine we will invert the predicate.
3601 break;
3602 case TargetOpcode::G_FCMP:
3603 if (IsInt)
3604 return false;
3605 IsFP = true;
3606 // When we apply the combine we will invert the predicate.
3607 break;
3608 case TargetOpcode::G_AND:
3609 case TargetOpcode::G_OR:
3610 // Implement De Morgan's laws:
3611 // ~(x & y) -> ~x | ~y
3612 // ~(x | y) -> ~x & ~y
3613 // When we apply the combine we will change the opcode and recursively
3614 // negate the operands.
3615 RegsToNegate.push_back(Def->getOperand(1).getReg());
3616 RegsToNegate.push_back(Def->getOperand(2).getReg());
3617 break;
3618 }
3619 }
3620
3621 // Now we know whether the comparisons are integer or floating point, check
3622 // the constant in the xor.
3623 int64_t Cst;
3624 if (Ty.isVector()) {
3625 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3626 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3627 if (!MaybeCst)
3628 return false;
3629 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3630 return false;
3631 } else {
3632 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3633 return false;
3634 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3635 return false;
3636 }
3637
3638 return true;
3639}
3640
3642 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3643 for (Register Reg : RegsToNegate) {
3644 MachineInstr *Def = MRI.getVRegDef(Reg);
3645 Observer.changingInstr(*Def);
3646 // For each comparison, invert the opcode. For each AND and OR, change the
3647 // opcode.
3648 switch (Def->getOpcode()) {
3649 default:
3650 llvm_unreachable("Unexpected opcode");
3651 case TargetOpcode::G_ICMP:
3652 case TargetOpcode::G_FCMP: {
3653 MachineOperand &PredOp = Def->getOperand(1);
3656 PredOp.setPredicate(NewP);
3657 break;
3658 }
3659 case TargetOpcode::G_AND:
3660 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3661 break;
3662 case TargetOpcode::G_OR:
3663 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3664 break;
3665 }
3666 Observer.changedInstr(*Def);
3667 }
3668
3669 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3670 MI.eraseFromParent();
3671}
3672
3674 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3675 // Match (xor (and x, y), y) (or any of its commuted cases)
3676 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3677 Register &X = MatchInfo.first;
3678 Register &Y = MatchInfo.second;
3679 Register AndReg = MI.getOperand(1).getReg();
3680 Register SharedReg = MI.getOperand(2).getReg();
3681
3682 // Find a G_AND on either side of the G_XOR.
3683 // Look for one of
3684 //
3685 // (xor (and x, y), SharedReg)
3686 // (xor SharedReg, (and x, y))
3687 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3688 std::swap(AndReg, SharedReg);
3689 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3690 return false;
3691 }
3692
3693 // Only do this if we'll eliminate the G_AND.
3694 if (!MRI.hasOneNonDBGUse(AndReg))
3695 return false;
3696
3697 // We can combine if SharedReg is the same as either the LHS or RHS of the
3698 // G_AND.
3699 if (Y != SharedReg)
3700 std::swap(X, Y);
3701 return Y == SharedReg;
3702}
3703
3705 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3706 // Fold (xor (and x, y), y) -> (and (not x), y)
3707 Register X, Y;
3708 std::tie(X, Y) = MatchInfo;
// Build (not x), then rewrite MI in place from G_XOR into G_AND over
// (not x) and y, reusing MI's def register.
3709 auto Not = Builder.buildNot(MRI.getType(X), X);
// NOTE(review): this rendering is missing rendered lines 3710/3714 —
// presumably the Observer changingInstr/changedInstr notifications that
// bracket an in-place mutation; verify against the original source.
3711 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3712 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3713 MI.getOperand(2).setReg(Y);
3715 }
3716
3718 auto &PtrAdd = cast<GPtrAdd>(MI);
3719 Register DstReg = PtrAdd.getReg(0);
3720 LLT Ty = MRI.getType(DstReg);
3722
3723 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3724 return false;
3725
3726 if (Ty.isPointer()) {
3727 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3728 return ConstVal && *ConstVal == 0;
3729 }
3730
3731 assert(Ty.isVector() && "Expecting a vector type");
3732 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3733 return isBuildVectorAllZeros(*VecMI, MRI);
3734}
3735
// The base of this G_PTR_ADD was matched as zero earlier, so the result is
// just the offset reinterpreted as a pointer: G_PTR_ADD 0, %off ->
// G_INTTOPTR %off.
3737 auto &PtrAdd = cast<GPtrAdd>(MI);
3738 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3739 PtrAdd.eraseFromParent();
3740 }
3741
3742/// The second source operand is known to be a power of 2.
3744 Register DstReg = MI.getOperand(0).getReg();
3745 Register Src0 = MI.getOperand(1).getReg();
3746 Register Pow2Src1 = MI.getOperand(2).getReg();
3747 LLT Ty = MRI.getType(DstReg);
3748
3749 // Fold (urem x, pow2) -> (and x, pow2-1)
3750 auto NegOne = Builder.buildConstant(Ty, -1);
3751 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3752 Builder.buildAnd(DstReg, Src0, Add);
3753 MI.eraseFromParent();
3754}
3755
3757 unsigned &SelectOpNo) const {
3758 Register LHS = MI.getOperand(1).getReg();
3759 Register RHS = MI.getOperand(2).getReg();
3760
3761 Register OtherOperandReg = RHS;
3762 SelectOpNo = 1;
3764
3765 // Don't do this unless the old select is going away. We want to eliminate the
3766 // binary operator, not replace a binop with a select.
3767 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3769 OtherOperandReg = LHS;
3770 SelectOpNo = 2;
3772 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3774 return false;
3775 }
3776
3777 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3778 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3779
3780 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3781 /*AllowFP*/ true,
3782 /*AllowOpaqueConstants*/ false))
3783 return false;
3784 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3785 /*AllowFP*/ true,
3786 /*AllowOpaqueConstants*/ false))
3787 return false;
3788
3789 unsigned BinOpcode = MI.getOpcode();
3790
3791 // We know that one of the operands is a select of constants. Now verify that
3792 // the other binary operator operand is either a constant, or we can handle a
3793 // variable.
3794 bool CanFoldNonConst =
3795 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3796 (isNullOrNullSplat(*SelectLHS, MRI) ||
3797 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3798 (isNullOrNullSplat(*SelectRHS, MRI) ||
3799 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3800 if (CanFoldNonConst)
3801 return true;
3802
3803 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3804 /*AllowFP*/ true,
3805 /*AllowOpaqueConstants*/ false);
3806}
3807
3808/// \p SelectOperand is the operand in binary operator \p MI that is the select
3809/// to fold.
3811 MachineInstr &MI, const unsigned &SelectOperand) const {
3812 Register Dst = MI.getOperand(0).getReg();
3813 Register LHS = MI.getOperand(1).getReg();
3814 Register RHS = MI.getOperand(2).getReg();
3815 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3816
3817 Register SelectCond = Select->getOperand(1).getReg();
3818 Register SelectTrue = Select->getOperand(2).getReg();
3819 Register SelectFalse = Select->getOperand(3).getReg();
3820
3821 LLT Ty = MRI.getType(Dst);
3822 unsigned BinOpcode = MI.getOpcode();
3823
3824 Register FoldTrue, FoldFalse;
3825
3826 // We have a select-of-constants followed by a binary operator with a
3827 // constant. Eliminate the binop by pulling the constant math into the select.
3828 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3829 if (SelectOperand == 1) {
3830 // TODO: SelectionDAG verifies this actually constant folds before
3831 // committing to the combine.
3832
3833 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3834 FoldFalse =
3835 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3836 } else {
3837 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3838 FoldFalse =
3839 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3840 }
3841
3842 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3843 MI.eraseFromParent();
3844}
3845
3846 std::optional<SmallVector<Register, 8>>
3847 CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3848 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3849 // We want to detect if Root is part of a tree which represents a bunch
3850 // of loads being merged into a larger load. We'll try to recognize patterns
3851 // like, for example:
3852 //
3853 // Reg Reg
3854 // \ /
3855 // OR_1 Reg
3856 // \ /
3857 // OR_2
3858 // \ Reg
3859 // .. /
3860 // Root
3861 //
3862 // Reg Reg Reg Reg
3863 // \ / \ /
3864 // OR_1 OR_2
3865 // \ /
3866 // \ /
3867 // ...
3868 // Root
3869 //
3870 // Each "Reg" may have been produced by a load + some arithmetic. This
3871 // function will save each of them.
3872 SmallVector<Register, 8> RegsToVisit;
// NOTE(review): rendered line 3873 is missing here — it should declare the
// G_OR worklist `Ors` seeded with Root (it is used below); confirm against
// the original source.
3874
3875 // In the "worst" case, we're dealing with a load for each byte. So, there
3876 // are at most #bytes - 1 ORs.
3877 const unsigned MaxIter =
3878 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3879 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3880 if (Ors.empty())
3881 break;
3882 const MachineInstr *Curr = Ors.pop_back_val();
3883 Register OrLHS = Curr->getOperand(1).getReg();
3884 Register OrRHS = Curr->getOperand(2).getReg();
3885
3886 // In the combine, we want to eliminate the entire tree.
3887 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3888 return std::nullopt;
3889
3890 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3891 // something that may be a load + arithmetic.
3892 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3893 Ors.push_back(Or);
3894 else
3895 RegsToVisit.push_back(OrLHS);
3896 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3897 Ors.push_back(Or);
3898 else
3899 RegsToVisit.push_back(OrRHS);
3900 }
3901
3902 // We're going to try and merge each register into a wider power-of-2 type,
3903 // so we ought to have an even number of registers.
3904 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3905 return std::nullopt;
3906 return RegsToVisit;
3907 }
3908
3909/// Helper function for findLoadOffsetsForLoadOrCombine.
3910///
3911/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3912/// and then moving that value into a specific byte offset.
3913///
3914/// e.g. x[i] << 24
3915///
3916/// \returns The load instruction and the byte offset it is moved into.
3917static std::optional<std::pair<GZExtLoad *, int64_t>>
3918matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3919 const MachineRegisterInfo &MRI) {
3920 assert(MRI.hasOneNonDBGUse(Reg) &&
3921 "Expected Reg to only have one non-debug use?");
3922 Register MaybeLoad;
3923 int64_t Shift;
// If Reg is not a single-use G_SHL by a constant, treat it as an unshifted
// value sitting at byte position 0.
3924 if (!mi_match(Reg, MRI,
3925 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3926 Shift = 0;
3927 MaybeLoad = Reg;
3928 }
3929
// The shift amount must be a whole multiple of the narrow memory width, so
// the loaded value lands exactly on an element boundary of the wide value.
3930 if (Shift % MemSizeInBits != 0)
3931 return std::nullopt;
3932
3933 // TODO: Handle other types of loads.
3934 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3935 if (!Load)
3936 return std::nullopt;
3937
// Only unordered (non-atomic/non-volatile-ordered) loads of exactly the
// expected width may participate in the combine.
3938 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3939 return std::nullopt;
3940
// Second element is the element index, i.e. which MemSizeInBits-wide slot of
// the final value this load occupies.
3941 return std::make_pair(Load, Shift / MemSizeInBits);
3942}
3943
/// For each register in \p RegsToVisit, finds the narrow load feeding it and
/// records which slot of the combined value it occupies (into the offset map
/// parameter whose declaration was lost in extraction — see NOTE below).
/// Validates that all loads share one basic block, address space and base
/// pointer, use distinct indices/offsets, and have no load-fold barriers
/// between the earliest and latest of them.
/// \returns (load with the lowest index, that lowest index, latest load in
/// instruction order), or std::nullopt if the pattern does not hold.
3944std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3945CombinerHelper::findLoadOffsetsForLoadOrCombine(
// NOTE(review): extraction artifact — the first parameter line (presumably
// the MemOffset2Idx map written via try_emplace below) was dropped here.
// Verify against upstream CombinerHelper.cpp.
3947 const SmallVector<Register, 8> &RegsToVisit,
3948 const unsigned MemSizeInBits) const {
3949
3950 // Each load found for the pattern. There should be one for each RegsToVisit.
// NOTE(review): extraction artifact — the declaration of the 'Loads' set
// (used by Loads.insert / Loads.count below) was dropped on the next line.
3952
3953 // The lowest index used in any load. (The lowest "i" for each x[i].)
3954 int64_t LowestIdx = INT64_MAX;
3955
3956 // The load which uses the lowest index.
3957 GZExtLoad *LowestIdxLoad = nullptr;
3958
3959 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3960 SmallSet<int64_t, 8> SeenIdx;
3961
3962 // Ensure each load is in the same MBB.
3963 // TODO: Support multiple MachineBasicBlocks.
3964 MachineBasicBlock *MBB = nullptr;
3965 const MachineMemOperand *MMO = nullptr;
3966
3967 // Earliest instruction-order load in the pattern.
3968 GZExtLoad *EarliestLoad = nullptr;
3969
3970 // Latest instruction-order load in the pattern.
3971 GZExtLoad *LatestLoad = nullptr;
3972
3973 // Base pointer which every load should share.
// NOTE(review): extraction artifact — the declaration of 'BasePtr' (a
// Register, per BasePtr.isValid() below) was dropped on the next line.
3975
3976 // We want to find a load for each register. Each load should have some
3977 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3978 // track of the load which uses the lowest index. Later, we will check if we
3979 // can use its pointer in the final, combined load.
3980 for (auto Reg : RegsToVisit) {
3981 // Find the load, and find the position that it will end up in (e.g. a
3982 // shifted) value.
3983 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3984 if (!LoadAndPos)
3985 return std::nullopt;
3986 GZExtLoad *Load;
3987 int64_t DstPos;
3988 std::tie(Load, DstPos) = *LoadAndPos;
3989
3990 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3991 // it is difficult to check for stores/calls/etc between loads.
3992 MachineBasicBlock *LoadMBB = Load->getParent();
3993 if (!MBB)
3994 MBB = LoadMBB;
3995 if (LoadMBB != MBB)
3996 return std::nullopt;
3997
3998 // Make sure that the MachineMemOperands of every seen load are compatible.
3999 auto &LoadMMO = Load->getMMO();
4000 if (!MMO)
4001 MMO = &LoadMMO;
4002 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4003 return std::nullopt;
4004
4005 // Find out what the base pointer and index for the load is.
4006 Register LoadPtr;
4007 int64_t Idx;
// A plain (non-G_PTR_ADD) pointer is treated as base + index 0.
4008 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4009 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4010 LoadPtr = Load->getOperand(1).getReg();
4011 Idx = 0;
4012 }
4013
4014 // Don't combine things like a[i], a[i] -> a bigger load.
4015 if (!SeenIdx.insert(Idx).second)
4016 return std::nullopt;
4017
4018 // Every load must share the same base pointer; don't combine things like:
4019 //
4020 // a[i], b[i + 1] -> a bigger load.
4021 if (!BasePtr.isValid())
4022 BasePtr = LoadPtr;
4023 if (BasePtr != LoadPtr)
4024 return std::nullopt;
4025
4026 if (Idx < LowestIdx) {
4027 LowestIdx = Idx;
4028 LowestIdxLoad = Load;
4029 }
4030
4031 // Keep track of the byte offset that this load ends up at. If we have seen
4032 // the byte offset, then stop here. We do not want to combine:
4033 //
4034 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4035 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4036 return std::nullopt;
4037 Loads.insert(Load);
4038
4039 // Keep track of the position of the earliest/latest loads in the pattern.
4040 // We will check that there are no load fold barriers between them later
4041 // on.
4042 //
4043 // FIXME: Is there a better way to check for load fold barriers?
4044 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4045 EarliestLoad = Load;
4046 if (!LatestLoad || dominates(*LatestLoad, *Load))
4047 LatestLoad = Load;
4048 }
4049
4050 // We found a load for each register. Let's check if each load satisfies the
4051 // pattern.
4052 assert(Loads.size() == RegsToVisit.size() &&
4053 "Expected to find a load for each register?");
4054 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4055 LatestLoad && "Expected at least two loads?");
4056
4057 // Check if there are any stores, calls, etc. between any of the loads. If
4058 // there are, then we can't safely perform the combine.
4059 //
4060 // MaxIter is chosen based off the (worst case) number of iterations it
4061 // typically takes to succeed in the LLVM test suite plus some padding.
4062 //
4063 // FIXME: Is there a better way to check for load fold barriers?
4064 const unsigned MaxIter = 20;
4065 unsigned Iter = 0;
// Walk the (non-debug) instructions between the earliest and latest load,
// bailing out on anything that can clobber memory, and capping the scan cost.
4066 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4067 LatestLoad->getIterator())) {
4068 if (Loads.count(&MI))
4069 continue;
4070 if (MI.isLoadFoldBarrier())
4071 return std::nullopt;
4072 if (Iter++ == MaxIter)
4073 return std::nullopt;
4074 }
4075
4076 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4077}
4078
// NOTE(review): extraction artifact — the first line(s) of this function's
// signature were dropped before 4081; per its helpers this is presumably
// bool CombinerHelper::matchLoadOrCombine(MachineInstr &MI, ...). Verify
// against upstream CombinerHelper.cpp.
//
// Matches a tree of G_ORs of shifted narrow zext-loads that together form one
// contiguous wide load (optionally byte-swapped), and builds a MatchInfo
// lambda that emits the wide G_LOAD (plus G_BSWAP if the byte order is
// opposite to the target's endianness).
4081 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4082 assert(MI.getOpcode() == TargetOpcode::G_OR);
4083 MachineFunction &MF = *MI.getMF();
4084 // Assuming a little-endian target, transform:
4085 // s8 *a = ...
4086 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4087 // =>
4088 // s32 val = *((i32)a)
4089 //
4090 // s8 *a = ...
4091 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4092 // =>
4093 // s32 val = BSWAP(*((s32)a))
4094 Register Dst = MI.getOperand(0).getReg();
4095 LLT Ty = MRI.getType(Dst);
4096 if (Ty.isVector())
4097 return false;
4098
4099 // We need to combine at least two loads into this type. Since the smallest
4100 // possible load is into a byte, we need at least a 16-bit wide type.
4101 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4102 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4103 return false;
4104
4105 // Match a collection of non-OR instructions in the pattern.
4106 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4107 if (!RegsToVisit)
4108 return false;
4109
4110 // We have a collection of non-OR instructions. Figure out how wide each of
4111 // the small loads should be based off of the number of potential loads we
4112 // found.
4113 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4114 if (NarrowMemSizeInBits % 8 != 0)
4115 return false;
4116
4117 // Check if each register feeding into each OR is a load from the same
4118 // base pointer + some arithmetic.
4119 //
4120 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4121 //
4122 // Also verify that each of these ends up putting a[i] into the same memory
4123 // offset as a load into a wide type would.
// NOTE(review): extraction artifact — the declaration of 'MemOffset2Idx'
// (the offset -> index map passed to the helper below) was dropped here.
4125 GZExtLoad *LowestIdxLoad, *LatestLoad;
4126 int64_t LowestIdx;
4127 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4128 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4129 if (!MaybeLoadInfo)
4130 return false;
4131 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4132
4133 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4134 // we found before, check if this corresponds to a big or little endian byte
4135 // pattern. If it does, then we can represent it using a load + possibly a
4136 // BSWAP.
4137 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4138 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4139 if (!IsBigEndian)
4140 return false;
4141 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4142 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4143 return false;
4144
4145 // Make sure that the load from the lowest index produces offset 0 in the
4146 // final value.
4147 //
4148 // This ensures that we won't combine something like this:
4149 //
4150 // load x[i] -> byte 2
4151 // load x[i+1] -> byte 0 ---> wide_load x[i]
4152 // load x[i+2] -> byte 1
4153 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4154 const unsigned ZeroByteOffset =
4155 *IsBigEndian
4156 ? bigEndianByteAt(NumLoadsInTy, 0)
4157 : littleEndianByteAt(NumLoadsInTy, 0);
4158 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4159 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4160 ZeroOffsetIdx->second != LowestIdx)
4161 return false;
4162
4163 // We wil reuse the pointer from the load which ends up at byte offset 0. It
4164 // may not use index 0.
4165 Register Ptr = LowestIdxLoad->getPointerReg();
4166 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4167 LegalityQuery::MemDesc MMDesc(MMO);
4168 MMDesc.MemoryTy = Ty;
// NOTE(review): extraction artifact — the opening of this legality check
// (presumably an isLegalOrBeforeLegalizer(...) call) was dropped here.
4170 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4171 return false;
4172 auto PtrInfo = MMO.getPointerInfo();
4173 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4174
4175 // Load must be allowed and fast on the target.
// NOTE(review): extraction artifact — the declaration of 'C' (an
// LLVMContext, per the allowsMemoryAccess call below) was dropped here.
4177 auto &DL = MF.getDataLayout();
4178 unsigned Fast = 0;
4179 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4180 !Fast)
4181 return false;
4182
// Deferred rewrite: load into a temp vreg and byte-swap into Dst when the
// matched byte order is opposite to the target's endianness.
4183 MatchInfo = [=](MachineIRBuilder &MIB) {
4184 MIB.setInstrAndDebugLoc(*LatestLoad);
4185 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4186 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4187 if (NeedsBSwap)
4188 MIB.buildBSwap(Dst, LoadDst);
4189 };
4190 return true;
4191}
4192
// NOTE(review): extraction artifact — the first signature line was dropped
// before 4194; per the apply function below this is presumably
// bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI, ...).
//
// Matches a scalar G_PHI whose only non-debug use is a G_ZEXT/G_SEXT
// (G_ANYEXT is accepted immediately as usually free), provided the incoming
// values come from a small set of instruction kinds where propagating the
// extend is likely profitable. On success \p ExtMI points at the extend.
4194 MachineInstr *&ExtMI) const {
4195 auto &PHI = cast<GPhi>(MI);
4196 Register DstReg = PHI.getReg(0);
4197
4198 // TODO: Extending a vector may be expensive, don't do this until heuristics
4199 // are better.
4200 if (MRI.getType(DstReg).isVector())
4201 return false;
4202
4203 // Try to match a phi, whose only use is an extend.
4204 if (!MRI.hasOneNonDBGUse(DstReg))
4205 return false;
4206 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4207 switch (ExtMI->getOpcode()) {
4208 case TargetOpcode::G_ANYEXT:
4209 return true; // G_ANYEXT is usually free.
4210 case TargetOpcode::G_ZEXT:
4211 case TargetOpcode::G_SEXT:
4212 break;
4213 default:
4214 return false;
4215 }
4216
4217 // If the target is likely to fold this extend away, don't propagate.
// NOTE(review): extraction artifact — the condition line for this early-out
// (a target-hook query) was dropped here; verify against upstream.
4219 return false;
4220
4221 // We don't want to propagate the extends unless there's a good chance that
4222 // they'll be optimized in some way.
4223 // Collect the unique incoming values.
// NOTE(review): extraction artifact — the declaration of 'InSrcs' (a set of
// MachineInstr*, per InSrcs.insert/size below) was dropped here.
4225 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4226 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4227 switch (DefMI->getOpcode()) {
4228 case TargetOpcode::G_LOAD:
4229 case TargetOpcode::G_TRUNC:
4230 case TargetOpcode::G_SEXT:
4231 case TargetOpcode::G_ZEXT:
4232 case TargetOpcode::G_ANYEXT:
4233 case TargetOpcode::G_CONSTANT:
4234 InSrcs.insert(DefMI);
4235 // Don't try to propagate if there are too many places to create new
4236 // extends, chances are it'll increase code size.
4237 if (InSrcs.size() > 2)
4238 return false;
4239 break;
4240 default:
4241 return false;
4242 }
4243 }
4244 return true;
4245}
4246
// NOTE(review): extraction artifact — the first signature line was dropped
// before 4248; presumably
// void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, ...).
//
// Rewrites "ext(phi(a, b, ...))" into "phi(ext(a), ext(b), ...)": builds one
// extend after each unique incoming def, then replaces the original extend
// with a new G_PHI over the extended values.
4248 MachineInstr *&ExtMI) const {
4249 auto &PHI = cast<GPhi>(MI);
4250 Register DstReg = ExtMI->getOperand(0).getReg();
4251 LLT ExtTy = MRI.getType(DstReg);
4252
4253 // Propagate the extension into the block of each incoming reg's block.
4254 // Use a SetVector here because PHIs can have duplicate edges, and we want
4255 // deterministic iteration order.
// NOTE(review): extraction artifact — the declarations of 'SrcMIs' (a
// SetVector of incoming defs) and 'OldToNewSrcMap' (old def -> new extend)
// were dropped here; both are used below. Verify against upstream.
4258 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4259 auto SrcReg = PHI.getIncomingValue(I);
4260 auto *SrcMI = MRI.getVRegDef(SrcReg);
// Skip duplicate PHI edges — one extend per unique incoming def.
4261 if (!SrcMIs.insert(SrcMI))
4262 continue;
4263
4264 // Build an extend after each src inst.
4265 auto *MBB = SrcMI->getParent();
4266 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
// If the next instruction is still a PHI, insert after the PHI run instead
// (instructions may not be interleaved with PHIs at block start).
4267 if (InsertPt != MBB->end() && InsertPt->isPHI())
4268 InsertPt = MBB->getFirstNonPHI();
4269
4270 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4271 Builder.setDebugLoc(MI.getDebugLoc());
4272 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4273 OldToNewSrcMap[SrcMI] = NewExt;
4274 }
4275
4276 // Create a new phi with the extended inputs.
// NOTE(review): extraction artifact — a builder setup line (insert point /
// debug location for the new PHI) appears to have been dropped here.
4278 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4279 NewPhi.addDef(DstReg);
// Copy the (value, block) operand pairs of the old PHI, substituting each
// incoming value with its newly built extend.
4280 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4281 if (!MO.isReg()) {
4282 NewPhi.addMBB(MO.getMBB());
4283 continue;
4284 }
4285 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4286 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4287 }
4288 Builder.insertInstr(NewPhi);
4289 ExtMI->eraseFromParent();
4290}
4291
// NOTE(review): extraction artifact — the first signature line was dropped
// before 4293; presumably
// bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI, ...).
//
// Matches G_EXTRACT_VECTOR_ELT with a constant in-range index whose source is
// a G_BUILD_VECTOR(_TRUNC) (optionally behind a G_TRUNC); on success \p Reg
// is set to the build_vector source operand the index selects.
4293 Register &Reg) const {
4294 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4295 // If we have a constant index, look for a G_BUILD_VECTOR source
4296 // and find the source register that the index maps to.
4297 Register SrcVec = MI.getOperand(1).getReg();
4298 LLT SrcTy = MRI.getType(SrcVec);
// Scalable vectors have no compile-time element count to index into.
4299 if (SrcTy.isScalableVector())
4300 return false;
4301
4302 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4303 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4304 return false;
4305
4306 unsigned VecIdx = Cst->Value.getZExtValue();
4307
4308 // Check if we have a build_vector or build_vector_trunc with an optional
4309 // trunc in front.
4310 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4311 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4312 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4313 }
4314
4315 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4316 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4317 return false;
4318
// If the vector has other uses, only combine when the target says keeping
// build_vector sources live is still preferable.
4319 EVT Ty(getMVTForLLT(SrcTy));
4320 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4321 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4322 return false;
4323
// Operand 0 is the dest, so element i lives at operand i + 1.
4324 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4325 return true;
4326}
4327
// NOTE(review): extraction artifact — the first signature line was dropped
// before 4329; presumably
// void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI, ...).
//
// Replaces the G_EXTRACT_VECTOR_ELT with the matched build_vector source
// \p Reg, truncating first when the source is wider (build_vector_trunc case).
4329 Register &Reg) const {
4330 // Check the type of the register, since it may have come from a
4331 // G_BUILD_VECTOR_TRUNC.
4332 LLT ScalarTy = MRI.getType(Reg);
4333 Register DstReg = MI.getOperand(0).getReg();
4334 LLT DstTy = MRI.getType(DstReg);
4335
4336 if (ScalarTy != DstTy) {
4337 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4338 Builder.buildTrunc(DstReg, Reg);
4339 MI.eraseFromParent();
4340 return;
4341 }
// NOTE(review): extraction artifact — the same-type replacement line
// (presumably replaceSingleDefInstWithReg(MI, Reg);) was dropped here.
4343}
4344
// NOTE(review): extraction artifact — the first signature line(s) were
// dropped before 4347; presumably
// bool CombinerHelper::matchExtractAllEltsFromBuildVector(MachineInstr &MI,
// ...).
//
// Matches a G_BUILD_VECTOR whose every non-debug use is a constant-index
// G_EXTRACT_VECTOR_ELT and whose elements are all extracted; records
// (source operand, extract instruction) pairs for the apply step.
4347 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4348 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4349 // This combine tries to find build_vector's which have every source element
4350 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4351 // the masked load scalarization is run late in the pipeline. There's already
4352 // a combine for a similar pattern starting from the extract, but that
4353 // doesn't attempt to do it if there are multiple uses of the build_vector,
4354 // which in this case is true. Starting the combine from the build_vector
4355 // feels more natural than trying to find sibling nodes of extracts.
4356 // E.g.
4357 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4358 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4359 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4360 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4361 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4362 // ==>
4363 // replace ext{1,2,3,4} with %s{1,2,3,4}
4364
4365 Register DstReg = MI.getOperand(0).getReg();
4366 LLT DstTy = MRI.getType(DstReg);
4367 unsigned NumElts = DstTy.getNumElements();
4368
4369 SmallBitVector ExtractedElts(NumElts);
4370 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
// Any non-extract user means we cannot remove the build_vector.
4371 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4372 return false;
4373 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4374 if (!Cst)
4375 return false;
4376 unsigned Idx = Cst->getZExtValue();
4377 if (Idx >= NumElts)
4378 return false; // Out of range.
4379 ExtractedElts.set(Idx);
4380 SrcDstPairs.emplace_back(
4381 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4382 }
4383 // Match if every element was extracted.
4384 return ExtractedElts.all();
4385}
4386
// NOTE(review): extraction artifact — the first signature line(s) were
// dropped before 4389; presumably
// void CombinerHelper::applyExtractAllEltsFromBuildVector(MachineInstr &MI,
// ...).
//
// Replaces each recorded extract's result with the corresponding
// build_vector source, erases the extracts, then erases the build_vector.
4389 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4390 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4391 for (auto &Pair : SrcDstPairs) {
4392 auto *ExtMI = Pair.second;
4393 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4394 ExtMI->eraseFromParent();
4395 }
4396 MI.eraseFromParent();
4397}
4398
// NOTE(review): extraction artifact — the first signature line(s) were
// dropped before 4401; presumably
// void CombinerHelper::applyBuildFn(MachineInstr &MI, ...).
//
// Runs the deferred build function, then erases the matched instruction.
4401 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4402 applyBuildFnNoErase(MI, MatchInfo);
4403 MI.eraseFromParent();
4404}
4405
// NOTE(review): extraction artifact — the first signature line(s) were
// dropped before 4408; presumably
// void CombinerHelper::applyBuildFnNoErase(MachineInstr &MI, ...).
//
// Runs the deferred build function against the helper's builder without
// erasing the matched instruction (the MatchInfo lambda owns any erasure).
4408 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4409 MatchInfo(Builder);
4410}
4411
// NOTE(review): extraction artifact — the first signature line was dropped
// before 4413; presumably
// bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI, ...).
//
// Matches (or (shl x, C0/amt), (lshr y, C1/sub)) shapes that form a funnel
// shift and, when the corresponding G_FSHL/G_FSHR is legal, records a build
// function that emits it.
4413 BuildFnTy &MatchInfo) const {
4414 assert(MI.getOpcode() == TargetOpcode::G_OR);
4415
4416 Register Dst = MI.getOperand(0).getReg();
4417 LLT Ty = MRI.getType(Dst);
4418 unsigned BitWidth = Ty.getScalarSizeInBits();
4419
4420 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4421 unsigned FshOpc = 0;
4422
4423 // Match (or (shl ...), (lshr ...)).
4424 if (!mi_match(Dst, MRI,
4425 // m_GOr() handles the commuted version as well.
4426 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4427 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4428 return false;
4429
4430 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4431 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4432 int64_t CstShlAmt, CstLShrAmt;
4433 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4434 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4435 CstShlAmt + CstLShrAmt == BitWidth) {
4436 FshOpc = TargetOpcode::G_FSHR;
4437 Amt = LShrAmt;
4438
4439 } else if (mi_match(LShrAmt, MRI,
// NOTE(review): extraction artifact — the matcher line here (presumably a
// m_GSub(bitwidth, amt) pattern binding 'Amt') was dropped. Verify upstream.
4441 ShlAmt == Amt) {
4442 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4443 FshOpc = TargetOpcode::G_FSHL;
4444
4445 } else if (mi_match(ShlAmt, MRI,
// NOTE(review): extraction artifact — the symmetric m_GSub matcher line was
// dropped here as well.
4447 LShrAmt == Amt) {
4448 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4449 FshOpc = TargetOpcode::G_FSHR;
4450
4451 } else {
4452 return false;
4453 }
4454
4455 LLT AmtTy = MRI.getType(Amt);
4456 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4457 return false;
4458
4459 MatchInfo = [=](MachineIRBuilder &B) {
4460 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4461 };
4462 return true;
4463}
4464
4465/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
// NOTE(review): extraction artifact — the signature line was dropped after
// the doc comment; presumably
// bool CombinerHelper::matchFshToRotate(MachineInstr &MI) const {.
//
// A funnel shift with identical first and second operands is a rotate;
// match only if the corresponding rotate opcode is legal.
4467 unsigned Opc = MI.getOpcode();
4468 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4469 Register X = MI.getOperand(1).getReg();
4470 Register Y = MI.getOperand(2).getReg();
4471 if (X != Y)
4472 return false;
4473 unsigned RotateOpc =
4474 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4475 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4476}
4477
// NOTE(review): extraction artifact — the signature line was dropped before
// 4479; presumably
// void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) const {.
//
// Rewrites G_FSHL/G_FSHR with equal value operands into G_ROTL/G_ROTR in
// place by swapping the descriptor and removing the duplicate value operand.
4479 unsigned Opc = MI.getOpcode();
4480 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4481 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
// NOTE(review): a line was dropped here (likely the Observer
// changingInstr notification that pairs with the one dropped before the
// closing brace). Verify against upstream.
4483 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4484 : TargetOpcode::G_ROTR));
4485 MI.removeOperand(2);
4487}
4488
4489// Fold (rot x, c) -> (rot x, c % BitSize)
// NOTE(review): extraction artifact — the signature line was dropped after
// the comment above; presumably
// bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) const {.
//
// Matches a rotate whose constant amount (scalar or any vector lane) is >=
// the scalar bit width, so the amount can be reduced modulo the bit width.
4491 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4492 MI.getOpcode() == TargetOpcode::G_ROTR);
4493 unsigned Bitsize =
4494 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4495 Register AmtReg = MI.getOperand(2).getReg();
4496 bool OutOfRange = false;
// The predicate returns true for every element so matchUnaryPredicate keeps
// scanning; OutOfRange accumulates whether any constant lane is >= Bitsize.
4497 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4498 if (auto *CI = dyn_cast<ConstantInt>(C))
4499 OutOfRange |= CI->getValue().uge(Bitsize);
4500 return true;
4501 };
4502 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4503}
4504
// NOTE(review): extraction artifact — the signature line was dropped before
// 4506; presumably
// void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) const {.
//
// Replaces the rotate amount with (amount urem BitSize), which preserves
// rotate semantics while bringing the amount into range.
4506 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4507 MI.getOpcode() == TargetOpcode::G_ROTR);
4508 unsigned Bitsize =
4509 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4510 Register Amt = MI.getOperand(2).getReg();
4511 LLT AmtTy = MRI.getType(Amt);
4512 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4513 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
// NOTE(review): Observer changing/changed notification lines around this
// operand mutation appear to have been dropped by extraction.
4515 MI.getOperand(2).setReg(Amt);
4517}
4518
// NOTE(review): extraction artifact — the first signature line was dropped
// before 4520; presumably
// bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI, ...).
//
// Uses known-bits analysis to decide a G_ICMP at compile time; on success
// \p MatchInfo receives the constant result (target true value or 0).
4520 int64_t &MatchInfo) const {
4521 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4522 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4523
4524 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4525 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4526 // KnownBits on the LHS in two cases:
4527 //
4528 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4529 // we cannot do any transforms so we can safely bail out early.
4530 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4531 // >=0.
4532 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4533 if (KnownRHS.isUnknown())
4534 return false;
4535
4536 std::optional<bool> KnownVal;
4537 if (KnownRHS.isZero()) {
4538 // ? uge 0 -> always true
4539 // ? ult 0 -> always false
4540 if (Pred == CmpInst::ICMP_UGE)
4541 KnownVal = true;
4542 else if (Pred == CmpInst::ICMP_ULT)
4543 KnownVal = false;
4544 }
4545
4546 if (!KnownVal) {
4547 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4548 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4549 }
4550
4551 if (!KnownVal)
4552 return false;
4553 MatchInfo =
4554 *KnownVal
// NOTE(review): extraction artifact — the true-branch line of this ternary
// (presumably a getICmpTrueVal(getTargetLowering(), ...) call) was dropped
// here. Verify against upstream.
4556 /*IsVector = */
4557 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4558 /* IsFP = */ false)
4559 : 0;
4560 return true;
4561}
4562
// NOTE(review): extraction artifact — the first signature line(s) were
// dropped before 4565; presumably
// bool CombinerHelper::matchICmpToLHSKnownBits(MachineInstr &MI, ...).
//
// Matches "icmp ne x, 0" / "icmp eq x, 1" where x is known to be 0 or 1 and
// the target's boolean true is 1, and records a build function that replaces
// the compare with x itself (via COPY/TRUNC/ZEXT as sizes require).
4565 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4566 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4567 // Given:
4568 //
4569 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4570 // %cmp = G_ICMP ne %x, 0
4571 //
4572 // Or:
4573 //
4574 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4575 // %cmp = G_ICMP eq %x, 1
4576 //
4577 // We can replace %cmp with %x assuming true is 1 on the target.
4578 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4579 if (!CmpInst::isEquality(Pred))
4580 return false;
4581 Register Dst = MI.getOperand(0).getReg();
4582 LLT DstTy = MRI.getType(Dst);
// NOTE(review): extraction artifact — the opening line of this check
// (presumably a getICmpTrueVal(getTargetLowering(), ...) call requiring the
// target's boolean true to be 1) was dropped here. Verify against upstream.
4584 /* IsFP = */ false) != 1)
4585 return false;
// For EQ we need the RHS constant to be 1; for NE it must be 0.
4586 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4587 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4588 return false;
4589 Register LHS = MI.getOperand(2).getReg();
4590 auto KnownLHS = VT->getKnownBits(LHS);
4591 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4592 return false;
4593 // Make sure replacing Dst with the LHS is a legal operation.
4594 LLT LHSTy = MRI.getType(LHS);
4595 unsigned LHSSize = LHSTy.getSizeInBits();
4596 unsigned DstSize = DstTy.getSizeInBits();
4597 unsigned Op = TargetOpcode::COPY;
4598 if (DstSize != LHSSize)
4599 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4600 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4601 return false;
4602 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4603 return true;
4604}
4605
4606// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
// NOTE(review): extraction artifact — the signature line(s) were dropped
// after the comment above; presumably
// bool CombinerHelper::matchAndOrDisjointMask(MachineInstr &MI, ...).
4609 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4610 assert(MI.getOpcode() == TargetOpcode::G_AND);
4611
4612 // Ignore vector types to simplify matching the two constants.
4613 // TODO: do this for vectors and scalars via a demanded bits analysis.
4614 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4615 if (Ty.isVector())
4616 return false;
4617
4618 Register Src;
4619 Register AndMaskReg;
4620 int64_t AndMaskBits;
4621 int64_t OrMaskBits;
4622 if (!mi_match(MI, MRI,
4623 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4624 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4625 return false;
4626
4627 // Check if OrMask could turn on any bits in Src.
// If the two masks share no bits, the OR cannot affect any bit the AND
// keeps, so the OR can be bypassed entirely.
4628 if (AndMaskBits & OrMaskBits)
4629 return false;
4630
// Rewrites the G_AND in place: operand 1 becomes the OR's source.
4631 MatchInfo = [=, &MI](MachineIRBuilder &B) {
// NOTE(review): Observer changing/changed notification lines around this
// in-place mutation appear to have been dropped by extraction.
4633 // Canonicalize the result to have the constant on the RHS.
4634 if (MI.getOperand(1).getReg() == AndMaskReg)
4635 MI.getOperand(2).setReg(AndMaskReg);
4636 MI.getOperand(1).setReg(Src);
4638 };
4639 return true;
4640}
4641
4642/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
// NOTE(review): extraction artifact — the signature line(s) were dropped
// after the doc comment; presumably
// bool CombinerHelper::matchBitfieldExtractFromSExtInReg(MachineInstr &MI,
// ...).
4645 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4646 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4647 Register Dst = MI.getOperand(0).getReg();
4648 Register Src = MI.getOperand(1).getReg();
4649 LLT Ty = MRI.getType(Src);
// NOTE(review): extraction artifact — the declaration of 'ExtractTy' (used
// for the legality query and the constants below) was dropped here.
4651 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4652 return false;
4653 int64_t Width = MI.getOperand(2).getImm();
4654 Register ShiftSrc;
4655 int64_t ShiftImm;
// Accept either arithmetic or logical right shift feeding the sext_inreg,
// as long as the shift result has no other non-debug uses.
4656 if (!mi_match(
4657 Src, MRI,
4658 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4659 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4660 return false;
// The extracted field [ShiftImm, ShiftImm + Width) must lie inside the type.
4661 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4662 return false;
4663
4664 MatchInfo = [=](MachineIRBuilder &B) {
4665 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4666 auto Cst2 = B.buildConstant(ExtractTy, Width);
4667 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4668 };
4669 return true;
4670}
4671
4672/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
// NOTE(review): extraction artifact — the signature line was dropped after
// the doc comment; presumably
// bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI,
// BuildFnTy &MatchInfo) const {.
4674 BuildFnTy &MatchInfo) const {
4675 GAnd *And = cast<GAnd>(&MI);
4676 Register Dst = And->getReg(0);
4677 LLT Ty = MRI.getType(Dst);
// NOTE(review): extraction artifact — the declaration of 'ExtractTy' was
// dropped here; it is used in the legality query and constants below.
4679 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4680 // into account.
4681 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4682 return false;
4683
4684 int64_t AndImm, LSBImm;
4685 Register ShiftSrc;
4686 const unsigned Size = Ty.getScalarSizeInBits();
4687 if (!mi_match(And->getReg(0), MRI,
4688 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4689 m_ICst(AndImm))))
4690 return false;
4691
4692 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4693 auto MaybeMask = static_cast<uint64_t>(AndImm);
4694 if (MaybeMask & (MaybeMask + 1))
4695 return false;
4696
4697 // LSB must fit within the register.
4698 if (static_cast<uint64_t>(LSBImm) >= Size)
4699 return false;
4700
// Field width = number of trailing ones in the low-bit mask.
4701 uint64_t Width = APInt(Size, AndImm).countr_one();
4702 MatchInfo = [=](MachineIRBuilder &B) {
4703 auto WidthCst = B.buildConstant(ExtractTy, Width);
4704 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4705 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4706 };
4707 return true;
4708}
4709
// NOTE(review): extraction artifact — the first signature line(s) were
// dropped before 4712; presumably
// bool CombinerHelper::matchBitfieldExtractFromShr(MachineInstr &MI, ...).
//
// Forms G_SBFX/G_UBFX from "shr (shl x, c1), c2" with c2 >= c1: the shifts
// isolate the field at position (c2 - c1) of width (bitwidth - c2).
4712 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4713 const unsigned Opcode = MI.getOpcode();
4714 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4715
4716 const Register Dst = MI.getOperand(0).getReg();
4717
// Arithmetic shift keeps the sign: signed extract; logical: unsigned.
4718 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4719 ? TargetOpcode::G_SBFX
4720 : TargetOpcode::G_UBFX;
4721
4722 // Check if the type we would use for the extract is legal
4723 LLT Ty = MRI.getType(Dst);
// NOTE(review): extraction artifact — the declaration of 'ExtractTy' was
// dropped here; it is used in the legality query and constants below.
4725 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4726 return false;
4727
4728 Register ShlSrc;
4729 int64_t ShrAmt;
4730 int64_t ShlAmt;
4731 const unsigned Size = Ty.getScalarSizeInBits();
4732
4733 // Try to match shr (shl x, c1), c2
4734 if (!mi_match(Dst, MRI,
4735 m_BinOp(Opcode,
4736 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4737 m_ICst(ShrAmt))))
4738 return false;
4739
4740 // Make sure that the shift sizes can fit a bitfield extract
4741 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4742 return false;
4743
4744 // Skip this combine if the G_SEXT_INREG combine could handle it
4745 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4746 return false;
4747
4748 // Calculate start position and width of the extract
4749 const int64_t Pos = ShrAmt - ShlAmt;
4750 const int64_t Width = Size - ShrAmt;
4751
4752 MatchInfo = [=](MachineIRBuilder &B) {
4753 auto WidthCst = B.buildConstant(ExtractTy, Width);
4754 auto PosCst = B.buildConstant(ExtractTy, Pos);
4755 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4756 };
4757 return true;
4758}
4759
// NOTE(review): extraction artifact — the first signature line(s) were
// dropped before 4762; presumably
// bool CombinerHelper::matchBitfieldExtractFromShrAnd(MachineInstr &MI, ...).
//
// Forms a G_UBFX from "shr (and x, mask), amt" when mask, widened by the
// shifted-out low bits, is a contiguous low-bit mask; emits 0 directly when
// the shift discards the entire masked value.
4762 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4763 const unsigned Opcode = MI.getOpcode();
4764 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4765
4766 const Register Dst = MI.getOperand(0).getReg();
4767 LLT Ty = MRI.getType(Dst);
// NOTE(review): extraction artifact — the declaration of 'ExtractTy' was
// dropped here; it is used in the legality query and constants below.
4769 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4770 return false;
4771
4772 // Try to match shr (and x, c1), c2
4773 Register AndSrc;
4774 int64_t ShrAmt;
4775 int64_t SMask;
4776 if (!mi_match(Dst, MRI,
4777 m_BinOp(Opcode,
4778 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4779 m_ICst(ShrAmt))))
4780 return false;
4781
4782 const unsigned Size = Ty.getScalarSizeInBits();
4783 if (ShrAmt < 0 || ShrAmt >= Size)
4784 return false;
4785
4786 // If the shift subsumes the mask, emit the 0 directly.
4787 if (0 == (SMask >> ShrAmt)) {
4788 MatchInfo = [=](MachineIRBuilder &B) {
4789 B.buildConstant(Dst, 0);
4790 };
4791 return true;
4792 }
4793
4794 // Check that ubfx can do the extraction, with no holes in the mask.
// Fill the bits the shift discards and clear bits above the type so the
// contiguity test only examines the bits that matter.
4795 uint64_t UMask = SMask;
4796 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4797 UMask &= maskTrailingOnes<uint64_t>(Size);
4798 if (!isMask_64(UMask))
4799 return false;
4800
4801 // Calculate start position and width of the extract.
4802 const int64_t Pos = ShrAmt;
4803 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4804
4805 // It's preferable to keep the shift, rather than form G_SBFX.
4806 // TODO: remove the G_AND via demanded bits analysis.
4807 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4808 return false;
4809
4810 MatchInfo = [=](MachineIRBuilder &B) {
4811 auto WidthCst = B.buildConstant(ExtractTy, Width);
4812 auto PosCst = B.buildConstant(ExtractTy, Pos);
4813 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4814 };
4815 return true;
4816}
4817
// Returns true if reassociating the constants of this G_PTR_ADD chain would
// replace a load/store addressing mode the target can fold with one it
// cannot. Used as a veto by the matchReassoc* combines below.
// NOTE(review): the declaration of the local AddrMode (embedded line 4862)
// was lost in extraction; code is otherwise verbatim.
4818bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4819 MachineInstr &MI) const {
4820 auto &PtrAdd = cast<GPtrAdd>(MI);
4821
// Only interesting when the base is itself a G_PTR_ADD (i.e. a chain).
4822 Register Src1Reg = PtrAdd.getBaseReg();
4823 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4824 if (!Src1Def)
4825 return false;
4826
4827 Register Src2Reg = PtrAdd.getOffsetReg();
4828
// A single-use inner add will be folded away entirely, so reassociation
// cannot pessimize anything in that case.
4829 if (MRI.hasOneNonDBGUse(Src1Reg))
4830 return false;
4831
// Both offsets must be known constants for the combined-offset question to
// be answerable.
4832 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4833 if (!C1)
4834 return false;
4835 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4836 if (!C2)
4837 return false;
4838
4839 const APInt &C1APIntVal = *C1;
4840 const APInt &C2APIntVal = *C2;
4841 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4842
4843 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4844 // This combine may end up running before ptrtoint/inttoptr combines
4845 // manage to eliminate redundant conversions, so try to look through them.
4846 MachineInstr *ConvUseMI = &UseMI;
4847 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4848 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4849 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4850 Register DefReg = ConvUseMI->getOperand(0).getReg();
4851 if (!MRI.hasOneNonDBGUse(DefReg))
4852 break;
4853 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4854 ConvUseOpc = ConvUseMI->getOpcode();
4855 }
// Only loads/stores have addressing modes worth protecting.
4856 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4857 if (!LdStMI)
4858 continue;
4859 // Is x[offset2] already not a legal addressing mode? If so then
4860 // reassociating the constants breaks nothing (we test offset2 because
4861 // that's the one we hope to fold into the load or store).
4863 AM.HasBaseReg = true;
4864 AM.BaseOffs = C2APIntVal.getSExtValue();
4865 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4866 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4867 PtrAdd.getMF()->getFunction().getContext());
4868 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4869 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4870 AccessTy, AS))
4871 continue;
4872
4873 // Would x[offset1+offset2] still be a legal addressing mode?
// Legal before, illegal after => reassociation would break the pattern.
4874 AM.BaseOffs = CombinedValue;
4875 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4876 AccessTy, AS))
4877 return true;
4878 }
4879
4880 return false;
4881}
4882
// Reassociate G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
// so the constant ends up on the outer add where it can fold into addressing.
// NOTE(review): the first signature line, the "Flags |= ..." statements and
// the Observer changing/changedInstr calls were lost in extraction (embedded
// line numbers jump 4906 -> 4908 -> 4910 -> 4912 and 4917 -> 4919); code is
// otherwise verbatim.
4884 MachineInstr *RHS,
4885 BuildFnTy &MatchInfo) const {
4886 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4887 Register Src1Reg = MI.getOperand(1).getReg();
4888 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4889 return false;
// The inner add's RHS must be a constant to be worth isolating.
4890 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4891 if (!C2)
4892 return false;
4893
4894 // If both additions are nuw, the reassociated additions are also nuw.
4895 // If the original G_PTR_ADD is additionally nusw, X and C are both not
4896 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
4897 // therefore also nusw.
4898 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
4899 // the new G_PTR_ADDs are then also inbounds.
4900 unsigned PtrAddFlags = MI.getFlags();
4901 unsigned AddFlags = RHS->getFlags();
4902 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
4903 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
4904 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
4905 unsigned Flags = 0;
4906 if (IsNoUWrap)
4908 if (IsNoUSWrap)
4910 if (IsInBounds)
4912
4913 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4914 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4915
// Build the new inner BASE+X pointer add, then rewrite MI in place to add C.
4916 auto NewBase =
4917 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
4919 MI.getOperand(1).setReg(NewBase.getReg(0));
4920 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4921 MI.setFlags(Flags);
4923 };
// Veto the rewrite if it would break a foldable addressing mode.
4924 return !reassociationCanBreakAddressingModePattern(MI);
4925}
4926
// Reassociate G_PTR_ADD((G_PTR_ADD X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
// (only when the inner ptr_add has a single use), moving the constant to the
// outer add.
// NOTE(review): the first signature line, the "Flags |= ..." statements, the
// flag-name continuations of the IsNoUSWrap/IsInBounds conditions, and the
// Observer calls around MI were lost in extraction (embedded numbers jump
// 4947 -> 4949 -> 4951, 4953 -> ... -> 4959, 4966 -> 4968 -> 4970); code is
// otherwise verbatim.
4928 MachineInstr *LHS,
4929 MachineInstr *RHS,
4930 BuildFnTy &MatchInfo) const {
4931 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
4932 // if and only if (G_PTR_ADD X, C) has one use.
4933 Register LHSBase;
4934 std::optional<ValueAndVReg> LHSCstOff;
4935 if (!mi_match(MI.getBaseReg(), MRI,
4936 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4937 return false;
4938
4939 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4940
4941 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
4942 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
4943 // so the new G_PTR_ADDs are also inbounds.
4944 unsigned PtrAddFlags = MI.getFlags();
4945 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
4946 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
4947 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
4949 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
4951 unsigned Flags = 0;
4952 if (IsNoUWrap)
4954 if (IsNoUSWrap)
4956 if (IsInBounds)
4958
4959 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4960 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4961 // before its def. Sink the instruction so the outer PTR_ADD to ensure this
4962 // doesn't happen.
4963 LHSPtrAdd->moveBefore(&MI);
4964 Register RHSReg = MI.getOffsetReg();
4965 // set VReg will cause type mismatch if it comes from extend/trunc
4966 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4968 MI.getOperand(2).setReg(NewCst.getReg(0));
4969 MI.setFlags(Flags);
// Swap the inner ptr_add's constant offset for the outer Y operand, with
// observer notifications so the combiner worklist stays consistent.
4971 Observer.changingInstr(*LHSPtrAdd);
4972 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4973 LHSPtrAdd->setFlags(Flags);
4974 Observer.changedInstr(*LHSPtrAdd);
4975 };
// Veto the rewrite if it would break a foldable addressing mode.
4976 return !reassociationCanBreakAddressingModePattern(MI);
4977}
4978
// Fold G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) when both
// offsets are constants.
// NOTE(review): the first signature line, the "Flags |= ..." statements and
// the Observer calls were lost in extraction (embedded numbers jump
// 5008 -> 5010, 5010 -> 5013, 5020 -> 5022); code is otherwise verbatim.
4980 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
4981 BuildFnTy &MatchInfo) const {
4982 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4983 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4984 if (!LHSPtrAdd)
4985 return false;
4986
// Both offsets must be integer constants for the fold to apply.
4987 Register Src2Reg = MI.getOperand(2).getReg();
4988 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4989 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4990 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4991 if (!C1)
4992 return false;
4993 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4994 if (!C2)
4995 return false;
4996
4997 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
4998 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
4999 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5000 // largest signed integer that fits into the index type, which is the maximum
5001 // size of allocated objects according to the IR Language Reference.
5002 unsigned PtrAddFlags = MI.getFlags();
5003 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5004 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5005 bool IsInBounds =
5006 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5007 unsigned Flags = 0;
5008 if (IsNoUWrap)
5010 if (IsInBounds) {
5013 }
5014
// Rewrite MI in place: point at the inner base and the summed constant.
5015 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5016 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5018 MI.getOperand(1).setReg(LHSSrc1);
5019 MI.getOperand(2).setReg(NewCst.getReg(0));
5020 MI.setFlags(Flags);
5022 };
// Veto the rewrite if it would break a foldable addressing mode.
5023 return !reassociationCanBreakAddressingModePattern(MI);
5024}
5025
// Top-level dispatcher for G_PTR_ADD reassociation: tries the three patterns
// documented below, in order, delegating to the specific matchers above.
// NOTE(review): the first signature line was lost in extraction (embedded
// line 5026); code is otherwise verbatim.
5027 BuildFnTy &MatchInfo) const {
5028 auto &PtrAdd = cast<GPtrAdd>(MI);
5029 // We're trying to match a few pointer computation patterns here for
5030 // re-association opportunities.
5031 // 1) Isolating a constant operand to be on the RHS, e.g.:
5032 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5033 //
5034 // 2) Folding two constants in each sub-tree as long as such folding
5035 // doesn't break a legal addressing mode.
5036 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5037 //
5038 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5039 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
5040 // iif (G_PTR_ADD X, C) has one use.
5041 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5042 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5043
5044 // Try to match example 2.
5045 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5046 return true;
5047
5048 // Try to match example 3.
5049 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5050 return true;
5051
5052 // Try to match example 1.
5053 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5054 return true;
5055
5056 return false;
5057}
// Helper for matchReassocCommBinOp: given a commutative binop Dst = OpLHS Opc
// OpRHS where OpLHS is itself an Opc, try the two reassociation patterns in
// the comments below.
// NOTE(review): the first signature line and the guard around the constant
// check were lost in extraction (embedded numbers jump 5074 -> 5077); the
// brace structure below reflects that loss — code is otherwise verbatim.
5059 Register OpLHS, Register OpRHS,
5060 BuildFnTy &MatchInfo) const {
5061 LLT OpRHSTy = MRI.getType(OpRHS);
5062 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5063
// Only reassociate when the inner op is the same opcode as the outer one.
5064 if (OpLHSDef->getOpcode() != Opc)
5065 return false;
5066
5067 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5068 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5069 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5070
5071 // If the inner op is (X op C), pull the constant out so it can be folded with
5072 // other constants in the expression tree. Folding is not guaranteed so we
5073 // might have (C1 op C2). In that case do not pull a constant out because it
5074 // won't help and can lead to infinite loops.
5077 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5078 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5079 MatchInfo = [=](MachineIRBuilder &B) {
5080 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5081 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5082 };
5083 return true;
5084 }
// Otherwise defer to the target's profitability hook before reassociating.
5085 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5086 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5087 // iff (op x, c1) has one use
5088 MatchInfo = [=](MachineIRBuilder &B) {
5089 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5090 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5091 };
5092 return true;
5093 }
5094 }
5095
5096 return false;
5097}
5098
// Reassociate a commutative integer binop by trying tryReassocBinOp with the
// operands in both orders.
// NOTE(review): the first signature line was lost in extraction (embedded
// line 5099); code is otherwise verbatim.
5100 BuildFnTy &MatchInfo) const {
5101 // We don't check if the reassociation will break a legal addressing mode
5102 // here since pointer arithmetic is handled by G_PTR_ADD.
5103 unsigned Opc = MI.getOpcode();
5104 Register DstReg = MI.getOperand(0).getReg();
5105 Register LHSReg = MI.getOperand(1).getReg();
5106 Register RHSReg = MI.getOperand(2).getReg();
5107
// Try both operand orders since the operation is commutative.
5108 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5109 return true;
5110 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5111 return true;
5112 return false;
5113}
5114
// Constant-fold a cast (ext/trunc/etc.) of a constant operand; on success
// MatchInfo receives the folded APInt.
// NOTE(review): the first signature line was lost in extraction (embedded
// line 5115); code is otherwise verbatim.
5116 APInt &MatchInfo) const {
5117 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5118 Register SrcOp = MI.getOperand(1).getReg();
5119
// ConstantFoldCastOp returns std::nullopt when the source isn't constant.
5120 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5121 MatchInfo = *MaybeCst;
5122 return true;
5123 }
5124
5125 return false;
5126}
5127
// Constant-fold an integer binop with two constant operands; on success
// MatchInfo receives the folded APInt.
// NOTE(review): the first signature line was lost in extraction (embedded
// line 5128); code is otherwise verbatim.
5129 APInt &MatchInfo) const {
5130 Register Op1 = MI.getOperand(1).getReg();
5131 Register Op2 = MI.getOperand(2).getReg();
5132 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5133 if (!MaybeCst)
5134 return false;
5135 MatchInfo = *MaybeCst;
5136 return true;
5137}
5138
// Constant-fold a floating-point binop with two constant operands; on success
// MatchInfo receives a ConstantFP made in the function's LLVMContext.
// NOTE(review): the first signature line was lost in extraction (embedded
// line 5139); code is otherwise verbatim.
5140 ConstantFP *&MatchInfo) const {
5141 Register Op1 = MI.getOperand(1).getReg();
5142 Register Op2 = MI.getOperand(2).getReg();
5143 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5144 if (!MaybeCst)
5145 return false;
5146 MatchInfo =
5147 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5148 return true;
5149}
5150
// Constant-fold G_FMA / G_FMAD when all three operands are FP constants, via
// APFloat::fusedMultiplyAdd.
// NOTE(review): the first signature line and the rounding-mode argument line
// of fusedMultiplyAdd were lost in extraction (embedded numbers jump
// 5170 -> 5172); code is otherwise verbatim.
5152 ConstantFP *&MatchInfo) const {
5153 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5154 MI.getOpcode() == TargetOpcode::G_FMAD);
5155 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5156
// All three inputs must be FP constants; check Op3 first (cheapest bail).
5157 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5158 if (!Op3Cst)
5159 return false;
5160
5161 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5162 if (!Op2Cst)
5163 return false;
5164
5165 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5166 if (!Op1Cst)
5167 return false;
5168
// Compute Op1 * Op2 + Op3 with a single rounding (fused) operation.
5169 APFloat Op1F = Op1Cst->getValueAPF();
5170 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5172 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5173 return true;
5174}
5175
// Narrow a binop whose result is masked by a G_AND with a low-bit mask: the
// binop is performed at the mask's width and zero-extended back, which lets
// later combines drop the G_AND.
// NOTE(review): the first signature lines and the Observer calls in the
// apply lambda were lost in extraction (embedded numbers jump 5259 -> 5261
// -> 5263); code is otherwise verbatim.
5178 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5179 // Look for a binop feeding into an AND with a mask:
5180 //
5181 // %add = G_ADD %lhs, %rhs
5182 // %and = G_AND %add, 000...11111111
5183 //
5184 // Check if it's possible to perform the binop at a narrower width and zext
5185 // back to the original width like so:
5186 //
5187 // %narrow_lhs = G_TRUNC %lhs
5188 // %narrow_rhs = G_TRUNC %rhs
5189 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5190 // %new_add = G_ZEXT %narrow_add
5191 // %and = G_AND %new_add, 000...11111111
5192 //
5193 // This can allow later combines to eliminate the G_AND if it turns out
5194 // that the mask is irrelevant.
5195 assert(MI.getOpcode() == TargetOpcode::G_AND);
5196 Register Dst = MI.getOperand(0).getReg();
5197 Register AndLHS = MI.getOperand(1).getReg();
5198 Register AndRHS = MI.getOperand(2).getReg();
5199 LLT WideTy = MRI.getType(Dst);
5200
5201 // If the potential binop has more than one use, then it's possible that one
5202 // of those uses will need its full width.
5203 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5204 return false;
5205
5206 // Check if the LHS feeding the AND is impacted by the high bits that we're
5207 // masking out.
5208 //
5209 // e.g. for 64-bit x, y:
5210 //
5211 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5212 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5213 if (!LHSInst)
5214 return false;
// Only these ops are safe to narrow: their low bits don't depend on the
// truncated-away high bits of the inputs.
5215 unsigned LHSOpc = LHSInst->getOpcode();
5216 switch (LHSOpc) {
5217 default:
5218 return false;
5219 case TargetOpcode::G_ADD:
5220 case TargetOpcode::G_SUB:
5221 case TargetOpcode::G_MUL:
5222 case TargetOpcode::G_AND:
5223 case TargetOpcode::G_OR:
5224 case TargetOpcode::G_XOR:
5225 break;
5226 }
5227
5228 // Find the mask on the RHS.
5229 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5230 if (!Cst)
5231 return false;
5232 auto Mask = Cst->Value;
5233 if (!Mask.isMask())
5234 return false;
5235
5236 // No point in combining if there's nothing to truncate.
5237 unsigned NarrowWidth = Mask.countr_one();
5238 if (NarrowWidth == WideTy.getSizeInBits())
5239 return false;
5240 LLT NarrowTy = LLT::scalar(NarrowWidth);
5241
5242 // Check if adding the zext + truncates could be harmful.
5243 auto &MF = *MI.getMF();
5244 const auto &TLI = getTargetLowering();
5245 LLVMContext &Ctx = MF.getFunction().getContext();
5246 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5247 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5248 return false;
5249 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5250 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5251 return false;
5252 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5253 Register BinOpRHS = LHSInst->getOperand(2).getReg();
// Build trunc/narrow-op/zext and repoint the G_AND's LHS at the zext.
5254 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5255 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5256 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5257 auto NarrowBinOp =
5258 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5259 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5261 MI.getOperand(1).setReg(Ext.getReg(0));
5263 };
5264 return true;
5265}
5266
// Rewrite (G_*MULO x, 2) as the equivalent overflow-checked addition
// (G_UADDO/G_SADDO x, x).
// NOTE(review): the first signature line and the Observer calls were lost in
// extraction (embedded numbers jump 5275 -> 5277 and 5280 -> 5282); code is
// otherwise verbatim.
5268 BuildFnTy &MatchInfo) const {
5269 unsigned Opc = MI.getOpcode();
5270 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5271
// Only fires when the multiplier operand is the constant (or splat) 2.
5272 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5273 return false;
5274
5275 MatchInfo = [=, &MI](MachineIRBuilder &B) {
// Mutate MI in place: switch the opcode and duplicate x as both addends.
5277 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5278 : TargetOpcode::G_SADDO;
5279 MI.setDesc(Builder.getTII().get(NewOpc));
5280 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5282 };
5283 return true;
5284}
5285
// Fold (G_*MULO x, 0) to a zero result with a cleared carry-out.
// NOTE(review): the first signature line and the legality guard between the
// Carry register and "return false" were lost in extraction (embedded
// numbers jump 5294 -> 5297); code is otherwise verbatim.
5287 BuildFnTy &MatchInfo) const {
5288 // (G_*MULO x, 0) -> 0 + no carry out
5289 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5290 MI.getOpcode() == TargetOpcode::G_SMULO);
5291 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5292 return false;
5293 Register Dst = MI.getOperand(0).getReg();
5294 Register Carry = MI.getOperand(1).getReg();
5297 return false;
// Replace both defs with zero constants (value and overflow flag).
5298 MatchInfo = [=](MachineIRBuilder &B) {
5299 B.buildConstant(Dst, 0);
5300 B.buildConstant(Carry, 0);
5301 };
5302 return true;
5303}
5304
// Strip a known-zero carry-in: (G_*ADDE/G_*SUBE x, y, 0) becomes the
// corresponding carry-out-only op (G_*ADDO/G_*SUBO x, y).
// NOTE(review): the first signature line and the Observer calls were lost in
// extraction (embedded numbers jump 5330 -> 5332 and 5333 -> 5335); code is
// otherwise verbatim.
5306 BuildFnTy &MatchInfo) const {
5307 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5308 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5309 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5310 MI.getOpcode() == TargetOpcode::G_SADDE ||
5311 MI.getOpcode() == TargetOpcode::G_USUBE ||
5312 MI.getOpcode() == TargetOpcode::G_SSUBE);
// Operand 4 is the carry-in; it must be constant (or splat) zero.
5313 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5314 return false;
5315 MatchInfo = [&](MachineIRBuilder &B) {
5316 unsigned NewOpcode;
5317 switch (MI.getOpcode()) {
5318 case TargetOpcode::G_UADDE:
5319 NewOpcode = TargetOpcode::G_UADDO;
5320 break;
5321 case TargetOpcode::G_SADDE:
5322 NewOpcode = TargetOpcode::G_SADDO;
5323 break;
5324 case TargetOpcode::G_USUBE:
5325 NewOpcode = TargetOpcode::G_USUBO;
5326 break;
5327 case TargetOpcode::G_SSUBE:
5328 NewOpcode = TargetOpcode::G_SSUBO;
5329 break;
5330 }
// Mutate in place: swap the opcode and drop the now-unused carry-in operand.
5332 MI.setDesc(B.getTII().get(NewOpcode));
5333 MI.removeOperand(4);
5335 };
5336 return true;
5337}
5338
// Simplify subtractions that cancel an addend:
//   (x + y) - z -> x or y when the other addend equals z, and
//   x - (y + z) -> 0 - y or 0 - z when x equals the other addend.
// NOTE(review): the first signature line and the continuation lines of the
// four compound conditions (matching the other constant operand) were lost
// in extraction (embedded numbers jump 5349 -> 5351, 5352 -> 5354,
// 5366 -> 5368, 5369 -> 5371); code is otherwise verbatim.
5340 BuildFnTy &MatchInfo) const {
5341 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5342 Register Dst = MI.getOperand(0).getReg();
5343 // (x + y) - z -> x (if y == z)
5344 // (x + y) - z -> y (if x == z)
5345 Register X, Y, Z;
5346 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
// ReplaceReg stays invalid (default Register) when neither case matches.
5347 Register ReplaceReg;
5348 int64_t CstX, CstY;
5349 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5351 ReplaceReg = X;
5352 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5354 ReplaceReg = Y;
5355 if (ReplaceReg) {
5356 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5357 return true;
5358 }
5359 }
5360
5361 // x - (y + z) -> 0 - y (if x == z)
5362 // x - (y + z) -> 0 - z (if x == y)
5363 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5364 Register ReplaceReg;
5365 int64_t CstX;
5366 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5368 ReplaceReg = Y;
5369 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5371 ReplaceReg = Z;
5372 if (ReplaceReg) {
5373 MatchInfo = [=](MachineIRBuilder &B) {
5374 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5375 B.buildSub(Dst, Zero, ReplaceReg);
5376 };
5377 return true;
5378 }
5379 }
5380 return false;
5381}
5382
// Lower G_UDIV/G_UREM by constant into multiply-by-magic-number sequences
// (exact divisors use a multiplicative inverse; general divisors use the
// classic magic-number algorithm with optional pre/post shifts and the NPQ
// fixup path). For G_UREM, the remainder is recovered as LHS - q * RHS.
// NOTE(review): the opening signature line, the ShiftAmtTy definition, the
// UnsignedDivisionByConstantInfo::get line, and two assert/isLegal lines were
// lost in extraction (embedded numbers jump 5392 -> 5394, 5467 -> 5470,
// 5510 -> 5512, 5539 -> 5541); code is otherwise verbatim.
5384 unsigned Opcode = MI.getOpcode();
5385 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5386 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5387 Register Dst = UDivorRem.getReg(0);
5388 Register LHS = UDivorRem.getReg(1);
5389 Register RHS = UDivorRem.getReg(2);
5390 LLT Ty = MRI.getType(Dst);
5391 LLT ScalarTy = Ty.getScalarType();
5392 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5394 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5395
5396 auto &MIB = Builder;
5397
// --- Exact-division path: divisor is known to divide LHS evenly. ---
5398 bool UseSRL = false;
5399 SmallVector<Register, 16> Shifts, Factors;
5400 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5401 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5402
5403 auto BuildExactUDIVPattern = [&](const Constant *C) {
5404 // Don't recompute inverses for each splat element.
5405 if (IsSplat && !Factors.empty()) {
5406 Shifts.push_back(Shifts[0]);
5407 Factors.push_back(Factors[0]);
5408 return true;
5409 }
5410
// Strip the power-of-two part of the divisor into a logical shift right,
// then multiply by the odd part's multiplicative inverse mod 2^BW.
5411 auto *CI = cast<ConstantInt>(C);
5412 APInt Divisor = CI->getValue();
5413 unsigned Shift = Divisor.countr_zero();
5414 if (Shift) {
5415 Divisor.lshrInPlace(Shift);
5416 UseSRL = true;
5417 }
5418
5419 // Calculate the multiplicative inverse modulo BW.
5420 APInt Factor = Divisor.multiplicativeInverse();
5421 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5422 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5423 return true;
5424 };
5425
5426 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5427 // Collect all magic values from the build vector.
5428 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5429 llvm_unreachable("Expected unary predicate match to succeed");
5430
5431 Register Shift, Factor;
5432 if (Ty.isVector()) {
5433 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5434 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5435 } else {
5436 Shift = Shifts[0];
5437 Factor = Factors[0];
5438 }
5439
5440 Register Res = LHS;
5441
5442 if (UseSRL)
5443 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5444
5445 return MIB.buildMul(Ty, Res, Factor);
5446 }
5447
// --- General path: magic-number division (Hacker's Delight style). ---
5448 unsigned KnownLeadingZeros =
5450
5451 bool UseNPQ = false;
5452 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5453 auto BuildUDIVPattern = [&](const Constant *C) {
5454 auto *CI = cast<ConstantInt>(C);
5455 const APInt &Divisor = CI->getValue();
5456
5457 bool SelNPQ = false;
5458 APInt Magic(Divisor.getBitWidth(), 0);
5459 unsigned PreShift = 0, PostShift = 0;
5460
5461 // Magic algorithm doesn't work for division by 1. We need to emit a select
5462 // at the end.
5463 // TODO: Use undef values for divisor of 1.
5464 if (!Divisor.isOne()) {
5465
5466 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5467 // in the dividend exceeds the leading zeros for the divisor.
5470 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5471
5472 Magic = std::move(magics.Magic);
5473
5474 assert(magics.PreShift < Divisor.getBitWidth() &&
5475 "We shouldn't generate an undefined shift!");
5476 assert(magics.PostShift < Divisor.getBitWidth() &&
5477 "We shouldn't generate an undefined shift!");
5478 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5479 PreShift = magics.PreShift;
5480 PostShift = magics.PostShift;
5481 SelNPQ = magics.IsAdd;
5482 }
5483
5484 PreShifts.push_back(
5485 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5486 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
// NPQ factor is 2^(EltBits-1) for NPQ lanes (acts as SRL-by-1 via UMULH),
// zero otherwise.
5487 NPQFactors.push_back(
5488 MIB.buildConstant(ScalarTy,
5489 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5490 : APInt::getZero(EltBits))
5491 .getReg(0));
5492 PostShifts.push_back(
5493 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5494 UseNPQ |= SelNPQ;
5495 return true;
5496 };
5497
5498 // Collect the shifts/magic values from each element.
5499 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5500 (void)Matched;
5501 assert(Matched && "Expected unary predicate match to succeed");
5502
5503 Register PreShift, PostShift, MagicFactor, NPQFactor;
5504 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5505 if (RHSDef) {
5506 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5507 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5508 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5509 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5510 } else {
5512 "Non-build_vector operation should have been a scalar");
5513 PreShift = PreShifts[0];
5514 MagicFactor = MagicFactors[0];
5515 PostShift = PostShifts[0];
5516 }
5517
// Emit: q = umulh(lshr(LHS, pre), magic); optional NPQ fixup; lshr(q, post).
5518 Register Q = LHS;
5519 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5520
5521 // Multiply the numerator (operand 0) by the magic value.
5522 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5523
5524 if (UseNPQ) {
5525 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5526
5527 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5528 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5529 if (Ty.isVector())
5530 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5531 else
5532 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5533
5534 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5535 }
5536
5537 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
// Divisor == 1 lanes bypass the magic sequence via a select of LHS.
5538 auto One = MIB.buildConstant(Ty, 1);
5539 auto IsOne = MIB.buildICmp(
5541 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5542 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5543
// For G_UREM, reconstruct the remainder: LHS - (q * RHS).
5544 if (Opcode == TargetOpcode::G_UREM) {
5545 auto Prod = MIB.buildMul(Ty, ret, RHS);
5546 return MIB.buildSub(Ty, LHS, Prod);
5547 }
5548 return ret;
5549}
5550
// Decide whether a G_UDIV/G_UREM by constant should be expanded by
// buildUDivOrURemUsingMul: divisor must be a non-zero constant (vector),
// integer division must not be cheap on the target, and the replacement ops
// must be legal (or we are pre-legalization).
// NOTE(review): the opening signature line, the exact-flag condition
// continuation, and the isLegalOrBeforeLegalizer call opening were lost in
// extraction (embedded numbers jump 5570 -> 5572 and 5585 -> 5587); code is
// otherwise verbatim.
5552 unsigned Opcode = MI.getOpcode();
5553 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5554 Register Dst = MI.getOperand(0).getReg();
5555 Register RHS = MI.getOperand(2).getReg();
5556 LLT DstTy = MRI.getType(Dst);
5557
// If hardware division is cheap, the multiply sequence isn't a win.
5558 auto &MF = *MI.getMF();
5559 AttributeList Attr = MF.getFunction().getAttributes();
5560 const auto &TLI = getTargetLowering();
5561 LLVMContext &Ctx = MF.getFunction().getContext();
5562 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5563 return false;
5564
5565 // Don't do this for minsize because the instruction sequence is usually
5566 // larger.
5567 if (MF.getFunction().hasMinSize())
5568 return false;
5569
// Exact udiv only needs a shift + multiply, so fewer legality checks apply.
5570 if (Opcode == TargetOpcode::G_UDIV &&
5572 return matchUnaryPredicate(
5573 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5574 }
5575
5576 auto *RHSDef = MRI.getVRegDef(RHS);
5577 if (!isConstantOrConstantVector(*RHSDef, MRI))
5578 return false;
5579
5580 // Don't do this if the types are not going to be legal.
5581 if (LI) {
5582 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5583 return false;
5584 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5585 return false;
5587 {TargetOpcode::G_ICMP,
5588 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5589 DstTy}}))
5590 return false;
5591 if (Opcode == TargetOpcode::G_UREM &&
5592 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5593 return false;
5594 }
5595
// All divisor elements must be non-zero constants.
5596 return matchUnaryPredicate(
5597 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5598}
5599
// Apply step: emit the multiply-based expansion and replace the original
// G_UDIV/G_UREM's result with the new definition.
// NOTE(review): the opening signature line was lost in extraction (embedded
// line 5600); code is otherwise verbatim.
5601 auto *NewMI = buildUDivOrURemUsingMul(MI);
5602 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5603}
5604
// Decide whether a G_SDIV/G_SREM by constant should be expanded by
// buildSDivOrSRemUsingMul; mirrors matchUDivOrURemByConst but additionally
// accepts a widened G_MUL in place of G_SMULH.
// NOTE(review): the opening signature line and the exact-flag condition
// continuation were lost in extraction (embedded numbers jump 5627 -> 5629);
// code is otherwise verbatim.
5606 unsigned Opcode = MI.getOpcode();
5607 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5608 Register Dst = MI.getOperand(0).getReg();
5609 Register RHS = MI.getOperand(2).getReg();
5610 LLT DstTy = MRI.getType(Dst);
// WideTy is the double-width type used if G_SMULH must be emulated by G_MUL.
5611 auto SizeInBits = DstTy.getScalarSizeInBits();
5612 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5613
5614 auto &MF = *MI.getMF();
5615 AttributeList Attr = MF.getFunction().getAttributes();
5616 const auto &TLI = getTargetLowering();
5617 LLVMContext &Ctx = MF.getFunction().getContext();
5618 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5619 return false;
5620
5621 // Don't do this for minsize because the instruction sequence is usually
5622 // larger.
5623 if (MF.getFunction().hasMinSize())
5624 return false;
5625
5626 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5627 if (Opcode == TargetOpcode::G_SDIV &&
5629 return matchUnaryPredicate(
5630 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5631 }
5632
5633 auto *RHSDef = MRI.getVRegDef(RHS);
5634 if (!isConstantOrConstantVector(*RHSDef, MRI))
5635 return false;
5636
5637 // Don't do this if the types are not going to be legal.
5638 if (LI) {
5639 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5640 return false;
5641 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5642 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5643 return false;
5644 if (Opcode == TargetOpcode::G_SREM &&
5645 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5646 return false;
5647 }
5648
// All divisor elements must be non-zero constants.
5649 return matchUnaryPredicate(
5650 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5651}
5652
// Apply step: emit the multiply-based expansion and replace the original
// G_SDIV/G_SREM's result with the new definition.
// NOTE(review): the opening signature line was lost in extraction (embedded
// line 5653); code is otherwise verbatim.
5654 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5655 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5656}
5657
// Lower G_SDIV/G_SREM by constant into multiply-by-magic-number sequences
// (exact divisors use an arithmetic shift plus multiplicative inverse; the
// general path uses signed magic numbers with numerator add/sub fixups and a
// sign-bit correction). For G_SREM, the remainder is LHS - q * RHS.
// NOTE(review): the opening signature line, the ShiftAmtTy definition, the
// SignedDivisionByConstantInfo::get line(s), and an assert opening were lost
// in extraction (embedded numbers jump 5668 -> 5670, 5731 -> 5734,
// 5772 -> 5774). Also note the assert below mixes MI.getOpcode() and the
// cached Opcode — harmless but inconsistent; code is reproduced verbatim.
5659 unsigned Opcode = MI.getOpcode();
5660 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5661 Opcode == TargetOpcode::G_SREM);
5662 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5663 Register Dst = SDivorRem.getReg(0);
5664 Register LHS = SDivorRem.getReg(1);
5665 Register RHS = SDivorRem.getReg(2);
5666 LLT Ty = MRI.getType(Dst);
5667 LLT ScalarTy = Ty.getScalarType();
5668 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5670 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5671 auto &MIB = Builder;
5672
// --- Exact-division path: divisor divides LHS evenly. ---
5673 bool UseSRA = false;
5674 SmallVector<Register, 16> ExactShifts, ExactFactors;
5675
5676 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5677 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5678
5679 auto BuildExactSDIVPattern = [&](const Constant *C) {
5680 // Don't recompute inverses for each splat element.
5681 if (IsSplat && !ExactFactors.empty()) {
5682 ExactShifts.push_back(ExactShifts[0]);
5683 ExactFactors.push_back(ExactFactors[0]);
5684 return true;
5685 }
5686
// Strip the power-of-two part into an arithmetic shift (sign-preserving),
// then multiply by the odd part's inverse.
5687 auto *CI = cast<ConstantInt>(C);
5688 APInt Divisor = CI->getValue();
5689 unsigned Shift = Divisor.countr_zero();
5690 if (Shift) {
5691 Divisor.ashrInPlace(Shift);
5692 UseSRA = true;
5693 }
5694
5695 // Calculate the multiplicative inverse modulo BW.
5696 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5697 APInt Factor = Divisor.multiplicativeInverse();
5698 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5699 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5700 return true;
5701 };
5702
5703 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5704 // Collect all magic values from the build vector.
5705 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5706 (void)Matched;
5707 assert(Matched && "Expected unary predicate match to succeed");
5708
5709 Register Shift, Factor;
5710 if (Ty.isVector()) {
5711 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5712 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5713 } else {
5714 Shift = ExactShifts[0];
5715 Factor = ExactFactors[0];
5716 }
5717
5718 Register Res = LHS;
5719
5720 if (UseSRA)
5721 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5722
5723 return MIB.buildMul(Ty, Res, Factor);
5724 }
5725
// --- General path: signed magic-number division. ---
5726 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5727
5728 auto BuildSDIVPattern = [&](const Constant *C) {
5729 auto *CI = cast<ConstantInt>(C);
5730 const APInt &Divisor = CI->getValue();
5731
// NumeratorFactor selects whether LHS is added (+1), subtracted (-1), or
// ignored (0) after the high multiply; ShiftMask gates the sign-bit add.
5734 int NumeratorFactor = 0;
5735 int ShiftMask = -1;
5736
5737 if (Divisor.isOne() || Divisor.isAllOnes()) {
5738 // If d is +1/-1, we just multiply the numerator by +1/-1.
5739 NumeratorFactor = Divisor.getSExtValue();
5740 Magics.Magic = 0;
5741 Magics.ShiftAmount = 0;
5742 ShiftMask = 0;
5743 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5744 // If d > 0 and m < 0, add the numerator.
5745 NumeratorFactor = 1;
5746 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5747 // If d < 0 and m > 0, subtract the numerator.
5748 NumeratorFactor = -1;
5749 }
5750
5751 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5752 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5753 Shifts.push_back(
5754 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5755 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5756
5757 return true;
5758 };
5759
5760 // Collect the shifts/magic values from each element.
5761 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5762 (void)Matched;
5763 assert(Matched && "Expected unary predicate match to succeed");
5764
5765 Register MagicFactor, Factor, Shift, ShiftMask;
5766 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5767 if (RHSDef) {
5768 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5769 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5770 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5771 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
5772 } else {
5774 "Non-build_vector operation should have been a scalar");
5775 MagicFactor = MagicFactors[0];
5776 Factor = Factors[0];
5777 Shift = Shifts[0];
5778 ShiftMask = ShiftMasks[0];
5779 }
5780
// Emit: q = smulh(LHS, magic) + LHS*factor; ashr; then add the masked
// sign bit to round the quotient toward zero.
5781 Register Q = LHS;
5782 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5783
5784 // (Optionally) Add/subtract the numerator using Factor.
5785 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
5786 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
5787
5788 // Shift right algebraic by shift value.
5789 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
5790
5791 // Extract the sign bit, mask it and add it to the quotient.
5792 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
5793 auto T = MIB.buildLShr(Ty, Q, SignShift);
5794 T = MIB.buildAnd(Ty, T, ShiftMask);
5795 auto ret = MIB.buildAdd(Ty, Q, T);
5796
// For G_SREM, reconstruct the remainder: LHS - (q * RHS).
5797 if (Opcode == TargetOpcode::G_SREM) {
5798 auto Prod = MIB.buildMul(Ty, ret, RHS);
5799 return MIB.buildSub(Ty, LHS, Prod);
5800 }
5801 return ret;
5802}
5803
// Match a G_SDIV/G_UDIV whose divisor is a constant (or constant splat) power
// of two; when IsSigned is set a negated power of two is also accepted.
// Undef divisor elements are rejected (AllowUndefs=false below).
// NOTE(review): the signature line (orig. 5804) is missing from this extract;
// the body references an `IsSigned` parameter — confirm against upstream.
5805  assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5806          MI.getOpcode() == TargetOpcode::G_UDIV) &&
5807         "Expected SDIV or UDIV");
5808  auto &Div = cast<GenericMachineInstr>(MI);
5809  Register RHS = Div.getReg(2);
5810  auto MatchPow2 = [&](const Constant *C) {
5811    auto *CI = dyn_cast<ConstantInt>(C);
5812    return CI && (CI->getValue().isPowerOf2() ||
5813                  (IsSigned && CI->getValue().isNegatedPowerOf2()));
5814  };
5815  return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5816}
5817
// Lower G_SDIV by a power-of-two divisor into shifts, an add of the rounding
// bias, and selects that handle the +1/-1 and negative-divisor special cases
// (see the pseudo-code comment below). Replaces and erases MI.
// NOTE(review): extraction dropped the signature line (orig. 5818), the
// ShiftAmtTy declaration (orig. 5825), and the IsMinusOne buildICmp
// (orig. 5865) — confirm against upstream.
5819  assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5820  auto &SDiv = cast<GenericMachineInstr>(MI);
5821  Register Dst = SDiv.getReg(0);
5822  Register LHS = SDiv.getReg(1);
5823  Register RHS = SDiv.getReg(2);
5824  LLT Ty = MRI.getType(Dst);
5826  LLT CCVT =
5827      Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5828
5829  // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5830  // to the following version:
5831  //
5832  // %c1 = G_CTTZ %rhs
5833  // %inexact = G_SUB $bitwidth, %c1
5834  // %sign = %G_ASHR %lhs, $(bitwidth - 1)
5835  // %lshr = G_LSHR %sign, %inexact
5836  // %add = G_ADD %lhs, %lshr
5837  // %ashr = G_ASHR %add, %c1
5838  // %ashr = G_SELECT, %isoneorallones, %lhs, %ashr
5839  // %zero = G_CONSTANT $0
5840  // %neg = G_NEG %ashr
5841  // %isneg = G_ICMP SLT %rhs, %zero
5842  // %res = G_SELECT %isneg, %neg, %ashr
5843
5844  unsigned BitWidth = Ty.getScalarSizeInBits();
5845  auto Zero = Builder.buildConstant(Ty, 0);
5846
5847  auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5848  auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5849  auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5850  // Splat the sign bit into the register
5851  auto Sign = Builder.buildAShr(
5852      Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5853
5854  // Add (LHS < 0) ? abs2 - 1 : 0;
5855  auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5856  auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5857  auto AShr = Builder.buildAShr(Ty, Add, C1);
5858
5859  // Special case: (sdiv X, 1) -> X
5860  // Special Case: (sdiv X, -1) -> 0-X
5861  auto One = Builder.buildConstant(Ty, 1);
5862  auto MinusOne = Builder.buildConstant(Ty, -1);
5863  auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5864  auto IsMinusOne =
5866  auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5867  AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5868
5869  // If divided by a positive value, we're done. Otherwise, the result must be
5870  // negated.
5871  auto Neg = Builder.buildNeg(Ty, AShr);
5872  auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5873  Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5874  MI.eraseFromParent();
5875}
5876
// Lower G_UDIV by a power-of-two divisor into a logical shift right by
// cttz(RHS) (the log2 of the divisor). Replaces and erases MI.
// NOTE(review): the signature line (orig. 5877) and the ShiftAmtTy
// declaration (orig. 5884) are missing from this extract — confirm upstream.
5878  assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5879  auto &UDiv = cast<GenericMachineInstr>(MI);
5880  Register Dst = UDiv.getReg(0);
5881  Register LHS = UDiv.getReg(1);
5882  Register RHS = UDiv.getReg(2);
5883  LLT Ty = MRI.getType(Dst);
5885
5886  auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5887  Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5888  MI.eraseFromParent();
5889}
5890
// Match a G_UMULH whose RHS is a constant power of two other than one, so it
// can be rewritten as a right shift. Also requires that both G_LSHR and
// G_CTLZ (used by the apply step to compute log2) are legal or we are before
// the legalizer.
// NOTE(review): the signature line (orig. 5891) and the ShiftAmtTy
// declaration (orig. 5897) are missing from this extract — confirm upstream.
5892  assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5893  Register RHS = MI.getOperand(2).getReg();
5894  Register Dst = MI.getOperand(0).getReg();
5895  LLT Ty = MRI.getType(Dst);
5896  LLT RHSTy = MRI.getType(RHS);
5898  auto MatchPow2ExceptOne = [&](const Constant *C) {
5899    if (auto *CI = dyn_cast<ConstantInt>(C))
5900      return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5901    return false;
5902  };
5903  if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5904    return false;
5905  // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
5906  // get log base 2, and it is not always legal for on a target.
5907  return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
5908         isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
5909}
5910
// Rewrite G_UMULH by a power-of-two constant as LHS >> (bitwidth - log2(RHS)),
// i.e. the high half of the widened product. Replaces and erases MI.
// NOTE(review): the signature line (orig. 5911) and the ShiftAmtTy
// declaration (orig. 5916) are missing from this extract — confirm upstream.
5912  Register LHS = MI.getOperand(1).getReg();
5913  Register RHS = MI.getOperand(2).getReg();
5914  Register Dst = MI.getOperand(0).getReg();
5915  LLT Ty = MRI.getType(Dst);
5917  unsigned NumEltBits = Ty.getScalarSizeInBits();
5918
5919  auto LogBase2 = buildLogBase2(RHS, Builder);
5920  auto ShiftAmt =
5921      Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5922  auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5923  Builder.buildLShr(Dst, LHS, Trunc);
5924  MI.eraseFromParent();
5925}
5926
// Match trunc(smin(smax(x, SignedMin), SignedMax)) (in either nesting order)
// where the clamp bounds are the signed min/max of the destination width,
// i.e. a signed-saturating truncate. On success MatchInfo holds the
// unclamped source register. Requires G_TRUNC_SSAT_S to be legal.
// NOTE(review): the first line of the signature (orig. 5927) is missing
// from this extract — confirm the function name against upstream.
5928                                         Register &MatchInfo) const {
5929  Register Dst = MI.getOperand(0).getReg();
5930  Register Src = MI.getOperand(1).getReg();
5931  LLT DstTy = MRI.getType(Dst);
5932  LLT SrcTy = MRI.getType(Src);
5933  unsigned NumDstBits = DstTy.getScalarSizeInBits();
5934  unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
5935  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
5936
5937  if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
5938    return false;
5939
5940  APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
5941  APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
5942  return mi_match(Src, MRI,
5943                  m_GSMin(m_GSMax(m_Reg(MatchInfo),
5944                                  m_SpecificICstOrSplat(SignedMin)),
5945                          m_SpecificICstOrSplat(SignedMax))) ||
5946         mi_match(Src, MRI,
5947                  m_GSMax(m_GSMin(m_Reg(MatchInfo),
5948                                  m_SpecificICstOrSplat(SignedMax)),
5949                          m_SpecificICstOrSplat(SignedMin)));
5950}
5951
// Replace the matched clamp+trunc sequence with a single G_TRUNC_SSAT_S of
// the source register captured in MatchInfo, then erase the original MI.
5953                                         Register &MatchInfo) const {
5954  Register Dst = MI.getOperand(0).getReg();
5955  Builder.buildTruncSSatS(Dst, MatchInfo);
5956  MI.eraseFromParent();
5957}
5958
// Match an unsigned-saturating truncate pattern: the source clamped to
// [0, UnsignedMax] (via smin/umin/smax combinations) before a trunc. On
// success MatchInfo holds the unclamped source. Requires G_TRUNC_SSAT_U.
// NOTE(review): the first signature line (orig. 5959) plus two pattern lines
// (orig. 5973, 5980 — likely m_GSMin/m_GUMin openers) are missing from this
// extract — confirm against upstream.
5960                                         Register &MatchInfo) const {
5961  Register Dst = MI.getOperand(0).getReg();
5962  Register Src = MI.getOperand(1).getReg();
5963  LLT DstTy = MRI.getType(Dst);
5964  LLT SrcTy = MRI.getType(Src);
5965  unsigned NumDstBits = DstTy.getScalarSizeInBits();
5966  unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
5967  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
5968
5969  if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
5970    return false;
5971  APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
5972  return mi_match(Src, MRI,
5974                          m_SpecificICstOrSplat(UnsignedMax))) ||
5975         mi_match(Src, MRI,
5976                  m_GSMax(m_GSMin(m_Reg(MatchInfo),
5977                                  m_SpecificICstOrSplat(UnsignedMax)),
5978                          m_SpecificICstOrSplat(0))) ||
5979         mi_match(Src, MRI,
5981                          m_SpecificICstOrSplat(UnsignedMax)));
5982}
5983
// Replace the matched clamp+trunc sequence with a single G_TRUNC_SSAT_U of
// the source register captured in MatchInfo, then erase the original MI.
5985                                         Register &MatchInfo) const {
5986  Register Dst = MI.getOperand(0).getReg();
5987  Builder.buildTruncSSatU(Dst, MatchInfo);
5988  MI.eraseFromParent();
5989}
5990
// Match a truncate fed by a umin with the destination's unsigned max as the
// clamp bound, where the umin operand is not itself an smax (that shape is
// handled by the signed-source pattern above). Requires G_TRUNC_SSAT_U.
// NOTE(review): the first signature line (orig. 5991) is missing from this
// extract — confirm the function name against upstream.
5992                                               MachineInstr &MinMI) const {
5993  Register Min = MinMI.getOperand(2).getReg();
5994  Register Val = MinMI.getOperand(1).getReg();
5995  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5996  LLT SrcTy = MRI.getType(Val);
5997  unsigned NumDstBits = DstTy.getScalarSizeInBits();
5998  unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
5999  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6000
6001  if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6002    return false;
6003  APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6004  return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6005         !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6006}
6007
// Legality-only predicate: true when G_FPTOUI_SAT is legal (or we are before
// the legalizer) for the destination type of MI and the source type of SrcMI.
// NOTE(review): the first signature line (orig. 6008) is missing from this
// extract — confirm the function name against upstream.
6009                                            MachineInstr &SrcMI) const {
6010  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6011  LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6012
6013  return LI &&
6014     isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6015}
6016
// Fold redundant fneg operands on FP arithmetic: fadd with a negated operand
// becomes fsub, fsub of a negated RHS becomes fadd, and a pair of negated
// operands cancels for fmul/fdiv/fmad/fma. The MatchInfo lambda mutates MI in
// place (opcode + operands) rather than building a new instruction.
// NOTE(review): extraction dropped the first signature line (orig. 6017) and
// the Observer.changingInstr/changedInstr calls (orig. 6054, 6058) inside
// the lambda — confirm against upstream.
6018                                           BuildFnTy &MatchInfo) const {
6019  unsigned Opc = MI.getOpcode();
6020  assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6021         Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6022         Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6023
6024  Register Dst = MI.getOperand(0).getReg();
6025  Register X = MI.getOperand(1).getReg();
6026  Register Y = MI.getOperand(2).getReg();
6027  LLT Type = MRI.getType(Dst);
6028
6029  // fold (fadd x, fneg(y)) -> (fsub x, y)
6030  // fold (fadd fneg(y), x) -> (fsub x, y)
6031  // G_ADD is commutative so both cases are checked by m_GFAdd
6032  if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6033      isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6034    Opc = TargetOpcode::G_FSUB;
6035  }
6036  /// fold (fsub x, fneg(y)) -> (fadd x, y)
6037  else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6038           isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6039    Opc = TargetOpcode::G_FADD;
6040  }
6041  // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6042  // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6043  // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6044  // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6045  else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6046            Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6047           mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6048           mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6049    // no opcode change
6050  } else
6051    return false;
6052
6053  MatchInfo = [=, &MI](MachineIRBuilder &B) {
6055    MI.setDesc(B.getTII().get(Opc));
6056    MI.getOperand(1).setReg(X);
6057    MI.getOperand(2).setReg(Y);
6059  };
6060  return true;
6061}
6062
// Match (fsub 0.0, X) so it can be rewritten as fneg-like. A -0.0 LHS always
// matches; +0.0 matches only under the nsz fast-math flag (where the sign of
// zero may be ignored). MatchInfo is set to the RHS register.
// NOTE(review): extraction dropped the first signature line (orig. 6063) and
// the scalar branch of the LHSCst ternary (orig. 6073, presumably a
// getFConstVRegValWithLookThrough-style call) — confirm against upstream.
6064                                     Register &MatchInfo) const {
6065  assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6066
6067  Register LHS = MI.getOperand(1).getReg();
6068  MatchInfo = MI.getOperand(2).getReg();
6069  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6070
6071  const auto LHSCst = Ty.isVector()
6072                          ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6074  if (!LHSCst)
6075    return false;
6076
6077  // -0.0 is always allowed
6078  if (LHSCst->Value.isNegZero())
6079    return true;
6080
6081  // +0.0 is only allowed if nsz is set.
6082  if (LHSCst->Value.isPosZero())
6083    return MI.getFlag(MachineInstr::FmNsz);
6084
6085  return false;
6086}
6087
// Replace the matched (fsub 0.0, X) with a canonicalized copy of X: the dst
// is rewritten to the result of G_FCANONICALIZE on MatchInfo, and MI erased.
// NOTE(review): extraction dropped the first signature line (orig. 6088) and
// the start of the replace call (orig. 6091, presumably replaceRegWith(MRI,)
// — confirm against upstream.
6089                                     Register &MatchInfo) const {
6090  Register Dst = MI.getOperand(0).getReg();
6092      Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6093  eraseInst(MI);
6094}
6095
6096/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6097/// due to global flags or MachineInstr flags.
6098static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6099 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6100 return false;
6101 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6102}
6103
6104static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6105 const MachineRegisterInfo &MRI) {
6106 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6107 MRI.use_instr_nodbg_end()) >
6108 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6109 MRI.use_instr_nodbg_end());
6110}
6111
// Common precondition check for all FADD/FSUB -> FMA/FMAD combines. Outputs:
//   AllowFusionGlobally - fusion permitted by target options or FMAD legality
//   HasFMAD             - G_FMAD is legal post-legalization
//   Aggressive          - target enables aggressive FMA fusion
// Returns false when neither FMA nor FMAD is usable, when the instruction is
// not contractable, or when CanReassociate is requested but the reassoc
// flag is absent.
// NOTE(review): the first signature line (orig. 6112) is missing from this
// extract — confirm against upstream.
6113                                        bool &AllowFusionGlobally,
6114                                        bool &HasFMAD, bool &Aggressive,
6115                                        bool CanReassociate) const {
6116
6117  auto *MF = MI.getMF();
6118  const auto &TLI = *MF->getSubtarget().getTargetLowering();
6119  const TargetOptions &Options = MF->getTarget().Options;
6120  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6121
6122  if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6123    return false;
6124
6125  // Floating-point multiply-add with intermediate rounding.
6126  HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6127  // Floating-point multiply-add without intermediate rounding.
6128  bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6129                isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6130  // No valid opcode, do not combine.
6131  if (!HasFMAD && !HasFMA)
6132    return false;
6133
6134  AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6135  // If the addition is not contractable, do not combine.
6136  if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6137    return false;
6138
6139  Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6140  return true;
6141}
6142
// fold (fadd (fmul x, y), z) -> (fma x, y, z) and the commuted form.
// When both operands are contractable fmuls, prefer folding the one with
// fewer uses. MatchInfo builds the fused instruction.
// NOTE(review): extraction dropped the first signature lines (orig.
// 6143-6144) and the LHS/RHS DefinitionAndSourceRegister lookups (orig.
// 6154-6155) — confirm against upstream.
6145    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6146  assert(MI.getOpcode() == TargetOpcode::G_FADD);
6147
6148  bool AllowFusionGlobally, HasFMAD, Aggressive;
6149  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6150    return false;
6151
6152  Register Op1 = MI.getOperand(1).getReg();
6153  Register Op2 = MI.getOperand(2).getReg();
6156  unsigned PreferredFusedOpcode =
6157      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6158
6159  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6160  // prefer to fold the multiply with fewer uses.
6161  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6162      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6163    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6164      std::swap(LHS, RHS);
6165  }
6166
6167  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6168  if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6169      (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6170    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6171      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6172                   {LHS.MI->getOperand(1).getReg(),
6173                    LHS.MI->getOperand(2).getReg(), RHS.Reg});
6174    };
6175    return true;
6176  }
6177
6178  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6179  if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6180      (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6181    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6182      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6183                   {RHS.MI->getOperand(1).getReg(),
6184                    RHS.MI->getOperand(2).getReg(), LHS.Reg});
6185    };
6186    return true;
6187  }
6188
6189  return false;
6190}
6191
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) and the
// commuted form. Gated on TLI.isFPExtFoldable for the narrow source type.
// NOTE(review): extraction dropped the first signature lines (orig.
// 6192-6193) and the LHS/RHS DefinitionAndSourceRegister lookups (orig.
// 6204-6205) — confirm against upstream.
6194    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6195  assert(MI.getOpcode() == TargetOpcode::G_FADD);
6196
6197  bool AllowFusionGlobally, HasFMAD, Aggressive;
6198  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6199    return false;
6200
6201  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6202  Register Op1 = MI.getOperand(1).getReg();
6203  Register Op2 = MI.getOperand(2).getReg();
6206  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6207
6208  unsigned PreferredFusedOpcode =
6209      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6210
6211  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6212  // prefer to fold the multiply with fewer uses.
6213  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6214      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6215    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6216      std::swap(LHS, RHS);
6217  }
6218
6219  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
6220  MachineInstr *FpExtSrc;
6221  if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6222      isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6223      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6224                          MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6225    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6226      auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6227      auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6228      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6229                   {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6230    };
6231    return true;
6232  }
6233
6234  // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6235  // Note: Commutes FADD operands.
6236  if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6237      isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6238      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6239                          MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6240    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6241      auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6242      auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6243      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6244                   {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6245    };
6246    return true;
6247  }
6248
6249  return false;
6250}
6251
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) and the
// commuted form. Requires the reassoc flag (CanReassociate=true below) and
// single uses of both the outer fma result and its inner fmul.
// NOTE(review): extraction dropped the first signature lines (orig.
// 6252-6253) and the LHS/RHS DefinitionAndSourceRegister lookups (orig.
// 6263-6264) — confirm against upstream.
6254    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6255  assert(MI.getOpcode() == TargetOpcode::G_FADD);
6256
6257  bool AllowFusionGlobally, HasFMAD, Aggressive;
6258  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6259    return false;
6260
6261  Register Op1 = MI.getOperand(1).getReg();
6262  Register Op2 = MI.getOperand(2).getReg();
6265  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6266
6267  unsigned PreferredFusedOpcode =
6268      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6269
6270  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6271  // prefer to fold the multiply with fewer uses.
6272  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6273      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6274    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6275      std::swap(LHS, RHS);
6276  }
6277
6278  MachineInstr *FMA = nullptr;
6279  Register Z;
6280  // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6281  if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6282      (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6283       TargetOpcode::G_FMUL) &&
6284      MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6285      MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6286    FMA = LHS.MI;
6287    Z = RHS.Reg;
6288  }
6289  // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6290  else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6291           (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6292            TargetOpcode::G_FMUL) &&
6293           MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6294           MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6295    Z = LHS.Reg;
6296    FMA = RHS.MI;
6297  }
6298
6299  if (FMA) {
6300    MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6301    Register X = FMA->getOperand(1).getReg();
6302    Register Y = FMA->getOperand(2).getReg();
6303    Register U = FMulMI->getOperand(1).getReg();
6304    Register V = FMulMI->getOperand(2).getReg();
6305
6306    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6307      Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6308      B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6309      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6310                   {X, Y, InnerFMA});
6311    };
6312    return true;
6313  }
6314
6315  return false;
6316}
6317
// Aggressive-only combine: fold fadd of an fma whose addend is an (fpext
// fmul) — or an fpext of such an fma — into nested fma instructions with the
// fpexts pushed onto the narrow operands. Four symmetric cases (LHS/RHS,
// ext-inside/ext-outside) are handled below.
// NOTE(review): extraction dropped the first signature lines (orig.
// 6318-6319), the LHS/RHS DefinitionAndSourceRegister lookups (orig.
// 6334-6335), and the tail of the buildMatchInfo parameter list
// (orig. 6350) — confirm against upstream.
6320    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6321  assert(MI.getOpcode() == TargetOpcode::G_FADD);
6322
6323  bool AllowFusionGlobally, HasFMAD, Aggressive;
6324  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6325    return false;
6326
6327  if (!Aggressive)
6328    return false;
6329
6330  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6331  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6332  Register Op1 = MI.getOperand(1).getReg();
6333  Register Op2 = MI.getOperand(2).getReg();
6336
6337  unsigned PreferredFusedOpcode =
6338      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6339
6340  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6341  // prefer to fold the multiply with fewer uses.
6342  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6343      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6344    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6345      std::swap(LHS, RHS);
6346  }
6347
6348  // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
6349  auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6351    Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6352    Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6353    Register InnerFMA =
6354        B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6355            .getReg(0);
6356    B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6357                 {X, Y, InnerFMA});
6358  };
6359
6360  MachineInstr *FMulMI, *FMAMI;
6361  // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6362  //   -> (fma x, y, (fma (fpext u), (fpext v), z))
6363  if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6364      mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6365               m_GFPExt(m_MInstr(FMulMI))) &&
6366      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6367      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6368                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
6369    MatchInfo = [=](MachineIRBuilder &B) {
6370      buildMatchInfo(FMulMI->getOperand(1).getReg(),
6371                     FMulMI->getOperand(2).getReg(), RHS.Reg,
6372                     LHS.MI->getOperand(1).getReg(),
6373                     LHS.MI->getOperand(2).getReg(), B);
6374    };
6375    return true;
6376  }
6377
6378  // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6379  //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6380  // FIXME: This turns two single-precision and one double-precision
6381  // operation into two double-precision operations, which might not be
6382  // interesting for all targets, especially GPUs.
6383  if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6384      FMAMI->getOpcode() == PreferredFusedOpcode) {
6385    MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6386    if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6387        TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6388                            MRI.getType(FMAMI->getOperand(0).getReg()))) {
6389      MatchInfo = [=](MachineIRBuilder &B) {
6390        Register X = FMAMI->getOperand(1).getReg();
6391        Register Y = FMAMI->getOperand(2).getReg();
6392        X = B.buildFPExt(DstType, X).getReg(0);
6393        Y = B.buildFPExt(DstType, Y).getReg(0);
6394        buildMatchInfo(FMulMI->getOperand(1).getReg(),
6395                       FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6396      };
6397
6398      return true;
6399    }
6400  }
6401
6402  // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
6403  //   -> (fma x, y, (fma (fpext u), (fpext v), z))
6404  if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6405      mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6406               m_GFPExt(m_MInstr(FMulMI))) &&
6407      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6408      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6409                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
6410    MatchInfo = [=](MachineIRBuilder &B) {
6411      buildMatchInfo(FMulMI->getOperand(1).getReg(),
6412                     FMulMI->getOperand(2).getReg(), LHS.Reg,
6413                     RHS.MI->getOperand(1).getReg(),
6414                     RHS.MI->getOperand(2).getReg(), B);
6415    };
6416    return true;
6417  }
6418
6419  // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
6420  //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6421  // FIXME: This turns two single-precision and one double-precision
6422  // operation into two double-precision operations, which might not be
6423  // interesting for all targets, especially GPUs.
6424  if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6425      FMAMI->getOpcode() == PreferredFusedOpcode) {
6426    MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6427    if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6428        TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6429                            MRI.getType(FMAMI->getOperand(0).getReg()))) {
6430      MatchInfo = [=](MachineIRBuilder &B) {
6431        Register X = FMAMI->getOperand(1).getReg();
6432        Register Y = FMAMI->getOperand(2).getReg();
6433        X = B.buildFPExt(DstType, X).getReg(0);
6434        Y = B.buildFPExt(DstType, Y).getReg(0);
6435        buildMatchInfo(FMulMI->getOperand(1).getReg(),
6436                       FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6437      };
6438      return true;
6439    }
6440  }
6441
6442  return false;
6443}
6444
// fold (fsub (fmul x, y), z) -> (fma x, y, -z) and
// fold (fsub x, (fmul y, z)) -> (fma -y, z, x), preferring the multiply with
// fewer uses when both operands are contractable fmuls.
// NOTE(review): extraction dropped the first signature lines (orig.
// 6445-6446) and the LHS/RHS DefinitionAndSourceRegister lookups (orig.
// 6456-6457) — confirm against upstream.
6447    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6448  assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6449
6450  bool AllowFusionGlobally, HasFMAD, Aggressive;
6451  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6452    return false;
6453
6454  Register Op1 = MI.getOperand(1).getReg();
6455  Register Op2 = MI.getOperand(2).getReg();
6458  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6459
6460  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6461  // prefer to fold the multiply with fewer uses.
6462  int FirstMulHasFewerUses = true;
6463  if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6464      isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6465      hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6466    FirstMulHasFewerUses = false;
6467
6468  unsigned PreferredFusedOpcode =
6469      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6470
6471  // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6472  if (FirstMulHasFewerUses &&
6473      (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6474       (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6475    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6476      Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6477      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6478                   {LHS.MI->getOperand(1).getReg(),
6479                    LHS.MI->getOperand(2).getReg(), NegZ});
6480    };
6481    return true;
6482  }
6483  // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6484  else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6485            (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6486    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6487      Register NegY =
6488          B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6489      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6490                   {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6491    };
6492    return true;
6493  }
6494
6495  return false;
6496}
6497
// fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) and
// fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x). Requires single uses
// of both the fneg and its fmul unless aggressive fusion is enabled.
// NOTE(review): the first signature lines (orig. 6498-6499) are missing from
// this extract — confirm against upstream.
6500    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6501  assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6502
6503  bool AllowFusionGlobally, HasFMAD, Aggressive;
6504  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6505    return false;
6506
6507  Register LHSReg = MI.getOperand(1).getReg();
6508  Register RHSReg = MI.getOperand(2).getReg();
6509  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6510
6511  unsigned PreferredFusedOpcode =
6512      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6513
6514  MachineInstr *FMulMI;
6515  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6516  if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6517      (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6518                      MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6519      isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6520    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6521      Register NegX =
6522          B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6523      Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6524      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6525                   {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6526    };
6527    return true;
6528  }
6529
6530  // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
6531  if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6532      (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6533                      MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6534      isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6535    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6536      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6537                   {FMulMI->getOperand(1).getReg(),
6538                    FMulMI->getOperand(2).getReg(), LHSReg});
6539    };
6540    return true;
6541  }
6542
6543  return false;
6544}
6545
// fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
// and (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x).
// NOTE(review): the first signature lines (orig. 6546-6547) are missing from
// this extract — confirm against upstream.
6548    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6549  assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6550
6551  bool AllowFusionGlobally, HasFMAD, Aggressive;
6552  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6553    return false;
6554
6555  Register LHSReg = MI.getOperand(1).getReg();
6556  Register RHSReg = MI.getOperand(2).getReg();
6557  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6558
6559  unsigned PreferredFusedOpcode =
6560      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6561
6562  MachineInstr *FMulMI;
6563  // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6564  if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6565      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6566      (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6567    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6568      Register FpExtX =
6569          B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6570      Register FpExtY =
6571          B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6572      Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6573      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6574                   {FpExtX, FpExtY, NegZ});
6575    };
6576    return true;
6577  }
6578
6579  // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6580  if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6581      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6582      (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6583    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6584      Register FpExtY =
6585          B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6586      Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6587      Register FpExtZ =
6588          B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6589      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6590                   {NegY, FpExtZ, LHSReg});
6591    };
6592    return true;
6593  }
6594
6595  return false;
6596}
6597
// fold fsub where one operand is an fpext/fneg (in either order) of an fmul:
//   (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y), z))
//   (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
// Gated on TLI.isFPExtFoldable for the narrow source type.
// NOTE(review): extraction dropped the first signature lines (orig.
// 6598-6599), the tail of the buildMatchInfo parameter list (orig. 6616),
// and the FMAReg declaration (orig. 6633) — confirm against upstream.
6600    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6601  assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6602
6603  bool AllowFusionGlobally, HasFMAD, Aggressive;
6604  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6605    return false;
6606
6607  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6608  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6609  Register LHSReg = MI.getOperand(1).getReg();
6610  Register RHSReg = MI.getOperand(2).getReg();
6611
6612  unsigned PreferredFusedOpcode =
6613      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6614
6615  auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6617    Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6618    Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6619    B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6620  };
6621
6622  MachineInstr *FMulMI;
6623  // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6624  //      (fneg (fma (fpext x), (fpext y), z))
6625  // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6626  //      (fneg (fma (fpext x), (fpext y), z))
6627  if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6628       mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6629      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6630      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6631                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
6632    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6634      buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6635                     FMulMI->getOperand(2).getReg(), RHSReg, B);
6636      B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6637    };
6638    return true;
6639  }
6640
6641  // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6642  // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6643  if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6644       mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6645      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6646      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6647                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
6648    MatchInfo = [=, &MI](MachineIRBuilder &B) {
6649      buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6650                     FMulMI->getOperand(2).getReg(), LHSReg, B);
6651    };
6652    return true;
6653  }
6654
6655  return false;
6656}
6657
// Matches G_FMINNUM/G_FMAXNUM/G_FMINIMUM/G_FMAXIMUM where one operand is a
// constant NaN. Sets IdxToPropagate to the operand the min/max resolves to:
// the NaN operand for the NaN-propagating (*IMUM) opcodes, the other operand
// for the IEEE-style (*NUM) opcodes.
// NOTE(review): the signature's first line is missing from this extract.
6659 unsigned &IdxToPropagate) const {
6660 bool PropagateNaN;
6661 switch (MI.getOpcode()) {
6662 default:
6663 return false;
6664 case TargetOpcode::G_FMINNUM:
6665 case TargetOpcode::G_FMAXNUM:
6666 PropagateNaN = false;
6667 break;
6668 case TargetOpcode::G_FMINIMUM:
6669 case TargetOpcode::G_FMAXIMUM:
6670 PropagateNaN = true;
6671 break;
6672 }
6673
// Returns true if operand Idx is a constant NaN, recording which operand
// survives the fold.
6674 auto MatchNaN = [&](unsigned Idx) {
6675 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6676 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6677 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6678 return false;
6679 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6680 return true;
6681 };
6682
6683 return MatchNaN(1) || MatchNaN(2);
6684}
6685
// Matches A + (B - A) -> B and (B - A) + A -> B on a G_ADD.
// NOTE(review): the signature line is missing from this extract; `Src` is an
// out-parameter declared there (receives B) — verify against upstream.
6687 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6688 Register LHS = MI.getOperand(1).getReg();
6689 Register RHS = MI.getOperand(2).getReg();
6690
6691 // Helper lambda to check for opportunities for
6692 // A + (B - A) -> B
6693 // (B - A) + A -> B
6694 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6695 Register Reg;
6696 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6697 Reg == MaybeSameReg;
6698 };
6699 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6700}
6701
// Recognizes a build-vector that merely reassembles an existing value x of
// the destination vector type; MatchInfo receives x so the build-vector can
// be replaced with it.
// NOTE(review): the signature's first line and the pattern argument of the
// first mi_match (before 6728) are missing from this extract.
6703 Register &MatchInfo) const {
6704 // This combine folds the following patterns:
6705 //
6706 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6707 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6708 // into
6709 // x
6710 // if
6711 // k == sizeof(VecEltTy)/2
6712 // type(x) == type(dst)
6713 //
6714 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6715 // into
6716 // x
6717 // if
6718 // type(x) == type(dst)
6719
6720 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6721 LLT DstEltTy = DstVecTy.getElementType();
6722
6723 Register Lo, Hi;
6724
6725 if (mi_match(
6726 MI, MRI,
6728 MatchInfo = Lo;
6729 return MRI.getType(MatchInfo) == DstVecTy;
6730 }
6731
6732 std::optional<ValueAndVReg> ShiftAmount;
6733 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6734 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6735 if (mi_match(
6736 MI, MRI,
6737 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6738 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
// Both halves must come from the same source, and the shift must extract
// exactly the upper element-sized half.
6739 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6740 MatchInfo = Lo;
6741 return MRI.getType(MatchInfo) == DstVecTy;
6742 }
6743 }
6744
6745 return false;
6746}
6747
// Matches (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y))) and yields x in
// MatchInfo when x already has the truncation's result type.
// NOTE(review): the signature's first line is missing from this extract.
6749 Register &MatchInfo) const {
6750 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6751 // if type(x) == type(G_TRUNC)
6752 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6753 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6754 return false;
6755
6756 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6757}
6758
// Matches (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) and yields
// the high source y in MatchInfo when K equals the element size.
// NOTE(review): the signature's first line and the inner pattern line
// (before 6766, which binds MatchInfo) are missing from this extract.
6760 Register &MatchInfo) const {
6761 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6762 // y if K == size of vector element type
6763 std::optional<ValueAndVReg> ShiftAmt;
6764 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6766 m_GCst(ShiftAmt))))
6767 return false;
6768
6769 LLT MatchTy = MRI.getType(MatchInfo);
6770 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6771 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6772}
6773
6774unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6775 CmpInst::Predicate Pred, LLT DstTy,
6776 SelectPatternNaNBehaviour VsNaNRetVal) const {
6777 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6778 "Expected a NaN behaviour?");
6779 // Choose an opcode based off of legality or the behaviour when one of the
6780 // LHS/RHS may be NaN.
6781 switch (Pred) {
6782 default:
6783 return 0;
6784 case CmpInst::FCMP_UGT:
6785 case CmpInst::FCMP_UGE:
6786 case CmpInst::FCMP_OGT:
6787 case CmpInst::FCMP_OGE:
6788 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6789 return TargetOpcode::G_FMAXNUM;
6790 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6791 return TargetOpcode::G_FMAXIMUM;
6792 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6793 return TargetOpcode::G_FMAXNUM;
6794 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6795 return TargetOpcode::G_FMAXIMUM;
6796 return 0;
6797 case CmpInst::FCMP_ULT:
6798 case CmpInst::FCMP_ULE:
6799 case CmpInst::FCMP_OLT:
6800 case CmpInst::FCMP_OLE:
6801 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6802 return TargetOpcode::G_FMINNUM;
6803 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6804 return TargetOpcode::G_FMINIMUM;
6805 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6806 return TargetOpcode::G_FMINNUM;
6807 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6808 return 0;
6809 return TargetOpcode::G_FMINIMUM;
6810 }
6811}
6812
6813CombinerHelper::SelectPatternNaNBehaviour
6814CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6815 bool IsOrderedComparison) const {
6816 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6817 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6818 // Completely unsafe.
6819 if (!LHSSafe && !RHSSafe)
6820 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6821 if (LHSSafe && RHSSafe)
6822 return SelectPatternNaNBehaviour::RETURNS_ANY;
6823 // An ordered comparison will return false when given a NaN, so it
6824 // returns the RHS.
6825 if (IsOrderedComparison)
6826 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6827 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6828 // An unordered comparison will return true when given a NaN, so it
6829 // returns the LHS.
6830 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6831 : SelectPatternNaNBehaviour::RETURNS_NAN;
6832}
6833
/// Match select (fcmp cond x, y) x, y (or the operand-swapped form) and turn
/// it into the appropriate G_FMINNUM/G_FMAXNUM/G_FMINIMUM/G_FMAXIMUM.
/// NOTE(review): the line before 6850 (presumably a one-use wrapper around
/// the fcmp pattern) is missing from this extract — verify upstream.
6834bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6835 Register TrueVal, Register FalseVal,
6836 BuildFnTy &MatchInfo) const {
6837 // Match: select (fcmp cond x, y) x, y
6838 // select (fcmp cond x, y) y, x
6839 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6840 LLT DstTy = MRI.getType(Dst);
6841 // Bail out early on pointers, since we'll never want to fold to a min/max.
6842 if (DstTy.isPointer())
6843 return false;
6844 // Match a floating point compare with a less-than/greater-than predicate.
6845 // TODO: Allow multiple users of the compare if they are all selects.
6846 CmpInst::Predicate Pred;
6847 Register CmpLHS, CmpRHS;
6848 if (!mi_match(Cond, MRI,
6850 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6851 CmpInst::isEquality(Pred))
6852 return false;
6853 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6854 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6855 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6856 return false;
// Canonicalize the swapped form; swapping operands flips which side is
// returned on NaN.
6857 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6858 std::swap(CmpLHS, CmpRHS);
6859 Pred = CmpInst::getSwappedPredicate(Pred);
6860 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6861 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6862 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6863 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6864 }
6865 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6866 return false;
6867 // Decide what type of max/min this should be based off of the predicate.
6868 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6869 if (!Opc || !isLegal({Opc, {DstTy}}))
6870 return false;
6871 // Comparisons between signed zero and zero may have different results...
6872 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6873 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6874 // We don't know if a comparison between two 0s will give us a consistent
6875 // result. Be conservative and only proceed if at least one side is
6876 // non-zero.
6877 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6878 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6879 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6880 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6881 return false;
6882 }
6883 }
6884 MatchInfo = [=](MachineIRBuilder &B) {
6885 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6886 };
6887 return true;
6888}
6889
// Driver for the FP select -> min/max fold: looks through a single-use
// G_TRUNC feeding the select condition, then delegates to
// matchFPSelectToMinMax.
// NOTE(review): the signature's first line is missing from this extract.
6891 BuildFnTy &MatchInfo) const {
6892 // TODO: Handle integer cases.
6893 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6894 // Condition may be fed by a truncated compare.
6895 Register Cond = MI.getOperand(1).getReg();
6896 Register MaybeTrunc;
6897 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6898 Cond = MaybeTrunc;
6899 Register Dst = MI.getOperand(0).getReg();
6900 Register TrueVal = MI.getOperand(2).getReg();
6901 Register FalseVal = MI.getOperand(3).getReg();
6902 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6903}
6904
// Simplifies an equality G_ICMP of (X op Y) against X — for op in
// {add, sub, xor} — into a comparison of Y against zero.
// NOTE(review): the signature's first line is missing from this extract.
6906 BuildFnTy &MatchInfo) const {
6907 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6908 // (X + Y) == X --> Y == 0
6909 // (X + Y) != X --> Y != 0
6910 // (X - Y) == X --> Y == 0
6911 // (X - Y) != X --> Y != 0
6912 // (X ^ Y) == X --> Y == 0
6913 // (X ^ Y) != X --> Y != 0
6914 Register Dst = MI.getOperand(0).getReg();
6915 CmpInst::Predicate Pred;
6916 Register X, Y, OpLHS, OpRHS;
// Sub is not commutative, so it gets its own pattern: only (X - Y) == X is
// valid, not (Y - X) == X.
6917 bool MatchedSub = mi_match(
6918 Dst, MRI,
6919 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6920 if (MatchedSub && X != OpLHS)
6921 return false;
6922 if (!MatchedSub) {
6923 if (!mi_match(Dst, MRI,
6924 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6925 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6926 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6927 return false;
// For the commutative ops, Y is whichever binop operand is not X; an
// invalid Register means X matched neither operand.
6928 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6929 }
6930 MatchInfo = [=](MachineIRBuilder &B) {
6931 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6932 B.buildICmp(Pred, Dst, Y, Zero);
6933 };
6934 return CmpInst::isEquality(Pred) && Y.isValid();
6935}
6936
6937/// Return the minimum useless shift amount that results in complete loss of the
6938/// source value. Return std::nullopt when it cannot determine a value.
6939static std::optional<unsigned>
6940getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
6941 std::optional<int64_t> &Result) {
6942 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
6943 Opcode == TargetOpcode::G_ASHR) &&
6944 "Expect G_SHL, G_LSHR or G_ASHR.");
6945 auto SignificantBits = 0;
6946 switch (Opcode) {
6947 case TargetOpcode::G_SHL:
6948 SignificantBits = ValueKB.countMinTrailingZeros();
6949 Result = 0;
6950 break;
6951 case TargetOpcode::G_LSHR:
6952 Result = 0;
6953 SignificantBits = ValueKB.countMinLeadingZeros();
6954 break;
6955 case TargetOpcode::G_ASHR:
6956 if (ValueKB.isNonNegative()) {
6957 SignificantBits = ValueKB.countMinLeadingZeros();
6958 Result = 0;
6959 } else if (ValueKB.isNegative()) {
6960 SignificantBits = ValueKB.countMinLeadingOnes();
6961 Result = -1;
6962 } else {
6963 // Cannot determine shift result.
6964 Result = std::nullopt;
6965 }
6966 break;
6967 default:
6968 break;
6969 }
6970 return ValueKB.getBitWidth() - SignificantBits;
6971}
6972
// Matches shifts whose (constant) amount either exceeds the scalar bit width
// or is at least the minimum useless shift for the shifted value's known
// bits; MatchInfo receives the known replacement value (or nullopt).
// NOTE(review): the signature's first line is missing from this extract.
6974 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
6975 Register ShiftVal = MI.getOperand(1).getReg();
6976 Register ShiftReg = MI.getOperand(2).getReg();
6977 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6978 auto IsShiftTooBig = [&](const Constant *C) {
6979 auto *CI = dyn_cast<ConstantInt>(C);
6980 if (!CI)
6981 return false;
// A shift amount >= the bit width is already over-shifting regardless of
// the known bits; the result value is left undetermined here.
6982 if (CI->uge(ResTy.getScalarSizeInBits())) {
6983 MatchInfo = std::nullopt;
6984 return true;
6985 }
6986 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
6987 MI.getOpcode(), MatchInfo);
6988 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
6989 };
6990 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6991}
6992
// Matches binary ops whose constant (or constant-fold-barrier) operand sits
// on the LHS so it can be commuted to the RHS. The overflow opcodes carry an
// extra carry-out def, shifting the operand indices by one.
// NOTE(review): the signature line and the final condition's last line
// (before 7021, presumably checking the RHS for a constant) are missing from
// this extract.
6994 unsigned LHSOpndIdx = 1;
6995 unsigned RHSOpndIdx = 2;
6996 switch (MI.getOpcode()) {
6997 case TargetOpcode::G_UADDO:
6998 case TargetOpcode::G_SADDO:
6999 case TargetOpcode::G_UMULO:
7000 case TargetOpcode::G_SMULO:
7001 LHSOpndIdx = 2;
7002 RHSOpndIdx = 3;
7003 break;
7004 default:
7005 break;
7006 }
7007 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7008 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7009 if (!getIConstantVRegVal(LHS, MRI)) {
7010 // Skip commuting if LHS is not a constant. But, LHS may be a
7011 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7012 // have a constant on the RHS.
7013 if (MRI.getVRegDef(LHS)->getOpcode() !=
7014 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7015 return false;
7016 }
7017 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7018 return MRI.getVRegDef(RHS)->getOpcode() !=
7019 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7021}
7022
// Matches FP binary ops with a constant/splat on the LHS but not on the RHS,
// so the constant can be commuted to the RHS.
// NOTE(review): the signature line is missing from this extract.
7024 Register LHS = MI.getOperand(1).getReg();
7025 Register RHS = MI.getOperand(2).getReg();
7026 std::optional<FPValueAndVReg> ValAndVReg;
7027 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7028 return false;
7029 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7030}
7031
// Swaps the two value operands of a binary op in place; the overflow opcodes
// have their operands shifted by one due to the extra carry-out def.
// NOTE(review): the signature line(s) and the trailing line before 7052
// (presumably the observer notification) are missing from this extract.
7034 unsigned LHSOpndIdx = 1;
7035 unsigned RHSOpndIdx = 2;
7036 switch (MI.getOpcode()) {
7037 case TargetOpcode::G_UADDO:
7038 case TargetOpcode::G_SADDO:
7039 case TargetOpcode::G_UMULO:
7040 case TargetOpcode::G_SMULO:
7041 LHSOpndIdx = 2;
7042 RHSOpndIdx = 3;
7043 break;
7044 default:
7045 break;
7046 }
7047 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7048 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7049 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7050 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7052}
7053
7054bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7055 LLT SrcTy = MRI.getType(Src);
7056 if (SrcTy.isFixedVector())
7057 return isConstantSplatVector(Src, 1, AllowUndefs);
7058 if (SrcTy.isScalar()) {
7059 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7060 return true;
7061 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7062 return IConstant && IConstant->Value == 1;
7063 }
7064 return false; // scalable vector
7065}
7066
7067bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7068 LLT SrcTy = MRI.getType(Src);
7069 if (SrcTy.isFixedVector())
7070 return isConstantSplatVector(Src, 0, AllowUndefs);
7071 if (SrcTy.isScalar()) {
7072 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7073 return true;
7074 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7075 return IConstant && IConstant->Value == 0;
7076 }
7077 return false; // scalable vector
7078}
7079
7080// Ignores COPYs during conformance checks.
7081// FIXME scalable vectors.
// Returns true if Src is a G_BUILD_VECTOR whose sources are all the integer
// constant SplatValue (undef sources are tolerated when AllowUndefs).
// NOTE(review): the line before 7098 (presumably the constant lookup of the
// source register) is missing from this extract.
7082bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7083 bool AllowUndefs) const {
7084 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7085 if (!BuildVector)
7086 return false;
7087 unsigned NumSources = BuildVector->getNumSources();
7088
7089 for (unsigned I = 0; I < NumSources; ++I) {
7090 GImplicitDef *ImplicitDef =
7091 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
7092 if (ImplicitDef && AllowUndefs)
7093 continue;
7094 if (ImplicitDef && !AllowUndefs)
7095 return false;
7096 std::optional<ValueAndVReg> IConstant =
7098 if (IConstant && IConstant->Value == SplatValue)
7099 continue;
7100 return false;
7101 }
7102 return true;
7103}
7104
7105// Ignores COPYs during lookups.
7106// FIXME scalable vectors
// Returns the value of Src when it is an integer constant, or the common
// value of all sources when Src is a uniform G_BUILD_VECTOR; std::nullopt
// otherwise.
// NOTE(review): the line before 7122 (presumably the per-source constant
// lookup) is missing from this extract.
7107std::optional<APInt>
7108CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7109 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7110 if (IConstant)
7111 return IConstant->Value;
7112
7113 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7114 if (!BuildVector)
7115 return std::nullopt;
7116 unsigned NumSources = BuildVector->getNumSources();
7117
7118 std::optional<APInt> Value = std::nullopt;
7119 for (unsigned I = 0; I < NumSources; ++I) {
7120 std::optional<ValueAndVReg> IConstant =
7122 if (!IConstant)
7123 return std::nullopt;
7124 if (!Value)
7125 Value = IConstant->Value;
7126 else if (*Value != IConstant->Value)
7127 return std::nullopt;
7128 }
7129 return Value;
7130}
7131
7132// FIXME G_SPLAT_VECTOR
// Returns true if Src is an integer constant or a G_BUILD_VECTOR whose
// sources are all integer constants (not necessarily equal).
// NOTE(review): the line before 7146 (presumably the per-source constant
// lookup) is missing from this extract.
7133bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7134 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7135 if (IConstant)
7136 return true;
7137
7138 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7139 if (!BuildVector)
7140 return false;
7141
7142 unsigned NumSources = BuildVector->getNumSources();
7143 for (unsigned I = 0; I < NumSources; ++I) {
7144 std::optional<ValueAndVReg> IConstant =
7146 if (!IConstant)
7147 return false;
7148 }
7149 return true;
7150}
7151
7152// TODO: use knownbits to determine zeros
// Folds a scalar-boolean select of two integer constants into cheaper
// arithmetic (zext/sext, add, shift, or), per the per-case comments below.
// NOTE(review): several hyperlinked lines are missing from this extract —
// the constant lookups before 7173/7175 and the creation of the temporary
// registers (Inner/Not) used inside each MatchInfo lambda — verify upstream.
7153bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7154 BuildFnTy &MatchInfo) const {
7155 uint32_t Flags = Select->getFlags();
7156 Register Dest = Select->getReg(0);
7157 Register Cond = Select->getCondReg();
7158 Register True = Select->getTrueReg();
7159 Register False = Select->getFalseReg();
7160 LLT CondTy = MRI.getType(Select->getCondReg());
7161 LLT TrueTy = MRI.getType(Select->getTrueReg());
7162
7163 // We only do this combine for scalar boolean conditions.
7164 if (CondTy != LLT::scalar(1))
7165 return false;
7166
7167 if (TrueTy.isPointer())
7168 return false;
7169
7170 // Both are scalars.
7171 std::optional<ValueAndVReg> TrueOpt =
7173 std::optional<ValueAndVReg> FalseOpt =
7175
7176 if (!TrueOpt || !FalseOpt)
7177 return false;
7178
7179 APInt TrueValue = TrueOpt->Value;
7180 APInt FalseValue = FalseOpt->Value;
7181
7182 // select Cond, 1, 0 --> zext (Cond)
7183 if (TrueValue.isOne() && FalseValue.isZero()) {
7184 MatchInfo = [=](MachineIRBuilder &B) {
7185 B.setInstrAndDebugLoc(*Select);
7186 B.buildZExtOrTrunc(Dest, Cond);
7187 };
7188 return true;
7189 }
7190
7191 // select Cond, -1, 0 --> sext (Cond)
7192 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7193 MatchInfo = [=](MachineIRBuilder &B) {
7194 B.setInstrAndDebugLoc(*Select);
7195 B.buildSExtOrTrunc(Dest, Cond);
7196 };
7197 return true;
7198 }
7199
7200 // select Cond, 0, 1 --> zext (!Cond)
7201 if (TrueValue.isZero() && FalseValue.isOne()) {
7202 MatchInfo = [=](MachineIRBuilder &B) {
7203 B.setInstrAndDebugLoc(*Select);
7205 B.buildNot(Inner, Cond);
7206 B.buildZExtOrTrunc(Dest, Inner);
7207 };
7208 return true;
7209 }
7210
7211 // select Cond, 0, -1 --> sext (!Cond)
7212 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7213 MatchInfo = [=](MachineIRBuilder &B) {
7214 B.setInstrAndDebugLoc(*Select);
7216 B.buildNot(Inner, Cond);
7217 B.buildSExtOrTrunc(Dest, Inner);
7218 };
7219 return true;
7220 }
7221
7222 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7223 if (TrueValue - 1 == FalseValue) {
7224 MatchInfo = [=](MachineIRBuilder &B) {
7225 B.setInstrAndDebugLoc(*Select);
7227 B.buildZExtOrTrunc(Inner, Cond);
7228 B.buildAdd(Dest, Inner, False);
7229 };
7230 return true;
7231 }
7232
7233 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7234 if (TrueValue + 1 == FalseValue) {
7235 MatchInfo = [=](MachineIRBuilder &B) {
7236 B.setInstrAndDebugLoc(*Select);
7238 B.buildSExtOrTrunc(Inner, Cond);
7239 B.buildAdd(Dest, Inner, False);
7240 };
7241 return true;
7242 }
7243
7244 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7245 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7246 MatchInfo = [=](MachineIRBuilder &B) {
7247 B.setInstrAndDebugLoc(*Select);
7249 B.buildZExtOrTrunc(Inner, Cond);
7250 // The shift amount must be scalar.
7251 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7252 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7253 B.buildShl(Dest, Inner, ShAmtC, Flags);
7254 };
7255 return true;
7256 }
7257
7258 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7259 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7260 MatchInfo = [=](MachineIRBuilder &B) {
7261 B.setInstrAndDebugLoc(*Select);
7263 B.buildNot(Not, Cond);
7265 B.buildZExtOrTrunc(Inner, Not);
7266 // The shift amount must be scalar.
7267 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7268 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7269 B.buildShl(Dest, Inner, ShAmtC, Flags);
7270 };
7271 return true;
7272 }
7273
7274 // select Cond, -1, C --> or (sext Cond), C
7275 if (TrueValue.isAllOnes()) {
7276 MatchInfo = [=](MachineIRBuilder &B) {
7277 B.setInstrAndDebugLoc(*Select);
7279 B.buildSExtOrTrunc(Inner, Cond);
7280 B.buildOr(Dest, Inner, False, Flags);
7281 };
7282 return true;
7283 }
7284
7285 // select Cond, C, -1 --> or (sext (not Cond)), C
7286 if (FalseValue.isAllOnes()) {
7287 MatchInfo = [=](MachineIRBuilder &B) {
7288 B.setInstrAndDebugLoc(*Select);
7290 B.buildNot(Not, Cond);
7292 B.buildSExtOrTrunc(Inner, Not);
7293 B.buildOr(Dest, Inner, True, Flags);
7294 };
7295 return true;
7296 }
7297
7298 return false;
7299}
7300
7301// TODO: use knownbits to determine zeros
// Folds a select whose operands are booleans (scalar i1 or fixed vector of
// i1) into and/or logic, freezing the surviving non-constant operand.
// NOTE(review): the hyperlinked lines creating the temporary registers
// (Ext/Inner) inside each MatchInfo lambda are missing from this extract.
7302bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7303 BuildFnTy &MatchInfo) const {
7304 uint32_t Flags = Select->getFlags();
7305 Register DstReg = Select->getReg(0);
7306 Register Cond = Select->getCondReg();
7307 Register True = Select->getTrueReg();
7308 Register False = Select->getFalseReg();
7309 LLT CondTy = MRI.getType(Select->getCondReg());
7310 LLT TrueTy = MRI.getType(Select->getTrueReg());
7311
7312 // Boolean or fixed vector of booleans.
7313 if (CondTy.isScalableVector() ||
7314 (CondTy.isFixedVector() &&
7315 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7316 CondTy.getScalarSizeInBits() != 1)
7317 return false;
7318
7319 if (CondTy != TrueTy)
7320 return false;
7321
7322 // select Cond, Cond, F --> or Cond, F
7323 // select Cond, 1, F --> or Cond, F
7324 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7325 MatchInfo = [=](MachineIRBuilder &B) {
7326 B.setInstrAndDebugLoc(*Select);
7328 B.buildZExtOrTrunc(Ext, Cond);
7329 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7330 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7331 };
7332 return true;
7333 }
7334
7335 // select Cond, T, Cond --> and Cond, T
7336 // select Cond, T, 0 --> and Cond, T
7337 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7338 MatchInfo = [=](MachineIRBuilder &B) {
7339 B.setInstrAndDebugLoc(*Select);
7341 B.buildZExtOrTrunc(Ext, Cond);
7342 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7343 B.buildAnd(DstReg, Ext, FreezeTrue);
7344 };
7345 return true;
7346 }
7347
7348 // select Cond, T, 1 --> or (not Cond), T
7349 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7350 MatchInfo = [=](MachineIRBuilder &B) {
7351 B.setInstrAndDebugLoc(*Select);
7352 // First the not.
7354 B.buildNot(Inner, Cond);
7355 // Then an ext to match the destination register.
7357 B.buildZExtOrTrunc(Ext, Inner);
7358 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7359 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7360 };
7361 return true;
7362 }
7363
7364 // select Cond, 0, F --> and (not Cond), F
7365 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7366 MatchInfo = [=](MachineIRBuilder &B) {
7367 B.setInstrAndDebugLoc(*Select);
7368 // First the not.
7370 B.buildNot(Inner, Cond);
7371 // Then an ext to match the destination register.
7373 B.buildZExtOrTrunc(Ext, Inner);
7374 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7375 B.buildAnd(DstReg, Ext, FreezeFalse);
7376 };
7377 return true;
7378 }
7379
7380 return false;
7381}
7382
// Matches select (icmp Pred X, Y) X, Y (or the swapped form) against an
// integer min/max: G_UMAX/G_SMAX/G_UMIN/G_SMIN, subject to legality.
// NOTE(review): the signature's first line is missing from this extract; MO
// is a parameter declared there referencing the select's result.
7384 BuildFnTy &MatchInfo) const {
7385 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7386 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7387
7388 Register DstReg = Select->getReg(0);
7389 Register True = Select->getTrueReg();
7390 Register False = Select->getFalseReg();
7391 LLT DstTy = MRI.getType(DstReg);
7392
7393 if (DstTy.isPointer())
7394 return false;
7395
7396 // We want to fold the icmp and replace the select.
7397 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7398 return false;
7399
7400 CmpInst::Predicate Pred = Cmp->getCond();
7401 // We need a larger or smaller predicate for
7402 // canonicalization.
7403 if (CmpInst::isEquality(Pred))
7404 return false;
7405
7406 Register CmpLHS = Cmp->getLHSReg();
7407 Register CmpRHS = Cmp->getRHSReg();
7408
7409 // We can swap CmpLHS and CmpRHS for higher hitrate.
7410 if (True == CmpRHS && False == CmpLHS) {
7411 std::swap(CmpLHS, CmpRHS);
7412 Pred = CmpInst::getSwappedPredicate(Pred);
7413 }
7414
7415 // (icmp X, Y) ? X : Y -> integer minmax.
7416 // see matchSelectPattern in ValueTracking.
7417 // Legality between G_SELECT and integer minmax can differ.
7418 if (True != CmpLHS || False != CmpRHS)
7419 return false;
7420
7421 switch (Pred) {
7422 case ICmpInst::ICMP_UGT:
7423 case ICmpInst::ICMP_UGE: {
7424 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7425 return false;
7426 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7427 return true;
7428 }
7429 case ICmpInst::ICMP_SGT:
7430 case ICmpInst::ICMP_SGE: {
7431 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7432 return false;
7433 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7434 return true;
7435 }
7436 case ICmpInst::ICMP_ULT:
7437 case ICmpInst::ICMP_ULE: {
7438 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7439 return false;
7440 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7441 return true;
7442 }
7443 case ICmpInst::ICMP_SLT:
7444 case ICmpInst::ICMP_SLE: {
7445 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7446 return false;
7447 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7448 return true;
7449 }
7450 default:
7451 return false;
7452 }
7453}
7454
7455// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
// Rooted at a G_SUB acting as negation; swaps the min/max for its inverse
// opcode so the outer negation can be dropped, if the inverse is legal.
// NOTE(review): the signature's first line is missing from this extract.
7457 BuildFnTy &MatchInfo) const {
7458 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7459 Register DestReg = MI.getOperand(0).getReg();
7460 LLT DestTy = MRI.getType(DestReg);
7461
7462 Register X;
7463 Register Sub0;
// Sub0 captures the (neg x) operand so it can be reused in the rebuild.
7464 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7465 if (mi_match(DestReg, MRI,
7466 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7467 m_GSMax(m_Reg(X), NegPattern),
7468 m_GUMin(m_Reg(X), NegPattern),
7469 m_GUMax(m_Reg(X), NegPattern)))))) {
7470 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7471 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7472 if (isLegal({NewOpc, {DestTy}})) {
7473 MatchInfo = [=](MachineIRBuilder &B) {
7474 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7475 };
7476 return true;
7477 }
7478 }
7479
7480 return false;
7481}
7482
// Top-level G_SELECT combine driver: tries the constant-operand folds first,
// then the boolean-operand-to-logic folds.
// NOTE(review): the signature's first line is missing from this extract.
7484 GSelect *Select = cast<GSelect>(&MI);
7485
7486 if (tryFoldSelectOfConstants(Select, MatchInfo))
7487 return true;
7488
7489 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7490 return true;
7491
7492 return false;
7493}
7494
7495/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7496/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7497/// into a single comparison using range-based reasoning.
7498/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
7499bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7500 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7501 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7502 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7503 Register DstReg = Logic->getReg(0);
7504 Register LHS = Logic->getLHSReg();
7505 Register RHS = Logic->getRHSReg();
7506 unsigned Flags = Logic->getFlags();
7507
7508 // We need an G_ICMP on the LHS register.
7509 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7510 if (!Cmp1)
7511 return false;
7512
7513 // We need an G_ICMP on the RHS register.
7514 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7515 if (!Cmp2)
7516 return false;
7517
7518 // We want to fold the icmps.
7519 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7520 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7521 return false;
7522
7523 APInt C1;
7524 APInt C2;
7525 std::optional<ValueAndVReg> MaybeC1 =
7527 if (!MaybeC1)
7528 return false;
7529 C1 = MaybeC1->Value;
7530
7531 std::optional<ValueAndVReg> MaybeC2 =
7533 if (!MaybeC2)
7534 return false;
7535 C2 = MaybeC2->Value;
7536
7537 Register R1 = Cmp1->getLHSReg();
7538 Register R2 = Cmp2->getLHSReg();
7539 CmpInst::Predicate Pred1 = Cmp1->getCond();
7540 CmpInst::Predicate Pred2 = Cmp2->getCond();
7541 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7542 LLT CmpOperandTy = MRI.getType(R1);
7543
7544 if (CmpOperandTy.isPointer())
7545 return false;
7546
7547 // We build ands, adds, and constants of type CmpOperandTy.
7548 // They must be legal to build.
7549 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7550 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7551 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7552 return false;
7553
7554 // Look through add of a constant offset on R1, R2, or both operands. This
7555 // allows us to interpret the R + C' < C'' range idiom into a proper range.
7556 std::optional<APInt> Offset1;
7557 std::optional<APInt> Offset2;
7558 if (R1 != R2) {
7559 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7560 std::optional<ValueAndVReg> MaybeOffset1 =
7562 if (MaybeOffset1) {
7563 R1 = Add->getLHSReg();
7564 Offset1 = MaybeOffset1->Value;
7565 }
7566 }
7567 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7568 std::optional<ValueAndVReg> MaybeOffset2 =
7570 if (MaybeOffset2) {
7571 R2 = Add->getLHSReg();
7572 Offset2 = MaybeOffset2->Value;
7573 }
7574 }
7575 }
7576
7577 if (R1 != R2)
7578 return false;
7579
7580 // We calculate the icmp ranges including maybe offsets.
7582 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7583 if (Offset1)
7584 CR1 = CR1.subtract(*Offset1);
7585
7587 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7588 if (Offset2)
7589 CR2 = CR2.subtract(*Offset2);
7590
7591 bool CreateMask = false;
7592 APInt LowerDiff;
7593 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7594 if (!CR) {
7595 // We need non-wrapping ranges.
7596 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7597 return false;
7598
7599 // Check whether we have equal-size ranges that only differ by one bit.
7600 // In that case we can apply a mask to map one range onto the other.
7601 LowerDiff = CR1.getLower() ^ CR2.getLower();
7602 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7603 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7604 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7605 CR1Size != CR2.getUpper() - CR2.getLower())
7606 return false;
7607
7608 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7609 CreateMask = true;
7610 }
7611
7612 if (IsAnd)
7613 CR = CR->inverse();
7614
7615 CmpInst::Predicate NewPred;
7616 APInt NewC, Offset;
7617 CR->getEquivalentICmp(NewPred, NewC, Offset);
7618
7619 // We take the result type of one of the original icmps, CmpTy, for
7620 // the to be build icmp. The operand type, CmpOperandTy, is used for
7621 // the other instructions and constants to be build. The types of
7622 // the parameters and output are the same for add and and. CmpTy
7623 // and the type of DstReg might differ. That is why we zext or trunc
7624 // the icmp into the destination register.
7625
7626 MatchInfo = [=](MachineIRBuilder &B) {
7627 if (CreateMask && Offset != 0) {
7628 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7629 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7630 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7631 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7632 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7633 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7634 B.buildZExtOrTrunc(DstReg, ICmp);
7635 } else if (CreateMask && Offset == 0) {
7636 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7637 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7638 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7639 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7640 B.buildZExtOrTrunc(DstReg, ICmp);
7641 } else if (!CreateMask && Offset != 0) {
7642 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7643 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7644 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7645 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7646 B.buildZExtOrTrunc(DstReg, ICmp);
7647 } else if (!CreateMask && Offset == 0) {
7648 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7649 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7650 B.buildZExtOrTrunc(DstReg, ICmp);
7651 } else {
7652 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7653 }
7654 };
7655 return true;
7656}
7657
7658bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7659 BuildFnTy &MatchInfo) const {
7660 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpecte xor");
7661 Register DestReg = Logic->getReg(0);
7662 Register LHS = Logic->getLHSReg();
7663 Register RHS = Logic->getRHSReg();
7664 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7665
7666 // We need a compare on the LHS register.
7667 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7668 if (!Cmp1)
7669 return false;
7670
7671 // We need a compare on the RHS register.
7672 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7673 if (!Cmp2)
7674 return false;
7675
7676 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7677 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7678
7679 // We build one fcmp, want to fold the fcmps, replace the logic op,
7680 // and the fcmps must have the same shape.
7682 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7683 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7684 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7685 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7686 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7687 return false;
7688
7689 CmpInst::Predicate PredL = Cmp1->getCond();
7690 CmpInst::Predicate PredR = Cmp2->getCond();
7691 Register LHS0 = Cmp1->getLHSReg();
7692 Register LHS1 = Cmp1->getRHSReg();
7693 Register RHS0 = Cmp2->getLHSReg();
7694 Register RHS1 = Cmp2->getRHSReg();
7695
7696 if (LHS0 == RHS1 && LHS1 == RHS0) {
7697 // Swap RHS operands to match LHS.
7698 PredR = CmpInst::getSwappedPredicate(PredR);
7699 std::swap(RHS0, RHS1);
7700 }
7701
7702 if (LHS0 == RHS0 && LHS1 == RHS1) {
7703 // We determine the new predicate.
7704 unsigned CmpCodeL = getFCmpCode(PredL);
7705 unsigned CmpCodeR = getFCmpCode(PredR);
7706 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7707 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7708 MatchInfo = [=](MachineIRBuilder &B) {
7709 // The fcmp predicates fill the lower part of the enum.
7710 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7711 if (Pred == FCmpInst::FCMP_FALSE &&
7713 auto False = B.buildConstant(CmpTy, 0);
7714 B.buildZExtOrTrunc(DestReg, False);
7715 } else if (Pred == FCmpInst::FCMP_TRUE &&
7717 auto True =
7718 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7719 CmpTy.isVector() /*isVector*/,
7720 true /*isFP*/));
7721 B.buildZExtOrTrunc(DestReg, True);
7722 } else { // We take the predicate without predicate optimizations.
7723 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7724 B.buildZExtOrTrunc(DestReg, Cmp);
7725 }
7726 };
7727 return true;
7728 }
7729
7730 return false;
7731}
7732
7734 GAnd *And = cast<GAnd>(&MI);
7735
7736 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7737 return true;
7738
7739 if (tryFoldLogicOfFCmps(And, MatchInfo))
7740 return true;
7741
7742 return false;
7743}
7744
7746 GOr *Or = cast<GOr>(&MI);
7747
7748 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7749 return true;
7750
7751 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7752 return true;
7753
7754 return false;
7755}
7756
7758 BuildFnTy &MatchInfo) const {
7759 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7760
7761 // Addo has no flags
7762 Register Dst = Add->getReg(0);
7763 Register Carry = Add->getReg(1);
7764 Register LHS = Add->getLHSReg();
7765 Register RHS = Add->getRHSReg();
7766 bool IsSigned = Add->isSigned();
7767 LLT DstTy = MRI.getType(Dst);
7768 LLT CarryTy = MRI.getType(Carry);
7769
7770 // Fold addo, if the carry is dead -> add, undef.
7771 if (MRI.use_nodbg_empty(Carry) &&
7772 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7773 MatchInfo = [=](MachineIRBuilder &B) {
7774 B.buildAdd(Dst, LHS, RHS);
7775 B.buildUndef(Carry);
7776 };
7777 return true;
7778 }
7779
7780 // Canonicalize constant to RHS.
7781 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7782 if (IsSigned) {
7783 MatchInfo = [=](MachineIRBuilder &B) {
7784 B.buildSAddo(Dst, Carry, RHS, LHS);
7785 };
7786 return true;
7787 }
7788 // !IsSigned
7789 MatchInfo = [=](MachineIRBuilder &B) {
7790 B.buildUAddo(Dst, Carry, RHS, LHS);
7791 };
7792 return true;
7793 }
7794
7795 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7796 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7797
7798 // Fold addo(c1, c2) -> c3, carry.
7799 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7801 bool Overflow;
7802 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7803 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7804 MatchInfo = [=](MachineIRBuilder &B) {
7805 B.buildConstant(Dst, Result);
7806 B.buildConstant(Carry, Overflow);
7807 };
7808 return true;
7809 }
7810
7811 // Fold (addo x, 0) -> x, no carry
7812 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7813 MatchInfo = [=](MachineIRBuilder &B) {
7814 B.buildCopy(Dst, LHS);
7815 B.buildConstant(Carry, 0);
7816 };
7817 return true;
7818 }
7819
7820 // Given 2 constant operands whose sum does not overflow:
7821 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7822 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7823 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7824 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7825 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7826 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7827 std::optional<APInt> MaybeAddRHS =
7828 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7829 if (MaybeAddRHS) {
7830 bool Overflow;
7831 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7832 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7833 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7834 if (IsSigned) {
7835 MatchInfo = [=](MachineIRBuilder &B) {
7836 auto ConstRHS = B.buildConstant(DstTy, NewC);
7837 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7838 };
7839 return true;
7840 }
7841 // !IsSigned
7842 MatchInfo = [=](MachineIRBuilder &B) {
7843 auto ConstRHS = B.buildConstant(DstTy, NewC);
7844 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7845 };
7846 return true;
7847 }
7848 }
7849 };
7850
7851 // We try to combine addo to non-overflowing add.
7852 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7854 return false;
7855
7856 // We try to combine uaddo to non-overflowing add.
7857 if (!IsSigned) {
7858 ConstantRange CRLHS =
7859 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
7860 ConstantRange CRRHS =
7861 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
7862
7863 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7865 return false;
7867 MatchInfo = [=](MachineIRBuilder &B) {
7868 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7869 B.buildConstant(Carry, 0);
7870 };
7871 return true;
7872 }
7875 MatchInfo = [=](MachineIRBuilder &B) {
7876 B.buildAdd(Dst, LHS, RHS);
7877 B.buildConstant(Carry, 1);
7878 };
7879 return true;
7880 }
7881 }
7882 return false;
7883 }
7884
7885 // We try to combine saddo to non-overflowing add.
7886
7887 // If LHS and RHS each have at least two sign bits, then there is no signed
7888 // overflow.
7889 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
7890 MatchInfo = [=](MachineIRBuilder &B) {
7891 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7892 B.buildConstant(Carry, 0);
7893 };
7894 return true;
7895 }
7896
7897 ConstantRange CRLHS =
7898 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
7899 ConstantRange CRRHS =
7900 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
7901
7902 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7904 return false;
7906 MatchInfo = [=](MachineIRBuilder &B) {
7907 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7908 B.buildConstant(Carry, 0);
7909 };
7910 return true;
7911 }
7914 MatchInfo = [=](MachineIRBuilder &B) {
7915 B.buildAdd(Dst, LHS, RHS);
7916 B.buildConstant(Carry, 1);
7917 };
7918 return true;
7919 }
7920 }
7921
7922 return false;
7923}
7924
7926 BuildFnTy &MatchInfo) const {
7928 MatchInfo(Builder);
7929 Root->eraseFromParent();
7930}
7931
7933 int64_t Exponent) const {
7934 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
7936}
7937
7939 int64_t Exponent) const {
7940 auto [Dst, Base] = MI.getFirst2Regs();
7941 LLT Ty = MRI.getType(Dst);
7942 int64_t ExpVal = Exponent;
7943
7944 if (ExpVal == 0) {
7945 Builder.buildFConstant(Dst, 1.0);
7946 MI.removeFromParent();
7947 return;
7948 }
7949
7950 if (ExpVal < 0)
7951 ExpVal = -ExpVal;
7952
7953 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
7954 // to generate the multiply sequence. There are more optimal ways to do this
7955 // (for example, powi(x,15) generates one more multiply than it should), but
7956 // this has the benefit of being both really simple and much better than a
7957 // libcall.
7958 std::optional<SrcOp> Res;
7959 SrcOp CurSquare = Base;
7960 while (ExpVal > 0) {
7961 if (ExpVal & 1) {
7962 if (!Res)
7963 Res = CurSquare;
7964 else
7965 Res = Builder.buildFMul(Ty, *Res, CurSquare);
7966 }
7967
7968 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
7969 ExpVal >>= 1;
7970 }
7971
7972 // If the original exponent was negative, invert the result, producing
7973 // 1/(x*x*x).
7974 if (Exponent < 0)
7975 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
7976 MI.getFlags());
7977
7978 Builder.buildCopy(Dst, *Res);
7979 MI.eraseFromParent();
7980}
7981
7983 BuildFnTy &MatchInfo) const {
7984 // fold (A+C1)-C2 -> A+(C1-C2)
7985 const GSub *Sub = cast<GSub>(&MI);
7986 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
7987
7988 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
7989 return false;
7990
7991 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
7992 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
7993
7994 Register Dst = Sub->getReg(0);
7995 LLT DstTy = MRI.getType(Dst);
7996
7997 MatchInfo = [=](MachineIRBuilder &B) {
7998 auto Const = B.buildConstant(DstTy, C1 - C2);
7999 B.buildAdd(Dst, Add->getLHSReg(), Const);
8000 };
8001
8002 return true;
8003}
8004
8006 BuildFnTy &MatchInfo) const {
8007 // fold C2-(A+C1) -> (C2-C1)-A
8008 const GSub *Sub = cast<GSub>(&MI);
8009 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8010
8011 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8012 return false;
8013
8014 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8015 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8016
8017 Register Dst = Sub->getReg(0);
8018 LLT DstTy = MRI.getType(Dst);
8019
8020 MatchInfo = [=](MachineIRBuilder &B) {
8021 auto Const = B.buildConstant(DstTy, C2 - C1);
8022 B.buildSub(Dst, Const, Add->getLHSReg());
8023 };
8024
8025 return true;
8026}
8027
8029 BuildFnTy &MatchInfo) const {
8030 // fold (A-C1)-C2 -> A-(C1+C2)
8031 const GSub *Sub1 = cast<GSub>(&MI);
8032 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8033
8034 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8035 return false;
8036
8037 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8038 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8039
8040 Register Dst = Sub1->getReg(0);
8041 LLT DstTy = MRI.getType(Dst);
8042
8043 MatchInfo = [=](MachineIRBuilder &B) {
8044 auto Const = B.buildConstant(DstTy, C1 + C2);
8045 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8046 };
8047
8048 return true;
8049}
8050
8052 BuildFnTy &MatchInfo) const {
8053 // fold (C1-A)-C2 -> (C1-C2)-A
8054 const GSub *Sub1 = cast<GSub>(&MI);
8055 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8056
8057 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8058 return false;
8059
8060 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8061 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8062
8063 Register Dst = Sub1->getReg(0);
8064 LLT DstTy = MRI.getType(Dst);
8065
8066 MatchInfo = [=](MachineIRBuilder &B) {
8067 auto Const = B.buildConstant(DstTy, C1 - C2);
8068 B.buildSub(Dst, Const, Sub2->getRHSReg());
8069 };
8070
8071 return true;
8072}
8073
8075 BuildFnTy &MatchInfo) const {
8076 // fold ((A-C1)+C2) -> (A+(C2-C1))
8077 const GAdd *Add = cast<GAdd>(&MI);
8078 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8079
8080 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8081 return false;
8082
8083 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8084 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8085
8086 Register Dst = Add->getReg(0);
8087 LLT DstTy = MRI.getType(Dst);
8088
8089 MatchInfo = [=](MachineIRBuilder &B) {
8090 auto Const = B.buildConstant(DstTy, C2 - C1);
8091 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8092 };
8093
8094 return true;
8095}
8096
8098 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8099 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8100
8101 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8102 return false;
8103
8104 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8105
8106 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8107
8108 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8109 // $any:_(<8 x s16>) = G_ANYEXT $bv
8110 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8111 //
8112 // ->
8113 //
8114 // $any:_(s16) = G_ANYEXT $bv[0]
8115 // $any1:_(s16) = G_ANYEXT $bv[1]
8116 // $any2:_(s16) = G_ANYEXT $bv[2]
8117 // $any3:_(s16) = G_ANYEXT $bv[3]
8118 // $any4:_(s16) = G_ANYEXT $bv[4]
8119 // $any5:_(s16) = G_ANYEXT $bv[5]
8120 // $any6:_(s16) = G_ANYEXT $bv[6]
8121 // $any7:_(s16) = G_ANYEXT $bv[7]
8122 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8123 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8124
8125 // We want to unmerge into vectors.
8126 if (!DstTy.isFixedVector())
8127 return false;
8128
8129 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8130 if (!Any)
8131 return false;
8132
8133 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8134
8135 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8136 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8137
8138 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8139 return false;
8140
8141 // FIXME: check element types?
8142 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8143 return false;
8144
8145 LLT BigBvTy = MRI.getType(BV->getReg(0));
8146 LLT SmallBvTy = DstTy;
8147 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8148
8150 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8151 return false;
8152
8153 // We check the legality of scalar anyext.
8155 {TargetOpcode::G_ANYEXT,
8156 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8157 return false;
8158
8159 MatchInfo = [=](MachineIRBuilder &B) {
8160 // Build into each G_UNMERGE_VALUES def
8161 // a small build vector with anyext from the source build vector.
8162 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
8164 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
8165 Register SourceArray =
8166 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8167 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8168 Ops.push_back(AnyExt.getReg(0));
8169 }
8170 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8171 };
8172 };
8173 return true;
8174 };
8175
8176 return false;
8177}
8178
8180 BuildFnTy &MatchInfo) const {
8181
8182 bool Changed = false;
8183 auto &Shuffle = cast<GShuffleVector>(MI);
8184 ArrayRef<int> OrigMask = Shuffle.getMask();
8185 SmallVector<int, 16> NewMask;
8186 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
8187 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8188 const unsigned NumDstElts = OrigMask.size();
8189 for (unsigned i = 0; i != NumDstElts; ++i) {
8190 int Idx = OrigMask[i];
8191 if (Idx >= (int)NumSrcElems) {
8192 Idx = -1;
8193 Changed = true;
8194 }
8195 NewMask.push_back(Idx);
8196 }
8197
8198 if (!Changed)
8199 return false;
8200
8201 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8202 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8203 std::move(NewMask));
8204 };
8205
8206 return true;
8207}
8208
8209static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8210 const unsigned MaskSize = Mask.size();
8211 for (unsigned I = 0; I < MaskSize; ++I) {
8212 int Idx = Mask[I];
8213 if (Idx < 0)
8214 continue;
8215
8216 if (Idx < (int)NumElems)
8217 Mask[I] = Idx + NumElems;
8218 else
8219 Mask[I] = Idx - NumElems;
8220 }
8221}
8222
8224 BuildFnTy &MatchInfo) const {
8225
8226 auto &Shuffle = cast<GShuffleVector>(MI);
8227 // If any of the two inputs is already undef, don't check the mask again to
8228 // prevent infinite loop
8229 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8230 return false;
8231
8232 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8233 return false;
8234
8235 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8236 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
8238 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8239 return false;
8240
8241 ArrayRef<int> Mask = Shuffle.getMask();
8242 const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
8243
8244 bool TouchesSrc1 = false;
8245 bool TouchesSrc2 = false;
8246 const unsigned NumElems = Mask.size();
8247 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8248 if (Mask[Idx] < 0)
8249 continue;
8250
8251 if (Mask[Idx] < (int)NumSrcElems)
8252 TouchesSrc1 = true;
8253 else
8254 TouchesSrc2 = true;
8255 }
8256
8257 if (TouchesSrc1 == TouchesSrc2)
8258 return false;
8259
8260 Register NewSrc1 = Shuffle.getSrc1Reg();
8261 SmallVector<int, 16> NewMask(Mask);
8262 if (TouchesSrc2) {
8263 NewSrc1 = Shuffle.getSrc2Reg();
8264 commuteMask(NewMask, NumSrcElems);
8265 }
8266
8267 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8268 auto Undef = B.buildUndef(Src1Ty);
8269 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8270 };
8271
8272 return true;
8273}
8274
// Combine G_USUBO/G_SSUBO: use known-bits-derived ConstantRanges of the two
// operands to prove the carry-out is always 0 (fold to a nowrap G_SUB) or
// always 1 (fold to a plain G_SUB plus a constant-true carry).
//
// NOTE(review): the extraction dropped the first line of this function's
// signature and several lines below (marked inline); restore them from the
// upstream file before compiling.
                                     BuildFnTy &MatchInfo) const {
  const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);

  // Destructure the subo: value result, operands, and carry-out register.
  Register Dst = Subo->getReg(0);
  Register LHS = Subo->getLHSReg();
  Register RHS = Subo->getRHSReg();
  Register Carry = Subo->getCarryOutReg();
  LLT DstTy = MRI.getType(Dst);
  LLT CarryTy = MRI.getType(Carry);

  // Check legality before known bits.
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
      // NOTE(review): the second half of this condition (presumably a
      // constant-legality check on CarryTy) is missing here.
    return false;

  // Operand value ranges from known bits, signedness matching the opcode.
  ConstantRange KBLHS =
      // NOTE(review): the 'ConstantRange::fromKnownBits(VT->getKnownBits(LHS),'
      // line appears to be missing here.
      /* IsSigned=*/Subo->isSigned());
  ConstantRange KBRHS =
      // NOTE(review): the matching 'fromKnownBits(...RHS...' line appears to
      // be missing here.
      /* IsSigned=*/Subo->isSigned());

  if (Subo->isSigned()) {
    // G_SSUBO
    switch (KBLHS.signedSubMayOverflow(KBRHS)) {
      // NOTE(review): a 'case' label (presumably MayOverflow) is missing.
      return false;
      // NOTE(review): a 'case' label (presumably NeverOverflows) is missing.
      MatchInfo = [=](MachineIRBuilder &B) {
        B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
        B.buildConstant(Carry, 0);
      };
      return true;
    }
      // NOTE(review): 'case' labels (presumably AlwaysOverflowsLow/High)
      // are missing here.
      MatchInfo = [=](MachineIRBuilder &B) {
        B.buildSub(Dst, LHS, RHS);
        B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
                                              /*isVector=*/CarryTy.isVector(),
                                              /*isFP=*/false));
      };
      return true;
    }
    }
    return false;
  }

  // G_USUBO
  switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
    // NOTE(review): a 'case' label (presumably MayOverflow) is missing.
    return false;
    // NOTE(review): a 'case' label (presumably NeverOverflows) is missing.
    MatchInfo = [=](MachineIRBuilder &B) {
      B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
      B.buildConstant(Carry, 0);
    };
    return true;
  }
    // NOTE(review): 'case' labels (presumably AlwaysOverflowsLow/High) are
    // missing here.
    MatchInfo = [=](MachineIRBuilder &B) {
      B.buildSub(Dst, LHS, RHS);
      B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
                                            /*isVector=*/CarryTy.isVector(),
                                            /*isFP=*/false));
    };
    return true;
  }
  }

  return false;
}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1457
bool isNaN() const
Definition: APFloat.h:1447
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1235
APInt bitcastToAPInt() const
Definition: APFloat.h:1353
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
int32_t exactLogBase2() const
Definition: APInt.h:1783
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:834
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1598
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1041
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1615
LLVM_ABI APInt multiplicativeInverse() const
Definition: APInt.cpp:1274
bool isMask(unsigned numBits) const
Definition: APInt.h:488
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:985
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:389
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
Definition: Any.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
LLVM_ABI AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:917
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:695
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:692
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:685
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:706
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:704
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:691
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:680
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:829
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:791
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands that are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchShuffleToExtract(MachineInstr &MI) const
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is zero.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool isPreLegalize() const
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is known to be a power of 2.
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
bool matchCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given an G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
void applyShuffleToExtract(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given an G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
const APFloat & getValue() const
Definition: Constants.h:321
const APFloat & getValueAPF() const
Definition: Constants.h:320
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:154
This class represents a range of values.
Definition: ConstantRange.h:47
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isBigEndian() const
Definition: DataLayout.h:199
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:203
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:245
unsigned size() const
Definition: DenseMap.h:120
iterator end()
Definition: DenseMap.h:87
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:359
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getRHSReg() const
Register getLHSReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
LLVM_ABI void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
LLVM_ABI void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
Simple wrapper observer that takes several observers, and calls each one for each event.
KnownBits getKnownBits(Register R)
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
APInt getKnownZeroes(Register R)
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:265
constexpr bool isScalar() const
Definition: LowLevelType.h:147
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:212
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:65
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:43
constexpr bool isValid() const
Definition: LowLevelType.h:146
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:160
constexpr bool isVector() const
Definition: LowLevelType.h:149
constexpr bool isByteSized() const
Definition: LowLevelType.h:261
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:191
constexpr bool isPointer() const
Definition: LowLevelType.h:150
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:278
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:219
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:271
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:206
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:201
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:64
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ Op0, Src0.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildTruncSSatU(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC_SSAT_U Op.
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FDIV Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTruncSSatS(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC_SSAT_S Op.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:409
LLVM_ABI void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:590
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
Definition: MachineInstr.h:731
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:404
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
LLVM_ABI void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:29
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:107
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:279
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
LLVM Value Representation.
Definition: Value.h:75
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:184
self_iterator getIterator()
Definition: ilist_node.h:134
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:48
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:58
operand_type_match m_Reg()
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(APInt RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
SpecificConstantMatch m_SpecificICst(APInt RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Offset
Definition: DWP.cpp:477
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1480
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:2029
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:651
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:459
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:294
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1440
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1605
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:260
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:739
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1563
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1587
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:492
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1620
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1652
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:670
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:305
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1543
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:201
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:270
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1473
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:966
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition: Utils.cpp:279
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:447
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1677
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:433
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:467
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:499
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1458
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:267
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:234
Extended Value Type.
Definition: ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:101
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition: KnownBits.h:244
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:235
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:66
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:241
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:138
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:98
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.