1//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
41#include <cmath>
42#include <optional>
43#include <tuple>
44
45#define DEBUG_TYPE "gi-combiner"
46
47using namespace llvm;
48using namespace MIPatternMatch;
49
50// Option to allow testing of the combiner while no targets know about indexed
51// addressing.
52static cl::opt<bool>
53 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
54 cl::desc("Force all indexed operations to be "
55 "legal for the GlobalISel combiner"));
56
61 const LegalizerInfo *LI)
62 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
64 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
65 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
66 (void)this->VT;
67}
68
70 return *Builder.getMF().getSubtarget().getTargetLowering();
71}
72
74 return Builder.getMF();
75}
76
80
81LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
82
83/// \returns The little endian in-memory byte position of byte \p I in a
84/// \p ByteWidth bytes wide type.
85///
86/// E.g. Given a 4-byte type x, x[0] -> byte 0
87static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
88 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
89 return I;
90}
91
92/// Determines the LogBase2 value for a non-null input value using the
93/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
95 auto &MRI = *MIB.getMRI();
96 LLT Ty = MRI.getType(V);
97 auto Ctlz = MIB.buildCTLZ(Ty, V);
98 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
99 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
100}
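// Illustrative worked example (editorial sketch, not part of the upstream
// file): for a 32-bit value V = 16 (binary 10000, i.e. 27 leading zeros),
// the emitted sequence computes (32 - 1) - ctlz(16) = 31 - 27 = 4 = log2(16)
// using only a G_CTLZ, a G_CONSTANT and a G_SUB.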
101
102/// \returns The big endian in-memory byte position of byte \p I in a
103/// \p ByteWidth bytes wide type.
104///
105/// E.g. Given a 4-byte type x, x[0] -> byte 3
106static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
107 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
108 return ByteWidth - I - 1;
109}
110
111/// Given a map from byte offsets in memory to indices in a load/store,
112/// determine if that map corresponds to a little or big endian byte pattern.
113///
114/// \param MemOffset2Idx maps memory offsets to address offsets.
115/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
116///
117/// \returns true if the map corresponds to a big endian byte pattern, false if
118/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
119///
120/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
121/// are as follows:
122///
123/// AddrOffset Little endian Big endian
124/// 0 0 3
125/// 1 1 2
126/// 2 2 1
127/// 3 3 0
128static std::optional<bool>
130 int64_t LowestIdx) {
131 // Need at least two byte positions to decide on endianness.
132 unsigned Width = MemOffset2Idx.size();
133 if (Width < 2)
134 return std::nullopt;
135 bool BigEndian = true, LittleEndian = true;
136 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
137 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
138 if (MemOffsetAndIdx == MemOffset2Idx.end())
139 return std::nullopt;
140 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
141 assert(Idx >= 0 && "Expected non-negative byte offset?");
142 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
143 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
144 if (!BigEndian && !LittleEndian)
145 return std::nullopt;
146 }
147
148 assert((BigEndian != LittleEndian) &&
149 "Pattern cannot be both big and little endian!");
150 return BigEndian;
151}
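// Illustrative example (editorial sketch, not part of the upstream file):
// for a 4-byte access where MemOffset2Idx maps {0->0, 1->1, 2->2, 3->3} and
// LowestIdx is 0, every index matches littleEndianByteAt, so the function
// returns false (little endian). The reversed map {0->3, 1->2, 2->1, 3->0}
// matches bigEndianByteAt instead and yields true.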
152
154
155bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
156 assert(LI && "Must have LegalizerInfo to query isLegal!");
157 return LI->getAction(Query).Action == LegalizeActions::Legal;
158}
159
161 const LegalityQuery &Query) const {
162 return isPreLegalize() || isLegal(Query);
163}
164
166 return isLegal(Query) ||
167 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
168}
169
171 if (!Ty.isVector())
172 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
173 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
174 if (isPreLegalize())
175 return true;
176 LLT EltTy = Ty.getElementType();
177 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
178 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
179}
180
182 Register ToReg) const {
183 Observer.changingAllUsesOfReg(MRI, FromReg);
184
185 if (MRI.constrainRegAttrs(ToReg, FromReg))
186 MRI.replaceRegWith(FromReg, ToReg);
187 else
188 Builder.buildCopy(FromReg, ToReg);
189
190 Observer.finishedChangingAllUsesOfReg();
191}
192
194 MachineOperand &FromRegOp,
195 Register ToReg) const {
196 assert(FromRegOp.getParent() && "Expected an operand in an MI");
197 Observer.changingInstr(*FromRegOp.getParent());
198
199 FromRegOp.setReg(ToReg);
200
201 Observer.changedInstr(*FromRegOp.getParent());
202}
203
205 unsigned ToOpcode) const {
206 Observer.changingInstr(FromMI);
207
208 FromMI.setDesc(Builder.getTII().get(ToOpcode));
209
210 Observer.changedInstr(FromMI);
211}
212
214 return RBI->getRegBank(Reg, MRI, *TRI);
215}
216
218 const RegisterBank *RegBank) const {
219 if (RegBank)
220 MRI.setRegBank(Reg, *RegBank);
221}
222
224 if (matchCombineCopy(MI)) {
226 return true;
227 }
228 return false;
229}
231 if (MI.getOpcode() != TargetOpcode::COPY)
232 return false;
233 Register DstReg = MI.getOperand(0).getReg();
234 Register SrcReg = MI.getOperand(1).getReg();
235 return canReplaceReg(DstReg, SrcReg, MRI);
236}
238 Register DstReg = MI.getOperand(0).getReg();
239 Register SrcReg = MI.getOperand(1).getReg();
240 replaceRegWith(MRI, DstReg, SrcReg);
241 MI.eraseFromParent();
242}
243
245 MachineInstr &MI, BuildFnTy &MatchInfo) const {
246 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
247 Register DstOp = MI.getOperand(0).getReg();
248 Register OrigOp = MI.getOperand(1).getReg();
249
250 if (!MRI.hasOneNonDBGUse(OrigOp))
251 return false;
252
253 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
254 // Even if only a single operand of the PHI is not guaranteed non-poison,
255 // moving freeze() backwards across a PHI can cause optimization issues for
256 // other users of that operand.
257 //
258 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
259 // the source register is unprofitable because it makes the freeze() more
260 // strict than is necessary (it would affect the whole register instead of
261 // just the subreg being frozen).
262 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
263 return false;
264
265 if (canCreateUndefOrPoison(OrigOp, MRI,
266 /*ConsiderFlagsAndMetadata=*/false))
267 return false;
268
269 std::optional<MachineOperand> MaybePoisonOperand;
270 for (MachineOperand &Operand : OrigDef->uses()) {
271 if (!Operand.isReg())
272 return false;
273
274 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
275 continue;
276
277 if (!MaybePoisonOperand)
278 MaybePoisonOperand = Operand;
279 else {
280 // We have more than one maybe-poison operand. Moving the freeze is
281 // unsafe.
282 return false;
283 }
284 }
285
286 // Eliminate freeze if all operands are guaranteed non-poison.
287 if (!MaybePoisonOperand) {
288 MatchInfo = [=](MachineIRBuilder &B) {
289 Observer.changingInstr(*OrigDef);
290 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
291 Observer.changedInstr(*OrigDef);
292 B.buildCopy(DstOp, OrigOp);
293 };
294 return true;
295 }
296
297 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
298 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
299
300 MatchInfo = [=](MachineIRBuilder &B) mutable {
301 Observer.changingInstr(*OrigDef);
302 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
303 Observer.changedInstr(*OrigDef);
304 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
305 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
307 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
308 Freeze.getReg(0));
309 replaceRegWith(MRI, DstOp, OrigOp);
310 };
311 return true;
312}
313
316 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
317 "Invalid instruction");
318 bool IsUndef = true;
319 MachineInstr *Undef = nullptr;
320
321 // Walk over all the operands of concat vectors and check if they are
322 // build_vector themselves or undef.
323 // Then collect their operands in Ops.
324 for (const MachineOperand &MO : MI.uses()) {
325 Register Reg = MO.getReg();
326 MachineInstr *Def = MRI.getVRegDef(Reg);
327 assert(Def && "Operand not defined");
328 if (!MRI.hasOneNonDBGUse(Reg))
329 return false;
330 switch (Def->getOpcode()) {
331 case TargetOpcode::G_BUILD_VECTOR:
332 IsUndef = false;
333 // Remember the operands of the build_vector to fold
334 // them into the yet-to-build flattened concat vectors.
335 for (const MachineOperand &BuildVecMO : Def->uses())
336 Ops.push_back(BuildVecMO.getReg());
337 break;
338 case TargetOpcode::G_IMPLICIT_DEF: {
339 LLT OpType = MRI.getType(Reg);
340 // Keep one undef value for all the undef operands.
341 if (!Undef) {
342 Builder.setInsertPt(*MI.getParent(), MI);
343 Undef = Builder.buildUndef(OpType.getScalarType());
344 }
345 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
346 OpType.getScalarType() &&
347 "All undefs should have the same type");
348 // Break the undef vector into as many scalar elements as needed
349 // for the flattening.
350 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
351 EltIdx != EltEnd; ++EltIdx)
352 Ops.push_back(Undef->getOperand(0).getReg());
353 break;
354 }
355 default:
356 return false;
357 }
358 }
359
360 // Check if the combine is illegal
361 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
363 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
364 return false;
365 }
366
367 if (IsUndef)
368 Ops.clear();
369
370 return true;
371}
374 // We determined that the concat_vectors can be flattened.
375 // Generate the flattened build_vector.
376 Register DstReg = MI.getOperand(0).getReg();
377 Builder.setInsertPt(*MI.getParent(), MI);
378 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
379
380 // Note: IsUndef is sort of redundant. We could have determined it by
381 // checking that all Ops are undef. Alternatively, we could have
382 // generated a build_vector of undefs and relied on another combine to
383 // clean that up. For now, given that we already gather this information
384 // in matchCombineConcatVectors, just save compile time and issue the
385 // right thing.
386 if (Ops.empty())
387 Builder.buildUndef(NewDstReg);
388 else
389 Builder.buildBuildVector(NewDstReg, Ops);
390 replaceRegWith(MRI, DstReg, NewDstReg);
391 MI.eraseFromParent();
392}
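// Illustrative MIR sketch (editorial, not part of the upstream file) of the
// concat-of-build_vectors flattening above, assuming single-use inputs:
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x:_(s32), %y:_(s32)
//   %b:_(<2 x s32>) = G_BUILD_VECTOR %z:_(s32), %w:_(s32)
//   %v:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
// is rewritten into a single flattened build_vector:
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32), %z(s32), %w(s32)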
393
395 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
396 "Invalid instruction");
397 auto &Shuffle = cast<GShuffleVector>(MI);
398
399 Register SrcVec1 = Shuffle.getSrc1Reg();
400 Register SrcVec2 = Shuffle.getSrc2Reg();
401
402 LLT SrcVec1Type = MRI.getType(SrcVec1);
403 LLT SrcVec2Type = MRI.getType(SrcVec2);
404 return SrcVec1Type.isVector() && SrcVec2Type.isVector();
405}
406
408 auto &Shuffle = cast<GShuffleVector>(MI);
409
410 Register SrcVec1 = Shuffle.getSrc1Reg();
411 Register SrcVec2 = Shuffle.getSrc2Reg();
412 LLT EltTy = MRI.getType(SrcVec1).getElementType();
413 int Width = MRI.getType(SrcVec1).getNumElements();
414
415 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
416 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
417
418 SmallVector<Register> Extracts;
419 // Select only applicable elements from unmerged values.
420 for (int Val : Shuffle.getMask()) {
421 if (Val == -1)
422 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
423 else if (Val < Width)
424 Extracts.push_back(Unmerge1.getReg(Val));
425 else
426 Extracts.push_back(Unmerge2.getReg(Val - Width));
427 }
428 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
429 if (Extracts.size() == 1)
430 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
431 else
432 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
433 MI.eraseFromParent();
434}
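// Illustrative MIR sketch (editorial, not part of the upstream file) of the
// rewrite above:
//   %s:_(<2 x s32>) = G_SHUFFLE_VECTOR %v1:_(<2 x s32>), %v2:_(<2 x s32>),
//                                      shufflemask(1, 2)
// becomes, after unmerging both sources and picking the masked elements:
//   %e0:_(s32), %e1:_(s32) = G_UNMERGE_VALUES %v1(<2 x s32>)
//   %e2:_(s32), %e3:_(s32) = G_UNMERGE_VALUES %v2(<2 x s32>)
//   %s:_(<2 x s32>) = G_BUILD_VECTOR %e1(s32), %e2(s32)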
435
438 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
439 auto ConcatMI1 =
440 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
441 auto ConcatMI2 =
442 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
443 if (!ConcatMI1 || !ConcatMI2)
444 return false;
445
446 // Check that the sources of the Concat instructions have the same type
447 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
448 MRI.getType(ConcatMI2->getSourceReg(0)))
449 return false;
450
451 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
452 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
453 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
454 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
455 // Check if the index takes a whole source register from G_CONCAT_VECTORS
456 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
457 if (Mask[i] == -1) {
458 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
459 if (i + j >= Mask.size())
460 return false;
461 if (Mask[i + j] != -1)
462 return false;
463 }
465 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
466 return false;
467 Ops.push_back(0);
468 } else if (Mask[i] % ConcatSrcNumElt == 0) {
469 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
470 if (i + j >= Mask.size())
471 return false;
472 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
473 return false;
474 }
475 // Retrieve the source register from its respective G_CONCAT_VECTORS
476 // instruction
477 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
478 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
479 } else {
480 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
481 ConcatMI1->getNumSources()));
482 }
483 } else {
484 return false;
485 }
486 }
487
489 {TargetOpcode::G_CONCAT_VECTORS,
490 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
491 return false;
492
493 return !Ops.empty();
494}
495
498 LLT SrcTy;
499 for (Register &Reg : Ops) {
500 if (Reg != 0)
501 SrcTy = MRI.getType(Reg);
502 }
503 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
504
505 Register UndefReg = 0;
506
507 for (Register &Reg : Ops) {
508 if (Reg == 0) {
509 if (UndefReg == 0)
510 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
511 Reg = UndefReg;
512 }
513 }
514
515 if (Ops.size() > 1)
516 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
517 else
518 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
519 MI.eraseFromParent();
520}
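// Illustrative MIR sketch (editorial, not part of the upstream file): when
// the mask selects whole sources of the feeding G_CONCAT_VECTORS, e.g.
//   %c1:_(<4 x s32>) = G_CONCAT_VECTORS %a:_(<2 x s32>), %b:_(<2 x s32>)
//   %c2:_(<4 x s32>) = G_CONCAT_VECTORS %c:_(<2 x s32>), %d:_(<2 x s32>)
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %c1(<4 x s32>), %c2(<4 x s32>),
//                                      shufflemask(2, 3, 4, 5)
// the shuffle is rewritten to concatenate those sources directly:
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %b(<2 x s32>), %c(<2 x s32>)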
521
526 return true;
527 }
528 return false;
529}
530
533 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
534 "Invalid instruction kind");
535 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
536 Register Src1 = MI.getOperand(1).getReg();
537 LLT SrcType = MRI.getType(Src1);
538 // As bizarre as it may look, a shuffle vector can actually produce a
539 // scalar! This is because at the IR level a <1 x ty> shuffle
540 // vector is perfectly valid.
541 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
542 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
543
544 // If the resulting vector is smaller than the size of the source
545 // vectors being concatenated, we won't be able to replace the
546 // shuffle vector into a concat_vectors.
547 //
548 // Note: We may still be able to produce a concat_vectors fed by
549 // extract_vector_elt and so on. It is less clear that would
550 // be better though, so don't bother for now.
551 //
552 // If the destination is a scalar, the size of the sources doesn't
553 // matter. We will lower the shuffle to a plain copy. This will
554 // work only if the source and destination have the same size. But
555 // that's covered by the next condition.
556 //
557 // TODO: If the sizes of the source and destination don't match,
558 // we could still emit an extract vector element in that case.
559 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
560 return false;
561
562 // Check that the shuffle mask can be broken evenly between the
563 // different sources.
564 if (DstNumElts % SrcNumElts != 0)
565 return false;
566
567 // Mask length is a multiple of the source vector length.
568 // Check if the shuffle is some kind of concatenation of the input
569 // vectors.
570 unsigned NumConcat = DstNumElts / SrcNumElts;
571 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
572 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
573 for (unsigned i = 0; i != DstNumElts; ++i) {
574 int Idx = Mask[i];
575 // Undef value.
576 if (Idx < 0)
577 continue;
578 // Ensure the indices in each SrcType sized piece are sequential and that
579 // the same source is used for the whole piece.
580 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
581 (ConcatSrcs[i / SrcNumElts] >= 0 &&
582 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
583 return false;
584 // Remember which source this index came from.
585 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
586 }
587
588 // The shuffle is concatenating multiple vectors together.
589 // Collect the different operands for that.
590 Register UndefReg;
591 Register Src2 = MI.getOperand(2).getReg();
592 for (auto Src : ConcatSrcs) {
593 if (Src < 0) {
594 if (!UndefReg) {
595 Builder.setInsertPt(*MI.getParent(), MI);
596 UndefReg = Builder.buildUndef(SrcType).getReg(0);
597 }
598 Ops.push_back(UndefReg);
599 } else if (Src == 0)
600 Ops.push_back(Src1);
601 else
602 Ops.push_back(Src2);
603 }
604 return true;
605}
606
608 MachineInstr &MI, const ArrayRef<Register> Ops) const {
609 Register DstReg = MI.getOperand(0).getReg();
610 Builder.setInsertPt(*MI.getParent(), MI);
611 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
612
613 if (Ops.size() == 1)
614 Builder.buildCopy(NewDstReg, Ops[0]);
615 else
616 Builder.buildMergeLikeInstr(NewDstReg, Ops);
617
618 replaceRegWith(MRI, DstReg, NewDstReg);
619 MI.eraseFromParent();
620}
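// Illustrative MIR sketch (editorial, not part of the upstream file) for the
// shuffle-as-concatenation case handled above:
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %v1:_(<2 x s32>), %v2:_(<2 x s32>),
//                                      shufflemask(0, 1, 2, 3)
// is replaced by a merge-like instruction of the two sources, here:
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %v1(<2 x s32>), %v2(<2 x s32>)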
621
623 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
624 "Invalid instruction kind");
625
626 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
627 return Mask.size() == 1;
628}
629
631 Register DstReg = MI.getOperand(0).getReg();
632 Builder.setInsertPt(*MI.getParent(), MI);
633
634 int I = MI.getOperand(3).getShuffleMask()[0];
635 Register Src1 = MI.getOperand(1).getReg();
636 LLT Src1Ty = MRI.getType(Src1);
637 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
638 Register SrcReg;
639 if (I >= Src1NumElts) {
640 SrcReg = MI.getOperand(2).getReg();
641 I -= Src1NumElts;
642 } else if (I >= 0)
643 SrcReg = Src1;
644
645 if (I < 0)
646 Builder.buildUndef(DstReg);
647 else if (!MRI.getType(SrcReg).isVector())
648 Builder.buildCopy(DstReg, SrcReg);
649 else
650 Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
651
652 MI.eraseFromParent();
653}
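// Illustrative MIR sketch (editorial, not part of the upstream file) for the
// single-element-mask case above; the index type is target-dependent and s64
// is only assumed here for illustration:
//   %s:_(s32) = G_SHUFFLE_VECTOR %v1:_(<4 x s32>), %v2:_(<4 x s32>),
//                                shufflemask(5)
// selects element 5 - 4 = 1 of the second source and becomes:
//   %idx:_(s64) = G_CONSTANT i64 1
//   %s:_(s32) = G_EXTRACT_VECTOR_ELT %v2(<4 x s32>), %idx(s64)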
654
655namespace {
656
657/// Select a preference between two uses. CurrentUse is the current preference,
658/// while the *ForCandidate values describe the candidate under consideration.
659PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
660 PreferredTuple &CurrentUse,
661 const LLT TyForCandidate,
662 unsigned OpcodeForCandidate,
663 MachineInstr *MIForCandidate) {
664 if (!CurrentUse.Ty.isValid()) {
665 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
666 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
667 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
668 return CurrentUse;
669 }
670
671 // We permit the extend to hoist through basic blocks but this is only
672 // sensible if the target has extending loads. If you end up lowering back
673 // into a load and extend during the legalizer then the end result is
674 // hoisting the extend up to the load.
675
676 // Prefer defined extensions to undefined extensions as these are more
677 // likely to reduce the number of instructions.
678 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
679 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
680 return CurrentUse;
681 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
682 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
683 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
684
685 // Prefer sign extensions to zero extensions as sign-extensions tend to be
686 // more expensive. Don't do this if the load is already a zero-extend load
687 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
688 // later.
689 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
690 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
691 OpcodeForCandidate == TargetOpcode::G_ZEXT)
692 return CurrentUse;
693 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
694 OpcodeForCandidate == TargetOpcode::G_SEXT)
695 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
696 }
697
698 // This is potentially target specific. We've chosen the largest type
699 // because G_TRUNC is usually free. One potential catch with this is that
700 // some targets have a reduced number of larger registers than smaller
701 // registers and this choice potentially increases the live-range for the
702 // larger value.
703 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
704 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
705 }
706 return CurrentUse;
707}
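// Illustrative example (editorial sketch, not part of the upstream file): if
// a G_LOAD result feeds both %a:_(s64) = G_ANYEXT and %b:_(s32) = G_SEXT, the
// fold above settles on the s32 sign-extend, because defined extensions are
// preferred over G_ANYEXT even when the candidate type is smaller. The load
// then becomes an s32 G_SEXTLOAD and the s64 any-extend is rewritten to
// extend from that result (see applyCombineExtendingLoads below).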
708
709/// Find a suitable place to insert some instructions and insert them. This
710/// function accounts for special cases like inserting before a PHI node.
711/// The current strategy for inserting before PHI's is to duplicate the
712/// instructions for each predecessor. However, while that's ok for G_TRUNC
713/// on most targets since it generally requires no code, other targets/cases may
714/// want to try harder to find a dominating block.
715static void InsertInsnsWithoutSideEffectsBeforeUse(
718 MachineOperand &UseMO)>
719 Inserter) {
720 MachineInstr &UseMI = *UseMO.getParent();
721
722 MachineBasicBlock *InsertBB = UseMI.getParent();
723
724 // If the use is a PHI then we want the predecessor block instead.
725 if (UseMI.isPHI()) {
726 MachineOperand *PredBB = std::next(&UseMO);
727 InsertBB = PredBB->getMBB();
728 }
729
730 // If the block is the same block as the def then we want to insert just after
731 // the def instead of at the start of the block.
732 if (InsertBB == DefMI.getParent()) {
734 Inserter(InsertBB, std::next(InsertPt), UseMO);
735 return;
736 }
737
738 // Otherwise we want the start of the BB
739 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
740}
741} // end anonymous namespace
742
744 PreferredTuple Preferred;
745 if (matchCombineExtendingLoads(MI, Preferred)) {
746 applyCombineExtendingLoads(MI, Preferred);
747 return true;
748 }
749 return false;
750}
751
752static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
753 unsigned CandidateLoadOpc;
754 switch (ExtOpc) {
755 case TargetOpcode::G_ANYEXT:
756 CandidateLoadOpc = TargetOpcode::G_LOAD;
757 break;
758 case TargetOpcode::G_SEXT:
759 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
760 break;
761 case TargetOpcode::G_ZEXT:
762 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
763 break;
764 default:
765 llvm_unreachable("Unexpected extend opc");
766 }
767 return CandidateLoadOpc;
768}
769
771 MachineInstr &MI, PreferredTuple &Preferred) const {
772 // We match the loads and follow the uses to the extend instead of matching
773 // the extends and following the def to the load. This is because the load
774 // must remain in the same position for correctness (unless we also add code
775 // to find a safe place to sink it) whereas the extend is freely movable.
776 // It also prevents us from duplicating the load for the volatile case or just
777 // for performance.
778 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
779 if (!LoadMI)
780 return false;
781
782 Register LoadReg = LoadMI->getDstReg();
783
784 LLT LoadValueTy = MRI.getType(LoadReg);
785 if (!LoadValueTy.isScalar())
786 return false;
787
788 // Most architectures are going to legalize <s8 loads into at least a 1 byte
789 // load, and the MMOs can only describe memory accesses in multiples of bytes.
790 // If we try to perform extload combining on those, we can end up with
791 // %a(s8) = extload %ptr (load 1 byte from %ptr)
792 // ... which is an illegal extload instruction.
793 if (LoadValueTy.getSizeInBits() < 8)
794 return false;
795
796 // Non-power-of-2 types will very likely be legalized into multiple
797 // loads. Don't bother trying to match them into extending loads.
799 return false;
800
801 // Find the preferred type aside from the any-extends (unless it's the only
802 // one) and non-extending ops. We'll emit an extending load to that type and
803 // emit a variant of (extend (trunc X)) for the others according to the
804 // relative type sizes. At the same time, pick an extend to use based on the
805 // extend involved in the chosen type.
806 unsigned PreferredOpcode =
807 isa<GLoad>(&MI)
808 ? TargetOpcode::G_ANYEXT
809 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
810 Preferred = {LLT(), PreferredOpcode, nullptr};
811 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
812 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
813 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
814 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
815 const auto &MMO = LoadMI->getMMO();
816 // Don't do anything for atomics.
817 if (MMO.isAtomic())
818 continue;
819 // Check for legality.
820 if (!isPreLegalize()) {
821 LegalityQuery::MemDesc MMDesc(MMO);
822 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
823 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
824 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
825 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
826 .Action != LegalizeActions::Legal)
827 continue;
828 }
829 Preferred = ChoosePreferredUse(MI, Preferred,
830 MRI.getType(UseMI.getOperand(0).getReg()),
831 UseMI.getOpcode(), &UseMI);
832 }
833 }
834
835 // There were no extends
836 if (!Preferred.MI)
837 return false;
838 // It should be impossible to choose an extend without selecting a different
839 // type since by definition the result of an extend is larger.
840 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
841
842 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
843 return true;
844}
845
847 MachineInstr &MI, PreferredTuple &Preferred) const {
848 // Rewrite the load to the chosen extending load.
849 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
850
851 // Inserter to insert a truncate back to the original type at a given point
852 // with some basic CSE to limit truncate duplication to one per BB.
854 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
855 MachineBasicBlock::iterator InsertBefore,
856 MachineOperand &UseMO) {
857 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
858 if (PreviouslyEmitted) {
859 Observer.changingInstr(*UseMO.getParent());
860 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
861 Observer.changedInstr(*UseMO.getParent());
862 return;
863 }
864
865 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
866 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
867 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
868 EmittedInsns[InsertIntoBB] = NewMI;
869 replaceRegOpWith(MRI, UseMO, NewDstReg);
870 };
871
872 Observer.changingInstr(MI);
873 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
874 MI.setDesc(Builder.getTII().get(LoadOpc));
875
876 // Rewrite all the uses to fix up the types.
877 auto &LoadValue = MI.getOperand(0);
879 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
880
881 for (auto *UseMO : Uses) {
882 MachineInstr *UseMI = UseMO->getParent();
883
884 // If the extend is compatible with the preferred extend then we should fix
885 // up the type and extend so that it uses the preferred use.
886 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
887 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
888 Register UseDstReg = UseMI->getOperand(0).getReg();
889 MachineOperand &UseSrcMO = UseMI->getOperand(1);
890 const LLT UseDstTy = MRI.getType(UseDstReg);
891 if (UseDstReg != ChosenDstReg) {
892 if (Preferred.Ty == UseDstTy) {
893 // If the use has the same type as the preferred use, then merge
894 // the vregs and erase the extend. For example:
895 // %1:_(s8) = G_LOAD ...
896 // %2:_(s32) = G_SEXT %1(s8)
897 // %3:_(s32) = G_ANYEXT %1(s8)
898 // ... = ... %3(s32)
899 // rewrites to:
900 // %2:_(s32) = G_SEXTLOAD ...
901 // ... = ... %2(s32)
902 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
903 Observer.erasingInstr(*UseMO->getParent());
904 UseMO->getParent()->eraseFromParent();
905 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
906 // If the preferred size is smaller, then keep the extend but extend
907 // from the result of the extending load. For example:
908 // %1:_(s8) = G_LOAD ...
909 // %2:_(s32) = G_SEXT %1(s8)
910 // %3:_(s64) = G_ANYEXT %1(s8)
911 // ... = ... %3(s64)
912 // rewrites to:
913 // %2:_(s32) = G_SEXTLOAD ...
914 // %3:_(s64) = G_ANYEXT %2:_(s32)
915 // ... = ... %3(s64)
916 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
917 } else {
918 // If the preferred size is large, then insert a truncate. For
919 // example:
920 // %1:_(s8) = G_LOAD ...
921 // %2:_(s64) = G_SEXT %1(s8)
922 // %3:_(s32) = G_ZEXT %1(s8)
923 // ... = ... %3(s32)
924 // rewrites to:
925 // %2:_(s64) = G_SEXTLOAD ...
926 // %4:_(s8) = G_TRUNC %2:_(s64)
927 // %3:_(s32) = G_ZEXT %4:_(s8)
928 // ... = ... %3(s32)
929 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
930 InsertTruncAt);
931 }
932 continue;
933 }
934 // The use is (one of) the uses of the preferred use we chose earlier.
935 // We're going to update the load to def this value later so just erase
936 // the old extend.
937 Observer.erasingInstr(*UseMO->getParent());
938 UseMO->getParent()->eraseFromParent();
939 continue;
940 }
941
942 // The use isn't an extend. Truncate back to the type we originally loaded.
943 // This is free on many targets.
944 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
945 }
946
947 MI.getOperand(0).setReg(ChosenDstReg);
948 Observer.changedInstr(MI);
949}
950
952 BuildFnTy &MatchInfo) const {
953 assert(MI.getOpcode() == TargetOpcode::G_AND);
954
955 // If we have the following code:
956 // %mask = G_CONSTANT 255
957 // %ld = G_LOAD %ptr, (load s16)
958 // %and = G_AND %ld, %mask
959 //
960 // Try to fold it into
961 // %ld = G_ZEXTLOAD %ptr, (load s8)
962
963 Register Dst = MI.getOperand(0).getReg();
964 if (MRI.getType(Dst).isVector())
965 return false;
966
967 auto MaybeMask =
968 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
969 if (!MaybeMask)
970 return false;
971
972 APInt MaskVal = MaybeMask->Value;
973
974 if (!MaskVal.isMask())
975 return false;
976
977 Register SrcReg = MI.getOperand(1).getReg();
978 // Don't use getOpcodeDef() here since intermediate instructions may have
979 // multiple users.
980 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
981 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
982 return false;
983
984 Register LoadReg = LoadMI->getDstReg();
985 LLT RegTy = MRI.getType(LoadReg);
986 Register PtrReg = LoadMI->getPointerReg();
987 unsigned RegSize = RegTy.getSizeInBits();
988 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
989 unsigned MaskSizeBits = MaskVal.countr_one();
990
991 // The mask may not be larger than the in-memory type, as it might cover sign
992 // extended bits
993 if (MaskSizeBits > LoadSizeBits.getValue())
994 return false;
995
996 // If the mask covers the whole destination register, there's nothing to
997 // extend
998 if (MaskSizeBits >= RegSize)
999 return false;
1000
1001 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
1002 // at least byte loads. Avoid creating such loads here
1003 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
1004 return false;
1005
1006 const MachineMemOperand &MMO = LoadMI->getMMO();
1007 LegalityQuery::MemDesc MemDesc(MMO);
1008
1009 // Don't modify the memory access size if this is atomic/volatile, but we can
1010 // still adjust the opcode to indicate the high bit behavior.
1011 if (LoadMI->isSimple())
1012 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
1013 else if (LoadSizeBits.getValue() > MaskSizeBits ||
1014 LoadSizeBits.getValue() == RegSize)
1015 return false;
1016
1017 // TODO: Could check if it's legal with the reduced or original memory size.
1019 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
1020 return false;
1021
1022 MatchInfo = [=](MachineIRBuilder &B) {
1023 B.setInstrAndDebugLoc(*LoadMI);
1024 auto &MF = B.getMF();
1025 auto PtrInfo = MMO.getPointerInfo();
1026 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
1027 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
1028 LoadMI->eraseFromParent();
1029 };
1030 return true;
1031}
1032
1034 const MachineInstr &UseMI) const {
1035 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1036 "shouldn't consider debug uses");
1037 assert(DefMI.getParent() == UseMI.getParent());
1038 if (&DefMI == &UseMI)
1039 return true;
1040 const MachineBasicBlock &MBB = *DefMI.getParent();
1041 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
1042 return &MI == &DefMI || &MI == &UseMI;
1043 });
1044 if (DefOrUse == MBB.end())
1045 llvm_unreachable("Block must contain both DefMI and UseMI!");
1046 return &*DefOrUse == &DefMI;
1047}
1048
1050 const MachineInstr &UseMI) const {
1051 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1052 "shouldn't consider debug uses");
1053 if (MDT)
1054 return MDT->dominates(&DefMI, &UseMI);
1055 else if (DefMI.getParent() != UseMI.getParent())
1056 return false;
1057
1058 return isPredecessor(DefMI, UseMI);
1059}
1060
1062 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1063 Register SrcReg = MI.getOperand(1).getReg();
1064 Register LoadUser = SrcReg;
1065
1066 if (MRI.getType(SrcReg).isVector())
1067 return false;
1068
1069 Register TruncSrc;
1070 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1071 LoadUser = TruncSrc;
1072
1073 uint64_t SizeInBits = MI.getOperand(2).getImm();
1074 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1075 // need any extend at all, just a truncate.
1076 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1077 // If truncating more than the original extended value, abort.
1078 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1079 if (TruncSrc &&
1080 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1081 return false;
1082 if (LoadSizeBits == SizeInBits)
1083 return true;
1084 }
1085 return false;
1086}
1087
1089 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1090 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1091 MI.eraseFromParent();
1092}
1093
1095 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1096 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1097
1098 Register DstReg = MI.getOperand(0).getReg();
1099 LLT RegTy = MRI.getType(DstReg);
1100
1101 // Only supports scalars for now.
1102 if (RegTy.isVector())
1103 return false;
1104
1105 Register SrcReg = MI.getOperand(1).getReg();
1106 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1107 if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
1108 return false;
1109
1110 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1111
1112 // If the sign extend extends from a narrower width than the load's width,
1113 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1114 // Avoid widening the load at all.
1115 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1116
1117 // Don't generate G_SEXTLOADs with a < 1 byte width.
1118 if (NewSizeBits < 8)
1119 return false;
1120 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1121 // anyway for most targets.
1122 if (!isPowerOf2_32(NewSizeBits))
1123 return false;
1124
1125 const MachineMemOperand &MMO = LoadDef->getMMO();
1126 LegalityQuery::MemDesc MMDesc(MMO);
1127
1128 // Don't modify the memory access size if this is atomic/volatile, but we can
1129 // still adjust the opcode to indicate the high bit behavior.
1130 if (LoadDef->isSimple())
1131 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1132 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1133 return false;
1134
1135 // TODO: Could check if it's legal with the reduced or original memory size.
1136 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1137 {MRI.getType(LoadDef->getDstReg()),
1138 MRI.getType(LoadDef->getPointerReg())},
1139 {MMDesc}}))
1140 return false;
1141
1142 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1143 return true;
1144}
1145
1147 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1148 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1149 Register LoadReg;
1150 unsigned ScalarSizeBits;
1151 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1152 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1153
1154 // If we have the following:
1155 // %ld = G_LOAD %ptr, (load 2)
1156 // %ext = G_SEXT_INREG %ld, 8
1157 // ==>
1158 // %ld = G_SEXTLOAD %ptr (load 1)
1159
1160 auto &MMO = LoadDef->getMMO();
1161 Builder.setInstrAndDebugLoc(*LoadDef);
1162 auto &MF = Builder.getMF();
1163 auto PtrInfo = MMO.getPointerInfo();
1164 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1165 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1166 LoadDef->getPointerReg(), *NewMMO);
1167 MI.eraseFromParent();
1168
1169 // Not all loads can be deleted, so make sure the old one is removed.
1170 LoadDef->eraseFromParent();
1171}
1172
1173/// Return true if 'MI' is a load or a store whose address operand may be
1174/// folded into the load / store addressing mode.
1178 auto *MF = MI->getMF();
1179 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1180 if (!Addr)
1181 return false;
1182
1183 AM.HasBaseReg = true;
1184 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1185 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1186 else
1187 AM.Scale = 1; // [reg +/- reg]
1188
1189 return TLI.isLegalAddressingMode(
1190 MF->getDataLayout(), AM,
1191 getTypeForLLT(MI->getMMO().getMemoryType(),
1192 MF->getFunction().getContext()),
1193 MI->getMMO().getAddrSpace());
1194}
1195
1196static unsigned getIndexedOpc(unsigned LdStOpc) {
1197 switch (LdStOpc) {
1198 case TargetOpcode::G_LOAD:
1199 return TargetOpcode::G_INDEXED_LOAD;
1200 case TargetOpcode::G_STORE:
1201 return TargetOpcode::G_INDEXED_STORE;
1202 case TargetOpcode::G_ZEXTLOAD:
1203 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1204 case TargetOpcode::G_SEXTLOAD:
1205 return TargetOpcode::G_INDEXED_SEXTLOAD;
1206 default:
1207 llvm_unreachable("Unexpected opcode");
1208 }
1209}
1210
1211bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1212 // Check for legality.
1213 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1214 LLT Ty = MRI.getType(LdSt.getReg(0));
1215 LLT MemTy = LdSt.getMMO().getMemoryType();
1217 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1219 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1220 SmallVector<LLT> OpTys;
1221 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1222 OpTys = {PtrTy, Ty, Ty};
1223 else
1224 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1225
1226 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1227 return isLegal(Q);
1228}
1229
1231 "post-index-use-threshold", cl::Hidden, cl::init(32),
1232 cl::desc("Number of uses of a base pointer to check before it is no longer "
1233 "considered for post-indexing."));
1234
1235bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1237 bool &RematOffset) const {
1238 // We're looking for the following pattern, for either load or store:
1239 // %baseptr:_(p0) = ...
1240 // G_STORE %val(s64), %baseptr(p0)
1241 // %offset:_(s64) = G_CONSTANT i64 -256
1242 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1243 const auto &TLI = getTargetLowering();
1244
1245 Register Ptr = LdSt.getPointerReg();
1246 // If the store is the only use, don't bother.
1247 if (MRI.hasOneNonDBGUse(Ptr))
1248 return false;
1249
1250 if (!isIndexedLoadStoreLegal(LdSt))
1251 return false;
1252
1253 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1254 return false;
1255
1256 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1257 auto *PtrDef = MRI.getVRegDef(Ptr);
1258
1259 unsigned NumUsesChecked = 0;
1260 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1261 if (++NumUsesChecked > PostIndexUseThreshold)
1262 return false; // Try to avoid exploding compile time.
1263
1264 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1265 // The use itself might be dead. This can happen during combines if DCE
1266 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1267 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1268 continue;
1269
1270 // Check that the user of this isn't the store, otherwise we'd generate an
1271 // indexed store defining its own use.
1272 if (StoredValDef == &Use)
1273 continue;
1274
1275 Offset = PtrAdd->getOffsetReg();
1276 if (!ForceLegalIndexing &&
1277 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1278 /*IsPre*/ false, MRI))
1279 continue;
1280
1281 // Make sure the offset calculation is before the potentially indexed op.
1282 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1283 RematOffset = false;
1284 if (!dominates(*OffsetDef, LdSt)) {
1285 // If the offset however is just a G_CONSTANT, we can always just
1286 // rematerialize it where we need it.
1287 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1288 continue;
1289 RematOffset = true;
1290 }
1291
1292 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1293 if (&BasePtrUse == PtrDef)
1294 continue;
1295
1296 // If the user is a later load/store that can be post-indexed, then don't
1297 // combine this one.
1298 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1299 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1300 dominates(LdSt, *BasePtrLdSt) &&
1301 isIndexedLoadStoreLegal(*BasePtrLdSt))
1302 return false;
1303
1304 // Now we're looking for the key G_PTR_ADD instruction, which contains
1305 // the offset add that we want to fold.
1306 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1307 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1308 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1309 // If the use is in a different block, then we may produce worse code
1310 // due to the extra register pressure.
1311 if (BaseUseUse.getParent() != LdSt.getParent())
1312 return false;
1313
1314 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1315 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1316 return false;
1317 }
1318 if (!dominates(LdSt, BasePtrUse))
1319 return false; // All uses must be dominated by the load/store.
1320 }
1321 }
1322
1323 Addr = PtrAdd->getReg(0);
1324 Base = PtrAdd->getBaseReg();
1325 return true;
1326 }
1327
1328 return false;
1329}
1330
1331bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1332 Register &Base,
1333 Register &Offset) const {
1334 auto &MF = *LdSt.getParent()->getParent();
1335 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1336
1337 Addr = LdSt.getPointerReg();
1338 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1339 MRI.hasOneNonDBGUse(Addr))
1340 return false;
1341
1342 if (!ForceLegalIndexing &&
1343 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1344 return false;
1345
1346 if (!isIndexedLoadStoreLegal(LdSt))
1347 return false;
1348
1349 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1350 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1351 return false;
1352
1353 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1354 // Would require a copy.
1355 if (Base == St->getValueReg())
1356 return false;
1357
1358 // We're expecting one use of Addr in MI, but it could also be the
1359 // value stored, which isn't actually dominated by the instruction.
1360 if (St->getValueReg() == Addr)
1361 return false;
1362 }
1363
1364 // Avoid increasing cross-block register pressure.
1365 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1366 if (AddrUse.getParent() != LdSt.getParent())
1367 return false;
1368
1369 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1370 // That might allow us to end base's liveness here by adjusting the constant.
1371 bool RealUse = false;
1372 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1373 if (!dominates(LdSt, AddrUse))
1374 return false; // All uses must be dominated by the load/store.
1375
1376 // If Ptr may be folded in addressing mode of other use, then it's
1377 // not profitable to do this transformation.
1378 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1379 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1380 RealUse = true;
1381 } else {
1382 RealUse = true;
1383 }
1384 }
1385 return RealUse;
1386}
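// Illustrative MIR sketch (editorial, not part of the upstream file) of a
// pre-indexing candidate found above; the actual rewrite is done by the
// indexed load/store apply routine later in this file:
//   %off:_(s64) = G_CONSTANT i64 16
//   %addr:_(p0) = G_PTR_ADD %base:_(p0), %off(s64)
//   G_STORE %val:_(s32), %addr(p0) :: (store (s32))
// combines into a pre-indexed store that also defines the updated pointer:
//   %addr:_(p0) = G_INDEXED_STORE %val(s32), %base(p0), %off(s64), 1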
1387
1389 MachineInstr &MI, BuildFnTy &MatchInfo) const {
1390 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1391
1392 // Check if there is a load that defines the vector being extracted from.
1393 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1394 if (!LoadMI)
1395 return false;
1396
1397 Register Vector = MI.getOperand(1).getReg();
1398 LLT VecEltTy = MRI.getType(Vector).getElementType();
1399
1400 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1401
1402 // Checking whether we should reduce the load width.
1403 if (!MRI.hasOneNonDBGUse(Vector))
1404 return false;
1405
1406 // Check if the defining load is simple.
1407 if (!LoadMI->isSimple())
1408 return false;
1409
1410 // If the vector element type is not a multiple of a byte then we are unable
1411 // to correctly compute an address to load only the extracted element as a
1412 // scalar.
1413 if (!VecEltTy.isByteSized())
1414 return false;
1415
1416 // Check for load fold barriers between the extraction and the load.
1417 if (MI.getParent() != LoadMI->getParent())
1418 return false;
1419 const unsigned MaxIter = 20;
1420 unsigned Iter = 0;
1421 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1422 if (II->isLoadFoldBarrier())
1423 return false;
1424 if (Iter++ == MaxIter)
1425 return false;
1426 }
1427
1428 // Check if the new load that we are going to create is legal
1429 // if we are in the post-legalization phase.
1430 MachineMemOperand MMO = LoadMI->getMMO();
1431 Align Alignment = MMO.getAlign();
1432 MachinePointerInfo PtrInfo;
1434
1435 // Find the appropriate PtrInfo if the offset is a known constant.
1436 // This is required to create the memory operand for the narrowed load.
1437 // This machine memory operand object helps us reason about legality
1438 // before we proceed to combine the instruction.
1439 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1440 int Elt = CVal->getZExtValue();
1441 // FIXME: should be (ABI size)*Elt.
1442 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1443 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1444 } else {
1445 // Discard the pointer info except the address space because the memory
1446 // operand can't represent this new access since the offset is variable.
1447 Offset = VecEltTy.getSizeInBits() / 8;
1449 }
1450
1451 Alignment = commonAlignment(Alignment, Offset);
1452
1453 Register VecPtr = LoadMI->getPointerReg();
1454 LLT PtrTy = MRI.getType(VecPtr);
1455
1456 MachineFunction &MF = *MI.getMF();
1457 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1458
1459 LegalityQuery::MemDesc MMDesc(*NewMMO);
1460
1462 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1463 return false;
1464
1465 // Load must be allowed and fast on the target.
1467 auto &DL = MF.getDataLayout();
1468 unsigned Fast = 0;
1469 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1470 &Fast) ||
1471 !Fast)
1472 return false;
1473
1474 Register Result = MI.getOperand(0).getReg();
1475 Register Index = MI.getOperand(2).getReg();
1476
1477 MatchInfo = [=](MachineIRBuilder &B) {
1478 GISelObserverWrapper DummyObserver;
1479 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1480 // Get a pointer to the vector element.
1481 Register finalPtr = Helper.getVectorElementPointer(
1482 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1483 Index);
1484 // New G_LOAD instruction.
1485 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1486 // Remove the original G_LOAD instruction.
1487 LoadMI->eraseFromParent();
1488 };
1489
1490 return true;
1491}
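// Illustrative MIR sketch (editorial, not part of the upstream file) of the
// rewrite built by the MatchInfo lambda above:
//   %v:_(<4 x s32>) = G_LOAD %p:_(p0) :: (load (<4 x s32>))
//   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v(<4 x s32>), %idx:_(s64)
// becomes a single narrowed scalar load from the element's address, where
// %eltp is produced by LegalizerHelper::getVectorElementPointer:
//   %eltp:_(p0) = ... ; %p advanced by %idx elements
//   %e:_(s32) = G_LOAD %eltp(p0) :: (load (s32))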
1492
1494 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1495 auto &LdSt = cast<GLoadStore>(MI);
1496
1497 if (LdSt.isAtomic())
1498 return false;
1499
1500 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1501 MatchInfo.Offset);
1502 if (!MatchInfo.IsPre &&
1503 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1504 MatchInfo.Offset, MatchInfo.RematOffset))
1505 return false;
1506
1507 return true;
1508}
1509
1511 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1512 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1513 unsigned Opcode = MI.getOpcode();
1514 bool IsStore = Opcode == TargetOpcode::G_STORE;
1515 unsigned NewOpcode = getIndexedOpc(Opcode);
1516
1517 // If the offset constant didn't happen to dominate the load/store, we can
1518 // just clone it as needed.
1519 if (MatchInfo.RematOffset) {
1520 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1521 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1522 *OldCst->getOperand(1).getCImm());
1523 MatchInfo.Offset = NewCst.getReg(0);
1524 }
1525
1526 auto MIB = Builder.buildInstr(NewOpcode);
1527 if (IsStore) {
1528 MIB.addDef(MatchInfo.Addr);
1529 MIB.addUse(MI.getOperand(0).getReg());
1530 } else {
1531 MIB.addDef(MI.getOperand(0).getReg());
1532 MIB.addDef(MatchInfo.Addr);
1533 }
1534
1535 MIB.addUse(MatchInfo.Base);
1536 MIB.addUse(MatchInfo.Offset);
1537 MIB.addImm(MatchInfo.IsPre);
1538 MIB->cloneMemRefs(*MI.getMF(), MI);
1539 MI.eraseFromParent();
1540 AddrDef.eraseFromParent();
1541
1542 LLVM_DEBUG(dbgs() << " Combined into indexed operation");
1543}
1544
1546 MachineInstr *&OtherMI) const {
1547 unsigned Opcode = MI.getOpcode();
1548 bool IsDiv, IsSigned;
1549
1550 switch (Opcode) {
1551 default:
1552 llvm_unreachable("Unexpected opcode!");
1553 case TargetOpcode::G_SDIV:
1554 case TargetOpcode::G_UDIV: {
1555 IsDiv = true;
1556 IsSigned = Opcode == TargetOpcode::G_SDIV;
1557 break;
1558 }
1559 case TargetOpcode::G_SREM:
1560 case TargetOpcode::G_UREM: {
1561 IsDiv = false;
1562 IsSigned = Opcode == TargetOpcode::G_SREM;
1563 break;
1564 }
1565 }
1566
1567 Register Src1 = MI.getOperand(1).getReg();
1568 unsigned DivOpcode, RemOpcode, DivremOpcode;
1569 if (IsSigned) {
1570 DivOpcode = TargetOpcode::G_SDIV;
1571 RemOpcode = TargetOpcode::G_SREM;
1572 DivremOpcode = TargetOpcode::G_SDIVREM;
1573 } else {
1574 DivOpcode = TargetOpcode::G_UDIV;
1575 RemOpcode = TargetOpcode::G_UREM;
1576 DivremOpcode = TargetOpcode::G_UDIVREM;
1577 }
1578
1579 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1580 return false;
1581
1582 // Combine:
1583 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1584 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1585 // into:
1586 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1587
1588 // Combine:
1589 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1590 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1591 // into:
1592 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1593
1594 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1595 if (MI.getParent() == UseMI.getParent() &&
1596 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1597 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1598 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1599 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1600 OtherMI = &UseMI;
1601 return true;
1602 }
1603 }
1604
1605 return false;
1606}
1607
1609 MachineInstr *&OtherMI) const {
1610 unsigned Opcode = MI.getOpcode();
1611 assert(OtherMI && "OtherMI shouldn't be empty.");
1612
1613 Register DestDivReg, DestRemReg;
1614 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1615 DestDivReg = MI.getOperand(0).getReg();
1616 DestRemReg = OtherMI->getOperand(0).getReg();
1617 } else {
1618 DestDivReg = OtherMI->getOperand(0).getReg();
1619 DestRemReg = MI.getOperand(0).getReg();
1620 }
1621
1622 bool IsSigned =
1623 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1624
1625 // Check which instruction is first in the block so we don't break def-use
1626 // deps by "moving" the instruction incorrectly. Also keep track of which
1627 // instruction is first so we pick its operands, avoiding use-before-def
1628 // bugs.
1629 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1630 Builder.setInstrAndDebugLoc(*FirstInst);
1631
1632 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1633 : TargetOpcode::G_UDIVREM,
1634 {DestDivReg, DestRemReg},
1635 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1636 MI.eraseFromParent();
1637 OtherMI->eraseFromParent();
1638}
1639
1641 MachineInstr &MI, MachineInstr *&BrCond) const {
1642 assert(MI.getOpcode() == TargetOpcode::G_BR);
1643
1644 // Try to match the following:
1645 // bb1:
1646 // G_BRCOND %c1, %bb2
1647 // G_BR %bb3
1648 // bb2:
1649 // ...
1650 // bb3:
1651
1652 // The above pattern does not have a fall through to the successor bb2, always
1653 // resulting in a branch no matter which path is taken. Here we try to find
1654 // and replace that pattern with a conditional branch to bb3 and otherwise a
1655 // fallthrough to bb2. This is generally better for branch predictors.
1656
1657 MachineBasicBlock *MBB = MI.getParent();
1659 if (BrIt == MBB->begin())
1660 return false;
1661 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1662
1663 BrCond = &*std::prev(BrIt);
1664 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1665 return false;
1666
1667 // Check that the next block is the conditional branch target. Also make sure
1668 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1669 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1670 return BrCondTarget != MI.getOperand(0).getMBB() &&
1671 MBB->isLayoutSuccessor(BrCondTarget);
1672}
1673
1675 MachineInstr &MI, MachineInstr *&BrCond) const {
1676 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1677 Builder.setInstrAndDebugLoc(*BrCond);
1678 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1679 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1680 // this to i1 only since we might not know for sure what kind of
1681 // compare generated the condition value.
1682 auto True = Builder.buildConstant(
1683 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1684 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1685
1686 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1687 Observer.changingInstr(MI);
1688 MI.getOperand(0).setMBB(FallthroughBB);
1689 Observer.changedInstr(MI);
1690
1691 // Change the conditional branch to use the inverted condition and
1692 // new target block.
1693 Observer.changingInstr(*BrCond);
1694 BrCond->getOperand(0).setReg(Xor.getReg(0));
1695 BrCond->getOperand(1).setMBB(BrTarget);
1696 Observer.changedInstr(*BrCond);
1697}
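// Illustrative MIR sketch (editorial, not part of the upstream file): with
// %bb2 as the layout successor, the match/apply pair above turns
//   bb1: G_BRCOND %c(s1), %bb2
//        G_BR %bb3
// into an inverted conditional branch whose unconditional branch now targets
// the fallthrough block:
//   bb1: %t:_(s1) = G_CONSTANT i1 true
//        %nc:_(s1) = G_XOR %c(s1), %t(s1)
//        G_BRCOND %nc(s1), %bb3
//        G_BR %bb2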
1698
1700 MachineIRBuilder HelperBuilder(MI);
1701 GISelObserverWrapper DummyObserver;
1702 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1703 return Helper.lowerMemcpyInline(MI) ==
1705}
1706
1708 unsigned MaxLen) const {
1709 MachineIRBuilder HelperBuilder(MI);
1710 GISelObserverWrapper DummyObserver;
1711 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1712 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1713         LegalizerHelper::LegalizeResult::Legalized;
1714}
1715
1716static APFloat constantFoldFpUnary(const MachineInstr &MI,
1717 const MachineRegisterInfo &MRI,
1718 const APFloat &Val) {
1719 APFloat Result(Val);
1720 switch (MI.getOpcode()) {
1721 default:
1722 llvm_unreachable("Unexpected opcode!");
1723 case TargetOpcode::G_FNEG: {
1724 Result.changeSign();
1725 return Result;
1726 }
1727 case TargetOpcode::G_FABS: {
1728 Result.clearSign();
1729 return Result;
1730 }
1731 case TargetOpcode::G_FPEXT:
1732 case TargetOpcode::G_FPTRUNC: {
1733 bool Unused;
1734 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1735    Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
1736 &Unused);
1737 return Result;
1738 }
1739 case TargetOpcode::G_FSQRT: {
1740 bool Unused;
1741    Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1742 &Unused);
1743 Result = APFloat(sqrt(Result.convertToDouble()));
1744 break;
1745 }
1746 case TargetOpcode::G_FLOG2: {
1747 bool Unused;
1748    Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1749 &Unused);
1750 Result = APFloat(log2(Result.convertToDouble()));
1751 break;
1752 }
1753 }
1754  // Convert the `APFloat` back to the appropriate IEEE semantics for `DstTy`;
1755  // otherwise `buildFConstant` would assert on a size mismatch. Only `G_FSQRT`
1756  // and `G_FLOG2` reach here.
1757 bool Unused;
1758 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1759 return Result;
1760}
1761
1763 MachineInstr &MI, const ConstantFP *Cst) const {
1764 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1765 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1766 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1767 MI.eraseFromParent();
1768}
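// Illustrative fold performed by the helpers above (the constant is chosen
// arbitrarily for the example):
//   %c:_(s64) = G_FCONSTANT double 4.0
//   %d:_(s64) = G_FSQRT %c
// -->
//   %d:_(s64) = G_FCONSTANT double 2.0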
1769
1771 PtrAddChain &MatchInfo) const {
1772 // We're trying to match the following pattern:
1773 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1774 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1775 // -->
1776 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1777
1778 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1779 return false;
1780
1781 Register Add2 = MI.getOperand(1).getReg();
1782 Register Imm1 = MI.getOperand(2).getReg();
1783 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1784 if (!MaybeImmVal)
1785 return false;
1786
1787 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1788 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1789 return false;
1790
1791 Register Base = Add2Def->getOperand(1).getReg();
1792 Register Imm2 = Add2Def->getOperand(2).getReg();
1793 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1794 if (!MaybeImm2Val)
1795 return false;
1796
1797 // Check if the new combined immediate forms an illegal addressing mode.
1798 // Do not combine if it was legal before but would get illegal.
1799 // To do so, we need to find a load/store user of the pointer to get
1800 // the access type.
1801 Type *AccessTy = nullptr;
1802 auto &MF = *MI.getMF();
1803 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1804 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1805 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1806 MF.getFunction().getContext());
1807 break;
1808 }
1809 }
1810  TargetLoweringBase::AddrMode AMNew;
1811 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1812 AMNew.BaseOffs = CombinedImm.getSExtValue();
1813 if (AccessTy) {
1814 AMNew.HasBaseReg = true;
1815    TargetLoweringBase::AddrMode AMOld;
1816 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1817 AMOld.HasBaseReg = true;
1818 unsigned AS = MRI.getType(Add2).getAddressSpace();
1819 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1820 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1821 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1822 return false;
1823 }
1824
1825 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1826 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1827 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1828 // largest signed integer that fits into the index type, which is the maximum
1829 // size of allocated objects according to the IR Language Reference.
1830 unsigned PtrAddFlags = MI.getFlags();
1831 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1832 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1833 bool IsInBounds =
1834 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1835 unsigned Flags = 0;
1836 if (IsNoUWrap)
1837    Flags |= MachineInstr::MIFlag::NoUWrap;
1838 if (IsInBounds) {
1839    Flags |= MachineInstr::MIFlag::InBounds;
1840    Flags |= MachineInstr::MIFlag::NoUSWrap;
1841 }
1842
1843 // Pass the combined immediate to the apply function.
1844 MatchInfo.Imm = AMNew.BaseOffs;
1845 MatchInfo.Base = Base;
1846 MatchInfo.Bank = getRegBank(Imm2);
1847 MatchInfo.Flags = Flags;
1848 return true;
1849}
1850
1852 PtrAddChain &MatchInfo) const {
1853 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1854 MachineIRBuilder MIB(MI);
1855 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1856 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1857 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1858 Observer.changingInstr(MI);
1859 MI.getOperand(1).setReg(MatchInfo.Base);
1860 MI.getOperand(2).setReg(NewOffset.getReg(0));
1861 MI.setFlags(MatchInfo.Flags);
1862 Observer.changedInstr(MI);
1863}
1864
1866 RegisterImmPair &MatchInfo) const {
1867 // We're trying to match the following pattern with any of
1868 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1869 // %t1 = SHIFT %base, G_CONSTANT imm1
1870 // %root = SHIFT %t1, G_CONSTANT imm2
1871 // -->
1872 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1873
1874 unsigned Opcode = MI.getOpcode();
1875 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1876 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1877 Opcode == TargetOpcode::G_USHLSAT) &&
1878 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1879
1880 Register Shl2 = MI.getOperand(1).getReg();
1881 Register Imm1 = MI.getOperand(2).getReg();
1882 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1883 if (!MaybeImmVal)
1884 return false;
1885
1886 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1887 if (Shl2Def->getOpcode() != Opcode)
1888 return false;
1889
1890 Register Base = Shl2Def->getOperand(1).getReg();
1891 Register Imm2 = Shl2Def->getOperand(2).getReg();
1892 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1893 if (!MaybeImm2Val)
1894 return false;
1895
1896 // Pass the combined immediate to the apply function.
1897 MatchInfo.Imm =
1898 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1899 MatchInfo.Reg = Base;
1900
1901 // There is no simple replacement for a saturating unsigned left shift that
1902 // exceeds the scalar size.
1903 if (Opcode == TargetOpcode::G_USHLSAT &&
1904 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1905 return false;
1906
1907 return true;
1908}
1909
1911 RegisterImmPair &MatchInfo) const {
1912 unsigned Opcode = MI.getOpcode();
1913 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1914 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1915 Opcode == TargetOpcode::G_USHLSAT) &&
1916 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1917
1918 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1919 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1920 auto Imm = MatchInfo.Imm;
1921
1922 if (Imm >= ScalarSizeInBits) {
1923 // Any logical shift that exceeds scalar size will produce zero.
1924 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1925 Builder.buildConstant(MI.getOperand(0), 0);
1926 MI.eraseFromParent();
1927 return;
1928 }
1929 // Arithmetic shift and saturating signed left shift have no effect beyond
1930 // scalar size.
1931 Imm = ScalarSizeInBits - 1;
1932 }
1933
1934 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1935 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1936 Observer.changingInstr(MI);
1937 MI.getOperand(1).setReg(MatchInfo.Reg);
1938 MI.getOperand(2).setReg(NewImm);
1939 Observer.changedInstr(MI);
1940}
1941
1943 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1944 // We're trying to match the following pattern with any of
1945 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1946 // with any of G_AND/G_OR/G_XOR logic instructions.
1947 // %t1 = SHIFT %X, G_CONSTANT C0
1948 // %t2 = LOGIC %t1, %Y
1949 // %root = SHIFT %t2, G_CONSTANT C1
1950 // -->
1951 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1952 // %t4 = SHIFT %Y, G_CONSTANT C1
1953 // %root = LOGIC %t3, %t4
1954 unsigned ShiftOpcode = MI.getOpcode();
1955 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1956 ShiftOpcode == TargetOpcode::G_ASHR ||
1957 ShiftOpcode == TargetOpcode::G_LSHR ||
1958 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1959 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1960 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1961
1962 // Match a one-use bitwise logic op.
1963 Register LogicDest = MI.getOperand(1).getReg();
1964 if (!MRI.hasOneNonDBGUse(LogicDest))
1965 return false;
1966
1967 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1968 unsigned LogicOpcode = LogicMI->getOpcode();
1969 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1970 LogicOpcode != TargetOpcode::G_XOR)
1971 return false;
1972
1973 // Find a matching one-use shift by constant.
1974 const Register C1 = MI.getOperand(2).getReg();
1975 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1976 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1977 return false;
1978
1979 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1980
1981 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1983    // The shift should match the previous one and should have a single use.
1983 if (MI->getOpcode() != ShiftOpcode ||
1984 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1985 return false;
1986
1987 // Must be a constant.
1988 auto MaybeImmVal =
1989 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1990 if (!MaybeImmVal)
1991 return false;
1992
1993 ShiftVal = MaybeImmVal->Value.getSExtValue();
1994 return true;
1995 };
1996
1997 // Logic ops are commutative, so check each operand for a match.
1998 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1999 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
2000 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
2001 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
2002 uint64_t C0Val;
2003
2004 if (matchFirstShift(LogicMIOp1, C0Val)) {
2005 MatchInfo.LogicNonShiftReg = LogicMIReg2;
2006 MatchInfo.Shift2 = LogicMIOp1;
2007 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
2008 MatchInfo.LogicNonShiftReg = LogicMIReg1;
2009 MatchInfo.Shift2 = LogicMIOp2;
2010 } else
2011 return false;
2012
2013 MatchInfo.ValSum = C0Val + C1Val;
2014
2015 // The fold is not valid if the sum of the shift values exceeds bitwidth.
2016 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
2017 return false;
2018
2019 MatchInfo.Logic = LogicMI;
2020 return true;
2021}
2022
2024 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2025 unsigned Opcode = MI.getOpcode();
2026 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2027 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2028 Opcode == TargetOpcode::G_SSHLSAT) &&
2029 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2030
2031 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2032 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2033
2034 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2035
2036 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2037 Register Shift1 =
2038 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2039
2040  // If LogicNonShiftReg is the same as Shift1Base and the shift1 constant is
2041  // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
2042  // old shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at
2043  // the end would actually remove the old shift1 and crash later, so erase it
2044  // earlier to avoid the crash.
2045 MatchInfo.Shift2->eraseFromParent();
2046
2047 Register Shift2Const = MI.getOperand(2).getReg();
2048 Register Shift2 = Builder
2049 .buildInstr(Opcode, {DestType},
2050 {MatchInfo.LogicNonShiftReg, Shift2Const})
2051 .getReg(0);
2052
2053 Register Dest = MI.getOperand(0).getReg();
2054 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2055
2056 // This was one use so it's safe to remove it.
2057 MatchInfo.Logic->eraseFromParent();
2058
2059 MI.eraseFromParent();
2060}
2061
2063 BuildFnTy &MatchInfo) const {
2064 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2065 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2066 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2067 auto &Shl = cast<GenericMachineInstr>(MI);
2068 Register DstReg = Shl.getReg(0);
2069 Register SrcReg = Shl.getReg(1);
2070 Register ShiftReg = Shl.getReg(2);
2071 Register X, C1;
2072
2073 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2074 return false;
2075
2076 if (!mi_match(SrcReg, MRI,
2077                m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
2078 m_GOr(m_Reg(X), m_Reg(C1))))))
2079 return false;
2080
2081 APInt C1Val, C2Val;
2082 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2083 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2084 return false;
2085
2086 auto *SrcDef = MRI.getVRegDef(SrcReg);
2087 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2088 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2089 LLT SrcTy = MRI.getType(SrcReg);
2090 MatchInfo = [=](MachineIRBuilder &B) {
2091 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2092 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2093 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2094 };
2095 return true;
2096}
2097
2099 LshrOfTruncOfLshr &MatchInfo,
2100 MachineInstr &ShiftMI) const {
2101 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2102
2103 Register N0 = MI.getOperand(1).getReg();
2104 Register N1 = MI.getOperand(2).getReg();
2105 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2106
2107 APInt N1C, N001C;
2108 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2109 return false;
2110 auto N001 = ShiftMI.getOperand(2).getReg();
2111 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2112 return false;
2113
2114 if (N001C.getBitWidth() > N1C.getBitWidth())
2115 N1C = N1C.zext(N001C.getBitWidth());
2116 else
2117 N001C = N001C.zext(N1C.getBitWidth());
2118
2119 Register InnerShift = ShiftMI.getOperand(0).getReg();
2120 LLT InnerShiftTy = MRI.getType(InnerShift);
2121 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2122 if ((N1C + N001C).ult(InnerShiftSize)) {
2123 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2124 MatchInfo.ShiftAmt = N1C + N001C;
2125 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2126 MatchInfo.InnerShiftTy = InnerShiftTy;
2127
2128 if ((N001C + OpSizeInBits) == InnerShiftSize)
2129 return true;
2130 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2131 MatchInfo.Mask = true;
2132 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2133 return true;
2134 }
2135 }
2136 return false;
2137}
2138
2140 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2141 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2142
2143 Register Dst = MI.getOperand(0).getReg();
2144 auto ShiftAmt =
2145 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2146 auto Shift =
2147 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2148 if (MatchInfo.Mask == true) {
2149 APInt MaskVal =
2150        APInt::getLowBitsSet(InnerShiftTy.getScalarSizeInBits(),
2151 MatchInfo.MaskVal.getZExtValue());
2152 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2153 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2154 Builder.buildTrunc(Dst, And);
2155 } else
2156 Builder.buildTrunc(Dst, Shift);
2157 MI.eraseFromParent();
2158}
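// Illustrative rewrite produced by the match/apply pair above, assuming the
// combined shift amount stays below the wide bit width (names are invented
// for the example):
//   %w:_(s64) = G_LSHR %x:_(s64), C1
//   %t:_(s32) = G_TRUNC %w
//   %r:_(s32) = G_LSHR %t, C2
// -->
//   %s:_(s64) = G_LSHR %x, C1 + C2
//   %r:_(s32) = G_TRUNC %s
// with an additional mask of the low (32 - C2) bits when the truncated window
// is not the topmost part of the wide value.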
2159
2161 unsigned &ShiftVal) const {
2162 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2163 auto MaybeImmVal =
2164 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2165 if (!MaybeImmVal)
2166 return false;
2167
2168 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2169 return (static_cast<int32_t>(ShiftVal) != -1);
2170}
2171
2173 unsigned &ShiftVal) const {
2174 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2175 MachineIRBuilder MIB(MI);
2176 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2177 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2178 Observer.changingInstr(MI);
2179 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2180 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2181 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2182    MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
2183 Observer.changedInstr(MI);
2184}
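// Example of the strength reduction above (exactLogBase2 supplies the shift
// amount; register names are illustrative):
//   %r:_(s32) = G_MUL %x, 16
// -->
//   %c:_(s32) = G_CONSTANT i32 4
//   %r:_(s32) = G_SHL %x, %c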
2185
2187 BuildFnTy &MatchInfo) const {
2188 GSub &Sub = cast<GSub>(MI);
2189
2190 LLT Ty = MRI.getType(Sub.getReg(0));
2191
2192 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2193 return false;
2194
2196 return false;
2197
2198 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2199
2200 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2201 auto NegCst = B.buildConstant(Ty, -Imm);
2202 Observer.changingInstr(MI);
2203 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2204 MI.getOperand(2).setReg(NegCst.getReg(0));
2206 if (Imm.isMinSignedValue())
2208 Observer.changedInstr(MI);
2209 };
2210 return true;
2211}
2212
2213// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
2215 RegisterImmPair &MatchData) const {
2216 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2217 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2218 return false;
2219
2220 Register LHS = MI.getOperand(1).getReg();
2221
2222 Register ExtSrc;
2223 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2224 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2225 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2226 return false;
2227
2228 Register RHS = MI.getOperand(2).getReg();
2229 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2230 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2231 if (!MaybeShiftAmtVal)
2232 return false;
2233
2234 if (LI) {
2235 LLT SrcTy = MRI.getType(ExtSrc);
2236
2237    // We only really care about the legality with the shifted value. We can
2238    // pick any type for the constant shift amount, so ask the target what to
2239    // use. Otherwise we would have to guess and hope it is reported as legal.
2240 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2241 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2242 return false;
2243 }
2244
2245 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2246 MatchData.Reg = ExtSrc;
2247 MatchData.Imm = ShiftAmt;
2248
2249 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2250 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2251 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2252}
2253
2255 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2256 Register ExtSrcReg = MatchData.Reg;
2257 int64_t ShiftAmtVal = MatchData.Imm;
2258
2259 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2260 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2261 auto NarrowShift =
2262 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2263 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2264 MI.eraseFromParent();
2265}
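// Illustrative rewrite for the combine above, assuming known-bits proves at
// least 4 leading zeros in %x so the narrow shift cannot overflow:
//   %e:_(s64) = G_ZEXT %x:_(s32)
//   %r:_(s64) = G_SHL %e, 4
// -->
//   %n:_(s32) = G_SHL %x, 4
//   %r:_(s64) = G_ZEXT %n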
2266
2268 Register &MatchInfo) const {
2269  auto &Merge = cast<GMergeLikeInstr>(MI);
2270 SmallVector<Register, 16> MergedValues;
2271 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2272 MergedValues.emplace_back(Merge.getSourceReg(I));
2273
2274 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2275 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2276 return false;
2277
2278 for (unsigned I = 0; I < MergedValues.size(); ++I)
2279 if (MergedValues[I] != Unmerge->getReg(I))
2280 return false;
2281
2282 MatchInfo = Unmerge->getSourceReg();
2283 return true;
2284}
2285
2286static Register peekThroughBitcast(Register Reg,
2287 const MachineRegisterInfo &MRI) {
2288 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2289 ;
2290
2291 return Reg;
2292}
2293
2296 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2297 "Expected an unmerge");
2298 auto &Unmerge = cast<GUnmerge>(MI);
2299 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2300
2301 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2302 if (!SrcInstr)
2303 return false;
2304
2305 // Check the source type of the merge.
2306 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2307 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2308 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2309 if (SrcMergeTy != Dst0Ty && !SameSize)
2310 return false;
2311 // They are the same now (modulo a bitcast).
2312 // We can collect all the src registers.
2313 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2314 Operands.push_back(SrcInstr->getSourceReg(Idx));
2315 return true;
2316}
2317
2320 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2321 "Expected an unmerge");
2322 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2323 "Not enough operands to replace all defs");
2324 unsigned NumElems = MI.getNumOperands() - 1;
2325
2326 LLT SrcTy = MRI.getType(Operands[0]);
2327 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2328 bool CanReuseInputDirectly = DstTy == SrcTy;
2329 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2330 Register DstReg = MI.getOperand(Idx).getReg();
2331 Register SrcReg = Operands[Idx];
2332
2333 // This combine may run after RegBankSelect, so we need to be aware of
2334 // register banks.
2335 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2336 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2337 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2338 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2339 }
2340
2341 if (CanReuseInputDirectly)
2342 replaceRegWith(MRI, DstReg, SrcReg);
2343 else
2344 Builder.buildCast(DstReg, SrcReg);
2345 }
2346 MI.eraseFromParent();
2347}
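// Example of the unmerge(merge) elimination above; the unmerged values simply
// reuse the merge sources (a cast or copy is inserted instead when the types
// or register banks differ):
//   %m:_(s64) = G_MERGE_VALUES %a:_(s32), %b:_(s32)
//   %x:_(s32), %y:_(s32) = G_UNMERGE_VALUES %m
// -->
//   %x is replaced by %a, %y is replaced by %b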
2348
2350 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2351 unsigned SrcIdx = MI.getNumOperands() - 1;
2352 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2353 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2354 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2355 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2356 return false;
2357  // Break down the big constant into smaller ones.
2358 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2359 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2360 ? CstVal.getCImm()->getValue()
2361 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2362
2363 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2364 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2365 // Unmerge a constant.
2366 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2367 Csts.emplace_back(Val.trunc(ShiftAmt));
2368 Val = Val.lshr(ShiftAmt);
2369 }
2370
2371 return true;
2372}
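// Example of unmerging a constant as matched above; the wide constant is
// split low part first by the trunc/lshr loop:
//   %c:_(s64) = G_CONSTANT i64 0x0000000100000002
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %c
// -->
//   %lo:_(s32) = G_CONSTANT i32 2
//   %hi:_(s32) = G_CONSTANT i32 1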
2373
2375 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2376 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2377 "Expected an unmerge");
2378 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2379 "Not enough operands to replace all defs");
2380 unsigned NumElems = MI.getNumOperands() - 1;
2381 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2382 Register DstReg = MI.getOperand(Idx).getReg();
2383 Builder.buildConstant(DstReg, Csts[Idx]);
2384 }
2385
2386 MI.eraseFromParent();
2387}
2388
2391 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2392 unsigned SrcIdx = MI.getNumOperands() - 1;
2393 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2394 MatchInfo = [&MI](MachineIRBuilder &B) {
2395 unsigned NumElems = MI.getNumOperands() - 1;
2396 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2397 Register DstReg = MI.getOperand(Idx).getReg();
2398 B.buildUndef(DstReg);
2399 }
2400 };
2401 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2402}
2403
2405 MachineInstr &MI) const {
2406 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2407 "Expected an unmerge");
2408 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2409 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2410 return false;
2411 // Check that all the lanes are dead except the first one.
2412 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2413 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2414 return false;
2415 }
2416 return true;
2417}
2418
2420 MachineInstr &MI) const {
2421 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2422 Register Dst0Reg = MI.getOperand(0).getReg();
2423 Builder.buildTrunc(Dst0Reg, SrcReg);
2424 MI.eraseFromParent();
2425}
2426
2428 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2429 "Expected an unmerge");
2430 Register Dst0Reg = MI.getOperand(0).getReg();
2431 LLT Dst0Ty = MRI.getType(Dst0Reg);
2432 // G_ZEXT on vector applies to each lane, so it will
2433 // affect all destinations. Therefore we won't be able
2434 // to simplify the unmerge to just the first definition.
2435 if (Dst0Ty.isVector())
2436 return false;
2437 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2438 LLT SrcTy = MRI.getType(SrcReg);
2439 if (SrcTy.isVector())
2440 return false;
2441
2442 Register ZExtSrcReg;
2443 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2444 return false;
2445
2446  // Finally, we can replace the first definition with
2447  // a zext of the source if the definition is big enough to hold
2448  // all of ZExtSrc's bits.
2449 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2450 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2451}
2452
2454 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2455 "Expected an unmerge");
2456
2457 Register Dst0Reg = MI.getOperand(0).getReg();
2458
2459 MachineInstr *ZExtInstr =
2460 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2461 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2462 "Expecting a G_ZEXT");
2463
2464 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2465 LLT Dst0Ty = MRI.getType(Dst0Reg);
2466 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2467
2468 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2469 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2470 } else {
2471 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2472 "ZExt src doesn't fit in destination");
2473 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2474 }
2475
2476 Register ZeroReg;
2477 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2478 if (!ZeroReg)
2479 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2480 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2481 }
2482 MI.eraseFromParent();
2483}
2484
2486 unsigned TargetShiftSize,
2487 unsigned &ShiftVal) const {
2488 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2489 MI.getOpcode() == TargetOpcode::G_LSHR ||
2490 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2491
2492 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2493 if (Ty.isVector()) // TODO:
2494 return false;
2495
2496 // Don't narrow further than the requested size.
2497 unsigned Size = Ty.getSizeInBits();
2498 if (Size <= TargetShiftSize)
2499 return false;
2500
2501 auto MaybeImmVal =
2502 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2503 if (!MaybeImmVal)
2504 return false;
2505
2506 ShiftVal = MaybeImmVal->Value.getSExtValue();
2507 return ShiftVal >= Size / 2 && ShiftVal < Size;
2508}
2509
2511 MachineInstr &MI, const unsigned &ShiftVal) const {
2512 Register DstReg = MI.getOperand(0).getReg();
2513 Register SrcReg = MI.getOperand(1).getReg();
2514 LLT Ty = MRI.getType(SrcReg);
2515 unsigned Size = Ty.getSizeInBits();
2516 unsigned HalfSize = Size / 2;
2517 assert(ShiftVal >= HalfSize);
2518
2519 LLT HalfTy = LLT::scalar(HalfSize);
2520
2521 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2522 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2523
2524 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2525 Register Narrowed = Unmerge.getReg(1);
2526
2527 // dst = G_LSHR s64:x, C for C >= 32
2528 // =>
2529 // lo, hi = G_UNMERGE_VALUES x
2530 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2531
2532 if (NarrowShiftAmt != 0) {
2533 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2534 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2535 }
2536
2537 auto Zero = Builder.buildConstant(HalfTy, 0);
2538 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2539 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2540 Register Narrowed = Unmerge.getReg(0);
2541 // dst = G_SHL s64:x, C for C >= 32
2542 // =>
2543 // lo, hi = G_UNMERGE_VALUES x
2544 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2545 if (NarrowShiftAmt != 0) {
2546 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2547 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2548 }
2549
2550 auto Zero = Builder.buildConstant(HalfTy, 0);
2551 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2552 } else {
2553 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2554 auto Hi = Builder.buildAShr(
2555 HalfTy, Unmerge.getReg(1),
2556 Builder.buildConstant(HalfTy, HalfSize - 1));
2557
2558 if (ShiftVal == HalfSize) {
2559 // (G_ASHR i64:x, 32) ->
2560 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2561 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2562 } else if (ShiftVal == Size - 1) {
2563 // Don't need a second shift.
2564 // (G_ASHR i64:x, 63) ->
2565 // %narrowed = (G_ASHR hi_32(x), 31)
2566 // G_MERGE_VALUES %narrowed, %narrowed
2567 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2568 } else {
2569 auto Lo = Builder.buildAShr(
2570 HalfTy, Unmerge.getReg(1),
2571 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2572
2573 // (G_ASHR i64:x, C) ->, for C >= 32
2574 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2575 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2576 }
2577 }
2578
2579 MI.eraseFromParent();
2580}
2581
2583 MachineInstr &MI, unsigned TargetShiftAmount) const {
2584 unsigned ShiftAmt;
2585 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2586 applyCombineShiftToUnmerge(MI, ShiftAmt);
2587 return true;
2588 }
2589
2590 return false;
2591}
2592
2594 Register &Reg) const {
2595 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2596 Register DstReg = MI.getOperand(0).getReg();
2597 LLT DstTy = MRI.getType(DstReg);
2598 Register SrcReg = MI.getOperand(1).getReg();
2599 return mi_match(SrcReg, MRI,
2600 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2601}
2602
2604 Register &Reg) const {
2605 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2606 Register DstReg = MI.getOperand(0).getReg();
2607 Builder.buildCopy(DstReg, Reg);
2608 MI.eraseFromParent();
2609}
2610
2612 Register &Reg) const {
2613 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2614 Register DstReg = MI.getOperand(0).getReg();
2615 Builder.buildZExtOrTrunc(DstReg, Reg);
2616 MI.eraseFromParent();
2617}
2618
2620 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2621 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2622 Register LHS = MI.getOperand(1).getReg();
2623 Register RHS = MI.getOperand(2).getReg();
2624 LLT IntTy = MRI.getType(LHS);
2625
2626 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2627 // instruction.
2628 PtrReg.second = false;
2629 for (Register SrcReg : {LHS, RHS}) {
2630 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2631 // Don't handle cases where the integer is implicitly converted to the
2632 // pointer width.
2633 LLT PtrTy = MRI.getType(PtrReg.first);
2634 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2635 return true;
2636 }
2637
2638 PtrReg.second = true;
2639 }
2640
2641 return false;
2642}
2643
2645 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2646 Register Dst = MI.getOperand(0).getReg();
2647 Register LHS = MI.getOperand(1).getReg();
2648 Register RHS = MI.getOperand(2).getReg();
2649
2650 const bool DoCommute = PtrReg.second;
2651 if (DoCommute)
2652 std::swap(LHS, RHS);
2653 LHS = PtrReg.first;
2654
2655 LLT PtrTy = MRI.getType(LHS);
2656
2657 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2658 Builder.buildPtrToInt(Dst, PtrAdd);
2659 MI.eraseFromParent();
2660}
2661
2663 APInt &NewCst) const {
2664 auto &PtrAdd = cast<GPtrAdd>(MI);
2665 Register LHS = PtrAdd.getBaseReg();
2666 Register RHS = PtrAdd.getOffsetReg();
2667 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2668
2669 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2670 APInt Cst;
2671 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2672 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2673 // G_INTTOPTR uses zero-extension
2674 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2675 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2676 return true;
2677 }
2678 }
2679
2680 return false;
2681}
2682
2684 APInt &NewCst) const {
2685 auto &PtrAdd = cast<GPtrAdd>(MI);
2686 Register Dst = PtrAdd.getReg(0);
2687
2688 Builder.buildConstant(Dst, NewCst);
2689 PtrAdd.eraseFromParent();
2690}
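// Worked example for the constant G_PTR_ADD fold above (values are
// illustrative, assuming 64-bit pointers): adding an offset of 16 to a
// G_INTTOPTR of 4096 folds to the single constant 4112, since the G_INTTOPTR
// input is zero-extended and the offset is sign-extended before the addition.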
2691
2693 Register &Reg) const {
2694 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2695 Register DstReg = MI.getOperand(0).getReg();
2696 Register SrcReg = MI.getOperand(1).getReg();
2697 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2698 if (OriginalSrcReg.isValid())
2699 SrcReg = OriginalSrcReg;
2700 LLT DstTy = MRI.getType(DstReg);
2701 return mi_match(SrcReg, MRI,
2702 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2703 canReplaceReg(DstReg, Reg, MRI);
2704}
2705
2707 Register &Reg) const {
2708 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2709 Register DstReg = MI.getOperand(0).getReg();
2710 Register SrcReg = MI.getOperand(1).getReg();
2711 LLT DstTy = MRI.getType(DstReg);
2712 if (mi_match(SrcReg, MRI,
2713 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2714 canReplaceReg(DstReg, Reg, MRI)) {
2715 unsigned DstSize = DstTy.getScalarSizeInBits();
2716 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2717 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2718 }
2719 return false;
2720}
2721
2722static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2723 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2724 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2725
2726 // ShiftTy > 32 > TruncTy -> 32
2727 if (ShiftSize > 32 && TruncSize < 32)
2728 return ShiftTy.changeElementSize(32);
2729
2730 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2731 // Some targets like it, some don't, some only like it under certain
2732 // conditions/processor versions, etc.
2733 // A TL hook might be needed for this.
2734
2735 // Don't combine
2736 return ShiftTy;
2737}
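// Example of the type choice above: an s64 right shift whose result is
// truncated to s16 is instead performed as an s32 shift, i.e.
//   trunc(s16) (lshr %x:_(s64), C) --> trunc(s16) (lshr (trunc(s32) %x), C)
// provided the known shift amount is small enough that no set bits are lost.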
2738
2740 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2741 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2742 Register DstReg = MI.getOperand(0).getReg();
2743 Register SrcReg = MI.getOperand(1).getReg();
2744
2745 if (!MRI.hasOneNonDBGUse(SrcReg))
2746 return false;
2747
2748 LLT SrcTy = MRI.getType(SrcReg);
2749 LLT DstTy = MRI.getType(DstReg);
2750
2751 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2752 const auto &TL = getTargetLowering();
2753
2754 LLT NewShiftTy;
2755 switch (SrcMI->getOpcode()) {
2756 default:
2757 return false;
2758 case TargetOpcode::G_SHL: {
2759 NewShiftTy = DstTy;
2760
2761 // Make sure new shift amount is legal.
2762 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2763 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2764 return false;
2765 break;
2766 }
2767 case TargetOpcode::G_LSHR:
2768 case TargetOpcode::G_ASHR: {
2769 // For right shifts, we conservatively do not do the transform if the TRUNC
2770 // has any STORE users. The reason is that if we change the type of the
2771 // shift, we may break the truncstore combine.
2772 //
2773 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2774 for (auto &User : MRI.use_instructions(DstReg))
2775 if (User.getOpcode() == TargetOpcode::G_STORE)
2776 return false;
2777
2778 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2779 if (NewShiftTy == SrcTy)
2780 return false;
2781
2782 // Make sure we won't lose information by truncating the high bits.
2783 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2784 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2785 DstTy.getScalarSizeInBits()))
2786 return false;
2787 break;
2788 }
2789 }
2790
2791  if (!isLegalOrBeforeLegalizer(
2792 {SrcMI->getOpcode(),
2793 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2794 return false;
2795
2796 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2797 return true;
2798}
2799
2801 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2802 MachineInstr *ShiftMI = MatchInfo.first;
2803 LLT NewShiftTy = MatchInfo.second;
2804
2805 Register Dst = MI.getOperand(0).getReg();
2806 LLT DstTy = MRI.getType(Dst);
2807
2808 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2809 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2810 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2811
2812 Register NewShift =
2813 Builder
2814 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2815 .getReg(0);
2816
2817 if (NewShiftTy == DstTy)
2818 replaceRegWith(MRI, Dst, NewShift);
2819 else
2820 Builder.buildTrunc(Dst, NewShift);
2821
2822 eraseInst(MI);
2823}
2824
2826 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2827 return MO.isReg() &&
2828 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2829 });
2830}
2831
2833 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2834 return !MO.isReg() ||
2835 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2836 });
2837}
2838
2840 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2841 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2842 return all_of(Mask, [](int Elt) { return Elt < 0; });
2843}
2844
2846 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2847 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2848 MRI);
2849}
2850
2852 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2853 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2854 MRI);
2855}
2856
2858 MachineInstr &MI) const {
2859 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2860 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2861 "Expected an insert/extract element op");
2862 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2863 if (VecTy.isScalableVector())
2864 return false;
2865
2866 unsigned IdxIdx =
2867 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2868 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2869 if (!Idx)
2870 return false;
2871 return Idx->getZExtValue() >= VecTy.getNumElements();
2872}
2873
2875 unsigned &OpIdx) const {
2876 GSelect &SelMI = cast<GSelect>(MI);
2877 auto Cst =
2878 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2879 if (!Cst)
2880 return false;
2881 OpIdx = Cst->isZero() ? 3 : 2;
2882 return true;
2883}
2884
2885void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2886
2888 const MachineOperand &MOP2) const {
2889 if (!MOP1.isReg() || !MOP2.isReg())
2890 return false;
2891 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2892 if (!InstAndDef1)
2893 return false;
2894 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2895 if (!InstAndDef2)
2896 return false;
2897 MachineInstr *I1 = InstAndDef1->MI;
2898 MachineInstr *I2 = InstAndDef2->MI;
2899
2900 // Handle a case like this:
2901 //
2902 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2903 //
2904 // Even though %0 and %1 are produced by the same instruction they are not
2905 // the same values.
2906 if (I1 == I2)
2907 return MOP1.getReg() == MOP2.getReg();
2908
2909 // If we have an instruction which loads or stores, we can't guarantee that
2910 // it is identical.
2911 //
2912 // For example, we may have
2913 //
2914 // %x1 = G_LOAD %addr (load N from @somewhere)
2915 // ...
2916 // call @foo
2917 // ...
2918 // %x2 = G_LOAD %addr (load N from @somewhere)
2919 // ...
2920 // %or = G_OR %x1, %x2
2921 //
2922 // It's possible that @foo will modify whatever lives at the address we're
2923 // loading from. To be safe, let's just assume that all loads and stores
2924 // are different (unless we have something which is guaranteed to not
2925 // change.)
2926 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2927 return false;
2928
2929 // If both instructions are loads or stores, they are equal only if both
2930 // are dereferenceable invariant loads with the same number of bits.
2931 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2932    auto *LS1 = dyn_cast<GLoadStore>(I1);
2933    auto *LS2 = dyn_cast<GLoadStore>(I2);
2934 if (!LS1 || !LS2)
2935 return false;
2936
2937 if (!I2->isDereferenceableInvariantLoad() ||
2938 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2939 return false;
2940 }
2941
2942 // Check for physical registers on the instructions first to avoid cases
2943 // like this:
2944 //
2945 // %a = COPY $physreg
2946 // ...
2947 // SOMETHING implicit-def $physreg
2948 // ...
2949 // %b = COPY $physreg
2950 //
2951 // These copies are not equivalent.
2952 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2953 return MO.isReg() && MO.getReg().isPhysical();
2954 })) {
2955 // Check if we have a case like this:
2956 //
2957 // %a = COPY $physreg
2958 // %b = COPY %a
2959 //
2960 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2961 // From that, we know that they must have the same value, since they must
2962 // have come from the same COPY.
2963 return I1->isIdenticalTo(*I2);
2964 }
2965
2966 // We don't have any physical registers, so we don't necessarily need the
2967 // same vreg defs.
2968 //
2969 // On the off-chance that there's some target instruction feeding into the
2970 // instruction, let's use produceSameValue instead of isIdenticalTo.
2971 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2972    // Handle instructions with multiple defs that produce the same values. The
2973    // values are the same only for operands with the same index.
2974    // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2975    // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2976    // I1 and I2 are different instructions that produce the same values;
2977    // %1 and %6 are the same, but %1 and %7 are not.
2978 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2979 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2980 }
2981 return false;
2982}
2983
2985 int64_t C) const {
2986 if (!MOP.isReg())
2987 return false;
2988 auto *MI = MRI.getVRegDef(MOP.getReg());
2989 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2990 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2991 MaybeCst->getSExtValue() == C;
2992}
2993
2995 double C) const {
2996 if (!MOP.isReg())
2997 return false;
2998 std::optional<FPValueAndVReg> MaybeCst;
2999 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
3000 return false;
3001
3002 return MaybeCst->Value.isExactlyValue(C);
3003}
3004
3006 unsigned OpIdx) const {
3007 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3008 Register OldReg = MI.getOperand(0).getReg();
3009 Register Replacement = MI.getOperand(OpIdx).getReg();
3010 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3011 replaceRegWith(MRI, OldReg, Replacement);
3012 MI.eraseFromParent();
3013}
3014
3016 Register Replacement) const {
3017 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3018 Register OldReg = MI.getOperand(0).getReg();
3019 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3020 replaceRegWith(MRI, OldReg, Replacement);
3021 MI.eraseFromParent();
3022}
3023
3025 unsigned ConstIdx) const {
3026 Register ConstReg = MI.getOperand(ConstIdx).getReg();
3027 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3028
3029 // Get the shift amount
3030 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3031 if (!VRegAndVal)
3032 return false;
3033
3034  // Return true if the shift amount is >= the bitwidth.
3035 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
3036}
3037
3039 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
3040 MI.getOpcode() == TargetOpcode::G_FSHR) &&
3041 "This is not a funnel shift operation");
3042
3043 Register ConstReg = MI.getOperand(3).getReg();
3044 LLT ConstTy = MRI.getType(ConstReg);
3045 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3046
3047 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3048 assert((VRegAndVal) && "Value is not a constant");
3049
3050 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
3051 APInt NewConst = VRegAndVal->Value.urem(
3052 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3053
3054 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3055 Builder.buildInstr(
3056 MI.getOpcode(), {MI.getOperand(0)},
3057 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3058
3059 MI.eraseFromParent();
3060}
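// Worked example for the modulo reduction above: a funnel shift of s32 values
// by 35 is equivalent to a shift by 35 % 32 = 3, so
//   %r:_(s32) = G_FSHL %a, %b, 35  -->  %r:_(s32) = G_FSHL %a, %b, 3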
3061
3063 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3064 // Match (cond ? x : x)
3065 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3066 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3067 MRI);
3068}
3069
3071 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3072 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3073 MRI);
3074}
3075
3077 unsigned OpIdx) const {
3078 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
3079 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
3080 MRI);
3081}
3082
3084 unsigned OpIdx) const {
3085 MachineOperand &MO = MI.getOperand(OpIdx);
3086 return MO.isReg() &&
3087 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3088}
3089
3091 unsigned OpIdx) const {
3092 MachineOperand &MO = MI.getOperand(OpIdx);
3093 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT);
3094}
3095
3097 double C) const {
3098 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3099 Builder.buildFConstant(MI.getOperand(0), C);
3100 MI.eraseFromParent();
3101}
3102
3104 int64_t C) const {
3105 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3106 Builder.buildConstant(MI.getOperand(0), C);
3107 MI.eraseFromParent();
3108}
3109
3111 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3112 Builder.buildConstant(MI.getOperand(0), C);
3113 MI.eraseFromParent();
3114}
3115
3117 ConstantFP *CFP) const {
3118 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3119 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3120 MI.eraseFromParent();
3121}
3122
3124 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3125 Builder.buildUndef(MI.getOperand(0));
3126 MI.eraseFromParent();
3127}
3128
3130 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3131 Register LHS = MI.getOperand(1).getReg();
3132 Register RHS = MI.getOperand(2).getReg();
3133 Register &NewLHS = std::get<0>(MatchInfo);
3134 Register &NewRHS = std::get<1>(MatchInfo);
3135
3136 // Helper lambda to check for opportunities for
3137 // ((0-A) + B) -> B - A
3138 // (A + (0-B)) -> A - B
3139 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3140 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3141 return false;
3142 NewLHS = MaybeNewLHS;
3143 return true;
3144 };
3145
3146 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3147}
3148
3150 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3151 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3152 "Invalid opcode");
3153 Register DstReg = MI.getOperand(0).getReg();
3154 LLT DstTy = MRI.getType(DstReg);
3155 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3156
3157 if (DstTy.isScalableVector())
3158 return false;
3159
3160 unsigned NumElts = DstTy.getNumElements();
3161 // If this MI is part of a sequence of insert_vec_elts, then
3162 // don't do the combine in the middle of the sequence.
3163 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3164 TargetOpcode::G_INSERT_VECTOR_ELT)
3165 return false;
3166 MachineInstr *CurrInst = &MI;
3167 MachineInstr *TmpInst;
3168 int64_t IntImm;
3169 Register TmpReg;
3170 MatchInfo.resize(NumElts);
3171 while (mi_match(
3172 CurrInst->getOperand(0).getReg(), MRI,
3173 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3174 if (IntImm >= NumElts || IntImm < 0)
3175 return false;
3176 if (!MatchInfo[IntImm])
3177 MatchInfo[IntImm] = TmpReg;
3178 CurrInst = TmpInst;
3179 }
3180 // Variable index.
3181 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3182 return false;
3183 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3184 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3185 if (!MatchInfo[I - 1].isValid())
3186 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3187 }
3188 return true;
3189 }
3190 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3191 // overwritten, bail out.
3192 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3193 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3194}
3195
3197 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3198 Register UndefReg;
3199 auto GetUndef = [&]() {
3200 if (UndefReg)
3201 return UndefReg;
3202 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3203 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3204 return UndefReg;
3205 };
3206 for (Register &Reg : MatchInfo) {
3207 if (!Reg)
3208 Reg = GetUndef();
3209 }
3210 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3211 MI.eraseFromParent();
3212}
3213
3215 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3216 Register SubLHS, SubRHS;
3217 std::tie(SubLHS, SubRHS) = MatchInfo;
3218 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3219 MI.eraseFromParent();
3220}
3221
3222bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3223 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3224 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3225 //
3226  // Creates the new hand + logic instructions (but does not insert them).
3227 //
3228 // On success, MatchInfo is populated with the new instructions. These are
3229 // inserted in applyHoistLogicOpWithSameOpcodeHands.
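  // A concrete instance (types and names are illustrative): when both hands
  // are the same kind of extend, the logic op is performed on the narrow
  // values first:
  //   %x32:_(s32) = G_ZEXT %x:_(s8)
  //   %y32:_(s32) = G_ZEXT %y:_(s8)
  //   %r:_(s32) = G_AND %x32, %y32
  // -->
  //   %n:_(s8) = G_AND %x, %y
  //   %r:_(s32) = G_ZEXT %n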
3230 unsigned LogicOpcode = MI.getOpcode();
3231 assert(LogicOpcode == TargetOpcode::G_AND ||
3232 LogicOpcode == TargetOpcode::G_OR ||
3233 LogicOpcode == TargetOpcode::G_XOR);
3234 MachineIRBuilder MIB(MI);
3235 Register Dst = MI.getOperand(0).getReg();
3236 Register LHSReg = MI.getOperand(1).getReg();
3237 Register RHSReg = MI.getOperand(2).getReg();
3238
3239 // Don't recompute anything.
3240 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3241 return false;
3242
3243 // Make sure we have (hand x, ...), (hand y, ...)
3244 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3245 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3246 if (!LeftHandInst || !RightHandInst)
3247 return false;
3248 unsigned HandOpcode = LeftHandInst->getOpcode();
3249 if (HandOpcode != RightHandInst->getOpcode())
3250 return false;
3251 if (LeftHandInst->getNumOperands() < 2 ||
3252 !LeftHandInst->getOperand(1).isReg() ||
3253 RightHandInst->getNumOperands() < 2 ||
3254 !RightHandInst->getOperand(1).isReg())
3255 return false;
3256
3257 // Make sure the types match up, and if we're doing this post-legalization,
3258 // we end up with legal types.
3259 Register X = LeftHandInst->getOperand(1).getReg();
3260 Register Y = RightHandInst->getOperand(1).getReg();
3261 LLT XTy = MRI.getType(X);
3262 LLT YTy = MRI.getType(Y);
3263 if (!XTy.isValid() || XTy != YTy)
3264 return false;
3265
3266 // Optional extra source register.
3267 Register ExtraHandOpSrcReg;
3268 switch (HandOpcode) {
3269 default:
3270 return false;
3271 case TargetOpcode::G_ANYEXT:
3272 case TargetOpcode::G_SEXT:
3273 case TargetOpcode::G_ZEXT: {
3274 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3275 break;
3276 }
3277 case TargetOpcode::G_TRUNC: {
3278 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3279 const MachineFunction *MF = MI.getMF();
3280 LLVMContext &Ctx = MF->getFunction().getContext();
3281
3282 LLT DstTy = MRI.getType(Dst);
3283 const TargetLowering &TLI = getTargetLowering();
3284
3285 // Be extra careful sinking truncate. If it's free, there's no benefit in
3286 // widening a binop.
3287 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3288 return false;
3289 break;
3290 }
3291 case TargetOpcode::G_AND:
3292 case TargetOpcode::G_ASHR:
3293 case TargetOpcode::G_LSHR:
3294 case TargetOpcode::G_SHL: {
3295 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3296 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3297 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3298 return false;
3299 ExtraHandOpSrcReg = ZOp.getReg();
3300 break;
3301 }
3302 }
3303
3304 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3305 return false;
3306
3307 // Record the steps to build the new instructions.
3308 //
3309 // Steps to build (logic x, y)
3310 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3311 OperandBuildSteps LogicBuildSteps = {
3312 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3313 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3314 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3315 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3316
3317 // Steps to build hand (logic x, y), ...z
3318 OperandBuildSteps HandBuildSteps = {
3319 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3320 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3321 if (ExtraHandOpSrcReg.isValid())
3322 HandBuildSteps.push_back(
3323 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3324 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3325
3326 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3327 return true;
3328}
3329
3331 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3332 assert(MatchInfo.InstrsToBuild.size() &&
3333 "Expected at least one instr to build?");
3334 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3335 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3336 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3337 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3338 for (auto &OperandFn : InstrToBuild.OperandFns)
3339 OperandFn(Instr);
3340 }
3341 MI.eraseFromParent();
3342}
3343
3345 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3346 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3347 int64_t ShlCst, AshrCst;
3348 Register Src;
3349 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3350 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3351 m_ICstOrSplat(AshrCst))))
3352 return false;
3353 if (ShlCst != AshrCst)
3354 return false;
3355  if (!isLegalOrBeforeLegalizer(
3356 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3357 return false;
3358 MatchInfo = std::make_tuple(Src, ShlCst);
3359 return true;
3360}
3361
3363 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3364 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3365 Register Src;
3366 int64_t ShiftAmt;
3367 std::tie(Src, ShiftAmt) = MatchInfo;
3368 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3369 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3370 MI.eraseFromParent();
3371}
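// Worked example for the fold above on s32 (shift amounts are illustrative):
// shifting left and then arithmetically right by 24 sign-extends the low
// 8 bits, so
//   %t:_(s32) = G_SHL %x, 24
//   %r:_(s32) = G_ASHR %t, 24
// -->
//   %r:_(s32) = G_SEXT_INREG %x, 8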
3372
3373/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3376 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3377 assert(MI.getOpcode() == TargetOpcode::G_AND);
3378
3379 Register Dst = MI.getOperand(0).getReg();
3380 LLT Ty = MRI.getType(Dst);
3381
3382 Register R;
3383 int64_t C1;
3384 int64_t C2;
3385 if (!mi_match(
3386 Dst, MRI,
3387 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3388 return false;
3389
3390 MatchInfo = [=](MachineIRBuilder &B) {
3391 if (C1 & C2) {
3392 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3393 return;
3394 }
3395 auto Zero = B.buildConstant(Ty, 0);
3396 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3397 };
3398 return true;
3399}
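// Two illustrative cases of the fold above:
//   and(and(x, 0xFF), 0x0F) --> and(x, 0x0F)   ; the masks overlap
//   and(and(x, 0xF0), 0x0F) --> 0              ; the masks are disjoint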
3400
3402 Register &Replacement) const {
3403 // Given
3404 //
3405 // %y:_(sN) = G_SOMETHING
3406 // %x:_(sN) = G_SOMETHING
3407 // %res:_(sN) = G_AND %x, %y
3408 //
3409 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3410 //
3411 // Patterns like this can appear as a result of legalization. E.g.
3412 //
3413 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3414 // %one:_(s32) = G_CONSTANT i32 1
3415 // %and:_(s32) = G_AND %cmp, %one
3416 //
3417 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3418 assert(MI.getOpcode() == TargetOpcode::G_AND);
3419 if (!VT)
3420 return false;
3421
3422 Register AndDst = MI.getOperand(0).getReg();
3423 Register LHS = MI.getOperand(1).getReg();
3424 Register RHS = MI.getOperand(2).getReg();
3425
3426 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3427 // we can't do anything. If we do, then it depends on whether we have
3428 // KnownBits on the LHS.
3429 KnownBits RHSBits = VT->getKnownBits(RHS);
3430 if (RHSBits.isUnknown())
3431 return false;
3432
3433 KnownBits LHSBits = VT->getKnownBits(LHS);
3434
3435 // Check that x & Mask == x.
3436 // x & 1 == x, always
3437 // x & 0 == x, only if x is also 0
3438 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
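 // Worked example (illustrative numbers, not from a test): if VT reports x
 // with Zero = 0xFFFFFF00 and the mask with One = 0x000000FF, then
 // Zero | One is all ones, so every bit of x is either already known zero or
 // covered by a one in the mask, and the G_AND can be replaced by x.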
3439 //
3440 // Check if we can replace AndDst with the LHS of the G_AND
3441 if (canReplaceReg(AndDst, LHS, MRI) &&
3442 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3443 Replacement = LHS;
3444 return true;
3445 }
3446
3447 // Check if we can replace AndDst with the RHS of the G_AND
3448 if (canReplaceReg(AndDst, RHS, MRI) &&
3449 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3450 Replacement = RHS;
3451 return true;
3452 }
3453
3454 return false;
3455}
3456
3457bool CombinerHelper::matchRedundantOr(MachineInstr &MI,
3458 Register &Replacement) const {
3459 // Given
3460 //
3461 // %y:_(sN) = G_SOMETHING
3462 // %x:_(sN) = G_SOMETHING
3463 // %res:_(sN) = G_OR %x, %y
3464 //
3465 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3466 assert(MI.getOpcode() == TargetOpcode::G_OR);
3467 if (!VT)
3468 return false;
3469
3470 Register OrDst = MI.getOperand(0).getReg();
3471 Register LHS = MI.getOperand(1).getReg();
3472 Register RHS = MI.getOperand(2).getReg();
3473
3474 KnownBits LHSBits = VT->getKnownBits(LHS);
3475 KnownBits RHSBits = VT->getKnownBits(RHS);
3476
3477 // Check that x | Mask == x.
3478 // x | 0 == x, always
3479 // x | 1 == x, only if x is also 1
3480 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3481 //
3482 // Check if we can replace OrDst with the LHS of the G_OR
3483 if (canReplaceReg(OrDst, LHS, MRI) &&
3484 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3485 Replacement = LHS;
3486 return true;
3487 }
3488
3489 // Check if we can replace OrDst with the RHS of the G_OR
3490 if (canReplaceReg(OrDst, RHS, MRI) &&
3491 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3492 Replacement = RHS;
3493 return true;
3494 }
3495
3496 return false;
3497}
3498
3499bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) const {
3500 // If the input is already sign extended, just drop the extension.
3501 Register Src = MI.getOperand(1).getReg();
3502 unsigned ExtBits = MI.getOperand(2).getImm();
3503 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3504 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3505}
3506
3507static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3508 int64_t Cst, bool IsVector, bool IsFP) {
3509 // For i1, Cst will always be -1 regardless of boolean contents.
3510 return (ScalarSizeBits == 1 && Cst == -1) ||
3511 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3512}
3513
3514// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3515// using vector truncates instead
3516//
3517// EXAMPLE:
3518// %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
3519// %T_a(i16) = G_TRUNC %a(i32)
3520// %T_b(i16) = G_TRUNC %b(i32)
3521// %Undef(i16) = G_IMPLICIT_DEF(i16)
3522// %dst(v4i16) = G_BUILD_VECTOR %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
3523//
3524// ===>
3525// %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
3526// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
3527// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3528//
3529// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3530bool CombinerHelper::matchUseVectorTruncate(MachineInstr &MI,
3531 Register &MatchInfo) const {
3532 auto BuildMI = cast<GBuildVector>(&MI);
3533 unsigned NumOperands = BuildMI->getNumSources();
3534 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3535
3536 // Check the G_BUILD_VECTOR sources
3537 unsigned I;
3538 MachineInstr *UnmergeMI = nullptr;
3539
3540 // Check all source TRUNCs come from the same UNMERGE instruction
3541 for (I = 0; I < NumOperands; ++I) {
3542 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3543 auto SrcMIOpc = SrcMI->getOpcode();
3544
3545 // Check if the G_TRUNC instructions all come from the same MI
3546 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3547 if (!UnmergeMI) {
3548 UnmergeMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3549 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3550 return false;
3551 } else {
3552 auto UnmergeSrcMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3553 if (UnmergeMI != UnmergeSrcMI)
3554 return false;
3555 }
3556 } else {
3557 break;
3558 }
3559 }
3560 if (I < 2)
3561 return false;
3562
3563 // Check the remaining source elements are only G_IMPLICIT_DEF
3564 for (; I < NumOperands; ++I) {
3565 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3566 auto SrcMIOpc = SrcMI->getOpcode();
3567
3568 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3569 return false;
3570 }
3571
3572 // Check the size of unmerge source
3573 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3574 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3575 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3576 return false;
3577
3578 // Check the unmerge source and destination element types match
3579 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3580 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3581 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3582 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3583 return false;
3584
3585 // Only generate legal instructions post-legalizer
3586 if (!IsPreLegalize) {
3587 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3588
3589 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3590 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3591 return false;
3592
3593 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3594 return false;
3595 }
3596
3597 return true;
3598}
3599
3600void CombinerHelper::applyUseVectorTruncate(MachineInstr &MI,
3601 Register &MatchInfo) const {
3602 Register MidReg;
3603 auto BuildMI = cast<GBuildVector>(&MI);
3604 Register DstReg = BuildMI->getReg(0);
3605 LLT DstTy = MRI.getType(DstReg);
3606 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3607 unsigned DstTyNumElt = DstTy.getNumElements();
3608 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3609
3610 // No need to pad vector if only G_TRUNC is needed
3611 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3612 MidReg = MatchInfo;
3613 } else {
3614 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3615 SmallVector<Register> ConcatRegs = {MatchInfo};
3616 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3617 ConcatRegs.push_back(UndefReg);
3618
3619 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3620 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3621 }
3622
3623 Builder.buildTrunc(DstReg, MidReg);
3624 MI.eraseFromParent();
3625}
3626
3627bool CombinerHelper::matchNotCmp(
3628 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3629 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3630 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3631 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3632 Register XorSrc;
3633 Register CstReg;
3634 // We match xor(src, true) here.
3635 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3636 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3637 return false;
3638
3639 if (!MRI.hasOneNonDBGUse(XorSrc))
3640 return false;
3641
3642 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3643 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3644 // work list of tree nodes to visit.
3645 RegsToNegate.push_back(XorSrc);
3646 // Remember whether the comparisons are all integer or all floating point.
3647 bool IsInt = false;
3648 bool IsFP = false;
3649 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3650 Register Reg = RegsToNegate[I];
3651 if (!MRI.hasOneNonDBGUse(Reg))
3652 return false;
3653 MachineInstr *Def = MRI.getVRegDef(Reg);
3654 switch (Def->getOpcode()) {
3655 default:
3656 // Don't match if the tree contains anything other than ANDs, ORs and
3657 // comparisons.
3658 return false;
3659 case TargetOpcode::G_ICMP:
3660 if (IsFP)
3661 return false;
3662 IsInt = true;
3663 // When we apply the combine we will invert the predicate.
3664 break;
3665 case TargetOpcode::G_FCMP:
3666 if (IsInt)
3667 return false;
3668 IsFP = true;
3669 // When we apply the combine we will invert the predicate.
3670 break;
3671 case TargetOpcode::G_AND:
3672 case TargetOpcode::G_OR:
3673 // Implement De Morgan's laws:
3674 // ~(x & y) -> ~x | ~y
3675 // ~(x | y) -> ~x & ~y
3676 // When we apply the combine we will change the opcode and recursively
3677 // negate the operands.
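 // For example (illustrative): not (and (icmp eq a, b), (icmp slt c, d))
 // becomes (or (icmp ne a, b), (icmp sge c, d)) once each predicate is
 // inverted and the G_AND is flipped to a G_OR.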
3678 RegsToNegate.push_back(Def->getOperand(1).getReg());
3679 RegsToNegate.push_back(Def->getOperand(2).getReg());
3680 break;
3681 }
3682 }
3683
3684 // Now we know whether the comparisons are integer or floating point, check
3685 // the constant in the xor.
3686 int64_t Cst;
3687 if (Ty.isVector()) {
3688 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3689 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3690 if (!MaybeCst)
3691 return false;
3692 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3693 return false;
3694 } else {
3695 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3696 return false;
3697 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3698 return false;
3699 }
3700
3701 return true;
3702}
3703
3704void CombinerHelper::applyNotCmp(
3705 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3706 for (Register Reg : RegsToNegate) {
3707 MachineInstr *Def = MRI.getVRegDef(Reg);
3708 Observer.changingInstr(*Def);
3709 // For each comparison, invert the opcode. For each AND and OR, change the
3710 // opcode.
3711 switch (Def->getOpcode()) {
3712 default:
3713 llvm_unreachable("Unexpected opcode");
3714 case TargetOpcode::G_ICMP:
3715 case TargetOpcode::G_FCMP: {
3716 MachineOperand &PredOp = Def->getOperand(1);
3717 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3718 (CmpInst::Predicate)PredOp.getPredicate());
3719 PredOp.setPredicate(NewP);
3720 break;
3721 }
3722 case TargetOpcode::G_AND:
3723 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3724 break;
3725 case TargetOpcode::G_OR:
3726 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3727 break;
3728 }
3729 Observer.changedInstr(*Def);
3730 }
3731
3732 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3733 MI.eraseFromParent();
3734}
3735
3736bool CombinerHelper::matchXorOfAndWithSameReg(
3737 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3738 // Match (xor (and x, y), y) (or any of its commuted cases)
3739 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3740 Register &X = MatchInfo.first;
3741 Register &Y = MatchInfo.second;
3742 Register AndReg = MI.getOperand(1).getReg();
3743 Register SharedReg = MI.getOperand(2).getReg();
3744
3745 // Find a G_AND on either side of the G_XOR.
3746 // Look for one of
3747 //
3748 // (xor (and x, y), SharedReg)
3749 // (xor SharedReg, (and x, y))
3750 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3751 std::swap(AndReg, SharedReg);
3752 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3753 return false;
3754 }
3755
3756 // Only do this if we'll eliminate the G_AND.
3757 if (!MRI.hasOneNonDBGUse(AndReg))
3758 return false;
3759
3760 // We can combine if SharedReg is the same as either the LHS or RHS of the
3761 // G_AND.
3762 if (Y != SharedReg)
3763 std::swap(X, Y);
3764 return Y == SharedReg;
3765}
3766
3767void CombinerHelper::applyXorOfAndWithSameReg(
3768 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3769 // Fold (xor (and x, y), y) -> (and (not x), y)
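 // Bit-level check (illustrative): where a bit of y is 0 both sides are 0;
 // where it is 1, (x & 1) ^ 1 == ~x and (~x) & 1 == ~x, so the forms agree.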
3770 Register X, Y;
3771 std::tie(X, Y) = MatchInfo;
3772 auto Not = Builder.buildNot(MRI.getType(X), X);
3773 Observer.changingInstr(MI);
3774 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3775 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3776 MI.getOperand(2).setReg(Y);
3777 Observer.changedInstr(MI);
3778}
3779
3780bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) const {
3781 auto &PtrAdd = cast<GPtrAdd>(MI);
3782 Register DstReg = PtrAdd.getReg(0);
3783 LLT Ty = MRI.getType(DstReg);
3784 const DataLayout &DL = Builder.getMF().getDataLayout();
3785
3786 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3787 return false;
3788
3789 if (Ty.isPointer()) {
3790 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3791 return ConstVal && *ConstVal == 0;
3792 }
3793
3794 assert(Ty.isVector() && "Expecting a vector type");
3795 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3796 return isBuildVectorAllZeros(*VecMI, MRI);
3797}
3798
3799void CombinerHelper::applyPtrAddZero(MachineInstr &MI) const {
3800 auto &PtrAdd = cast<GPtrAdd>(MI);
3801 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3802 PtrAdd.eraseFromParent();
3803}
3804
3805/// The second source operand is known to be a power of 2.
3806void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) const {
3807 Register DstReg = MI.getOperand(0).getReg();
3808 Register Src0 = MI.getOperand(1).getReg();
3809 Register Pow2Src1 = MI.getOperand(2).getReg();
3810 LLT Ty = MRI.getType(DstReg);
3811
3812 // Fold (urem x, pow2) -> (and x, pow2-1)
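 // e.g. (illustrative): x urem 8 becomes x & 7, since 8 = 0b1000 and
 // 7 = 0b0111 keeps exactly the bits below the power of two.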
3813 auto NegOne = Builder.buildConstant(Ty, -1);
3814 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3815 Builder.buildAnd(DstReg, Src0, Add);
3816 MI.eraseFromParent();
3817}
3818
3819bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
3820 unsigned &SelectOpNo) const {
3821 Register LHS = MI.getOperand(1).getReg();
3822 Register RHS = MI.getOperand(2).getReg();
3823
3824 Register OtherOperandReg = RHS;
3825 SelectOpNo = 1;
3826 MachineInstr *Select = MRI.getVRegDef(LHS);
3827
3828 // Don't do this unless the old select is going away. We want to eliminate the
3829 // binary operator, not replace a binop with a select.
3830 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3831 !MRI.hasOneNonDBGUse(LHS)) {
3832 OtherOperandReg = LHS;
3833 SelectOpNo = 2;
3834 Select = MRI.getVRegDef(RHS);
3835 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3836 !MRI.hasOneNonDBGUse(RHS))
3837 return false;
3838 }
3839
3840 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3841 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3842
3843 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3844 /*AllowFP*/ true,
3845 /*AllowOpaqueConstants*/ false))
3846 return false;
3847 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3848 /*AllowFP*/ true,
3849 /*AllowOpaqueConstants*/ false))
3850 return false;
3851
3852 unsigned BinOpcode = MI.getOpcode();
3853
3854 // We know that one of the operands is a select of constants. Now verify that
3855 // the other binary operator operand is either a constant, or we can handle a
3856 // variable.
3857 bool CanFoldNonConst =
3858 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3859 (isNullOrNullSplat(*SelectLHS, MRI) ||
3860 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3861 (isNullOrNullSplat(*SelectRHS, MRI) ||
3862 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3863 if (CanFoldNonConst)
3864 return true;
3865
3866 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3867 /*AllowFP*/ true,
3868 /*AllowOpaqueConstants*/ false);
3869}
3870
3871/// \p SelectOperand is the operand in binary operator \p MI that is the select
3872/// to fold.
3873void CombinerHelper::applyFoldBinOpIntoSelect(
3874 MachineInstr &MI, const unsigned &SelectOperand) const {
3875 Register Dst = MI.getOperand(0).getReg();
3876 Register LHS = MI.getOperand(1).getReg();
3877 Register RHS = MI.getOperand(2).getReg();
3878 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3879
3880 Register SelectCond = Select->getOperand(1).getReg();
3881 Register SelectTrue = Select->getOperand(2).getReg();
3882 Register SelectFalse = Select->getOperand(3).getReg();
3883
3884 LLT Ty = MRI.getType(Dst);
3885 unsigned BinOpcode = MI.getOpcode();
3886
3887 Register FoldTrue, FoldFalse;
3888
3889 // We have a select-of-constants followed by a binary operator with a
3890 // constant. Eliminate the binop by pulling the constant math into the select.
3891 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3892 if (SelectOperand == 1) {
3893 // TODO: SelectionDAG verifies this actually constant folds before
3894 // committing to the combine.
3895
3896 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3897 FoldFalse =
3898 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3899 } else {
3900 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3901 FoldFalse =
3902 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3903 }
3904
3905 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3906 MI.eraseFromParent();
3907}
3908
3909std::optional<SmallVector<Register, 8>>
3910CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3911 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3912 // We want to detect if Root is part of a tree which represents a bunch
3913 // of loads being merged into a larger load. We'll try to recognize patterns
3914 // like, for example:
3915 //
3916 // Reg Reg
3917 // \ /
3918 // OR_1 Reg
3919 // \ /
3920 // OR_2
3921 // \ Reg
3922 // .. /
3923 // Root
3924 //
3925 // Reg Reg Reg Reg
3926 // \ / \ /
3927 // OR_1 OR_2
3928 // \ /
3929 // \ /
3930 // ...
3931 // Root
3932 //
3933 // Each "Reg" may have been produced by a load + some arithmetic. This
3934 // function will save each of them.
3935 SmallVector<Register, 8> RegsToVisit;
3936 SmallVector<const MachineInstr *, 8> Ors = {Root};
3937
3938 // In the "worst" case, we're dealing with a load for each byte. So, there
3939 // are at most #bytes - 1 ORs.
3940 const unsigned MaxIter =
3941 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3942 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3943 if (Ors.empty())
3944 break;
3945 const MachineInstr *Curr = Ors.pop_back_val();
3946 Register OrLHS = Curr->getOperand(1).getReg();
3947 Register OrRHS = Curr->getOperand(2).getReg();
3948
3949 // In the combine, we want to eliminate the entire tree.
3950 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3951 return std::nullopt;
3952
3953 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3954 // something that may be a load + arithmetic.
3955 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3956 Ors.push_back(Or);
3957 else
3958 RegsToVisit.push_back(OrLHS);
3959 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3960 Ors.push_back(Or);
3961 else
3962 RegsToVisit.push_back(OrRHS);
3963 }
3964
3965 // We're going to try and merge each register into a wider power-of-2 type,
3966 // so we ought to have an even number of registers.
3967 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3968 return std::nullopt;
3969 return RegsToVisit;
3970}
3971
3972/// Helper function for findLoadOffsetsForLoadOrCombine.
3973///
3974/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3975/// and then moving that value into a specific byte offset.
3976///
3977/// e.g. x[i] << 24
3978///
3979/// \returns The load instruction and the byte offset it is moved into.
3980static std::optional<std::pair<GZExtLoad *, int64_t>>
3981matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3982 const MachineRegisterInfo &MRI) {
3983 assert(MRI.hasOneNonDBGUse(Reg) &&
3984 "Expected Reg to only have one non-debug use?");
3985 Register MaybeLoad;
3986 int64_t Shift;
3987 if (!mi_match(Reg, MRI,
3988 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3989 Shift = 0;
3990 MaybeLoad = Reg;
3991 }
3992
3993 if (Shift % MemSizeInBits != 0)
3994 return std::nullopt;
3995
3996 // TODO: Handle other types of loads.
3997 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3998 if (!Load)
3999 return std::nullopt;
4000
4001 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4002 return std::nullopt;
4003
4004 return std::make_pair(Load, Shift / MemSizeInBits);
4005}
4006
4007std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4008CombinerHelper::findLoadOffsetsForLoadOrCombine(
4009 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
4010 const SmallVector<Register, 8> &RegsToVisit,
4011 const unsigned MemSizeInBits) const {
4012
4013 // Each load found for the pattern. There should be one for each RegsToVisit.
4014 SmallSetVector<const MachineInstr *, 8> Loads;
4015
4016 // The lowest index used in any load. (The lowest "i" for each x[i].)
4017 int64_t LowestIdx = INT64_MAX;
4018
4019 // The load which uses the lowest index.
4020 GZExtLoad *LowestIdxLoad = nullptr;
4021
4022 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4023 SmallSet<int64_t, 8> SeenIdx;
4024
4025 // Ensure each load is in the same MBB.
4026 // TODO: Support multiple MachineBasicBlocks.
4027 MachineBasicBlock *MBB = nullptr;
4028 const MachineMemOperand *MMO = nullptr;
4029
4030 // Earliest instruction-order load in the pattern.
4031 GZExtLoad *EarliestLoad = nullptr;
4032
4033 // Latest instruction-order load in the pattern.
4034 GZExtLoad *LatestLoad = nullptr;
4035
4036 // Base pointer which every load should share.
4037 Register BasePtr;
4038
4039 // We want to find a load for each register. Each load should have some
4040 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4041 // track of the load which uses the lowest index. Later, we will check if we
4042 // can use its pointer in the final, combined load.
4043 for (auto Reg : RegsToVisit) {
4044 // Find the load, and find the position that it will end up in (e.g. a
4045 // shifted) value.
4046 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4047 if (!LoadAndPos)
4048 return std::nullopt;
4049 GZExtLoad *Load;
4050 int64_t DstPos;
4051 std::tie(Load, DstPos) = *LoadAndPos;
4052
4053 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4054 // it is difficult to check for stores/calls/etc between loads.
4055 MachineBasicBlock *LoadMBB = Load->getParent();
4056 if (!MBB)
4057 MBB = LoadMBB;
4058 if (LoadMBB != MBB)
4059 return std::nullopt;
4060
4061 // Make sure that the MachineMemOperands of every seen load are compatible.
4062 auto &LoadMMO = Load->getMMO();
4063 if (!MMO)
4064 MMO = &LoadMMO;
4065 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4066 return std::nullopt;
4067
4068 // Find out what the base pointer and index for the load is.
4069 Register LoadPtr;
4070 int64_t Idx;
4071 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4072 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4073 LoadPtr = Load->getOperand(1).getReg();
4074 Idx = 0;
4075 }
4076
4077 // Don't combine things like a[i], a[i] -> a bigger load.
4078 if (!SeenIdx.insert(Idx).second)
4079 return std::nullopt;
4080
4081 // Every load must share the same base pointer; don't combine things like:
4082 //
4083 // a[i], b[i + 1] -> a bigger load.
4084 if (!BasePtr.isValid())
4085 BasePtr = LoadPtr;
4086 if (BasePtr != LoadPtr)
4087 return std::nullopt;
4088
4089 if (Idx < LowestIdx) {
4090 LowestIdx = Idx;
4091 LowestIdxLoad = Load;
4092 }
4093
4094 // Keep track of the byte offset that this load ends up at. If we have seen
4095 // the byte offset, then stop here. We do not want to combine:
4096 //
4097 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4098 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4099 return std::nullopt;
4100 Loads.insert(Load);
4101
4102 // Keep track of the position of the earliest/latest loads in the pattern.
4103 // We will check that there are no load fold barriers between them later
4104 // on.
4105 //
4106 // FIXME: Is there a better way to check for load fold barriers?
4107 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4108 EarliestLoad = Load;
4109 if (!LatestLoad || dominates(*LatestLoad, *Load))
4110 LatestLoad = Load;
4111 }
4112
4113 // We found a load for each register. Let's check if each load satisfies the
4114 // pattern.
4115 assert(Loads.size() == RegsToVisit.size() &&
4116 "Expected to find a load for each register?");
4117 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4118 LatestLoad && "Expected at least two loads?");
4119
4120 // Check if there are any stores, calls, etc. between any of the loads. If
4121 // there are, then we can't safely perform the combine.
4122 //
4123 // MaxIter is chosen based off the (worst case) number of iterations it
4124 // typically takes to succeed in the LLVM test suite plus some padding.
4125 //
4126 // FIXME: Is there a better way to check for load fold barriers?
4127 const unsigned MaxIter = 20;
4128 unsigned Iter = 0;
4129 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4130 LatestLoad->getIterator())) {
4131 if (Loads.count(&MI))
4132 continue;
4133 if (MI.isLoadFoldBarrier())
4134 return std::nullopt;
4135 if (Iter++ == MaxIter)
4136 return std::nullopt;
4137 }
4138
4139 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4140}
4141
4142bool CombinerHelper::matchLoadOrCombine(
4143 MachineInstr &MI,
4144 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4145 assert(MI.getOpcode() == TargetOpcode::G_OR);
4146 MachineFunction &MF = *MI.getMF();
4147 // Assuming a little-endian target, transform:
4148 // s8 *a = ...
4149 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4150 // =>
4151 // s32 val = *((i32)a)
4152 //
4153 // s8 *a = ...
4154 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4155 // =>
4156 // s32 val = BSWAP(*((s32)a))
4157 Register Dst = MI.getOperand(0).getReg();
4158 LLT Ty = MRI.getType(Dst);
4159 if (Ty.isVector())
4160 return false;
4161
4162 // We need to combine at least two loads into this type. Since the smallest
4163 // possible load is into a byte, we need at least a 16-bit wide type.
4164 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4165 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4166 return false;
4167
4168 // Match a collection of non-OR instructions in the pattern.
4169 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4170 if (!RegsToVisit)
4171 return false;
4172
4173 // We have a collection of non-OR instructions. Figure out how wide each of
4174 // the small loads should be based off of the number of potential loads we
4175 // found.
4176 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4177 if (NarrowMemSizeInBits % 8 != 0)
4178 return false;
4179
4180 // Check if each register feeding into each OR is a load from the same
4181 // base pointer + some arithmetic.
4182 //
4183 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4184 //
4185 // Also verify that each of these ends up putting a[i] into the same memory
4186 // offset as a load into a wide type would.
4187 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
4188 GZExtLoad *LowestIdxLoad, *LatestLoad;
4189 int64_t LowestIdx;
4190 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4191 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4192 if (!MaybeLoadInfo)
4193 return false;
4194 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4195
4196 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4197 // we found before, check if this corresponds to a big or little endian byte
4198 // pattern. If it does, then we can represent it using a load + possibly a
4199 // BSWAP.
4200 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4201 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4202 if (!IsBigEndian)
4203 return false;
4204 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4205 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4206 return false;
4207
4208 // Make sure that the load from the lowest index produces offset 0 in the
4209 // final value.
4210 //
4211 // This ensures that we won't combine something like this:
4212 //
4213 // load x[i] -> byte 2
4214 // load x[i+1] -> byte 0 ---> wide_load x[i]
4215 // load x[i+2] -> byte 1
4216 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4217 const unsigned ZeroByteOffset =
4218 *IsBigEndian
4219 ? bigEndianByteAt(NumLoadsInTy, 0)
4220 : littleEndianByteAt(NumLoadsInTy, 0);
4221 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4222 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4223 ZeroOffsetIdx->second != LowestIdx)
4224 return false;
4225
4226 // We will reuse the pointer from the load which ends up at byte offset 0. It
4227 // may not use index 0.
4228 Register Ptr = LowestIdxLoad->getPointerReg();
4229 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4230 LegalityQuery::MemDesc MMDesc(MMO);
4231 MMDesc.MemoryTy = Ty;
4232 if (!isLegalOrBeforeLegalizer(
4233 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4234 return false;
4235 auto PtrInfo = MMO.getPointerInfo();
4236 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4237
4238 // Load must be allowed and fast on the target.
4239 LLVMContext &C = MF.getFunction().getContext();
4240 auto &DL = MF.getDataLayout();
4241 unsigned Fast = 0;
4242 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4243 !Fast)
4244 return false;
4245
4246 MatchInfo = [=](MachineIRBuilder &MIB) {
4247 MIB.setInstrAndDebugLoc(*LatestLoad);
4248 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4249 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4250 if (NeedsBSwap)
4251 MIB.buildBSwap(Dst, LoadDst);
4252 };
4253 return true;
4254}
4255
4256bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
4257 MachineInstr *&ExtMI) const {
4258 auto &PHI = cast<GPhi>(MI);
4259 Register DstReg = PHI.getReg(0);
4260
4261 // TODO: Extending a vector may be expensive, don't do this until heuristics
4262 // are better.
4263 if (MRI.getType(DstReg).isVector())
4264 return false;
4265
4266 // Try to match a phi, whose only use is an extend.
4267 if (!MRI.hasOneNonDBGUse(DstReg))
4268 return false;
4269 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4270 switch (ExtMI->getOpcode()) {
4271 case TargetOpcode::G_ANYEXT:
4272 return true; // G_ANYEXT is usually free.
4273 case TargetOpcode::G_ZEXT:
4274 case TargetOpcode::G_SEXT:
4275 break;
4276 default:
4277 return false;
4278 }
4279
4280 // If the target is likely to fold this extend away, don't propagate.
4281 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4282 return false;
4283
4284 // We don't want to propagate the extends unless there's a good chance that
4285 // they'll be optimized in some way.
4286 // Collect the unique incoming values.
4287 SmallPtrSet<MachineInstr *, 4> InSrcs;
4288 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4289 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4290 switch (DefMI->getOpcode()) {
4291 case TargetOpcode::G_LOAD:
4292 case TargetOpcode::G_TRUNC:
4293 case TargetOpcode::G_SEXT:
4294 case TargetOpcode::G_ZEXT:
4295 case TargetOpcode::G_ANYEXT:
4296 case TargetOpcode::G_CONSTANT:
4297 InSrcs.insert(DefMI);
4298 // Don't try to propagate if there are too many places to create new
4299 // extends, chances are it'll increase code size.
4300 if (InSrcs.size() > 2)
4301 return false;
4302 break;
4303 default:
4304 return false;
4305 }
4306 }
4307 return true;
4308}
4309
4310void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
4311 MachineInstr *&ExtMI) const {
4312 auto &PHI = cast<GPhi>(MI);
4313 Register DstReg = ExtMI->getOperand(0).getReg();
4314 LLT ExtTy = MRI.getType(DstReg);
4315
4316 // Propagate the extension into the block of each incoming reg's block.
4317 // Use a SetVector here because PHIs can have duplicate edges, and we want
4318 // deterministic iteration order.
4319 SmallSetVector<MachineInstr *, 8> SrcMIs;
4320 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
4321 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4322 auto SrcReg = PHI.getIncomingValue(I);
4323 auto *SrcMI = MRI.getVRegDef(SrcReg);
4324 if (!SrcMIs.insert(SrcMI))
4325 continue;
4326
4327 // Build an extend after each src inst.
4328 auto *MBB = SrcMI->getParent();
4329 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4330 if (InsertPt != MBB->end() && InsertPt->isPHI())
4331 InsertPt = MBB->getFirstNonPHI();
4332
4333 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4334 Builder.setDebugLoc(MI.getDebugLoc());
4335 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4336 OldToNewSrcMap[SrcMI] = NewExt;
4337 }
4338
4339 // Create a new phi with the extended inputs.
4340 Builder.setInstrAndDebugLoc(MI);
4341 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4342 NewPhi.addDef(DstReg);
4343 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4344 if (!MO.isReg()) {
4345 NewPhi.addMBB(MO.getMBB());
4346 continue;
4347 }
4348 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4349 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4350 }
4351 Builder.insertInstr(NewPhi);
4352 ExtMI->eraseFromParent();
4353}
4354
4355bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
4356 Register &Reg) const {
4357 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4358 // If we have a constant index, look for a G_BUILD_VECTOR source
4359 // and find the source register that the index maps to.
4360 Register SrcVec = MI.getOperand(1).getReg();
4361 LLT SrcTy = MRI.getType(SrcVec);
4362 if (SrcTy.isScalableVector())
4363 return false;
4364
4365 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4366 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4367 return false;
4368
4369 unsigned VecIdx = Cst->Value.getZExtValue();
4370
4371 // Check if we have a build_vector or build_vector_trunc with an optional
4372 // trunc in front.
4373 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4374 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4375 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4376 }
4377
4378 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4379 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4380 return false;
4381
4382 EVT Ty(getMVTForLLT(SrcTy));
4383 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4384 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4385 return false;
4386
4387 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4388 return true;
4389}
4390
4391void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
4392 Register &Reg) const {
4393 // Check the type of the register, since it may have come from a
4394 // G_BUILD_VECTOR_TRUNC.
4395 LLT ScalarTy = MRI.getType(Reg);
4396 Register DstReg = MI.getOperand(0).getReg();
4397 LLT DstTy = MRI.getType(DstReg);
4398
4399 if (ScalarTy != DstTy) {
4400 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4401 Builder.buildTrunc(DstReg, Reg);
4402 MI.eraseFromParent();
4403 return;
4404 }
4405 replaceSingleDefInstWithReg(MI, Reg);
4406}
4407
4408bool CombinerHelper::matchExtractAllEltsFromBuildVector(
4409 MachineInstr &MI,
4410 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4411 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4412 // This combine tries to find build_vector's which have every source element
4413 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4414 // the masked load scalarization is run late in the pipeline. There's already
4415 // a combine for a similar pattern starting from the extract, but that
4416 // doesn't attempt to do it if there are multiple uses of the build_vector,
4417 // which in this case is true. Starting the combine from the build_vector
4418 // feels more natural than trying to find sibling nodes of extracts.
4419 // E.g.
4420 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4421 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4422 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4423 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4424 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4425 // ==>
4426 // replace ext{1,2,3,4} with %s{1,2,3,4}
4427
4428 Register DstReg = MI.getOperand(0).getReg();
4429 LLT DstTy = MRI.getType(DstReg);
4430 unsigned NumElts = DstTy.getNumElements();
4431
4432 SmallBitVector ExtractedElts(NumElts);
4433 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4434 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4435 return false;
4436 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4437 if (!Cst)
4438 return false;
4439 unsigned Idx = Cst->getZExtValue();
4440 if (Idx >= NumElts)
4441 return false; // Out of range.
4442 ExtractedElts.set(Idx);
4443 SrcDstPairs.emplace_back(
4444 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4445 }
4446 // Match if every element was extracted.
4447 return ExtractedElts.all();
4448}
4449
4450void CombinerHelper::applyExtractAllEltsFromBuildVector(
4451 MachineInstr &MI,
4452 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4453 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4454 for (auto &Pair : SrcDstPairs) {
4455 auto *ExtMI = Pair.second;
4456 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4457 ExtMI->eraseFromParent();
4458 }
4459 MI.eraseFromParent();
4460}
4461
4462void CombinerHelper::applyBuildFn(
4463 MachineInstr &MI,
4464 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4465 applyBuildFnNoErase(MI, MatchInfo);
4466 MI.eraseFromParent();
4467}
4468
4469void CombinerHelper::applyBuildFnNoErase(
4470 MachineInstr &MI,
4471 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4472 MatchInfo(Builder);
4473}
4474
4475bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
4476 BuildFnTy &MatchInfo) const {
4477 assert(MI.getOpcode() == TargetOpcode::G_OR);
4478
4479 Register Dst = MI.getOperand(0).getReg();
4480 LLT Ty = MRI.getType(Dst);
4481 unsigned BitWidth = Ty.getScalarSizeInBits();
4482
4483 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4484 unsigned FshOpc = 0;
4485
4486 // Match (or (shl ...), (lshr ...)).
4487 if (!mi_match(Dst, MRI,
4488 // m_GOr() handles the commuted version as well.
4489 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4490 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4491 return false;
4492
4493 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4494 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
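 // e.g. (illustrative, s32): (or (shl x, 8), (lshr y, 24)) can be emitted as
 // G_FSHR x, y, 24 (equivalently G_FSHL x, y, 8) because 8 + 24 == 32.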
4495 int64_t CstShlAmt, CstLShrAmt;
4496 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4497 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4498 CstShlAmt + CstLShrAmt == BitWidth) {
4499 FshOpc = TargetOpcode::G_FSHR;
4500 Amt = LShrAmt;
4501
4502 } else if (mi_match(LShrAmt, MRI,
4503 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4504 ShlAmt == Amt) {
4505 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4506 FshOpc = TargetOpcode::G_FSHL;
4507
4508 } else if (mi_match(ShlAmt, MRI,
4509 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4510 LShrAmt == Amt) {
4511 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4512 FshOpc = TargetOpcode::G_FSHR;
4513
4514 } else {
4515 return false;
4516 }
4517
4518 LLT AmtTy = MRI.getType(Amt);
4519 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4520 return false;
4521
4522 MatchInfo = [=](MachineIRBuilder &B) {
4523 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4524 };
4525 return true;
4526}
4527
4528/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4529bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) const {
4530 unsigned Opc = MI.getOpcode();
4531 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4532 Register X = MI.getOperand(1).getReg();
4533 Register Y = MI.getOperand(2).getReg();
4534 if (X != Y)
4535 return false;
4536 unsigned RotateOpc =
4537 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4538 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4539}
4540
4541void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) const {
4542 unsigned Opc = MI.getOpcode();
4543 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4544 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4545 Observer.changingInstr(MI);
4546 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4547 : TargetOpcode::G_ROTR));
4548 MI.removeOperand(2);
4549 Observer.changedInstr(MI);
4550}
4551
4552// Fold (rot x, c) -> (rot x, c % BitSize)
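// e.g. (illustrative, s32): G_ROTL x, 37 is rewritten to G_ROTL x, 5, since
// rotating by the full bit width is the identity.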
4553bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) const {
4554 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4555 MI.getOpcode() == TargetOpcode::G_ROTR);
4556 unsigned Bitsize =
4557 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4558 Register AmtReg = MI.getOperand(2).getReg();
4559 bool OutOfRange = false;
4560 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4561 if (auto *CI = dyn_cast<ConstantInt>(C))
4562 OutOfRange |= CI->getValue().uge(Bitsize);
4563 return true;
4564 };
4565 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4566}
4567
4568void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) const {
4569 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4570 MI.getOpcode() == TargetOpcode::G_ROTR);
4571 unsigned Bitsize =
4572 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4573 Register Amt = MI.getOperand(2).getReg();
4574 LLT AmtTy = MRI.getType(Amt);
4575 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4576 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4577 Observer.changingInstr(MI);
4578 MI.getOperand(2).setReg(Amt);
4579 Observer.changedInstr(MI);
4580}
4581
4582bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4583 int64_t &MatchInfo) const {
4584 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4585 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4586
4587 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4588 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4589 // KnownBits on the LHS in two cases:
4590 //
4591 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4592 // we cannot do any transforms so we can safely bail out early.
4593 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4594 // >=0.
4595 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4596 if (KnownRHS.isUnknown())
4597 return false;
4598
4599 std::optional<bool> KnownVal;
4600 if (KnownRHS.isZero()) {
4601 // ? uge 0 -> always true
4602 // ? ult 0 -> always false
4603 if (Pred == CmpInst::ICMP_UGE)
4604 KnownVal = true;
4605 else if (Pred == CmpInst::ICMP_ULT)
4606 KnownVal = false;
4607 }
4608
4609 if (!KnownVal) {
4610 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4611 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4612 }
4613
4614 if (!KnownVal)
4615 return false;
4616 MatchInfo =
4617 *KnownVal
4618 ? getICmpTrueVal(getTargetLowering(),
4619 /*IsVector = */
4620 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4621 /* IsFP = */ false)
4622 : 0;
4623 return true;
4624}
4625
4626bool CombinerHelper::matchICmpToLHSKnownBits(
4627 MachineInstr &MI,
4628 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4629 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4630 // Given:
4631 //
4632 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4633 // %cmp = G_ICMP ne %x, 0
4634 //
4635 // Or:
4636 //
4637 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4638 // %cmp = G_ICMP eq %x, 1
4639 //
4640 // We can replace %cmp with %x assuming true is 1 on the target.
4641 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4642 if (!CmpInst::isEquality(Pred))
4643 return false;
4644 Register Dst = MI.getOperand(0).getReg();
4645 LLT DstTy = MRI.getType(Dst);
4646 if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
4647 /* IsFP = */ false) != 1)
4648 return false;
4649 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4650 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4651 return false;
4652 Register LHS = MI.getOperand(2).getReg();
4653 auto KnownLHS = VT->getKnownBits(LHS);
4654 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4655 return false;
4656 // Make sure replacing Dst with the LHS is a legal operation.
4657 LLT LHSTy = MRI.getType(LHS);
4658 unsigned LHSSize = LHSTy.getSizeInBits();
4659 unsigned DstSize = DstTy.getSizeInBits();
4660 unsigned Op = TargetOpcode::COPY;
4661 if (DstSize != LHSSize)
4662 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4663 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4664 return false;
4665 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4666 return true;
4667}
4668
4669// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
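// e.g. (illustrative): (and (or x, 0xF0), 0x0F) -> (and x, 0x0F), because the
// OR can only set bits that the outer mask clears anyway.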
4670bool CombinerHelper::matchAndOrDisjointMask(
4671 MachineInstr &MI,
4672 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4673 assert(MI.getOpcode() == TargetOpcode::G_AND);
4674
4675 // Ignore vector types to simplify matching the two constants.
4676 // TODO: do this for vectors and scalars via a demanded bits analysis.
4677 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4678 if (Ty.isVector())
4679 return false;
4680
4681 Register Src;
4682 Register AndMaskReg;
4683 int64_t AndMaskBits;
4684 int64_t OrMaskBits;
4685 if (!mi_match(MI, MRI,
4686 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4687 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4688 return false;
4689
4690 // Check if OrMask could turn on any bits in Src.
4691 if (AndMaskBits & OrMaskBits)
4692 return false;
4693
4694 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4695 Observer.changingInstr(MI);
4696 // Canonicalize the result to have the constant on the RHS.
4697 if (MI.getOperand(1).getReg() == AndMaskReg)
4698 MI.getOperand(2).setReg(AndMaskReg);
4699 MI.getOperand(1).setReg(Src);
4700 Observer.changedInstr(MI);
4701 };
4702 return true;
4703}
4704
4705/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4706bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4707 MachineInstr &MI,
4708 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4709 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4710 Register Dst = MI.getOperand(0).getReg();
4711 Register Src = MI.getOperand(1).getReg();
4712 LLT Ty = MRI.getType(Src);
4713 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4714 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4715 return false;
4716 int64_t Width = MI.getOperand(2).getImm();
4717 Register ShiftSrc;
4718 int64_t ShiftImm;
4719 if (!mi_match(
4720 Src, MRI,
4721 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4722 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4723 return false;
4724 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4725 return false;
4726
4727 MatchInfo = [=](MachineIRBuilder &B) {
4728 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4729 auto Cst2 = B.buildConstant(ExtractTy, Width);
4730 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4731 };
4732 return true;
4733}
4734
4735/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4737 BuildFnTy &MatchInfo) const {
4738 GAnd *And = cast<GAnd>(&MI);
4739 Register Dst = And->getReg(0);
4740 LLT Ty = MRI.getType(Dst);
4741 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4742 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4743 // into account.
4744 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4745 return false;
4746
4747 int64_t AndImm, LSBImm;
4748 Register ShiftSrc;
4749 const unsigned Size = Ty.getScalarSizeInBits();
4750 if (!mi_match(And->getReg(0), MRI,
4751 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4752 m_ICst(AndImm))))
4753 return false;
4754
4755 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
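 // e.g. (illustrative): 0x0F passes the check (0x0F & 0x10 == 0) and extracts
 // 4 bits, while 0x14 fails (0x14 & 0x15 != 0) because the mask has a hole.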
4756 auto MaybeMask = static_cast<uint64_t>(AndImm);
4757 if (MaybeMask & (MaybeMask + 1))
4758 return false;
4759
4760 // LSB must fit within the register.
4761 if (static_cast<uint64_t>(LSBImm) >= Size)
4762 return false;
4763
4764 uint64_t Width = APInt(Size, AndImm).countr_one();
4765 MatchInfo = [=](MachineIRBuilder &B) {
4766 auto WidthCst = B.buildConstant(ExtractTy, Width);
4767 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4768 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4769 };
4770 return true;
4771}
4772
4773bool CombinerHelper::matchBitfieldExtractFromShr(
4774 MachineInstr &MI,
4775 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4776 const unsigned Opcode = MI.getOpcode();
4777 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4778
4779 const Register Dst = MI.getOperand(0).getReg();
4780
4781 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4782 ? TargetOpcode::G_SBFX
4783 : TargetOpcode::G_UBFX;
4784
4785 // Check if the type we would use for the extract is legal
4786 LLT Ty = MRI.getType(Dst);
4787 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4788 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4789 return false;
4790
4791 Register ShlSrc;
4792 int64_t ShrAmt;
4793 int64_t ShlAmt;
4794 const unsigned Size = Ty.getScalarSizeInBits();
4795
4796 // Try to match shr (shl x, c1), c2
4797 if (!mi_match(Dst, MRI,
4798 m_BinOp(Opcode,
4799 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4800 m_ICst(ShrAmt))))
4801 return false;
4802
4803 // Make sure that the shift sizes can fit a bitfield extract
4804 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4805 return false;
4806
4807 // Skip this combine if the G_SEXT_INREG combine could handle it
4808 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4809 return false;
4810
4811 // Calculate start position and width of the extract
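 // e.g. (illustrative, s32): for (lshr (shl x, 8), 24) this yields
 // Pos = 24 - 8 = 16 and Width = 32 - 24 = 8, i.e. bits [16, 24) of x.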
4812 const int64_t Pos = ShrAmt - ShlAmt;
4813 const int64_t Width = Size - ShrAmt;
4814
4815 MatchInfo = [=](MachineIRBuilder &B) {
4816 auto WidthCst = B.buildConstant(ExtractTy, Width);
4817 auto PosCst = B.buildConstant(ExtractTy, Pos);
4818 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4819 };
4820 return true;
4821}
4822
4823bool CombinerHelper::matchBitfieldExtractFromShrAnd(
4824 MachineInstr &MI,
4825 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4826 const unsigned Opcode = MI.getOpcode();
4827 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4828
4829 const Register Dst = MI.getOperand(0).getReg();
4830 LLT Ty = MRI.getType(Dst);
4831 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4832 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4833 return false;
4834
4835 // Try to match shr (and x, c1), c2
4836 Register AndSrc;
4837 int64_t ShrAmt;
4838 int64_t SMask;
4839 if (!mi_match(Dst, MRI,
4840 m_BinOp(Opcode,
4841 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4842 m_ICst(ShrAmt))))
4843 return false;
4844
4845 const unsigned Size = Ty.getScalarSizeInBits();
4846 if (ShrAmt < 0 || ShrAmt >= Size)
4847 return false;
4848
4849 // If the shift subsumes the mask, emit the 0 directly.
4850 if (0 == (SMask >> ShrAmt)) {
4851 MatchInfo = [=](MachineIRBuilder &B) {
4852 B.buildConstant(Dst, 0);
4853 };
4854 return true;
4855 }
4856
4857 // Check that ubfx can do the extraction, with no holes in the mask.
4858 uint64_t UMask = SMask;
4859 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4860 UMask &= maskTrailingOnes<uint64_t>(Size);
4861 if (!isMask_64(UMask))
4862 return false;
4863
4864 // Calculate start position and width of the extract.
4865 const int64_t Pos = ShrAmt;
4866 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4867
4868 // It's preferable to keep the shift, rather than form G_SBFX.
4869 // TODO: remove the G_AND via demanded bits analysis.
4870 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4871 return false;
4872
4873 MatchInfo = [=](MachineIRBuilder &B) {
4874 auto WidthCst = B.buildConstant(ExtractTy, Width);
4875 auto PosCst = B.buildConstant(ExtractTy, Pos);
4876 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4877 };
4878 return true;
4879}
4880
4881bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4882 MachineInstr &MI) const {
4883 auto &PtrAdd = cast<GPtrAdd>(MI);
4884
4885 Register Src1Reg = PtrAdd.getBaseReg();
4886 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4887 if (!Src1Def)
4888 return false;
4889
4890 Register Src2Reg = PtrAdd.getOffsetReg();
4891
4892 if (MRI.hasOneNonDBGUse(Src1Reg))
4893 return false;
4894
4895 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4896 if (!C1)
4897 return false;
4898 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4899 if (!C2)
4900 return false;
4901
4902 const APInt &C1APIntVal = *C1;
4903 const APInt &C2APIntVal = *C2;
4904 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4905
4906 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4907 // This combine may end up running before ptrtoint/inttoptr combines
4908 // manage to eliminate redundant conversions, so try to look through them.
4909 MachineInstr *ConvUseMI = &UseMI;
4910 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4911 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4912 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4913 Register DefReg = ConvUseMI->getOperand(0).getReg();
4914 if (!MRI.hasOneNonDBGUse(DefReg))
4915 break;
4916 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4917 ConvUseOpc = ConvUseMI->getOpcode();
4918 }
4919 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4920 if (!LdStMI)
4921 continue;
4922 // Is x[offset2] already not a legal addressing mode? If so then
4923 // reassociating the constants breaks nothing (we test offset2 because
4924 // that's the one we hope to fold into the load or store).
4925 TargetLoweringBase::AddrMode AM;
4926 AM.HasBaseReg = true;
4927 AM.BaseOffs = C2APIntVal.getSExtValue();
4928 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4929 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4930 PtrAdd.getMF()->getFunction().getContext());
4931 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4932 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4933 AccessTy, AS))
4934 continue;
4935
4936 // Would x[offset1+offset2] still be a legal addressing mode?
4937 AM.BaseOffs = CombinedValue;
4938 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4939 AccessTy, AS))
4940 return true;
4941 }
4942
4943 return false;
4944}
4945
4946bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
4947 MachineInstr *RHS,
4948 BuildFnTy &MatchInfo) const {
4949 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4950 Register Src1Reg = MI.getOperand(1).getReg();
4951 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4952 return false;
4953 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4954 if (!C2)
4955 return false;
4956
4957 // If both additions are nuw, the reassociated additions are also nuw.
4958 // If the original G_PTR_ADD is additionally nusw, X and C are both not
4959 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
4960 // therefore also nusw.
4961 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
4962 // the new G_PTR_ADDs are then also inbounds.
4963 unsigned PtrAddFlags = MI.getFlags();
4964 unsigned AddFlags = RHS->getFlags();
4965 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
4966 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
4967 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
4968 unsigned Flags = 0;
4969 if (IsNoUWrap)
4970 Flags |= MachineInstr::MIFlag::NoUWrap;
4971 if (IsNoUSWrap)
4972 Flags |= MachineInstr::MIFlag::NoUSWrap;
4973 if (IsInBounds)
4974 Flags |= MachineInstr::MIFlag::InBounds;
4975
4976 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4977 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4978
4979 auto NewBase =
4980 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
4981 Observer.changingInstr(MI);
4982 MI.getOperand(1).setReg(NewBase.getReg(0));
4983 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4984 MI.setFlags(Flags);
4985 Observer.changedInstr(MI);
4986 };
4987 return !reassociationCanBreakAddressingModePattern(MI);
4988}
4989
4990bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
4991 MachineInstr *LHS,
4992 MachineInstr *RHS,
4993 BuildFnTy &MatchInfo) const {
4994 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
4995 // if and only if (G_PTR_ADD X, C) has one use.
4996 Register LHSBase;
4997 std::optional<ValueAndVReg> LHSCstOff;
4998 if (!mi_match(MI.getBaseReg(), MRI,
4999 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5000 return false;
5001
5002 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5003
5004 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5005 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5006 // so the new G_PTR_ADDs are also inbounds.
5007 unsigned PtrAddFlags = MI.getFlags();
5008 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5009 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5010 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5011 MachineInstr::MIFlag::NoUSWrap);
5012 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5013 MachineInstr::MIFlag::InBounds);
5014 unsigned Flags = 0;
5015 if (IsNoUWrap)
5016 Flags |= MachineInstr::MIFlag::NoUWrap;
5017 if (IsNoUSWrap)
5018 Flags |= MachineInstr::MIFlag::NoUSWrap;
5019 if (IsInBounds)
5020 Flags |= MachineInstr::MIFlag::InBounds;
5021
5022 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5023 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5024 // before its def. Sink the instruction so it comes before the outer PTR_ADD
5025 // to ensure this doesn't happen.
5026 LHSPtrAdd->moveBefore(&MI);
5027 Register RHSReg = MI.getOffsetReg();
5028 // Setting the VReg directly would cause a type mismatch if it comes from an
5028 // extend/trunc.
5029 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
5030 Observer.changingInstr(MI);
5031 MI.getOperand(2).setReg(NewCst.getReg(0));
5032 MI.setFlags(Flags);
5033 Observer.changedInstr(MI);
5034 Observer.changingInstr(*LHSPtrAdd);
5035 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5036 LHSPtrAdd->setFlags(Flags);
5037 Observer.changedInstr(*LHSPtrAdd);
5038 };
5039 return !reassociationCanBreakAddressingModePattern(MI);
5040}
5041
5042bool CombinerHelper::matchReassocFoldConstantsInSubTree(
5043 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5044 BuildFnTy &MatchInfo) const {
5045 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5046 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5047 if (!LHSPtrAdd)
5048 return false;
5049
5050 Register Src2Reg = MI.getOperand(2).getReg();
5051 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5052 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5053 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5054 if (!C1)
5055 return false;
5056 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5057 if (!C2)
5058 return false;
5059
5060 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5061 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5062 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5063 // largest signed integer that fits into the index type, which is the maximum
5064 // size of allocated objects according to the IR Language Reference.
5065 unsigned PtrAddFlags = MI.getFlags();
5066 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5067 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5068 bool IsInBounds =
5069 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5070 unsigned Flags = 0;
5071 if (IsNoUWrap)
5072 Flags |= MachineInstr::MIFlag::NoUWrap;
5073 if (IsInBounds) {
5074 Flags |= MachineInstr::MIFlag::NoUSWrap;
5075 Flags |= MachineInstr::MIFlag::InBounds;
5076 }
5077
5078 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5079 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5080 Observer.changingInstr(MI);
5081 MI.getOperand(1).setReg(LHSSrc1);
5082 MI.getOperand(2).setReg(NewCst.getReg(0));
5083 MI.setFlags(Flags);
5084 Observer.changedInstr(MI);
5085 };
5086 return !reassociationCanBreakAddressingModePattern(MI);
5087}
5088
5089 bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
5090 BuildFnTy &MatchInfo) const {
5091 auto &PtrAdd = cast<GPtrAdd>(MI);
5092 // We're trying to match a few pointer computation patterns here for
5093 // re-association opportunities.
5094 // 1) Isolating a constant operand to be on the RHS, e.g.:
5095 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5096 //
5097 // 2) Folding two constants in each sub-tree as long as such folding
5098 // doesn't break a legal addressing mode.
5099 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5100 //
5101 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5102 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
5103 // iff (G_PTR_ADD X, C) has one use.
5104 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5105 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5106
5107 // Try to match example 2.
5108 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5109 return true;
5110
5111 // Try to match example 3.
5112 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5113 return true;
5114
5115 // Try to match example 1.
5116 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5117 return true;
5118
5119 return false;
5120}
5121 bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
5122 Register OpLHS, Register OpRHS,
5123 BuildFnTy &MatchInfo) const {
5124 LLT OpRHSTy = MRI.getType(OpRHS);
5125 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5126
5127 if (OpLHSDef->getOpcode() != Opc)
5128 return false;
5129
5130 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5131 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5132 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5133
5134 // If the inner op is (X op C), pull the constant out so it can be folded with
5135 // other constants in the expression tree. Folding is not guaranteed so we
5136 // might have (C1 op C2). In that case do not pull a constant out because it
5137 // won't help and can lead to infinite loops.
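 // E.g. (add (add x, 7), 5) -> (add x, (add 7, 5)), which a later constant
 // fold collapses into (add x, 12).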
5138 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5139 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5140 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5141 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5142 MatchInfo = [=](MachineIRBuilder &B) {
5143 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5144 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5145 };
5146 return true;
5147 }
5148 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5149 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5150 // iff (op x, c1) has one use
5151 MatchInfo = [=](MachineIRBuilder &B) {
5152 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5153 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5154 };
5155 return true;
5156 }
5157 }
5158
5159 return false;
5160}
5161
5162 bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
5163 BuildFnTy &MatchInfo) const {
5164 // We don't check if the reassociation will break a legal addressing mode
5165 // here since pointer arithmetic is handled by G_PTR_ADD.
5166 unsigned Opc = MI.getOpcode();
5167 Register DstReg = MI.getOperand(0).getReg();
5168 Register LHSReg = MI.getOperand(1).getReg();
5169 Register RHSReg = MI.getOperand(2).getReg();
5170
5171 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5172 return true;
5173 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5174 return true;
5175 return false;
5176}
5177
5178 bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI,
5179 APInt &MatchInfo) const {
5180 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5181 Register SrcOp = MI.getOperand(1).getReg();
5182
5183 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5184 MatchInfo = *MaybeCst;
5185 return true;
5186 }
5187
5188 return false;
5189}
5190
5191 bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI,
5192 APInt &MatchInfo) const {
5193 Register Op1 = MI.getOperand(1).getReg();
5194 Register Op2 = MI.getOperand(2).getReg();
5195 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5196 if (!MaybeCst)
5197 return false;
5198 MatchInfo = *MaybeCst;
5199 return true;
5200}
5201
5202 bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI,
5203 ConstantFP *&MatchInfo) const {
5204 Register Op1 = MI.getOperand(1).getReg();
5205 Register Op2 = MI.getOperand(2).getReg();
5206 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5207 if (!MaybeCst)
5208 return false;
5209 MatchInfo =
5210 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5211 return true;
5212}
5213
5214 bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
5215 ConstantFP *&MatchInfo) const {
5216 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5217 MI.getOpcode() == TargetOpcode::G_FMAD);
5218 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5219
5220 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5221 if (!Op3Cst)
5222 return false;
5223
5224 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5225 if (!Op2Cst)
5226 return false;
5227
5228 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5229 if (!Op1Cst)
5230 return false;
5231
5232 APFloat Op1F = Op1Cst->getValueAPF();
5233 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5234 APFloat::rmNearestTiesToEven);
5235 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5236 return true;
5237}
5238
5239 bool CombinerHelper::matchNarrowBinopFeedingAnd(
5240 MachineInstr &MI,
5241 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5242 // Look for a binop feeding into an AND with a mask:
5243 //
5244 // %add = G_ADD %lhs, %rhs
5245 // %and = G_AND %add, 000...11111111
5246 //
5247 // Check if it's possible to perform the binop at a narrower width and zext
5248 // back to the original width like so:
5249 //
5250 // %narrow_lhs = G_TRUNC %lhs
5251 // %narrow_rhs = G_TRUNC %rhs
5252 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5253 // %new_add = G_ZEXT %narrow_add
5254 // %and = G_AND %new_add, 000...11111111
5255 //
5256 // This can allow later combines to eliminate the G_AND if it turns out
5257 // that the mask is irrelevant.
5258 assert(MI.getOpcode() == TargetOpcode::G_AND);
5259 Register Dst = MI.getOperand(0).getReg();
5260 Register AndLHS = MI.getOperand(1).getReg();
5261 Register AndRHS = MI.getOperand(2).getReg();
5262 LLT WideTy = MRI.getType(Dst);
5263
5264 // If the potential binop has more than one use, then it's possible that one
5265 // of those uses will need its full width.
5266 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5267 return false;
5268
5269 // Check if the LHS feeding the AND is impacted by the high bits that we're
5270 // masking out.
5271 //
5272 // e.g. for 64-bit x, y:
5273 //
5274 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5275 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5276 if (!LHSInst)
5277 return false;
5278 unsigned LHSOpc = LHSInst->getOpcode();
5279 switch (LHSOpc) {
5280 default:
5281 return false;
5282 case TargetOpcode::G_ADD:
5283 case TargetOpcode::G_SUB:
5284 case TargetOpcode::G_MUL:
5285 case TargetOpcode::G_AND:
5286 case TargetOpcode::G_OR:
5287 case TargetOpcode::G_XOR:
5288 break;
5289 }
5290
5291 // Find the mask on the RHS.
5292 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5293 if (!Cst)
5294 return false;
5295 auto Mask = Cst->Value;
5296 if (!Mask.isMask())
5297 return false;
5298
5299 // No point in combining if there's nothing to truncate.
5300 unsigned NarrowWidth = Mask.countr_one();
5301 if (NarrowWidth == WideTy.getSizeInBits())
5302 return false;
5303 LLT NarrowTy = LLT::scalar(NarrowWidth);
5304
5305 // Check if adding the zext + truncates could be harmful.
5306 auto &MF = *MI.getMF();
5307 const auto &TLI = getTargetLowering();
5308 LLVMContext &Ctx = MF.getFunction().getContext();
5309 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5310 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5311 return false;
5312 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5313 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5314 return false;
5315 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5316 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5317 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5318 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5319 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5320 auto NarrowBinOp =
5321 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5322 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5323 Observer.changingInstr(MI);
5324 MI.getOperand(1).setReg(Ext.getReg(0));
5325 Observer.changedInstr(MI);
5326 };
5327 return true;
5328}
5329
5330 bool CombinerHelper::matchMulOBy2(MachineInstr &MI,
5331 BuildFnTy &MatchInfo) const {
5332 unsigned Opc = MI.getOpcode();
5333 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5334
5335 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5336 return false;
5337
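 // (G_*MULO x, 2) -> (G_*ADDO x, x): doubling x overflows exactly when x + x
 // does, so the multiply can be rewritten as an add with the same carry-out.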
5338 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5339 Observer.changingInstr(MI);
5340 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5341 : TargetOpcode::G_SADDO;
5342 MI.setDesc(Builder.getTII().get(NewOpc));
5343 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5344 Observer.changedInstr(MI);
5345 };
5346 return true;
5347}
5348
5349 bool CombinerHelper::matchMulOBy0(MachineInstr &MI,
5350 BuildFnTy &MatchInfo) const {
5351 // (G_*MULO x, 0) -> 0 + no carry out
5352 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5353 MI.getOpcode() == TargetOpcode::G_SMULO);
5354 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5355 return false;
5356 Register Dst = MI.getOperand(0).getReg();
5357 Register Carry = MI.getOperand(1).getReg();
5358 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5359 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5360 return false;
5361 MatchInfo = [=](MachineIRBuilder &B) {
5362 B.buildConstant(Dst, 0);
5363 B.buildConstant(Carry, 0);
5364 };
5365 return true;
5366}
5367
5368 bool CombinerHelper::matchAddEToAddO(MachineInstr &MI,
5369 BuildFnTy &MatchInfo) const {
5370 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5371 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5372 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5373 MI.getOpcode() == TargetOpcode::G_SADDE ||
5374 MI.getOpcode() == TargetOpcode::G_USUBE ||
5375 MI.getOpcode() == TargetOpcode::G_SSUBE);
5376 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5377 return false;
5378 MatchInfo = [&](MachineIRBuilder &B) {
5379 unsigned NewOpcode;
5380 switch (MI.getOpcode()) {
5381 case TargetOpcode::G_UADDE:
5382 NewOpcode = TargetOpcode::G_UADDO;
5383 break;
5384 case TargetOpcode::G_SADDE:
5385 NewOpcode = TargetOpcode::G_SADDO;
5386 break;
5387 case TargetOpcode::G_USUBE:
5388 NewOpcode = TargetOpcode::G_USUBO;
5389 break;
5390 case TargetOpcode::G_SSUBE:
5391 NewOpcode = TargetOpcode::G_SSUBO;
5392 break;
5393 }
5394 Observer.changingInstr(MI);
5395 MI.setDesc(B.getTII().get(NewOpcode));
5396 MI.removeOperand(4);
5397 Observer.changedInstr(MI);
5398 };
5399 return true;
5400}
5401
5402 bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
5403 BuildFnTy &MatchInfo) const {
5404 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5405 Register Dst = MI.getOperand(0).getReg();
5406 // (x + y) - z -> x (if y == z)
5407 // (x + y) - z -> y (if x == z)
5408 Register X, Y, Z;
5409 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5410 Register ReplaceReg;
5411 int64_t CstX, CstY;
5412 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5413 mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5414 ReplaceReg = X;
5415 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5416 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5417 ReplaceReg = Y;
5418 if (ReplaceReg) {
5419 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5420 return true;
5421 }
5422 }
5423
5424 // x - (y + z) -> 0 - y (if x == z)
5425 // x - (y + z) -> 0 - z (if x == y)
5426 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5427 Register ReplaceReg;
5428 int64_t CstX;
5429 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5430 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5431 ReplaceReg = Y;
5432 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5433 mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5434 ReplaceReg = Z;
5435 if (ReplaceReg) {
5436 MatchInfo = [=](MachineIRBuilder &B) {
5437 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5438 B.buildSub(Dst, Zero, ReplaceReg);
5439 };
5440 return true;
5441 }
5442 }
5443 return false;
5444}
5445
5446 MachineInstr *CombinerHelper::buildUDivOrURemUsingMul(MachineInstr &MI) const {
5447 unsigned Opcode = MI.getOpcode();
5448 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5449 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5450 Register Dst = UDivorRem.getReg(0);
5451 Register LHS = UDivorRem.getReg(1);
5452 Register RHS = UDivorRem.getReg(2);
5453 LLT Ty = MRI.getType(Dst);
5454 LLT ScalarTy = Ty.getScalarType();
5455 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5456 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5457 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5458
5459 auto &MIB = Builder;
5460
5461 bool UseSRL = false;
5462 SmallVector<Register, 16> Shifts, Factors;
5463 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5464 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5465
5466 auto BuildExactUDIVPattern = [&](const Constant *C) {
5467 // Don't recompute inverses for each splat element.
5468 if (IsSplat && !Factors.empty()) {
5469 Shifts.push_back(Shifts[0]);
5470 Factors.push_back(Factors[0]);
5471 return true;
5472 }
5473
5474 auto *CI = cast<ConstantInt>(C);
5475 APInt Divisor = CI->getValue();
5476 unsigned Shift = Divisor.countr_zero();
5477 if (Shift) {
5478 Divisor.lshrInPlace(Shift);
5479 UseSRL = true;
5480 }
5481
5482 // Calculate the multiplicative inverse modulo BW.
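 // E.g. an exact udiv by 6 becomes a logical shift right by 1 followed by a
 // multiply with the multiplicative inverse of 3 modulo 2^BW: for an exact
 // division by an odd d, x/d == x * d^-1 (mod 2^BW).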
5483 APInt Factor = Divisor.multiplicativeInverse();
5484 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5485 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5486 return true;
5487 };
5488
5489 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5490 // Collect all magic values from the build vector.
5491 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5492 llvm_unreachable("Expected unary predicate match to succeed");
5493
5494 Register Shift, Factor;
5495 if (Ty.isVector()) {
5496 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5497 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5498 } else {
5499 Shift = Shifts[0];
5500 Factor = Factors[0];
5501 }
5502
5503 Register Res = LHS;
5504
5505 if (UseSRL)
5506 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5507
5508 return MIB.buildMul(Ty, Res, Factor);
5509 }
5510
5511 unsigned KnownLeadingZeros =
5512 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5513
5514 bool UseNPQ = false;
5515 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5516 auto BuildUDIVPattern = [&](const Constant *C) {
5517 auto *CI = cast<ConstantInt>(C);
5518 const APInt &Divisor = CI->getValue();
5519
5520 bool SelNPQ = false;
5521 APInt Magic(Divisor.getBitWidth(), 0);
5522 unsigned PreShift = 0, PostShift = 0;
5523
5524 // Magic algorithm doesn't work for division by 1. We need to emit a select
5525 // at the end.
5526 // TODO: Use undef values for divisor of 1.
5527 if (!Divisor.isOne()) {
5528
5529 // UnsignedDivisionByConstantInfo doesn't work correctly if the leading zeros
5530 // in the dividend exceed the leading zeros of the divisor.
5531 UnsignedDivisionByConstantInfo magics =
5532 UnsignedDivisionByConstantInfo::get(
5533 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5534
5535 Magic = std::move(magics.Magic);
5536
5537 assert(magics.PreShift < Divisor.getBitWidth() &&
5538 "We shouldn't generate an undefined shift!");
5539 assert(magics.PostShift < Divisor.getBitWidth() &&
5540 "We shouldn't generate an undefined shift!");
5541 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5542 PreShift = magics.PreShift;
5543 PostShift = magics.PostShift;
5544 SelNPQ = magics.IsAdd;
5545 }
5546
5547 PreShifts.push_back(
5548 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5549 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5550 NPQFactors.push_back(
5551 MIB.buildConstant(ScalarTy,
5552 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5553 : APInt::getZero(EltBits))
5554 .getReg(0));
5555 PostShifts.push_back(
5556 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5557 UseNPQ |= SelNPQ;
5558 return true;
5559 };
5560
5561 // Collect the shifts/magic values from each element.
5562 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5563 (void)Matched;
5564 assert(Matched && "Expected unary predicate match to succeed");
5565
5566 Register PreShift, PostShift, MagicFactor, NPQFactor;
5567 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5568 if (RHSDef) {
5569 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5570 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5571 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5572 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5573 } else {
5574 assert(MRI.getType(RHS).isScalar() &&
5575 "Non-build_vector operation should have been a scalar");
5576 PreShift = PreShifts[0];
5577 MagicFactor = MagicFactors[0];
5578 PostShift = PostShifts[0];
5579 }
5580
5581 Register Q = LHS;
5582 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5583
5584 // Multiply the numerator (operand 0) by the magic value.
5585 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5586
5587 if (UseNPQ) {
5588 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5589
5590 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5591 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
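 // This is the fixup ("add") path of the classic magic-number unsigned
 // division algorithm (Granlund-Montgomery / Hacker's Delight): when the magic
 // constant does not fit in the word, compute q = ((x - q) >> 1) + q before
 // applying the final post-shift.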
5592 if (Ty.isVector())
5593 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5594 else
5595 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5596
5597 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5598 }
5599
5600 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5601 auto One = MIB.buildConstant(Ty, 1);
5602 auto IsOne = MIB.buildICmp(
5603 CmpInst::Predicate::ICMP_EQ,
5604 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5605 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5606
5607 if (Opcode == TargetOpcode::G_UREM) {
5608 auto Prod = MIB.buildMul(Ty, ret, RHS);
5609 return MIB.buildSub(Ty, LHS, Prod);
5610 }
5611 return ret;
5612}
5613
5614 bool CombinerHelper::matchUDivOrURemByConst(MachineInstr &MI) const {
5615 unsigned Opcode = MI.getOpcode();
5616 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5617 Register Dst = MI.getOperand(0).getReg();
5618 Register RHS = MI.getOperand(2).getReg();
5619 LLT DstTy = MRI.getType(Dst);
5620
5621 auto &MF = *MI.getMF();
5622 AttributeList Attr = MF.getFunction().getAttributes();
5623 const auto &TLI = getTargetLowering();
5624 LLVMContext &Ctx = MF.getFunction().getContext();
5625 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5626 return false;
5627
5628 // Don't do this for minsize because the instruction sequence is usually
5629 // larger.
5630 if (MF.getFunction().hasMinSize())
5631 return false;
5632
5633 if (Opcode == TargetOpcode::G_UDIV &&
5634 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5635 return matchUnaryPredicate(
5636 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5637 }
5638
5639 auto *RHSDef = MRI.getVRegDef(RHS);
5640 if (!isConstantOrConstantVector(*RHSDef, MRI))
5641 return false;
5642
5643 // Don't do this if the types are not going to be legal.
5644 if (LI) {
5645 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5646 return false;
5647 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5648 return false;
5649 if (!isLegalOrBeforeLegalizer(
5650 {TargetOpcode::G_ICMP,
5651 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5652 DstTy}}))
5653 return false;
5654 if (Opcode == TargetOpcode::G_UREM &&
5655 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5656 return false;
5657 }
5658
5659 return matchUnaryPredicate(
5660 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5661}
5662
5663 void CombinerHelper::applyUDivOrURemByConst(MachineInstr &MI) const {
5664 auto *NewMI = buildUDivOrURemUsingMul(MI);
5665 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5666}
5667
5668 bool CombinerHelper::matchSDivOrSRemByConst(MachineInstr &MI) const {
5669 unsigned Opcode = MI.getOpcode();
5670 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5671 Register Dst = MI.getOperand(0).getReg();
5672 Register RHS = MI.getOperand(2).getReg();
5673 LLT DstTy = MRI.getType(Dst);
5674 auto SizeInBits = DstTy.getScalarSizeInBits();
5675 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5676
5677 auto &MF = *MI.getMF();
5678 AttributeList Attr = MF.getFunction().getAttributes();
5679 const auto &TLI = getTargetLowering();
5680 LLVMContext &Ctx = MF.getFunction().getContext();
5681 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5682 return false;
5683
5684 // Don't do this for minsize because the instruction sequence is usually
5685 // larger.
5686 if (MF.getFunction().hasMinSize())
5687 return false;
5688
5689 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5690 if (Opcode == TargetOpcode::G_SDIV &&
5691 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5692 return matchUnaryPredicate(
5693 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5694 }
5695
5696 auto *RHSDef = MRI.getVRegDef(RHS);
5697 if (!isConstantOrConstantVector(*RHSDef, MRI))
5698 return false;
5699
5700 // Don't do this if the types are not going to be legal.
5701 if (LI) {
5702 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5703 return false;
5704 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5705 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5706 return false;
5707 if (Opcode == TargetOpcode::G_SREM &&
5708 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5709 return false;
5710 }
5711
5712 return matchUnaryPredicate(
5713 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5714}
5715
5716 void CombinerHelper::applySDivOrSRemByConst(MachineInstr &MI) const {
5717 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5718 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5719}
5720
5721 MachineInstr *CombinerHelper::buildSDivOrSRemUsingMul(MachineInstr &MI) const {
5722 unsigned Opcode = MI.getOpcode();
5723 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5724 Opcode == TargetOpcode::G_SREM);
5725 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5726 Register Dst = SDivorRem.getReg(0);
5727 Register LHS = SDivorRem.getReg(1);
5728 Register RHS = SDivorRem.getReg(2);
5729 LLT Ty = MRI.getType(Dst);
5730 LLT ScalarTy = Ty.getScalarType();
5731 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5732 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5733 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5734 auto &MIB = Builder;
5735
5736 bool UseSRA = false;
5737 SmallVector<Register, 16> ExactShifts, ExactFactors;
5738
5739 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5740 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5741
5742 auto BuildExactSDIVPattern = [&](const Constant *C) {
5743 // Don't recompute inverses for each splat element.
5744 if (IsSplat && !ExactFactors.empty()) {
5745 ExactShifts.push_back(ExactShifts[0]);
5746 ExactFactors.push_back(ExactFactors[0]);
5747 return true;
5748 }
5749
5750 auto *CI = cast<ConstantInt>(C);
5751 APInt Divisor = CI->getValue();
5752 unsigned Shift = Divisor.countr_zero();
5753 if (Shift) {
5754 Divisor.ashrInPlace(Shift);
5755 UseSRA = true;
5756 }
5757
5758 // Calculate the multiplicative inverse modulo BW.
5759 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5760 APInt Factor = Divisor.multiplicativeInverse();
5761 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5762 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5763 return true;
5764 };
5765
5766 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5767 // Collect all magic values from the build vector.
5768 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5769 (void)Matched;
5770 assert(Matched && "Expected unary predicate match to succeed");
5771
5772 Register Shift, Factor;
5773 if (Ty.isVector()) {
5774 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5775 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5776 } else {
5777 Shift = ExactShifts[0];
5778 Factor = ExactFactors[0];
5779 }
5780
5781 Register Res = LHS;
5782
5783 if (UseSRA)
5784 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5785
5786 return MIB.buildMul(Ty, Res, Factor);
5787 }
5788
5789 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5790
5791 auto BuildSDIVPattern = [&](const Constant *C) {
5792 auto *CI = cast<ConstantInt>(C);
5793 const APInt &Divisor = CI->getValue();
5794
5795 SignedDivisionByConstantInfo Magics =
5796 SignedDivisionByConstantInfo::get(Divisor);
5797 int NumeratorFactor = 0;
5798 int ShiftMask = -1;
5799
5800 if (Divisor.isOne() || Divisor.isAllOnes()) {
5801 // If d is +1/-1, we just multiply the numerator by +1/-1.
5802 NumeratorFactor = Divisor.getSExtValue();
5803 Magics.Magic = 0;
5804 Magics.ShiftAmount = 0;
5805 ShiftMask = 0;
5806 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5807 // If d > 0 and m < 0, add the numerator.
5808 NumeratorFactor = 1;
5809 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5810 // If d < 0 and m > 0, subtract the numerator.
5811 NumeratorFactor = -1;
5812 }
5813
5814 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5815 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5816 Shifts.push_back(
5817 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5818 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5819
5820 return true;
5821 };
5822
5823 // Collect the shifts/magic values from each element.
5824 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5825 (void)Matched;
5826 assert(Matched && "Expected unary predicate match to succeed");
5827
5828 Register MagicFactor, Factor, Shift, ShiftMask;
5829 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5830 if (RHSDef) {
5831 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5832 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5833 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5834 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
5835 } else {
5836 assert(MRI.getType(RHS).isScalar() &&
5837 "Non-build_vector operation should have been a scalar");
5838 MagicFactor = MagicFactors[0];
5839 Factor = Factors[0];
5840 Shift = Shifts[0];
5841 ShiftMask = ShiftMasks[0];
5842 }
5843
5844 Register Q = LHS;
5845 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5846
5847 // (Optionally) Add/subtract the numerator using Factor.
5848 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
5849 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
5850
5851 // Shift right algebraic by shift value.
5852 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
5853
5854 // Extract the sign bit, mask it and add it to the quotient.
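 // Adding the sign bit of the shifted quotient rounds the signed result toward
 // zero; ShiftMask is 0 for divisors of +/-1, where no adjustment is needed.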
5855 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
5856 auto T = MIB.buildLShr(Ty, Q, SignShift);
5857 T = MIB.buildAnd(Ty, T, ShiftMask);
5858 auto ret = MIB.buildAdd(Ty, Q, T);
5859
5860 if (Opcode == TargetOpcode::G_SREM) {
5861 auto Prod = MIB.buildMul(Ty, ret, RHS);
5862 return MIB.buildSub(Ty, LHS, Prod);
5863 }
5864 return ret;
5865}
5866
5867 bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) const {
5868 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5869 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5870 "Expected SDIV or UDIV");
5871 auto &Div = cast<GenericMachineInstr>(MI);
5872 Register RHS = Div.getReg(2);
5873 auto MatchPow2 = [&](const Constant *C) {
5874 auto *CI = dyn_cast<ConstantInt>(C);
5875 return CI && (CI->getValue().isPowerOf2() ||
5876 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5877 };
5878 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5879}
5880
5881 void CombinerHelper::applySDivByPow2(MachineInstr &MI) const {
5882 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5883 auto &SDiv = cast<GenericMachineInstr>(MI);
5884 Register Dst = SDiv.getReg(0);
5885 Register LHS = SDiv.getReg(1);
5886 Register RHS = SDiv.getReg(2);
5887 LLT Ty = MRI.getType(Dst);
5888 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5889 LLT CCVT =
5890 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5891
5892 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5893 // to the following version:
5894 //
5895 // %c1 = G_CTTZ %rhs
5896 // %inexact = G_SUB $bitwidth, %c1
5897 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5898 // %lshr = G_LSHR %sign, %inexact
5899 // %add = G_ADD %lhs, %lshr
5900 // %ashr = G_ASHR %add, %c1
5901 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5902 // %zero = G_CONSTANT $0
5903 // %neg = G_NEG %ashr
5904 // %isneg = G_ICMP SLT %rhs, %zero
5905 // %res = G_SELECT %isneg, %neg, %ashr
5906
5907 unsigned BitWidth = Ty.getScalarSizeInBits();
5908 auto Zero = Builder.buildConstant(Ty, 0);
5909
5910 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5911 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5912 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5913 // Splat the sign bit into the register
5914 auto Sign = Builder.buildAShr(
5915 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5916
5917 // Add (LHS < 0) ? abs2 - 1 : 0;
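 // (abs2 is the power-of-two magnitude of the divisor.) An arithmetic shift
 // rounds toward negative infinity, so negative dividends are biased by
 // abs2 - 1 first to make the division round toward zero.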
5918 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5919 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5920 auto AShr = Builder.buildAShr(Ty, Add, C1);
5921
5922 // Special case: (sdiv X, 1) -> X
5923 // Special Case: (sdiv X, -1) -> 0-X
5924 auto One = Builder.buildConstant(Ty, 1);
5925 auto MinusOne = Builder.buildConstant(Ty, -1);
5926 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5927 auto IsMinusOne =
5928 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5929 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5930 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5931
5932 // If divided by a positive value, we're done. Otherwise, the result must be
5933 // negated.
5934 auto Neg = Builder.buildNeg(Ty, AShr);
5935 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5936 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5937 MI.eraseFromParent();
5938}
5939
5940 void CombinerHelper::applyUDivByPow2(MachineInstr &MI) const {
5941 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5942 auto &UDiv = cast<GenericMachineInstr>(MI);
5943 Register Dst = UDiv.getReg(0);
5944 Register LHS = UDiv.getReg(1);
5945 Register RHS = UDiv.getReg(2);
5946 LLT Ty = MRI.getType(Dst);
5947 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5948
5949 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5950 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5951 MI.eraseFromParent();
5952}
5953
5954 bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) const {
5955 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5956 Register RHS = MI.getOperand(2).getReg();
5957 Register Dst = MI.getOperand(0).getReg();
5958 LLT Ty = MRI.getType(Dst);
5959 LLT RHSTy = MRI.getType(RHS);
5960 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5961 auto MatchPow2ExceptOne = [&](const Constant *C) {
5962 if (auto *CI = dyn_cast<ConstantInt>(C))
5963 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5964 return false;
5965 };
5966 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5967 return false;
5968 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
5969 // get the log base 2, and it is not always legal on a target.
5970 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
5971 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
5972}
5973
5974 void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const {
5975 Register LHS = MI.getOperand(1).getReg();
5976 Register RHS = MI.getOperand(2).getReg();
5977 Register Dst = MI.getOperand(0).getReg();
5978 LLT Ty = MRI.getType(Dst);
5979 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5980 unsigned NumEltBits = Ty.getScalarSizeInBits();
5981
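 // umulh(x, 2^k) yields the high half of x * 2^k, which equals
 // x >> (NumEltBits - k); LogBase2 recovers k from the power-of-two operand.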
5982 auto LogBase2 = buildLogBase2(RHS, Builder);
5983 auto ShiftAmt =
5984 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5985 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5986 Builder.buildLShr(Dst, LHS, Trunc);
5987 MI.eraseFromParent();
5988}
5989
5991 Register &MatchInfo) const {
5992 Register Dst = MI.getOperand(0).getReg();
5993 Register Src = MI.getOperand(1).getReg();
5994 LLT DstTy = MRI.getType(Dst);
5995 LLT SrcTy = MRI.getType(Src);
5996 unsigned NumDstBits = DstTy.getScalarSizeInBits();
5997 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
5998 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
5999
6000 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6001 return false;
6002
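 // A saturating signed truncate clamps the source to the signed range of the
 // narrow type before truncating; accept either nesting order of the
 // smin/smax clamp.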
6003 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6004 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
6005 return mi_match(Src, MRI,
6006 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6007 m_SpecificICstOrSplat(SignedMin)),
6008 m_SpecificICstOrSplat(SignedMax))) ||
6009 mi_match(Src, MRI,
6010 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6011 m_SpecificICstOrSplat(SignedMax)),
6012 m_SpecificICstOrSplat(SignedMin)));
6013}
6014
6016 Register &MatchInfo) const {
6017 Register Dst = MI.getOperand(0).getReg();
6018 Builder.buildTruncSSatS(Dst, MatchInfo);
6019 MI.eraseFromParent();
6020}
6021
6023 Register &MatchInfo) const {
6024 Register Dst = MI.getOperand(0).getReg();
6025 Register Src = MI.getOperand(1).getReg();
6026 LLT DstTy = MRI.getType(Dst);
6027 LLT SrcTy = MRI.getType(Src);
6028 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6029 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6030 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6031
6032 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6033 return false;
6034 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6035 return mi_match(Src, MRI,
6037 m_SpecificICstOrSplat(UnsignedMax))) ||
6038 mi_match(Src, MRI,
6039 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6040 m_SpecificICstOrSplat(UnsignedMax)),
6041 m_SpecificICstOrSplat(0))) ||
6042 mi_match(Src, MRI,
6044 m_SpecificICstOrSplat(UnsignedMax)));
6045}
6046
6048 Register &MatchInfo) const {
6049 Register Dst = MI.getOperand(0).getReg();
6050 Builder.buildTruncSSatU(Dst, MatchInfo);
6051 MI.eraseFromParent();
6052}
6053
6055 MachineInstr &MinMI) const {
6056 Register Min = MinMI.getOperand(2).getReg();
6057 Register Val = MinMI.getOperand(1).getReg();
6058 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6059 LLT SrcTy = MRI.getType(Val);
6060 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6061 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6062 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6063
6064 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6065 return false;
6066 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6067 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6068 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6069}
6070
6072 MachineInstr &SrcMI) const {
6073 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6074 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6075
6076 return LI &&
6077 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6078}
6079
6080 bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
6081 BuildFnTy &MatchInfo) const {
6082 unsigned Opc = MI.getOpcode();
6083 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6084 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6085 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6086
6087 Register Dst = MI.getOperand(0).getReg();
6088 Register X = MI.getOperand(1).getReg();
6089 Register Y = MI.getOperand(2).getReg();
6090 LLT Type = MRI.getType(Dst);
6091
6092 // fold (fadd x, fneg(y)) -> (fsub x, y)
6093 // fold (fadd fneg(y), x) -> (fsub x, y)
6094 // G_FADD is commutative, so both cases are checked by m_GFAdd.
6095 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6096 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6097 Opc = TargetOpcode::G_FSUB;
6098 }
6099 /// fold (fsub x, fneg(y)) -> (fadd x, y)
6100 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6101 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6102 Opc = TargetOpcode::G_FADD;
6103 }
6104 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6105 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6106 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6107 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6108 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6109 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6110 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6111 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6112 // no opcode change
6113 } else
6114 return false;
6115
6116 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6117 Observer.changingInstr(MI);
6118 MI.setDesc(B.getTII().get(Opc));
6119 MI.getOperand(1).setReg(X);
6120 MI.getOperand(2).setReg(Y);
6121 Observer.changedInstr(MI);
6122 };
6123 return true;
6124}
6125
6126 bool CombinerHelper::matchFsubToFneg(MachineInstr &MI,
6127 Register &MatchInfo) const {
6128 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6129
6130 Register LHS = MI.getOperand(1).getReg();
6131 MatchInfo = MI.getOperand(2).getReg();
6132 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6133
6134 const auto LHSCst = Ty.isVector()
6135 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6136 : getFConstantVRegValWithLookThrough(LHS, MRI);
6137 if (!LHSCst)
6138 return false;
6139
6140 // -0.0 is always allowed
6141 if (LHSCst->Value.isNegZero())
6142 return true;
6143
6144 // +0.0 is only allowed if nsz is set.
6145 if (LHSCst->Value.isPosZero())
6146 return MI.getFlag(MachineInstr::FmNsz);
6147
6148 return false;
6149}
6150
6151 void CombinerHelper::applyFsubToFneg(MachineInstr &MI,
6152 Register &MatchInfo) const {
6153 Register Dst = MI.getOperand(0).getReg();
6154 Builder.buildFNeg(
6155 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6156 eraseInst(MI);
6157}
6158
6159/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6160/// due to global flags or MachineInstr flags.
6161static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6162 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6163 return false;
6164 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6165}
6166
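/// \returns true if \p MI0's result register has more non-debug uses than
/// \p MI1's. Used by the FMA-forming combines to prefer fusing the multiply
/// with fewer uses.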
6167static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6168 const MachineRegisterInfo &MRI) {
6169 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6170 MRI.use_instr_nodbg_end()) >
6171 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6172 MRI.use_instr_nodbg_end());
6173}
6174
6175 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
6176 bool &AllowFusionGlobally,
6177 bool &HasFMAD, bool &Aggressive,
6178 bool CanReassociate) const {
6179
6180 auto *MF = MI.getMF();
6181 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6182 const TargetOptions &Options = MF->getTarget().Options;
6183 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6184
6185 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6186 return false;
6187
6188 // Floating-point multiply-add with intermediate rounding.
6189 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6190 // Floating-point multiply-add without intermediate rounding.
6191 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6192 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6193 // No valid opcode, do not combine.
6194 if (!HasFMAD && !HasFMA)
6195 return false;
6196
6197 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6198 // If the addition is not contractable, do not combine.
6199 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6200 return false;
6201
6202 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6203 return true;
6204}
6205
6206 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
6207 MachineInstr &MI,
6208 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6209 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6210
6211 bool AllowFusionGlobally, HasFMAD, Aggressive;
6212 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6213 return false;
6214
6215 Register Op1 = MI.getOperand(1).getReg();
6216 Register Op2 = MI.getOperand(2).getReg();
6217 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6218 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6219 unsigned PreferredFusedOpcode =
6220 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6221
6222 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6223 // prefer to fold the multiply with fewer uses.
6224 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6225 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6226 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6227 std::swap(LHS, RHS);
6228 }
6229
6230 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6231 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6232 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6233 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6234 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6235 {LHS.MI->getOperand(1).getReg(),
6236 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6237 };
6238 return true;
6239 }
6240
6241 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6242 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6243 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6244 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6245 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6246 {RHS.MI->getOperand(1).getReg(),
6247 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6248 };
6249 return true;
6250 }
6251
6252 return false;
6253}
6254
6255 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
6256 MachineInstr &MI,
6257 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6258 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6259
6260 bool AllowFusionGlobally, HasFMAD, Aggressive;
6261 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6262 return false;
6263
6264 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6265 Register Op1 = MI.getOperand(1).getReg();
6266 Register Op2 = MI.getOperand(2).getReg();
6267 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6268 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6269 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6270
6271 unsigned PreferredFusedOpcode =
6272 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6273
6274 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6275 // prefer to fold the multiply with fewer uses.
6276 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6277 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6278 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6279 std::swap(LHS, RHS);
6280 }
6281
6282 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
6283 MachineInstr *FpExtSrc;
6284 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6285 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6286 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6287 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6288 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6289 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6290 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6291 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6292 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6293 };
6294 return true;
6295 }
6296
6297 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6298 // Note: Commutes FADD operands.
6299 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6300 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6301 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6302 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6303 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6304 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6305 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6306 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6307 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6308 };
6309 return true;
6310 }
6311
6312 return false;
6313}
6314
6315 bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
6316 MachineInstr &MI,
6317 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6318 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6319
6320 bool AllowFusionGlobally, HasFMAD, Aggressive;
6321 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6322 return false;
6323
6324 Register Op1 = MI.getOperand(1).getReg();
6325 Register Op2 = MI.getOperand(2).getReg();
6326 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6327 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6328 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6329
6330 unsigned PreferredFusedOpcode =
6331 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6332
6333 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6334 // prefer to fold the multiply with fewer uses.
6335 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6336 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6337 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6338 std::swap(LHS, RHS);
6339 }
6340
6341 MachineInstr *FMA = nullptr;
6342 Register Z;
6343 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6344 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6345 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6346 TargetOpcode::G_FMUL) &&
6347 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6348 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6349 FMA = LHS.MI;
6350 Z = RHS.Reg;
6351 }
6352 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6353 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6354 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6355 TargetOpcode::G_FMUL) &&
6356 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6357 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6358 Z = LHS.Reg;
6359 FMA = RHS.MI;
6360 }
6361
6362 if (FMA) {
6363 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6364 Register X = FMA->getOperand(1).getReg();
6365 Register Y = FMA->getOperand(2).getReg();
6366 Register U = FMulMI->getOperand(1).getReg();
6367 Register V = FMulMI->getOperand(2).getReg();
6368
6369 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6370 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6371 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6372 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6373 {X, Y, InnerFMA});
6374 };
6375 return true;
6376 }
6377
6378 return false;
6379}
6380
6381 bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMAAggressive(
6382 MachineInstr &MI,
6383 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6384 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6385
6386 bool AllowFusionGlobally, HasFMAD, Aggressive;
6387 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6388 return false;
6389
6390 if (!Aggressive)
6391 return false;
6392
6393 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6394 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6395 Register Op1 = MI.getOperand(1).getReg();
6396 Register Op2 = MI.getOperand(2).getReg();
6397 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6398 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6399
6400 unsigned PreferredFusedOpcode =
6401 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6402
6403 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6404 // prefer to fold the multiply with fewer uses.
6405 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6406 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6407 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6408 std::swap(LHS, RHS);
6409 }
6410
6411 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
6412 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6413 Register Y, MachineIRBuilder &B) {
6414 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6415 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6416 Register InnerFMA =
6417 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6418 .getReg(0);
6419 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6420 {X, Y, InnerFMA});
6421 };
6422
6423 MachineInstr *FMulMI, *FMAMI;
6424 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6425 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6426 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6427 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6428 m_GFPExt(m_MInstr(FMulMI))) &&
6429 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6430 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6431 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6432 MatchInfo = [=](MachineIRBuilder &B) {
6433 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6434 FMulMI->getOperand(2).getReg(), RHS.Reg,
6435 LHS.MI->getOperand(1).getReg(),
6436 LHS.MI->getOperand(2).getReg(), B);
6437 };
6438 return true;
6439 }
6440
6441 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6442 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6443 // FIXME: This turns two single-precision and one double-precision
6444 // operation into two double-precision operations, which might not be
6445 // interesting for all targets, especially GPUs.
6446 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6447 FMAMI->getOpcode() == PreferredFusedOpcode) {
6448 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6449 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6450 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6451 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6452 MatchInfo = [=](MachineIRBuilder &B) {
6453 Register X = FMAMI->getOperand(1).getReg();
6454 Register Y = FMAMI->getOperand(2).getReg();
6455 X = B.buildFPExt(DstType, X).getReg(0);
6456 Y = B.buildFPExt(DstType, Y).getReg(0);
6457 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6458 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6459 };
6460
6461 return true;
6462 }
6463 }
6464
6465 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
6466 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6467 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6468 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6469 m_GFPExt(m_MInstr(FMulMI))) &&
6470 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6471 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6472 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6473 MatchInfo = [=](MachineIRBuilder &B) {
6474 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6475 FMulMI->getOperand(2).getReg(), LHS.Reg,
6476 RHS.MI->getOperand(1).getReg(),
6477 RHS.MI->getOperand(2).getReg(), B);
6478 };
6479 return true;
6480 }
6481
6482 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
6483 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6484 // FIXME: This turns two single-precision and one double-precision
6485 // operation into two double-precision operations, which might not be
6486 // interesting for all targets, especially GPUs.
6487 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6488 FMAMI->getOpcode() == PreferredFusedOpcode) {
6489 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6490 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6491 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6492 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6493 MatchInfo = [=](MachineIRBuilder &B) {
6494 Register X = FMAMI->getOperand(1).getReg();
6495 Register Y = FMAMI->getOperand(2).getReg();
6496 X = B.buildFPExt(DstType, X).getReg(0);
6497 Y = B.buildFPExt(DstType, Y).getReg(0);
6498 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6499 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6500 };
6501 return true;
6502 }
6503 }
6504
6505 return false;
6506}
6507
6508 bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
6509 MachineInstr &MI,
6510 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6511 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6512
6513 bool AllowFusionGlobally, HasFMAD, Aggressive;
6514 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6515 return false;
6516
6517 Register Op1 = MI.getOperand(1).getReg();
6518 Register Op2 = MI.getOperand(2).getReg();
6519 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6520 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6521 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6522
6523 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6524 // prefer to fold the multiply with fewer uses.
6525 int FirstMulHasFewerUses = true;
6526 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6527 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6528 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6529 FirstMulHasFewerUses = false;
6530
6531 unsigned PreferredFusedOpcode =
6532 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6533
6534 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6535 if (FirstMulHasFewerUses &&
6536 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6537 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6538 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6539 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6540 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6541 {LHS.MI->getOperand(1).getReg(),
6542 LHS.MI->getOperand(2).getReg(), NegZ});
6543 };
6544 return true;
6545 }
6546 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6547 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6548 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6549 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6550 Register NegY =
6551 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6552 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6553 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6554 };
6555 return true;
6556 }
6557
6558 return false;
6559}
6560
6561 bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
6562 MachineInstr &MI,
6563 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6564 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6565
6566 bool AllowFusionGlobally, HasFMAD, Aggressive;
6567 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6568 return false;
6569
6570 Register LHSReg = MI.getOperand(1).getReg();
6571 Register RHSReg = MI.getOperand(2).getReg();
6572 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6573
6574 unsigned PreferredFusedOpcode =
6575 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6576
6577 MachineInstr *FMulMI;
6578 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6579 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6580 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6581 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6582 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6583 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6584 Register NegX =
6585 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6586 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6587 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6588 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6589 };
6590 return true;
6591 }
6592
6593 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
6594 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6595 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6596 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6597 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6598 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6599 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6600 {FMulMI->getOperand(1).getReg(),
6601 FMulMI->getOperand(2).getReg(), LHSReg});
6602 };
6603 return true;
6604 }
6605
6606 return false;
6607}
6608
6609 bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
6610 MachineInstr &MI,
6611 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6612 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6613
6614 bool AllowFusionGlobally, HasFMAD, Aggressive;
6615 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6616 return false;
6617
6618 Register LHSReg = MI.getOperand(1).getReg();
6619 Register RHSReg = MI.getOperand(2).getReg();
6620 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6621
6622 unsigned PreferredFusedOpcode =
6623 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6624
6625 MachineInstr *FMulMI;
6626 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6627 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6628 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6629 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6630 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6631 Register FpExtX =
6632 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6633 Register FpExtY =
6634 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6635 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6636 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6637 {FpExtX, FpExtY, NegZ});
6638 };
6639 return true;
6640 }
6641
6642 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6643 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6644 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6645 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6646 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6647 Register FpExtY =
6648 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6649 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6650 Register FpExtZ =
6651 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6652 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6653 {NegY, FpExtZ, LHSReg});
6654 };
6655 return true;
6656 }
6657
6658 return false;
6659}
6660
6663 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6664 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6665
6666 bool AllowFusionGlobally, HasFMAD, Aggressive;
6667 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6668 return false;
6669
6670 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6671 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6672 Register LHSReg = MI.getOperand(1).getReg();
6673 Register RHSReg = MI.getOperand(2).getReg();
6674
6675 unsigned PreferredFusedOpcode =
6676 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6677
6678 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6680 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6681 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6682 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6683 };
6684
6685 MachineInstr *FMulMI;
6686 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6687 // (fneg (fma (fpext x), (fpext y), z))
6688 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6689 // (fneg (fma (fpext x), (fpext y), z))
6690 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6691 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6692 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6693 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6694 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6695 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6696 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6697 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6698 FMulMI->getOperand(2).getReg(), RHSReg, B);
6699 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6700 };
6701 return true;
6702 }
6703
6704 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6705 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6706 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6707 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6708 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6709 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6710 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6711 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6712 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6713 FMulMI->getOperand(2).getReg(), LHSReg, B);
6714 };
6715 return true;
6716 }
6717
6718 return false;
6719}
6720
6722 unsigned &IdxToPropagate) const {
6723 bool PropagateNaN;
6724 switch (MI.getOpcode()) {
6725 default:
6726 return false;
6727 case TargetOpcode::G_FMINNUM:
6728 case TargetOpcode::G_FMAXNUM:
6729 PropagateNaN = false;
6730 break;
6731 case TargetOpcode::G_FMINIMUM:
6732 case TargetOpcode::G_FMAXIMUM:
6733 PropagateNaN = true;
6734 break;
6735 }
6736
6737 auto MatchNaN = [&](unsigned Idx) {
6738 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6739 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6740 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6741 return false;
6742 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6743 return true;
6744 };
6745
6746 return MatchNaN(1) || MatchNaN(2);
6747}
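// [Editorial sketch -- not part of CombinerHelper.cpp] A small standalone check
// of the NaN semantics the matcher above relies on; the helper name and
// #includes are assumptions. <cmath> only models the G_FMINNUM/G_FMAXNUM
// ("ignore NaN") flavour, so the propagating flavour is described in comments.
#include <cassert>
#include <cmath>

static void demoMinMaxNaNConstant() {
  double NaN = std::nan("");
  // G_FMINNUM/G_FMAXNUM ignore a NaN operand, so a constant-NaN operand lets
  // the combine replace the whole min/max with the *other* operand.
  assert(std::fmin(NaN, 2.0) == 2.0);
  assert(std::fmax(2.0, NaN) == 2.0);
  // G_FMINIMUM/G_FMAXIMUM propagate NaN instead, so there the combine replaces
  // the min/max with the NaN operand itself.
}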
6748
6749// Combine multiple FDIVs with the same divisor into multiple FMULs by the
6750// reciprocal.
6751// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
6753 MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
6754 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
6755
6756 Register X = MI.getOperand(1).getReg();
6757 Register Y = MI.getOperand(2).getReg();
6758
6759 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
6760 return false;
6761
6762 // Skip if current node is a reciprocal/fneg-reciprocal.
6763 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
6764 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
6765 return false;
6766
6767 // Exit early if the target does not want this transform or if there can't
6768 // possibly be enough uses of the divisor to make the transform worthwhile.
6769 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
6770 if (!MinUses)
6771 return false;
6772
6773 // Find all FDIV users of the same divisor. For the moment we limit all
6774 // instructions to a single BB and use the first Instr in MatchInfo as the
6775 // dominating position.
6776 MatchInfo.push_back(&MI);
6777 for (auto &U : MRI.use_nodbg_instructions(Y)) {
6778 if (&U == &MI || U.getParent() != MI.getParent())
6779 continue;
6780 if (U.getOpcode() == TargetOpcode::G_FDIV &&
6781 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y) {
6782 // This division is eligible for optimization only if global unsafe math
6783 // is enabled or if this division allows reciprocal formation.
6784 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
6785 MatchInfo.push_back(&U);
6786 if (dominates(U, *MatchInfo[0]))
6787 std::swap(MatchInfo[0], MatchInfo.back());
6788 }
6789 }
6790 }
6791
6792 // Now that we have the actual number of divisor uses, make sure it meets
6793 // the minimum threshold specified by the target.
6794 return MatchInfo.size() >= MinUses;
6795}
6796
6798 SmallVector<MachineInstr *> &MatchInfo) const {
6799 // Generate the new div at the position of the first instruction, which we have
6800 // ensured will dominate all other instructions.
6801 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
6802 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
6803 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
6804 MatchInfo[0]->getOperand(2).getReg(),
6805 MatchInfo[0]->getFlags());
6806
6807 // Replace all found div's with fmul instructions.
6808 for (MachineInstr *MI : MatchInfo) {
6809 Builder.setInsertPt(*MI->getParent(), MI);
6810 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
6811 Div->getOperand(0).getReg(), MI->getFlags());
6812 MI->eraseFromParent();
6813 }
6814}
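// [Editorial sketch -- not part of CombinerHelper.cpp] What the repeated-FDIV
// combine above does, written out in plain C++; the helper name and #include
// are assumptions of this self-contained snippet.
#include <cstdio>

static void demoRepeatedFPDivisor(double a, double b, double y) {
  // Before: two divisions that share the divisor y.
  double q0 = a / y, q1 = b / y;
  // After (only valid under the 'arcp' fast-math flag): one division feeding
  // two multiplies.
  double recip = 1.0 / y;
  double r0 = a * recip, r1 = b * recip;
  // The rewritten results may differ in the last bit, which is why the
  // transform requires reciprocal formation to be explicitly allowed.
  std::printf("%.17g %.17g vs %.17g %.17g\n", q0, q1, r0, r1);
}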
6815
6817 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6818 Register LHS = MI.getOperand(1).getReg();
6819 Register RHS = MI.getOperand(2).getReg();
6820
6821 // Helper lambda to check for opportunities for
6822 // A + (B - A) -> B
6823 // (B - A) + A -> B
6824 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6825 Register Reg;
6826 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6827 Reg == MaybeSameReg;
6828 };
6829 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6830}
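// [Editorial sketch -- not part of CombinerHelper.cpp] The identity behind the
// fold above, checked on plain wrapping 32-bit integers; the helper name and
// #includes are assumptions.
#include <cassert>
#include <cstdint>

static void demoAddSubSameReg(uint32_t a, uint32_t b) {
  // G_ADD/G_SUB are modular, so the cancellation holds even when b - a wraps.
  assert(a + (b - a) == b); // A + (B - A) -> B
  assert((b - a) + a == b); // (B - A) + A -> B
}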
6831
6833 Register &MatchInfo) const {
6834 // This combine folds the following patterns:
6835 //
6836 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6837 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6838 // into
6839 // x
6840 // if
6841 // k == sizeof(VecEltTy)/2
6842 // type(x) == type(dst)
6843 //
6844 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6845 // into
6846 // x
6847 // if
6848 // type(x) == type(dst)
6849
6850 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6851 LLT DstEltTy = DstVecTy.getElementType();
6852
6853 Register Lo, Hi;
6854
6855 if (mi_match(
6856 MI, MRI,
6858 MatchInfo = Lo;
6859 return MRI.getType(MatchInfo) == DstVecTy;
6860 }
6861
6862 std::optional<ValueAndVReg> ShiftAmount;
6863 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6864 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6865 if (mi_match(
6866 MI, MRI,
6867 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6868 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6869 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6870 MatchInfo = Lo;
6871 return MRI.getType(MatchInfo) == DstVecTy;
6872 }
6873 }
6874
6875 return false;
6876}
6877
6879 Register &MatchInfo) const {
6880 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6881 // if type(x) == type(G_TRUNC)
6882 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6883 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6884 return false;
6885
6886 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6887}
6888
6890 Register &MatchInfo) const {
6891 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6892 // y if K == size of vector element type
6893 std::optional<ValueAndVReg> ShiftAmt;
6894 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6896 m_GCst(ShiftAmt))))
6897 return false;
6898
6899 LLT MatchTy = MRI.getType(MatchInfo);
6900 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6901 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6902}
6903
6904unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6905 CmpInst::Predicate Pred, LLT DstTy,
6906 SelectPatternNaNBehaviour VsNaNRetVal) const {
6907 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6908 "Expected a NaN behaviour?");
6909 // Choose an opcode based off of legality or the behaviour when one of the
6910 // LHS/RHS may be NaN.
6911 switch (Pred) {
6912 default:
6913 return 0;
6914 case CmpInst::FCMP_UGT:
6915 case CmpInst::FCMP_UGE:
6916 case CmpInst::FCMP_OGT:
6917 case CmpInst::FCMP_OGE:
6918 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6919 return TargetOpcode::G_FMAXNUM;
6920 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6921 return TargetOpcode::G_FMAXIMUM;
6922 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6923 return TargetOpcode::G_FMAXNUM;
6924 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6925 return TargetOpcode::G_FMAXIMUM;
6926 return 0;
6927 case CmpInst::FCMP_ULT:
6928 case CmpInst::FCMP_ULE:
6929 case CmpInst::FCMP_OLT:
6930 case CmpInst::FCMP_OLE:
6931 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6932 return TargetOpcode::G_FMINNUM;
6933 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6934 return TargetOpcode::G_FMINIMUM;
6935 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6936 return TargetOpcode::G_FMINNUM;
6937 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6938 return 0;
6939 return TargetOpcode::G_FMINIMUM;
6940 }
6941}
6942
6943CombinerHelper::SelectPatternNaNBehaviour
6944CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6945 bool IsOrderedComparison) const {
6946 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6947 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6948 // Completely unsafe.
6949 if (!LHSSafe && !RHSSafe)
6950 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6951 if (LHSSafe && RHSSafe)
6952 return SelectPatternNaNBehaviour::RETURNS_ANY;
6953 // An ordered comparison will return false when given a NaN, so it
6954 // returns the RHS.
6955 if (IsOrderedComparison)
6956 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6957 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6958 // An unordered comparison will return true when given a NaN, so it
6959 // returns the LHS.
6960 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6961 : SelectPatternNaNBehaviour::RETURNS_NAN;
6962}
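// [Editorial sketch -- not part of CombinerHelper.cpp] A standalone check of
// the ordered/unordered reasoning above; the helper name and #includes are
// assumptions.
#include <cassert>
#include <cmath>

static void demoCompareWithNaN() {
  double NaN = std::nan("");
  // An ordered compare (e.g. FCMP_OLT) is false when an operand is NaN, so a
  // select on it returns the RHS.
  assert((NaN < 1.0 ? NaN : 1.0) == 1.0);
  // An unordered compare (e.g. FCMP_ULT, modelled here as !(x >= y)) is true
  // when an operand is NaN, so the select returns the LHS.
  assert(std::isnan(!(NaN >= 1.0) ? NaN : 1.0));
}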
6963
6964bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6965 Register TrueVal, Register FalseVal,
6966 BuildFnTy &MatchInfo) const {
6967 // Match: select (fcmp cond x, y) x, y
6968 // select (fcmp cond x, y) y, x
6969 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6970 LLT DstTy = MRI.getType(Dst);
6971 // Bail out early on pointers, since we'll never want to fold to a min/max.
6972 if (DstTy.isPointer())
6973 return false;
6974 // Match a floating point compare with a less-than/greater-than predicate.
6975 // TODO: Allow multiple users of the compare if they are all selects.
6976 CmpInst::Predicate Pred;
6977 Register CmpLHS, CmpRHS;
6978 if (!mi_match(Cond, MRI,
6980 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6981 CmpInst::isEquality(Pred))
6982 return false;
6983 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6984 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6985 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6986 return false;
6987 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6988 std::swap(CmpLHS, CmpRHS);
6989 Pred = CmpInst::getSwappedPredicate(Pred);
6990 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6991 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6992 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6993 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6994 }
6995 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6996 return false;
6997 // Decide what type of max/min this should be based off of the predicate.
6998 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6999 if (!Opc || !isLegal({Opc, {DstTy}}))
7000 return false;
7001 // Comparisons between signed zero and zero may have different results...
7002 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7003 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7004 // We don't know if a comparison between two 0s will give us a consistent
7005 // result. Be conservative and only proceed if at least one side is
7006 // non-zero.
7007 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7008 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7009 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7010 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7011 return false;
7012 }
7013 }
7014 MatchInfo = [=](MachineIRBuilder &B) {
7015 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7016 };
7017 return true;
7018}
7019
7021 BuildFnTy &MatchInfo) const {
7022 // TODO: Handle integer cases.
7023 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7024 // Condition may be fed by a truncated compare.
7025 Register Cond = MI.getOperand(1).getReg();
7026 Register MaybeTrunc;
7027 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7028 Cond = MaybeTrunc;
7029 Register Dst = MI.getOperand(0).getReg();
7030 Register TrueVal = MI.getOperand(2).getReg();
7031 Register FalseVal = MI.getOperand(3).getReg();
7032 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7033}
7034
7036 BuildFnTy &MatchInfo) const {
7037 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7038 // (X + Y) == X --> Y == 0
7039 // (X + Y) != X --> Y != 0
7040 // (X - Y) == X --> Y == 0
7041 // (X - Y) != X --> Y != 0
7042 // (X ^ Y) == X --> Y == 0
7043 // (X ^ Y) != X --> Y != 0
7044 Register Dst = MI.getOperand(0).getReg();
7045 CmpInst::Predicate Pred;
7046 Register X, Y, OpLHS, OpRHS;
7047 bool MatchedSub = mi_match(
7048 Dst, MRI,
7049 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7050 if (MatchedSub && X != OpLHS)
7051 return false;
7052 if (!MatchedSub) {
7053 if (!mi_match(Dst, MRI,
7054 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7055 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7056 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7057 return false;
7058 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7059 }
7060 MatchInfo = [=](MachineIRBuilder &B) {
7061 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7062 B.buildICmp(Pred, Dst, Y, Zero);
7063 };
7064 return CmpInst::isEquality(Pred) && Y.isValid();
7065}
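// [Editorial sketch -- not part of CombinerHelper.cpp] The equalities listed
// above, checked on wrapping 32-bit integers; the helper name and #includes
// are assumptions.
#include <cassert>
#include <cstdint>

static void demoRedundantBinOpInEquality(uint32_t x, uint32_t y) {
  // With modular G_ADD/G_SUB and bitwise G_XOR, the operation leaves x
  // unchanged exactly when y is zero.
  assert(((x + y) == x) == (y == 0));
  assert(((x - y) == x) == (y == 0));
  assert(((x ^ y) == x) == (y == 0));
}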
7066
7067/// Return the minimum useless shift amount that results in complete loss of the
7068/// source value. Return std::nullopt when it cannot determine a value.
7069static std::optional<unsigned>
7070getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7071 std::optional<int64_t> &Result) {
7072 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7073 Opcode == TargetOpcode::G_ASHR) &&
7074 "Expect G_SHL, G_LSHR or G_ASHR.");
7075 auto SignificantBits = 0;
7076 switch (Opcode) {
7077 case TargetOpcode::G_SHL:
7078 SignificantBits = ValueKB.countMinTrailingZeros();
7079 Result = 0;
7080 break;
7081 case TargetOpcode::G_LSHR:
7082 Result = 0;
7083 SignificantBits = ValueKB.countMinLeadingZeros();
7084 break;
7085 case TargetOpcode::G_ASHR:
7086 if (ValueKB.isNonNegative()) {
7087 SignificantBits = ValueKB.countMinLeadingZeros();
7088 Result = 0;
7089 } else if (ValueKB.isNegative()) {
7090 SignificantBits = ValueKB.countMinLeadingOnes();
7091 Result = -1;
7092 } else {
7093 // Cannot determine shift result.
7094 Result = std::nullopt;
7095 }
7096 break;
7097 default:
7098 break;
7099 }
7100 return ValueKB.getBitWidth() - SignificantBits;
7101}
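// [Editorial sketch -- not part of CombinerHelper.cpp] A concrete instance of
// the known-bits reasoning above; the helper name and #includes are
// assumptions.
#include <cassert>
#include <cstdint>

static void demoMinUselessShift() {
  // A 32-bit value known to fit in 8 bits has at least 24 known leading zeros,
  // so the minimum useless shift is 32 - 24 = 8: any G_LSHR by 8 or more is
  // known to produce 0, the constant the shift can then be replaced with.
  uint32_t v = 0xABu;
  for (unsigned amt = 8; amt < 32; ++amt)
    assert((v >> amt) == 0);
}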
7102
7104 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7105 Register ShiftVal = MI.getOperand(1).getReg();
7106 Register ShiftReg = MI.getOperand(2).getReg();
7107 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7108 auto IsShiftTooBig = [&](const Constant *C) {
7109 auto *CI = dyn_cast<ConstantInt>(C);
7110 if (!CI)
7111 return false;
7112 if (CI->uge(ResTy.getScalarSizeInBits())) {
7113 MatchInfo = std::nullopt;
7114 return true;
7115 }
7116 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7117 MI.getOpcode(), MatchInfo);
7118 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7119 };
7120 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7121}
7122
7124 unsigned LHSOpndIdx = 1;
7125 unsigned RHSOpndIdx = 2;
7126 switch (MI.getOpcode()) {
7127 case TargetOpcode::G_UADDO:
7128 case TargetOpcode::G_SADDO:
7129 case TargetOpcode::G_UMULO:
7130 case TargetOpcode::G_SMULO:
7131 LHSOpndIdx = 2;
7132 RHSOpndIdx = 3;
7133 break;
7134 default:
7135 break;
7136 }
7137 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7138 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7139 if (!getIConstantVRegVal(LHS, MRI)) {
7140 // Skip commuting if LHS is not a constant. However, LHS may be a
7141 // G_CONSTANT_FOLD_BARRIER; if so, we commute as long as we don't already
7142 // have a constant on the RHS.
7143 if (MRI.getVRegDef(LHS)->getOpcode() !=
7144 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7145 return false;
7146 }
7147 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7148 return MRI.getVRegDef(RHS)->getOpcode() !=
7149 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7150 !getIConstantVRegVal(RHS, MRI);
7151}
7152
7154 Register LHS = MI.getOperand(1).getReg();
7155 Register RHS = MI.getOperand(2).getReg();
7156 std::optional<FPValueAndVReg> ValAndVReg;
7157 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7158 return false;
7159 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7160}
7161
7163 Observer.changingInstr(MI);
7164 unsigned LHSOpndIdx = 1;
7165 unsigned RHSOpndIdx = 2;
7166 switch (MI.getOpcode()) {
7167 case TargetOpcode::G_UADDO:
7168 case TargetOpcode::G_SADDO:
7169 case TargetOpcode::G_UMULO:
7170 case TargetOpcode::G_SMULO:
7171 LHSOpndIdx = 2;
7172 RHSOpndIdx = 3;
7173 break;
7174 default:
7175 break;
7176 }
7177 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7178 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7179 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7180 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7181 Observer.changedInstr(MI);
7182}
7183
7184bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7185 LLT SrcTy = MRI.getType(Src);
7186 if (SrcTy.isFixedVector())
7187 return isConstantSplatVector(Src, 1, AllowUndefs);
7188 if (SrcTy.isScalar()) {
7189 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7190 return true;
7191 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7192 return IConstant && IConstant->Value == 1;
7193 }
7194 return false; // scalable vector
7195}
7196
7197bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7198 LLT SrcTy = MRI.getType(Src);
7199 if (SrcTy.isFixedVector())
7200 return isConstantSplatVector(Src, 0, AllowUndefs);
7201 if (SrcTy.isScalar()) {
7202 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7203 return true;
7204 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7205 return IConstant && IConstant->Value == 0;
7206 }
7207 return false; // scalable vector
7208}
7209
7210// Ignores COPYs during conformance checks.
7211// FIXME scalable vectors.
7212bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7213 bool AllowUndefs) const {
7214 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7215 if (!BuildVector)
7216 return false;
7217 unsigned NumSources = BuildVector->getNumSources();
7218
7219 for (unsigned I = 0; I < NumSources; ++I) {
7220 GImplicitDef *ImplicitDef =
7222 if (ImplicitDef && AllowUndefs)
7223 continue;
7224 if (ImplicitDef && !AllowUndefs)
7225 return false;
7226 std::optional<ValueAndVReg> IConstant =
7228 if (IConstant && IConstant->Value == SplatValue)
7229 continue;
7230 return false;
7231 }
7232 return true;
7233}
7234
7235// Ignores COPYs during lookups.
7236// FIXME scalable vectors
7237std::optional<APInt>
7238CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7239 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7240 if (IConstant)
7241 return IConstant->Value;
7242
7243 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7244 if (!BuildVector)
7245 return std::nullopt;
7246 unsigned NumSources = BuildVector->getNumSources();
7247
7248 std::optional<APInt> Value = std::nullopt;
7249 for (unsigned I = 0; I < NumSources; ++I) {
7250 std::optional<ValueAndVReg> IConstant =
7252 if (!IConstant)
7253 return std::nullopt;
7254 if (!Value)
7255 Value = IConstant->Value;
7256 else if (*Value != IConstant->Value)
7257 return std::nullopt;
7258 }
7259 return Value;
7260}
7261
7262// FIXME G_SPLAT_VECTOR
7263bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7264 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7265 if (IConstant)
7266 return true;
7267
7268 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7269 if (!BuildVector)
7270 return false;
7271
7272 unsigned NumSources = BuildVector->getNumSources();
7273 for (unsigned I = 0; I < NumSources; ++I) {
7274 std::optional<ValueAndVReg> IConstant =
7276 if (!IConstant)
7277 return false;
7278 }
7279 return true;
7280}
7281
7282// TODO: use knownbits to determine zeros
7283bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7284 BuildFnTy &MatchInfo) const {
7285 uint32_t Flags = Select->getFlags();
7286 Register Dest = Select->getReg(0);
7287 Register Cond = Select->getCondReg();
7288 Register True = Select->getTrueReg();
7289 Register False = Select->getFalseReg();
7290 LLT CondTy = MRI.getType(Select->getCondReg());
7291 LLT TrueTy = MRI.getType(Select->getTrueReg());
7292
7293 // We only do this combine for scalar boolean conditions.
7294 if (CondTy != LLT::scalar(1))
7295 return false;
7296
7297 if (TrueTy.isPointer())
7298 return false;
7299
7300 // Both are scalars.
7301 std::optional<ValueAndVReg> TrueOpt =
7303 std::optional<ValueAndVReg> FalseOpt =
7305
7306 if (!TrueOpt || !FalseOpt)
7307 return false;
7308
7309 APInt TrueValue = TrueOpt->Value;
7310 APInt FalseValue = FalseOpt->Value;
7311
7312 // select Cond, 1, 0 --> zext (Cond)
7313 if (TrueValue.isOne() && FalseValue.isZero()) {
7314 MatchInfo = [=](MachineIRBuilder &B) {
7315 B.setInstrAndDebugLoc(*Select);
7316 B.buildZExtOrTrunc(Dest, Cond);
7317 };
7318 return true;
7319 }
7320
7321 // select Cond, -1, 0 --> sext (Cond)
7322 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7323 MatchInfo = [=](MachineIRBuilder &B) {
7324 B.setInstrAndDebugLoc(*Select);
7325 B.buildSExtOrTrunc(Dest, Cond);
7326 };
7327 return true;
7328 }
7329
7330 // select Cond, 0, 1 --> zext (!Cond)
7331 if (TrueValue.isZero() && FalseValue.isOne()) {
7332 MatchInfo = [=](MachineIRBuilder &B) {
7333 B.setInstrAndDebugLoc(*Select);
7334 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7335 B.buildNot(Inner, Cond);
7336 B.buildZExtOrTrunc(Dest, Inner);
7337 };
7338 return true;
7339 }
7340
7341 // select Cond, 0, -1 --> sext (!Cond)
7342 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7343 MatchInfo = [=](MachineIRBuilder &B) {
7344 B.setInstrAndDebugLoc(*Select);
7345 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7346 B.buildNot(Inner, Cond);
7347 B.buildSExtOrTrunc(Dest, Inner);
7348 };
7349 return true;
7350 }
7351
7352 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7353 if (TrueValue - 1 == FalseValue) {
7354 MatchInfo = [=](MachineIRBuilder &B) {
7355 B.setInstrAndDebugLoc(*Select);
7356 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7357 B.buildZExtOrTrunc(Inner, Cond);
7358 B.buildAdd(Dest, Inner, False);
7359 };
7360 return true;
7361 }
7362
7363 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7364 if (TrueValue + 1 == FalseValue) {
7365 MatchInfo = [=](MachineIRBuilder &B) {
7366 B.setInstrAndDebugLoc(*Select);
7367 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7368 B.buildSExtOrTrunc(Inner, Cond);
7369 B.buildAdd(Dest, Inner, False);
7370 };
7371 return true;
7372 }
7373
7374 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7375 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7376 MatchInfo = [=](MachineIRBuilder &B) {
7377 B.setInstrAndDebugLoc(*Select);
7378 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7379 B.buildZExtOrTrunc(Inner, Cond);
7380 // The shift amount must be scalar.
7381 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7382 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7383 B.buildShl(Dest, Inner, ShAmtC, Flags);
7384 };
7385 return true;
7386 }
7387
7388 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7389 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7390 MatchInfo = [=](MachineIRBuilder &B) {
7391 B.setInstrAndDebugLoc(*Select);
7392 Register Not = MRI.createGenericVirtualRegister(CondTy);
7393 B.buildNot(Not, Cond);
7394 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7395 B.buildZExtOrTrunc(Inner, Not);
7396 // The shift amount must be scalar.
7397 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7398 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7399 B.buildShl(Dest, Inner, ShAmtC, Flags);
7400 };
7401 return true;
7402 }
7403
7404 // select Cond, -1, C --> or (sext Cond), C
7405 if (TrueValue.isAllOnes()) {
7406 MatchInfo = [=](MachineIRBuilder &B) {
7407 B.setInstrAndDebugLoc(*Select);
7408 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7409 B.buildSExtOrTrunc(Inner, Cond);
7410 B.buildOr(Dest, Inner, False, Flags);
7411 };
7412 return true;
7413 }
7414
7415 // select Cond, C, -1 --> or (sext (not Cond)), C
7416 if (FalseValue.isAllOnes()) {
7417 MatchInfo = [=](MachineIRBuilder &B) {
7418 B.setInstrAndDebugLoc(*Select);
7419 Register Not = MRI.createGenericVirtualRegister(CondTy);
7420 B.buildNot(Not, Cond);
7421 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7422 B.buildSExtOrTrunc(Inner, Not);
7423 B.buildOr(Dest, Inner, True, Flags);
7424 };
7425 return true;
7426 }
7427
7428 return false;
7429}
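// [Editorial sketch -- not part of CombinerHelper.cpp] Spot checks of a few of
// the select-of-constants rewrites above on plain integers; the helper name
// and #includes are assumptions.
#include <cassert>
#include <cstdint>

static void demoSelectOfConstants(bool cond) {
  uint32_t c1 = 41;
  // select Cond, C1, C1-1 --> add (zext Cond), C1-1
  assert((cond ? c1 : c1 - 1) == (uint32_t)cond + (c1 - 1));
  // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
  assert((cond ? 16u : 0u) == ((uint32_t)cond << 4));
  // select Cond, -1, C --> or (sext Cond), C; 0u - cond models the sext.
  assert((cond ? UINT32_MAX : c1) == ((0u - (uint32_t)cond) | c1));
}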
7430
7431// TODO: use knownbits to determine zeros
7432bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7433 BuildFnTy &MatchInfo) const {
7434 uint32_t Flags = Select->getFlags();
7435 Register DstReg = Select->getReg(0);
7436 Register Cond = Select->getCondReg();
7437 Register True = Select->getTrueReg();
7438 Register False = Select->getFalseReg();
7439 LLT CondTy = MRI.getType(Select->getCondReg());
7440 LLT TrueTy = MRI.getType(Select->getTrueReg());
7441
7442 // Boolean or fixed vector of booleans.
7443 if (CondTy.isScalableVector() ||
7444 (CondTy.isFixedVector() &&
7445 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7446 CondTy.getScalarSizeInBits() != 1)
7447 return false;
7448
7449 if (CondTy != TrueTy)
7450 return false;
7451
7452 // select Cond, Cond, F --> or Cond, F
7453 // select Cond, 1, F --> or Cond, F
7454 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7455 MatchInfo = [=](MachineIRBuilder &B) {
7456 B.setInstrAndDebugLoc(*Select);
7457 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7458 B.buildZExtOrTrunc(Ext, Cond);
7459 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7460 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7461 };
7462 return true;
7463 }
7464
7465 // select Cond, T, Cond --> and Cond, T
7466 // select Cond, T, 0 --> and Cond, T
7467 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7468 MatchInfo = [=](MachineIRBuilder &B) {
7469 B.setInstrAndDebugLoc(*Select);
7470 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7471 B.buildZExtOrTrunc(Ext, Cond);
7472 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7473 B.buildAnd(DstReg, Ext, FreezeTrue);
7474 };
7475 return true;
7476 }
7477
7478 // select Cond, T, 1 --> or (not Cond), T
7479 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7480 MatchInfo = [=](MachineIRBuilder &B) {
7481 B.setInstrAndDebugLoc(*Select);
7482 // First the not.
7483 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7484 B.buildNot(Inner, Cond);
7485 // Then an ext to match the destination register.
7486 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7487 B.buildZExtOrTrunc(Ext, Inner);
7488 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7489 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7490 };
7491 return true;
7492 }
7493
7494 // select Cond, 0, F --> and (not Cond), F
7495 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7496 MatchInfo = [=](MachineIRBuilder &B) {
7497 B.setInstrAndDebugLoc(*Select);
7498 // First the not.
7499 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7500 B.buildNot(Inner, Cond);
7501 // Then an ext to match the destination register.
7502 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7503 B.buildZExtOrTrunc(Ext, Inner);
7504 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7505 B.buildAnd(DstReg, Ext, FreezeFalse);
7506 };
7507 return true;
7508 }
7509
7510 return false;
7511}
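// [Editorial sketch -- not part of CombinerHelper.cpp] Truth-table checks of
// the boolean select-to-logic rewrites above; the helper name and #include
// are assumptions.
#include <cassert>

static void demoBoolSelectToLogic(bool c, bool t, bool f) {
  assert((c ? true : f) == (c | f));   // select Cond, 1, F --> or Cond, F
  assert((c ? t : false) == (c & t));  // select Cond, T, 0 --> and Cond, T
  assert((c ? t : true) == (!c | t));  // select Cond, T, 1 --> or (not Cond), T
  assert((c ? false : f) == (!c & f)); // select Cond, 0, F --> and (not Cond), F
}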
7512
7514 BuildFnTy &MatchInfo) const {
7515 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7516 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7517
7518 Register DstReg = Select->getReg(0);
7519 Register True = Select->getTrueReg();
7520 Register False = Select->getFalseReg();
7521 LLT DstTy = MRI.getType(DstReg);
7522
7523 if (DstTy.isPointer())
7524 return false;
7525
7526 // We want to fold the icmp and replace the select.
7527 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7528 return false;
7529
7530 CmpInst::Predicate Pred = Cmp->getCond();
7531 // We need a larger or smaller predicate for
7532 // canonicalization.
7533 if (CmpInst::isEquality(Pred))
7534 return false;
7535
7536 Register CmpLHS = Cmp->getLHSReg();
7537 Register CmpRHS = Cmp->getRHSReg();
7538
7539 // We can swap CmpLHS and CmpRHS for a higher hit rate.
7540 if (True == CmpRHS && False == CmpLHS) {
7541 std::swap(CmpLHS, CmpRHS);
7542 Pred = CmpInst::getSwappedPredicate(Pred);
7543 }
7544
7545 // (icmp X, Y) ? X : Y -> integer minmax.
7546 // see matchSelectPattern in ValueTracking.
7547 // Legality between G_SELECT and integer minmax can differ.
7548 if (True != CmpLHS || False != CmpRHS)
7549 return false;
7550
7551 switch (Pred) {
7552 case ICmpInst::ICMP_UGT:
7553 case ICmpInst::ICMP_UGE: {
7554 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7555 return false;
7556 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7557 return true;
7558 }
7559 case ICmpInst::ICMP_SGT:
7560 case ICmpInst::ICMP_SGE: {
7561 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7562 return false;
7563 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7564 return true;
7565 }
7566 case ICmpInst::ICMP_ULT:
7567 case ICmpInst::ICMP_ULE: {
7568 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7569 return false;
7570 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7571 return true;
7572 }
7573 case ICmpInst::ICMP_SLT:
7574 case ICmpInst::ICMP_SLE: {
7575 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7576 return false;
7577 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7578 return true;
7579 }
7580 default:
7581 return false;
7582 }
7583}
7584
7585// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7587 BuildFnTy &MatchInfo) const {
7588 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7589 Register DestReg = MI.getOperand(0).getReg();
7590 LLT DestTy = MRI.getType(DestReg);
7591
7592 Register X;
7593 Register Sub0;
7594 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7595 if (mi_match(DestReg, MRI,
7596 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7597 m_GSMax(m_Reg(X), NegPattern),
7598 m_GUMin(m_Reg(X), NegPattern),
7599 m_GUMax(m_Reg(X), NegPattern)))))) {
7600 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7601 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7602 if (isLegal({NewOpc, {DestTy}})) {
7603 MatchInfo = [=](MachineIRBuilder &B) {
7604 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7605 };
7606 return true;
7607 }
7608 }
7609
7610 return false;
7611}
7612
7615
7616 if (tryFoldSelectOfConstants(Select, MatchInfo))
7617 return true;
7618
7619 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7620 return true;
7621
7622 return false;
7623}
7624
7625/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7626/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7627/// into a single comparison using range-based reasoning.
7628/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
7629bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7630 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7631 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7632 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7633 Register DstReg = Logic->getReg(0);
7634 Register LHS = Logic->getLHSReg();
7635 Register RHS = Logic->getRHSReg();
7636 unsigned Flags = Logic->getFlags();
7637
7638 // We need a G_ICMP on the LHS register.
7639 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7640 if (!Cmp1)
7641 return false;
7642
7643 // We need a G_ICMP on the RHS register.
7644 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7645 if (!Cmp2)
7646 return false;
7647
7648 // We want to fold the icmps.
7649 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7650 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7651 return false;
7652
7653 APInt C1;
7654 APInt C2;
7655 std::optional<ValueAndVReg> MaybeC1 =
7657 if (!MaybeC1)
7658 return false;
7659 C1 = MaybeC1->Value;
7660
7661 std::optional<ValueAndVReg> MaybeC2 =
7663 if (!MaybeC2)
7664 return false;
7665 C2 = MaybeC2->Value;
7666
7667 Register R1 = Cmp1->getLHSReg();
7668 Register R2 = Cmp2->getLHSReg();
7669 CmpInst::Predicate Pred1 = Cmp1->getCond();
7670 CmpInst::Predicate Pred2 = Cmp2->getCond();
7671 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7672 LLT CmpOperandTy = MRI.getType(R1);
7673
7674 if (CmpOperandTy.isPointer())
7675 return false;
7676
7677 // We build ands, adds, and constants of type CmpOperandTy.
7678 // They must be legal to build.
7679 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7680 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7681 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7682 return false;
7683
7684 // Look through add of a constant offset on R1, R2, or both operands. This
7685 // allows us to interpret the R + C' < C'' range idiom into a proper range.
7686 std::optional<APInt> Offset1;
7687 std::optional<APInt> Offset2;
7688 if (R1 != R2) {
7689 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7690 std::optional<ValueAndVReg> MaybeOffset1 =
7692 if (MaybeOffset1) {
7693 R1 = Add->getLHSReg();
7694 Offset1 = MaybeOffset1->Value;
7695 }
7696 }
7697 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7698 std::optional<ValueAndVReg> MaybeOffset2 =
7700 if (MaybeOffset2) {
7701 R2 = Add->getLHSReg();
7702 Offset2 = MaybeOffset2->Value;
7703 }
7704 }
7705 }
7706
7707 if (R1 != R2)
7708 return false;
7709
7710 // We calculate the icmp ranges including maybe offsets.
7711 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7712 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7713 if (Offset1)
7714 CR1 = CR1.subtract(*Offset1);
7715
7716 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7717 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7718 if (Offset2)
7719 CR2 = CR2.subtract(*Offset2);
7720
7721 bool CreateMask = false;
7722 APInt LowerDiff;
7723 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7724 if (!CR) {
7725 // We need non-wrapping ranges.
7726 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7727 return false;
7728
7729 // Check whether we have equal-size ranges that only differ by one bit.
7730 // In that case we can apply a mask to map one range onto the other.
7731 LowerDiff = CR1.getLower() ^ CR2.getLower();
7732 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7733 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7734 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7735 CR1Size != CR2.getUpper() - CR2.getLower())
7736 return false;
7737
7738 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7739 CreateMask = true;
7740 }
7741
7742 if (IsAnd)
7743 CR = CR->inverse();
7744
7745 CmpInst::Predicate NewPred;
7746 APInt NewC, Offset;
7747 CR->getEquivalentICmp(NewPred, NewC, Offset);
7748
7749 // We take the result type of one of the original icmps, CmpTy, for
7750 // the icmp to be built. The operand type, CmpOperandTy, is used for
7751 // the other instructions and constants to be built. The types of
7752 // the parameters and output are the same for G_ADD and G_AND. CmpTy
7753 // and the type of DstReg might differ. That is why we zext or trunc
7754 // the icmp into the destination register.
7755
7756 MatchInfo = [=](MachineIRBuilder &B) {
7757 if (CreateMask && Offset != 0) {
7758 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7759 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7760 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7761 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7762 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7763 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7764 B.buildZExtOrTrunc(DstReg, ICmp);
7765 } else if (CreateMask && Offset == 0) {
7766 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7767 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7768 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7769 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7770 B.buildZExtOrTrunc(DstReg, ICmp);
7771 } else if (!CreateMask && Offset != 0) {
7772 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7773 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7774 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7775 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7776 B.buildZExtOrTrunc(DstReg, ICmp);
7777 } else if (!CreateMask && Offset == 0) {
7778 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7779 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7780 B.buildZExtOrTrunc(DstReg, ICmp);
7781 } else {
7782 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7783 }
7784 };
7785 return true;
7786}
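// [Editorial sketch -- not part of CombinerHelper.cpp] Two concrete instances
// of the range reasoning above, valid for any uint32_t x; the helper name and
// #includes are assumptions.
#include <cassert>
#include <cstdint>

static void demoFoldICmpsUsingRanges(uint32_t x) {
  // Offset case: re-basing turns two compares into one unsigned range check.
  // (x >= 5 && x < 10)  <=>  (x - 5) <u 5
  assert((x >= 5 && x < 10) == ((x - 5u) < 5u));
  // CreateMask case: two equal-size ranges that differ in a single bit are
  // mapped onto each other by masking that bit away.
  // (x == 3 || x == 11)  <=>  (x & ~8u) == 3
  assert((x == 3 || x == 11) == ((x & ~8u) == 3u));
}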
7787
7788bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7789 BuildFnTy &MatchInfo) const {
7790 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7791 Register DestReg = Logic->getReg(0);
7792 Register LHS = Logic->getLHSReg();
7793 Register RHS = Logic->getRHSReg();
7794 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7795
7796 // We need a compare on the LHS register.
7797 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7798 if (!Cmp1)
7799 return false;
7800
7801 // We need a compare on the RHS register.
7802 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7803 if (!Cmp2)
7804 return false;
7805
7806 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7807 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7808
7809 // We want to build a single fcmp, fold away the two fcmps, and replace the
7810 // logic op; for that, the fcmps must have the same shape.
7812 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7813 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7814 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7815 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7816 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7817 return false;
7818
7819 CmpInst::Predicate PredL = Cmp1->getCond();
7820 CmpInst::Predicate PredR = Cmp2->getCond();
7821 Register LHS0 = Cmp1->getLHSReg();
7822 Register LHS1 = Cmp1->getRHSReg();
7823 Register RHS0 = Cmp2->getLHSReg();
7824 Register RHS1 = Cmp2->getRHSReg();
7825
7826 if (LHS0 == RHS1 && LHS1 == RHS0) {
7827 // Swap RHS operands to match LHS.
7828 PredR = CmpInst::getSwappedPredicate(PredR);
7829 std::swap(RHS0, RHS1);
7830 }
7831
7832 if (LHS0 == RHS0 && LHS1 == RHS1) {
7833 // We determine the new predicate.
7834 unsigned CmpCodeL = getFCmpCode(PredL);
7835 unsigned CmpCodeR = getFCmpCode(PredR);
7836 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7837 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7838 MatchInfo = [=](MachineIRBuilder &B) {
7839 // The fcmp predicates fill the lower part of the enum.
7840 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7841 if (Pred == FCmpInst::FCMP_FALSE &&
7843 auto False = B.buildConstant(CmpTy, 0);
7844 B.buildZExtOrTrunc(DestReg, False);
7845 } else if (Pred == FCmpInst::FCMP_TRUE &&
7847 auto True =
7848 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7849 CmpTy.isVector() /*isVector*/,
7850 true /*isFP*/));
7851 B.buildZExtOrTrunc(DestReg, True);
7852 } else { // We take the predicate without predicate optimizations.
7853 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7854 B.buildZExtOrTrunc(DestReg, Cmp);
7855 }
7856 };
7857 return true;
7858 }
7859
7860 return false;
7861}
7862
7864 GAnd *And = cast<GAnd>(&MI);
7865
7866 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7867 return true;
7868
7869 if (tryFoldLogicOfFCmps(And, MatchInfo))
7870 return true;
7871
7872 return false;
7873}
7874
7876 GOr *Or = cast<GOr>(&MI);
7877
7878 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7879 return true;
7880
7881 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7882 return true;
7883
7884 return false;
7885}
7886
7888 BuildFnTy &MatchInfo) const {
7890
7891 // Addo has no flags
7892 Register Dst = Add->getReg(0);
7893 Register Carry = Add->getReg(1);
7894 Register LHS = Add->getLHSReg();
7895 Register RHS = Add->getRHSReg();
7896 bool IsSigned = Add->isSigned();
7897 LLT DstTy = MRI.getType(Dst);
7898 LLT CarryTy = MRI.getType(Carry);
7899
7900 // Fold addo, if the carry is dead -> add, undef.
7901 if (MRI.use_nodbg_empty(Carry) &&
7902 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7903 MatchInfo = [=](MachineIRBuilder &B) {
7904 B.buildAdd(Dst, LHS, RHS);
7905 B.buildUndef(Carry);
7906 };
7907 return true;
7908 }
7909
7910 // Canonicalize constant to RHS.
7911 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7912 if (IsSigned) {
7913 MatchInfo = [=](MachineIRBuilder &B) {
7914 B.buildSAddo(Dst, Carry, RHS, LHS);
7915 };
7916 return true;
7917 }
7918 // !IsSigned
7919 MatchInfo = [=](MachineIRBuilder &B) {
7920 B.buildUAddo(Dst, Carry, RHS, LHS);
7921 };
7922 return true;
7923 }
7924
7925 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7926 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7927
7928 // Fold addo(c1, c2) -> c3, carry.
7929 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7931 bool Overflow;
7932 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7933 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7934 MatchInfo = [=](MachineIRBuilder &B) {
7935 B.buildConstant(Dst, Result);
7936 B.buildConstant(Carry, Overflow);
7937 };
7938 return true;
7939 }
7940
7941 // Fold (addo x, 0) -> x, no carry
7942 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7943 MatchInfo = [=](MachineIRBuilder &B) {
7944 B.buildCopy(Dst, LHS);
7945 B.buildConstant(Carry, 0);
7946 };
7947 return true;
7948 }
7949
7950 // Given 2 constant operands whose sum does not overflow:
7951 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7952 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7953 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7954 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7955 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7956 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7957 std::optional<APInt> MaybeAddRHS =
7958 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7959 if (MaybeAddRHS) {
7960 bool Overflow;
7961 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7962 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7963 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7964 if (IsSigned) {
7965 MatchInfo = [=](MachineIRBuilder &B) {
7966 auto ConstRHS = B.buildConstant(DstTy, NewC);
7967 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7968 };
7969 return true;
7970 }
7971 // !IsSigned
7972 MatchInfo = [=](MachineIRBuilder &B) {
7973 auto ConstRHS = B.buildConstant(DstTy, NewC);
7974 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7975 };
7976 return true;
7977 }
7978 }
7979 };
7980
7981 // We try to combine addo to non-overflowing add.
7982 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7984 return false;
7985
7986 // We try to combine uaddo to non-overflowing add.
7987 if (!IsSigned) {
7988 ConstantRange CRLHS =
7989 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
7990 ConstantRange CRRHS =
7991 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
7992
7993 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7995 return false;
7997 MatchInfo = [=](MachineIRBuilder &B) {
7998 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7999 B.buildConstant(Carry, 0);
8000 };
8001 return true;
8002 }
8005 MatchInfo = [=](MachineIRBuilder &B) {
8006 B.buildAdd(Dst, LHS, RHS);
8007 B.buildConstant(Carry, 1);
8008 };
8009 return true;
8010 }
8011 }
8012 return false;
8013 }
8014
8015 // We try to combine saddo to non-overflowing add.
8016
8017 // If LHS and RHS each have at least two sign bits, then there is no signed
8018 // overflow.
8019 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8020 MatchInfo = [=](MachineIRBuilder &B) {
8021 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8022 B.buildConstant(Carry, 0);
8023 };
8024 return true;
8025 }
8026
8027 ConstantRange CRLHS =
8028 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8029 ConstantRange CRRHS =
8030 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8031
8032 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8034 return false;
8036 MatchInfo = [=](MachineIRBuilder &B) {
8037 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8038 B.buildConstant(Carry, 0);
8039 };
8040 return true;
8041 }
8044 MatchInfo = [=](MachineIRBuilder &B) {
8045 B.buildAdd(Dst, LHS, RHS);
8046 B.buildConstant(Carry, 1);
8047 };
8048 return true;
8049 }
8050 }
8051
8052 return false;
8053}
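// [Editorial sketch -- not part of CombinerHelper.cpp] The "never overflows"
// case of the range analysis above, made concrete; the helper name and
// #includes are assumptions.
#include <cassert>
#include <cstdint>

static void demoUAddoNeverOverflows(uint16_t a, uint16_t b) {
  // If known bits bound both operands (here: zero-extended 16-bit values),
  // their unsigned ranges cannot sum past UINT32_MAX, so a G_UADDO can be
  // rewritten as a plain nuw G_ADD with a constant-0 carry.
  uint32_t wideA = a, wideB = b;
  uint64_t exact = (uint64_t)wideA + wideB;
  assert(exact <= UINT32_MAX && wideA + wideB == exact);
}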
8054
8056 BuildFnTy &MatchInfo) const {
8058 MatchInfo(Builder);
8059 Root->eraseFromParent();
8060}
8061
8063 int64_t Exponent) const {
8064 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8066}
8067
8069 int64_t Exponent) const {
8070 auto [Dst, Base] = MI.getFirst2Regs();
8071 LLT Ty = MRI.getType(Dst);
8072 int64_t ExpVal = Exponent;
8073
8074 if (ExpVal == 0) {
8075 Builder.buildFConstant(Dst, 1.0);
8076 MI.removeFromParent();
8077 return;
8078 }
8079
8080 if (ExpVal < 0)
8081 ExpVal = -ExpVal;
8082
8083 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8084 // to generate the multiply sequence. There are more optimal ways to do this
8085 // (for example, powi(x,15) generates one more multiply than it should), but
8086 // this has the benefit of being both really simple and much better than a
8087 // libcall.
8088 std::optional<SrcOp> Res;
8089 SrcOp CurSquare = Base;
8090 while (ExpVal > 0) {
8091 if (ExpVal & 1) {
8092 if (!Res)
8093 Res = CurSquare;
8094 else
8095 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8096 }
8097
8098 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8099 ExpVal >>= 1;
8100 }
8101
8102 // If the original exponent was negative, invert the result, producing
8103 // 1/(x*x*x).
8104 if (Exponent < 0)
8105 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8106 MI.getFlags());
8107
8108 Builder.buildCopy(Dst, *Res);
8109 MI.eraseFromParent();
8110}
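// [Editorial sketch -- not part of CombinerHelper.cpp] The same
// square-and-multiply strategy as applyExpandFPowI above, re-implemented as a
// standalone function on doubles; the name and #include are assumptions, and
// the accumulator is seeded with 1.0 rather than with the first selected
// square, which is a slight simplification.
#include <cstdint>

static double demoExpandPowI(double base, int64_t exponent) {
  uint64_t e = exponent < 0 ? -(uint64_t)exponent : (uint64_t)exponent;
  double result = 1.0, curSquare = base;
  while (e > 0) {
    if (e & 1)
      result *= curSquare; // multiply in the squares selected by set bits
    curSquare *= curSquare;
    e >>= 1;
  }
  // A negative exponent inverts the result, e.g. demoExpandPowI(2.0, -3) is
  // 0.125, while demoExpandPowI(2.0, 10) is 1024.0.
  return exponent < 0 ? 1.0 / result : result;
}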
8111
8113 BuildFnTy &MatchInfo) const {
8114 // fold (A+C1)-C2 -> A+(C1-C2)
8115 const GSub *Sub = cast<GSub>(&MI);
8116 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8117
8118 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8119 return false;
8120
8121 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8122 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8123
8124 Register Dst = Sub->getReg(0);
8125 LLT DstTy = MRI.getType(Dst);
8126
8127 MatchInfo = [=](MachineIRBuilder &B) {
8128 auto Const = B.buildConstant(DstTy, C1 - C2);
8129 B.buildAdd(Dst, Add->getLHSReg(), Const);
8130 };
8131
8132 return true;
8133}
8134
8136 BuildFnTy &MatchInfo) const {
8137 // fold C2-(A+C1) -> (C2-C1)-A
8138 const GSub *Sub = cast<GSub>(&MI);
8139 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8140
8141 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8142 return false;
8143
8144 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8145 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8146
8147 Register Dst = Sub->getReg(0);
8148 LLT DstTy = MRI.getType(Dst);
8149
8150 MatchInfo = [=](MachineIRBuilder &B) {
8151 auto Const = B.buildConstant(DstTy, C2 - C1);
8152 B.buildSub(Dst, Const, Add->getLHSReg());
8153 };
8154
8155 return true;
8156}
8157
8159 BuildFnTy &MatchInfo) const {
8160 // fold (A-C1)-C2 -> A-(C1+C2)
8161 const GSub *Sub1 = cast<GSub>(&MI);
8162 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8163
8164 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8165 return false;
8166
8167 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8168 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8169
8170 Register Dst = Sub1->getReg(0);
8171 LLT DstTy = MRI.getType(Dst);
8172
8173 MatchInfo = [=](MachineIRBuilder &B) {
8174 auto Const = B.buildConstant(DstTy, C1 + C2);
8175 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8176 };
8177
8178 return true;
8179}
8180
8182 BuildFnTy &MatchInfo) const {
8183 // fold (C1-A)-C2 -> (C1-C2)-A
8184 const GSub *Sub1 = cast<GSub>(&MI);
8185 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8186
8187 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8188 return false;
8189
8190 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8191 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8192
8193 Register Dst = Sub1->getReg(0);
8194 LLT DstTy = MRI.getType(Dst);
8195
8196 MatchInfo = [=](MachineIRBuilder &B) {
8197 auto Const = B.buildConstant(DstTy, C1 - C2);
8198 B.buildSub(Dst, Const, Sub2->getRHSReg());
8199 };
8200
8201 return true;
8202}
8203
8205 BuildFnTy &MatchInfo) const {
8206 // fold ((A-C1)+C2) -> (A+(C2-C1))
8207 const GAdd *Add = cast<GAdd>(&MI);
8208 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8209
8210 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8211 return false;
8212
8213 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8214 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8215
8216 Register Dst = Add->getReg(0);
8217 LLT DstTy = MRI.getType(Dst);
8218
8219 MatchInfo = [=](MachineIRBuilder &B) {
8220 auto Const = B.buildConstant(DstTy, C2 - C1);
8221 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8222 };
8223
8224 return true;
8225}
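// [Editorial sketch -- not part of CombinerHelper.cpp] The five constant
// re-association folds above are plain modular-arithmetic identities, checked
// here on wrapping 32-bit integers; the helper name and #includes are
// assumptions.
#include <cassert>
#include <cstdint>

static void demoReassociateConstants(uint32_t a, uint32_t c1, uint32_t c2) {
  assert((a + c1) - c2 == a + (c1 - c2)); // (A+C1)-C2 -> A+(C1-C2)
  assert(c2 - (a + c1) == (c2 - c1) - a); // C2-(A+C1) -> (C2-C1)-A
  assert((a - c1) - c2 == a - (c1 + c2)); // (A-C1)-C2 -> A-(C1+C2)
  assert((c1 - a) - c2 == (c1 - c2) - a); // (C1-A)-C2 -> (C1-C2)-A
  assert((a - c1) + c2 == a + (c2 - c1)); // (A-C1)+C2 -> A+(C2-C1)
}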
8226
8228 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8229 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8230
8231 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8232 return false;
8233
8234 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8235
8236 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8237
8238 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8239 // $any:_(<8 x s16>) = G_ANYEXT $bv
8240 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8241 //
8242 // ->
8243 //
8244 // $any:_(s16) = G_ANYEXT $bv[0]
8245 // $any1:_(s16) = G_ANYEXT $bv[1]
8246 // $any2:_(s16) = G_ANYEXT $bv[2]
8247 // $any3:_(s16) = G_ANYEXT $bv[3]
8248 // $any4:_(s16) = G_ANYEXT $bv[4]
8249 // $any5:_(s16) = G_ANYEXT $bv[5]
8250 // $any6:_(s16) = G_ANYEXT $bv[6]
8251 // $any7:_(s16) = G_ANYEXT $bv[7]
8252 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8253 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8254
8255 // We want to unmerge into vectors.
8256 if (!DstTy.isFixedVector())
8257 return false;
8258
8259 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8260 if (!Any)
8261 return false;
8262
8263 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8264
8265 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8266 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8267
8268 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8269 return false;
8270
8271 // FIXME: check element types?
8272 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8273 return false;
8274
8275 LLT BigBvTy = MRI.getType(BV->getReg(0));
8276 LLT SmallBvTy = DstTy;
8277 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8278
8280 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8281 return false;
8282
8283 // We check the legality of scalar anyext.
8285 {TargetOpcode::G_ANYEXT,
8286 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8287 return false;
8288
8289 MatchInfo = [=](MachineIRBuilder &B) {
8290 // Build into each G_UNMERGE_VALUES def
8291 // a small build vector with anyext from the source build vector.
8292 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
8294 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
8295 Register SourceArray =
8296 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8297 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8298 Ops.push_back(AnyExt.getReg(0));
8299 }
8300 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8301 };
8302 };
8303 return true;
8304 };
8305
8306 return false;
8307}
8308
8310 BuildFnTy &MatchInfo) const {
8311
8312 bool Changed = false;
8313 auto &Shuffle = cast<GShuffleVector>(MI);
8314 ArrayRef<int> OrigMask = Shuffle.getMask();
8315 SmallVector<int, 16> NewMask;
8316 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
8317 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8318 const unsigned NumDstElts = OrigMask.size();
8319 for (unsigned i = 0; i != NumDstElts; ++i) {
8320 int Idx = OrigMask[i];
8321 if (Idx >= (int)NumSrcElems) {
8322 Idx = -1;
8323 Changed = true;
8324 }
8325 NewMask.push_back(Idx);
8326 }
8327
8328 if (!Changed)
8329 return false;
8330
8331 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8332 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8333 std::move(NewMask));
8334 };
8335
8336 return true;
8337}
8338
8339static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8340 const unsigned MaskSize = Mask.size();
8341 for (unsigned I = 0; I < MaskSize; ++I) {
8342 int Idx = Mask[I];
8343 if (Idx < 0)
8344 continue;
8345
8346 if (Idx < (int)NumElems)
8347 Mask[I] = Idx + NumElems;
8348 else
8349 Mask[I] = Idx - NumElems;
8350 }
8351}
8352
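// Canonicalize a G_SHUFFLE_VECTOR whose mask only reads from one of its two
// sources: keep the referenced source as the first operand (commuting the
// mask if only the second source is used) and replace the other operand with
// an implicit_def.
// For example, shuffling two <4 x s32> sources with mask <4, 5, 6, 7> becomes
// a shuffle of the second source and undef with mask <0, 1, 2, 3>.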
8353 bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
8354 BuildFnTy &MatchInfo) const {
8355
8356 auto &Shuffle = cast<GShuffleVector>(MI);
8357 // If either of the two inputs is already undef, don't check the mask again
8358 // to prevent an infinite loop.
8359 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8360 return false;
8361
8362 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8363 return false;
8364
8365 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8366 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
8368 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8369 return false;
8370
8371 ArrayRef<int> Mask = Shuffle.getMask();
8372 const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
8373
8374 bool TouchesSrc1 = false;
8375 bool TouchesSrc2 = false;
8376 const unsigned NumElems = Mask.size();
8377 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8378 if (Mask[Idx] < 0)
8379 continue;
8380
8381 if (Mask[Idx] < (int)NumSrcElems)
8382 TouchesSrc1 = true;
8383 else
8384 TouchesSrc2 = true;
8385 }
8386
8387 if (TouchesSrc1 == TouchesSrc2)
8388 return false;
8389
8390 Register NewSrc1 = Shuffle.getSrc1Reg();
8391 SmallVector<int, 16> NewMask(Mask);
8392 if (TouchesSrc2) {
8393 NewSrc1 = Shuffle.getSrc2Reg();
8394 commuteMask(NewMask, NumSrcElems);
8395 }
8396
8397 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8398 auto Undef = B.buildUndef(Src1Ty);
8399 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8400 };
8401
8402 return true;
8403}
8404
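// Fold the carry-out of G_USUBO/G_SSUBO when value tracking can decide the
// overflow: if the subtraction provably never overflows, emit a plain G_SUB
// (with the matching no-wrap flag) and a constant 0 carry; if it provably
// always overflows, emit G_SUB and the target's "true" carry value. If the
// overflow cannot be decided, the instruction is left unchanged.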
8405 bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
8406 BuildFnTy &MatchInfo) const {
8407 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8408
8409 Register Dst = Subo->getReg(0);
8410 Register LHS = Subo->getLHSReg();
8411 Register RHS = Subo->getRHSReg();
8412 Register Carry = Subo->getCarryOutReg();
8413 LLT DstTy = MRI.getType(Dst);
8414 LLT CarryTy = MRI.getType(Carry);
8415
8416 // Check legality before known bits.
8417 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8418 !isConstantLegalOrBeforeLegalizer(CarryTy))
8419 return false;
8420
8421 ConstantRange KBLHS =
8422 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8423 /* IsSigned=*/Subo->isSigned());
8424 ConstantRange KBRHS =
8425 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8426 /* IsSigned=*/Subo->isSigned());
8427
8428 if (Subo->isSigned()) {
8429 // G_SSUBO
8430 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8431 case ConstantRange::OverflowResult::MayOverflow:
8432 return false;
8433 case ConstantRange::OverflowResult::NeverOverflows: {
8434 MatchInfo = [=](MachineIRBuilder &B) {
8435 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8436 B.buildConstant(Carry, 0);
8437 };
8438 return true;
8439 }
8440 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8441 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8442 MatchInfo = [=](MachineIRBuilder &B) {
8443 B.buildSub(Dst, LHS, RHS);
8444 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8445 /*isVector=*/CarryTy.isVector(),
8446 /*isFP=*/false));
8447 };
8448 return true;
8449 }
8450 }
8451 return false;
8452 }
8453
8454 // G_USUBO
8455 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8456 case ConstantRange::OverflowResult::MayOverflow:
8457 return false;
8458 case ConstantRange::OverflowResult::NeverOverflows: {
8459 MatchInfo = [=](MachineIRBuilder &B) {
8460 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8461 B.buildConstant(Carry, 0);
8462 };
8463 return true;
8464 }
8465 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8466 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8467 MatchInfo = [=](MachineIRBuilder &B) {
8468 B.buildSub(Dst, LHS, RHS);
8469 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8470 /*isVector=*/CarryTy.isVector(),
8471 /*isFP=*/false));
8472 };
8473 return true;
8474 }
8475 }
8476
8477 return false;
8478}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition APFloat.h:1457
bool isNaN() const
Definition APFloat.h:1447
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1235
APInt bitcastToAPInt() const
Definition APFloat.h:1353
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
int32_t exactLogBase2() const
Definition APInt.h:1783
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:834
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1041
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1274
bool isMask(unsigned numBits) const
Definition APInt.h:488
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchShuffleToExtract(MachineInstr &MI) const
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Tranform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is zero.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is known to be a power of 2.
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
bool matchCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate to reassociate operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given an G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
void applyShuffleToExtract(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given an G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
const APFloat & getValue() const
Definition Constants.h:321
const APFloat & getValueAPF() const
Definition Constants.h:320
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:208
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:237
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
constexpr LLT getScalarType() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:261
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
operand_type_match m_Reg()
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(APInt RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
SpecificConstantMatch m_SpecificICst(APInt RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
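The matchers above are composed into patterns and applied with mi_match (declared further down in this list). A minimal sketch of how a combine might use them, assuming DstReg and MRI come from the surrounding MachineFunction; the helper name isAddOfZero is hypothetical:

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;

// Recognize DstReg = G_ADD x, c and report whether c is the constant zero.
static bool isAddOfZero(Register DstReg, const MachineRegisterInfo &MRI) {
  Register Src;
  APInt Cst;
  // m_GAdd is commutative, so the constant may sit on either operand.
  if (mi_match(DstReg, MRI, m_GAdd(m_Reg(Src), m_ICst(Cst))))
    return Cst.isZero();
  return false;
}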
initializer< Ty > init(const Ty &Val)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1482
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2033
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:651
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:459
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1442
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1607
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:279
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:739
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1565
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
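The splat queries in this group (isBuildVectorAllZeros, isAllOnesOrAllOnesSplat, isNullOrNullSplat) are typically used to guard combines that should only fire on all-zero or all-ones operands. A minimal sketch built on those declarations; the wrapper name isZeroOrAllOnes is hypothetical:

#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;

// Report whether MI produces a known 0 or -1, as a scalar or a splat vector.
static bool isZeroOrAllOnes(const MachineInstr &MI,
                            const MachineRegisterInfo &MRI) {
  if (isNullOrNullSplat(MI, MRI, /*AllowUndefs=*/false))
    return true;                                              // constant 0 or splat of 0
  return isAllOnesOrAllOnesSplat(MI, MRI, /*AllowUndefs=*/false); // constant -1 or splat of -1
}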
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:492
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1622
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1654
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:670
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
Given a VReg defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:305
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1545
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:201
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
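The bit utilities in this stretch (countr_one, has_single_bit, isPowerOf2_32, isMask_64) come from llvm/ADT/bit.h and llvm/Support/MathExtras.h. A small illustration of what they return on sample values (the values themselves are arbitrary):

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

void bitUtilityExamples() {
  bool P = llvm::isPowerOf2_32(64);     // true: exactly one bit set and > 0
  bool M = llvm::isMask_64(0x00FF);     // true: contiguous ones starting at bit 0
  int  O = llvm::countr_one(0x0007u);   // 3: ones before the first zero bit
  bool S = llvm::has_single_bit(96u);   // false: 96 has two bits set
  (void)P; (void)M; (void)O; (void)S;
}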
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1475
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:966
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:279
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:447
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1578
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1679
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
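getIConstantVRegVal only inspects a direct G_CONSTANT definition, while getIConstantVRegValWithLookThrough also walks through copies and extension/truncation instructions rooted on a G_CONSTANT and reports both the value and the vreg that carried it. A minimal sketch of the difference, assuming Reg and MRI come from the surrounding combine:

#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;

static void readConstant(Register Reg, const MachineRegisterInfo &MRI) {
  // Only succeeds when Reg itself is defined by G_CONSTANT.
  std::optional<APInt> Direct = getIConstantVRegVal(Reg, MRI);

  // Also succeeds through COPY / G_TRUNC / G_*EXT chains rooted on a G_CONSTANT.
  std::optional<ValueAndVReg> Walked =
      getIConstantVRegValWithLookThrough(Reg, MRI, /*LookThroughInstrs=*/true);
  if (Walked) {
    // Walked->Value holds the APInt, Walked->VReg the register it came from.
  }
  (void)Direct;
}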
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:467
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:499
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1460
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:234
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:251
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
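The KnownBits queries above report conservative per-bit facts about a value. A small, self-contained illustration using KnownBits::makeConstant (a KnownBits.h helper not listed on this page) on a fully known value:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"

void knownBitsExample() {
  // Every bit of the 32-bit constant 0xFF0 is known.
  llvm::KnownBits Known = llvm::KnownBits::makeConstant(llvm::APInt(32, 0xFF0));
  unsigned LZ = Known.countMinLeadingZeros();   // 20
  unsigned TZ = Known.countMinTrailingZeros();  // 4
  bool NonNeg = Known.isNonNegative();          // true: the sign bit is known zero
  llvm::APInt Max = Known.getMaxValue();        // 0xFF0, since nothing is unknown
  (void)LZ; (void)TZ; (void)NonNeg; (void)Max;
}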
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
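SignedDivisionByConstantInfo::get and UnsignedDivisionByConstantInfo::get compute the multiplier and shift amounts needed to replace a division by a constant with a multiply-high and shift sequence. A minimal sketch of querying them; the field names mentioned in the comments (Magic, PreShift, PostShift, IsAdd) are assumptions about the struct layout, not taken from this page:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"

void divisionMagicExample() {
  // Magic numbers for an unsigned 32-bit divide by 10.
  llvm::APInt Divisor(32, 10);
  auto Info = llvm::UnsignedDivisionByConstantInfo::get(Divisor);
  // A target would then lower x / 10 roughly as
  //   q = umulh(x, Magic); q = lshr(q, PostShift)
  // with an extra add or pre-shift step depending on the struct's flags.
  (void)Info;
}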