1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
41#include <cmath>
42#include <optional>
43#include <tuple>
44
45#define DEBUG_TYPE "gi-combiner"
46
47using namespace llvm;
48using namespace MIPatternMatch;
49
50// Option to allow testing of the combiner while no targets know about indexed
51// addressing.
52static cl::opt<bool>
53 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
54 cl::desc("Force all indexed operations to be "
55 "legal for the GlobalISel combiner"));
56
61 const LegalizerInfo *LI)
62 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
64 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
65 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
66 (void)this->VT;
67}
68
70 return *Builder.getMF().getSubtarget().getTargetLowering();
71}
72
74 return Builder.getMF();
75}
76
80
81LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
82
83/// \returns The little endian in-memory byte position of byte \p I in a
84/// \p ByteWidth bytes wide type.
85///
86/// E.g. Given a 4-byte type x, x[0] -> byte 0
87static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
88 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
89 return I;
90}
91
92/// Determines the LogBase2 value for a non-null input value using the
93/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
95 auto &MRI = *MIB.getMRI();
96 LLT Ty = MRI.getType(V);
97 auto Ctlz = MIB.buildCTLZ(Ty, V);
98 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
99 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
100}
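// Worked example for the helper above (values chosen only for illustration):
// for a 32-bit scalar with V = 8, ctlz(8) = 28, so
// LogBase2(8) = (32 - 1) - 28 = 3. For vector types the same arithmetic
// applies per element.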
101
102/// \returns The big endian in-memory byte position of byte \p I in a
103/// \p ByteWidth bytes wide type.
104///
105/// E.g. Given a 4-byte type x, x[0] -> byte 3
106static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
107 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
108 return ByteWidth - I - 1;
109}
110
111/// Given a map from byte offsets in memory to indices in a load/store,
112/// determine if that map corresponds to a little or big endian byte pattern.
113///
114/// \param MemOffset2Idx maps memory offsets to address offsets.
115/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
116///
117/// \returns true if the map corresponds to a big endian byte pattern, false if
118/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
119///
120/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
121/// are as follows:
122///
123/// AddrOffset Little endian Big endian
124/// 0 0 3
125/// 1 1 2
126/// 2 2 1
127/// 3 3 0
128static std::optional<bool>
130 int64_t LowestIdx) {
131 // Need at least two byte positions to decide on endianness.
132 unsigned Width = MemOffset2Idx.size();
133 if (Width < 2)
134 return std::nullopt;
135 bool BigEndian = true, LittleEndian = true;
136 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
137 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
138 if (MemOffsetAndIdx == MemOffset2Idx.end())
139 return std::nullopt;
140 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
141 assert(Idx >= 0 && "Expected non-negative byte offset?");
142 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
143 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
144 if (!BigEndian && !LittleEndian)
145 return std::nullopt;
146 }
147
148 assert((BigEndian != LittleEndian) &&
149 "Pattern cannot be both big and little endian!");
150 return BigEndian;
151}
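// Illustrative example (assuming LowestIdx = 0 for a 4-byte access): the map
// {0->0, 1->1, 2->2, 3->3} returns false (little endian), the reversed map
// {0->3, 1->2, 2->1, 3->0} returns true (big endian), and something like
// {0->2, 1->0, ...} returns std::nullopt since it matches neither pattern.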
152
154
155bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
156 assert(LI && "Must have LegalizerInfo to query isLegal!");
157 return LI->getAction(Query).Action == LegalizeActions::Legal;
158}
159
161 const LegalityQuery &Query) const {
162 return isPreLegalize() || isLegal(Query);
163}
164
166 return isLegal(Query) ||
167 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
168}
169
171 if (!Ty.isVector())
172 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
173 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
174 if (isPreLegalize())
175 return true;
176 LLT EltTy = Ty.getElementType();
177 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
178 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
179}
180
182 Register ToReg) const {
183 Observer.changingAllUsesOfReg(MRI, FromReg);
184
185 if (MRI.constrainRegAttrs(ToReg, FromReg))
186 MRI.replaceRegWith(FromReg, ToReg);
187 else
188 Builder.buildCopy(FromReg, ToReg);
189
190 Observer.finishedChangingAllUsesOfReg();
191}
192
194 MachineOperand &FromRegOp,
195 Register ToReg) const {
196 assert(FromRegOp.getParent() && "Expected an operand in an MI");
197 Observer.changingInstr(*FromRegOp.getParent());
198
199 FromRegOp.setReg(ToReg);
200
201 Observer.changedInstr(*FromRegOp.getParent());
202}
203
205 unsigned ToOpcode) const {
206 Observer.changingInstr(FromMI);
207
208 FromMI.setDesc(Builder.getTII().get(ToOpcode));
209
210 Observer.changedInstr(FromMI);
211}
212
214 return RBI->getRegBank(Reg, MRI, *TRI);
215}
216
218 const RegisterBank *RegBank) const {
219 if (RegBank)
220 MRI.setRegBank(Reg, *RegBank);
221}
222
224 if (matchCombineCopy(MI)) {
226 return true;
227 }
228 return false;
229}
231 if (MI.getOpcode() != TargetOpcode::COPY)
232 return false;
233 Register DstReg = MI.getOperand(0).getReg();
234 Register SrcReg = MI.getOperand(1).getReg();
235 return canReplaceReg(DstReg, SrcReg, MRI);
236}
238 Register DstReg = MI.getOperand(0).getReg();
239 Register SrcReg = MI.getOperand(1).getReg();
240 replaceRegWith(MRI, DstReg, SrcReg);
241 MI.eraseFromParent();
242}
243
245 MachineInstr &MI, BuildFnTy &MatchInfo) const {
246 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
247 Register DstOp = MI.getOperand(0).getReg();
248 Register OrigOp = MI.getOperand(1).getReg();
249
250 if (!MRI.hasOneNonDBGUse(OrigOp))
251 return false;
252
253 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
254 // Even if only a single operand of the PHI is not guaranteed non-poison,
255 // moving freeze() backwards across a PHI can cause optimization issues for
256 // other users of that operand.
257 //
258 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
259 // the source register is unprofitable because it makes the freeze() more
260 // strict than is necessary (it would affect the whole register instead of
261 // just the subreg being frozen).
262 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
263 return false;
264
265 if (canCreateUndefOrPoison(OrigOp, MRI,
266 /*ConsiderFlagsAndMetadata=*/false))
267 return false;
268
269 std::optional<MachineOperand> MaybePoisonOperand;
270 for (MachineOperand &Operand : OrigDef->uses()) {
271 if (!Operand.isReg())
272 return false;
273
274 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
275 continue;
276
277 if (!MaybePoisonOperand)
278 MaybePoisonOperand = Operand;
279 else {
280 // We have more than one maybe-poison operand. Moving the freeze is
281 // unsafe.
282 return false;
283 }
284 }
285
286 // Eliminate freeze if all operands are guaranteed non-poison.
287 if (!MaybePoisonOperand) {
288 MatchInfo = [=](MachineIRBuilder &B) {
289 Observer.changingInstr(*OrigDef);
290 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
291 Observer.changedInstr(*OrigDef);
292 B.buildCopy(DstOp, OrigOp);
293 };
294 return true;
295 }
296
297 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
298 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
299
300 MatchInfo = [=](MachineIRBuilder &B) mutable {
301 Observer.changingInstr(*OrigDef);
302 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
303 Observer.changedInstr(*OrigDef);
304 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
305 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
307 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
308 Freeze.getReg(0));
309 replaceRegWith(MRI, DstOp, OrigOp);
310 };
311 return true;
312}
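// Rough MIR sketch of the transform above (register names are purely
// illustrative):
//   %x:_(s32) = G_ADD %a, %maybe_poison
//   %y:_(s32) = G_FREEZE %x
// becomes, when %a is known not to be undef/poison,
//   %f:_(s32) = G_FREEZE %maybe_poison
//   %x:_(s32) = G_ADD %a, %f   (with poison-generating flags dropped)
// and uses of %y are rewritten to use %x.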
313
316 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
317 "Invalid instruction");
318 bool IsUndef = true;
319 MachineInstr *Undef = nullptr;
320
321 // Walk over all the operands of concat vectors and check if they are
322 // build_vector themselves or undef.
323 // Then collect their operands in Ops.
324 for (const MachineOperand &MO : MI.uses()) {
325 Register Reg = MO.getReg();
326 MachineInstr *Def = MRI.getVRegDef(Reg);
327 assert(Def && "Operand not defined");
328 if (!MRI.hasOneNonDBGUse(Reg))
329 return false;
330 switch (Def->getOpcode()) {
331 case TargetOpcode::G_BUILD_VECTOR:
332 IsUndef = false;
333 // Remember the operands of the build_vector to fold
334 // them into the yet-to-build flattened concat vectors.
335 for (const MachineOperand &BuildVecMO : Def->uses())
336 Ops.push_back(BuildVecMO.getReg());
337 break;
338 case TargetOpcode::G_IMPLICIT_DEF: {
339 LLT OpType = MRI.getType(Reg);
340 // Keep one undef value for all the undef operands.
341 if (!Undef) {
342 Builder.setInsertPt(*MI.getParent(), MI);
343 Undef = Builder.buildUndef(OpType.getScalarType());
344 }
345 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
346 OpType.getScalarType() &&
347 "All undefs should have the same type");
348 // Break the undef vector into as many scalar elements as needed
349 // for the flattening.
350 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
351 EltIdx != EltEnd; ++EltIdx)
352 Ops.push_back(Undef->getOperand(0).getReg());
353 break;
354 }
355 default:
356 return false;
357 }
358 }
359
360 // Check if the combine is illegal
361 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
363 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
364 return false;
365 }
366
367 if (IsUndef)
368 Ops.clear();
369
370 return true;
371}
374 // We determined that the concat_vectors can be flattened.
375 // Generate the flattened build_vector.
376 Register DstReg = MI.getOperand(0).getReg();
377 Builder.setInsertPt(*MI.getParent(), MI);
378 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
379
380 // Note: IsUndef is sort of redundant. We could have determined it by
381 // checking that all Ops are undef. Alternatively, we could have
382 // generated a build_vector of undefs and relied on another combine to
383 // clean that up. For now, given we already gather this information
384 // in matchCombineConcatVectors, just save compile time and issue the
385 // right thing.
386 if (Ops.empty())
387 Builder.buildUndef(NewDstReg);
388 else
389 Builder.buildBuildVector(NewDstReg, Ops);
390 replaceRegWith(MRI, DstReg, NewDstReg);
391 MI.eraseFromParent();
392}
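// Sketch of the flattening above (register names are illustrative):
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x, %y
//   %b:_(<2 x s32>) = G_IMPLICIT_DEF
//   %c:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b
// becomes
//   %u:_(s32) = G_IMPLICIT_DEF
//   %c:_(<4 x s32>) = G_BUILD_VECTOR %x, %y, %u, %u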
393
395 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
396 "Invalid instruction");
397 auto &Shuffle = cast<GShuffleVector>(MI);
398
399 Register SrcVec1 = Shuffle.getSrc1Reg();
400 Register SrcVec2 = Shuffle.getSrc2Reg();
401
402 LLT SrcVec1Type = MRI.getType(SrcVec1);
403 LLT SrcVec2Type = MRI.getType(SrcVec2);
404 return SrcVec1Type.isVector() && SrcVec2Type.isVector();
405}
406
408 auto &Shuffle = cast<GShuffleVector>(MI);
409
410 Register SrcVec1 = Shuffle.getSrc1Reg();
411 Register SrcVec2 = Shuffle.getSrc2Reg();
412 LLT EltTy = MRI.getType(SrcVec1).getElementType();
413 int Width = MRI.getType(SrcVec1).getNumElements();
414
415 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
416 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
417
418 SmallVector<Register> Extracts;
419 // Select only applicable elements from unmerged values.
420 for (int Val : Shuffle.getMask()) {
421 if (Val == -1)
422 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
423 else if (Val < Width)
424 Extracts.push_back(Unmerge1.getReg(Val));
425 else
426 Extracts.push_back(Unmerge2.getReg(Val - Width));
427 }
428 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
429 if (Extracts.size() == 1)
430 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
431 else
432 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
433 MI.eraseFromParent();
434}
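// Sketch of the rewrite above for two <2 x s32> sources and shufflemask(1, 3)
// (register names are illustrative):
//   %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %v1
//   %c:_(s32), %d:_(s32) = G_UNMERGE_VALUES %v2
//   %dst:_(<2 x s32>) = G_BUILD_VECTOR %b, %d
// A -1 mask element would select a scalar G_IMPLICIT_DEF instead.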
435
438 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
439 auto ConcatMI1 =
440 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
441 auto ConcatMI2 =
442 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
443 if (!ConcatMI1 || !ConcatMI2)
444 return false;
445
446 // Check that the sources of the Concat instructions have the same type
447 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
448 MRI.getType(ConcatMI2->getSourceReg(0)))
449 return false;
450
451 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
452 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
453 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
454 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
455 // Check if the index takes a whole source register from G_CONCAT_VECTORS
456 // Assumes that all sources of G_CONCAT_VECTORS have the same type.
457 if (Mask[i] == -1) {
458 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
459 if (i + j >= Mask.size())
460 return false;
461 if (Mask[i + j] != -1)
462 return false;
463 }
465 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
466 return false;
467 Ops.push_back(0);
468 } else if (Mask[i] % ConcatSrcNumElt == 0) {
469 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
470 if (i + j >= Mask.size())
471 return false;
472 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
473 return false;
474 }
475 // Retrieve the source register from its respective G_CONCAT_VECTORS
476 // instruction
477 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
478 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
479 } else {
480 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
481 ConcatMI1->getNumSources()));
482 }
483 } else {
484 return false;
485 }
486 }
487
489 {TargetOpcode::G_CONCAT_VECTORS,
490 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
491 return false;
492
493 return !Ops.empty();
494}
495
498 LLT SrcTy;
499 for (Register &Reg : Ops) {
500 if (Reg != 0)
501 SrcTy = MRI.getType(Reg);
502 }
503 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
504
505 Register UndefReg = 0;
506
507 for (Register &Reg : Ops) {
508 if (Reg == 0) {
509 if (UndefReg == 0)
510 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
511 Reg = UndefReg;
512 }
513 }
514
515 if (Ops.size() > 1)
516 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
517 else
518 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
519 MI.eraseFromParent();
520}
521
526 return true;
527 }
528 return false;
529}
530
533 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
534 "Invalid instruction kind");
535 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
536 Register Src1 = MI.getOperand(1).getReg();
537 LLT SrcType = MRI.getType(Src1);
538 // As bizarre as it may look, shuffle vector can actually produce a
539 // scalar! This is because at the IR level a <1 x ty> shuffle
540 // vector is perfectly valid.
541 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
542 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
543
544 // If the resulting vector is smaller than the size of the source
545 // vectors being concatenated, we won't be able to replace the
546 // shuffle vector with a concat_vectors.
547 //
548 // Note: We may still be able to produce a concat_vectors fed by
549 // extract_vector_elt and so on. It is less clear that would
550 // be better though, so don't bother for now.
551 //
552 // If the destination is a scalar, the size of the sources doesn't
553 // matter. We will lower the shuffle to a plain copy. This will
554 // work only if the source and destination have the same size. But
555 // that's covered by the next condition.
556 //
557 // TODO: If the sizes of the source and destination don't match
558 // we could still emit an extract vector element in that case.
559 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
560 return false;
561
562 // Check that the shuffle mask can be broken evenly between the
563 // different sources.
564 if (DstNumElts % SrcNumElts != 0)
565 return false;
566
567 // Mask length is a multiple of the source vector length.
568 // Check if the shuffle is some kind of concatenation of the input
569 // vectors.
570 unsigned NumConcat = DstNumElts / SrcNumElts;
571 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
572 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
573 for (unsigned i = 0; i != DstNumElts; ++i) {
574 int Idx = Mask[i];
575 // Undef value.
576 if (Idx < 0)
577 continue;
578 // Ensure the indices in each SrcType sized piece are sequential and that
579 // the same source is used for the whole piece.
580 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
581 (ConcatSrcs[i / SrcNumElts] >= 0 &&
582 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
583 return false;
584 // Remember which source this index came from.
585 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
586 }
587
588 // The shuffle is concatenating multiple vectors together.
589 // Collect the different operands for that.
590 Register UndefReg;
591 Register Src2 = MI.getOperand(2).getReg();
592 for (auto Src : ConcatSrcs) {
593 if (Src < 0) {
594 if (!UndefReg) {
595 Builder.setInsertPt(*MI.getParent(), MI);
596 UndefReg = Builder.buildUndef(SrcType).getReg(0);
597 }
598 Ops.push_back(UndefReg);
599 } else if (Src == 0)
600 Ops.push_back(Src1);
601 else
602 Ops.push_back(Src2);
603 }
604 return true;
605}
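// Illustrative example: with two <2 x s32> sources, the mask (0, 1, 2, 3)
// yields Ops = {%src1, %src2} and the apply below emits a merge-like
// instruction (e.g. G_CONCAT_VECTORS), while (0, 1, -1, -1) uses %src1 plus
// an implicit_def of the source type for the undef half.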
606
608 MachineInstr &MI, const ArrayRef<Register> Ops) const {
609 Register DstReg = MI.getOperand(0).getReg();
610 Builder.setInsertPt(*MI.getParent(), MI);
611 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
612
613 if (Ops.size() == 1)
614 Builder.buildCopy(NewDstReg, Ops[0]);
615 else
616 Builder.buildMergeLikeInstr(NewDstReg, Ops);
617
618 replaceRegWith(MRI, DstReg, NewDstReg);
619 MI.eraseFromParent();
620}
621
623 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
624 "Invalid instruction kind");
625
626 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
627 return Mask.size() == 1;
628}
629
631 Register DstReg = MI.getOperand(0).getReg();
632 Builder.setInsertPt(*MI.getParent(), MI);
633
634 int I = MI.getOperand(3).getShuffleMask()[0];
635 Register Src1 = MI.getOperand(1).getReg();
636 LLT Src1Ty = MRI.getType(Src1);
637 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
638 Register SrcReg;
639 if (I >= Src1NumElts) {
640 SrcReg = MI.getOperand(2).getReg();
641 I -= Src1NumElts;
642 } else if (I >= 0)
643 SrcReg = Src1;
644
645 if (I < 0)
646 Builder.buildUndef(DstReg);
647 else if (!MRI.getType(SrcReg).isVector())
648 Builder.buildCopy(DstReg, SrcReg);
649 else
650 Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
651
652 MI.eraseFromParent();
653}
654
655namespace {
656
657/// Select a preference between two uses. CurrentUse is the current preference
658/// while *ForCandidate describes the candidate under consideration.
659PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
660 PreferredTuple &CurrentUse,
661 const LLT TyForCandidate,
662 unsigned OpcodeForCandidate,
663 MachineInstr *MIForCandidate) {
664 if (!CurrentUse.Ty.isValid()) {
665 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
666 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
667 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
668 return CurrentUse;
669 }
670
671 // We permit the extend to hoist through basic blocks but this is only
672 // sensible if the target has extending loads. If you end up lowering back
673 // into a load and extend during the legalizer then the end result is
674 // hoisting the extend up to the load.
675
676 // Prefer defined extensions to undefined extensions as these are more
677 // likely to reduce the number of instructions.
678 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
679 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
680 return CurrentUse;
681 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
682 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
683 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
684
685 // Prefer sign extensions to zero extensions: standalone sign extensions
686 // tend to be more expensive, so folding them into the load is the bigger
687 // win. Don't do this if the load is already a zero-extend load though,
688 // otherwise we'll rewrite a zero-extend load into a sign-extend load later.
689 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
690 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
691 OpcodeForCandidate == TargetOpcode::G_ZEXT)
692 return CurrentUse;
693 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
694 OpcodeForCandidate == TargetOpcode::G_SEXT)
695 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
696 }
697
698 // This is potentially target specific. We've chosen the largest type
699 // because G_TRUNC is usually free. One potential catch with this is that
700 // some targets have fewer large registers than small
701 // registers and this choice potentially increases the live-range for the
702 // larger value.
703 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
704 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
705 }
706 return CurrentUse;
707}
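// For example (an assumed scenario, not taken from a test): if an s8 G_LOAD is
// used by both a G_ZEXT to s32 and a G_SEXT to s64, the rules above settle on
// the s64 G_SEXT: defined extensions beat G_ANYEXT, sign extensions win over
// zero extensions at the same width, and otherwise the larger type is chosen.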
708
709/// Find a suitable place to insert some instructions and insert them. This
710/// function accounts for special cases like inserting before a PHI node.
711/// The current strategy for inserting before PHI's is to duplicate the
712/// instructions for each predecessor. However, while that's ok for G_TRUNC
713/// on most targets since it generally requires no code, other targets/cases may
714/// want to try harder to find a dominating block.
715static void InsertInsnsWithoutSideEffectsBeforeUse(
718 MachineOperand &UseMO)>
719 Inserter) {
720 MachineInstr &UseMI = *UseMO.getParent();
721
722 MachineBasicBlock *InsertBB = UseMI.getParent();
723
724 // If the use is a PHI then we want the predecessor block instead.
725 if (UseMI.isPHI()) {
726 MachineOperand *PredBB = std::next(&UseMO);
727 InsertBB = PredBB->getMBB();
728 }
729
730 // If the block is the same block as the def then we want to insert just after
731 // the def instead of at the start of the block.
732 if (InsertBB == DefMI.getParent()) {
734 Inserter(InsertBB, std::next(InsertPt), UseMO);
735 return;
736 }
737
738 // Otherwise we want the start of the BB
739 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
740}
741} // end anonymous namespace
742
744 PreferredTuple Preferred;
745 if (matchCombineExtendingLoads(MI, Preferred)) {
746 applyCombineExtendingLoads(MI, Preferred);
747 return true;
748 }
749 return false;
750}
751
752static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
753 unsigned CandidateLoadOpc;
754 switch (ExtOpc) {
755 case TargetOpcode::G_ANYEXT:
756 CandidateLoadOpc = TargetOpcode::G_LOAD;
757 break;
758 case TargetOpcode::G_SEXT:
759 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
760 break;
761 case TargetOpcode::G_ZEXT:
762 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
763 break;
764 default:
765 llvm_unreachable("Unexpected extend opc");
766 }
767 return CandidateLoadOpc;
768}
769
771 MachineInstr &MI, PreferredTuple &Preferred) const {
772 // We match the loads and follow the uses to the extend instead of matching
773 // the extends and following the def to the load. This is because the load
774 // must remain in the same position for correctness (unless we also add code
775 // to find a safe place to sink it) whereas the extend is freely movable.
776 // It also prevents us from duplicating the load for the volatile case or just
777 // for performance.
778 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
779 if (!LoadMI)
780 return false;
781
782 Register LoadReg = LoadMI->getDstReg();
783
784 LLT LoadValueTy = MRI.getType(LoadReg);
785 if (!LoadValueTy.isScalar())
786 return false;
787
788 // Most architectures are going to legalize <s8 loads into at least a 1 byte
789 // load, and the MMOs can only describe memory accesses in multiples of bytes.
790 // If we try to perform extload combining on those, we can end up with
791 // %a(s8) = extload %ptr (load 1 byte from %ptr)
792 // ... which is an illegal extload instruction.
793 if (LoadValueTy.getSizeInBits() < 8)
794 return false;
795
796 // Non-power-of-2 types will very likely be legalized into multiple
797 // loads. Don't bother trying to match them into extending loads.
799 return false;
800
801 // Find the preferred type aside from the any-extends (unless it's the only
802 // one) and non-extending ops. We'll emit an extending load to that type
803 // and emit a variant of (extend (trunc X)) for the others according to the
804 // relative type sizes. At the same time, pick an extend to use based on the
805 // extend involved in the chosen type.
806 unsigned PreferredOpcode =
807 isa<GLoad>(&MI)
808 ? TargetOpcode::G_ANYEXT
809 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
810 Preferred = {LLT(), PreferredOpcode, nullptr};
811 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
812 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
813 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
814 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
815 const auto &MMO = LoadMI->getMMO();
816 // Don't do anything for atomics.
817 if (MMO.isAtomic())
818 continue;
819 // Check for legality.
820 if (!isPreLegalize()) {
821 LegalityQuery::MemDesc MMDesc(MMO);
822 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
823 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
824 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
825 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
826 .Action != LegalizeActions::Legal)
827 continue;
828 }
829 Preferred = ChoosePreferredUse(MI, Preferred,
830 MRI.getType(UseMI.getOperand(0).getReg()),
831 UseMI.getOpcode(), &UseMI);
832 }
833 }
834
835 // There were no extends
836 if (!Preferred.MI)
837 return false;
838 // It should be impossible to choose an extend without selecting a different
839 // type since by definition the result of an extend is larger.
840 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
841
842 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
843 return true;
844}
845
847 MachineInstr &MI, PreferredTuple &Preferred) const {
848 // Rewrite the load to the chosen extending load.
849 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
850
851 // Inserter to insert a truncate back to the original type at a given point
852 // with some basic CSE to limit truncate duplication to one per BB.
854 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
855 MachineBasicBlock::iterator InsertBefore,
856 MachineOperand &UseMO) {
857 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
858 if (PreviouslyEmitted) {
859 Observer.changingInstr(*UseMO.getParent());
860 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
861 Observer.changedInstr(*UseMO.getParent());
862 return;
863 }
864
865 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
866 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
867 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
868 EmittedInsns[InsertIntoBB] = NewMI;
869 replaceRegOpWith(MRI, UseMO, NewDstReg);
870 };
871
872 Observer.changingInstr(MI);
873 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
874 MI.setDesc(Builder.getTII().get(LoadOpc));
875
876 // Rewrite all the uses to fix up the types.
877 auto &LoadValue = MI.getOperand(0);
879 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
880
881 for (auto *UseMO : Uses) {
882 MachineInstr *UseMI = UseMO->getParent();
883
884 // If the extend is compatible with the preferred extend then we should fix
885 // up the type and extend so that it uses the preferred use.
886 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
887 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
888 Register UseDstReg = UseMI->getOperand(0).getReg();
889 MachineOperand &UseSrcMO = UseMI->getOperand(1);
890 const LLT UseDstTy = MRI.getType(UseDstReg);
891 if (UseDstReg != ChosenDstReg) {
892 if (Preferred.Ty == UseDstTy) {
893 // If the use has the same type as the preferred use, then merge
894 // the vregs and erase the extend. For example:
895 // %1:_(s8) = G_LOAD ...
896 // %2:_(s32) = G_SEXT %1(s8)
897 // %3:_(s32) = G_ANYEXT %1(s8)
898 // ... = ... %3(s32)
899 // rewrites to:
900 // %2:_(s32) = G_SEXTLOAD ...
901 // ... = ... %2(s32)
902 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
903 Observer.erasingInstr(*UseMO->getParent());
904 UseMO->getParent()->eraseFromParent();
905 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
906 // If the preferred size is smaller, then keep the extend but extend
907 // from the result of the extending load. For example:
908 // %1:_(s8) = G_LOAD ...
909 // %2:_(s32) = G_SEXT %1(s8)
910 // %3:_(s64) = G_ANYEXT %1(s8)
911 // ... = ... %3(s64)
912 // rewrites to:
913 // %2:_(s32) = G_SEXTLOAD ...
914 // %3:_(s64) = G_ANYEXT %2:_(s32)
915 // ... = ... %3(s64)
916 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
917 } else {
918 // If the preferred size is larger, then insert a truncate. For
919 // example:
920 // %1:_(s8) = G_LOAD ...
921 // %2:_(s64) = G_SEXT %1(s8)
922 // %3:_(s32) = G_ZEXT %1(s8)
923 // ... = ... %3(s32)
924 // rewrites to:
925 // %2:_(s64) = G_SEXTLOAD ...
926 // %4:_(s8) = G_TRUNC %2:_(s64)
927 // %3:_(s32) = G_ZEXT %4(s8)
928 // ... = ... %3(s32)
929 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
930 InsertTruncAt);
931 }
932 continue;
933 }
934 // The use is (one of) the uses of the preferred use we chose earlier.
935 // We're going to update the load to def this value later so just erase
936 // the old extend.
937 Observer.erasingInstr(*UseMO->getParent());
938 UseMO->getParent()->eraseFromParent();
939 continue;
940 }
941
942 // The use isn't an extend. Truncate back to the type we originally loaded.
943 // This is free on many targets.
944 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
945 }
946
947 MI.getOperand(0).setReg(ChosenDstReg);
948 Observer.changedInstr(MI);
949}
950
952 BuildFnTy &MatchInfo) const {
953 assert(MI.getOpcode() == TargetOpcode::G_AND);
954
955 // If we have the following code:
956 // %mask = G_CONSTANT 255
957 // %ld = G_LOAD %ptr, (load s16)
958 // %and = G_AND %ld, %mask
959 //
960 // Try to fold it into
961 // %ld = G_ZEXTLOAD %ptr, (load s8)
962
963 Register Dst = MI.getOperand(0).getReg();
964 if (MRI.getType(Dst).isVector())
965 return false;
966
967 auto MaybeMask =
968 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
969 if (!MaybeMask)
970 return false;
971
972 APInt MaskVal = MaybeMask->Value;
973
974 if (!MaskVal.isMask())
975 return false;
976
977 Register SrcReg = MI.getOperand(1).getReg();
978 // Don't use getOpcodeDef() here since intermediate instructions may have
979 // multiple users.
980 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
981 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
982 return false;
983
984 Register LoadReg = LoadMI->getDstReg();
985 LLT RegTy = MRI.getType(LoadReg);
986 Register PtrReg = LoadMI->getPointerReg();
987 unsigned RegSize = RegTy.getSizeInBits();
988 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
989 unsigned MaskSizeBits = MaskVal.countr_one();
990
991 // The mask may not be larger than the in-memory type, as it might cover
992 // sign-extended bits.
993 if (MaskSizeBits > LoadSizeBits.getValue())
994 return false;
995
996 // If the mask covers the whole destination register, there's nothing to
997 // extend
998 if (MaskSizeBits >= RegSize)
999 return false;
1000
1001 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
1002 // at least byte loads. Avoid creating such loads here
1003 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
1004 return false;
1005
1006 const MachineMemOperand &MMO = LoadMI->getMMO();
1007 LegalityQuery::MemDesc MemDesc(MMO);
1008
1009 // Don't modify the memory access size if this is atomic/volatile, but we can
1010 // still adjust the opcode to indicate the high bit behavior.
1011 if (LoadMI->isSimple())
1012 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
1013 else if (LoadSizeBits.getValue() > MaskSizeBits ||
1014 LoadSizeBits.getValue() == RegSize)
1015 return false;
1016
1017 // TODO: Could check if it's legal with the reduced or original memory size.
1019 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
1020 return false;
1021
1022 MatchInfo = [=](MachineIRBuilder &B) {
1023 B.setInstrAndDebugLoc(*LoadMI);
1024 auto &MF = B.getMF();
1025 auto PtrInfo = MMO.getPointerInfo();
1026 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
1027 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
1028 LoadMI->eraseFromParent();
1029 };
1030 return true;
1031}
1032
1034 const MachineInstr &UseMI) const {
1035 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1036 "shouldn't consider debug uses");
1037 assert(DefMI.getParent() == UseMI.getParent());
1038 if (&DefMI == &UseMI)
1039 return true;
1040 const MachineBasicBlock &MBB = *DefMI.getParent();
1041 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
1042 return &MI == &DefMI || &MI == &UseMI;
1043 });
1044 if (DefOrUse == MBB.end())
1045 llvm_unreachable("Block must contain both DefMI and UseMI!");
1046 return &*DefOrUse == &DefMI;
1047}
1048
1050 const MachineInstr &UseMI) const {
1051 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1052 "shouldn't consider debug uses");
1053 if (MDT)
1054 return MDT->dominates(&DefMI, &UseMI);
1055 else if (DefMI.getParent() != UseMI.getParent())
1056 return false;
1057
1058 return isPredecessor(DefMI, UseMI);
1059}
1060
1062 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1063 Register SrcReg = MI.getOperand(1).getReg();
1064 Register LoadUser = SrcReg;
1065
1066 if (MRI.getType(SrcReg).isVector())
1067 return false;
1068
1069 Register TruncSrc;
1070 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1071 LoadUser = TruncSrc;
1072
1073 uint64_t SizeInBits = MI.getOperand(2).getImm();
1074 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1075 // need any extend at all, just a truncate.
1076 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1077 // If truncating more than the original extended value, abort.
1078 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1079 if (TruncSrc &&
1080 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1081 return false;
1082 if (LoadSizeBits == SizeInBits)
1083 return true;
1084 }
1085 return false;
1086}
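// MIR sketch of the case matched above (register names are illustrative):
//   %ld:_(s32) = G_SEXTLOAD %ptr (load 1)
//   %ext:_(s32) = G_SEXT_INREG %ld, 8
// The G_SEXT_INREG is redundant because the load already sign-extended from
// 8 bits, so the apply below replaces %ext with a plain copy of %ld.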
1087
1089 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1090 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1091 MI.eraseFromParent();
1092}
1093
1095 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1096 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1097
1098 Register DstReg = MI.getOperand(0).getReg();
1099 LLT RegTy = MRI.getType(DstReg);
1100
1101 // Only supports scalars for now.
1102 if (RegTy.isVector())
1103 return false;
1104
1105 Register SrcReg = MI.getOperand(1).getReg();
1106 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1107 if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
1108 return false;
1109
1110 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1111
1112 // If the sign extend extends from a narrower width than the load's width,
1113 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1114 // Avoid widening the load at all.
1115 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1116
1117 // Don't generate G_SEXTLOADs with a < 1 byte width.
1118 if (NewSizeBits < 8)
1119 return false;
1120 // Don't bother creating a non-power-of-2 sextload; it will likely be broken up
1121 // anyway for most targets.
1122 if (!isPowerOf2_32(NewSizeBits))
1123 return false;
1124
1125 const MachineMemOperand &MMO = LoadDef->getMMO();
1126 LegalityQuery::MemDesc MMDesc(MMO);
1127
1128 // Don't modify the memory access size if this is atomic/volatile, but we can
1129 // still adjust the opcode to indicate the high bit behavior.
1130 if (LoadDef->isSimple())
1131 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1132 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1133 return false;
1134
1135 // TODO: Could check if it's legal with the reduced or original memory size.
1136 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1137 {MRI.getType(LoadDef->getDstReg()),
1138 MRI.getType(LoadDef->getPointerReg())},
1139 {MMDesc}}))
1140 return false;
1141
1142 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1143 return true;
1144}
1145
1147 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1148 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1149 Register LoadReg;
1150 unsigned ScalarSizeBits;
1151 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1152 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1153
1154 // If we have the following:
1155 // %ld = G_LOAD %ptr, (load 2)
1156 // %ext = G_SEXT_INREG %ld, 8
1157 // ==>
1158 // %ld = G_SEXTLOAD %ptr (load 1)
1159
1160 auto &MMO = LoadDef->getMMO();
1161 Builder.setInstrAndDebugLoc(*LoadDef);
1162 auto &MF = Builder.getMF();
1163 auto PtrInfo = MMO.getPointerInfo();
1164 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1165 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1166 LoadDef->getPointerReg(), *NewMMO);
1167 MI.eraseFromParent();
1168
1169 // Not all loads can be deleted, so make sure the old one is removed.
1170 LoadDef->eraseFromParent();
1171}
1172
1173/// Return true if 'MI' is a load or a store that may fold its address
1174/// operand into the load / store addressing mode.
1178 auto *MF = MI->getMF();
1179 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1180 if (!Addr)
1181 return false;
1182
1183 AM.HasBaseReg = true;
1184 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1185 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1186 else
1187 AM.Scale = 1; // [reg +/- reg]
1188
1189 return TLI.isLegalAddressingMode(
1190 MF->getDataLayout(), AM,
1191 getTypeForLLT(MI->getMMO().getMemoryType(),
1192 MF->getFunction().getContext()),
1193 MI->getMMO().getAddrSpace());
1194}
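// For example (a hypothetical target): a load whose address is
// (G_PTR_ADD %base, 16) is considered foldable only if [reg + imm] addressing
// is legal for the loaded type, while (G_PTR_ADD %base, %idx) requires
// [reg + reg] addressing; both are queried through TargetLowering's
// isLegalAddressingMode hook above.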
1195
1196static unsigned getIndexedOpc(unsigned LdStOpc) {
1197 switch (LdStOpc) {
1198 case TargetOpcode::G_LOAD:
1199 return TargetOpcode::G_INDEXED_LOAD;
1200 case TargetOpcode::G_STORE:
1201 return TargetOpcode::G_INDEXED_STORE;
1202 case TargetOpcode::G_ZEXTLOAD:
1203 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1204 case TargetOpcode::G_SEXTLOAD:
1205 return TargetOpcode::G_INDEXED_SEXTLOAD;
1206 default:
1207 llvm_unreachable("Unexpected opcode");
1208 }
1209}
1210
1211bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1212 // Check for legality.
1213 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1214 LLT Ty = MRI.getType(LdSt.getReg(0));
1215 LLT MemTy = LdSt.getMMO().getMemoryType();
1217 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1219 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1220 SmallVector<LLT> OpTys;
1221 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1222 OpTys = {PtrTy, Ty, Ty};
1223 else
1224 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1225
1226 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1227 return isLegal(Q);
1228}
1229
1231 "post-index-use-threshold", cl::Hidden, cl::init(32),
1232 cl::desc("Number of uses of a base pointer to check before it is no longer "
1233 "considered for post-indexing."));
1234
1235bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1237 bool &RematOffset) const {
1238 // We're looking for the following pattern, for either load or store:
1239 // %baseptr:_(p0) = ...
1240 // G_STORE %val(s64), %baseptr(p0)
1241 // %offset:_(s64) = G_CONSTANT i64 -256
1242 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
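// If a candidate is found, the indexed load/store apply below rewrites this
// into a post-indexed access along the lines of (illustrative sketch):
//   %new_addr:_(p0) = G_INDEXED_STORE %val(s64), %baseptr(p0), %offset(s64), 0
// where the final operand (0, i.e. post-indexed) and the pointer def replace
// the separate G_PTR_ADD.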
1243 const auto &TLI = getTargetLowering();
1244
1245 Register Ptr = LdSt.getPointerReg();
1246 // If the store is the only use, don't bother.
1247 if (MRI.hasOneNonDBGUse(Ptr))
1248 return false;
1249
1250 if (!isIndexedLoadStoreLegal(LdSt))
1251 return false;
1252
1253 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1254 return false;
1255
1256 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1257 auto *PtrDef = MRI.getVRegDef(Ptr);
1258
1259 unsigned NumUsesChecked = 0;
1260 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1261 if (++NumUsesChecked > PostIndexUseThreshold)
1262 return false; // Try to avoid exploding compile time.
1263
1264 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1265 // The use itself might be dead. This can happen during combines if DCE
1266 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1267 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1268 continue;
1269
1270 // Check the user of this isn't the store, otherwise we'd be generating an
1271 // indexed store defining its own use.
1272 if (StoredValDef == &Use)
1273 continue;
1274
1275 Offset = PtrAdd->getOffsetReg();
1276 if (!ForceLegalIndexing &&
1277 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1278 /*IsPre*/ false, MRI))
1279 continue;
1280
1281 // Make sure the offset calculation is before the potentially indexed op.
1282 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1283 RematOffset = false;
1284 if (!dominates(*OffsetDef, LdSt)) {
1285 // If the offset however is just a G_CONSTANT, we can always just
1286 // rematerialize it where we need it.
1287 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1288 continue;
1289 RematOffset = true;
1290 }
1291
1292 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1293 if (&BasePtrUse == PtrDef)
1294 continue;
1295
1296 // If the user is a later load/store that can be post-indexed, then don't
1297 // combine this one.
1298 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1299 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1300 dominates(LdSt, *BasePtrLdSt) &&
1301 isIndexedLoadStoreLegal(*BasePtrLdSt))
1302 return false;
1303
1304 // Now we're looking for the key G_PTR_ADD instruction, which contains
1305 // the offset add that we want to fold.
1306 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1307 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1308 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1309 // If the use is in a different block, then we may produce worse code
1310 // due to the extra register pressure.
1311 if (BaseUseUse.getParent() != LdSt.getParent())
1312 return false;
1313
1314 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1315 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1316 return false;
1317 }
1318 if (!dominates(LdSt, BasePtrUse))
1319 return false; // All uses must be dominated by the load/store.
1320 }
1321 }
1322
1323 Addr = PtrAdd->getReg(0);
1324 Base = PtrAdd->getBaseReg();
1325 return true;
1326 }
1327
1328 return false;
1329}
1330
1331bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1332 Register &Base,
1333 Register &Offset) const {
1334 auto &MF = *LdSt.getParent()->getParent();
1335 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1336
1337 Addr = LdSt.getPointerReg();
1338 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1339 MRI.hasOneNonDBGUse(Addr))
1340 return false;
1341
1342 if (!ForceLegalIndexing &&
1343 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1344 return false;
1345
1346 if (!isIndexedLoadStoreLegal(LdSt))
1347 return false;
1348
1349 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1350 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1351 return false;
1352
1353 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1354 // Would require a copy.
1355 if (Base == St->getValueReg())
1356 return false;
1357
1358 // We're expecting one use of Addr in MI, but it could also be the
1359 // value stored, which isn't actually dominated by the instruction.
1360 if (St->getValueReg() == Addr)
1361 return false;
1362 }
1363
1364 // Avoid increasing cross-block register pressure.
1365 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1366 if (AddrUse.getParent() != LdSt.getParent())
1367 return false;
1368
1369 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1370 // That might allow us to end base's liveness here by adjusting the constant.
1371 bool RealUse = false;
1372 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1373 if (!dominates(LdSt, AddrUse))
1374 return false; // All uses must be dominated by the load/store.
1375
1376 // If Ptr may be folded into the addressing mode of another use, then it's
1377 // not profitable to do this transformation.
1378 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1379 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1380 RealUse = true;
1381 } else {
1382 RealUse = true;
1383 }
1384 }
1385 return RealUse;
1386}
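// Pre-index candidate, for comparison (illustrative sketch): here the access
// already uses the G_PTR_ADD result,
//   %addr:_(p0) = G_PTR_ADD %base, %offset
//   G_STORE %val(s64), %addr(p0)
// and the combine turns it into a G_INDEXED_STORE of %val with %base and
// %offset, the last operand set to 1 (pre-indexed), defining %addr itself.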
1387
1389 MachineInstr &MI, BuildFnTy &MatchInfo) const {
1390 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1391
1392 // Check if there is a load that defines the vector being extracted from.
1393 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1394 if (!LoadMI)
1395 return false;
1396
1397 Register Vector = MI.getOperand(1).getReg();
1398 LLT VecEltTy = MRI.getType(Vector).getElementType();
1399
1400 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1401
1402 // Checking whether we should reduce the load width.
1403 if (!MRI.hasOneNonDBGUse(Vector))
1404 return false;
1405
1406 // Check if the defining load is simple.
1407 if (!LoadMI->isSimple())
1408 return false;
1409
1410 // If the vector element type is not a multiple of a byte then we are unable
1411 // to correctly compute an address to load only the extracted element as a
1412 // scalar.
1413 if (!VecEltTy.isByteSized())
1414 return false;
1415
1416 // Check for load fold barriers between the extraction and the load.
1417 if (MI.getParent() != LoadMI->getParent())
1418 return false;
1419 const unsigned MaxIter = 20;
1420 unsigned Iter = 0;
1421 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1422 if (II->isLoadFoldBarrier())
1423 return false;
1424 if (Iter++ == MaxIter)
1425 return false;
1426 }
1427
1428 // Check if the new load that we are going to create is legal
1429 // if we are in the post-legalization phase.
1430 MachineMemOperand MMO = LoadMI->getMMO();
1431 Align Alignment = MMO.getAlign();
1432 MachinePointerInfo PtrInfo;
1434
1435 // Finding the appropriate PtrInfo if offset is a known constant.
1436 // This is required to create the memory operand for the narrowed load.
1437 // This machine memory operand object helps us reason about legality
1438 // before we proceed to combine the instruction.
1439 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1440 int Elt = CVal->getZExtValue();
1441 // FIXME: should be (ABI size)*Elt.
1442 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1443 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1444 } else {
1445 // Discard the pointer info except the address space because the memory
1446 // operand can't represent this new access since the offset is variable.
1447 Offset = VecEltTy.getSizeInBits() / 8;
1449 }
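// Worked example of the offset computation above (numbers chosen for
// illustration): extracting constant element 2 of a <4 x s32> vector gives
// Offset = 32 * 2 / 8 = 8 bytes; with a variable index only the element size
// (4 bytes) is known, and it is used below purely to refine the alignment.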
1450
1451 Alignment = commonAlignment(Alignment, Offset);
1452
1453 Register VecPtr = LoadMI->getPointerReg();
1454 LLT PtrTy = MRI.getType(VecPtr);
1455
1456 MachineFunction &MF = *MI.getMF();
1457 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1458
1459 LegalityQuery::MemDesc MMDesc(*NewMMO);
1460
1462 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1463 return false;
1464
1465 // Load must be allowed and fast on the target.
1467 auto &DL = MF.getDataLayout();
1468 unsigned Fast = 0;
1469 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1470 &Fast) ||
1471 !Fast)
1472 return false;
1473
1474 Register Result = MI.getOperand(0).getReg();
1475 Register Index = MI.getOperand(2).getReg();
1476
1477 MatchInfo = [=](MachineIRBuilder &B) {
1478 GISelObserverWrapper DummyObserver;
1479 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1480 // Get pointer to the vector element.
1481 Register finalPtr = Helper.getVectorElementPointer(
1482 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1483 Index);
1484 // New G_LOAD instruction.
1485 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1486 // Remove original GLOAD instruction.
1487 LoadMI->eraseFromParent();
1488 };
1489
1490 return true;
1491}
1492
1494 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1495 auto &LdSt = cast<GLoadStore>(MI);
1496
1497 if (LdSt.isAtomic())
1498 return false;
1499
1500 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1501 MatchInfo.Offset);
1502 if (!MatchInfo.IsPre &&
1503 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1504 MatchInfo.Offset, MatchInfo.RematOffset))
1505 return false;
1506
1507 return true;
1508}
1509
1511 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1512 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1513 unsigned Opcode = MI.getOpcode();
1514 bool IsStore = Opcode == TargetOpcode::G_STORE;
1515 unsigned NewOpcode = getIndexedOpc(Opcode);
1516
1517 // If the offset constant didn't happen to dominate the load/store, we can
1518 // just clone it as needed.
1519 if (MatchInfo.RematOffset) {
1520 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1521 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1522 *OldCst->getOperand(1).getCImm());
1523 MatchInfo.Offset = NewCst.getReg(0);
1524 }
1525
1526 auto MIB = Builder.buildInstr(NewOpcode);
1527 if (IsStore) {
1528 MIB.addDef(MatchInfo.Addr);
1529 MIB.addUse(MI.getOperand(0).getReg());
1530 } else {
1531 MIB.addDef(MI.getOperand(0).getReg());
1532 MIB.addDef(MatchInfo.Addr);
1533 }
1534
1535 MIB.addUse(MatchInfo.Base);
1536 MIB.addUse(MatchInfo.Offset);
1537 MIB.addImm(MatchInfo.IsPre);
1538 MIB->cloneMemRefs(*MI.getMF(), MI);
1539 MI.eraseFromParent();
1540 AddrDef.eraseFromParent();
1541
1542 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1543}
1544
1546 MachineInstr *&OtherMI) const {
1547 unsigned Opcode = MI.getOpcode();
1548 bool IsDiv, IsSigned;
1549
1550 switch (Opcode) {
1551 default:
1552 llvm_unreachable("Unexpected opcode!");
1553 case TargetOpcode::G_SDIV:
1554 case TargetOpcode::G_UDIV: {
1555 IsDiv = true;
1556 IsSigned = Opcode == TargetOpcode::G_SDIV;
1557 break;
1558 }
1559 case TargetOpcode::G_SREM:
1560 case TargetOpcode::G_UREM: {
1561 IsDiv = false;
1562 IsSigned = Opcode == TargetOpcode::G_SREM;
1563 break;
1564 }
1565 }
1566
1567 Register Src1 = MI.getOperand(1).getReg();
1568 unsigned DivOpcode, RemOpcode, DivremOpcode;
1569 if (IsSigned) {
1570 DivOpcode = TargetOpcode::G_SDIV;
1571 RemOpcode = TargetOpcode::G_SREM;
1572 DivremOpcode = TargetOpcode::G_SDIVREM;
1573 } else {
1574 DivOpcode = TargetOpcode::G_UDIV;
1575 RemOpcode = TargetOpcode::G_UREM;
1576 DivremOpcode = TargetOpcode::G_UDIVREM;
1577 }
1578
1579 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1580 return false;
1581
1582 // Combine:
1583 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1584 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1585 // into:
1586 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1587
1588 // Combine:
1589 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1590 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1591 // into:
1592 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1593
1594 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1595 if (MI.getParent() == UseMI.getParent() &&
1596 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1597 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1598 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1599 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1600 OtherMI = &UseMI;
1601 return true;
1602 }
1603 }
1604
1605 return false;
1606}
1607
1609 MachineInstr *&OtherMI) const {
1610 unsigned Opcode = MI.getOpcode();
1611 assert(OtherMI && "OtherMI shouldn't be empty.");
1612
1613 Register DestDivReg, DestRemReg;
1614 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1615 DestDivReg = MI.getOperand(0).getReg();
1616 DestRemReg = OtherMI->getOperand(0).getReg();
1617 } else {
1618 DestDivReg = OtherMI->getOperand(0).getReg();
1619 DestRemReg = MI.getOperand(0).getReg();
1620 }
1621
1622 bool IsSigned =
1623 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1624
1625 // Check which instruction is first in the block so we don't break def-use
1626 // deps by "moving" the instruction incorrectly. Also keep track of which
1627 // instruction is first so we pick its operands, avoiding use-before-def
1628 // bugs.
1629 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1630 Builder.setInstrAndDebugLoc(*FirstInst);
1631
1632 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1633 : TargetOpcode::G_UDIVREM,
1634 {DestDivReg, DestRemReg},
1635 {FirstInst->getOperand(1), FirstInst->getOperand(2)});
1636 MI.eraseFromParent();
1637 OtherMI->eraseFromParent();
1638}
1639
1641 MachineInstr &MI, MachineInstr *&BrCond) const {
1642 assert(MI.getOpcode() == TargetOpcode::G_BR);
1643
1644 // Try to match the following:
1645 // bb1:
1646 // G_BRCOND %c1, %bb2
1647 // G_BR %bb3
1648 // bb2:
1649 // ...
1650 // bb3:
1651
1652 // The above pattern does not have a fall through to the successor bb2, always
1653 // resulting in a branch no matter which path is taken. Here we try to find
1654 // and replace that pattern with a conditional branch to bb3 and a
1655 // fallthrough to bb2. This is generally better for branch predictors.
1656
1657 MachineBasicBlock *MBB = MI.getParent();
1659 if (BrIt == MBB->begin())
1660 return false;
1661 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1662
1663 BrCond = &*std::prev(BrIt);
1664 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1665 return false;
1666
1667 // Check that the next block is the conditional branch target. Also make sure
1668 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1669 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1670 return BrCondTarget != MI.getOperand(0).getMBB() &&
1671 MBB->isLayoutSuccessor(BrCondTarget);
1672}
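// Sketch of the rewrite done by the apply below (block and register names are
// illustrative):
//   bb1:
//     G_BRCOND %c1, %bb2
//     G_BR %bb3
// becomes (with bb2 the layout successor):
//   bb1:
//     %inv = G_XOR %c1, true
//     G_BRCOND %inv, %bb3
//     G_BR %bb2   ; now targets the fallthrough block and can be removed later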
1673
1675 MachineInstr &MI, MachineInstr *&BrCond) const {
1676 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1677 Builder.setInstrAndDebugLoc(*BrCond);
1678 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1679 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1680 // this to i1 only since we might not know for sure what kind of
1681 // compare generated the condition value.
1682 auto True = Builder.buildConstant(
1683 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1684 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1685
1686 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1687 Observer.changingInstr(MI);
1688 MI.getOperand(0).setMBB(FallthroughBB);
1689 Observer.changedInstr(MI);
1690
1691 // Change the conditional branch to use the inverted condition and
1692 // new target block.
1693 Observer.changingInstr(*BrCond);
1694 BrCond->getOperand(0).setReg(Xor.getReg(0));
1695 BrCond->getOperand(1).setMBB(BrTarget);
1696 Observer.changedInstr(*BrCond);
1697}
1698
1700 MachineIRBuilder HelperBuilder(MI);
1701 GISelObserverWrapper DummyObserver;
1702 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1703 return Helper.lowerMemcpyInline(MI) ==
1705}
1706
1708 unsigned MaxLen) const {
1709 MachineIRBuilder HelperBuilder(MI);
1710 GISelObserverWrapper DummyObserver;
1711 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1712 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1713 LegalizerHelper::LegalizeResult::Legalized;
1714}
1715
1716 static APFloat constantFoldFpUnary(const MachineInstr &MI,
1717 const MachineRegisterInfo &MRI,
1718 const APFloat &Val) {
1719 APFloat Result(Val);
1720 switch (MI.getOpcode()) {
1721 default:
1722 llvm_unreachable("Unexpected opcode!");
1723 case TargetOpcode::G_FNEG: {
1724 Result.changeSign();
1725 return Result;
1726 }
1727 case TargetOpcode::G_FABS: {
1728 Result.clearSign();
1729 return Result;
1730 }
1731 case TargetOpcode::G_FPTRUNC: {
1732 bool Unused;
1733 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1734 Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
1735 &Unused);
1736 return Result;
1737 }
1738 case TargetOpcode::G_FSQRT: {
1739 bool Unused;
1740 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1741 &Unused);
1742 Result = APFloat(sqrt(Result.convertToDouble()));
1743 break;
1744 }
1745 case TargetOpcode::G_FLOG2: {
1746 bool Unused;
1747 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1748 &Unused);
1749 Result = APFloat(log2(Result.convertToDouble()));
1750 break;
1751 }
1752 }
1753 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1754 // `buildFConstant` will assert on a size mismatch. Only `G_FSQRT` and
1755 // `G_FLOG2` reach here.
1756 bool Unused;
1757 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1758 return Result;
1759}
1760
1761 void CombinerHelper::applyCombineConstantFoldFpUnary(
1762 MachineInstr &MI, const ConstantFP *Cst) const {
1763 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1764 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1765 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1766 MI.eraseFromParent();
1767}
1768
1769 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1770 PtrAddChain &MatchInfo) const {
1771 // We're trying to match the following pattern:
1772 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1773 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1774 // -->
1775 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
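// A concrete instance (illustrative values only): with imm1 = 16 and imm2 = 8,
//   %t1 = G_PTR_ADD %base, 16
//   %root = G_PTR_ADD %t1, 8
// becomes %root = G_PTR_ADD %base, 24, subject to the addressing-mode and flag
// checks performed below.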
1776
1777 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1778 return false;
1779
1780 Register Add2 = MI.getOperand(1).getReg();
1781 Register Imm1 = MI.getOperand(2).getReg();
1782 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1783 if (!MaybeImmVal)
1784 return false;
1785
1786 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1787 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1788 return false;
1789
1790 Register Base = Add2Def->getOperand(1).getReg();
1791 Register Imm2 = Add2Def->getOperand(2).getReg();
1792 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1793 if (!MaybeImm2Val)
1794 return false;
1795
1796 // Check if the new combined immediate forms an illegal addressing mode.
1797 // Do not combine if it was legal before but would get illegal.
1798 // To do so, we need to find a load/store user of the pointer to get
1799 // the access type.
1800 Type *AccessTy = nullptr;
1801 auto &MF = *MI.getMF();
1802 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1803 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1804 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1805 MF.getFunction().getContext());
1806 break;
1807 }
1808 }
1809 TargetLoweringBase::AddrMode AMNew;
1810 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1811 AMNew.BaseOffs = CombinedImm.getSExtValue();
1812 if (AccessTy) {
1813 AMNew.HasBaseReg = true;
1814 TargetLoweringBase::AddrMode AMOld;
1815 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1816 AMOld.HasBaseReg = true;
1817 unsigned AS = MRI.getType(Add2).getAddressSpace();
1818 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1819 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1820 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1821 return false;
1822 }
1823
1824 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1825 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1826 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1827 // largest signed integer that fits into the index type, which is the maximum
1828 // size of allocated objects according to the IR Language Reference.
1829 unsigned PtrAddFlags = MI.getFlags();
1830 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1831 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1832 bool IsInBounds =
1833 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1834 unsigned Flags = 0;
1835 if (IsNoUWrap)
1836 Flags |= MachineInstr::MIFlag::NoUWrap;
1837 if (IsInBounds) {
1838 Flags |= MachineInstr::MIFlag::NoUSWrap;
1839 Flags |= MachineInstr::MIFlag::InBounds;
1840 }
1841
1842 // Pass the combined immediate to the apply function.
1843 MatchInfo.Imm = AMNew.BaseOffs;
1844 MatchInfo.Base = Base;
1845 MatchInfo.Bank = getRegBank(Imm2);
1846 MatchInfo.Flags = Flags;
1847 return true;
1848}
1849
1850 void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1851 PtrAddChain &MatchInfo) const {
1852 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1853 MachineIRBuilder MIB(MI);
1854 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1855 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1856 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1857 Observer.changingInstr(MI);
1858 MI.getOperand(1).setReg(MatchInfo.Base);
1859 MI.getOperand(2).setReg(NewOffset.getReg(0));
1860 MI.setFlags(MatchInfo.Flags);
1861 Observer.changedInstr(MI);
1862}
1863
1864 bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1865 RegisterImmPair &MatchInfo) const {
1866 // We're trying to match the following pattern with any of
1867 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1868 // %t1 = SHIFT %base, G_CONSTANT imm1
1869 // %root = SHIFT %t1, G_CONSTANT imm2
1870 // -->
1871 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
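// A concrete instance (illustrative values only): two G_LSHR of the same value
// by 3 and then by 5 fold into a single G_LSHR by 8; the combined amount is
// then validated against the scalar size (see the G_USHLSAT check below and
// the clamping in the apply).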
1872
1873 unsigned Opcode = MI.getOpcode();
1874 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1875 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1876 Opcode == TargetOpcode::G_USHLSAT) &&
1877 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1878
1879 Register Shl2 = MI.getOperand(1).getReg();
1880 Register Imm1 = MI.getOperand(2).getReg();
1881 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1882 if (!MaybeImmVal)
1883 return false;
1884
1885 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1886 if (Shl2Def->getOpcode() != Opcode)
1887 return false;
1888
1889 Register Base = Shl2Def->getOperand(1).getReg();
1890 Register Imm2 = Shl2Def->getOperand(2).getReg();
1891 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1892 if (!MaybeImm2Val)
1893 return false;
1894
1895 // Pass the combined immediate to the apply function.
1896 MatchInfo.Imm =
1897 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1898 MatchInfo.Reg = Base;
1899
1900 // There is no simple replacement for a saturating unsigned left shift that
1901 // exceeds the scalar size.
1902 if (Opcode == TargetOpcode::G_USHLSAT &&
1903 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1904 return false;
1905
1906 return true;
1907}
1908
1909 void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1910 RegisterImmPair &MatchInfo) const {
1911 unsigned Opcode = MI.getOpcode();
1912 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1913 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1914 Opcode == TargetOpcode::G_USHLSAT) &&
1915 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1916
1917 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1918 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1919 auto Imm = MatchInfo.Imm;
1920
1921 if (Imm >= ScalarSizeInBits) {
1922 // Any logical shift that exceeds scalar size will produce zero.
1923 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1924 Builder.buildConstant(MI.getOperand(0), 0);
1925 MI.eraseFromParent();
1926 return;
1927 }
1928 // Arithmetic shift and saturating signed left shift have no effect beyond
1929 // scalar size.
1930 Imm = ScalarSizeInBits - 1;
1931 }
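// Illustrative example (not from the source): on s32, chaining
// (G_ASHR (G_ASHR %x, 20), 20) gives a combined amount of 40, which is clamped
// here to 31, so the result is G_ASHR %x, 31.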
1932
1933 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1934 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1935 Observer.changingInstr(MI);
1936 MI.getOperand(1).setReg(MatchInfo.Reg);
1937 MI.getOperand(2).setReg(NewImm);
1938 Observer.changedInstr(MI);
1939}
1940
1941 bool CombinerHelper::matchShiftOfShiftedLogic(
1942 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1943 // We're trying to match the following pattern with any of
1944 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1945 // with any of G_AND/G_OR/G_XOR logic instructions.
1946 // %t1 = SHIFT %X, G_CONSTANT C0
1947 // %t2 = LOGIC %t1, %Y
1948 // %root = SHIFT %t2, G_CONSTANT C1
1949 // -->
1950 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1951 // %t4 = SHIFT %Y, G_CONSTANT C1
1952 // %root = LOGIC %t3, %t4
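// A concrete instance (illustrative values only): with C0 = 2 and C1 = 3,
//   %t1 = G_SHL %X, 2
//   %t2 = G_AND %t1, %Y
//   %root = G_SHL %t2, 3
// becomes
//   %t3 = G_SHL %X, 5
//   %t4 = G_SHL %Y, 3
//   %root = G_AND %t3, %t4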
1953 unsigned ShiftOpcode = MI.getOpcode();
1954 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1955 ShiftOpcode == TargetOpcode::G_ASHR ||
1956 ShiftOpcode == TargetOpcode::G_LSHR ||
1957 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1958 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1959 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1960
1961 // Match a one-use bitwise logic op.
1962 Register LogicDest = MI.getOperand(1).getReg();
1963 if (!MRI.hasOneNonDBGUse(LogicDest))
1964 return false;
1965
1966 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1967 unsigned LogicOpcode = LogicMI->getOpcode();
1968 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1969 LogicOpcode != TargetOpcode::G_XOR)
1970 return false;
1971
1972 // Find a matching one-use shift by constant.
1973 const Register C1 = MI.getOperand(2).getReg();
1974 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1975 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1976 return false;
1977
1978 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1979
1980 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1981 // Shift should match previous one and should be a one-use.
1982 if (MI->getOpcode() != ShiftOpcode ||
1983 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1984 return false;
1985
1986 // Must be a constant.
1987 auto MaybeImmVal =
1988 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1989 if (!MaybeImmVal)
1990 return false;
1991
1992 ShiftVal = MaybeImmVal->Value.getSExtValue();
1993 return true;
1994 };
1995
1996 // Logic ops are commutative, so check each operand for a match.
1997 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1998 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1999 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
2000 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
2001 uint64_t C0Val;
2002
2003 if (matchFirstShift(LogicMIOp1, C0Val)) {
2004 MatchInfo.LogicNonShiftReg = LogicMIReg2;
2005 MatchInfo.Shift2 = LogicMIOp1;
2006 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
2007 MatchInfo.LogicNonShiftReg = LogicMIReg1;
2008 MatchInfo.Shift2 = LogicMIOp2;
2009 } else
2010 return false;
2011
2012 MatchInfo.ValSum = C0Val + C1Val;
2013
2014 // The fold is not valid if the sum of the shift values exceeds bitwidth.
2015 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
2016 return false;
2017
2018 MatchInfo.Logic = LogicMI;
2019 return true;
2020}
2021
2022 void CombinerHelper::applyShiftOfShiftedLogic(
2023 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2024 unsigned Opcode = MI.getOpcode();
2025 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2026 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2027 Opcode == TargetOpcode::G_SSHLSAT) &&
2028 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2029
2030 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2031 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2032
2033 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2034
2035 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2036 Register Shift1 =
2037 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2038
2039 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
2040 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
2041 // old shift1 when building shift2. So if we erased MatchInfo.Shift2 at the
2042 // end, we would actually remove the old shift1 and cause a crash later.
2043 // Erase it earlier to avoid the crash.
2044 MatchInfo.Shift2->eraseFromParent();
2045
2046 Register Shift2Const = MI.getOperand(2).getReg();
2047 Register Shift2 = Builder
2048 .buildInstr(Opcode, {DestType},
2049 {MatchInfo.LogicNonShiftReg, Shift2Const})
2050 .getReg(0);
2051
2052 Register Dest = MI.getOperand(0).getReg();
2053 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2054
2055 // This was one use so it's safe to remove it.
2056 MatchInfo.Logic->eraseFromParent();
2057
2058 MI.eraseFromParent();
2059}
2060
2062 BuildFnTy &MatchInfo) const {
2063 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2064 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2065 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2066 auto &Shl = cast<GenericMachineInstr>(MI);
2067 Register DstReg = Shl.getReg(0);
2068 Register SrcReg = Shl.getReg(1);
2069 Register ShiftReg = Shl.getReg(2);
2070 Register X, C1;
2071
2072 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2073 return false;
2074
2075 if (!mi_match(SrcReg, MRI,
2076 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
2077 m_GOr(m_Reg(X), m_Reg(C1))))))
2078 return false;
2079
2080 APInt C1Val, C2Val;
2081 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2082 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2083 return false;
2084
2085 auto *SrcDef = MRI.getVRegDef(SrcReg);
2086 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2087 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2088 LLT SrcTy = MRI.getType(SrcReg);
2089 MatchInfo = [=](MachineIRBuilder &B) {
2090 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2091 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2092 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2093 };
2094 return true;
2095}
2096
2098 LshrOfTruncOfLshr &MatchInfo,
2099 MachineInstr &ShiftMI) const {
2100 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2101
2102 Register N0 = MI.getOperand(1).getReg();
2103 Register N1 = MI.getOperand(2).getReg();
2104 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2105
2106 APInt N1C, N001C;
2107 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2108 return false;
2109 auto N001 = ShiftMI.getOperand(2).getReg();
2110 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2111 return false;
2112
2113 if (N001C.getBitWidth() > N1C.getBitWidth())
2114 N1C = N1C.zext(N001C.getBitWidth());
2115 else
2116 N001C = N001C.zext(N1C.getBitWidth());
2117
2118 Register InnerShift = ShiftMI.getOperand(0).getReg();
2119 LLT InnerShiftTy = MRI.getType(InnerShift);
2120 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2121 if ((N1C + N001C).ult(InnerShiftSize)) {
2122 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2123 MatchInfo.ShiftAmt = N1C + N001C;
2124 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2125 MatchInfo.InnerShiftTy = InnerShiftTy;
2126
2127 if ((N001C + OpSizeInBits) == InnerShiftSize)
2128 return true;
2129 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2130 MatchInfo.Mask = true;
2131 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2132 return true;
2133 }
2134 }
2135 return false;
2136}
2137
2139 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2140 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2141
2142 Register Dst = MI.getOperand(0).getReg();
2143 auto ShiftAmt =
2144 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2145 auto Shift =
2146 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2147 if (MatchInfo.Mask == true) {
2148 APInt MaskVal =
2150 MatchInfo.MaskVal.getZExtValue());
2151 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2152 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2153 Builder.buildTrunc(Dst, And);
2154 } else
2155 Builder.buildTrunc(Dst, Shift);
2156 MI.eraseFromParent();
2157}
2158
2159 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
2160 unsigned &ShiftVal) const {
2161 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2162 auto MaybeImmVal =
2163 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2164 if (!MaybeImmVal)
2165 return false;
2166
2167 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2168 return (static_cast<int32_t>(ShiftVal) != -1);
2169}
2170
2171 void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
2172 unsigned &ShiftVal) const {
2173 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2174 MachineIRBuilder MIB(MI);
2175 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2176 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2177 Observer.changingInstr(MI);
2178 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2179 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2180 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2182 Observer.changedInstr(MI);
2183}
2184
2186 BuildFnTy &MatchInfo) const {
2187 GSub &Sub = cast<GSub>(MI);
2188
2189 LLT Ty = MRI.getType(Sub.getReg(0));
2190
2191 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2192 return false;
2193
2195 return false;
2196
2197 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2198
2199 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2200 auto NegCst = B.buildConstant(Ty, -Imm);
2201 Observer.changingInstr(MI);
2202 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2203 MI.getOperand(2).setReg(NegCst.getReg(0));
2205 if (Imm.isMinSignedValue())
2207 Observer.changedInstr(MI);
2208 };
2209 return true;
2210}
2211
2212// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
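// Illustrative example (not from the source): if %x:_(s16) is known to have at
// least 4 leading zero bits, then
//   %e:_(s32) = G_ZEXT %x
//   %s:_(s32) = G_SHL %e, 4
// can instead shift in the narrow type and zero-extend the result:
//   %n:_(s16) = G_SHL %x, 4
//   %s:_(s32) = G_ZEXT %n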
2213 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
2214 RegisterImmPair &MatchData) const {
2215 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2216 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2217 return false;
2218
2219 Register LHS = MI.getOperand(1).getReg();
2220
2221 Register ExtSrc;
2222 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2223 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2224 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2225 return false;
2226
2227 Register RHS = MI.getOperand(2).getReg();
2228 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2229 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2230 if (!MaybeShiftAmtVal)
2231 return false;
2232
2233 if (LI) {
2234 LLT SrcTy = MRI.getType(ExtSrc);
2235
2236 // We only really care about the legality with the shifted value. We can
2237 // pick any type for the constant shift amount, so ask the target what to
2238 // use. Otherwise we would have to guess and hope it is reported as legal.
2239 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2240 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2241 return false;
2242 }
2243
2244 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2245 MatchData.Reg = ExtSrc;
2246 MatchData.Imm = ShiftAmt;
2247
2248 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2249 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2250 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2251}
2252
2254 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2255 Register ExtSrcReg = MatchData.Reg;
2256 int64_t ShiftAmtVal = MatchData.Imm;
2257
2258 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2259 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2260 auto NarrowShift =
2261 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2262 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2263 MI.eraseFromParent();
2264}
2265
2267 Register &MatchInfo) const {
2269 SmallVector<Register, 16> MergedValues;
2270 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2271 MergedValues.emplace_back(Merge.getSourceReg(I));
2272
2273 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2274 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2275 return false;
2276
2277 for (unsigned I = 0; I < MergedValues.size(); ++I)
2278 if (MergedValues[I] != Unmerge->getReg(I))
2279 return false;
2280
2281 MatchInfo = Unmerge->getSourceReg();
2282 return true;
2283}
2284
2285 static Register peekThroughBitcast(Register Reg,
2286 const MachineRegisterInfo &MRI) {
2287 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2288 ;
2289
2290 return Reg;
2291}
2292
2295 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2296 "Expected an unmerge");
2297 auto &Unmerge = cast<GUnmerge>(MI);
2298 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2299
2300 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2301 if (!SrcInstr)
2302 return false;
2303
2304 // Check the source type of the merge.
2305 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2306 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2307 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2308 if (SrcMergeTy != Dst0Ty && !SameSize)
2309 return false;
2310 // They are the same now (modulo a bitcast).
2311 // We can collect all the src registers.
2312 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2313 Operands.push_back(SrcInstr->getSourceReg(Idx));
2314 return true;
2315}
2316
2319 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2320 "Expected an unmerge");
2321 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2322 "Not enough operands to replace all defs");
2323 unsigned NumElems = MI.getNumOperands() - 1;
2324
2325 LLT SrcTy = MRI.getType(Operands[0]);
2326 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2327 bool CanReuseInputDirectly = DstTy == SrcTy;
2328 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2329 Register DstReg = MI.getOperand(Idx).getReg();
2330 Register SrcReg = Operands[Idx];
2331
2332 // This combine may run after RegBankSelect, so we need to be aware of
2333 // register banks.
2334 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2335 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2336 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2337 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2338 }
2339
2340 if (CanReuseInputDirectly)
2341 replaceRegWith(MRI, DstReg, SrcReg);
2342 else
2343 Builder.buildCast(DstReg, SrcReg);
2344 }
2345 MI.eraseFromParent();
2346}
2347
2349 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2350 unsigned SrcIdx = MI.getNumOperands() - 1;
2351 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2352 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2353 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2354 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2355 return false;
2356 // Break down the big constant into smaller ones.
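// Worked example (illustrative values only): unmerging
// %c:_(s32) = G_CONSTANT i32 0xAABBCCDD into four s8 defs yields the constants
// 0xDD, 0xCC, 0xBB and 0xAA, i.e. def I receives bits [8*I, 8*I+8) of the
// original value.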
2357 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2358 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2359 ? CstVal.getCImm()->getValue()
2360 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2361
2362 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2363 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2364 // Unmerge a constant.
2365 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2366 Csts.emplace_back(Val.trunc(ShiftAmt));
2367 Val = Val.lshr(ShiftAmt);
2368 }
2369
2370 return true;
2371}
2372
2374 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2375 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2376 "Expected an unmerge");
2377 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2378 "Not enough operands to replace all defs");
2379 unsigned NumElems = MI.getNumOperands() - 1;
2380 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2381 Register DstReg = MI.getOperand(Idx).getReg();
2382 Builder.buildConstant(DstReg, Csts[Idx]);
2383 }
2384
2385 MI.eraseFromParent();
2386}
2387
2390 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2391 unsigned SrcIdx = MI.getNumOperands() - 1;
2392 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2393 MatchInfo = [&MI](MachineIRBuilder &B) {
2394 unsigned NumElems = MI.getNumOperands() - 1;
2395 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2396 Register DstReg = MI.getOperand(Idx).getReg();
2397 B.buildUndef(DstReg);
2398 }
2399 };
2400 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2401}
2402
2404 MachineInstr &MI) const {
2405 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2406 "Expected an unmerge");
2407 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2408 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2409 return false;
2410 // Check that all the lanes are dead except the first one.
2411 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2412 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2413 return false;
2414 }
2415 return true;
2416}
2417
2419 MachineInstr &MI) const {
2420 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2421 Register Dst0Reg = MI.getOperand(0).getReg();
2422 Builder.buildTrunc(Dst0Reg, SrcReg);
2423 MI.eraseFromParent();
2424}
2425
2427 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2428 "Expected an unmerge");
2429 Register Dst0Reg = MI.getOperand(0).getReg();
2430 LLT Dst0Ty = MRI.getType(Dst0Reg);
2431 // G_ZEXT on vector applies to each lane, so it will
2432 // affect all destinations. Therefore we won't be able
2433 // to simplify the unmerge to just the first definition.
2434 if (Dst0Ty.isVector())
2435 return false;
2436 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2437 LLT SrcTy = MRI.getType(SrcReg);
2438 if (SrcTy.isVector())
2439 return false;
2440
2441 Register ZExtSrcReg;
2442 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2443 return false;
2444
2445 // Finally we can replace the first definition with
2446 // a zext of the source if the definition is big enough to hold
2447 // all of ZExtSrc bits.
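// For example (a sketch, not from the source): unmerging
// %z:_(s64) = G_ZEXT %x:_(s16) into two s32 pieces yields
// %lo:_(s32) = G_ZEXT %x and %hi:_(s32) = G_CONSTANT i32 0, which is what the
// apply below materializes.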
2448 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2449 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2450}
2451
2453 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2454 "Expected an unmerge");
2455
2456 Register Dst0Reg = MI.getOperand(0).getReg();
2457
2458 MachineInstr *ZExtInstr =
2459 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2460 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2461 "Expecting a G_ZEXT");
2462
2463 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2464 LLT Dst0Ty = MRI.getType(Dst0Reg);
2465 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2466
2467 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2468 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2469 } else {
2470 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2471 "ZExt src doesn't fit in destination");
2472 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2473 }
2474
2475 Register ZeroReg;
2476 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2477 if (!ZeroReg)
2478 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2479 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2480 }
2481 MI.eraseFromParent();
2482}
2483
2484 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2485 unsigned TargetShiftSize,
2486 unsigned &ShiftVal) const {
2487 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2488 MI.getOpcode() == TargetOpcode::G_LSHR ||
2489 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2490
2491 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2492 if (Ty.isVector()) // TODO:
2493 return false;
2494
2495 // Don't narrow further than the requested size.
2496 unsigned Size = Ty.getSizeInBits();
2497 if (Size <= TargetShiftSize)
2498 return false;
2499
2500 auto MaybeImmVal =
2501 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2502 if (!MaybeImmVal)
2503 return false;
2504
2505 ShiftVal = MaybeImmVal->Value.getSExtValue();
2506 return ShiftVal >= Size / 2 && ShiftVal < Size;
2507}
2508
2509 void CombinerHelper::applyCombineShiftToUnmerge(
2510 MachineInstr &MI, const unsigned &ShiftVal) const {
2511 Register DstReg = MI.getOperand(0).getReg();
2512 Register SrcReg = MI.getOperand(1).getReg();
2513 LLT Ty = MRI.getType(SrcReg);
2514 unsigned Size = Ty.getSizeInBits();
2515 unsigned HalfSize = Size / 2;
2516 assert(ShiftVal >= HalfSize);
2517
2518 LLT HalfTy = LLT::scalar(HalfSize);
2519
2520 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2521 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2522
2523 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2524 Register Narrowed = Unmerge.getReg(1);
2525
2526 // dst = G_LSHR s64:x, C for C >= 32
2527 // =>
2528 // lo, hi = G_UNMERGE_VALUES x
2529 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2530
2531 if (NarrowShiftAmt != 0) {
2532 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2533 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2534 }
2535
2536 auto Zero = Builder.buildConstant(HalfTy, 0);
2537 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2538 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2539 Register Narrowed = Unmerge.getReg(0);
2540 // dst = G_SHL s64:x, C for C >= 32
2541 // =>
2542 // lo, hi = G_UNMERGE_VALUES x
2543 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2544 if (NarrowShiftAmt != 0) {
2545 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2546 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2547 }
2548
2549 auto Zero = Builder.buildConstant(HalfTy, 0);
2550 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2551 } else {
2552 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2553 auto Hi = Builder.buildAShr(
2554 HalfTy, Unmerge.getReg(1),
2555 Builder.buildConstant(HalfTy, HalfSize - 1));
2556
2557 if (ShiftVal == HalfSize) {
2558 // (G_ASHR i64:x, 32) ->
2559 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2560 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2561 } else if (ShiftVal == Size - 1) {
2562 // Don't need a second shift.
2563 // (G_ASHR i64:x, 63) ->
2564 // %narrowed = (G_ASHR hi_32(x), 31)
2565 // G_MERGE_VALUES %narrowed, %narrowed
2566 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2567 } else {
2568 auto Lo = Builder.buildAShr(
2569 HalfTy, Unmerge.getReg(1),
2570 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2571
2572 // (G_ASHR i64:x, C) ->, for C >= 32
2573 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2574 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2575 }
2576 }
2577
2578 MI.eraseFromParent();
2579}
2580
2582 MachineInstr &MI, unsigned TargetShiftAmount) const {
2583 unsigned ShiftAmt;
2584 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2585 applyCombineShiftToUnmerge(MI, ShiftAmt);
2586 return true;
2587 }
2588
2589 return false;
2590}
2591
2593 Register &Reg) const {
2594 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2595 Register DstReg = MI.getOperand(0).getReg();
2596 LLT DstTy = MRI.getType(DstReg);
2597 Register SrcReg = MI.getOperand(1).getReg();
2598 return mi_match(SrcReg, MRI,
2599 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2600}
2601
2603 Register &Reg) const {
2604 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2605 Register DstReg = MI.getOperand(0).getReg();
2606 Builder.buildCopy(DstReg, Reg);
2607 MI.eraseFromParent();
2608}
2609
2611 Register &Reg) const {
2612 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2613 Register DstReg = MI.getOperand(0).getReg();
2614 Builder.buildZExtOrTrunc(DstReg, Reg);
2615 MI.eraseFromParent();
2616}
2617
2619 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2620 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2621 Register LHS = MI.getOperand(1).getReg();
2622 Register RHS = MI.getOperand(2).getReg();
2623 LLT IntTy = MRI.getType(LHS);
2624
2625 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2626 // instruction.
2627 PtrReg.second = false;
2628 for (Register SrcReg : {LHS, RHS}) {
2629 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2630 // Don't handle cases where the integer is implicitly converted to the
2631 // pointer width.
2632 LLT PtrTy = MRI.getType(PtrReg.first);
2633 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2634 return true;
2635 }
2636
2637 PtrReg.second = true;
2638 }
2639
2640 return false;
2641}
2642
2644 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2645 Register Dst = MI.getOperand(0).getReg();
2646 Register LHS = MI.getOperand(1).getReg();
2647 Register RHS = MI.getOperand(2).getReg();
2648
2649 const bool DoCommute = PtrReg.second;
2650 if (DoCommute)
2651 std::swap(LHS, RHS);
2652 LHS = PtrReg.first;
2653
2654 LLT PtrTy = MRI.getType(LHS);
2655
2656 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2657 Builder.buildPtrToInt(Dst, PtrAdd);
2658 MI.eraseFromParent();
2659}
2660
2662 APInt &NewCst) const {
2663 auto &PtrAdd = cast<GPtrAdd>(MI);
2664 Register LHS = PtrAdd.getBaseReg();
2665 Register RHS = PtrAdd.getOffsetReg();
2666 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2667
2668 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2669 APInt Cst;
2670 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2671 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2672 // G_INTTOPTR uses zero-extension
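// Worked example (illustrative values, 64-bit pointers): for
// %p = G_INTTOPTR 0x100 and %root = G_PTR_ADD %p, -16, the folded constant is
// zext(0x100) + sext(-16) = 0xf0.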
2673 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2674 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2675 return true;
2676 }
2677 }
2678
2679 return false;
2680}
2681
2683 APInt &NewCst) const {
2684 auto &PtrAdd = cast<GPtrAdd>(MI);
2685 Register Dst = PtrAdd.getReg(0);
2686
2687 Builder.buildConstant(Dst, NewCst);
2688 PtrAdd.eraseFromParent();
2689}
2690
2692 Register &Reg) const {
2693 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2694 Register DstReg = MI.getOperand(0).getReg();
2695 Register SrcReg = MI.getOperand(1).getReg();
2696 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2697 if (OriginalSrcReg.isValid())
2698 SrcReg = OriginalSrcReg;
2699 LLT DstTy = MRI.getType(DstReg);
2700 return mi_match(SrcReg, MRI,
2701 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2702 canReplaceReg(DstReg, Reg, MRI);
2703}
2704
2706 Register &Reg) const {
2707 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2708 Register DstReg = MI.getOperand(0).getReg();
2709 Register SrcReg = MI.getOperand(1).getReg();
2710 LLT DstTy = MRI.getType(DstReg);
2711 if (mi_match(SrcReg, MRI,
2712 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2713 canReplaceReg(DstReg, Reg, MRI)) {
2714 unsigned DstSize = DstTy.getScalarSizeInBits();
2715 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2716 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2717 }
2718 return false;
2719}
2720
2721 static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2722 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2723 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2724
2725 // ShiftTy > 32 > TruncTy -> 32
2726 if (ShiftSize > 32 && TruncSize < 32)
2727 return ShiftTy.changeElementSize(32);
2728
2729 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2730 // Some targets like it, some don't, some only like it under certain
2731 // conditions/processor versions, etc.
2732 // A TL hook might be needed for this.
2733
2734 // Don't combine
2735 return ShiftTy;
2736}
2737
2739 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2740 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2741 Register DstReg = MI.getOperand(0).getReg();
2742 Register SrcReg = MI.getOperand(1).getReg();
2743
2744 if (!MRI.hasOneNonDBGUse(SrcReg))
2745 return false;
2746
2747 LLT SrcTy = MRI.getType(SrcReg);
2748 LLT DstTy = MRI.getType(DstReg);
2749
2750 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2751 const auto &TL = getTargetLowering();
2752
2753 LLT NewShiftTy;
2754 switch (SrcMI->getOpcode()) {
2755 default:
2756 return false;
2757 case TargetOpcode::G_SHL: {
2758 NewShiftTy = DstTy;
2759
2760 // Make sure new shift amount is legal.
2761 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2762 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2763 return false;
2764 break;
2765 }
2766 case TargetOpcode::G_LSHR:
2767 case TargetOpcode::G_ASHR: {
2768 // For right shifts, we conservatively do not do the transform if the TRUNC
2769 // has any STORE users. The reason is that if we change the type of the
2770 // shift, we may break the truncstore combine.
2771 //
2772 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2773 for (auto &User : MRI.use_instructions(DstReg))
2774 if (User.getOpcode() == TargetOpcode::G_STORE)
2775 return false;
2776
2777 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2778 if (NewShiftTy == SrcTy)
2779 return false;
2780
2781 // Make sure we won't lose information by truncating the high bits.
2782 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2783 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2784 DstTy.getScalarSizeInBits()))
2785 return false;
2786 break;
2787 }
2788 }
2789
2790 if (!isLegalOrBeforeLegalizer(
2791 {SrcMI->getOpcode(),
2792 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2793 return false;
2794
2795 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2796 return true;
2797}
2798
2800 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2801 MachineInstr *ShiftMI = MatchInfo.first;
2802 LLT NewShiftTy = MatchInfo.second;
2803
2804 Register Dst = MI.getOperand(0).getReg();
2805 LLT DstTy = MRI.getType(Dst);
2806
2807 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2808 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2809 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2810
2811 Register NewShift =
2812 Builder
2813 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2814 .getReg(0);
2815
2816 if (NewShiftTy == DstTy)
2817 replaceRegWith(MRI, Dst, NewShift);
2818 else
2819 Builder.buildTrunc(Dst, NewShift);
2820
2821 eraseInst(MI);
2822}
2823
2825 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2826 return MO.isReg() &&
2827 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2828 });
2829}
2830
2832 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2833 return !MO.isReg() ||
2834 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2835 });
2836}
2837
2839 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2840 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2841 return all_of(Mask, [](int Elt) { return Elt < 0; });
2842}
2843
2845 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2846 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2847 MRI);
2848}
2849
2851 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2852 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2853 MRI);
2854}
2855
2857 MachineInstr &MI) const {
2858 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2859 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2860 "Expected an insert/extract element op");
2861 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2862 if (VecTy.isScalableVector())
2863 return false;
2864
2865 unsigned IdxIdx =
2866 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2867 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2868 if (!Idx)
2869 return false;
2870 return Idx->getZExtValue() >= VecTy.getNumElements();
2871}
2872
2874 unsigned &OpIdx) const {
2875 GSelect &SelMI = cast<GSelect>(MI);
2876 auto Cst =
2877 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2878 if (!Cst)
2879 return false;
2880 OpIdx = Cst->isZero() ? 3 : 2;
2881 return true;
2882}
2883
2884void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2885
2886 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2887 const MachineOperand &MOP2) const {
2888 if (!MOP1.isReg() || !MOP2.isReg())
2889 return false;
2890 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2891 if (!InstAndDef1)
2892 return false;
2893 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2894 if (!InstAndDef2)
2895 return false;
2896 MachineInstr *I1 = InstAndDef1->MI;
2897 MachineInstr *I2 = InstAndDef2->MI;
2898
2899 // Handle a case like this:
2900 //
2901 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2902 //
2903 // Even though %0 and %1 are produced by the same instruction they are not
2904 // the same values.
2905 if (I1 == I2)
2906 return MOP1.getReg() == MOP2.getReg();
2907
2908 // If we have an instruction which loads or stores, we can't guarantee that
2909 // it is identical.
2910 //
2911 // For example, we may have
2912 //
2913 // %x1 = G_LOAD %addr (load N from @somewhere)
2914 // ...
2915 // call @foo
2916 // ...
2917 // %x2 = G_LOAD %addr (load N from @somewhere)
2918 // ...
2919 // %or = G_OR %x1, %x2
2920 //
2921 // It's possible that @foo will modify whatever lives at the address we're
2922 // loading from. To be safe, let's just assume that all loads and stores
2923 // are different (unless we have something which is guaranteed to not
2924 // change.)
2925 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2926 return false;
2927
2928 // If both instructions are loads or stores, they are equal only if both
2929 // are dereferenceable invariant loads with the same number of bits.
2930 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2931 const auto *LS1 = dyn_cast<GLoadStore>(I1);
2932 const auto *LS2 = dyn_cast<GLoadStore>(I2);
2933 if (!LS1 || !LS2)
2934 return false;
2935
2936 if (!I2->isDereferenceableInvariantLoad() ||
2937 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2938 return false;
2939 }
2940
2941 // Check for physical registers on the instructions first to avoid cases
2942 // like this:
2943 //
2944 // %a = COPY $physreg
2945 // ...
2946 // SOMETHING implicit-def $physreg
2947 // ...
2948 // %b = COPY $physreg
2949 //
2950 // These copies are not equivalent.
2951 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2952 return MO.isReg() && MO.getReg().isPhysical();
2953 })) {
2954 // Check if we have a case like this:
2955 //
2956 // %a = COPY $physreg
2957 // %b = COPY %a
2958 //
2959 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2960 // From that, we know that they must have the same value, since they must
2961 // have come from the same COPY.
2962 return I1->isIdenticalTo(*I2);
2963 }
2964
2965 // We don't have any physical registers, so we don't necessarily need the
2966 // same vreg defs.
2967 //
2968 // On the off-chance that there's some target instruction feeding into the
2969 // instruction, let's use produceSameValue instead of isIdenticalTo.
2970 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2971 // Handle instructions with multiple defs that produce the same values. The
2972 // values are the same for operands with the same index.
2973 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2974 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2975 // I1 and I2 are different instructions that produce the same values:
2976 // %1 and %6 are the same, while %1 and %7 are not.
2977 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2978 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2979 }
2980 return false;
2981}
2982
2984 int64_t C) const {
2985 if (!MOP.isReg())
2986 return false;
2987 auto *MI = MRI.getVRegDef(MOP.getReg());
2988 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2989 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2990 MaybeCst->getSExtValue() == C;
2991}
2992
2994 double C) const {
2995 if (!MOP.isReg())
2996 return false;
2997 std::optional<FPValueAndVReg> MaybeCst;
2998 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2999 return false;
3000
3001 return MaybeCst->Value.isExactlyValue(C);
3002}
3003
3005 unsigned OpIdx) const {
3006 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3007 Register OldReg = MI.getOperand(0).getReg();
3008 Register Replacement = MI.getOperand(OpIdx).getReg();
3009 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3010 replaceRegWith(MRI, OldReg, Replacement);
3011 MI.eraseFromParent();
3012}
3013
3015 Register Replacement) const {
3016 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3017 Register OldReg = MI.getOperand(0).getReg();
3018 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3019 replaceRegWith(MRI, OldReg, Replacement);
3020 MI.eraseFromParent();
3021}
3022
3024 unsigned ConstIdx) const {
3025 Register ConstReg = MI.getOperand(ConstIdx).getReg();
3026 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3027
3028 // Get the shift amount
3029 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3030 if (!VRegAndVal)
3031 return false;
3032
3033 // Return true if the shift amount is >= the bitwidth.
3034 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
3035}
3036
3038 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
3039 MI.getOpcode() == TargetOpcode::G_FSHR) &&
3040 "This is not a funnel shift operation");
3041
3042 Register ConstReg = MI.getOperand(3).getReg();
3043 LLT ConstTy = MRI.getType(ConstReg);
3044 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3045
3046 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3047 assert((VRegAndVal) && "Value is not a constant");
3048
3049 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
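// For example (illustrative values): a G_FSHL of s64 values with a constant
// shift amount of 70 is rewritten below to use 70 % 64 = 6.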
3050 APInt NewConst = VRegAndVal->Value.urem(
3051 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3052
3053 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3054 Builder.buildInstr(
3055 MI.getOpcode(), {MI.getOperand(0)},
3056 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3057
3058 MI.eraseFromParent();
3059}
3060
3062 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3063 // Match (cond ? x : x)
3064 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3065 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3066 MRI);
3067}
3068
3070 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3071 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3072 MRI);
3073}
3074
3076 unsigned OpIdx) const {
3077 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
3078 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
3079 MRI);
3080}
3081
3083 unsigned OpIdx) const {
3084 MachineOperand &MO = MI.getOperand(OpIdx);
3085 return MO.isReg() &&
3086 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3087}
3088
3090 unsigned OpIdx) const {
3091 MachineOperand &MO = MI.getOperand(OpIdx);
3092 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT);
3093}
3094
3096 double C) const {
3097 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3098 Builder.buildFConstant(MI.getOperand(0), C);
3099 MI.eraseFromParent();
3100}
3101
3103 int64_t C) const {
3104 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3105 Builder.buildConstant(MI.getOperand(0), C);
3106 MI.eraseFromParent();
3107}
3108
3110 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3111 Builder.buildConstant(MI.getOperand(0), C);
3112 MI.eraseFromParent();
3113}
3114
3116 ConstantFP *CFP) const {
3117 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3118 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3119 MI.eraseFromParent();
3120}
3121
3123 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3124 Builder.buildUndef(MI.getOperand(0));
3125 MI.eraseFromParent();
3126}
3127
3129 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3130 Register LHS = MI.getOperand(1).getReg();
3131 Register RHS = MI.getOperand(2).getReg();
3132 Register &NewLHS = std::get<0>(MatchInfo);
3133 Register &NewRHS = std::get<1>(MatchInfo);
3134
3135 // Helper lambda to check for opportunities for
3136 // ((0-A) + B) -> B - A
3137 // (A + (0-B)) -> A - B
3138 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3139 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3140 return false;
3141 NewLHS = MaybeNewLHS;
3142 return true;
3143 };
3144
3145 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3146}
3147
3149 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3150 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3151 "Invalid opcode");
3152 Register DstReg = MI.getOperand(0).getReg();
3153 LLT DstTy = MRI.getType(DstReg);
3154 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3155
3156 if (DstTy.isScalableVector())
3157 return false;
3158
3159 unsigned NumElts = DstTy.getNumElements();
3160 // If this MI is part of a sequence of insert_vec_elts, then
3161 // don't do the combine in the middle of the sequence.
3162 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3163 TargetOpcode::G_INSERT_VECTOR_ELT)
3164 return false;
3165 MachineInstr *CurrInst = &MI;
3166 MachineInstr *TmpInst;
3167 int64_t IntImm;
3168 Register TmpReg;
3169 MatchInfo.resize(NumElts);
3170 while (mi_match(
3171 CurrInst->getOperand(0).getReg(), MRI,
3172 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3173 if (IntImm >= NumElts || IntImm < 0)
3174 return false;
3175 if (!MatchInfo[IntImm])
3176 MatchInfo[IntImm] = TmpReg;
3177 CurrInst = TmpInst;
3178 }
3179 // Variable index.
3180 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3181 return false;
3182 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3183 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3184 if (!MatchInfo[I - 1].isValid())
3185 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3186 }
3187 return true;
3188 }
3189 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3190 // overwritten, bail out.
3191 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3192 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3193}
3194
3196 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3197 Register UndefReg;
3198 auto GetUndef = [&]() {
3199 if (UndefReg)
3200 return UndefReg;
3201 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3202 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3203 return UndefReg;
3204 };
3205 for (Register &Reg : MatchInfo) {
3206 if (!Reg)
3207 Reg = GetUndef();
3208 }
3209 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3210 MI.eraseFromParent();
3211}
3212
3214 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3215 Register SubLHS, SubRHS;
3216 std::tie(SubLHS, SubRHS) = MatchInfo;
3217 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3218 MI.eraseFromParent();
3219}
3220
3222 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3223 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3224 //
3225 // Creates the new hand + logic instruction (but does not insert them.)
3226 //
3227 // On success, MatchInfo is populated with the new instructions. These are
3228 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3229 unsigned LogicOpcode = MI.getOpcode();
3230 assert(LogicOpcode == TargetOpcode::G_AND ||
3231 LogicOpcode == TargetOpcode::G_OR ||
3232 LogicOpcode == TargetOpcode::G_XOR);
3233 MachineIRBuilder MIB(MI);
3234 Register Dst = MI.getOperand(0).getReg();
3235 Register LHSReg = MI.getOperand(1).getReg();
3236 Register RHSReg = MI.getOperand(2).getReg();
3237
3238 // Don't recompute anything.
3239 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3240 return false;
3241
3242 // Make sure we have (hand x, ...), (hand y, ...)
3243 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3244 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3245 if (!LeftHandInst || !RightHandInst)
3246 return false;
3247 unsigned HandOpcode = LeftHandInst->getOpcode();
3248 if (HandOpcode != RightHandInst->getOpcode())
3249 return false;
3250 if (LeftHandInst->getNumOperands() < 2 ||
3251 !LeftHandInst->getOperand(1).isReg() ||
3252 RightHandInst->getNumOperands() < 2 ||
3253 !RightHandInst->getOperand(1).isReg())
3254 return false;
3255
3256 // Make sure the types match up, and if we're doing this post-legalization,
3257 // we end up with legal types.
3258 Register X = LeftHandInst->getOperand(1).getReg();
3259 Register Y = RightHandInst->getOperand(1).getReg();
3260 LLT XTy = MRI.getType(X);
3261 LLT YTy = MRI.getType(Y);
3262 if (!XTy.isValid() || XTy != YTy)
3263 return false;
3264
3265 // Optional extra source register.
3266 Register ExtraHandOpSrcReg;
3267 switch (HandOpcode) {
3268 default:
3269 return false;
3270 case TargetOpcode::G_ANYEXT:
3271 case TargetOpcode::G_SEXT:
3272 case TargetOpcode::G_ZEXT: {
3273 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3274 break;
3275 }
3276 case TargetOpcode::G_TRUNC: {
3277 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3278 const MachineFunction *MF = MI.getMF();
3279 LLVMContext &Ctx = MF->getFunction().getContext();
3280
3281 LLT DstTy = MRI.getType(Dst);
3282 const TargetLowering &TLI = getTargetLowering();
3283
3284 // Be extra careful sinking truncate. If it's free, there's no benefit in
3285 // widening a binop.
3286 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3287 return false;
3288 break;
3289 }
3290 case TargetOpcode::G_AND:
3291 case TargetOpcode::G_ASHR:
3292 case TargetOpcode::G_LSHR:
3293 case TargetOpcode::G_SHL: {
3294 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3295 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3296 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3297 return false;
3298 ExtraHandOpSrcReg = ZOp.getReg();
3299 break;
3300 }
3301 }
3302
3303 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3304 return false;
3305
3306 // Record the steps to build the new instructions.
3307 //
3308 // Steps to build (logic x, y)
3309 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3310 OperandBuildSteps LogicBuildSteps = {
3311 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3312 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3313 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3314 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3315
3316 // Steps to build hand (logic x, y), ...z
3317 OperandBuildSteps HandBuildSteps = {
3318 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3319 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3320 if (ExtraHandOpSrcReg.isValid())
3321 HandBuildSteps.push_back(
3322 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3323 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3324
3325 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3326 return true;
3327}
3328
3330 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3331 assert(MatchInfo.InstrsToBuild.size() &&
3332 "Expected at least one instr to build?");
3333 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3334 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3335 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3336 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3337 for (auto &OperandFn : InstrToBuild.OperandFns)
3338 OperandFn(Instr);
3339 }
3340 MI.eraseFromParent();
3341}
3342
3344 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3345 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3346 int64_t ShlCst, AshrCst;
3347 Register Src;
3348 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3349 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3350 m_ICstOrSplat(AshrCst))))
3351 return false;
3352 if (ShlCst != AshrCst)
3353 return false;
3354 if (!isLegalOrBeforeLegalizer(
3355 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3356 return false;
3357 MatchInfo = std::make_tuple(Src, ShlCst);
3358 return true;
3359}
3360
3362 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3363 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3364 Register Src;
3365 int64_t ShiftAmt;
3366 std::tie(Src, ShiftAmt) = MatchInfo;
3367 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3368 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3369 MI.eraseFromParent();
3370}
3371
3372/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
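/// e.g. (illustrative values) C1 = 0x0ff0, C2 = 0x00ff gives and(x, 0x00f0),
/// while C1 = 0xff00, C2 = 0x00ff gives a constant 0.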
3373 bool CombinerHelper::matchOverlappingAnd(
3374 MachineInstr &MI,
3375 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3376 assert(MI.getOpcode() == TargetOpcode::G_AND);
3377
3378 Register Dst = MI.getOperand(0).getReg();
3379 LLT Ty = MRI.getType(Dst);
3380
3381 Register R;
3382 int64_t C1;
3383 int64_t C2;
3384 if (!mi_match(
3385 Dst, MRI,
3386 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3387 return false;
3388
3389 MatchInfo = [=](MachineIRBuilder &B) {
3390 if (C1 & C2) {
3391 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3392 return;
3393 }
3394 auto Zero = B.buildConstant(Ty, 0);
3395 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3396 };
3397 return true;
3398}
3399
3400 bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3401 Register &Replacement) const {
3402 // Given
3403 //
3404 // %y:_(sN) = G_SOMETHING
3405 // %x:_(sN) = G_SOMETHING
3406 // %res:_(sN) = G_AND %x, %y
3407 //
3408 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3409 //
3410 // Patterns like this can appear as a result of legalization. E.g.
3411 //
3412 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3413 // %one:_(s32) = G_CONSTANT i32 1
3414 // %and:_(s32) = G_AND %cmp, %one
3415 //
3416 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3417 assert(MI.getOpcode() == TargetOpcode::G_AND);
3418 if (!VT)
3419 return false;
3420
3421 Register AndDst = MI.getOperand(0).getReg();
3422 Register LHS = MI.getOperand(1).getReg();
3423 Register RHS = MI.getOperand(2).getReg();
3424
3425 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3426 // we can't do anything. If we do, then it depends on whether we have
3427 // KnownBits on the LHS.
3428 KnownBits RHSBits = VT->getKnownBits(RHS);
3429 if (RHSBits.isUnknown())
3430 return false;
3431
3432 KnownBits LHSBits = VT->getKnownBits(LHS);
3433
3434 // Check that x & Mask == x.
3435 // x & 1 == x, always
3436 // x & 0 == x, only if x is also 0
3437 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
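 // E.g. for a 4-bit value, LHSBits.Zero = 0b1100 and RHSBits.One = 0b0011
 // cover all four bits, so the G_AND result is known to equal the LHS.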
3438 //
3439 // Check if we can replace AndDst with the LHS of the G_AND
3440 if (canReplaceReg(AndDst, LHS, MRI) &&
3441 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3442 Replacement = LHS;
3443 return true;
3444 }
3445
3446 // Check if we can replace AndDst with the RHS of the G_AND
3447 if (canReplaceReg(AndDst, RHS, MRI) &&
3448 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3449 Replacement = RHS;
3450 return true;
3451 }
3452
3453 return false;
3454}
3455
3456 bool CombinerHelper::matchRedundantOr(MachineInstr &MI,
3457 Register &Replacement) const {
3458 // Given
3459 //
3460 // %y:_(sN) = G_SOMETHING
3461 // %x:_(sN) = G_SOMETHING
3462 // %res:_(sN) = G_OR %x, %y
3463 //
3464 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3465 assert(MI.getOpcode() == TargetOpcode::G_OR);
3466 if (!VT)
3467 return false;
3468
3469 Register OrDst = MI.getOperand(0).getReg();
3470 Register LHS = MI.getOperand(1).getReg();
3471 Register RHS = MI.getOperand(2).getReg();
3472
3473 KnownBits LHSBits = VT->getKnownBits(LHS);
3474 KnownBits RHSBits = VT->getKnownBits(RHS);
3475
3476 // Check that x | Mask == x.
3477 // x | 0 == x, always
3478 // x | 1 == x, only if x is also 1
3479 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3480 //
3481 // Check if we can replace OrDst with the LHS of the G_OR
3482 if (canReplaceReg(OrDst, LHS, MRI) &&
3483 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3484 Replacement = LHS;
3485 return true;
3486 }
3487
3488 // Check if we can replace OrDst with the RHS of the G_OR
3489 if (canReplaceReg(OrDst, RHS, MRI) &&
3490 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3491 Replacement = RHS;
3492 return true;
3493 }
3494
3495 return false;
3496}
3497
3498 bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) const {
3499 // If the input is already sign extended, just drop the extension.
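 // e.g. (sext_inreg (ashr x:s32, 24), 8): the ashr result has at least 25
 // sign bits, which is >= 32 - 8 + 1, so the G_SEXT_INREG can be dropped.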
3500 Register Src = MI.getOperand(1).getReg();
3501 unsigned ExtBits = MI.getOperand(2).getImm();
3502 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3503 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3504}
3505
3506static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3507 int64_t Cst, bool IsVector, bool IsFP) {
3508 // For i1, Cst will always be -1 regardless of boolean contents.
3509 return (ScalarSizeBits == 1 && Cst == -1) ||
3510 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3511}
3512
3513// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3514// using vector truncates instead
3515//
3516// EXAMPLE:
3517// %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
3518// %T_a(i16) = G_TRUNC %a(i32)
3519// %T_b(i16) = G_TRUNC %b(i32)
3520// %Undef(i16) = G_IMPLICIT_DEF(i16)
3521 // %dst(v4i16) = G_BUILD_VECTOR %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
3522//
3523// ===>
3524// %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
3525// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
3526// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3527//
3528// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3529 bool CombinerHelper::matchUseVectorTruncate(MachineInstr &MI,
3530 Register &MatchInfo) const {
3531 auto BuildMI = cast<GBuildVector>(&MI);
3532 unsigned NumOperands = BuildMI->getNumSources();
3533 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3534
3535 // Check the G_BUILD_VECTOR sources
3536 unsigned I;
3537 MachineInstr *UnmergeMI = nullptr;
3538
3539 // Check all source TRUNCs come from the same UNMERGE instruction
3540 for (I = 0; I < NumOperands; ++I) {
3541 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3542 auto SrcMIOpc = SrcMI->getOpcode();
3543
3544 // Check if the G_TRUNC instructions all come from the same MI
3545 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3546 if (!UnmergeMI) {
3547 UnmergeMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3548 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3549 return false;
3550 } else {
3551 auto UnmergeSrcMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3552 if (UnmergeMI != UnmergeSrcMI)
3553 return false;
3554 }
3555 } else {
3556 break;
3557 }
3558 }
3559 if (I < 2)
3560 return false;
3561
3562 // Check the remaining source elements are only G_IMPLICIT_DEF
3563 for (; I < NumOperands; ++I) {
3564 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3565 auto SrcMIOpc = SrcMI->getOpcode();
3566
3567 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3568 return false;
3569 }
3570
3571 // Check the size of unmerge source
3572 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3573 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3574 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3575 return false;
3576
3577 // Check the unmerge source and destination element types match
3578 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3579 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3580 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3581 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3582 return false;
3583
3584 // Only generate legal instructions post-legalizer
3585 if (!IsPreLegalize) {
3586 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3587
3588 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3589 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3590 return false;
3591
3592 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3593 return false;
3594 }
3595
3596 return true;
3597}
3598
3599 void CombinerHelper::applyUseVectorTruncate(MachineInstr &MI,
3600 Register &MatchInfo) const {
3601 Register MidReg;
3602 auto BuildMI = cast<GBuildVector>(&MI);
3603 Register DstReg = BuildMI->getReg(0);
3604 LLT DstTy = MRI.getType(DstReg);
3605 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3606 unsigned DstTyNumElt = DstTy.getNumElements();
3607 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3608
3609 // No need to pad vector if only G_TRUNC is needed
3610 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3611 MidReg = MatchInfo;
3612 } else {
3613 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3614 SmallVector<Register> ConcatRegs = {MatchInfo};
3615 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3616 ConcatRegs.push_back(UndefReg);
3617
3618 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3619 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3620 }
3621
3622 Builder.buildTrunc(DstReg, MidReg);
3623 MI.eraseFromParent();
3624}
3625
3626 bool CombinerHelper::matchNotCmp(
3627 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3628 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3629 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3630 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3631 Register XorSrc;
3632 Register CstReg;
3633 // We match xor(src, true) here.
3634 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3635 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3636 return false;
3637
3638 if (!MRI.hasOneNonDBGUse(XorSrc))
3639 return false;
3640
3641 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3642 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3643 // list of tree nodes to visit.
3644 RegsToNegate.push_back(XorSrc);
3645 // Remember whether the comparisons are all integer or all floating point.
3646 bool IsInt = false;
3647 bool IsFP = false;
3648 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3649 Register Reg = RegsToNegate[I];
3650 if (!MRI.hasOneNonDBGUse(Reg))
3651 return false;
3652 MachineInstr *Def = MRI.getVRegDef(Reg);
3653 switch (Def->getOpcode()) {
3654 default:
3655 // Don't match if the tree contains anything other than ANDs, ORs and
3656 // comparisons.
3657 return false;
3658 case TargetOpcode::G_ICMP:
3659 if (IsFP)
3660 return false;
3661 IsInt = true;
3662 // When we apply the combine we will invert the predicate.
3663 break;
3664 case TargetOpcode::G_FCMP:
3665 if (IsInt)
3666 return false;
3667 IsFP = true;
3668 // When we apply the combine we will invert the predicate.
3669 break;
3670 case TargetOpcode::G_AND:
3671 case TargetOpcode::G_OR:
3672 // Implement De Morgan's laws:
3673 // ~(x & y) -> ~x | ~y
3674 // ~(x | y) -> ~x & ~y
3675 // When we apply the combine we will change the opcode and recursively
3676 // negate the operands.
3677 RegsToNegate.push_back(Def->getOperand(1).getReg());
3678 RegsToNegate.push_back(Def->getOperand(2).getReg());
3679 break;
3680 }
3681 }
3682
3683 // Now we know whether the comparisons are integer or floating point, check
3684 // the constant in the xor.
3685 int64_t Cst;
3686 if (Ty.isVector()) {
3687 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3688 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3689 if (!MaybeCst)
3690 return false;
3691 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3692 return false;
3693 } else {
3694 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3695 return false;
3696 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3697 return false;
3698 }
3699
3700 return true;
3701}
3702
3703 void CombinerHelper::applyNotCmp(
3704 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3705 for (Register Reg : RegsToNegate) {
3706 MachineInstr *Def = MRI.getVRegDef(Reg);
3707 Observer.changingInstr(*Def);
3708 // For each comparison, invert the opcode. For each AND and OR, change the
3709 // opcode.
3710 switch (Def->getOpcode()) {
3711 default:
3712 llvm_unreachable("Unexpected opcode");
3713 case TargetOpcode::G_ICMP:
3714 case TargetOpcode::G_FCMP: {
3715 MachineOperand &PredOp = Def->getOperand(1);
3716 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3717 (CmpInst::Predicate)PredOp.getPredicate());
3718 PredOp.setPredicate(NewP);
3719 break;
3720 }
3721 case TargetOpcode::G_AND:
3722 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3723 break;
3724 case TargetOpcode::G_OR:
3725 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3726 break;
3727 }
3728 Observer.changedInstr(*Def);
3729 }
3730
3731 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3732 MI.eraseFromParent();
3733}
3734
3735 bool CombinerHelper::matchXorOfAndWithSameReg(
3736 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3737 // Match (xor (and x, y), y) (or any of its commuted cases)
3738 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3739 Register &X = MatchInfo.first;
3740 Register &Y = MatchInfo.second;
3741 Register AndReg = MI.getOperand(1).getReg();
3742 Register SharedReg = MI.getOperand(2).getReg();
3743
3744 // Find a G_AND on either side of the G_XOR.
3745 // Look for one of
3746 //
3747 // (xor (and x, y), SharedReg)
3748 // (xor SharedReg, (and x, y))
3749 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3750 std::swap(AndReg, SharedReg);
3751 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3752 return false;
3753 }
3754
3755 // Only do this if we'll eliminate the G_AND.
3756 if (!MRI.hasOneNonDBGUse(AndReg))
3757 return false;
3758
3759 // We can combine if SharedReg is the same as either the LHS or RHS of the
3760 // G_AND.
3761 if (Y != SharedReg)
3762 std::swap(X, Y);
3763 return Y == SharedReg;
3764}
3765
3766 void CombinerHelper::applyXorOfAndWithSameReg(
3767 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3768 // Fold (xor (and x, y), y) -> (and (not x), y)
3769 Register X, Y;
3770 std::tie(X, Y) = MatchInfo;
3771 auto Not = Builder.buildNot(MRI.getType(X), X);
3772 Observer.changingInstr(MI);
3773 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3774 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3775 MI.getOperand(2).setReg(Y);
3776 Observer.changedInstr(MI);
3777}
3778
3779 bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) const {
3780 auto &PtrAdd = cast<GPtrAdd>(MI);
3781 Register DstReg = PtrAdd.getReg(0);
3782 LLT Ty = MRI.getType(DstReg);
3783 const DataLayout &DL = Builder.getMF().getDataLayout();
3784
3785 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3786 return false;
3787
3788 if (Ty.isPointer()) {
3789 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3790 return ConstVal && *ConstVal == 0;
3791 }
3792
3793 assert(Ty.isVector() && "Expecting a vector type");
3794 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3795 return isBuildVectorAllZeros(*VecMI, MRI);
3796}
3797
3798 void CombinerHelper::applyPtrAddZero(MachineInstr &MI) const {
3799 auto &PtrAdd = cast<GPtrAdd>(MI);
3800 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3801 PtrAdd.eraseFromParent();
3802}
3803
3804/// The second source operand is known to be a power of 2.
3805 void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) const {
3806 Register DstReg = MI.getOperand(0).getReg();
3807 Register Src0 = MI.getOperand(1).getReg();
3808 Register Pow2Src1 = MI.getOperand(2).getReg();
3809 LLT Ty = MRI.getType(DstReg);
3810
3811 // Fold (urem x, pow2) -> (and x, pow2-1)
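 // e.g. (urem x, 8) -> (and x, 7)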
3812 auto NegOne = Builder.buildConstant(Ty, -1);
3813 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3814 Builder.buildAnd(DstReg, Src0, Add);
3815 MI.eraseFromParent();
3816}
3817
3818 bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
3819 unsigned &SelectOpNo) const {
3820 Register LHS = MI.getOperand(1).getReg();
3821 Register RHS = MI.getOperand(2).getReg();
3822
3823 Register OtherOperandReg = RHS;
3824 SelectOpNo = 1;
3825 MachineInstr *Select = MRI.getVRegDef(LHS);
3826
3827 // Don't do this unless the old select is going away. We want to eliminate the
3828 // binary operator, not replace a binop with a select.
3829 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3830 !MRI.hasOneNonDBGUse(LHS)) {
3831 OtherOperandReg = LHS;
3832 SelectOpNo = 2;
3833 Select = MRI.getVRegDef(RHS);
3834 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3835 !MRI.hasOneNonDBGUse(RHS))
3836 return false;
3837 }
3838
3839 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3840 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3841
3842 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3843 /*AllowFP*/ true,
3844 /*AllowOpaqueConstants*/ false))
3845 return false;
3846 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3847 /*AllowFP*/ true,
3848 /*AllowOpaqueConstants*/ false))
3849 return false;
3850
3851 unsigned BinOpcode = MI.getOpcode();
3852
3853 // We know that one of the operands is a select of constants. Now verify that
3854 // the other binary operator operand is either a constant, or we can handle a
3855 // variable.
3856 bool CanFoldNonConst =
3857 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3858 (isNullOrNullSplat(*SelectLHS, MRI) ||
3859 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3860 (isNullOrNullSplat(*SelectRHS, MRI) ||
3861 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3862 if (CanFoldNonConst)
3863 return true;
3864
3865 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3866 /*AllowFP*/ true,
3867 /*AllowOpaqueConstants*/ false);
3868}
3869
3870/// \p SelectOperand is the operand in binary operator \p MI that is the select
3871/// to fold.
3872 void CombinerHelper::applyFoldBinOpIntoSelect(
3873 MachineInstr &MI, const unsigned &SelectOperand) const {
3874 Register Dst = MI.getOperand(0).getReg();
3875 Register LHS = MI.getOperand(1).getReg();
3876 Register RHS = MI.getOperand(2).getReg();
3877 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3878
3879 Register SelectCond = Select->getOperand(1).getReg();
3880 Register SelectTrue = Select->getOperand(2).getReg();
3881 Register SelectFalse = Select->getOperand(3).getReg();
3882
3883 LLT Ty = MRI.getType(Dst);
3884 unsigned BinOpcode = MI.getOpcode();
3885
3886 Register FoldTrue, FoldFalse;
3887
3888 // We have a select-of-constants followed by a binary operator with a
3889 // constant. Eliminate the binop by pulling the constant math into the select.
3890 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3891 if (SelectOperand == 1) {
3892 // TODO: SelectionDAG verifies this actually constant folds before
3893 // committing to the combine.
3894
3895 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3896 FoldFalse =
3897 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3898 } else {
3899 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3900 FoldFalse =
3901 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3902 }
3903
3904 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3905 MI.eraseFromParent();
3906}
3907
3908std::optional<SmallVector<Register, 8>>
3909CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3910 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3911 // We want to detect if Root is part of a tree which represents a bunch
3912 // of loads being merged into a larger load. We'll try to recognize patterns
3913 // like, for example:
3914 //
3915 // Reg Reg
3916 // \ /
3917 // OR_1 Reg
3918 // \ /
3919 // OR_2
3920 // \ Reg
3921 // .. /
3922 // Root
3923 //
3924 // Reg Reg Reg Reg
3925 // \ / \ /
3926 // OR_1 OR_2
3927 // \ /
3928 // \ /
3929 // ...
3930 // Root
3931 //
3932 // Each "Reg" may have been produced by a load + some arithmetic. This
3933 // function will save each of them.
3934 SmallVector<Register, 8> RegsToVisit;
3935 SmallVector<const MachineInstr *, 7> Ors = {Root};
3936
3937 // In the "worst" case, we're dealing with a load for each byte. So, there
3938 // are at most #bytes - 1 ORs.
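 // e.g. merging four s8 loads into one s32 value involves at most 3 G_ORs.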
3939 const unsigned MaxIter =
3940 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3941 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3942 if (Ors.empty())
3943 break;
3944 const MachineInstr *Curr = Ors.pop_back_val();
3945 Register OrLHS = Curr->getOperand(1).getReg();
3946 Register OrRHS = Curr->getOperand(2).getReg();
3947
3948 // In the combine, we want to eliminate the entire tree.
3949 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3950 return std::nullopt;
3951
3952 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3953 // something that may be a load + arithmetic.
3954 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3955 Ors.push_back(Or);
3956 else
3957 RegsToVisit.push_back(OrLHS);
3958 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3959 Ors.push_back(Or);
3960 else
3961 RegsToVisit.push_back(OrRHS);
3962 }
3963
3964 // We're going to try and merge each register into a wider power-of-2 type,
3965 // so we ought to have an even number of registers.
3966 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3967 return std::nullopt;
3968 return RegsToVisit;
3969}
3970
3971/// Helper function for findLoadOffsetsForLoadOrCombine.
3972///
3973/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3974/// and then moving that value into a specific byte offset.
3975///
3976/// e.g. x[i] << 24
3977///
3978/// \returns The load instruction and the byte offset it is moved into.
3979static std::optional<std::pair<GZExtLoad *, int64_t>>
3980matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3981 const MachineRegisterInfo &MRI) {
3982 assert(MRI.hasOneNonDBGUse(Reg) &&
3983 "Expected Reg to only have one non-debug use?");
3984 Register MaybeLoad;
3985 int64_t Shift;
3986 if (!mi_match(Reg, MRI,
3987 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3988 Shift = 0;
3989 MaybeLoad = Reg;
3990 }
3991
3992 if (Shift % MemSizeInBits != 0)
3993 return std::nullopt;
3994
3995 // TODO: Handle other types of loads.
3996 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3997 if (!Load)
3998 return std::nullopt;
3999
4000 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4001 return std::nullopt;
4002
4003 return std::make_pair(Load, Shift / MemSizeInBits);
4004}
4005
4006std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4007CombinerHelper::findLoadOffsetsForLoadOrCombine(
4008 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
4009 const SmallVector<Register, 8> &RegsToVisit,
4010 const unsigned MemSizeInBits) const {
4011
4012 // Each load found for the pattern. There should be one for each RegsToVisit.
4013 SmallSetVector<const MachineInstr *, 8> Loads;
4014
4015 // The lowest index used in any load. (The lowest "i" for each x[i].)
4016 int64_t LowestIdx = INT64_MAX;
4017
4018 // The load which uses the lowest index.
4019 GZExtLoad *LowestIdxLoad = nullptr;
4020
4021 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4022 SmallSet<int64_t, 8> SeenIdx;
4023
4024 // Ensure each load is in the same MBB.
4025 // TODO: Support multiple MachineBasicBlocks.
4026 MachineBasicBlock *MBB = nullptr;
4027 const MachineMemOperand *MMO = nullptr;
4028
4029 // Earliest instruction-order load in the pattern.
4030 GZExtLoad *EarliestLoad = nullptr;
4031
4032 // Latest instruction-order load in the pattern.
4033 GZExtLoad *LatestLoad = nullptr;
4034
4035 // Base pointer which every load should share.
4036 Register BasePtr;
4037
4038 // We want to find a load for each register. Each load should have some
4039 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4040 // track of the load which uses the lowest index. Later, we will check if we
4041 // can use its pointer in the final, combined load.
4042 for (auto Reg : RegsToVisit) {
4043 // Find the load, and find the position that it will end up in (e.g. a
4044 // shifted) value.
4045 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4046 if (!LoadAndPos)
4047 return std::nullopt;
4048 GZExtLoad *Load;
4049 int64_t DstPos;
4050 std::tie(Load, DstPos) = *LoadAndPos;
4051
4052 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4053 // it is difficult to check for stores/calls/etc between loads.
4054 MachineBasicBlock *LoadMBB = Load->getParent();
4055 if (!MBB)
4056 MBB = LoadMBB;
4057 if (LoadMBB != MBB)
4058 return std::nullopt;
4059
4060 // Make sure that the MachineMemOperands of every seen load are compatible.
4061 auto &LoadMMO = Load->getMMO();
4062 if (!MMO)
4063 MMO = &LoadMMO;
4064 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4065 return std::nullopt;
4066
4067 // Find out what the base pointer and index for the load is.
4068 Register LoadPtr;
4069 int64_t Idx;
4070 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4071 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4072 LoadPtr = Load->getOperand(1).getReg();
4073 Idx = 0;
4074 }
4075
4076 // Don't combine things like a[i], a[i] -> a bigger load.
4077 if (!SeenIdx.insert(Idx).second)
4078 return std::nullopt;
4079
4080 // Every load must share the same base pointer; don't combine things like:
4081 //
4082 // a[i], b[i + 1] -> a bigger load.
4083 if (!BasePtr.isValid())
4084 BasePtr = LoadPtr;
4085 if (BasePtr != LoadPtr)
4086 return std::nullopt;
4087
4088 if (Idx < LowestIdx) {
4089 LowestIdx = Idx;
4090 LowestIdxLoad = Load;
4091 }
4092
4093 // Keep track of the byte offset that this load ends up at. If we have seen
4094 // the byte offset, then stop here. We do not want to combine:
4095 //
4096 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4097 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4098 return std::nullopt;
4099 Loads.insert(Load);
4100
4101 // Keep track of the position of the earliest/latest loads in the pattern.
4102 // We will check that there are no load fold barriers between them later
4103 // on.
4104 //
4105 // FIXME: Is there a better way to check for load fold barriers?
4106 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4107 EarliestLoad = Load;
4108 if (!LatestLoad || dominates(*LatestLoad, *Load))
4109 LatestLoad = Load;
4110 }
4111
4112 // We found a load for each register. Let's check if each load satisfies the
4113 // pattern.
4114 assert(Loads.size() == RegsToVisit.size() &&
4115 "Expected to find a load for each register?");
4116 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4117 LatestLoad && "Expected at least two loads?");
4118
4119 // Check if there are any stores, calls, etc. between any of the loads. If
4120 // there are, then we can't safely perform the combine.
4121 //
4122 // MaxIter is chosen based off the (worst case) number of iterations it
4123 // typically takes to succeed in the LLVM test suite plus some padding.
4124 //
4125 // FIXME: Is there a better way to check for load fold barriers?
4126 const unsigned MaxIter = 20;
4127 unsigned Iter = 0;
4128 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4129 LatestLoad->getIterator())) {
4130 if (Loads.count(&MI))
4131 continue;
4132 if (MI.isLoadFoldBarrier())
4133 return std::nullopt;
4134 if (Iter++ == MaxIter)
4135 return std::nullopt;
4136 }
4137
4138 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4139}
4140
4141 bool CombinerHelper::matchLoadOrCombine(
4142 MachineInstr &MI,
4143 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4144 assert(MI.getOpcode() == TargetOpcode::G_OR);
4145 MachineFunction &MF = *MI.getMF();
4146 // Assuming a little-endian target, transform:
4147 // s8 *a = ...
4148 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4149 // =>
4150 // s32 val = *((i32)a)
4151 //
4152 // s8 *a = ...
4153 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4154 // =>
4155 // s32 val = BSWAP(*((s32)a))
4156 Register Dst = MI.getOperand(0).getReg();
4157 LLT Ty = MRI.getType(Dst);
4158 if (Ty.isVector())
4159 return false;
4160
4161 // We need to combine at least two loads into this type. Since the smallest
4162 // possible load is into a byte, we need at least a 16-bit wide type.
4163 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4164 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4165 return false;
4166
4167 // Match a collection of non-OR instructions in the pattern.
4168 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4169 if (!RegsToVisit)
4170 return false;
4171
4172 // We have a collection of non-OR instructions. Figure out how wide each of
4173 // the small loads should be based off of the number of potential loads we
4174 // found.
4175 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4176 if (NarrowMemSizeInBits % 8 != 0)
4177 return false;
4178
4179 // Check if each register feeding into each OR is a load from the same
4180 // base pointer + some arithmetic.
4181 //
4182 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4183 //
4184 // Also verify that each of these ends up putting a[i] into the same memory
4185 // offset as a load into a wide type would.
4186 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
4187 GZExtLoad *LowestIdxLoad, *LatestLoad;
4188 int64_t LowestIdx;
4189 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4190 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4191 if (!MaybeLoadInfo)
4192 return false;
4193 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4194
4195 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4196 // we found before, check if this corresponds to a big or little endian byte
4197 // pattern. If it does, then we can represent it using a load + possibly a
4198 // BSWAP.
4199 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4200 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4201 if (!IsBigEndian)
4202 return false;
4203 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4204 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4205 return false;
4206
4207 // Make sure that the load from the lowest index produces offset 0 in the
4208 // final value.
4209 //
4210 // This ensures that we won't combine something like this:
4211 //
4212 // load x[i] -> byte 2
4213 // load x[i+1] -> byte 0 ---> wide_load x[i]
4214 // load x[i+2] -> byte 1
4215 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4216 const unsigned ZeroByteOffset =
4217 *IsBigEndian
4218 ? bigEndianByteAt(NumLoadsInTy, 0)
4219 : littleEndianByteAt(NumLoadsInTy, 0);
4220 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4221 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4222 ZeroOffsetIdx->second != LowestIdx)
4223 return false;
4224
4225 // We will reuse the pointer from the load which ends up at byte offset 0. It
4226 // may not use index 0.
4227 Register Ptr = LowestIdxLoad->getPointerReg();
4228 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4229 LegalityQuery::MemDesc MMDesc(MMO);
4230 MMDesc.MemoryTy = Ty;
4232 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4233 return false;
4234 auto PtrInfo = MMO.getPointerInfo();
4235 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4236
4237 // Load must be allowed and fast on the target.
4238 LLVMContext &C = MF.getFunction().getContext();
4239 auto &DL = MF.getDataLayout();
4240 unsigned Fast = 0;
4241 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4242 !Fast)
4243 return false;
4244
4245 MatchInfo = [=](MachineIRBuilder &MIB) {
4246 MIB.setInstrAndDebugLoc(*LatestLoad);
4247 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4248 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4249 if (NeedsBSwap)
4250 MIB.buildBSwap(Dst, LoadDst);
4251 };
4252 return true;
4253}
4254
4255 bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
4256 MachineInstr *&ExtMI) const {
4257 auto &PHI = cast<GPhi>(MI);
4258 Register DstReg = PHI.getReg(0);
4259
4260 // TODO: Extending a vector may be expensive, don't do this until heuristics
4261 // are better.
4262 if (MRI.getType(DstReg).isVector())
4263 return false;
4264
4265 // Try to match a phi, whose only use is an extend.
4266 if (!MRI.hasOneNonDBGUse(DstReg))
4267 return false;
4268 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4269 switch (ExtMI->getOpcode()) {
4270 case TargetOpcode::G_ANYEXT:
4271 return true; // G_ANYEXT is usually free.
4272 case TargetOpcode::G_ZEXT:
4273 case TargetOpcode::G_SEXT:
4274 break;
4275 default:
4276 return false;
4277 }
4278
4279 // If the target is likely to fold this extend away, don't propagate.
4280 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4281 return false;
4282
4283 // We don't want to propagate the extends unless there's a good chance that
4284 // they'll be optimized in some way.
4285 // Collect the unique incoming values.
4287 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4288 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4289 switch (DefMI->getOpcode()) {
4290 case TargetOpcode::G_LOAD:
4291 case TargetOpcode::G_TRUNC:
4292 case TargetOpcode::G_SEXT:
4293 case TargetOpcode::G_ZEXT:
4294 case TargetOpcode::G_ANYEXT:
4295 case TargetOpcode::G_CONSTANT:
4296 InSrcs.insert(DefMI);
4297 // Don't try to propagate if there are too many places to create new
4298 // extends, chances are it'll increase code size.
4299 if (InSrcs.size() > 2)
4300 return false;
4301 break;
4302 default:
4303 return false;
4304 }
4305 }
4306 return true;
4307}
4308
4309 void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
4310 MachineInstr *&ExtMI) const {
4311 auto &PHI = cast<GPhi>(MI);
4312 Register DstReg = ExtMI->getOperand(0).getReg();
4313 LLT ExtTy = MRI.getType(DstReg);
4314
4315 // Propagate the extension into the block of each incoming reg's block.
4316 // Use a SetVector here because PHIs can have duplicate edges, and we want
4317 // deterministic iteration order.
4318 SmallSetVector<MachineInstr *, 8> SrcMIs;
4319 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
4320 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4321 auto SrcReg = PHI.getIncomingValue(I);
4322 auto *SrcMI = MRI.getVRegDef(SrcReg);
4323 if (!SrcMIs.insert(SrcMI))
4324 continue;
4325
4326 // Build an extend after each src inst.
4327 auto *MBB = SrcMI->getParent();
4328 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4329 if (InsertPt != MBB->end() && InsertPt->isPHI())
4330 InsertPt = MBB->getFirstNonPHI();
4331
4332 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4333 Builder.setDebugLoc(MI.getDebugLoc());
4334 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4335 OldToNewSrcMap[SrcMI] = NewExt;
4336 }
4337
4338 // Create a new phi with the extended inputs.
4339 Builder.setInstrAndDebugLoc(MI);
4340 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4341 NewPhi.addDef(DstReg);
4342 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4343 if (!MO.isReg()) {
4344 NewPhi.addMBB(MO.getMBB());
4345 continue;
4346 }
4347 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4348 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4349 }
4350 Builder.insertInstr(NewPhi);
4351 ExtMI->eraseFromParent();
4352}
4353
4354 bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
4355 Register &Reg) const {
4356 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4357 // If we have a constant index, look for a G_BUILD_VECTOR source
4358 // and find the source register that the index maps to.
4359 Register SrcVec = MI.getOperand(1).getReg();
4360 LLT SrcTy = MRI.getType(SrcVec);
4361 if (SrcTy.isScalableVector())
4362 return false;
4363
4364 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4365 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4366 return false;
4367
4368 unsigned VecIdx = Cst->Value.getZExtValue();
4369
4370 // Check if we have a build_vector or build_vector_trunc with an optional
4371 // trunc in front.
4372 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4373 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4374 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4375 }
4376
4377 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4378 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4379 return false;
4380
4381 EVT Ty(getMVTForLLT(SrcTy));
4382 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4383 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4384 return false;
4385
4386 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4387 return true;
4388}
4389
4390 void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
4391 Register &Reg) const {
4392 // Check the type of the register, since it may have come from a
4393 // G_BUILD_VECTOR_TRUNC.
4394 LLT ScalarTy = MRI.getType(Reg);
4395 Register DstReg = MI.getOperand(0).getReg();
4396 LLT DstTy = MRI.getType(DstReg);
4397
4398 if (ScalarTy != DstTy) {
4399 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4400 Builder.buildTrunc(DstReg, Reg);
4401 MI.eraseFromParent();
4402 return;
4403 }
4404 replaceSingleDefInstWithReg(MI, Reg);
4405}
4406
4407 bool CombinerHelper::matchExtractAllEltsFromBuildVector(
4408 MachineInstr &MI,
4409 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4410 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4411 // This combine tries to find build_vector's which have every source element
4412 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4413 // the masked load scalarization is run late in the pipeline. There's already
4414 // a combine for a similar pattern starting from the extract, but that
4415 // doesn't attempt to do it if there are multiple uses of the build_vector,
4416 // which in this case is true. Starting the combine from the build_vector
4417 // feels more natural than trying to find sibling nodes of extracts.
4418 // E.g.
4419 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4420 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4421 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4422 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4423 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4424 // ==>
4425 // replace ext{1,2,3,4} with %s{1,2,3,4}
4426
4427 Register DstReg = MI.getOperand(0).getReg();
4428 LLT DstTy = MRI.getType(DstReg);
4429 unsigned NumElts = DstTy.getNumElements();
4430
4431 SmallBitVector ExtractedElts(NumElts);
4432 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4433 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4434 return false;
4435 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4436 if (!Cst)
4437 return false;
4438 unsigned Idx = Cst->getZExtValue();
4439 if (Idx >= NumElts)
4440 return false; // Out of range.
4441 ExtractedElts.set(Idx);
4442 SrcDstPairs.emplace_back(
4443 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4444 }
4445 // Match if every element was extracted.
4446 return ExtractedElts.all();
4447}
4448
4449 void CombinerHelper::applyExtractAllEltsFromBuildVector(
4450 MachineInstr &MI,
4451 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4452 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4453 for (auto &Pair : SrcDstPairs) {
4454 auto *ExtMI = Pair.second;
4455 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4456 ExtMI->eraseFromParent();
4457 }
4458 MI.eraseFromParent();
4459}
4460
4461 void CombinerHelper::applyBuildFn(
4462 MachineInstr &MI,
4463 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4464 applyBuildFnNoErase(MI, MatchInfo);
4465 MI.eraseFromParent();
4466}
4467
4468 void CombinerHelper::applyBuildFnNoErase(
4469 MachineInstr &MI,
4470 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4471 MatchInfo(Builder);
4472}
4473
4474 bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
4475 BuildFnTy &MatchInfo) const {
4476 assert(MI.getOpcode() == TargetOpcode::G_OR);
4477
4478 Register Dst = MI.getOperand(0).getReg();
4479 LLT Ty = MRI.getType(Dst);
4480 unsigned BitWidth = Ty.getScalarSizeInBits();
4481
4482 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4483 unsigned FshOpc = 0;
4484
4485 // Match (or (shl ...), (lshr ...)).
4486 if (!mi_match(Dst, MRI,
4487 // m_GOr() handles the commuted version as well.
4488 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4489 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4490 return false;
4491
4492 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4493 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
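 // e.g. for a 32-bit type: (or (shl x, 8), (lshr y, 24)) -> (fshl x, y, 8),
 // which is equivalent to (fshr x, y, 24).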
4494 int64_t CstShlAmt, CstLShrAmt;
4495 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4496 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4497 CstShlAmt + CstLShrAmt == BitWidth) {
4498 FshOpc = TargetOpcode::G_FSHR;
4499 Amt = LShrAmt;
4500
4501 } else if (mi_match(LShrAmt, MRI,
4502 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4503 ShlAmt == Amt) {
4504 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4505 FshOpc = TargetOpcode::G_FSHL;
4506
4507 } else if (mi_match(ShlAmt, MRI,
4508 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4509 LShrAmt == Amt) {
4510 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4511 FshOpc = TargetOpcode::G_FSHR;
4512
4513 } else {
4514 return false;
4515 }
4516
4517 LLT AmtTy = MRI.getType(Amt);
4518 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4519 return false;
4520
4521 MatchInfo = [=](MachineIRBuilder &B) {
4522 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4523 };
4524 return true;
4525}
4526
4527 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) const {
4527/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4529 unsigned Opc = MI.getOpcode();
4530 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4531 Register X = MI.getOperand(1).getReg();
4532 Register Y = MI.getOperand(2).getReg();
4533 if (X != Y)
4534 return false;
4535 unsigned RotateOpc =
4536 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4537 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4538}
4539
4540 void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) const {
4541 unsigned Opc = MI.getOpcode();
4542 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4543 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4544 Observer.changingInstr(MI);
4545 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4546 : TargetOpcode::G_ROTR));
4547 MI.removeOperand(2);
4548 Observer.changedInstr(MI);
4549}
4550
4551// Fold (rot x, c) -> (rot x, c % BitSize)
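// e.g. (rotl x:s32, 37) -> (rotl x:s32, 5)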
4552 bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) const {
4553 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4554 MI.getOpcode() == TargetOpcode::G_ROTR);
4555 unsigned Bitsize =
4556 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4557 Register AmtReg = MI.getOperand(2).getReg();
4558 bool OutOfRange = false;
4559 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4560 if (auto *CI = dyn_cast<ConstantInt>(C))
4561 OutOfRange |= CI->getValue().uge(Bitsize);
4562 return true;
4563 };
4564 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4565}
4566
4567 void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) const {
4568 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4569 MI.getOpcode() == TargetOpcode::G_ROTR);
4570 unsigned Bitsize =
4571 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4572 Register Amt = MI.getOperand(2).getReg();
4573 LLT AmtTy = MRI.getType(Amt);
4574 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4575 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4576 Observer.changingInstr(MI);
4577 MI.getOperand(2).setReg(Amt);
4578 Observer.changedInstr(MI);
4579}
4580
4581 bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4582 int64_t &MatchInfo) const {
4583 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4584 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4585
4586 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4587 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4588 // KnownBits on the LHS in two cases:
4589 //
4590 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4591 // we cannot do any transforms so we can safely bail out early.
4592 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4593 // >=0.
4594 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4595 if (KnownRHS.isUnknown())
4596 return false;
4597
4598 std::optional<bool> KnownVal;
4599 if (KnownRHS.isZero()) {
4600 // ? uge 0 -> always true
4601 // ? ult 0 -> always false
4602 if (Pred == CmpInst::ICMP_UGE)
4603 KnownVal = true;
4604 else if (Pred == CmpInst::ICMP_ULT)
4605 KnownVal = false;
4606 }
4607
4608 if (!KnownVal) {
4609 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4610 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4611 }
4612
4613 if (!KnownVal)
4614 return false;
4615 MatchInfo =
4616 *KnownVal
4617 ? getICmpTrueVal(getTargetLowering(),
4618 /*IsVector = */
4619 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4620 /* IsFP = */ false)
4621 : 0;
4622 return true;
4623}
4624
4625 bool CombinerHelper::matchICmpToLHSKnownBits(
4626 MachineInstr &MI,
4627 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4628 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4629 // Given:
4630 //
4631 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4632 // %cmp = G_ICMP ne %x, 0
4633 //
4634 // Or:
4635 //
4636 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4637 // %cmp = G_ICMP eq %x, 1
4638 //
4639 // We can replace %cmp with %x assuming true is 1 on the target.
4640 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4641 if (!CmpInst::isEquality(Pred))
4642 return false;
4643 Register Dst = MI.getOperand(0).getReg();
4644 LLT DstTy = MRI.getType(Dst);
4645 if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
4646 /* IsFP = */ false) != 1)
4647 return false;
4648 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4649 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4650 return false;
4651 Register LHS = MI.getOperand(2).getReg();
4652 auto KnownLHS = VT->getKnownBits(LHS);
4653 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4654 return false;
4655 // Make sure replacing Dst with the LHS is a legal operation.
4656 LLT LHSTy = MRI.getType(LHS);
4657 unsigned LHSSize = LHSTy.getSizeInBits();
4658 unsigned DstSize = DstTy.getSizeInBits();
4659 unsigned Op = TargetOpcode::COPY;
4660 if (DstSize != LHSSize)
4661 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4662 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4663 return false;
4664 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4665 return true;
4666}
4667
4668// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
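// e.g. (and (or x, 0xF0), 0x0F) -> (and x, 0x0F) because 0xF0 & 0x0F == 0.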
4669 bool CombinerHelper::matchAndOrDisjointMask(
4670 MachineInstr &MI,
4671 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4672 assert(MI.getOpcode() == TargetOpcode::G_AND);
4673
4674 // Ignore vector types to simplify matching the two constants.
4675 // TODO: do this for vectors and scalars via a demanded bits analysis.
4676 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4677 if (Ty.isVector())
4678 return false;
4679
4680 Register Src;
4681 Register AndMaskReg;
4682 int64_t AndMaskBits;
4683 int64_t OrMaskBits;
4684 if (!mi_match(MI, MRI,
4685 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4686 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4687 return false;
4688
4689 // Check if OrMask could turn on any bits in Src.
4690 if (AndMaskBits & OrMaskBits)
4691 return false;
4692
4693 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4694 Observer.changingInstr(MI);
4695 // Canonicalize the result to have the constant on the RHS.
4696 if (MI.getOperand(1).getReg() == AndMaskReg)
4697 MI.getOperand(2).setReg(AndMaskReg);
4698 MI.getOperand(1).setReg(Src);
4699 Observer.changedInstr(MI);
4700 };
4701 return true;
4702}
4703
4704/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4705 bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4706 MachineInstr &MI,
4707 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4708 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4709 Register Dst = MI.getOperand(0).getReg();
4710 Register Src = MI.getOperand(1).getReg();
4711 LLT Ty = MRI.getType(Src);
4712 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4713 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4714 return false;
4715 int64_t Width = MI.getOperand(2).getImm();
4716 Register ShiftSrc;
4717 int64_t ShiftImm;
4718 if (!mi_match(
4719 Src, MRI,
4720 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4721 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4722 return false;
4723 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4724 return false;
4725
4726 MatchInfo = [=](MachineIRBuilder &B) {
4727 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4728 auto Cst2 = B.buildConstant(ExtractTy, Width);
4729 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4730 };
4731 return true;
4732}
4733
4734/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4735 bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI,
4736 BuildFnTy &MatchInfo) const {
4737 GAnd *And = cast<GAnd>(&MI);
4738 Register Dst = And->getReg(0);
4739 LLT Ty = MRI.getType(Dst);
4740 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4741 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4742 // into account.
4743 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4744 return false;
4745
4746 int64_t AndImm, LSBImm;
4747 Register ShiftSrc;
4748 const unsigned Size = Ty.getScalarSizeInBits();
4749 if (!mi_match(And->getReg(0), MRI,
4750 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4751 m_ICst(AndImm))))
4752 return false;
4753
4754 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
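 // e.g. 0x0000ffff qualifies (0xffff & 0x10000 == 0), but 0x00ff00ff does not.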
4755 auto MaybeMask = static_cast<uint64_t>(AndImm);
4756 if (MaybeMask & (MaybeMask + 1))
4757 return false;
4758
4759 // LSB must fit within the register.
4760 if (static_cast<uint64_t>(LSBImm) >= Size)
4761 return false;
4762
4763 uint64_t Width = APInt(Size, AndImm).countr_one();
4764 MatchInfo = [=](MachineIRBuilder &B) {
4765 auto WidthCst = B.buildConstant(ExtractTy, Width);
4766 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4767 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4768 };
4769 return true;
4770}
4771
4772 bool CombinerHelper::matchBitfieldExtractFromShr(
4773 MachineInstr &MI,
4774 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4775 const unsigned Opcode = MI.getOpcode();
4776 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4777
4778 const Register Dst = MI.getOperand(0).getReg();
4779
4780 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4781 ? TargetOpcode::G_SBFX
4782 : TargetOpcode::G_UBFX;
4783
4784 // Check if the type we would use for the extract is legal
4785 LLT Ty = MRI.getType(Dst);
4786 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4787 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4788 return false;
4789
4790 Register ShlSrc;
4791 int64_t ShrAmt;
4792 int64_t ShlAmt;
4793 const unsigned Size = Ty.getScalarSizeInBits();
4794
4795 // Try to match shr (shl x, c1), c2
4796 if (!mi_match(Dst, MRI,
4797 m_BinOp(Opcode,
4798 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4799 m_ICst(ShrAmt))))
4800 return false;
4801
4802 // Make sure that the shift sizes can fit a bitfield extract
4803 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4804 return false;
4805
4806 // Skip this combine if the G_SEXT_INREG combine could handle it
4807 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4808 return false;
4809
4810 // Calculate start position and width of the extract
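 // e.g. on s32: (lshr (shl x, 4), 8) extracts 24 bits starting at bit 4,
 // i.e. G_UBFX x, 4, 24.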
4811 const int64_t Pos = ShrAmt - ShlAmt;
4812 const int64_t Width = Size - ShrAmt;
4813
4814 MatchInfo = [=](MachineIRBuilder &B) {
4815 auto WidthCst = B.buildConstant(ExtractTy, Width);
4816 auto PosCst = B.buildConstant(ExtractTy, Pos);
4817 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4818 };
4819 return true;
4820}
4821
4822 bool CombinerHelper::matchBitfieldExtractFromShrAnd(
4823 MachineInstr &MI,
4824 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4825 const unsigned Opcode = MI.getOpcode();
4826 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4827
4828 const Register Dst = MI.getOperand(0).getReg();
4829 LLT Ty = MRI.getType(Dst);
4830 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4831 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4832 return false;
4833
4834 // Try to match shr (and x, c1), c2
4835 Register AndSrc;
4836 int64_t ShrAmt;
4837 int64_t SMask;
4838 if (!mi_match(Dst, MRI,
4839 m_BinOp(Opcode,
4840 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4841 m_ICst(ShrAmt))))
4842 return false;
4843
4844 const unsigned Size = Ty.getScalarSizeInBits();
4845 if (ShrAmt < 0 || ShrAmt >= Size)
4846 return false;
4847
4848 // If the shift subsumes the mask, emit the 0 directly.
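 // e.g. (lshr (and x, 0xff), 8) is always 0.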
4849 if (0 == (SMask >> ShrAmt)) {
4850 MatchInfo = [=](MachineIRBuilder &B) {
4851 B.buildConstant(Dst, 0);
4852 };
4853 return true;
4854 }
4855
4856 // Check that ubfx can do the extraction, with no holes in the mask.
4857 uint64_t UMask = SMask;
4858 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4859 UMask &= maskTrailingOnes<uint64_t>(Size);
4860 if (!isMask_64(UMask))
4861 return false;
4862
4863 // Calculate start position and width of the extract.
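 // e.g. on s32: (lshr (and x, 0x0ff0), 4) becomes G_UBFX x, 4, 8.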
4864 const int64_t Pos = ShrAmt;
4865 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4866
4867 // It's preferable to keep the shift, rather than form G_SBFX.
4868 // TODO: remove the G_AND via demanded bits analysis.
4869 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4870 return false;
4871
4872 MatchInfo = [=](MachineIRBuilder &B) {
4873 auto WidthCst = B.buildConstant(ExtractTy, Width);
4874 auto PosCst = B.buildConstant(ExtractTy, Pos);
4875 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4876 };
4877 return true;
4878}
4879
4880bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4881 MachineInstr &MI) const {
4882 auto &PtrAdd = cast<GPtrAdd>(MI);
4883
4884 Register Src1Reg = PtrAdd.getBaseReg();
4885 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4886 if (!Src1Def)
4887 return false;
4888
4889 Register Src2Reg = PtrAdd.getOffsetReg();
4890
4891 if (MRI.hasOneNonDBGUse(Src1Reg))
4892 return false;
4893
4894 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4895 if (!C1)
4896 return false;
4897 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4898 if (!C2)
4899 return false;
4900
4901 const APInt &C1APIntVal = *C1;
4902 const APInt &C2APIntVal = *C2;
4903 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4904
4905 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4906 // This combine may end up running before ptrtoint/inttoptr combines
4907 // manage to eliminate redundant conversions, so try to look through them.
4908 MachineInstr *ConvUseMI = &UseMI;
4909 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4910 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4911 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4912 Register DefReg = ConvUseMI->getOperand(0).getReg();
4913 if (!MRI.hasOneNonDBGUse(DefReg))
4914 break;
4915 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4916 ConvUseOpc = ConvUseMI->getOpcode();
4917 }
4918 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4919 if (!LdStMI)
4920 continue;
4921 // Is x[offset2] already not a legal addressing mode? If so then
4922 // reassociating the constants breaks nothing (we test offset2 because
4923 // that's the one we hope to fold into the load or store).
4924 TargetLoweringBase::AddrMode AM;
4925 AM.HasBaseReg = true;
4926 AM.BaseOffs = C2APIntVal.getSExtValue();
4927 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4928 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4929 PtrAdd.getMF()->getFunction().getContext());
4930 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4931 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4932 AccessTy, AS))
4933 continue;
4934
4935 // Would x[offset1+offset2] still be a legal addressing mode?
4936 AM.BaseOffs = CombinedValue;
4937 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4938 AccessTy, AS))
4939 return true;
4940 }
4941
4942 return false;
4943}
4944
4945 bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
4946 MachineInstr *RHS,
4947 BuildFnTy &MatchInfo) const {
4948 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4949 Register Src1Reg = MI.getOperand(1).getReg();
4950 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4951 return false;
4952 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4953 if (!C2)
4954 return false;
4955
4956 // If both additions are nuw, the reassociated additions are also nuw.
4957 // If the original G_PTR_ADD is additionally nusw, X and C are both not
4958 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
4959 // therefore also nusw.
4960 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
4961 // the new G_PTR_ADDs are then also inbounds.
4962 unsigned PtrAddFlags = MI.getFlags();
4963 unsigned AddFlags = RHS->getFlags();
4964 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
4965 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
4966 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
4967 unsigned Flags = 0;
4968 if (IsNoUWrap)
4969 Flags |= MachineInstr::MIFlag::NoUWrap;
4970 if (IsNoUSWrap)
4971 Flags |= MachineInstr::MIFlag::NoUSWrap;
4972 if (IsInBounds)
4973 Flags |= MachineInstr::MIFlag::InBounds;
4974
4975 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4976 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4977
4978 auto NewBase =
4979 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
4980 Observer.changingInstr(MI);
4981 MI.getOperand(1).setReg(NewBase.getReg(0));
4982 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4983 MI.setFlags(Flags);
4984 Observer.changedInstr(MI);
4985 };
4986 return !reassociationCanBreakAddressingModePattern(MI);
4987}
4988
4989 bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
4990 MachineInstr *LHS,
4991 MachineInstr *RHS,
4992 BuildFnTy &MatchInfo) const {
4993 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
4994 // if and only if (G_PTR_ADD X, C) has one use.
4995 Register LHSBase;
4996 std::optional<ValueAndVReg> LHSCstOff;
4997 if (!mi_match(MI.getBaseReg(), MRI,
4998 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4999 return false;
5000
5001 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5002
5003 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5004 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5005 // so the new G_PTR_ADDs are also inbounds.
5006 unsigned PtrAddFlags = MI.getFlags();
5007 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5008 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5009 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5010 MachineInstr::MIFlag::NoUSWrap);
5011 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5012 MachineInstr::MIFlag::InBounds);
5013 unsigned Flags = 0;
5014 if (IsNoUWrap)
5015 Flags |= MachineInstr::MIFlag::NoUWrap;
5016 if (IsNoUSWrap)
5017 Flags |= MachineInstr::MIFlag::NoUSWrap;
5018 if (IsInBounds)
5019 Flags |= MachineInstr::MIFlag::InBounds;
5020
5021 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5022 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5023 // before its def. Sink the instruction to just before the outer PTR_ADD to
5024 // ensure this doesn't happen.
5025 LHSPtrAdd->moveBefore(&MI);
5026 Register RHSReg = MI.getOffsetReg();
5027 // Build a fresh constant; reusing the vreg directly could cause a type mismatch if it came through an extend/trunc.
5028 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
5029 Observer.changingInstr(MI);
5030 MI.getOperand(2).setReg(NewCst.getReg(0));
5031 MI.setFlags(Flags);
5032 Observer.changedInstr(MI);
5033 Observer.changingInstr(*LHSPtrAdd);
5034 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5035 LHSPtrAdd->setFlags(Flags);
5036 Observer.changedInstr(*LHSPtrAdd);
5037 };
5038 return !reassociationCanBreakAddressingModePattern(MI);
5039}
5040
5041 bool CombinerHelper::matchReassocFoldConstantsInSubTree(
5042 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5043 BuildFnTy &MatchInfo) const {
5044 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5045 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5046 if (!LHSPtrAdd)
5047 return false;
5048
5049 Register Src2Reg = MI.getOperand(2).getReg();
5050 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5051 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5052 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5053 if (!C1)
5054 return false;
5055 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5056 if (!C2)
5057 return false;
5058
5059 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5060 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5061 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5062 // largest signed integer that fits into the index type, which is the maximum
5063 // size of allocated objects according to the IR Language Reference.
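// E.g. G_PTR_ADD nuw inbounds (G_PTR_ADD nuw inbounds (BASE, 16), 32)
//   -> G_PTR_ADD nuw nusw inbounds (BASE, 48)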
5064 unsigned PtrAddFlags = MI.getFlags();
5065 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5066 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5067 bool IsInBounds =
5068 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5069 unsigned Flags = 0;
5070 if (IsNoUWrap)
5071 Flags |= MachineInstr::MIFlag::NoUWrap;
5072 if (IsInBounds) {
5073 Flags |= MachineInstr::MIFlag::NoUSWrap;
5074 Flags |= MachineInstr::MIFlag::InBounds;
5075 }
5076
5077 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5078 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5079 Observer.changingInstr(MI);
5080 MI.getOperand(1).setReg(LHSSrc1);
5081 MI.getOperand(2).setReg(NewCst.getReg(0));
5082 MI.setFlags(Flags);
5083 Observer.changedInstr(MI);
5084 };
5085 return !reassociationCanBreakAddressingModePattern(MI);
5086}
5087
5088 bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
5089 BuildFnTy &MatchInfo) const {
5090 auto &PtrAdd = cast<GPtrAdd>(MI);
5091 // We're trying to match a few pointer computation patterns here for
5092 // re-association opportunities.
5093 // 1) Isolating a constant operand to be on the RHS, e.g.:
5094 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5095 //
5096 // 2) Folding two constants in each sub-tree as long as such folding
5097 // doesn't break a legal addressing mode.
5098 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5099 //
5100 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5101 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
5102 // iff (G_PTR_ADD X, C) has one use.
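// E.g. for (3): G_PTR_ADD(G_PTR_ADD(X, 42), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), 42)
// when the inner G_PTR_ADD has a single use.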
5103 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5104 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5105
5106 // Try to match example 2.
5107 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5108 return true;
5109
5110 // Try to match example 3.
5111 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5112 return true;
5113
5114 // Try to match example 1.
5115 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5116 return true;
5117
5118 return false;
5119}
5120 bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
5121 Register OpLHS, Register OpRHS,
5122 BuildFnTy &MatchInfo) const {
5123 LLT OpRHSTy = MRI.getType(OpRHS);
5124 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5125
5126 if (OpLHSDef->getOpcode() != Opc)
5127 return false;
5128
5129 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5130 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5131 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5132
5133 // If the inner op is (X op C), pull the constant out so it can be folded with
5134 // other constants in the expression tree. Folding is not guaranteed so we
5135 // might have (C1 op C2). In that case do not pull a constant out because it
5136 // won't help and can lead to infinite loops.
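// E.g. (G_ADD (G_ADD x, 3), 5) becomes (G_ADD x, (G_ADD 3, 5)), which a later
// constant fold reduces to (G_ADD x, 8); an inner (G_ADD 3, 5) is left alone.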
5137 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5138 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5139 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5140 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5141 MatchInfo = [=](MachineIRBuilder &B) {
5142 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5143 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5144 };
5145 return true;
5146 }
5147 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5148 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5149 // iff (op x, c1) has one use
5150 MatchInfo = [=](MachineIRBuilder &B) {
5151 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5152 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5153 };
5154 return true;
5155 }
5156 }
5157
5158 return false;
5159}
5160
5161 bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
5162 BuildFnTy &MatchInfo) const {
5163 // We don't check if the reassociation will break a legal addressing mode
5164 // here since pointer arithmetic is handled by G_PTR_ADD.
5165 unsigned Opc = MI.getOpcode();
5166 Register DstReg = MI.getOperand(0).getReg();
5167 Register LHSReg = MI.getOperand(1).getReg();
5168 Register RHSReg = MI.getOperand(2).getReg();
5169
5170 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5171 return true;
5172 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5173 return true;
5174 return false;
5175}
5176
5177 bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI,
5178 APInt &MatchInfo) const {
5179 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5180 Register SrcOp = MI.getOperand(1).getReg();
5181
5182 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5183 MatchInfo = *MaybeCst;
5184 return true;
5185 }
5186
5187 return false;
5188}
5189
5190 bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI,
5191 APInt &MatchInfo) const {
5192 Register Op1 = MI.getOperand(1).getReg();
5193 Register Op2 = MI.getOperand(2).getReg();
5194 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5195 if (!MaybeCst)
5196 return false;
5197 MatchInfo = *MaybeCst;
5198 return true;
5199}
5200
5201 bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI,
5202 ConstantFP *&MatchInfo) const {
5203 Register Op1 = MI.getOperand(1).getReg();
5204 Register Op2 = MI.getOperand(2).getReg();
5205 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5206 if (!MaybeCst)
5207 return false;
5208 MatchInfo =
5209 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5210 return true;
5211}
5212
5213 bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
5214 ConstantFP *&MatchInfo) const {
5215 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5216 MI.getOpcode() == TargetOpcode::G_FMAD);
5217 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5218
5219 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5220 if (!Op3Cst)
5221 return false;
5222
5223 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5224 if (!Op2Cst)
5225 return false;
5226
5227 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5228 if (!Op1Cst)
5229 return false;
5230
5231 APFloat Op1F = Op1Cst->getValueAPF();
5232 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5233 APFloat::rmNearestTiesToEven);
5234 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5235 return true;
5236}
5237
5238 bool CombinerHelper::matchNarrowBinopFeedingAnd(
5239 MachineInstr &MI,
5240 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5241 // Look for a binop feeding into an AND with a mask:
5242 //
5243 // %add = G_ADD %lhs, %rhs
5244 // %and = G_AND %add, 000...11111111
5245 //
5246 // Check if it's possible to perform the binop at a narrower width and zext
5247 // back to the original width like so:
5248 //
5249 // %narrow_lhs = G_TRUNC %lhs
5250 // %narrow_rhs = G_TRUNC %rhs
5251 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5252 // %new_add = G_ZEXT %narrow_add
5253 // %and = G_AND %new_add, 000...11111111
5254 //
5255 // This can allow later combines to eliminate the G_AND if it turns out
5256 // that the mask is irrelevant.
5257 assert(MI.getOpcode() == TargetOpcode::G_AND);
5258 Register Dst = MI.getOperand(0).getReg();
5259 Register AndLHS = MI.getOperand(1).getReg();
5260 Register AndRHS = MI.getOperand(2).getReg();
5261 LLT WideTy = MRI.getType(Dst);
5262
5263 // If the potential binop has more than one use, then it's possible that one
5264 // of those uses will need its full width.
5265 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5266 return false;
5267
5268 // Check if the LHS feeding the AND is impacted by the high bits that we're
5269 // masking out.
5270 //
5271 // e.g. for 64-bit x, y:
5272 //
5273 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5274 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5275 if (!LHSInst)
5276 return false;
5277 unsigned LHSOpc = LHSInst->getOpcode();
5278 switch (LHSOpc) {
5279 default:
5280 return false;
5281 case TargetOpcode::G_ADD:
5282 case TargetOpcode::G_SUB:
5283 case TargetOpcode::G_MUL:
5284 case TargetOpcode::G_AND:
5285 case TargetOpcode::G_OR:
5286 case TargetOpcode::G_XOR:
5287 break;
5288 }
5289
5290 // Find the mask on the RHS.
5291 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5292 if (!Cst)
5293 return false;
5294 auto Mask = Cst->Value;
5295 if (!Mask.isMask())
5296 return false;
5297
5298 // No point in combining if there's nothing to truncate.
5299 unsigned NarrowWidth = Mask.countr_one();
5300 if (NarrowWidth == WideTy.getSizeInBits())
5301 return false;
5302 LLT NarrowTy = LLT::scalar(NarrowWidth);
5303
5304 // Check if adding the zext + truncates could be harmful.
5305 auto &MF = *MI.getMF();
5306 const auto &TLI = getTargetLowering();
5307 LLVMContext &Ctx = MF.getFunction().getContext();
5308 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5309 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5310 return false;
5311 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5312 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5313 return false;
5314 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5315 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5316 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5317 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5318 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5319 auto NarrowBinOp =
5320 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5321 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5322 Observer.changingInstr(MI);
5323 MI.getOperand(1).setReg(Ext.getReg(0));
5324 Observer.changedInstr(MI);
5325 };
5326 return true;
5327}
5328
5329 bool CombinerHelper::matchMulOBy2(MachineInstr &MI,
5330 BuildFnTy &MatchInfo) const {
5331 unsigned Opc = MI.getOpcode();
5332 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5333
5334 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5335 return false;
5336
5337 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5338 Observer.changingInstr(MI);
5339 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5340 : TargetOpcode::G_SADDO;
5341 MI.setDesc(Builder.getTII().get(NewOpc));
5342 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5343 Observer.changedInstr(MI);
5344 };
5345 return true;
5346}
5347
5348 bool CombinerHelper::matchMulOBy0(MachineInstr &MI,
5349 BuildFnTy &MatchInfo) const {
5350 // (G_*MULO x, 0) -> 0 + no carry out
5351 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5352 MI.getOpcode() == TargetOpcode::G_SMULO);
5353 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5354 return false;
5355 Register Dst = MI.getOperand(0).getReg();
5356 Register Carry = MI.getOperand(1).getReg();
5357 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5358 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5359 return false;
5360 MatchInfo = [=](MachineIRBuilder &B) {
5361 B.buildConstant(Dst, 0);
5362 B.buildConstant(Carry, 0);
5363 };
5364 return true;
5365}
5366
5367 bool CombinerHelper::matchAddEToAddO(MachineInstr &MI,
5368 BuildFnTy &MatchInfo) const {
5369 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5370 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5371 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5372 MI.getOpcode() == TargetOpcode::G_SADDE ||
5373 MI.getOpcode() == TargetOpcode::G_USUBE ||
5374 MI.getOpcode() == TargetOpcode::G_SSUBE);
5375 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5376 return false;
5377 MatchInfo = [&](MachineIRBuilder &B) {
5378 unsigned NewOpcode;
5379 switch (MI.getOpcode()) {
5380 case TargetOpcode::G_UADDE:
5381 NewOpcode = TargetOpcode::G_UADDO;
5382 break;
5383 case TargetOpcode::G_SADDE:
5384 NewOpcode = TargetOpcode::G_SADDO;
5385 break;
5386 case TargetOpcode::G_USUBE:
5387 NewOpcode = TargetOpcode::G_USUBO;
5388 break;
5389 case TargetOpcode::G_SSUBE:
5390 NewOpcode = TargetOpcode::G_SSUBO;
5391 break;
5392 }
5393 Observer.changingInstr(MI);
5394 MI.setDesc(B.getTII().get(NewOpcode));
5395 MI.removeOperand(4);
5396 Observer.changedInstr(MI);
5397 };
5398 return true;
5399}
5400
5401 bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
5402 BuildFnTy &MatchInfo) const {
5403 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5404 Register Dst = MI.getOperand(0).getReg();
5405 // (x + y) - z -> x (if y == z)
5406 // (x + y) - z -> y (if x == z)
5407 Register X, Y, Z;
5408 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5409 Register ReplaceReg;
5410 int64_t CstX, CstY;
5411 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5412 mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5413 ReplaceReg = X;
5414 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5415 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5416 ReplaceReg = Y;
5417 if (ReplaceReg) {
5418 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5419 return true;
5420 }
5421 }
5422
5423 // x - (y + z) -> 0 - y (if x == z)
5424 // x - (y + z) -> 0 - z (if x == y)
5425 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5426 Register ReplaceReg;
5427 int64_t CstX;
5428 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5429 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5430 ReplaceReg = Y;
5431 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5432 mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5433 ReplaceReg = Z;
5434 if (ReplaceReg) {
5435 MatchInfo = [=](MachineIRBuilder &B) {
5436 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5437 B.buildSub(Dst, Zero, ReplaceReg);
5438 };
5439 return true;
5440 }
5441 }
5442 return false;
5443}
5444
5445 MachineInstr *CombinerHelper::buildUDivOrURemUsingMul(MachineInstr &MI) const {
5446 unsigned Opcode = MI.getOpcode();
5447 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5448 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5449 Register Dst = UDivorRem.getReg(0);
5450 Register LHS = UDivorRem.getReg(1);
5451 Register RHS = UDivorRem.getReg(2);
5452 LLT Ty = MRI.getType(Dst);
5453 LLT ScalarTy = Ty.getScalarType();
5454 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5455 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5456 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5457
5458 auto &MIB = Builder;
5459
5460 bool UseSRL = false;
5461 SmallVector<Register, 16> Shifts, Factors;
5462 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5463 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5464
5465 auto BuildExactUDIVPattern = [&](const Constant *C) {
5466 // Don't recompute inverses for each splat element.
5467 if (IsSplat && !Factors.empty()) {
5468 Shifts.push_back(Shifts[0]);
5469 Factors.push_back(Factors[0]);
5470 return true;
5471 }
5472
5473 auto *CI = cast<ConstantInt>(C);
5474 APInt Divisor = CI->getValue();
5475 unsigned Shift = Divisor.countr_zero();
5476 if (Shift) {
5477 Divisor.lshrInPlace(Shift);
5478 UseSRL = true;
5479 }
5480
5481 // Calculate the multiplicative inverse modulo BW.
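// E.g. for an exact i8 udiv by 3 the inverse is 171 (3 * 171 == 513 == 1 mod 256),
// so x /u 3 == x * 171; e.g. 9 * 171 == 1539 == 3 (mod 256).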
5482 APInt Factor = Divisor.multiplicativeInverse();
5483 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5484 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5485 return true;
5486 };
5487
5488 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5489 // Collect all magic values from the build vector.
5490 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5491 llvm_unreachable("Expected unary predicate match to succeed");
5492
5493 Register Shift, Factor;
5494 if (Ty.isVector()) {
5495 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5496 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5497 } else {
5498 Shift = Shifts[0];
5499 Factor = Factors[0];
5500 }
5501
5502 Register Res = LHS;
5503
5504 if (UseSRL)
5505 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5506
5507 return MIB.buildMul(Ty, Res, Factor);
5508 }
5509
5510 unsigned KnownLeadingZeros =
5511 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5512
5513 bool UseNPQ = false;
5514 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5515 auto BuildUDIVPattern = [&](const Constant *C) {
5516 auto *CI = cast<ConstantInt>(C);
5517 const APInt &Divisor = CI->getValue();
5518
5519 bool SelNPQ = false;
5520 APInt Magic(Divisor.getBitWidth(), 0);
5521 unsigned PreShift = 0, PostShift = 0;
5522
5523 // Magic algorithm doesn't work for division by 1. We need to emit a select
5524 // at the end.
5525 // TODO: Use undef values for divisor of 1.
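// E.g. a 32-bit udiv by 10 yields Magic = 0xCCCCCCCD with PostShift = 3 and no
// NPQ fixup, i.e. x /u 10 == G_UMULH(x, 0xCCCCCCCD) >> 3.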
5526 if (!Divisor.isOne()) {
5527
5528 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5529 // in the dividend exceed the leading zeros for the divisor.
5530 UnsignedDivisionByConstantInfo magics =
5531 UnsignedDivisionByConstantInfo::get(
5532 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5533
5534 Magic = std::move(magics.Magic);
5535
5536 assert(magics.PreShift < Divisor.getBitWidth() &&
5537 "We shouldn't generate an undefined shift!");
5538 assert(magics.PostShift < Divisor.getBitWidth() &&
5539 "We shouldn't generate an undefined shift!");
5540 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5541 PreShift = magics.PreShift;
5542 PostShift = magics.PostShift;
5543 SelNPQ = magics.IsAdd;
5544 }
5545
5546 PreShifts.push_back(
5547 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5548 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5549 NPQFactors.push_back(
5550 MIB.buildConstant(ScalarTy,
5551 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5552 : APInt::getZero(EltBits))
5553 .getReg(0));
5554 PostShifts.push_back(
5555 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5556 UseNPQ |= SelNPQ;
5557 return true;
5558 };
5559
5560 // Collect the shifts/magic values from each element.
5561 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5562 (void)Matched;
5563 assert(Matched && "Expected unary predicate match to succeed");
5564
5565 Register PreShift, PostShift, MagicFactor, NPQFactor;
5566 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5567 if (RHSDef) {
5568 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5569 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5570 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5571 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5572 } else {
5573 assert(MRI.getType(RHS).isScalar() &&
5574 "Non-build_vector operation should have been a scalar");
5575 PreShift = PreShifts[0];
5576 MagicFactor = MagicFactors[0];
5577 PostShift = PostShifts[0];
5578 }
5579
5580 Register Q = LHS;
5581 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5582
5583 // Multiply the numerator (operand 0) by the magic value.
5584 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5585
5586 if (UseNPQ) {
5587 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5588
5589 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5590 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5591 if (Ty.isVector())
5592 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5593 else
5594 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5595
5596 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5597 }
5598
5599 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5600 auto One = MIB.buildConstant(Ty, 1);
5601 auto IsOne = MIB.buildICmp(
5602 CmpInst::Predicate::ICMP_EQ,
5603 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5604 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5605
5606 if (Opcode == TargetOpcode::G_UREM) {
5607 auto Prod = MIB.buildMul(Ty, ret, RHS);
5608 return MIB.buildSub(Ty, LHS, Prod);
5609 }
5610 return ret;
5611}
5612
5613 bool CombinerHelper::matchUDivOrURemByConst(MachineInstr &MI) const {
5614 unsigned Opcode = MI.getOpcode();
5615 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5616 Register Dst = MI.getOperand(0).getReg();
5617 Register RHS = MI.getOperand(2).getReg();
5618 LLT DstTy = MRI.getType(Dst);
5619
5620 auto &MF = *MI.getMF();
5621 AttributeList Attr = MF.getFunction().getAttributes();
5622 const auto &TLI = getTargetLowering();
5623 LLVMContext &Ctx = MF.getFunction().getContext();
5624 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5625 return false;
5626
5627 // Don't do this for minsize because the instruction sequence is usually
5628 // larger.
5629 if (MF.getFunction().hasMinSize())
5630 return false;
5631
5632 if (Opcode == TargetOpcode::G_UDIV &&
5633 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5634 return matchUnaryPredicate(
5635 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5636 }
5637
5638 auto *RHSDef = MRI.getVRegDef(RHS);
5639 if (!isConstantOrConstantVector(*RHSDef, MRI))
5640 return false;
5641
5642 // Don't do this if the types are not going to be legal.
5643 if (LI) {
5644 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5645 return false;
5646 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5647 return false;
5648 if (!isLegalOrBeforeLegalizer(
5649 {TargetOpcode::G_ICMP,
5650 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5651 DstTy}}))
5652 return false;
5653 if (Opcode == TargetOpcode::G_UREM &&
5654 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5655 return false;
5656 }
5657
5658 return matchUnaryPredicate(
5659 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5660}
5661
5662 void CombinerHelper::applyUDivOrURemByConst(MachineInstr &MI) const {
5663 auto *NewMI = buildUDivOrURemUsingMul(MI);
5664 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5665}
5666
5667 bool CombinerHelper::matchSDivOrSRemByConst(MachineInstr &MI) const {
5668 unsigned Opcode = MI.getOpcode();
5669 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5670 Register Dst = MI.getOperand(0).getReg();
5671 Register RHS = MI.getOperand(2).getReg();
5672 LLT DstTy = MRI.getType(Dst);
5673 auto SizeInBits = DstTy.getScalarSizeInBits();
5674 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5675
5676 auto &MF = *MI.getMF();
5677 AttributeList Attr = MF.getFunction().getAttributes();
5678 const auto &TLI = getTargetLowering();
5679 LLVMContext &Ctx = MF.getFunction().getContext();
5680 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5681 return false;
5682
5683 // Don't do this for minsize because the instruction sequence is usually
5684 // larger.
5685 if (MF.getFunction().hasMinSize())
5686 return false;
5687
5688 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5689 if (Opcode == TargetOpcode::G_SDIV &&
5690 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5691 return matchUnaryPredicate(
5692 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5693 }
5694
5695 auto *RHSDef = MRI.getVRegDef(RHS);
5696 if (!isConstantOrConstantVector(*RHSDef, MRI))
5697 return false;
5698
5699 // Don't do this if the types are not going to be legal.
5700 if (LI) {
5701 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5702 return false;
5703 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5704 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5705 return false;
5706 if (Opcode == TargetOpcode::G_SREM &&
5707 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5708 return false;
5709 }
5710
5711 return matchUnaryPredicate(
5712 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5713}
5714
5715 void CombinerHelper::applySDivOrSRemByConst(MachineInstr &MI) const {
5716 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5717 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5718}
5719
5720 MachineInstr *CombinerHelper::buildSDivOrSRemUsingMul(MachineInstr &MI) const {
5721 unsigned Opcode = MI.getOpcode();
5722 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5723 Opcode == TargetOpcode::G_SREM);
5724 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5725 Register Dst = SDivorRem.getReg(0);
5726 Register LHS = SDivorRem.getReg(1);
5727 Register RHS = SDivorRem.getReg(2);
5728 LLT Ty = MRI.getType(Dst);
5729 LLT ScalarTy = Ty.getScalarType();
5730 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5731 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5732 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5733 auto &MIB = Builder;
5734
5735 bool UseSRA = false;
5736 SmallVector<Register, 16> ExactShifts, ExactFactors;
5737
5738 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5739 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5740
5741 auto BuildExactSDIVPattern = [&](const Constant *C) {
5742 // Don't recompute inverses for each splat element.
5743 if (IsSplat && !ExactFactors.empty()) {
5744 ExactShifts.push_back(ExactShifts[0]);
5745 ExactFactors.push_back(ExactFactors[0]);
5746 return true;
5747 }
5748
5749 auto *CI = cast<ConstantInt>(C);
5750 APInt Divisor = CI->getValue();
5751 unsigned Shift = Divisor.countr_zero();
5752 if (Shift) {
5753 Divisor.ashrInPlace(Shift);
5754 UseSRA = true;
5755 }
5756
5757 // Calculate the multiplicative inverse modulo BW.
5758 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5759 APInt Factor = Divisor.multiplicativeInverse();
5760 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5761 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5762 return true;
5763 };
5764
5765 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5766 // Collect all magic values from the build vector.
5767 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5768 (void)Matched;
5769 assert(Matched && "Expected unary predicate match to succeed");
5770
5771 Register Shift, Factor;
5772 if (Ty.isVector()) {
5773 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5774 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5775 } else {
5776 Shift = ExactShifts[0];
5777 Factor = ExactFactors[0];
5778 }
5779
5780 Register Res = LHS;
5781
5782 if (UseSRA)
5783 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5784
5785 return MIB.buildMul(Ty, Res, Factor);
5786 }
5787
5788 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5789
5790 auto BuildSDIVPattern = [&](const Constant *C) {
5791 auto *CI = cast<ConstantInt>(C);
5792 const APInt &Divisor = CI->getValue();
5793
5794 SignedDivisionByConstantInfo Magics =
5795 SignedDivisionByConstantInfo::get(Divisor);
5796 int NumeratorFactor = 0;
5797 int ShiftMask = -1;
5798
5799 if (Divisor.isOne() || Divisor.isAllOnes()) {
5800 // If d is +1/-1, we just multiply the numerator by +1/-1.
5801 NumeratorFactor = Divisor.getSExtValue();
5802 Magics.Magic = 0;
5803 Magics.ShiftAmount = 0;
5804 ShiftMask = 0;
5805 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5806 // If d > 0 and m < 0, add the numerator.
5807 NumeratorFactor = 1;
5808 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5809 // If d < 0 and m > 0, subtract the numerator.
5810 NumeratorFactor = -1;
5811 }
5812
5813 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5814 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5815 Shifts.push_back(
5816 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5817 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5818
5819 return true;
5820 };
5821
5822 // Collect the shifts/magic values from each element.
5823 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5824 (void)Matched;
5825 assert(Matched && "Expected unary predicate match to succeed");
5826
5827 Register MagicFactor, Factor, Shift, ShiftMask;
5828 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5829 if (RHSDef) {
5830 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5831 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5832 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5833 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
5834 } else {
5835 assert(MRI.getType(RHS).isScalar() &&
5836 "Non-build_vector operation should have been a scalar");
5837 MagicFactor = MagicFactors[0];
5838 Factor = Factors[0];
5839 Shift = Shifts[0];
5840 ShiftMask = ShiftMasks[0];
5841 }
5842
5843 Register Q = LHS;
5844 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5845
5846 // (Optionally) Add/subtract the numerator using Factor.
5847 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
5848 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
5849
5850 // Shift right algebraic by shift value.
5851 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
5852
5853 // Extract the sign bit, mask it and add it to the quotient.
5854 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
5855 auto T = MIB.buildLShr(Ty, Q, SignShift);
5856 T = MIB.buildAnd(Ty, T, ShiftMask);
5857 auto ret = MIB.buildAdd(Ty, Q, T);
5858
5859 if (Opcode == TargetOpcode::G_SREM) {
5860 auto Prod = MIB.buildMul(Ty, ret, RHS);
5861 return MIB.buildSub(Ty, LHS, Prod);
5862 }
5863 return ret;
5864}
5865
5866 bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) const {
5867 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5868 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5869 "Expected SDIV or UDIV");
5870 auto &Div = cast<GenericMachineInstr>(MI);
5871 Register RHS = Div.getReg(2);
5872 auto MatchPow2 = [&](const Constant *C) {
5873 auto *CI = dyn_cast<ConstantInt>(C);
5874 return CI && (CI->getValue().isPowerOf2() ||
5875 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5876 };
5877 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5878}
5879
5880 void CombinerHelper::applySDivByPow2(MachineInstr &MI) const {
5881 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5882 auto &SDiv = cast<GenericMachineInstr>(MI);
5883 Register Dst = SDiv.getReg(0);
5884 Register LHS = SDiv.getReg(1);
5885 Register RHS = SDiv.getReg(2);
5886 LLT Ty = MRI.getType(Dst);
5887 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5888 LLT CCVT =
5889 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5890
5891 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5892 // to the following version:
5893 //
5894 // %c1 = G_CTTZ %rhs
5895 // %inexact = G_SUB $bitwidth, %c1
5896 // %sign = %G_ASHR %lhs, $(bitwidth - 1)
5897 // %lshr = G_LSHR %sign, %inexact
5898 // %add = G_ADD %lhs, %lshr
5899 // %ashr = G_ASHR %add, %c1
5900 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5901 // %zero = G_CONSTANT $0
5902 // %neg = G_NEG %ashr
5903 // %isneg = G_ICMP SLT %rhs, %zero
5904 // %res = G_SELECT %isneg, %neg, %ashr
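// E.g. for i32 %lhs = -7, %rhs = 4: %c1 = 2, %inexact = 30, %sign = -1,
// %lshr = 3, %add = -4, %ashr = -1; %rhs is positive, so the result stays -1,
// which is -7 sdiv 4 rounded towards zero.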
5905
5906 unsigned BitWidth = Ty.getScalarSizeInBits();
5907 auto Zero = Builder.buildConstant(Ty, 0);
5908
5909 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5910 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5911 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5912 // Splat the sign bit into the register
5913 auto Sign = Builder.buildAShr(
5914 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5915
5916 // Add (LHS < 0) ? abs2 - 1 : 0;
5917 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5918 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5919 auto AShr = Builder.buildAShr(Ty, Add, C1);
5920
5921 // Special case: (sdiv X, 1) -> X
5922 // Special case: (sdiv X, -1) -> 0-X
5923 auto One = Builder.buildConstant(Ty, 1);
5924 auto MinusOne = Builder.buildConstant(Ty, -1);
5925 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5926 auto IsMinusOne =
5927 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5928 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5929 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5930
5931 // If divided by a positive value, we're done. Otherwise, the result must be
5932 // negated.
5933 auto Neg = Builder.buildNeg(Ty, AShr);
5934 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5935 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5936 MI.eraseFromParent();
5937}
5938
5939 void CombinerHelper::applyUDivByPow2(MachineInstr &MI) const {
5940 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5941 auto &UDiv = cast<GenericMachineInstr>(MI);
5942 Register Dst = UDiv.getReg(0);
5943 Register LHS = UDiv.getReg(1);
5944 Register RHS = UDiv.getReg(2);
5945 LLT Ty = MRI.getType(Dst);
5946 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5947
5948 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5949 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5950 MI.eraseFromParent();
5951}
5952
5953 bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) const {
5954 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5955 Register RHS = MI.getOperand(2).getReg();
5956 Register Dst = MI.getOperand(0).getReg();
5957 LLT Ty = MRI.getType(Dst);
5958 LLT RHSTy = MRI.getType(RHS);
5959 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5960 auto MatchPow2ExceptOne = [&](const Constant *C) {
5961 if (auto *CI = dyn_cast<ConstantInt>(C))
5962 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5963 return false;
5964 };
5965 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5966 return false;
5967 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
5968 // get log base 2, and it is not always legal on a target.
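// E.g. for i32: G_UMULH(x, 8) == (x * 8) >> 32 == x >> (32 - 3) == x >> 29.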
5969 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
5970 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
5971}
5972
5973 void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const {
5974 Register LHS = MI.getOperand(1).getReg();
5975 Register RHS = MI.getOperand(2).getReg();
5976 Register Dst = MI.getOperand(0).getReg();
5977 LLT Ty = MRI.getType(Dst);
5978 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5979 unsigned NumEltBits = Ty.getScalarSizeInBits();
5980
5981 auto LogBase2 = buildLogBase2(RHS, Builder);
5982 auto ShiftAmt =
5983 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5984 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5985 Builder.buildLShr(Dst, LHS, Trunc);
5986 MI.eraseFromParent();
5987}
5988
5990 Register &MatchInfo) const {
5991 Register Dst = MI.getOperand(0).getReg();
5992 Register Src = MI.getOperand(1).getReg();
5993 LLT DstTy = MRI.getType(Dst);
5994 LLT SrcTy = MRI.getType(Src);
5995 unsigned NumDstBits = DstTy.getScalarSizeInBits();
5996 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
5997 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
5998
5999 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6000 return false;
6001
6002 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6003 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
6004 return mi_match(Src, MRI,
6005 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6006 m_SpecificICstOrSplat(SignedMin)),
6007 m_SpecificICstOrSplat(SignedMax))) ||
6008 mi_match(Src, MRI,
6009 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6010 m_SpecificICstOrSplat(SignedMax)),
6011 m_SpecificICstOrSplat(SignedMin)));
6012}
6013
6015 Register &MatchInfo) const {
6016 Register Dst = MI.getOperand(0).getReg();
6017 Builder.buildTruncSSatS(Dst, MatchInfo);
6018 MI.eraseFromParent();
6019}
6020
6022 Register &MatchInfo) const {
6023 Register Dst = MI.getOperand(0).getReg();
6024 Register Src = MI.getOperand(1).getReg();
6025 LLT DstTy = MRI.getType(Dst);
6026 LLT SrcTy = MRI.getType(Src);
6027 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6028 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6029 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6030
6031 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6032 return false;
6033 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6034 return mi_match(Src, MRI,
6036 m_SpecificICstOrSplat(UnsignedMax))) ||
6037 mi_match(Src, MRI,
6038 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6039 m_SpecificICstOrSplat(UnsignedMax)),
6040 m_SpecificICstOrSplat(0))) ||
6041 mi_match(Src, MRI,
6043 m_SpecificICstOrSplat(UnsignedMax)));
6044}
6045
6047 Register &MatchInfo) const {
6048 Register Dst = MI.getOperand(0).getReg();
6049 Builder.buildTruncSSatU(Dst, MatchInfo);
6050 MI.eraseFromParent();
6051}
6052
6054 MachineInstr &MinMI) const {
6055 Register Min = MinMI.getOperand(2).getReg();
6056 Register Val = MinMI.getOperand(1).getReg();
6057 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6058 LLT SrcTy = MRI.getType(Val);
6059 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6060 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6061 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6062
6063 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6064 return false;
6065 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6066 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6067 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6068}
6069
6071 MachineInstr &SrcMI) const {
6072 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6073 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6074
6075 return LI &&
6076 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6077}
6078
6079 bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
6080 BuildFnTy &MatchInfo) const {
6081 unsigned Opc = MI.getOpcode();
6082 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6083 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6084 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6085
6086 Register Dst = MI.getOperand(0).getReg();
6087 Register X = MI.getOperand(1).getReg();
6088 Register Y = MI.getOperand(2).getReg();
6089 LLT Type = MRI.getType(Dst);
6090
6091 // fold (fadd x, fneg(y)) -> (fsub x, y)
6092 // fold (fadd fneg(y), x) -> (fsub x, y)
6093 // G_ADD is commutative so both cases are checked by m_GFAdd
6094 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6095 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6096 Opc = TargetOpcode::G_FSUB;
6097 }
6098 /// fold (fsub x, fneg(y)) -> (fadd x, y)
6099 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6100 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6101 Opc = TargetOpcode::G_FADD;
6102 }
6103 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6104 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6105 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6106 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6107 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6108 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6109 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6110 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6111 // no opcode change
6112 } else
6113 return false;
6114
6115 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6116 Observer.changingInstr(MI);
6117 MI.setDesc(B.getTII().get(Opc));
6118 MI.getOperand(1).setReg(X);
6119 MI.getOperand(2).setReg(Y);
6120 Observer.changedInstr(MI);
6121 };
6122 return true;
6123}
6124
6125 bool CombinerHelper::matchFsubToFneg(MachineInstr &MI,
6126 Register &MatchInfo) const {
6127 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6128
6129 Register LHS = MI.getOperand(1).getReg();
6130 MatchInfo = MI.getOperand(2).getReg();
6131 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6132
6133 const auto LHSCst = Ty.isVector()
6134 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6135 : getFConstantVRegValWithLookThrough(LHS, MRI);
6136 if (!LHSCst)
6137 return false;
6138
6139 // -0.0 is always allowed
6140 if (LHSCst->Value.isNegZero())
6141 return true;
6142
6143 // +0.0 is only allowed if nsz is set.
6144 if (LHSCst->Value.isPosZero())
6145 return MI.getFlag(MachineInstr::FmNsz);
6146
6147 return false;
6148}
6149
6150 void CombinerHelper::applyFsubToFneg(MachineInstr &MI,
6151 Register &MatchInfo) const {
6152 Register Dst = MI.getOperand(0).getReg();
6153 Builder.buildFNeg(
6154 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6155 eraseInst(MI);
6156}
6157
6158/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6159/// due to global flags or MachineInstr flags.
6160static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6161 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6162 return false;
6163 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6164}
6165
6166static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6167 const MachineRegisterInfo &MRI) {
6168 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6169 MRI.use_instr_nodbg_end()) >
6170 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6171 MRI.use_instr_nodbg_end());
6172}
6173
6174 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
6175 bool &AllowFusionGlobally,
6176 bool &HasFMAD, bool &Aggressive,
6177 bool CanReassociate) const {
6178
6179 auto *MF = MI.getMF();
6180 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6181 const TargetOptions &Options = MF->getTarget().Options;
6182 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6183
6184 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6185 return false;
6186
6187 // Floating-point multiply-add with intermediate rounding.
6188 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6189 // Floating-point multiply-add without intermediate rounding.
6190 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6191 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6192 // No valid opcode, do not combine.
6193 if (!HasFMAD && !HasFMA)
6194 return false;
6195
6196 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6197 // If the addition is not contractable, do not combine.
6198 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6199 return false;
6200
6201 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6202 return true;
6203}
6204
6205 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
6206 MachineInstr &MI,
6207 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6208 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6209
6210 bool AllowFusionGlobally, HasFMAD, Aggressive;
6211 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6212 return false;
6213
6214 Register Op1 = MI.getOperand(1).getReg();
6215 Register Op2 = MI.getOperand(2).getReg();
6216 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6217 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6218 unsigned PreferredFusedOpcode =
6219 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6220
6221 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6222 // prefer to fold the multiply with fewer uses.
6223 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6224 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6225 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6226 std::swap(LHS, RHS);
6227 }
6228
6229 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6230 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6231 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6232 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6233 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6234 {LHS.MI->getOperand(1).getReg(),
6235 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6236 };
6237 return true;
6238 }
6239
6240 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6241 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6242 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6243 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6244 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6245 {RHS.MI->getOperand(1).getReg(),
6246 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6247 };
6248 return true;
6249 }
6250
6251 return false;
6252}
6253
6254 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
6255 MachineInstr &MI,
6256 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6257 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6258
6259 bool AllowFusionGlobally, HasFMAD, Aggressive;
6260 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6261 return false;
6262
6263 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6264 Register Op1 = MI.getOperand(1).getReg();
6265 Register Op2 = MI.getOperand(2).getReg();
6266 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6267 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6268 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6269
6270 unsigned PreferredFusedOpcode =
6271 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6272
6273 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6274 // prefer to fold the multiply with fewer uses.
6275 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6276 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6277 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6278 std::swap(LHS, RHS);
6279 }
6280
6281 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
6282 MachineInstr *FpExtSrc;
6283 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6284 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6285 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6286 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6287 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6288 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6289 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6290 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6291 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6292 };
6293 return true;
6294 }
6295
6296 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6297 // Note: Commutes FADD operands.
6298 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6299 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6300 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6301 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6302 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6303 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6304 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6305 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6306 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6307 };
6308 return true;
6309 }
6310
6311 return false;
6312}
6313
6314 bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
6315 MachineInstr &MI,
6316 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6317 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6318
6319 bool AllowFusionGlobally, HasFMAD, Aggressive;
6320 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6321 return false;
6322
6323 Register Op1 = MI.getOperand(1).getReg();
6324 Register Op2 = MI.getOperand(2).getReg();
6325 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6326 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6327 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6328
6329 unsigned PreferredFusedOpcode =
6330 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6331
6332 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6333 // prefer to fold the multiply with fewer uses.
6334 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6335 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6336 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6337 std::swap(LHS, RHS);
6338 }
6339
6340 MachineInstr *FMA = nullptr;
6341 Register Z;
6342 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6343 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6344 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6345 TargetOpcode::G_FMUL) &&
6346 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6347 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6348 FMA = LHS.MI;
6349 Z = RHS.Reg;
6350 }
6351 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6352 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6353 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6354 TargetOpcode::G_FMUL) &&
6355 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6356 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6357 Z = LHS.Reg;
6358 FMA = RHS.MI;
6359 }
6360
6361 if (FMA) {
6362 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6363 Register X = FMA->getOperand(1).getReg();
6364 Register Y = FMA->getOperand(2).getReg();
6365 Register U = FMulMI->getOperand(1).getReg();
6366 Register V = FMulMI->getOperand(2).getReg();
6367
6368 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6369 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6370 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6371 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6372 {X, Y, InnerFMA});
6373 };
6374 return true;
6375 }
6376
6377 return false;
6378}
6379
6380 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
6381 MachineInstr &MI,
6382 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6383 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6384
6385 bool AllowFusionGlobally, HasFMAD, Aggressive;
6386 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6387 return false;
6388
6389 if (!Aggressive)
6390 return false;
6391
6392 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6393 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6394 Register Op1 = MI.getOperand(1).getReg();
6395 Register Op2 = MI.getOperand(2).getReg();
6396 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6397 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6398
6399 unsigned PreferredFusedOpcode =
6400 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6401
6402 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6403 // prefer to fold the multiply with fewer uses.
6404 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6405 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6406 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6407 std::swap(LHS, RHS);
6408 }
6409
6410 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
6411 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6412 Register Y, MachineIRBuilder &B) {
6413 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6414 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6415 Register InnerFMA =
6416 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6417 .getReg(0);
6418 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6419 {X, Y, InnerFMA});
6420 };
6421
6422 MachineInstr *FMulMI, *FMAMI;
6423 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6424 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6425 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6426 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6427 m_GFPExt(m_MInstr(FMulMI))) &&
6428 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6429 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6430 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6431 MatchInfo = [=](MachineIRBuilder &B) {
6432 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6433 FMulMI->getOperand(2).getReg(), RHS.Reg,
6434 LHS.MI->getOperand(1).getReg(),
6435 LHS.MI->getOperand(2).getReg(), B);
6436 };
6437 return true;
6438 }
6439
6440 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6441 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6442 // FIXME: This turns two single-precision and one double-precision
6443 // operation into two double-precision operations, which might not be
6444 // interesting for all targets, especially GPUs.
6445 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6446 FMAMI->getOpcode() == PreferredFusedOpcode) {
6447 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6448 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6449 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6450 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6451 MatchInfo = [=](MachineIRBuilder &B) {
6452 Register X = FMAMI->getOperand(1).getReg();
6453 Register Y = FMAMI->getOperand(2).getReg();
6454 X = B.buildFPExt(DstType, X).getReg(0);
6455 Y = B.buildFPExt(DstType, Y).getReg(0);
6456 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6457 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6458 };
6459
6460 return true;
6461 }
6462 }
6463
6464 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
6465 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6466 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6467 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6468 m_GFPExt(m_MInstr(FMulMI))) &&
6469 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6470 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6471 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6472 MatchInfo = [=](MachineIRBuilder &B) {
6473 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6474 FMulMI->getOperand(2).getReg(), LHS.Reg,
6475 RHS.MI->getOperand(1).getReg(),
6476 RHS.MI->getOperand(2).getReg(), B);
6477 };
6478 return true;
6479 }
6480
6481 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
6482 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6483 // FIXME: This turns two single-precision and one double-precision
6484 // operation into two double-precision operations, which might not be
6485 // interesting for all targets, especially GPUs.
6486 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6487 FMAMI->getOpcode() == PreferredFusedOpcode) {
6488 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6489 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6490 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6491 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6492 MatchInfo = [=](MachineIRBuilder &B) {
6493 Register X = FMAMI->getOperand(1).getReg();
6494 Register Y = FMAMI->getOperand(2).getReg();
6495 X = B.buildFPExt(DstType, X).getReg(0);
6496 Y = B.buildFPExt(DstType, Y).getReg(0);
6497 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6498 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6499 };
6500 return true;
6501 }
6502 }
6503
6504 return false;
6505}
6506
6507 bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
6508 MachineInstr &MI,
6509 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6510 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6511
6512 bool AllowFusionGlobally, HasFMAD, Aggressive;
6513 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6514 return false;
6515
6516 Register Op1 = MI.getOperand(1).getReg();
6517 Register Op2 = MI.getOperand(2).getReg();
6518 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6519 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6520 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6521
6522 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6523 // prefer to fold the multiply with fewer uses.
6524 int FirstMulHasFewerUses = true;
6525 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6526 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6527 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6528 FirstMulHasFewerUses = false;
6529
6530 unsigned PreferredFusedOpcode =
6531 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6532
6533 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6534 if (FirstMulHasFewerUses &&
6535 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6536 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6537 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6538 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6539 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6540 {LHS.MI->getOperand(1).getReg(),
6541 LHS.MI->getOperand(2).getReg(), NegZ});
6542 };
6543 return true;
6544 }
6545 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6546 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6547 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6548 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6549 Register NegY =
6550 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6551 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6552 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6553 };
6554 return true;
6555 }
6556
6557 return false;
6558}
6559
6560 bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
6561 MachineInstr &MI,
6562 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6563 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6564
6565 bool AllowFusionGlobally, HasFMAD, Aggressive;
6566 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6567 return false;
6568
6569 Register LHSReg = MI.getOperand(1).getReg();
6570 Register RHSReg = MI.getOperand(2).getReg();
6571 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6572
6573 unsigned PreferredFusedOpcode =
6574 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6575
6576 MachineInstr *FMulMI;
6577 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6578 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6579 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6580 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6581 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6582 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6583 Register NegX =
6584 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6585 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6586 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6587 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6588 };
6589 return true;
6590 }
6591
6592 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
6593 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6594 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6595 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6596 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6597 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6598 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6599 {FMulMI->getOperand(1).getReg(),
6600 FMulMI->getOperand(2).getReg(), LHSReg});
6601 };
6602 return true;
6603 }
6604
6605 return false;
6606}
6607
6608 bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
6609 MachineInstr &MI,
6610 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6611 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6612
6613 bool AllowFusionGlobally, HasFMAD, Aggressive;
6614 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6615 return false;
6616
6617 Register LHSReg = MI.getOperand(1).getReg();
6618 Register RHSReg = MI.getOperand(2).getReg();
6619 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6620
6621 unsigned PreferredFusedOpcode =
6622 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6623
6624 MachineInstr *FMulMI;
6625 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6626 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6627 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6628 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6629 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6630 Register FpExtX =
6631 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6632 Register FpExtY =
6633 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6634 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6635 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6636 {FpExtX, FpExtY, NegZ});
6637 };
6638 return true;
6639 }
6640
6641 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6642 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6643 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6644 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6645 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6646 Register FpExtY =
6647 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6648 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6649 Register FpExtZ =
6650 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6651 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6652 {NegY, FpExtZ, LHSReg});
6653 };
6654 return true;
6655 }
6656
6657 return false;
6658}
6659
6662 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6663 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6664
6665 bool AllowFusionGlobally, HasFMAD, Aggressive;
6666 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6667 return false;
6668
6669 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6670 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6671 Register LHSReg = MI.getOperand(1).getReg();
6672 Register RHSReg = MI.getOperand(2).getReg();
6673
6674 unsigned PreferredFusedOpcode =
6675 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6676
6677 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6679 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6680 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6681 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6682 };
6683
6684 MachineInstr *FMulMI;
6685 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6686 // (fneg (fma (fpext x), (fpext y), z))
6687 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6688 // (fneg (fma (fpext x), (fpext y), z))
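  // Hedged note: G_FNEG only flips the sign bit and G_FPEXT preserves it, so
  // the two orderings above compute the same value and share one rewrite.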
6689 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6690 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6691 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6692 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6693 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6694 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6695 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6696 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6697 FMulMI->getOperand(2).getReg(), RHSReg, B);
6698 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6699 };
6700 return true;
6701 }
6702
6703 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6704 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6705 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6706 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6707 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6708 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6709 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6710 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6711 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6712 FMulMI->getOperand(2).getReg(), LHSReg, B);
6713 };
6714 return true;
6715 }
6716
6717 return false;
6718}
6719
6721 unsigned &IdxToPropagate) const {
6722 bool PropagateNaN;
6723 switch (MI.getOpcode()) {
6724 default:
6725 return false;
6726 case TargetOpcode::G_FMINNUM:
6727 case TargetOpcode::G_FMAXNUM:
6728 PropagateNaN = false;
6729 break;
6730 case TargetOpcode::G_FMINIMUM:
6731 case TargetOpcode::G_FMAXIMUM:
6732 PropagateNaN = true;
6733 break;
6734 }
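  // Hedged reminder of the semantics used here: the "num" flavours ignore a
  // (quiet) NaN operand, e.g. G_FMINNUM(NaN, 4.0) == 4.0, so we propagate the
  // other operand; the "imum" flavours propagate NaN, e.g.
  // G_FMINIMUM(NaN, 4.0) == NaN, so we propagate the NaN operand itself.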
6735
6736 auto MatchNaN = [&](unsigned Idx) {
6737 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6738 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6739 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6740 return false;
6741 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6742 return true;
6743 };
6744
6745 return MatchNaN(1) || MatchNaN(2);
6746}
6747
6749 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6750 Register LHS = MI.getOperand(1).getReg();
6751 Register RHS = MI.getOperand(2).getReg();
6752
6753 // Helper lambda to check for opportunities for
6754 // A + (B - A) -> B
6755 // (B - A) + A -> B
6756 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6757 Register Reg;
6758 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6759 Reg == MaybeSameReg;
6760 };
6761 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6762}
6763
6765 Register &MatchInfo) const {
6766 // This combine folds the following patterns:
6767 //
6768 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6769 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6770 // into
6771 // x
6772 // if
 6773  //   k == the destination element size in bits (DstEltTy.getSizeInBits())
6774 // type(x) == type(dst)
6775 //
6776 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6777 // into
6778 // x
6779 // if
6780 // type(x) == type(dst)
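  // Hedged schematic example, assuming lane 0 of x lands in the low bits of
  // the bitcast:
  //   %cast:_(s32) = G_BITCAST %x(<2 x s16>)
  //   %hi:_(s32)   = G_LSHR %cast, 16
  //   %dst:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %cast(s32), %hi(s32)
  // reproduces %x, since k == 16 == DstEltTy.getSizeInBits().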
6781
6782 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6783 LLT DstEltTy = DstVecTy.getElementType();
6784
6785 Register Lo, Hi;
6786
6787 if (mi_match(
6788 MI, MRI,
6790 MatchInfo = Lo;
6791 return MRI.getType(MatchInfo) == DstVecTy;
6792 }
6793
6794 std::optional<ValueAndVReg> ShiftAmount;
6795 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6796 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6797 if (mi_match(
6798 MI, MRI,
6799 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6800 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6801 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6802 MatchInfo = Lo;
6803 return MRI.getType(MatchInfo) == DstVecTy;
6804 }
6805 }
6806
6807 return false;
6808}
6809
6811 Register &MatchInfo) const {
6812 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6813 // if type(x) == type(G_TRUNC)
6814 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6815 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6816 return false;
6817
6818 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6819}
6820
6822 Register &MatchInfo) const {
6823 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6824 // y if K == size of vector element type
6825 std::optional<ValueAndVReg> ShiftAmt;
6826 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6828 m_GCst(ShiftAmt))))
6829 return false;
6830
6831 LLT MatchTy = MRI.getType(MatchInfo);
6832 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6833 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6834}
6835
6836unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6837 CmpInst::Predicate Pred, LLT DstTy,
6838 SelectPatternNaNBehaviour VsNaNRetVal) const {
6839 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6840 "Expected a NaN behaviour?");
6841 // Choose an opcode based off of legality or the behaviour when one of the
6842 // LHS/RHS may be NaN.
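  // If the select is known to return the non-NaN operand, the NaN-ignoring
  // "num" opcodes are an exact match; if it returns the NaN, the
  // NaN-propagating "imum" opcodes are. Otherwise fall back to whichever of
  // the two is legal for DstTy.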
6843 switch (Pred) {
6844 default:
6845 return 0;
6846 case CmpInst::FCMP_UGT:
6847 case CmpInst::FCMP_UGE:
6848 case CmpInst::FCMP_OGT:
6849 case CmpInst::FCMP_OGE:
6850 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6851 return TargetOpcode::G_FMAXNUM;
6852 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6853 return TargetOpcode::G_FMAXIMUM;
6854 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6855 return TargetOpcode::G_FMAXNUM;
6856 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6857 return TargetOpcode::G_FMAXIMUM;
6858 return 0;
6859 case CmpInst::FCMP_ULT:
6860 case CmpInst::FCMP_ULE:
6861 case CmpInst::FCMP_OLT:
6862 case CmpInst::FCMP_OLE:
6863 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6864 return TargetOpcode::G_FMINNUM;
6865 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6866 return TargetOpcode::G_FMINIMUM;
6867 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6868 return TargetOpcode::G_FMINNUM;
6869 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6870 return 0;
6871 return TargetOpcode::G_FMINIMUM;
6872 }
6873}
6874
6875CombinerHelper::SelectPatternNaNBehaviour
6876CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6877 bool IsOrderedComparison) const {
6878 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6879 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6880 // Completely unsafe.
6881 if (!LHSSafe && !RHSSafe)
6882 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6883 if (LHSSafe && RHSSafe)
6884 return SelectPatternNaNBehaviour::RETURNS_ANY;
6885 // An ordered comparison will return false when given a NaN, so it
6886 // returns the RHS.
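  // E.g. for select (fcmp olt x, y), x, y where only y may be NaN: a NaN y
  // makes the ordered compare false, so the select yields y, the NaN itself.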
6887 if (IsOrderedComparison)
6888 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6889 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6890 // An unordered comparison will return true when given a NaN, so it
6891 // returns the LHS.
6892 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6893 : SelectPatternNaNBehaviour::RETURNS_NAN;
6894}
6895
6896bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6897 Register TrueVal, Register FalseVal,
6898 BuildFnTy &MatchInfo) const {
6899 // Match: select (fcmp cond x, y) x, y
6900 // select (fcmp cond x, y) y, x
6901 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6902 LLT DstTy = MRI.getType(Dst);
6903 // Bail out early on pointers, since we'll never want to fold to a min/max.
6904 if (DstTy.isPointer())
6905 return false;
6906 // Match a floating point compare with a less-than/greater-than predicate.
6907 // TODO: Allow multiple users of the compare if they are all selects.
6908 CmpInst::Predicate Pred;
6909 Register CmpLHS, CmpRHS;
6910 if (!mi_match(Cond, MRI,
6912 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6913 CmpInst::isEquality(Pred))
6914 return false;
6915 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6916 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6917 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6918 return false;
6919 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6920 std::swap(CmpLHS, CmpRHS);
6921 Pred = CmpInst::getSwappedPredicate(Pred);
6922 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6923 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6924 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6925 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6926 }
6927 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6928 return false;
6929 // Decide what type of max/min this should be based off of the predicate.
6930 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6931 if (!Opc || !isLegal({Opc, {DstTy}}))
6932 return false;
6933 // Comparisons between signed zero and zero may have different results...
6934 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6935 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6936 // We don't know if a comparison between two 0s will give us a consistent
6937 // result. Be conservative and only proceed if at least one side is
6938 // non-zero.
6939 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6940 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6941 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6942 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6943 return false;
6944 }
6945 }
6946 MatchInfo = [=](MachineIRBuilder &B) {
6947 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6948 };
6949 return true;
6950}
6951
6953 BuildFnTy &MatchInfo) const {
6954 // TODO: Handle integer cases.
6955 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6956 // Condition may be fed by a truncated compare.
6957 Register Cond = MI.getOperand(1).getReg();
6958 Register MaybeTrunc;
6959 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6960 Cond = MaybeTrunc;
6961 Register Dst = MI.getOperand(0).getReg();
6962 Register TrueVal = MI.getOperand(2).getReg();
6963 Register FalseVal = MI.getOperand(3).getReg();
6964 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6965}
6966
6968 BuildFnTy &MatchInfo) const {
6969 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6970 // (X + Y) == X --> Y == 0
6971 // (X + Y) != X --> Y != 0
6972 // (X - Y) == X --> Y == 0
6973 // (X - Y) != X --> Y != 0
6974 // (X ^ Y) == X --> Y == 0
6975 // (X ^ Y) != X --> Y != 0
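  // These hold because eq/ne lets us cancel the shared X: for a fixed X, add,
  // sub and xor are injective in Y (e.g. (X ^ Y) == X iff Y == 0). Ordered
  // predicates are rejected via the CmpInst::isEquality check below.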
6976 Register Dst = MI.getOperand(0).getReg();
6977 CmpInst::Predicate Pred;
6978 Register X, Y, OpLHS, OpRHS;
6979 bool MatchedSub = mi_match(
6980 Dst, MRI,
6981 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6982 if (MatchedSub && X != OpLHS)
6983 return false;
6984 if (!MatchedSub) {
6985 if (!mi_match(Dst, MRI,
6986 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6987 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6988 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6989 return false;
6990 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6991 }
6992 MatchInfo = [=](MachineIRBuilder &B) {
6993 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6994 B.buildICmp(Pred, Dst, Y, Zero);
6995 };
6996 return CmpInst::isEquality(Pred) && Y.isValid();
6997}
6998
6999/// Return the minimum useless shift amount that results in complete loss of the
7000/// source value. Return std::nullopt when it cannot determine a value.
7001static std::optional<unsigned>
7002getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7003 std::optional<int64_t> &Result) {
7004 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7005 Opcode == TargetOpcode::G_ASHR) &&
7006 "Expect G_SHL, G_LSHR or G_ASHR.");
7007 auto SignificantBits = 0;
7008 switch (Opcode) {
7009 case TargetOpcode::G_SHL:
7010 SignificantBits = ValueKB.countMinTrailingZeros();
7011 Result = 0;
7012 break;
7013 case TargetOpcode::G_LSHR:
7014 Result = 0;
7015 SignificantBits = ValueKB.countMinLeadingZeros();
7016 break;
7017 case TargetOpcode::G_ASHR:
7018 if (ValueKB.isNonNegative()) {
7019 SignificantBits = ValueKB.countMinLeadingZeros();
7020 Result = 0;
7021 } else if (ValueKB.isNegative()) {
7022 SignificantBits = ValueKB.countMinLeadingOnes();
7023 Result = -1;
7024 } else {
7025 // Cannot determine shift result.
7026 Result = std::nullopt;
7027 }
7028 break;
7029 default:
7030 break;
7031 }
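  // Hedged example: a 32-bit G_SHL source known to have at least 3 trailing
  // zero bits has SignificantBits == 3, so any shift amount >= 29 (32 - 3)
  // shifts every significant bit out and the result is the constant 0.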
7032 return ValueKB.getBitWidth() - SignificantBits;
7033}
7034
7036 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7037 Register ShiftVal = MI.getOperand(1).getReg();
7038 Register ShiftReg = MI.getOperand(2).getReg();
7039 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7040 auto IsShiftTooBig = [&](const Constant *C) {
7041 auto *CI = dyn_cast<ConstantInt>(C);
7042 if (!CI)
7043 return false;
7044 if (CI->uge(ResTy.getScalarSizeInBits())) {
7045 MatchInfo = std::nullopt;
7046 return true;
7047 }
7048 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7049 MI.getOpcode(), MatchInfo);
7050 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7051 };
7052 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7053}
7054
7056 unsigned LHSOpndIdx = 1;
7057 unsigned RHSOpndIdx = 2;
7058 switch (MI.getOpcode()) {
7059 case TargetOpcode::G_UADDO:
7060 case TargetOpcode::G_SADDO:
7061 case TargetOpcode::G_UMULO:
7062 case TargetOpcode::G_SMULO:
7063 LHSOpndIdx = 2;
7064 RHSOpndIdx = 3;
7065 break;
7066 default:
7067 break;
7068 }
7069 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7070 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7071 if (!getIConstantVRegVal(LHS, MRI)) {
7072 // Skip commuting if LHS is not a constant. But, LHS may be a
7073 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7074 // have a constant on the RHS.
7075 if (MRI.getVRegDef(LHS)->getOpcode() !=
7076 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7077 return false;
7078 }
7079 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7080 return MRI.getVRegDef(RHS)->getOpcode() !=
7081 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7082 !getIConstantVRegVal(RHS, MRI);
7083}
7084
7086 Register LHS = MI.getOperand(1).getReg();
7087 Register RHS = MI.getOperand(2).getReg();
7088 std::optional<FPValueAndVReg> ValAndVReg;
7089 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7090 return false;
7091 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7092}
7093
7095 Observer.changingInstr(MI);
7096 unsigned LHSOpndIdx = 1;
7097 unsigned RHSOpndIdx = 2;
7098 switch (MI.getOpcode()) {
7099 case TargetOpcode::G_UADDO:
7100 case TargetOpcode::G_SADDO:
7101 case TargetOpcode::G_UMULO:
7102 case TargetOpcode::G_SMULO:
7103 LHSOpndIdx = 2;
7104 RHSOpndIdx = 3;
7105 break;
7106 default:
7107 break;
7108 }
7109 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7110 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7111 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7112 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7113 Observer.changedInstr(MI);
7114}
7115
7116bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7117 LLT SrcTy = MRI.getType(Src);
7118 if (SrcTy.isFixedVector())
7119 return isConstantSplatVector(Src, 1, AllowUndefs);
7120 if (SrcTy.isScalar()) {
7121 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7122 return true;
7123 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7124 return IConstant && IConstant->Value == 1;
7125 }
7126 return false; // scalable vector
7127}
7128
7129bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7130 LLT SrcTy = MRI.getType(Src);
7131 if (SrcTy.isFixedVector())
7132 return isConstantSplatVector(Src, 0, AllowUndefs);
7133 if (SrcTy.isScalar()) {
7134 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7135 return true;
7136 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7137 return IConstant && IConstant->Value == 0;
7138 }
7139 return false; // scalable vector
7140}
7141
7142// Ignores COPYs during conformance checks.
7143// FIXME scalable vectors.
7144bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7145 bool AllowUndefs) const {
7146 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7147 if (!BuildVector)
7148 return false;
7149 unsigned NumSources = BuildVector->getNumSources();
7150
7151 for (unsigned I = 0; I < NumSources; ++I) {
7152 GImplicitDef *ImplicitDef =
7154 if (ImplicitDef && AllowUndefs)
7155 continue;
7156 if (ImplicitDef && !AllowUndefs)
7157 return false;
7158 std::optional<ValueAndVReg> IConstant =
7160 if (IConstant && IConstant->Value == SplatValue)
7161 continue;
7162 return false;
7163 }
7164 return true;
7165}
7166
7167// Ignores COPYs during lookups.
7168// FIXME scalable vectors
7169std::optional<APInt>
7170CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7171 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7172 if (IConstant)
7173 return IConstant->Value;
7174
7175 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7176 if (!BuildVector)
7177 return std::nullopt;
7178 unsigned NumSources = BuildVector->getNumSources();
7179
7180 std::optional<APInt> Value = std::nullopt;
7181 for (unsigned I = 0; I < NumSources; ++I) {
7182 std::optional<ValueAndVReg> IConstant =
7184 if (!IConstant)
7185 return std::nullopt;
7186 if (!Value)
7187 Value = IConstant->Value;
7188 else if (*Value != IConstant->Value)
7189 return std::nullopt;
7190 }
7191 return Value;
7192}
7193
7194// FIXME G_SPLAT_VECTOR
7195bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7196 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7197 if (IConstant)
7198 return true;
7199
7200 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7201 if (!BuildVector)
7202 return false;
7203
7204 unsigned NumSources = BuildVector->getNumSources();
7205 for (unsigned I = 0; I < NumSources; ++I) {
7206 std::optional<ValueAndVReg> IConstant =
7208 if (!IConstant)
7209 return false;
7210 }
7211 return true;
7212}
7213
7214// TODO: use knownbits to determine zeros
7215bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7216 BuildFnTy &MatchInfo) const {
7217 uint32_t Flags = Select->getFlags();
7218 Register Dest = Select->getReg(0);
7219 Register Cond = Select->getCondReg();
7220 Register True = Select->getTrueReg();
7221 Register False = Select->getFalseReg();
7222 LLT CondTy = MRI.getType(Select->getCondReg());
7223 LLT TrueTy = MRI.getType(Select->getTrueReg());
7224
7225 // We only do this combine for scalar boolean conditions.
7226 if (CondTy != LLT::scalar(1))
7227 return false;
7228
7229 if (TrueTy.isPointer())
7230 return false;
7231
7232 // Both are scalars.
7233 std::optional<ValueAndVReg> TrueOpt =
7235 std::optional<ValueAndVReg> FalseOpt =
7237
7238 if (!TrueOpt || !FalseOpt)
7239 return false;
7240
7241 APInt TrueValue = TrueOpt->Value;
7242 APInt FalseValue = FalseOpt->Value;
7243
7244 // select Cond, 1, 0 --> zext (Cond)
7245 if (TrueValue.isOne() && FalseValue.isZero()) {
7246 MatchInfo = [=](MachineIRBuilder &B) {
7247 B.setInstrAndDebugLoc(*Select);
7248 B.buildZExtOrTrunc(Dest, Cond);
7249 };
7250 return true;
7251 }
7252
7253 // select Cond, -1, 0 --> sext (Cond)
7254 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7255 MatchInfo = [=](MachineIRBuilder &B) {
7256 B.setInstrAndDebugLoc(*Select);
7257 B.buildSExtOrTrunc(Dest, Cond);
7258 };
7259 return true;
7260 }
7261
7262 // select Cond, 0, 1 --> zext (!Cond)
7263 if (TrueValue.isZero() && FalseValue.isOne()) {
7264 MatchInfo = [=](MachineIRBuilder &B) {
7265 B.setInstrAndDebugLoc(*Select);
7266 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7267 B.buildNot(Inner, Cond);
7268 B.buildZExtOrTrunc(Dest, Inner);
7269 };
7270 return true;
7271 }
7272
7273 // select Cond, 0, -1 --> sext (!Cond)
7274 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7275 MatchInfo = [=](MachineIRBuilder &B) {
7276 B.setInstrAndDebugLoc(*Select);
7277 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7278 B.buildNot(Inner, Cond);
7279 B.buildSExtOrTrunc(Dest, Inner);
7280 };
7281 return true;
7282 }
7283
7284 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
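  // E.g. select Cond, 7, 6 becomes add (zext Cond), 6: the zext is 1 or 0,
  // giving 7 when Cond is true and 6 otherwise.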
7285 if (TrueValue - 1 == FalseValue) {
7286 MatchInfo = [=](MachineIRBuilder &B) {
7287 B.setInstrAndDebugLoc(*Select);
7288 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7289 B.buildZExtOrTrunc(Inner, Cond);
7290 B.buildAdd(Dest, Inner, False);
7291 };
7292 return true;
7293 }
7294
7295 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7296 if (TrueValue + 1 == FalseValue) {
7297 MatchInfo = [=](MachineIRBuilder &B) {
7298 B.setInstrAndDebugLoc(*Select);
7299 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7300 B.buildSExtOrTrunc(Inner, Cond);
7301 B.buildAdd(Dest, Inner, False);
7302 };
7303 return true;
7304 }
7305
7306 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7307 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7308 MatchInfo = [=](MachineIRBuilder &B) {
7309 B.setInstrAndDebugLoc(*Select);
7310 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7311 B.buildZExtOrTrunc(Inner, Cond);
7312 // The shift amount must be scalar.
7313 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7314 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7315 B.buildShl(Dest, Inner, ShAmtC, Flags);
7316 };
7317 return true;
7318 }
7319
7320 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7321 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7322 MatchInfo = [=](MachineIRBuilder &B) {
7323 B.setInstrAndDebugLoc(*Select);
7324 Register Not = MRI.createGenericVirtualRegister(CondTy);
7325 B.buildNot(Not, Cond);
7326 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7327 B.buildZExtOrTrunc(Inner, Not);
7328 // The shift amount must be scalar.
7329 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7330 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7331 B.buildShl(Dest, Inner, ShAmtC, Flags);
7332 };
7333 return true;
7334 }
7335
7336 // select Cond, -1, C --> or (sext Cond), C
7337 if (TrueValue.isAllOnes()) {
7338 MatchInfo = [=](MachineIRBuilder &B) {
7339 B.setInstrAndDebugLoc(*Select);
7340 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7341 B.buildSExtOrTrunc(Inner, Cond);
7342 B.buildOr(Dest, Inner, False, Flags);
7343 };
7344 return true;
7345 }
7346
7347 // select Cond, C, -1 --> or (sext (not Cond)), C
7348 if (FalseValue.isAllOnes()) {
7349 MatchInfo = [=](MachineIRBuilder &B) {
7350 B.setInstrAndDebugLoc(*Select);
7351 Register Not = MRI.createGenericVirtualRegister(CondTy);
7352 B.buildNot(Not, Cond);
7353 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7354 B.buildSExtOrTrunc(Inner, Not);
7355 B.buildOr(Dest, Inner, True, Flags);
7356 };
7357 return true;
7358 }
7359
7360 return false;
7361}
7362
7363// TODO: use knownbits to determine zeros
7364bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7365 BuildFnTy &MatchInfo) const {
7366 uint32_t Flags = Select->getFlags();
7367 Register DstReg = Select->getReg(0);
7368 Register Cond = Select->getCondReg();
7369 Register True = Select->getTrueReg();
7370 Register False = Select->getFalseReg();
7371 LLT CondTy = MRI.getType(Select->getCondReg());
7372 LLT TrueTy = MRI.getType(Select->getTrueReg());
7373
7374 // Boolean or fixed vector of booleans.
7375 if (CondTy.isScalableVector() ||
7376 (CondTy.isFixedVector() &&
7377 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7378 CondTy.getScalarSizeInBits() != 1)
7379 return false;
7380
7381 if (CondTy != TrueTy)
7382 return false;
7383
7384 // select Cond, Cond, F --> or Cond, F
7385 // select Cond, 1, F --> or Cond, F
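  // Hedged note on the freeze below: when Cond is true the select never
  // observes F, but the G_OR would, so F is frozen to stop a poison false
  // operand from leaking into the result.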
7386 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7387 MatchInfo = [=](MachineIRBuilder &B) {
7388 B.setInstrAndDebugLoc(*Select);
7389 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7390 B.buildZExtOrTrunc(Ext, Cond);
7391 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7392 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7393 };
7394 return true;
7395 }
7396
7397 // select Cond, T, Cond --> and Cond, T
7398 // select Cond, T, 0 --> and Cond, T
7399 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7400 MatchInfo = [=](MachineIRBuilder &B) {
7401 B.setInstrAndDebugLoc(*Select);
7402 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7403 B.buildZExtOrTrunc(Ext, Cond);
7404 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7405 B.buildAnd(DstReg, Ext, FreezeTrue);
7406 };
7407 return true;
7408 }
7409
7410 // select Cond, T, 1 --> or (not Cond), T
7411 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7412 MatchInfo = [=](MachineIRBuilder &B) {
7413 B.setInstrAndDebugLoc(*Select);
7414 // First the not.
7415 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7416 B.buildNot(Inner, Cond);
7417 // Then an ext to match the destination register.
7418 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7419 B.buildZExtOrTrunc(Ext, Inner);
7420 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7421 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7422 };
7423 return true;
7424 }
7425
7426 // select Cond, 0, F --> and (not Cond), F
7427 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7428 MatchInfo = [=](MachineIRBuilder &B) {
7429 B.setInstrAndDebugLoc(*Select);
7430 // First the not.
7431 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7432 B.buildNot(Inner, Cond);
7433 // Then an ext to match the destination register.
7434 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7435 B.buildZExtOrTrunc(Ext, Inner);
7436 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7437 B.buildAnd(DstReg, Ext, FreezeFalse);
7438 };
7439 return true;
7440 }
7441
7442 return false;
7443}
7444
7446 BuildFnTy &MatchInfo) const {
7447 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7448 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7449
7450 Register DstReg = Select->getReg(0);
7451 Register True = Select->getTrueReg();
7452 Register False = Select->getFalseReg();
7453 LLT DstTy = MRI.getType(DstReg);
7454
7455 if (DstTy.isPointer())
7456 return false;
7457
7458 // We want to fold the icmp and replace the select.
7459 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7460 return false;
7461
7462 CmpInst::Predicate Pred = Cmp->getCond();
7463 // We need a larger or smaller predicate for
7464 // canonicalization.
7465 if (CmpInst::isEquality(Pred))
7466 return false;
7467
7468 Register CmpLHS = Cmp->getLHSReg();
7469 Register CmpRHS = Cmp->getRHSReg();
7470
 7471  // We can swap CmpLHS and CmpRHS for a higher hit rate.
7472 if (True == CmpRHS && False == CmpLHS) {
7473 std::swap(CmpLHS, CmpRHS);
7474 Pred = CmpInst::getSwappedPredicate(Pred);
7475 }
7476
7477 // (icmp X, Y) ? X : Y -> integer minmax.
7478 // see matchSelectPattern in ValueTracking.
7479 // Legality between G_SELECT and integer minmax can differ.
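  // E.g. select (icmp ugt %x, %y), %x, %y becomes G_UMAX %x, %y, provided
  // G_UMAX is legal or we are still before the legalizer.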
7480 if (True != CmpLHS || False != CmpRHS)
7481 return false;
7482
7483 switch (Pred) {
7484 case ICmpInst::ICMP_UGT:
7485 case ICmpInst::ICMP_UGE: {
7486 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7487 return false;
7488 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7489 return true;
7490 }
7491 case ICmpInst::ICMP_SGT:
7492 case ICmpInst::ICMP_SGE: {
7493 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7494 return false;
7495 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7496 return true;
7497 }
7498 case ICmpInst::ICMP_ULT:
7499 case ICmpInst::ICMP_ULE: {
7500 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7501 return false;
7502 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7503 return true;
7504 }
7505 case ICmpInst::ICMP_SLT:
7506 case ICmpInst::ICMP_SLE: {
7507 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7508 return false;
7509 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7510 return true;
7511 }
7512 default:
7513 return false;
7514 }
7515}
7516
7517// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7519 BuildFnTy &MatchInfo) const {
7520 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7521 Register DestReg = MI.getOperand(0).getReg();
7522 LLT DestTy = MRI.getType(DestReg);
7523
7524 Register X;
7525 Register Sub0;
7526 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7527 if (mi_match(DestReg, MRI,
7528 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7529 m_GSMax(m_Reg(X), NegPattern),
7530 m_GUMin(m_Reg(X), NegPattern),
7531 m_GUMax(m_Reg(X), NegPattern)))))) {
7532 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7533 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7534 if (isLegal({NewOpc, {DestTy}})) {
7535 MatchInfo = [=](MachineIRBuilder &B) {
7536 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7537 };
7538 return true;
7539 }
7540 }
7541
7542 return false;
7543}
7544
7547
7548 if (tryFoldSelectOfConstants(Select, MatchInfo))
7549 return true;
7550
7551 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7552 return true;
7553
7554 return false;
7555}
7556
7557/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7558/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7559/// into a single comparison using range-based reasoning.
7560/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
7561bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7562 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7563 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7564 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7565 Register DstReg = Logic->getReg(0);
7566 Register LHS = Logic->getLHSReg();
7567 Register RHS = Logic->getRHSReg();
7568 unsigned Flags = Logic->getFlags();
7569
7570 // We need an G_ICMP on the LHS register.
7571 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7572 if (!Cmp1)
7573 return false;
7574
7575 // We need an G_ICMP on the RHS register.
7576 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7577 if (!Cmp2)
7578 return false;
7579
7580 // We want to fold the icmps.
7581 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7582 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7583 return false;
7584
7585 APInt C1;
7586 APInt C2;
7587 std::optional<ValueAndVReg> MaybeC1 =
7589 if (!MaybeC1)
7590 return false;
7591 C1 = MaybeC1->Value;
7592
7593 std::optional<ValueAndVReg> MaybeC2 =
7595 if (!MaybeC2)
7596 return false;
7597 C2 = MaybeC2->Value;
7598
7599 Register R1 = Cmp1->getLHSReg();
7600 Register R2 = Cmp2->getLHSReg();
7601 CmpInst::Predicate Pred1 = Cmp1->getCond();
7602 CmpInst::Predicate Pred2 = Cmp2->getCond();
7603 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7604 LLT CmpOperandTy = MRI.getType(R1);
7605
7606 if (CmpOperandTy.isPointer())
7607 return false;
7608
7609 // We build ands, adds, and constants of type CmpOperandTy.
7610 // They must be legal to build.
7611 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7612 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7613 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7614 return false;
7615
7616 // Look through add of a constant offset on R1, R2, or both operands. This
7617 // allows us to interpret the R + C' < C'' range idiom into a proper range.
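  // Hedged worked example: (icmp ult X, 4) || (icmp ult (add X, -8), 4)
  // describes [0,4) u [8,12); the two ranges have equal size and differ in a
  // single bit, so the mask path below emits icmp ult (and X, ~8), 4.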
7618 std::optional<APInt> Offset1;
7619 std::optional<APInt> Offset2;
7620 if (R1 != R2) {
7621 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7622 std::optional<ValueAndVReg> MaybeOffset1 =
7624 if (MaybeOffset1) {
7625 R1 = Add->getLHSReg();
7626 Offset1 = MaybeOffset1->Value;
7627 }
7628 }
7629 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7630 std::optional<ValueAndVReg> MaybeOffset2 =
7632 if (MaybeOffset2) {
7633 R2 = Add->getLHSReg();
7634 Offset2 = MaybeOffset2->Value;
7635 }
7636 }
7637 }
7638
7639 if (R1 != R2)
7640 return false;
7641
7642 // We calculate the icmp ranges including maybe offsets.
7643 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7644 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7645 if (Offset1)
7646 CR1 = CR1.subtract(*Offset1);
7647
7648 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7649 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7650 if (Offset2)
7651 CR2 = CR2.subtract(*Offset2);
7652
7653 bool CreateMask = false;
7654 APInt LowerDiff;
7655 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7656 if (!CR) {
7657 // We need non-wrapping ranges.
7658 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7659 return false;
7660
7661 // Check whether we have equal-size ranges that only differ by one bit.
7662 // In that case we can apply a mask to map one range onto the other.
7663 LowerDiff = CR1.getLower() ^ CR2.getLower();
7664 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7665 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7666 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7667 CR1Size != CR2.getUpper() - CR2.getLower())
7668 return false;
7669
7670 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7671 CreateMask = true;
7672 }
7673
7674 if (IsAnd)
7675 CR = CR->inverse();
7676
7677 CmpInst::Predicate NewPred;
7678 APInt NewC, Offset;
7679 CR->getEquivalentICmp(NewPred, NewC, Offset);
7680
 7681  // We take the result type of one of the original icmps, CmpTy, for the
 7682  // icmp to be built. The operand type, CmpOperandTy, is used for the
 7683  // other instructions and constants to be built. The parameter and result
 7684  // types are the same for G_ADD and G_AND. CmpTy and the type of DstReg
 7685  // might differ, which is why we zext or trunc the icmp into the
 7686  // destination register.
7687
7688 MatchInfo = [=](MachineIRBuilder &B) {
7689 if (CreateMask && Offset != 0) {
7690 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7691 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7692 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7693 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7694 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7695 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7696 B.buildZExtOrTrunc(DstReg, ICmp);
7697 } else if (CreateMask && Offset == 0) {
7698 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7699 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7700 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7701 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7702 B.buildZExtOrTrunc(DstReg, ICmp);
7703 } else if (!CreateMask && Offset != 0) {
7704 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7705 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7706 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7707 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7708 B.buildZExtOrTrunc(DstReg, ICmp);
7709 } else if (!CreateMask && Offset == 0) {
7710 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7711 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7712 B.buildZExtOrTrunc(DstReg, ICmp);
7713 } else {
7714 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7715 }
7716 };
7717 return true;
7718}
7719
7720bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7721 BuildFnTy &MatchInfo) const {
 7722  assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7723 Register DestReg = Logic->getReg(0);
7724 Register LHS = Logic->getLHSReg();
7725 Register RHS = Logic->getRHSReg();
7726 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7727
7728 // We need a compare on the LHS register.
7729 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7730 if (!Cmp1)
7731 return false;
7732
7733 // We need a compare on the RHS register.
7734 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7735 if (!Cmp2)
7736 return false;
7737
7738 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7739 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7740
7741 // We build one fcmp, want to fold the fcmps, replace the logic op,
7742 // and the fcmps must have the same shape.
7744 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7745 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7746 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7747 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7748 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7749 return false;
7750
7751 CmpInst::Predicate PredL = Cmp1->getCond();
7752 CmpInst::Predicate PredR = Cmp2->getCond();
7753 Register LHS0 = Cmp1->getLHSReg();
7754 Register LHS1 = Cmp1->getRHSReg();
7755 Register RHS0 = Cmp2->getLHSReg();
7756 Register RHS1 = Cmp2->getRHSReg();
7757
7758 if (LHS0 == RHS1 && LHS1 == RHS0) {
7759 // Swap RHS operands to match LHS.
7760 PredR = CmpInst::getSwappedPredicate(PredR);
7761 std::swap(RHS0, RHS1);
7762 }
7763
7764 if (LHS0 == RHS0 && LHS1 == RHS1) {
7765 // We determine the new predicate.
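    // Hedged note: getFCmpCode maps each predicate to a 4-bit mask over the
    // possible orderings (unordered / greater / less / equal), so combining
    // two compares of the same operands reduces to a bitwise AND/OR of masks.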
7766 unsigned CmpCodeL = getFCmpCode(PredL);
7767 unsigned CmpCodeR = getFCmpCode(PredR);
7768 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7769 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7770 MatchInfo = [=](MachineIRBuilder &B) {
7771 // The fcmp predicates fill the lower part of the enum.
7772 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7773 if (Pred == FCmpInst::FCMP_FALSE &&
7775 auto False = B.buildConstant(CmpTy, 0);
7776 B.buildZExtOrTrunc(DestReg, False);
7777 } else if (Pred == FCmpInst::FCMP_TRUE &&
7779 auto True =
7780 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7781 CmpTy.isVector() /*isVector*/,
7782 true /*isFP*/));
7783 B.buildZExtOrTrunc(DestReg, True);
7784 } else { // We take the predicate without predicate optimizations.
7785 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7786 B.buildZExtOrTrunc(DestReg, Cmp);
7787 }
7788 };
7789 return true;
7790 }
7791
7792 return false;
7793}
7794
7796 GAnd *And = cast<GAnd>(&MI);
7797
7798 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7799 return true;
7800
7801 if (tryFoldLogicOfFCmps(And, MatchInfo))
7802 return true;
7803
7804 return false;
7805}
7806
7808 GOr *Or = cast<GOr>(&MI);
7809
7810 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7811 return true;
7812
7813 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7814 return true;
7815
7816 return false;
7817}
7818
7820 BuildFnTy &MatchInfo) const {
7822
7823 // Addo has no flags
7824 Register Dst = Add->getReg(0);
7825 Register Carry = Add->getReg(1);
7826 Register LHS = Add->getLHSReg();
7827 Register RHS = Add->getRHSReg();
7828 bool IsSigned = Add->isSigned();
7829 LLT DstTy = MRI.getType(Dst);
7830 LLT CarryTy = MRI.getType(Carry);
7831
7832 // Fold addo, if the carry is dead -> add, undef.
7833 if (MRI.use_nodbg_empty(Carry) &&
7834 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7835 MatchInfo = [=](MachineIRBuilder &B) {
7836 B.buildAdd(Dst, LHS, RHS);
7837 B.buildUndef(Carry);
7838 };
7839 return true;
7840 }
7841
7842 // Canonicalize constant to RHS.
7843 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7844 if (IsSigned) {
7845 MatchInfo = [=](MachineIRBuilder &B) {
7846 B.buildSAddo(Dst, Carry, RHS, LHS);
7847 };
7848 return true;
7849 }
7850 // !IsSigned
7851 MatchInfo = [=](MachineIRBuilder &B) {
7852 B.buildUAddo(Dst, Carry, RHS, LHS);
7853 };
7854 return true;
7855 }
7856
7857 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7858 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7859
7860 // Fold addo(c1, c2) -> c3, carry.
7861 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7863 bool Overflow;
7864 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7865 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7866 MatchInfo = [=](MachineIRBuilder &B) {
7867 B.buildConstant(Dst, Result);
7868 B.buildConstant(Carry, Overflow);
7869 };
7870 return true;
7871 }
7872
7873 // Fold (addo x, 0) -> x, no carry
7874 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7875 MatchInfo = [=](MachineIRBuilder &B) {
7876 B.buildCopy(Dst, LHS);
7877 B.buildConstant(Carry, 0);
7878 };
7879 return true;
7880 }
7881
7882 // Given 2 constant operands whose sum does not overflow:
7883 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7884 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
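  // Hedged rationale: the nuw/nsw flag guarantees X + C0 did not wrap, and we
  // only fold when C0 + C1 does not overflow either, so the rewritten addo
  // produces the same value and the same carry as the original.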
7885 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7886 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7887 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7888 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7889 std::optional<APInt> MaybeAddRHS =
7890 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7891 if (MaybeAddRHS) {
7892 bool Overflow;
7893 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7894 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7895 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7896 if (IsSigned) {
7897 MatchInfo = [=](MachineIRBuilder &B) {
7898 auto ConstRHS = B.buildConstant(DstTy, NewC);
7899 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7900 };
7901 return true;
7902 }
7903 // !IsSigned
7904 MatchInfo = [=](MachineIRBuilder &B) {
7905 auto ConstRHS = B.buildConstant(DstTy, NewC);
7906 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7907 };
7908 return true;
7909 }
7910 }
7911 };
7912
7913 // We try to combine addo to non-overflowing add.
7914 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7916 return false;
7917
7918 // We try to combine uaddo to non-overflowing add.
7919 if (!IsSigned) {
7920 ConstantRange CRLHS =
7921 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
7922 ConstantRange CRRHS =
7923 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
7924
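    // Hedged example: if known bits bound both s32 operands by 0x0000FFFF,
    // the ranges cannot sum past 0x0001FFFE, unsignedAddMayOverflow reports
    // NeverOverflows, and the uaddo becomes a nuw G_ADD with a zero carry.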
7925 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7927 return false;
7929 MatchInfo = [=](MachineIRBuilder &B) {
7930 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7931 B.buildConstant(Carry, 0);
7932 };
7933 return true;
7934 }
7937 MatchInfo = [=](MachineIRBuilder &B) {
7938 B.buildAdd(Dst, LHS, RHS);
7939 B.buildConstant(Carry, 1);
7940 };
7941 return true;
7942 }
7943 }
7944 return false;
7945 }
7946
7947 // We try to combine saddo to non-overflowing add.
7948
7949 // If LHS and RHS each have at least two sign bits, then there is no signed
7950 // overflow.
7951 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
7952 MatchInfo = [=](MachineIRBuilder &B) {
7953 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7954 B.buildConstant(Carry, 0);
7955 };
7956 return true;
7957 }
7958
7959 ConstantRange CRLHS =
7960 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
7961 ConstantRange CRRHS =
7962 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
7963
7964 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7966 return false;
7968 MatchInfo = [=](MachineIRBuilder &B) {
7969 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7970 B.buildConstant(Carry, 0);
7971 };
7972 return true;
7973 }
7976 MatchInfo = [=](MachineIRBuilder &B) {
7977 B.buildAdd(Dst, LHS, RHS);
7978 B.buildConstant(Carry, 1);
7979 };
7980 return true;
7981 }
7982 }
7983
7984 return false;
7985}
7986
7988 BuildFnTy &MatchInfo) const {
7990 MatchInfo(Builder);
7991 Root->eraseFromParent();
7992}
7993
7995 int64_t Exponent) const {
7996 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
7998}
7999
8001 int64_t Exponent) const {
8002 auto [Dst, Base] = MI.getFirst2Regs();
8003 LLT Ty = MRI.getType(Dst);
8004 int64_t ExpVal = Exponent;
8005
8006 if (ExpVal == 0) {
8007 Builder.buildFConstant(Dst, 1.0);
8008 MI.removeFromParent();
8009 return;
8010 }
8011
8012 if (ExpVal < 0)
8013 ExpVal = -ExpVal;
8014
8015 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8016 // to generate the multiply sequence. There are more optimal ways to do this
8017 // (for example, powi(x,15) generates one more multiply than it should), but
8018 // this has the benefit of being both really simple and much better than a
8019 // libcall.
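  // Worked example: powi(x, 13) with 13 == 0b1101 walks the squares x, x^2,
  // x^4, x^8 and multiplies together those selected by the set bits:
  // x * x^4 * x^8 == x^13. A negative exponent is handled by the final fdiv.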
8020 std::optional<SrcOp> Res;
8021 SrcOp CurSquare = Base;
8022 while (ExpVal > 0) {
8023 if (ExpVal & 1) {
8024 if (!Res)
8025 Res = CurSquare;
8026 else
8027 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8028 }
8029
8030 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8031 ExpVal >>= 1;
8032 }
8033
8034 // If the original exponent was negative, invert the result, producing
8035 // 1/(x*x*x).
8036 if (Exponent < 0)
8037 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8038 MI.getFlags());
8039
8040 Builder.buildCopy(Dst, *Res);
8041 MI.eraseFromParent();
8042}
8043
8045 BuildFnTy &MatchInfo) const {
8046 // fold (A+C1)-C2 -> A+(C1-C2)
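  // E.g. (A + 10) - 3 becomes A + 7, using a single freshly built constant.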
8047 const GSub *Sub = cast<GSub>(&MI);
8048 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8049
8050 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8051 return false;
8052
8053 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8054 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8055
8056 Register Dst = Sub->getReg(0);
8057 LLT DstTy = MRI.getType(Dst);
8058
8059 MatchInfo = [=](MachineIRBuilder &B) {
8060 auto Const = B.buildConstant(DstTy, C1 - C2);
8061 B.buildAdd(Dst, Add->getLHSReg(), Const);
8062 };
8063
8064 return true;
8065}
8066
8068 BuildFnTy &MatchInfo) const {
8069 // fold C2-(A+C1) -> (C2-C1)-A
8070 const GSub *Sub = cast<GSub>(&MI);
8071 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8072
8073 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8074 return false;
8075
8076 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8077 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8078
8079 Register Dst = Sub->getReg(0);
8080 LLT DstTy = MRI.getType(Dst);
8081
8082 MatchInfo = [=](MachineIRBuilder &B) {
8083 auto Const = B.buildConstant(DstTy, C2 - C1);
8084 B.buildSub(Dst, Const, Add->getLHSReg());
8085 };
8086
8087 return true;
8088}
8089
8091 BuildFnTy &MatchInfo) const {
8092 // fold (A-C1)-C2 -> A-(C1+C2)
8093 const GSub *Sub1 = cast<GSub>(&MI);
8094 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8095
8096 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8097 return false;
8098
8099 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8100 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8101
8102 Register Dst = Sub1->getReg(0);
8103 LLT DstTy = MRI.getType(Dst);
8104
8105 MatchInfo = [=](MachineIRBuilder &B) {
8106 auto Const = B.buildConstant(DstTy, C1 + C2);
8107 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8108 };
8109
8110 return true;
8111}
8112
8114 BuildFnTy &MatchInfo) const {
8115 // fold (C1-A)-C2 -> (C1-C2)-A
8116 const GSub *Sub1 = cast<GSub>(&MI);
8117 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8118
8119 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8120 return false;
8121
8122 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8123 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8124
8125 Register Dst = Sub1->getReg(0);
8126 LLT DstTy = MRI.getType(Dst);
8127
8128 MatchInfo = [=](MachineIRBuilder &B) {
8129 auto Const = B.buildConstant(DstTy, C1 - C2);
8130 B.buildSub(Dst, Const, Sub2->getRHSReg());
8131 };
8132
8133 return true;
8134}
8135
8137 BuildFnTy &MatchInfo) const {
8138 // fold ((A-C1)+C2) -> (A+(C2-C1))
8139 const GAdd *Add = cast<GAdd>(&MI);
8140 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8141
8142 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8143 return false;
8144
8145 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8146 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8147
8148 Register Dst = Add->getReg(0);
8149 LLT DstTy = MRI.getType(Dst);
8150
8151 MatchInfo = [=](MachineIRBuilder &B) {
8152 auto Const = B.buildConstant(DstTy, C2 - C1);
8153 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8154 };
8155
8156 return true;
8157}
8158
8160 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8161 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8162
8163 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8164 return false;
8165
8166 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8167
8168 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8169
8170 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8171 // $any:_(<8 x s16>) = G_ANYEXT $bv
8172 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8173 //
8174 // ->
8175 //
8176 // $any:_(s16) = G_ANYEXT $bv[0]
8177 // $any1:_(s16) = G_ANYEXT $bv[1]
8178 // $any2:_(s16) = G_ANYEXT $bv[2]
8179 // $any3:_(s16) = G_ANYEXT $bv[3]
8180 // $any4:_(s16) = G_ANYEXT $bv[4]
8181 // $any5:_(s16) = G_ANYEXT $bv[5]
8182 // $any6:_(s16) = G_ANYEXT $bv[6]
8183 // $any7:_(s16) = G_ANYEXT $bv[7]
8184 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8185 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8186
8187 // We want to unmerge into vectors.
8188 if (!DstTy.isFixedVector())
8189 return false;
8190
8191 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8192 if (!Any)
8193 return false;
8194
8195 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8196
8197 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8198 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8199
8200 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8201 return false;
8202
8203 // FIXME: check element types?
8204 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8205 return false;
8206
8207 LLT BigBvTy = MRI.getType(BV->getReg(0));
8208 LLT SmallBvTy = DstTy;
8209 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8210
8212 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8213 return false;
8214
8215 // We check the legality of scalar anyext.
8217 {TargetOpcode::G_ANYEXT,
8218 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8219 return false;
8220
8221 MatchInfo = [=](MachineIRBuilder &B) {
8222 // Build into each G_UNMERGE_VALUES def
8223 // a small build vector with anyext from the source build vector.
8224 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
8226 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
8227 Register SourceArray =
8228 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8229 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8230 Ops.push_back(AnyExt.getReg(0));
8231 }
8232 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8233 };
8234 };
8235 return true;
8236 };
8237
8238 return false;
8239}
8240
8242 BuildFnTy &MatchInfo) const {
8243
8244 bool Changed = false;
8245 auto &Shuffle = cast<GShuffleVector>(MI);
8246 ArrayRef<int> OrigMask = Shuffle.getMask();
8247 SmallVector<int, 16> NewMask;
8248 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
8249 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8250 const unsigned NumDstElts = OrigMask.size();
8251 for (unsigned i = 0; i != NumDstElts; ++i) {
8252 int Idx = OrigMask[i];
8253 if (Idx >= (int)NumSrcElems) {
8254 Idx = -1;
8255 Changed = true;
8256 }
8257 NewMask.push_back(Idx);
8258 }
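  // Hedged example (assuming the caller only matches shuffles whose second
  // source is undef): with 4-element sources and mask <0, 5, 2, 7>, indices 5
  // and 7 point past the first source and become -1, i.e. undef lanes.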
8259
8260 if (!Changed)
8261 return false;
8262
8263 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8264 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8265 std::move(NewMask));
8266 };
8267
8268 return true;
8269}
8270
8271static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8272 const unsigned MaskSize = Mask.size();
8273 for (unsigned I = 0; I < MaskSize; ++I) {
8274 int Idx = Mask[I];
8275 if (Idx < 0)
8276 continue;
8277
8278 if (Idx < (int)NumElems)
8279 Mask[I] = Idx + NumElems;
8280 else
8281 Mask[I] = Idx - NumElems;
8282 }
8283}
8284
8286 BuildFnTy &MatchInfo) const {
8287
8288 auto &Shuffle = cast<GShuffleVector>(MI);
 8289  // If either of the two inputs is already undef, don't check the mask again,
 8290  // to prevent an infinite loop.
8291 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8292 return false;
8293
8294 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8295 return false;
8296
8297 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8298 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
8300 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8301 return false;
8302
8303 ArrayRef<int> Mask = Shuffle.getMask();
8304 const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
8305
8306 bool TouchesSrc1 = false;
8307 bool TouchesSrc2 = false;
8308 const unsigned NumElems = Mask.size();
8309 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8310 if (Mask[Idx] < 0)
8311 continue;
8312
8313 if (Mask[Idx] < (int)NumSrcElems)
8314 TouchesSrc1 = true;
8315 else
8316 TouchesSrc2 = true;
8317 }
8318
8319 if (TouchesSrc1 == TouchesSrc2)
8320 return false;
8321
8322 Register NewSrc1 = Shuffle.getSrc1Reg();
8323 SmallVector<int, 16> NewMask(Mask);
8324 if (TouchesSrc2) {
8325 NewSrc1 = Shuffle.getSrc2Reg();
8326 commuteMask(NewMask, NumSrcElems);
8327 }
8328
8329 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8330 auto Undef = B.buildUndef(Src1Ty);
8331 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8332 };
8333
8334 return true;
8335}
8336
8338 BuildFnTy &MatchInfo) const {
8339 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8340
8341 Register Dst = Subo->getReg(0);
8342 Register LHS = Subo->getLHSReg();
8343 Register RHS = Subo->getRHSReg();
8344 Register Carry = Subo->getCarryOutReg();
8345 LLT DstTy = MRI.getType(Dst);
8346 LLT CarryTy = MRI.getType(Carry);
8347
8348 // Check legality before known bits.
8349 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8351 return false;
8352
8353 ConstantRange KBLHS =
8354 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8355 /* IsSigned=*/Subo->isSigned());
8356 ConstantRange KBRHS =
8357 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8358 /* IsSigned=*/Subo->isSigned());
8359
8360 if (Subo->isSigned()) {
8361 // G_SSUBO
8362 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8364 return false;
8366 MatchInfo = [=](MachineIRBuilder &B) {
8367 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8368 B.buildConstant(Carry, 0);
8369 };
8370 return true;
8371 }
8374 MatchInfo = [=](MachineIRBuilder &B) {
8375 B.buildSub(Dst, LHS, RHS);
8376 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8377 /*isVector=*/CarryTy.isVector(),
8378 /*isFP=*/false));
8379 };
8380 return true;
8381 }
8382 }
8383 return false;
8384 }
8385
8386 // G_USUBO
8387 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8388 case ConstantRange::OverflowResult::MayOverflow:
8389 return false;
8390 case ConstantRange::OverflowResult::NeverOverflows: {
8391 MatchInfo = [=](MachineIRBuilder &B) {
8392 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8393 B.buildConstant(Carry, 0);
8394 };
8395 return true;
8396 }
8397 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8398 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8399 MatchInfo = [=](MachineIRBuilder &B) {
8400 B.buildSub(Dst, LHS, RHS);
8401 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8402 /*isVector=*/CarryTy.isVector(),
8403 /*isFP=*/false));
8404 };
8405 return true;
8406 }
8407 }
8408
8409 return false;
8410}
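// Worked example (hypothetical known-bits ranges): for a G_USUBO where value
// tracking proves LHS lies in [16, 32) and RHS in [0, 8], unsignedSubMayOverflow
// returns NeverOverflows, so the combine emits a plain G_SUB tagged NoUWrap plus
// a zero carry-out; if instead LHS were known to lie in [0, 4) and RHS in
// [8, 16), the subtraction always borrows and the carry-out is replaced by the
// target's "true" boolean value.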
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition APFloat.h:1457
bool isNaN() const
Definition APFloat.h:1447
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1235
APInt bitcastToAPInt() const
Definition APFloat.h:1353
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
int32_t exactLogBase2() const
Definition APInt.h:1783
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:834
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1041
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1274
bool isMask(unsigned numBits) const
Definition APInt.h:488
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:917
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:695
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:692
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:685
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:704
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:691
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:680
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:829
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchShuffleToExtract(MachineInstr &MI) const
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Tranform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is zero.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is known to be a power of 2.
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
bool matchCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate to reassociate operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given an G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
void applyShuffleToExtract(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given an G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
const APFloat & getValue() const
Definition Constants.h:321
const APFloat & getValueAPF() const
Definition Constants.h:320
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:199
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:187
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229
unsigned size() const
Definition DenseMap.h:108
iterator end()
Definition DenseMap.h:81
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
constexpr LLT getScalarType() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:64
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:104
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:279
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:356
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
self_iterator getIterator()
Definition ilist_node.h:134
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
operand_type_match m_Reg()
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(APInt RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
SpecificConstantMatch m_SpecificICst(APInt RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:330
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1480
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2029
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:651
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:459
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1440
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1605
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:260
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:739
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1563
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1587
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:492
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1620
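A minimal sketch of matching a scalar constant or a splat/build-vector of constants element by element; the register name is an assumption:
// True only if every constant element of Reg is strictly positive.
bool AllPositive = matchUnaryPredicate(MRI, Reg, [](const Constant *C) {
  const auto *CI = dyn_cast<ConstantInt>(C);
  return CI && CI->getValue().isStrictlyPositive();
});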
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1652
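A minimal sketch, assuming TLI, a select condition's constant value CondVal, and the destination LLT DstTy are available in a hypothetical combine:
// Interpret CondVal according to the target's boolean contents
// (0/1, 0/-1, ...) rather than comparing it against 1 directly.
bool CondIsTrue =
    isConstTrueVal(TLI, CondVal, /*IsVector=*/DstTy.isVector(), /*IsFP=*/false);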
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:670
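A minimal sketch; the register and builder names are assumptions about the surrounding combine:
// Fold G_ADD(LHSReg, RHSReg) to an immediate when both inputs are G_CONSTANTs.
if (std::optional<APInt> Folded =
        ConstantFoldBinOp(TargetOpcode::G_ADD, LHSReg, RHSReg, MRI))
  Builder.buildConstant(DstReg, *Folded);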
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:305
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1543
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
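A minimal sketch of the classic strength reduction this check gates, with hypothetical value names:
// An unsigned divide by a power-of-two constant can become a right shift.
if (isPowerOf2_32(DivisorVal)) {
  unsigned ShAmt = Log2_32(DivisorVal); // e.g. 16 -> shift by 4
  // ... build a G_LSHR by ShAmt instead of the G_UDIV ...
}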
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:201
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1473
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:966
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:279
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:447
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1677
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
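A minimal sketch; ValueAndVReg carries both the APInt and the root G_CONSTANT's register, and Reg/MRI are assumed in-scope names:
// Accept constants hidden behind trunc/ext chains, not just a direct G_CONSTANT.
if (auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI)) {
  // Assumes the constant fits in 64 bits; ValAndVReg->VReg is the G_CONSTANT's def.
  uint64_t Imm = ValAndVReg->Value.getZExtValue();
  // ... use Imm in the rewrite ...
}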
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value Register, folding away any copies.
Definition Utils.cpp:467
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:499
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
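A minimal sketch relating this to the isMask_64 and countr_one entries above:
// maskTrailingOnes<uint64_t>(N) builds exactly the kind of mask isMask_64 recognizes.
uint64_t Mask = maskTrailingOnes<uint64_t>(12); // 0xFFF
assert(isMask_64(Mask) && countr_one(Mask) == 12);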
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1458
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:234
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:101
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:244
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:235
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:241
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:138
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:98
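A minimal sketch of how these KnownBits queries drive a combine, assuming the helper's value-tracking analysis (VT) exposes getKnownBits(Register) as GISelKnownBits/GISelValueTracking does, and that the register name is hypothetical:
// Prove that a G_ASHR can be rewritten as G_LSHR: the sign bit must be known zero.
KnownBits SrcKnown = VT->getKnownBits(SrcReg);
if (SrcKnown.isNonNegative()) {
  // ... non-negative source: arithmetic and logical shift right agree ...
}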
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
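A minimal sketch of consuming the magic data; the Magic/PreShift/PostShift/IsAdd member names are an assumption about the current struct layout:
// Magic numbers for an unsigned divide by 10 on a 32-bit type.
APInt Divisor(32, 10);
UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(Divisor);
// When !Magics.IsAdd, the quotient is computed roughly as:
//   q = umulh(x >> Magics.PreShift, Magics.Magic) >> Magics.PostShift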