InstCombineVectorOps.cpp
1//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombineInternal.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constant.h"
26#include "llvm/IR/Constants.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/Operator.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/User.h"
35#include "llvm/IR/Value.h"
39#include <cassert>
40#include <cstdint>
41#include <iterator>
42#include <utility>
43
44#define DEBUG_TYPE "instcombine"
45
46using namespace llvm;
47using namespace PatternMatch;
48
49STATISTIC(NumAggregateReconstructionsSimplified,
50 "Number of aggregate reconstructions turned into reuse of the "
51 "original aggregate");
52
53/// Return true if the value is cheaper to scalarize than it is to leave as a
54/// vector operation. If the extract index \p EI is a constant integer then
55/// some operations may be cheap to scalarize.
56///
57/// FIXME: It's possible to create more instructions than previously existed.
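///
/// For example (illustrative IR; names chosen for exposition): extracting a
/// lane from a single-use binop whose second operand is a constant splat is
/// considered cheap, because the constant lane can be picked out for free:
///   %v = add <4 x i32> %x, <i32 7, i32 7, i32 7, i32 7>
///   %e = extractelement <4 x i32> %v, i64 1
/// can become
///   %x1 = extractelement <4 x i32> %x, i64 1
///   %e  = add i32 %x1, 7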
58static bool cheapToScalarize(Value *V, Value *EI) {
60
61 // If we can pick a scalar constant value out of a vector, that is free.
62 if (auto *C = dyn_cast<Constant>(V))
63 return CEI || C->getSplatValue();
64
66 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
67 // The index needs to be less than the minimum (known) size of the vector,
68 // because for a scalable vector the actual size is only known at run time.
69 return CEI->getValue().ult(EC.getKnownMinValue());
70 }
71
72 // An insertelement to the same constant index as our extract will simplify
73 // to the scalar inserted element. An insertelement to a different constant
74 // index is irrelevant to our extract.
76 return CEI;
77
78 if (match(V, m_OneUse(m_Load(m_Value()))))
79 return true;
80
81 if (match(V, m_OneUse(m_UnOp())))
82 return true;
83
84 Value *V0, *V1;
85 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
86 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
87 return true;
88
89 CmpPredicate UnusedPred;
90 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
91 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
92 return true;
93
94 return false;
95}
96
97// If we have a PHI node with a vector type that is only used to feed
98// itself and to be an operand of an extractelement at a constant index,
99// try to replace the PHI of the vector type with a PHI of a scalar type.
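//
// For example (illustrative IR; names chosen for exposition), a loop-carried
// vector PHI that is only read through extracts at a constant index:
//   %vec = phi <4 x float> [ %init, %entry ], [ %vec.next, %loop ]
//   %vec.next = fadd <4 x float> %vec, <float 1.0, float 1.0, float 1.0, float 1.0>
//   %x = extractelement <4 x float> %vec, i64 0
// can be rewritten around a scalar PHI of the lane-0 values, with the fadd
// scalarized and each extract replaced by that scalar PHI.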
100Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101 PHINode *PN) {
102 SmallVector<Instruction *, 2> Extracts;
103 // The users we want the PHI to have are:
104 // 1) The EI ExtractElement (we already know this)
105 // 2) Possibly more ExtractElements with the same index.
106 // 3) Another operand, which will feed back into the PHI.
107 Instruction *PHIUser = nullptr;
108 for (auto *U : PN->users()) {
109 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
110 if (EI.getIndexOperand() == EU->getIndexOperand())
111 Extracts.push_back(EU);
112 else
113 return nullptr;
114 } else if (!PHIUser) {
115 PHIUser = cast<Instruction>(U);
116 } else {
117 return nullptr;
118 }
119 }
120
121 if (!PHIUser)
122 return nullptr;
123
124 // Verify that this PHI user has one use, which is the PHI itself,
125 // and that it is a binary operation which is cheap to scalarize.
126 // Otherwise, return nullptr.
127 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
128 !(isa<BinaryOperator>(PHIUser)) ||
129 !cheapToScalarize(PHIUser, EI.getIndexOperand()))
130 return nullptr;
131
132 // Create a scalar PHI node that will replace the vector PHI node
133 // just before the current PHI node.
134 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
136 // Scalarize each PHI operand.
137 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
138 Value *PHIInVal = PN->getIncomingValue(i);
139 BasicBlock *inBB = PN->getIncomingBlock(i);
140 Value *Elt = EI.getIndexOperand();
141 // If the operand is the PHI induction variable:
142 if (PHIInVal == PHIUser) {
143 // Scalarize the binary operation. Its first operand is the
144 // scalar PHI, and the second operand is extracted from the other
145 // vector operand.
146 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
147 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
150 B0->getOperand(opId)->getName() + ".Elt"),
151 B0->getIterator());
152 Value *newPHIUser = InsertNewInstWith(
154 scalarPHI, Op, B0), B0->getIterator());
155 scalarPHI->addIncoming(newPHIUser, inBB);
156 } else {
157 // Scalarize PHI input:
158 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
159 // Insert the new instruction into the predecessor basic block.
160 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
161 BasicBlock::iterator InsertPos;
162 if (pos && !isa<PHINode>(pos)) {
163 InsertPos = ++pos->getIterator();
164 } else {
165 InsertPos = inBB->getFirstInsertionPt();
166 }
167
168 InsertNewInstWith(newEI, InsertPos);
169
170 scalarPHI->addIncoming(newEI, inBB);
171 }
172 }
173
174 for (auto *E : Extracts) {
175 replaceInstUsesWith(*E, scalarPHI);
176 // Add old extract to worklist for DCE.
178 }
179
180 return &EI;
181}
182
183Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
184 Value *X;
185 uint64_t ExtIndexC;
186 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
187 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
188 return nullptr;
189
190 ElementCount NumElts =
191 cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
192 Type *DestTy = Ext.getType();
193 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
194 bool IsBigEndian = DL.isBigEndian();
195
196 // If we are casting an integer to vector and extracting a portion, that is
197 // a shift-right and truncate.
198 if (X->getType()->isIntegerTy()) {
199 assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
200 "Expected fixed vector type for bitcast from scalar integer");
201
202 // Big endian requires adjusting the extract index since MSB is at index 0.
203 // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
204 // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
205 if (IsBigEndian)
206 ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
207 unsigned ShiftAmountC = ExtIndexC * DestWidth;
208 if ((!ShiftAmountC ||
209 isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
210 Ext.getVectorOperand()->hasOneUse()) {
211 if (ShiftAmountC)
212 X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
213 if (DestTy->isFloatingPointTy()) {
214 Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
215 Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
216 return new BitCastInst(Trunc, DestTy);
217 }
218 return new TruncInst(X, DestTy);
219 }
220 }
221
222 if (!X->getType()->isVectorTy())
223 return nullptr;
224
225 // If this extractelement is using a bitcast from a vector of the same number
226 // of elements, see if we can find the source element from the source vector:
227 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
228 auto *SrcTy = cast<VectorType>(X->getType());
229 ElementCount NumSrcElts = SrcTy->getElementCount();
230 if (NumSrcElts == NumElts)
231 if (Value *Elt = findScalarElement(X, ExtIndexC))
232 return new BitCastInst(Elt, DestTy);
233
234 assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
235 "Src and Dst must be the same sort of vector type");
236
237 // If the source elements are wider than the destination, try to shift and
238 // truncate a subset of scalar bits of an insert op.
239 if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
240 Value *Scalar;
241 Value *Vec;
242 uint64_t InsIndexC;
243 if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
244 m_ConstantInt(InsIndexC))))
245 return nullptr;
246
247 // The extract must be from the subset of vector elements that we inserted
248 // into. Example: if we inserted element 1 of a <2 x i64> and we are
249 // extracting an i16 (narrowing ratio = 4), then this extract must be from one
250 // of elements 4-7 of the bitcasted vector.
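    // For example (illustrative IR, little-endian; names chosen for exposition):
    //   %v = insertelement <2 x i64> %vec, i64 %s, i64 1
    //   %b = bitcast <2 x i64> %v to <8 x i16>
    //   %e = extractelement <8 x i16> %b, i64 5
    // has NarrowingRatio 4, and the extract reads bits [16,32) of %s, so it can
    // become a lshr/trunc of the scalar %s instead of touching the vector.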
251 unsigned NarrowingRatio =
252 NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
253
254 if (ExtIndexC / NarrowingRatio != InsIndexC) {
255 // Remove insertelement, if we don't use the inserted element.
256 // extractelement (bitcast (insertelement (Vec, b)), a) ->
257 // extractelement (bitcast (Vec), a)
258 // FIXME: this should be moved to SimplifyDemandedVectorElts,
259 // once scalable vectors are supported.
260 if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
261 Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
262 return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
263 }
264 return nullptr;
265 }
266
267 // We are extracting part of the original scalar. How that scalar is
268 // inserted into the vector depends on the endian-ness. Example:
269 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
270 // +--+--+--+--+--+--+--+--+
271 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
272 // extelt <4 x i16> V', 3: | |S2|S3|
273 // +--+--+--+--+--+--+--+--+
274 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
275 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
276 // In this example, we must right-shift little-endian. Big-endian is just a
277 // truncate.
278 unsigned Chunk = ExtIndexC % NarrowingRatio;
279 if (IsBigEndian)
280 Chunk = NarrowingRatio - 1 - Chunk;
281
282 // Bail out if this is an FP vector to FP vector sequence. That would take
283 // more instructions than we started with unless there is no shift, and it
284 // may not be handled as well in the backend.
285 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
286 bool NeedDestBitcast = DestTy->isFloatingPointTy();
287 if (NeedSrcBitcast && NeedDestBitcast)
288 return nullptr;
289
290 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
291 unsigned ShAmt = Chunk * DestWidth;
292
293 // TODO: This limitation is more strict than necessary. We could sum the
294 // number of new instructions and subtract the number eliminated to know if
295 // we can proceed.
296 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
297 if (NeedSrcBitcast || NeedDestBitcast)
298 return nullptr;
299
300 if (NeedSrcBitcast) {
301 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
302 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
303 }
304
305 if (ShAmt) {
306 // Bail out if we could end up with more instructions than we started with.
307 if (!Ext.getVectorOperand()->hasOneUse())
308 return nullptr;
309 Scalar = Builder.CreateLShr(Scalar, ShAmt);
310 }
311
312 if (NeedDestBitcast) {
313 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
314 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
315 }
316 return new TruncInst(Scalar, DestTy);
317 }
318
319 return nullptr;
320}
321
322/// Find elements of V demanded by UserInstr.
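/// For example (illustrative): if V has type <4 x i32> and UserInstr is
///   %e = extractelement <4 x i32> %v, i64 2
/// then only element 2 is demanded and the returned mask is 0b0100.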
324 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
325
326 // Conservatively assume that all elements are needed.
327 APInt UsedElts(APInt::getAllOnes(VWidth));
328
329 switch (UserInstr->getOpcode()) {
330 case Instruction::ExtractElement: {
332 assert(EEI->getVectorOperand() == V);
334 if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
335 UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
336 }
337 break;
338 }
339 case Instruction::ShuffleVector: {
340 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
341 unsigned MaskNumElts =
342 cast<FixedVectorType>(UserInstr->getType())->getNumElements();
343
344 UsedElts = APInt(VWidth, 0);
345 for (unsigned i = 0; i < MaskNumElts; i++) {
346 unsigned MaskVal = Shuffle->getMaskValue(i);
347 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
348 continue;
349 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
350 UsedElts.setBit(MaskVal);
351 if (Shuffle->getOperand(1) == V &&
352 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
353 UsedElts.setBit(MaskVal - VWidth);
354 }
355 break;
356 }
357 default:
358 break;
359 }
360 return UsedElts;
361}
362
363/// Find union of elements of V demanded by all its users.
364/// If it is known by querying findDemandedEltsBySingleUser that
365/// no user demands an element of V, then the corresponding bit
366/// remains unset in the returned value.
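/// For example (illustrative): if one user extracts element 0 and another
/// extracts element 3 of a <4 x i32> value, the returned union is 0b1001.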
368 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
369
370 APInt UnionUsedElts(VWidth, 0);
371 for (const Use &U : V->uses()) {
372 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
373 UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
374 } else {
375 UnionUsedElts = APInt::getAllOnes(VWidth);
376 break;
377 }
378
379 if (UnionUsedElts.isAllOnes())
380 break;
381 }
382
383 return UnionUsedElts;
384}
385
386/// Given a constant index for an extractelement or insertelement instruction,
387/// return it with the canonical type if it isn't already canonical. We
388/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
389/// matter, we just want a consistent type to simplify CSE.
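/// For example (illustrative): an index written as 'i32 1' is rewritten as
/// 'i64 1'; an index that is already i64, or whose value does not fit in
/// 64 bits, is left alone (nullptr is returned).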
391 const unsigned IndexBW = IndexC->getBitWidth();
392 if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
393 return nullptr;
394 return ConstantInt::get(IndexC->getContext(),
395 IndexC->getValue().zextOrTrunc(64));
396}
397
399 Value *SrcVec = EI.getVectorOperand();
400 Value *Index = EI.getIndexOperand();
401 if (Value *V = simplifyExtractElementInst(SrcVec, Index,
402 SQ.getWithInstruction(&EI)))
403 return replaceInstUsesWith(EI, V);
404
405 // extractelt (select %x, %vec1, %vec2), %const ->
406 // select %x, %vec1[%const], %vec2[%const]
407 // TODO: Support constant folding of multiple select operands:
408 // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
409 // If the extractelement would, for instance, perform an out-of-bounds access
410 // because of the values of %c1 and/or %c2, the sequence could be optimized
411 // early. This is currently not possible because constant folding will hit
412 // an unreachable assertion if it doesn't find a constant operand.
414 if (SI->getCondition()->getType()->isIntegerTy() &&
416 if (Instruction *R = FoldOpIntoSelect(EI, SI))
417 return R;
418
419 // If extracting a specified index from the vector, see if we can recursively
420 // find a previously computed scalar that was inserted into the vector.
421 auto *IndexC = dyn_cast<ConstantInt>(Index);
422 bool HasKnownValidIndex = false;
423 if (IndexC) {
424 // Canonicalize type of constant indices to i64 to simplify CSE
425 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
426 return replaceOperand(EI, 1, NewIdx);
427
429 unsigned NumElts = EC.getKnownMinValue();
430 HasKnownValidIndex = IndexC->getValue().ult(NumElts);
431
433 Intrinsic::ID IID = II->getIntrinsicID();
434 // The index needs to be less than the minimum (known) size of the vector,
435 // because for a scalable vector the actual size is only known at run time.
436 if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
437 Type *Ty = EI.getType();
438 unsigned BitWidth = Ty->getIntegerBitWidth();
439 Value *Idx;
440 // Return the index when its value does not exceed the allowed limit
441 // for the element type of the vector; otherwise return poison.
442 if (IndexC->getValue().getActiveBits() <= BitWidth)
443 Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
444 else
445 Idx = PoisonValue::get(Ty);
446 return replaceInstUsesWith(EI, Idx);
447 }
448 }
449
450 // InstSimplify should handle cases where the index is invalid.
451 // For a fixed-length vector, it's invalid to extract an out-of-range element.
452 if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
453 return nullptr;
454
455 if (Instruction *I = foldBitcastExtElt(EI))
456 return I;
457
458 // If there's a vector PHI feeding a scalar use through this extractelement
459 // instruction, try to scalarize the PHI.
460 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
461 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
462 return ScalarPHI;
463 }
464
465 // If SrcVec is a subvector starting at index 0, extract from the
466 // wider source vector
467 Value *V;
468 if (match(SrcVec,
470 return ExtractElementInst::Create(V, Index);
471
472 // TODO: come up with an n-ary matcher that subsumes both unary and
473 // binary matchers.
474 UnaryOperator *UO;
475 if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
476 // extelt (unop X), Index --> unop (extelt X, Index)
477 Value *X = UO->getOperand(0);
478 Value *E = Builder.CreateExtractElement(X, Index);
480 }
481
482 // If the binop is not speculatable, we cannot hoist the extractelement if
483 // it may make the operand poison.
484 BinaryOperator *BO;
485 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
486 (HasKnownValidIndex ||
488 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
489 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
490 Value *E0 = Builder.CreateExtractElement(X, Index);
491 Value *E1 = Builder.CreateExtractElement(Y, Index);
492 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
493 }
494
495 Value *X, *Y;
496 CmpPredicate Pred;
497 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
498 cheapToScalarize(SrcVec, Index)) {
499 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
500 Value *E0 = Builder.CreateExtractElement(X, Index);
501 Value *E1 = Builder.CreateExtractElement(Y, Index);
502 CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
503 return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
504 SrcCmpInst);
505 }
506
507 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
508 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
509 // InstSimplify already handled the case where the indices are constants
510 // and equal by value; so if both are constants here, they must be different
511 // values. Extract from the pre-inserted value instead.
512 if (isa<Constant>(IE->getOperand(2)) && IndexC)
513 return replaceOperand(EI, 0, IE->getOperand(0));
514 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
515 auto *VecType = cast<VectorType>(GEP->getType());
516 ElementCount EC = VecType->getElementCount();
517 uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
518 if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
519 // Find out why we have a vector result - these are a few examples:
520 // 1. We have a scalar pointer and a vector of indices, or
521 // 2. We have a vector of pointers and a scalar index, or
522 // 3. We have a vector of pointers and a vector of indices, etc.
523 // Here we only consider combining when there is exactly one vector
524 // operand, since the optimization is less obviously a win due to
525 // needing more than one extractelement.
526
527 unsigned VectorOps =
528 llvm::count_if(GEP->operands(), [](const Value *V) {
529 return isa<VectorType>(V->getType());
530 });
531 if (VectorOps == 1) {
532 Value *NewPtr = GEP->getPointerOperand();
533 if (isa<VectorType>(NewPtr->getType()))
534 NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
535
537 for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
538 Value *Op = GEP->getOperand(I);
539 if (isa<VectorType>(Op->getType()))
540 NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
541 else
542 NewOps.push_back(Op);
543 }
544
546 GEP->getSourceElementType(), NewPtr, NewOps);
547 NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
548 return NewGEP;
549 }
550 }
551 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
552 int SplatIndex = getSplatIndex(SVI->getShuffleMask());
553 // We know the all-0 splat must be reading from the first operand, even
554 // in the case of scalable vectors (vscale is always > 0).
555 if (SplatIndex == 0)
556 return ExtractElementInst::Create(SVI->getOperand(0),
557 Builder.getInt64(0));
558
559 if (isa<FixedVectorType>(SVI->getType())) {
560 std::optional<int> SrcIdx;
561 // getSplatIndex returns -1 to mean not-found.
562 if (SplatIndex != -1)
563 SrcIdx = SplatIndex;
564 else if (ConstantInt *CI = dyn_cast<ConstantInt>(Index))
565 SrcIdx = SVI->getMaskValue(CI->getZExtValue());
566
567 if (SrcIdx) {
568 Value *Src;
569 unsigned LHSWidth =
570 cast<FixedVectorType>(SVI->getOperand(0)->getType())
571 ->getNumElements();
572
573 if (*SrcIdx < 0)
575 if (*SrcIdx < (int)LHSWidth)
576 Src = SVI->getOperand(0);
577 else {
578 *SrcIdx -= LHSWidth;
579 Src = SVI->getOperand(1);
580 }
581 Type *Int64Ty = Type::getInt64Ty(EI.getContext());
583 Src, ConstantInt::get(Int64Ty, *SrcIdx, false));
584 }
585 }
586 } else if (auto *CI = dyn_cast<CastInst>(I)) {
587 // Canonicalize extractelement(cast) -> cast(extractelement).
588 // Bitcasts can change the number of vector elements, and they cost
589 // nothing.
590 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
591 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
592 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
593 }
594 }
595 }
596
597 // Run demanded elements after other transforms as this can drop flags on
598 // binops. If there are two paths to the same final result, we prefer the
599 // one which doesn't force us to drop flags.
600 if (IndexC) {
602 unsigned NumElts = EC.getKnownMinValue();
603 // This instruction only demands the single element from the input vector.
604 // Skip scalable types; the number of elements is unknown at
605 // compile time.
606 if (!EC.isScalable() && NumElts != 1) {
607 // If the input vector has a single use, simplify it based on this use
608 // property.
609 if (SrcVec->hasOneUse()) {
610 APInt PoisonElts(NumElts, 0);
611 APInt DemandedElts(NumElts, 0);
612 DemandedElts.setBit(IndexC->getZExtValue());
613 if (Value *V =
614 SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
615 return replaceOperand(EI, 0, V);
616 } else {
617 // If the input vector has multiple uses, simplify it based on a union
618 // of all elements used.
619 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
620 if (!DemandedElts.isAllOnes()) {
621 APInt PoisonElts(NumElts, 0);
623 SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
624 true /* AllowMultipleUsers */)) {
625 if (V != SrcVec) {
626 Worklist.addValue(SrcVec);
627 SrcVec->replaceAllUsesWith(V);
628 return &EI;
629 }
630 }
631 }
632 }
633 }
634 }
635 return nullptr;
636}
637
638/// If V is a shuffle of values that ONLY returns elements from either LHS or
639/// RHS, return the shuffle mask and true. Otherwise, return false.
641 SmallVectorImpl<int> &Mask) {
642 assert(LHS->getType() == RHS->getType() &&
643 "Invalid CollectSingleShuffleElements");
644 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
645
646 if (match(V, m_Poison())) {
647 Mask.assign(NumElts, -1);
648 return true;
649 }
650
651 if (V == LHS) {
652 for (unsigned i = 0; i != NumElts; ++i)
653 Mask.push_back(i);
654 return true;
655 }
656
657 if (V == RHS) {
658 for (unsigned i = 0; i != NumElts; ++i)
659 Mask.push_back(i + NumElts);
660 return true;
661 }
662
664 // If this is an insert of an extract from some other vector, include it.
665 Value *VecOp = IEI->getOperand(0);
666 Value *ScalarOp = IEI->getOperand(1);
667 Value *IdxOp = IEI->getOperand(2);
668
669 if (!isa<ConstantInt>(IdxOp))
670 return false;
671 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
672
673 if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
674 // We can handle this if the vector we are inserting into is
675 // transitively ok.
676 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
677 // If so, update the mask to reflect the inserted poison.
678 Mask[InsertedIdx] = -1;
679 return true;
680 }
681 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
682 if (isa<ConstantInt>(EI->getOperand(1))) {
683 unsigned ExtractedIdx =
684 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
685 unsigned NumLHSElts =
686 cast<FixedVectorType>(LHS->getType())->getNumElements();
687
688 // This must be extracting from either LHS or RHS.
689 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
690 // We can handle this if the vector we are inserting into is
691 // transitively ok.
692 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
693 // If so, update the mask to reflect the inserted value.
694 if (EI->getOperand(0) == LHS) {
695 Mask[InsertedIdx % NumElts] = ExtractedIdx;
696 } else {
697 assert(EI->getOperand(0) == RHS);
698 Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
699 }
700 return true;
701 }
702 }
703 }
704 }
705 }
706
707 return false;
708}
709
710/// If we have insertion into a vector that is wider than the vector that we
711/// are extracting from, try to widen the source vector to allow a single
712/// shufflevector to replace one or more insert/extract pairs.
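///
/// For example (illustrative IR; names chosen for exposition): when elements
/// extracted from a <2 x i8> are inserted into a <4 x i8>, the source can be
/// widened once with
///   %wide = shufflevector <2 x i8> %src, <2 x i8> poison,
///                         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
/// and the insert/extract pairs can then collapse into a single shuffle of
/// %wide.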
714 ExtractElementInst *ExtElt,
715 InstCombinerImpl &IC) {
716 auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
717 auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
718 unsigned NumInsElts = InsVecType->getNumElements();
719 unsigned NumExtElts = ExtVecType->getNumElements();
720
721 // The inserted-to vector must be wider than the extracted-from vector.
722 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
723 NumExtElts >= NumInsElts)
724 return false;
725
726 Value *ExtVecOp = ExtElt->getVectorOperand();
727 // Bail out on constant vectors.
728 if (isa<ConstantData>(ExtVecOp))
729 return false;
730
731 // Create a shuffle mask to widen the extracted-from vector using poison
732 // values. The mask selects all of the values of the original vector followed
733 // by as many poison values as needed to create a vector of the same length
734 // as the inserted-to vector.
735 SmallVector<int, 16> ExtendMask;
736 for (unsigned i = 0; i < NumExtElts; ++i)
737 ExtendMask.push_back(i);
738 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
739 ExtendMask.push_back(-1);
740
741 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
742 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
743 ? ExtVecOpInst->getParent()
744 : ExtElt->getParent();
745
746 // TODO: This restriction matches the basic block check below when creating
747 // new extractelement instructions. If that limitation is removed, this one
748 // could also be removed. But for now, we just bail out to ensure that we
749 // will replace the extractelement instruction that is feeding our
750 // insertelement instruction. This allows the insertelement to then be
751 // replaced by a shufflevector. If the insertelement is not replaced, we can
752 // induce infinite looping because there's an optimization for extractelement
753 // that will delete our widening shuffle. This would trigger another attempt
754 // here to create that shuffle, and we spin forever.
755 if (InsertionBlock != InsElt->getParent())
756 return false;
757
758 // TODO: This restriction matches the check in visitInsertElementInst() and
759 // prevents an infinite loop caused by not turning the extract/insert pair
760 // into a shuffle. We really should not need either check, but we're lacking
761 // folds for shufflevectors because we're afraid to generate shuffle masks
762 // that the backend can't handle.
763 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
764 return false;
765
766 auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
767
768 // Insert the new shuffle after the vector operand of the extract is defined
769 // (as long as it's not a PHI) or at the start of the basic block of the
770 // extract, so any subsequent extracts in the same basic block can use it.
771 // TODO: Insert before the earliest ExtractElementInst that is replaced.
772 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
773 WideVec->insertAfter(ExtVecOpInst->getIterator());
774 else
775 IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());
776
777 // Replace extracts from the original narrow vector with extracts from the new
778 // wide vector.
779 for (User *U : ExtVecOp->users()) {
781 if (!OldExt || OldExt->getParent() != WideVec->getParent())
782 continue;
783 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
784 IC.InsertNewInstWith(NewExt, OldExt->getIterator());
785 IC.replaceInstUsesWith(*OldExt, NewExt);
786 // Add the old extracts to the worklist for DCE. We can't remove the
787 // extracts directly, because they may still be used by the calling code.
788 IC.addToWorklist(OldExt);
789 }
790
791 return true;
792}
793
794/// We are building a shuffle to create V, which is a sequence of insertelement,
795/// extractelement pairs. If PermittedRHS is set, then we must either use it or
796/// not rely on the second vector source. Return a std::pair containing the
797/// left and right vectors of the proposed shuffle (or 0), and set the Mask
798/// parameter as required.
799///
800/// Note: we intentionally don't try to fold earlier shuffles since they have
801/// often been chosen carefully to be efficiently implementable on the target.
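///
/// For example (illustrative IR; names chosen for exposition), the chain
///   %e0 = extractelement <4 x i32> %a, i64 0
///   %e1 = extractelement <4 x i32> %b, i64 1
///   %i0 = insertelement <4 x i32> poison, i32 %e0, i64 0
///   %i1 = insertelement <4 x i32> %i0, i32 %e1, i64 1
/// can be described by the operand pair (%a, %b) with mask <0, 5, poison, poison>.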
802using ShuffleOps = std::pair<Value *, Value *>;
803
805 Value *PermittedRHS,
806 InstCombinerImpl &IC, bool &Rerun) {
807 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
808 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
809
810 if (match(V, m_Poison())) {
811 Mask.assign(NumElts, -1);
812 return std::make_pair(
813 PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
814 }
815
817 Mask.assign(NumElts, 0);
818 return std::make_pair(V, nullptr);
819 }
820
822 // If this is an insert of an extract from some other vector, include it.
823 Value *VecOp = IEI->getOperand(0);
824 Value *ScalarOp = IEI->getOperand(1);
825 Value *IdxOp = IEI->getOperand(2);
826
828 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
829 unsigned ExtractedIdx =
830 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
831 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
832
833 // Either the extracted from or inserted into vector must be RHSVec,
834 // otherwise we'd end up with a shuffle of three inputs.
835 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
836 Value *RHS = EI->getOperand(0);
837 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
838 assert(LR.second == nullptr || LR.second == RHS);
839
840 if (LR.first->getType() != RHS->getType()) {
841 // Although we are giving up for now, see if we can create extracts
842 // that match the inserts for another round of combining.
843 if (replaceExtractElements(IEI, EI, IC))
844 Rerun = true;
845
846 // We tried our best, but we can't find anything compatible with RHS
847 // further up the chain. Return a trivial shuffle.
848 for (unsigned i = 0; i < NumElts; ++i)
849 Mask[i] = i;
850 return std::make_pair(V, nullptr);
851 }
852
853 unsigned NumLHSElts =
854 cast<FixedVectorType>(RHS->getType())->getNumElements();
855 Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
856 return std::make_pair(LR.first, RHS);
857 }
858
859 if (VecOp == PermittedRHS) {
860 // We've gone as far as we can: anything on the other side of the
861 // extractelement will already have been converted into a shuffle.
862 unsigned NumLHSElts =
864 ->getNumElements();
865 for (unsigned i = 0; i != NumElts; ++i)
866 Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
867 return std::make_pair(EI->getOperand(0), PermittedRHS);
868 }
869
870 // If this insertelement is a chain that comes from exactly these two
871 // vectors, return the vector and the effective shuffle.
872 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
873 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
874 Mask))
875 return std::make_pair(EI->getOperand(0), PermittedRHS);
876 }
877 }
878 }
879
880 // Otherwise, we can't do anything fancy. Return an identity vector.
881 for (unsigned i = 0; i != NumElts; ++i)
882 Mask.push_back(i);
883 return std::make_pair(V, nullptr);
884}
885
886/// Look for a chain of insertvalue's that fully define an aggregate, and trace
887/// back the values inserted to see if they were all extractvalue'd from
888/// the same source aggregate at the exact same element indices.
889/// If they were, just reuse the source aggregate.
890/// This potentially deals with PHI indirections.
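///
/// For example (illustrative IR; names chosen for exposition):
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } poison, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// Here %i1 rebuilds %agg element by element, so it can simply be replaced by
/// %agg.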
892 InsertValueInst &OrigIVI) {
893 Type *AggTy = OrigIVI.getType();
894 unsigned NumAggElts;
895 switch (AggTy->getTypeID()) {
896 case Type::StructTyID:
897 NumAggElts = AggTy->getStructNumElements();
898 break;
899 case Type::ArrayTyID:
900 NumAggElts = AggTy->getArrayNumElements();
901 break;
902 default:
903 llvm_unreachable("Unhandled aggregate type?");
904 }
905
906 // Arbitrary aggregate size cut-off. The motivation for the limit of 2 is to
907 // be able to handle the clang C++ exception struct (hardcoded as {i8*, i32}).
908 // FIXME: any interesting patterns to be caught with a larger limit?
909 assert(NumAggElts > 0 && "Aggregate should have elements.");
910 if (NumAggElts > 2)
911 return nullptr;
912
913 static constexpr auto NotFound = std::nullopt;
914 static constexpr auto FoundMismatch = nullptr;
915
916 // Try to find a value of each element of an aggregate.
917 // FIXME: deal with more complex, not one-dimensional, aggregate types
918 SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
919
920 // Do we know values for each element of the aggregate?
921 auto KnowAllElts = [&AggElts]() {
922 return !llvm::is_contained(AggElts, NotFound);
923 };
924
925 int Depth = 0;
926
927 // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
928 // every element being overwritten twice, which should never happen.
929 static const int DepthLimit = 2 * NumAggElts;
930
931 // Recurse up the chain of `insertvalue` aggregate operands until either we've
932 // reconstructed the full initializer or can't visit any more `insertvalue`'s.
933 for (InsertValueInst *CurrIVI = &OrigIVI;
934 Depth < DepthLimit && CurrIVI && !KnowAllElts();
935 CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
936 ++Depth) {
937 auto *InsertedValue =
938 dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
939 if (!InsertedValue)
940 return nullptr; // Inserted value must be produced by an instruction.
941
942 ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
943
944 // Don't bother with more than single-level aggregates.
945 if (Indices.size() != 1)
946 return nullptr; // FIXME: deal with more complex aggregates?
947
948 // Now, we may have already previously recorded the value for this element
949 // of an aggregate. If we did, that means the CurrIVI will later be
950 // overwritten with the already-recorded value. But if not, let's record it!
951 std::optional<Instruction *> &Elt = AggElts[Indices.front()];
952 Elt = Elt.value_or(InsertedValue);
953
954 // FIXME: should we handle chain-terminating undef base operand?
955 }
956
957 // Was that sufficient to deduce the full initializer for the aggregate?
958 if (!KnowAllElts())
959 return nullptr; // Give up then.
960
961 // We now want to find the source[s] of the aggregate elements we've found.
962 // And with "source" we mean the original aggregate[s] from which
963 // the inserted elements were extracted. This may require PHI translation.
964
965 enum class AggregateDescription {
966 /// When analyzing the value that was inserted into an aggregate, we did
967 /// not manage to find a defining `extractvalue` instruction to analyze.
968 NotFound,
969 /// When analyzing the value that was inserted into an aggregate, we did
970 /// manage to find defining `extractvalue` instruction[s], and everything
971 /// matched perfectly - aggregate type, element insertion/extraction index.
972 Found,
973 /// When analyzing the value that was inserted into an aggregate, we did
974 /// manage to find a defining `extractvalue` instruction, but there was
975 /// a mismatch: either the source type from which the value was extracted
976 /// didn't match the aggregate type into which it was inserted,
977 /// or the extraction/insertion indices mismatched,
978 /// or different elements had different source aggregates.
979 FoundMismatch
980 };
981 auto Describe = [](std::optional<Value *> SourceAggregate) {
982 if (SourceAggregate == NotFound)
983 return AggregateDescription::NotFound;
984 if (*SourceAggregate == FoundMismatch)
985 return AggregateDescription::FoundMismatch;
986 return AggregateDescription::Found;
987 };
988
989 // If an aggregate element is defined in UseBB, we can't use it in PredBB.
990 bool EltDefinedInUseBB = false;
991
992 // Given the value \p Elt that was being inserted into element \p EltIdx of an
993 // aggregate AggTy, see if \p Elt was originally defined by an
994 // appropriate extractvalue (same element index, same aggregate type).
995 // If found, return the source aggregate from which the extraction was.
996 // If \p PredBB is provided, does PHI translation of an \p Elt first.
997 auto FindSourceAggregate =
998 [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
999 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1000 // For now(?), only deal with, at most, a single level of PHI indirection.
1001 if (UseBB && PredBB) {
1002 Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
1003 if (Elt && Elt->getParent() == *UseBB)
1004 EltDefinedInUseBB = true;
1005 }
1006 // FIXME: deal with multiple levels of PHI indirection?
1007
1008 // Did we find an extraction?
1009 auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
1010 if (!EVI)
1011 return NotFound;
1012
1013 Value *SourceAggregate = EVI->getAggregateOperand();
1014
1015 // Is the extraction from the same type into which the insertion was?
1016 if (SourceAggregate->getType() != AggTy)
1017 return FoundMismatch;
1018 // And the element index doesn't change between extraction and insertion?
1019 if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
1020 return FoundMismatch;
1021
1022 return SourceAggregate; // AggregateDescription::Found
1023 };
1024
1025 // Given elements AggElts that were constructing an aggregate OrigIVI,
1026 // see if we can find an appropriate source aggregate for each of the elements,
1027 // and check that it's the same aggregate for each element. If so, return it.
1028 auto FindCommonSourceAggregate =
1029 [&](std::optional<BasicBlock *> UseBB,
1030 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1031 std::optional<Value *> SourceAggregate;
1032
1033 for (auto I : enumerate(AggElts)) {
1034 assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1035 "We don't store nullptr in SourceAggregate!");
1036 assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1037 (I.index() != 0) &&
1038 "SourceAggregate should be valid after the first element,");
1039
1040 // For this element, is there a plausible source aggregate?
1041 // FIXME: we could special-case undef element, IFF we know that in the
1042 // source aggregate said element isn't poison.
1043 std::optional<Value *> SourceAggregateForElement =
1044 FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
1045
1046 // Okay, what have we found? Does that correlate with previous findings?
1047
1048 // Regardless of whether or not we have previously found a source
1049 // aggregate for previous elements (if any), if we didn't find one for
1050 // this element, pass through whatever we have just found.
1051 if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
1052 return SourceAggregateForElement;
1053
1054 // Okay, we have found source aggregate for this element.
1055 // Let's see what we already know from previous elements, if any.
1056 switch (Describe(SourceAggregate)) {
1057 case AggregateDescription::NotFound:
1058 // This is apparently the first element that we have examined.
1059 SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1060 continue; // Great, now look at next element.
1061 case AggregateDescription::Found:
1062 // We have previously already successfully examined other elements.
1063 // Is this the same source aggregate we've found for other elements?
1064 if (*SourceAggregateForElement != *SourceAggregate)
1065 return FoundMismatch;
1066 continue; // Still the same aggregate, look at next element.
1067 case AggregateDescription::FoundMismatch:
1068 llvm_unreachable("Can't happen. We would have early-exited then.");
1069 };
1070 }
1071
1072 assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1073 "Must be a valid Value");
1074 return *SourceAggregate;
1075 };
1076
1077 std::optional<Value *> SourceAggregate;
1078
1079 // Can we find the source aggregate without looking at predecessors?
1080 SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
1081 /*PredBB=*/std::nullopt);
1082 if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
1083 if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
1084 return nullptr; // Conflicting source aggregates!
1085 ++NumAggregateReconstructionsSimplified;
1086 return replaceInstUsesWith(OrigIVI, *SourceAggregate);
1087 }
1088
1089 // Okay, apparently we need to look at predecessors.
1090
1091 // We should be smart about picking the "use" basic block, which will be the
1092 // merge point for the aggregate, where we'll insert the final PHI that will be
1093 // used instead of OrigIVI. The basic block of OrigIVI is *not* the right choice.
1094 // We should look at the blocks in which each of the AggElts is defined;
1095 // they should all be defined in the same basic block.
1096 BasicBlock *UseBB = nullptr;
1097
1098 for (const std::optional<Instruction *> &I : AggElts) {
1099 BasicBlock *BB = (*I)->getParent();
1100 // If it's the first instruction we've encountered, record the basic block.
1101 if (!UseBB) {
1102 UseBB = BB;
1103 continue;
1104 }
1105 // Otherwise, this must be the same basic block we've seen previously.
1106 if (UseBB != BB)
1107 return nullptr;
1108 }
1109
1110 // If *all* of the elements are basic-block-independent, meaning they are
1111 // either function arguments, or constant expressions, then if we didn't
1112 // handle them without predecessor-aware handling, we won't handle them now.
1113 if (!UseBB)
1114 return nullptr;
1115
1116 // If we didn't manage to find source aggregate without looking at
1117 // predecessors, and there are no predecessors to look at, then we're done.
1118 if (pred_empty(UseBB))
1119 return nullptr;
1120
1121 // Arbitrary predecessor count limit.
1122 static const int PredCountLimit = 64;
1123
1124 // Cache the (non-uniqified!) list of predecessors in a vector,
1125 // checking the limit at the same time for efficiency.
1126 SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1127 for (BasicBlock *Pred : predecessors(UseBB)) {
1128 // Don't bother if there are too many predecessors.
1129 if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1130 return nullptr;
1131 Preds.emplace_back(Pred);
1132 }
1133
1134 // For each predecessor, what is the source aggregate
1135 // from which all the elements were originally extracted?
1136 // Note that we want for the map to have stable iteration order!
1138 bool FoundSrcAgg = false;
1139 for (BasicBlock *Pred : Preds) {
1140 std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1141 SourceAggregates.try_emplace(Pred);
1142 // Did we already evaluate this predecessor?
1143 if (!IV.second)
1144 continue;
1145
1146 // Let's hope that, when coming from predecessor Pred, all elements of the
1147 // aggregate produced by OrigIVI were originally extracted from
1148 // the same aggregate. Is that so? Can we find said original aggregate?
1149 SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1150 if (Describe(SourceAggregate) == AggregateDescription::Found) {
1151 FoundSrcAgg = true;
1152 IV.first->second = *SourceAggregate;
1153 } else {
1154 // If UseBB is the single successor of Pred, we can add InsertValue to
1155 // Pred.
1156 auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1157 if (!BI || !BI->isUnconditional())
1158 return nullptr;
1159 }
1160 }
1161
1162 if (!FoundSrcAgg)
1163 return nullptr;
1164
1165 // Do some sanity checks if we need to add insertvalue instructions into predecessors.
1166 auto OrigBB = OrigIVI.getParent();
1167 for (auto &It : SourceAggregates) {
1168 if (Describe(It.second) == AggregateDescription::Found)
1169 continue;
1170
1171 // Element is defined in UseBB, so it can't be used in predecessors.
1172 if (EltDefinedInUseBB)
1173 return nullptr;
1174
1175 // Doing this transformation across a loop boundary may create a dead loop, so
1176 // we should avoid that situation. But since LoopInfo is not generally
1177 // available, we must be conservative here.
1178 // If OrigIVI is in UseBB and UseBB is the only successor of PredBB, PredBB
1179 // can't be in an inner loop.
1180 if (UseBB != OrigBB)
1181 return nullptr;
1182
1183 // Avoid constructing a constant aggregate, because keeping the constant
1184 // values separate may expose more optimizations.
1185 bool ConstAgg = true;
1186 for (auto Val : AggElts) {
1187 Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
1188 if (!isa<Constant>(Elt)) {
1189 ConstAgg = false;
1190 break;
1191 }
1192 }
1193 if (ConstAgg)
1194 return nullptr;
1195 }
1196
1197 // For predecessors without appropriate source aggregate, create one in the
1198 // predecessor.
1199 for (auto &It : SourceAggregates) {
1200 if (Describe(It.second) == AggregateDescription::Found)
1201 continue;
1202
1203 BasicBlock *Pred = It.first;
1204 Builder.SetInsertPoint(Pred->getTerminator());
1205 Value *V = PoisonValue::get(AggTy);
1206 for (auto [Idx, Val] : enumerate(AggElts)) {
1207 Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
1208 V = Builder.CreateInsertValue(V, Elt, Idx);
1209 }
1210
1211 It.second = V;
1212 }
1213
1214 // All good! Now we just need to thread the source aggregates here.
1215 // Note that we have to insert the new PHI here, ourselves, because we can't
1216 // rely on InstCombinerImpl::run() inserting it into the right basic block.
1217 // Note that the same block can be a predecessor more than once,
1218 // and we need to preserve that invariant for the PHI node.
1220 Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
1221 auto *PHI =
1222 Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
1223 for (BasicBlock *Pred : Preds)
1224 PHI->addIncoming(SourceAggregates[Pred], Pred);
1225
1226 ++NumAggregateReconstructionsSimplified;
1227 return replaceInstUsesWith(OrigIVI, PHI);
1228}
1229
1230/// Try to find redundant insertvalue instructions, like the following ones:
1231/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1232/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1233 /// Here the second instruction inserts values at the same indices as the
1234 /// first one, making the first one redundant.
1235/// It should be transformed to:
1236/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1239 I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
1240 SQ.getWithInstruction(&I)))
1241 return replaceInstUsesWith(I, V);
1242
1243 bool IsRedundant = false;
1244 ArrayRef<unsigned int> FirstIndices = I.getIndices();
1245
1246 // If there is a chain of insertvalue instructions (each of them except the
1247 // last one has only one use and it's another insertvalue insn from this
1248 // chain), check if any of the 'children' uses the same indices as the first
1249 // instruction. In this case, the first one is redundant.
1250 Value *V = &I;
1251 unsigned Depth = 0;
1252 while (V->hasOneUse() && Depth < 10) {
1253 User *U = V->user_back();
1254 auto UserInsInst = dyn_cast<InsertValueInst>(U);
1255 if (!UserInsInst || U->getOperand(0) != V)
1256 break;
1257 if (UserInsInst->getIndices() == FirstIndices) {
1258 IsRedundant = true;
1259 break;
1260 }
1261 V = UserInsInst;
1262 Depth++;
1263 }
1264
1265 if (IsRedundant)
1266 return replaceInstUsesWith(I, I.getOperand(0));
1267
1269 return NewI;
1270
1271 return nullptr;
1272}
1273
1275 // Cannot analyze a scalable type; the number of elements is not a compile-time
1276 // constant.
1278 return false;
1279
1280 int MaskSize = Shuf.getShuffleMask().size();
1281 int VecSize =
1282 cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
1283
1284 // A vector select does not change the size of the operands.
1285 if (MaskSize != VecSize)
1286 return false;
1287
1288 // Each mask element must be undefined or choose a vector element from one of
1289 // the source operands without crossing vector lanes.
1290 for (int i = 0; i != MaskSize; ++i) {
1291 int Elt = Shuf.getMaskValue(i);
1292 if (Elt != -1 && Elt != i && Elt != i + VecSize)
1293 return false;
1294 }
1295
1296 return true;
1297}
1298
1299/// Turn a chain of inserts that splats a value into an insert + shuffle:
1300/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1301/// shufflevector(insertelt(X, %k, 0), poison, zero)
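///
/// For example (illustrative IR; names chosen for exposition), on a <4 x i32>
/// where only lanes 0-2 were written with %k, the result keeps the unwritten
/// lane as poison in the mask:
///   %ins = insertelement <4 x i32> poison, i32 %k, i64 0
///   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison,
///                          <4 x i32> <i32 0, i32 0, i32 0, i32 poison>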
1303 // We are interested in the last insert in a chain. So if this insert has a
1304 // single user and that user is an insert, bail.
1305 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
1306 return nullptr;
1307
1308 VectorType *VecTy = InsElt.getType();
1309 // Cannot handle a scalable type; the number of elements is not a compile-time
1310 // constant.
1311 if (isa<ScalableVectorType>(VecTy))
1312 return nullptr;
1313 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1314
1315 // Do not try to do this for a one-element vector, since that's a nop
1316 // and would cause an infinite loop.
1317 if (NumElements == 1)
1318 return nullptr;
1319
1320 Value *SplatVal = InsElt.getOperand(1);
1321 InsertElementInst *CurrIE = &InsElt;
1322 SmallBitVector ElementPresent(NumElements, false);
1323 InsertElementInst *FirstIE = nullptr;
1324
1325 // Walk the chain backwards, keeping track of which indices we inserted into,
1326 // until we hit something that isn't an insert of the splatted value.
1327 while (CurrIE) {
1328 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
1329 if (!Idx || CurrIE->getOperand(1) != SplatVal)
1330 return nullptr;
1331
1332 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
1333 // Check that none of the intermediate steps have any additional uses, except
1334 // for the root insertelement instruction, which can be re-used if it
1335 // inserts at position 0.
1336 if (CurrIE != &InsElt &&
1337 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1338 return nullptr;
1339
1340 ElementPresent[Idx->getZExtValue()] = true;
1341 FirstIE = CurrIE;
1342 CurrIE = NextIE;
1343 }
1344
1345 // If this is just a single insertelement (not a sequence), we are done.
1346 if (FirstIE == &InsElt)
1347 return nullptr;
1348
1349 // If we are not inserting into a poison vector, make sure we've seen an
1350 // insert into every element.
1351 // TODO: If the base vector is not undef, it might be better to create a splat
1352 // and then a select-shuffle (blend) with the base vector.
1353 if (!match(FirstIE->getOperand(0), m_Poison()))
1354 if (!ElementPresent.all())
1355 return nullptr;
1356
1357 // Create the insert + shuffle.
1358 Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
1359 PoisonValue *PoisonVec = PoisonValue::get(VecTy);
1360 Constant *Zero = ConstantInt::get(Int64Ty, 0);
1361 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1362 FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
1363 InsElt.getIterator());
1364
1365 // Splat from element 0, but replace absent elements with poison in the mask.
1366 SmallVector<int, 16> Mask(NumElements, 0);
1367 for (unsigned i = 0; i != NumElements; ++i)
1368 if (!ElementPresent[i])
1369 Mask[i] = -1;
1370
1371 return new ShuffleVectorInst(FirstIE, Mask);
1372}
1373
1374/// Try to fold an insert element into an existing splat shuffle by changing
1375/// the shuffle's mask to include the index of this insert element.
1377 // Check if the vector operand of this insert is a canonical splat shuffle.
1378 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1379 if (!Shuf || !Shuf->isZeroEltSplat())
1380 return nullptr;
1381
1382 // Bail out early if the shuffle is a scalable type. The number of elements in
1383 // the shuffle mask is unknown at compile time.
1384 if (isa<ScalableVectorType>(Shuf->getType()))
1385 return nullptr;
1386
1387 // Check for a constant insertion index.
1388 uint64_t IdxC;
1389 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1390 return nullptr;
1391
1392 // Check if the splat shuffle's input is the same as this insert's scalar op.
1393 Value *X = InsElt.getOperand(1);
1394 Value *Op0 = Shuf->getOperand(0);
1395 if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
1396 return nullptr;
1397
1398 // Replace the shuffle mask element at the index of this insert with a zero.
1399 // For example:
1400 // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1401 // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1402 unsigned NumMaskElts =
1403 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1404 SmallVector<int, 16> NewMask(NumMaskElts);
1405 for (unsigned i = 0; i != NumMaskElts; ++i)
1406 NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
1407
1408 return new ShuffleVectorInst(Op0, NewMask);
1409}
1410
1411/// Try to fold an extract+insert element into an existing identity shuffle by
1412/// changing the shuffle's mask to include the index of this insert element.
1414 // Check if the vector operand of this insert is an identity shuffle.
1415 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1416 if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
1417 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1418 return nullptr;
1419
1420 // Bail out early if the shuffle is a scalable type. The number of elements in
1421 // the shuffle mask is unknown at compile time.
1422 if (isa<ScalableVectorType>(Shuf->getType()))
1423 return nullptr;
1424
1425 // Check for a constant insertion index.
1426 uint64_t IdxC;
1427 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1428 return nullptr;
1429
1430 // Check if this insert's scalar op is extracted from the identity shuffle's
1431 // input vector.
1432 Value *Scalar = InsElt.getOperand(1);
1433 Value *X = Shuf->getOperand(0);
1434 if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
1435 return nullptr;
1436
1437 // Replace the shuffle mask element at the index of this extract+insert with
1438 // that same index value.
1439 // For example:
1440 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1441 unsigned NumMaskElts =
1442 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1443 SmallVector<int, 16> NewMask(NumMaskElts);
1444 ArrayRef<int> OldMask = Shuf->getShuffleMask();
1445 for (unsigned i = 0; i != NumMaskElts; ++i) {
1446 if (i != IdxC) {
1447 // All mask elements besides the inserted element remain the same.
1448 NewMask[i] = OldMask[i];
1449 } else if (OldMask[i] == (int)IdxC) {
1450 // If the mask element was already set, there's nothing to do
1451 // (demanded elements analysis may unset it later).
1452 return nullptr;
1453 } else {
1454 assert(OldMask[i] == PoisonMaskElem &&
1455 "Unexpected shuffle mask element for identity shuffle");
1456 NewMask[i] = IdxC;
1457 }
1458 }
1459
1460 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
1461}
1462
1463/// If we have an insertelement instruction feeding into another insertelement
1464/// and the 2nd is inserting a constant into the vector, canonicalize that
1465/// constant insertion before the insertion of a variable:
1466///
1467/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1468/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1469///
1470/// This has the potential of eliminating the 2nd insertelement instruction
1471/// via constant folding of the scalar constant into a vector constant.
1472static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1473 InstCombiner::BuilderTy &Builder) {
1474 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1475 if (!InsElt1 || !InsElt1->hasOneUse())
1476 return nullptr;
1477
1478 Value *X, *Y;
1479 Constant *ScalarC;
1480 ConstantInt *IdxC1, *IdxC2;
1481 if (match(InsElt1->getOperand(0), m_Value(X)) &&
1482 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1483 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1484 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1485 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1486 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1487 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1488 }
1489
1490 return nullptr;
1491}
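// A concrete instance of the canonicalization above (illustrative IR only;
// %x and %y are hypothetical values, not taken from a test):
//   %i1 = insertelement <4 x float> %x, float %y, i64 0
//   %i2 = insertelement <4 x float> %i1, float 4.0, i64 1
// -->
//   %t  = insertelement <4 x float> %x, float 4.0, i64 1
//   %i2 = insertelement <4 x float> %t, float %y, i64 0
// If %x is itself a constant vector, the new inner insert constant-folds away.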
1492
1493/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1494/// --> shufflevector X, CVec', Mask'
1495static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1496 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1497 // Bail out if the parent has more than one use. In that case, we'd be
1498 // replacing the insertelt with a shuffle, and that's not a clear win.
1499 if (!Inst || !Inst->hasOneUse())
1500 return nullptr;
1501 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1502 // The shuffle must have a constant vector operand. The insertelt must have
1503 // a constant scalar being inserted at a constant position in the vector.
1504 Constant *ShufConstVec, *InsEltScalar;
1505 uint64_t InsEltIndex;
1506 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1507 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1508 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1509 return nullptr;
1510
1511 // Adding an element to an arbitrary shuffle could be expensive, but a
1512 // shuffle that selects elements from vectors without crossing lanes is
1513 // assumed cheap.
1514 // If we're just adding a constant into that shuffle, it will still be
1515 // cheap.
1516 if (!isShuffleEquivalentToSelect(*Shuf))
1517 return nullptr;
1518
1519 // From the above 'select' check, we know that the mask has the same number
1520 // of elements as the vector input operands. We also know that each constant
1521 // input element is used in its lane and can not be used more than once by
1522 // the shuffle. Therefore, replace the constant in the shuffle's constant
1523 // vector with the insertelt constant. Replace the constant in the shuffle's
1524 // mask vector with the insertelt index plus the length of the vector
1525 // (because the constant vector operand of a shuffle is always the 2nd
1526 // operand).
1527 ArrayRef<int> Mask = Shuf->getShuffleMask();
1528 unsigned NumElts = Mask.size();
1529 SmallVector<Constant *, 16> NewShufElts(NumElts);
1530 SmallVector<int, 16> NewMaskElts(NumElts);
1531 for (unsigned I = 0; I != NumElts; ++I) {
1532 if (I == InsEltIndex) {
1533 NewShufElts[I] = InsEltScalar;
1534 NewMaskElts[I] = InsEltIndex + NumElts;
1535 } else {
1536 // Copy over the existing values.
1537 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1538 NewMaskElts[I] = Mask[I];
1539 }
1540
1541 // Bail if we failed to find an element.
1542 if (!NewShufElts[I])
1543 return nullptr;
1544 }
1545
1546 // Create new operands for a shuffle that includes the constant of the
1547 // original insertelt. The old shuffle will be dead now.
1548 return new ShuffleVectorInst(Shuf->getOperand(0),
1549 ConstantVector::get(NewShufElts), NewMaskElts);
1550 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1551 // Transform sequences of insertelement ops with constant data/indexes into
1552 // a single shuffle op.
1553 // Can not handle scalable type, the number of elements needed to create
1554 // shuffle mask is not a compile-time constant.
1555 if (isa<ScalableVectorType>(InsElt.getType()))
1556 return nullptr;
1557 unsigned NumElts =
1558 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1559
1560 uint64_t InsertIdx[2];
1561 Constant *Val[2];
1562 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1563 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1564 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1565 !match(IEI->getOperand(1), m_Constant(Val[1])))
1566 return nullptr;
1567 SmallVector<Constant *, 16> Values(NumElts);
1568 SmallVector<int, 16> Mask(NumElts);
1569 auto ValI = std::begin(Val);
1570 // Generate new constant vector and mask.
1571 // We have 2 values/masks from the insertelements instructions. Insert them
1572 // into new value/mask vectors.
1573 for (uint64_t I : InsertIdx) {
1574 if (!Values[I]) {
1575 Values[I] = *ValI;
1576 Mask[I] = NumElts + I;
1577 }
1578 ++ValI;
1579 }
1580 // Remaining values are filled with 'poison' values.
1581 for (unsigned I = 0; I < NumElts; ++I) {
1582 if (!Values[I]) {
1583 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1584 Mask[I] = I;
1585 }
1586 }
1587 // Create new operands for a shuffle that includes the constant of the
1588 // original insertelt.
1589 return new ShuffleVectorInst(IEI->getOperand(0),
1590 ConstantVector::get(Values), Mask);
1591 }
1592 return nullptr;
1593}
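// An illustrative instance of the shuffle case above (hypothetical values;
// the shuffle must be select-equivalent and have one use):
//   %s = shufflevector <4 x i32> %x, <4 x i32> <i32 1, i32 2, i32 3, i32 4>,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %i = insertelement <4 x i32> %s, i32 9, i64 2
// -->
//   %i = shufflevector <4 x i32> %x, <4 x i32> <i32 1, i32 2, i32 9, i32 4>,
//                      <4 x i32> <i32 0, i32 5, i32 6, i32 7>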
1594
1595/// If both the base vector and the inserted element are extended from the same
1596/// type, do the insert element in the narrow source type followed by extend.
1597/// TODO: This can be extended to include other cast opcodes, but particularly
1598/// if we create a wider insertelement, make sure codegen is not harmed.
1599static Instruction *narrowInsElt(InsertElementInst &InsElt,
1600 InstCombiner::BuilderTy &Builder) {
1601 // We are creating a vector extend. If the original vector extend has another
1602 // use, that would mean we end up with 2 vector extends, so avoid that.
1603 // TODO: We could ease the use-clause to "if at least one op has one use"
1604 // (assuming that the source types match - see next TODO comment).
1605 Value *Vec = InsElt.getOperand(0);
1606 if (!Vec->hasOneUse())
1607 return nullptr;
1608
1609 Value *Scalar = InsElt.getOperand(1);
1610 Value *X, *Y;
1611 CastInst::CastOps CastOpcode;
1612 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1613 CastOpcode = Instruction::FPExt;
1614 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1615 CastOpcode = Instruction::SExt;
1616 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1617 CastOpcode = Instruction::ZExt;
1618 else
1619 return nullptr;
1620
1621 // TODO: We can allow mismatched types by creating an intermediate cast.
1622 if (X->getType()->getScalarType() != Y->getType())
1623 return nullptr;
1624
1625 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1626 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1627 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1628}
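// An illustrative instance of the narrowing fold above (hypothetical values
// %x and %y; both extends must come from the same source scalar type):
//   %wx = fpext <2 x float> %x to <2 x double>
//   %wy = fpext float %y to double
//   %r  = insertelement <2 x double> %wx, double %wy, i64 1
// -->
//   %n  = insertelement <2 x float> %x, float %y, i64 1
//   %r  = fpext <2 x float> %n to <2 x double>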
1629
1630/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1631/// try to convert to a single insert with appropriate bitcasts.
1632static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1633 bool IsBigEndian,
1634 InstCombiner::BuilderTy &Builder) {
1635 Value *VecOp = InsElt.getOperand(0);
1636 Value *ScalarOp = InsElt.getOperand(1);
1637 Value *IndexOp = InsElt.getOperand(2);
1638
1639 // The pattern depends on endianness because we expect the lower index to be inserted first.
1640 // Big endian:
1641 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1642 // Little endian:
1643 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1644 // Note: It is not safe to do this transform with an arbitrary base vector
1645 // because the bitcast of that vector to fewer/larger elements could
1646 // allow poison to spill into an element that was not poison before.
1647 // TODO: Detect smaller fractions of the scalar.
1648 // TODO: One-use checks are conservative.
1649 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1650 Value *Scalar0, *BaseVec;
1651 uint64_t Index0, Index1;
1652 if (!VTy || (VTy->getNumElements() & 1) ||
1653 !match(IndexOp, m_ConstantInt(Index1)) ||
1654 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1655 m_ConstantInt(Index0))) ||
1656 !match(BaseVec, m_Undef()))
1657 return nullptr;
1658
1659 // The first insert must be to the index one less than this one, and
1660 // the first insert must be to an even index.
1661 if (Index0 + 1 != Index1 || Index0 & 1)
1662 return nullptr;
1663
1664 // For big endian, the high half of the value should be inserted first.
1665 // For little endian, the low half of the value should be inserted first.
1666 Value *X;
1667 uint64_t ShAmt;
1668 if (IsBigEndian) {
1669 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1670 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1671 return nullptr;
1672 } else {
1673 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1674 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1675 return nullptr;
1676 }
1677
1678 Type *SrcTy = X->getType();
1679 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1680 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1681 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1682 return nullptr;
1683
1684 // Bitcast the base vector to a vector type with the source element type.
1685 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1686 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1687
1688 // Scale the insert index for a vector with half as many elements.
1689 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1690 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1691 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1692 return new BitCastInst(NewInsert, VTy);
1693}
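// An illustrative little-endian instance of the fold above (hypothetical
// value %x; the pair of inserts covers one whole i32):
//   %lo = trunc i32 %x to i16
//   %sh = lshr i32 %x, 16
//   %hi = trunc i32 %sh to i16
//   %v0 = insertelement <4 x i16> undef, i16 %lo, i64 2
//   %v1 = insertelement <4 x i16> %v0, i16 %hi, i64 3
// -->
//   %bc = bitcast <4 x i16> undef to <2 x i32>
//   %in = insertelement <2 x i32> %bc, i32 %x, i64 1
//   %v1 = bitcast <2 x i32> %in to <4 x i16>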
1694
1695Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1696 Value *VecOp = IE.getOperand(0);
1697 Value *ScalarOp = IE.getOperand(1);
1698 Value *IdxOp = IE.getOperand(2);
1699
1700 if (auto *V = simplifyInsertElementInst(
1701 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1702 return replaceInstUsesWith(IE, V);
1703
1704 // Canonicalize type of constant indices to i64 to simplify CSE
1705 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1706 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1707 return replaceOperand(IE, 2, NewIdx);
1708
1709 Value *BaseVec, *OtherScalar;
1710 uint64_t OtherIndexVal;
1711 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1712 m_Value(OtherScalar),
1713 m_ConstantInt(OtherIndexVal)))) &&
1714 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1715 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1716 return InsertElementInst::Create(NewIns, OtherScalar,
1717 Builder.getInt64(OtherIndexVal));
1718 }
1719 }
1720
1721 // If the scalar is bitcast and inserted into undef, do the insert in the
1722 // source type followed by bitcast.
1723 // TODO: Generalize for insert into any constant, not just undef?
1724 Value *ScalarSrc;
1725 if (match(VecOp, m_Undef()) &&
1726 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1727 (ScalarSrc->getType()->isIntegerTy() ||
1728 ScalarSrc->getType()->isFloatingPointTy())) {
1729 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1730 // bitcast (inselt undef, ScalarSrc, IdxOp)
1731 Type *ScalarTy = ScalarSrc->getType();
1732 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1733 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1734 : UndefValue::get(VecTy);
1735 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1736 return new BitCastInst(NewInsElt, IE.getType());
1737 }
1738
1739 // If the vector and scalar are both bitcast from the same element type, do
1740 // the insert in that source type followed by bitcast.
1741 Value *VecSrc;
1742 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1743 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1744 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1745 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1746 cast<VectorType>(VecSrc->getType())->getElementType() ==
1747 ScalarSrc->getType()) {
1748 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1749 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1750 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1751 return new BitCastInst(NewInsElt, IE.getType());
1752 }
1753
1754 // If the inserted element was extracted from some other fixed-length vector
1755 // and both indexes are valid constants, try to turn this into a shuffle.
1756 // Can not handle scalable vector type, the number of elements needed to
1757 // create shuffle mask is not a compile-time constant.
1758 uint64_t InsertedIdx, ExtractedIdx;
1759 Value *ExtVecOp;
1760 if (isa<FixedVectorType>(IE.getType()) &&
1761 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1762 match(ScalarOp,
1763 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1764 isa<FixedVectorType>(ExtVecOp->getType()) &&
1765 ExtractedIdx <
1766 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1767 // TODO: Looking at the user(s) to determine if this insert is a
1768 // fold-to-shuffle opportunity does not match the usual instcombine
1769 // constraints. We should decide if the transform is worthy based only
1770 // on this instruction and its operands, but that may not work currently.
1771 //
1772 // Here, we are trying to avoid creating shuffles before reaching
1773 // the end of a chain of extract-insert pairs. This is complicated because
1774 // we do not generally form arbitrary shuffle masks in instcombine
1775 // (because those may codegen poorly), but collectShuffleElements() does
1776 // exactly that.
1777 //
1778 // The rules for determining what is an acceptable target-independent
1779 // shuffle mask are fuzzy because they evolve based on the backend's
1780 // capabilities and real-world impact.
1781 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1782 if (!Insert.hasOneUse())
1783 return true;
1784 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1785 if (!InsertUser)
1786 return true;
1787 return false;
1788 };
1789
1790 // Try to form a shuffle from a chain of extract-insert ops.
1791 if (isShuffleRootCandidate(IE)) {
1792 bool Rerun = true;
1793 while (Rerun) {
1794 Rerun = false;
1795
1796 SmallVector<int, 16> Mask;
1797 ShuffleOps LR =
1798 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1799
1800 // The proposed shuffle may be trivial, in which case we shouldn't
1801 // perform the combine.
1802 if (LR.first != &IE && LR.second != &IE) {
1803 // We now have a shuffle of LHS, RHS, Mask.
1804 if (LR.second == nullptr)
1805 LR.second = PoisonValue::get(LR.first->getType());
1806 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1807 }
1808 }
1809 }
1810 }
1811
1812 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1813 unsigned VWidth = VecTy->getNumElements();
1814 APInt PoisonElts(VWidth, 0);
1815 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1816 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1817 PoisonElts)) {
1818 if (V != &IE)
1819 return replaceInstUsesWith(IE, V);
1820 return &IE;
1821 }
1822 }
1823
1824 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1825 return Shuf;
1826
1827 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1828 return NewInsElt;
1829
1830 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1831 return Broadcast;
1832
1833 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1834 return Splat;
1835
1836 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1837 return IdentityShuf;
1838
1839 if (Instruction *Ext = narrowInsElt(IE, Builder))
1840 return Ext;
1841
1842 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1843 return Ext;
1844
1845 return nullptr;
1846}
1847
1848/// Return true if we can evaluate the specified expression tree if the vector
1849/// elements were shuffled in a different order.
1850static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1851 unsigned Depth = 5) {
1852 // We can always reorder the elements of a constant.
1853 if (isa<Constant>(V))
1854 return true;
1855
1856 // We won't reorder vector arguments. No IPO here.
1857 Instruction *I = dyn_cast<Instruction>(V);
1858 if (!I) return false;
1859
1860 // Two users may expect different orders of the elements. Don't try it.
1861 if (!I->hasOneUse())
1862 return false;
1863
1864 if (Depth == 0) return false;
1865
1866 switch (I->getOpcode()) {
1867 case Instruction::UDiv:
1868 case Instruction::SDiv:
1869 case Instruction::URem:
1870 case Instruction::SRem:
1871 // Propagating an undefined shuffle mask element to integer div/rem is not
1872 // allowed because those opcodes can create immediate undefined behavior
1873 // from an undefined element in an operand.
1874 if (llvm::is_contained(Mask, -1))
1875 return false;
1876 [[fallthrough]];
1877 case Instruction::Add:
1878 case Instruction::FAdd:
1879 case Instruction::Sub:
1880 case Instruction::FSub:
1881 case Instruction::Mul:
1882 case Instruction::FMul:
1883 case Instruction::FDiv:
1884 case Instruction::FRem:
1885 case Instruction::Shl:
1886 case Instruction::LShr:
1887 case Instruction::AShr:
1888 case Instruction::And:
1889 case Instruction::Or:
1890 case Instruction::Xor:
1891 case Instruction::ICmp:
1892 case Instruction::FCmp:
1893 case Instruction::Trunc:
1894 case Instruction::ZExt:
1895 case Instruction::SExt:
1896 case Instruction::FPToUI:
1897 case Instruction::FPToSI:
1898 case Instruction::UIToFP:
1899 case Instruction::SIToFP:
1900 case Instruction::FPTrunc:
1901 case Instruction::FPExt:
1902 case Instruction::GetElementPtr: {
1903 // Bail out if we would create longer vector ops. We could allow creating
1904 // longer vector ops, but that may result in more expensive codegen.
1905 Type *ITy = I->getType();
1906 if (ITy->isVectorTy() &&
1907 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1908 return false;
1909 for (Value *Operand : I->operands()) {
1910 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1911 return false;
1912 }
1913 return true;
1914 }
1915 case Instruction::InsertElement: {
1916 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1917 if (!CI) return false;
1918 int ElementNumber = CI->getLimitedValue();
1919
1920 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1921 // can't put an element into multiple indices.
1922 bool SeenOnce = false;
1923 for (int I : Mask) {
1924 if (I == ElementNumber) {
1925 if (SeenOnce)
1926 return false;
1927 SeenOnce = true;
1928 }
1929 }
1930 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1931 }
1932 }
1933 return false;
1934}
1935
1936/// Rebuild a new instruction just like 'I' but with the new operands given.
1937 /// In the event of a type mismatch, the new operands' types are taken to be correct.
1938static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1939 IRBuilderBase &Builder) {
1940 Builder.SetInsertPoint(I);
1941 switch (I->getOpcode()) {
1942 case Instruction::Add:
1943 case Instruction::FAdd:
1944 case Instruction::Sub:
1945 case Instruction::FSub:
1946 case Instruction::Mul:
1947 case Instruction::FMul:
1948 case Instruction::UDiv:
1949 case Instruction::SDiv:
1950 case Instruction::FDiv:
1951 case Instruction::URem:
1952 case Instruction::SRem:
1953 case Instruction::FRem:
1954 case Instruction::Shl:
1955 case Instruction::LShr:
1956 case Instruction::AShr:
1957 case Instruction::And:
1958 case Instruction::Or:
1959 case Instruction::Xor: {
1960 BinaryOperator *BO = cast<BinaryOperator>(I);
1961 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1962 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1963 NewOps[0], NewOps[1]);
1964 if (auto *NewI = dyn_cast<Instruction>(New)) {
1965 if (isa<OverflowingBinaryOperator>(BO)) {
1966 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1967 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1968 }
1969 if (isa<PossiblyExactOperator>(BO)) {
1970 NewI->setIsExact(BO->isExact());
1971 }
1972 if (isa<FPMathOperator>(BO))
1973 NewI->copyFastMathFlags(I);
1974 }
1975 return New;
1976 }
1977 case Instruction::ICmp:
1978 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1979 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1980 NewOps[1]);
1981 case Instruction::FCmp:
1982 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1983 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1984 NewOps[1]);
1985 case Instruction::Trunc:
1986 case Instruction::ZExt:
1987 case Instruction::SExt:
1988 case Instruction::FPToUI:
1989 case Instruction::FPToSI:
1990 case Instruction::UIToFP:
1991 case Instruction::SIToFP:
1992 case Instruction::FPTrunc:
1993 case Instruction::FPExt: {
1994 // It's possible that the mask has a different number of elements from
1995 // the original cast. We recompute the destination type to match the mask.
1996 Type *DestTy = VectorType::get(
1997 I->getType()->getScalarType(),
1998 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1999 assert(NewOps.size() == 1 && "cast with #ops != 1");
2000 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
2001 DestTy);
2002 }
2003 case Instruction::GetElementPtr: {
2004 Value *Ptr = NewOps[0];
2005 ArrayRef<Value*> Idx = NewOps.slice(1);
2006 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
2007 Ptr, Idx, "",
2008 cast<GEPOperator>(I)->getNoWrapFlags());
2009 }
2010 }
2011 llvm_unreachable("failed to rebuild vector instructions");
2012}
2013
2014static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
2015 IRBuilderBase &Builder) {
2016 // Mask.size() does not need to be equal to the number of vector elements.
2017
2018 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
2019 Type *EltTy = V->getType()->getScalarType();
2020
2021 if (isa<PoisonValue>(V))
2022 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
2023
2024 if (match(V, m_Undef()))
2025 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
2026
2027 if (isa<ConstantAggregateZero>(V))
2028 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
2029
2030 if (Constant *C = dyn_cast<Constant>(V))
2031 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
2032 Mask);
2033
2034 Instruction *I = cast<Instruction>(V);
2035 switch (I->getOpcode()) {
2036 case Instruction::Add:
2037 case Instruction::FAdd:
2038 case Instruction::Sub:
2039 case Instruction::FSub:
2040 case Instruction::Mul:
2041 case Instruction::FMul:
2042 case Instruction::UDiv:
2043 case Instruction::SDiv:
2044 case Instruction::FDiv:
2045 case Instruction::URem:
2046 case Instruction::SRem:
2047 case Instruction::FRem:
2048 case Instruction::Shl:
2049 case Instruction::LShr:
2050 case Instruction::AShr:
2051 case Instruction::And:
2052 case Instruction::Or:
2053 case Instruction::Xor:
2054 case Instruction::ICmp:
2055 case Instruction::FCmp:
2056 case Instruction::Trunc:
2057 case Instruction::ZExt:
2058 case Instruction::SExt:
2059 case Instruction::FPToUI:
2060 case Instruction::FPToSI:
2061 case Instruction::UIToFP:
2062 case Instruction::SIToFP:
2063 case Instruction::FPTrunc:
2064 case Instruction::FPExt:
2065 case Instruction::Select:
2066 case Instruction::GetElementPtr: {
2067 SmallVector<Value*, 8> NewOps;
2068 bool NeedsRebuild =
2069 (Mask.size() !=
2070 cast<FixedVectorType>(I->getType())->getNumElements());
2071 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
2072 Value *V;
2073 // Recursively call evaluateInDifferentElementOrder on vector arguments
2074 // as well. E.g. GetElementPtr may have scalar operands even if the
2075 // return value is a vector, so we need to examine the operand type.
2076 if (I->getOperand(i)->getType()->isVectorTy())
2077 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
2078 else
2079 V = I->getOperand(i);
2080 NewOps.push_back(V);
2081 NeedsRebuild |= (V != I->getOperand(i));
2082 }
2083 if (NeedsRebuild)
2084 return buildNew(I, NewOps, Builder);
2085 return I;
2086 }
2087 case Instruction::InsertElement: {
2088 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
2089
2090 // The insertelement was inserting at Element. Figure out which element
2091 // that becomes after shuffling. The answer is guaranteed to be unique
2092 // by canEvaluateShuffled.
2093 bool Found = false;
2094 int Index = 0;
2095 for (int e = Mask.size(); Index != e; ++Index) {
2096 if (Mask[Index] == Element) {
2097 Found = true;
2098 break;
2099 }
2100 }
2101
2102 // If element is not in Mask, no need to handle the operand 1 (element to
2103 // be inserted). Just evaluate values in operand 0 according to Mask.
2104 if (!Found)
2105 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2106
2107 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2108 Builder);
2109 Builder.SetInsertPoint(I);
2110 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2111 }
2112 }
2113 llvm_unreachable("failed to reorder elements of vector instruction!");
2114}
2115
2116// Returns true if the shuffle is extracting a contiguous range of values from
2117// LHS, for example:
2118// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2119// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2120// Shuffles to: |EE|FF|GG|HH|
2121// +--+--+--+--+
2122static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2123 ArrayRef<int> Mask) {
2124 unsigned LHSElems =
2125 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2126 unsigned MaskElems = Mask.size();
2127 unsigned BegIdx = Mask.front();
2128 unsigned EndIdx = Mask.back();
2129 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2130 return false;
2131 for (unsigned I = 0; I != MaskElems; ++I)
2132 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2133 return false;
2134 return true;
2135}
2136
2137/// These are the ingredients in an alternate form binary operator as described
2138/// below.
2139struct BinopElts {
2140 BinaryOperator::BinaryOps Opcode;
2141 Value *Op0;
2142 Value *Op1;
2143 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2144 Value *V0 = nullptr, Value *V1 = nullptr) :
2145 Opcode(Opc), Op0(V0), Op1(V1) {}
2146 operator bool() const { return Opcode != 0; }
2147};
2148
2149/// Binops may be transformed into binops with different opcodes and operands.
2150/// Reverse the usual canonicalization to enable folds with the non-canonical
2151/// form of the binop. If a transform is possible, return the elements of the
2152/// new binop. If not, return invalid elements.
2153static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2154 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2155 Type *Ty = BO->getType();
2156 switch (BO->getOpcode()) {
2157 case Instruction::Shl: {
2158 // shl X, C --> mul X, (1 << C)
2159 Constant *C;
2160 if (match(BO1, m_ImmConstant(C))) {
2161 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2162 Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2163 assert(ShlOne && "Constant folding of immediate constants failed");
2164 return {Instruction::Mul, BO0, ShlOne};
2165 }
2166 break;
2167 }
2168 case Instruction::Or: {
2169 // or disjoint X, C --> add X, C
2170 if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
2171 return {Instruction::Add, BO0, BO1};
2172 break;
2173 }
2174 case Instruction::Sub:
2175 // sub 0, X --> mul X, -1
2176 if (match(BO0, m_ZeroInt()))
2177 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2178 break;
2179 default:
2180 break;
2181 }
2182 return {};
2183}
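// For example (illustrative, with a hypothetical value %x), a vector shift by
// a splat constant is reported back as the equivalent multiply:
//   %a = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
// is treated as
//   %a = mul <4 x i32> %x, <i32 8, i32 8, i32 8, i32 8>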
2184
2185/// A select shuffle of a select shuffle with a shared operand can be reduced
2186/// to a single select shuffle. This is an obvious improvement in IR, and the
2187/// backend is expected to lower select shuffles efficiently.
2188static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2189 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2190
2191 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2192 SmallVector<int, 16> Mask;
2193 Shuf.getShuffleMask(Mask);
2194 unsigned NumElts = Mask.size();
2195
2196 // Canonicalize a select shuffle with common operand as Op1.
2197 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2198 if (ShufOp && ShufOp->isSelect() &&
2199 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2200 std::swap(Op0, Op1);
2201 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2202 }
2203
2204 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2205 if (!ShufOp || !ShufOp->isSelect() ||
2206 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2207 return nullptr;
2208
2209 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2210 SmallVector<int, 16> Mask1;
2211 ShufOp->getShuffleMask(Mask1);
2212 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2213
2214 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2215 if (Y == Op0) {
2216 std::swap(X, Y);
2217 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2218 }
2219
2220 // If the mask chooses from X (operand 0), it stays the same.
2221 // If the mask chooses from the earlier shuffle, the other mask value is
2222 // transferred to the combined select shuffle:
2223 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2224 SmallVector<int, 16> NewMask(NumElts);
2225 for (unsigned i = 0; i != NumElts; ++i)
2226 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2227
2228 // A select mask with undef elements might look like an identity mask.
2229 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2230 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2231 "Unexpected shuffle mask");
2232 return new ShuffleVectorInst(X, Y, NewMask);
2233}
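// An illustrative instance of the select-of-select fold above (hypothetical
// values %x and %y):
//   %inner = shufflevector <4 x i32> %x, <4 x i32> %y,
//                          <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %outer = shufflevector <4 x i32> %x, <4 x i32> %inner,
//                          <4 x i32> <i32 0, i32 5, i32 6, i32 3>
// -->
//   %outer = shufflevector <4 x i32> %x, <4 x i32> %y,
//                          <4 x i32> <i32 0, i32 5, i32 2, i32 3>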
2234
2235static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2236 const SimplifyQuery &SQ) {
2237 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2238
2239 // Are we shuffling together some value and that same value after it has been
2240 // modified by a binop with a constant?
2241 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2242 Constant *C;
2243 bool Op0IsBinop;
2244 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2245 Op0IsBinop = true;
2246 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2247 Op0IsBinop = false;
2248 else
2249 return nullptr;
2250
2251 // The identity constant for a binop leaves a variable operand unchanged. For
2252 // a vector, this is a splat of something like 0, -1, or 1.
2253 // If there's no identity constant for this binop, we're done.
2254 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2255 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2256 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2257 if (!IdC)
2258 return nullptr;
2259
2260 Value *X = Op0IsBinop ? Op1 : Op0;
2261
2262 // Prevent folding in the case the non-binop operand might have NaN values.
2263 // If X can have NaN elements then we have that the floating point math
2264 // operation in the transformed code may not preserve the exact NaN
2265 // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2266 // This makes the transformation incorrect since the original program would
2267 // have preserved the exact NaN bit-pattern.
2268 // Avoid the folding if X can have NaN elements.
2269 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2270 !isKnownNeverNaN(X, SQ))
2271 return nullptr;
2272
2273 // Shuffle identity constants into the lanes that return the original value.
2274 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2275 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2276 // The existing binop constant vector remains in the same operand position.
2277 ArrayRef<int> Mask = Shuf.getShuffleMask();
2278 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2279 ConstantExpr::getShuffleVector(IdC, C, Mask);
2280
2281 bool MightCreatePoisonOrUB =
2282 is_contained(Mask, PoisonMaskElem) &&
2283 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2284 if (MightCreatePoisonOrUB)
2285 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2286
2287 // shuf (bop X, C), X, M --> bop X, C'
2288 // shuf X, (bop X, C), M --> bop X, C'
2289 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2290 NewBO->copyIRFlags(BO);
2291
2292 // An undef shuffle mask element may propagate as an undef constant element in
2293 // the new binop. That would produce poison where the original code might not.
2294 // If we already made a safe constant, then there's no danger.
2295 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2296 NewBO->dropPoisonGeneratingFlags();
2297 return NewBO;
2298}
2299
2300/// If we have an insert of a scalar to a non-zero element of an undefined
2301/// vector and then shuffle that value, that's the same as inserting to the zero
2302/// element and shuffling. Splatting from the zero element is recognized as the
2303/// canonical form of splat.
2304static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2305 InstCombiner::BuilderTy &Builder) {
2306 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2307 ArrayRef<int> Mask = Shuf.getShuffleMask();
2308 Value *X;
2309 uint64_t IndexC;
2310
2311 // Match a shuffle that is a splat to a non-zero element.
2312 if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
2313 m_ConstantInt(IndexC)))) ||
2314 !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2315 return nullptr;
2316
2317 // Insert into element 0 of a poison vector.
2318 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2319 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2320
2321 // Splat from element 0. Any mask element that is poison remains poison.
2322 // For example:
2323 // shuf (inselt poison, X, 2), _, <2,2,undef>
2324 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2325 unsigned NumMaskElts =
2326 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2327 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2328 for (unsigned i = 0; i != NumMaskElts; ++i)
2329 if (Mask[i] == PoisonMaskElem)
2330 NewMask[i] = Mask[i];
2331
2332 return new ShuffleVectorInst(NewIns, NewMask);
2333}
2334
2335/// Try to fold shuffles that are the equivalent of a vector select.
2336Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2337 if (!Shuf.isSelect())
2338 return nullptr;
2339
2340 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2341 // Commuting undef to operand 0 conflicts with another canonicalization.
2342 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2343 if (!match(Shuf.getOperand(1), m_Undef()) &&
2344 Shuf.getMaskValue(0) >= (int)NumElts) {
2345 // TODO: Can we assert that both operands of a shuffle-select are not undef
2346 // (otherwise, it would have been folded by instsimplify)?
2347 Shuf.commute();
2348 return &Shuf;
2349 }
2350
2351 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2352 return I;
2353
2354 if (Instruction *I = foldSelectShuffleWith1Binop(
2355 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2356 return I;
2357
2358 BinaryOperator *B0, *B1;
2359 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2360 !match(Shuf.getOperand(1), m_BinOp(B1)))
2361 return nullptr;
2362
2363 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2364 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2365 // with a multiply, we will exit because C0/C1 will not be set.
2366 Value *X, *Y;
2367 Constant *C0 = nullptr, *C1 = nullptr;
2368 bool ConstantsAreOp1;
2369 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2370 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2371 ConstantsAreOp1 = false;
2372 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2373 m_Neg(m_Value(X)))) &&
2374 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2375 m_Neg(m_Value(Y)))))
2376 ConstantsAreOp1 = true;
2377 else
2378 return nullptr;
2379
2380 // We need matching binops to fold the lanes together.
2381 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2382 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2383 bool DropNSW = false;
2384 if (ConstantsAreOp1 && Opc0 != Opc1) {
2385 // TODO: We drop "nsw" if shift is converted into multiply because it may
2386 // not be correct when the shift amount is BitWidth - 1. We could examine
2387 // each vector element to determine if it is safe to keep that flag.
2388 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2389 DropNSW = true;
2390 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2391 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2392 Opc0 = AltB0.Opcode;
2393 C0 = cast<Constant>(AltB0.Op1);
2394 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2395 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2396 Opc1 = AltB1.Opcode;
2397 C1 = cast<Constant>(AltB1.Op1);
2398 }
2399 }
2400
2401 if (Opc0 != Opc1 || !C0 || !C1)
2402 return nullptr;
2403
2404 // The opcodes must be the same. Use a new name to make that clear.
2405 BinaryOperator::BinaryOps BOpc = Opc0;
2406
2407 // Select the constant elements needed for the single binop.
2408 ArrayRef<int> Mask = Shuf.getShuffleMask();
2409 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2410
2411 // We are moving a binop after a shuffle. When a shuffle has an undefined
2412 // mask element, the result is undefined, but it is not poison or undefined
2413 // behavior. That is not necessarily true for div/rem/shift.
2414 bool MightCreatePoisonOrUB =
2415 is_contained(Mask, PoisonMaskElem) &&
2416 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2417 if (MightCreatePoisonOrUB)
2418 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2419 ConstantsAreOp1);
2420
2421 Value *V;
2422 if (X == Y) {
2423 // Remove a binop and the shuffle by rearranging the constant:
2424 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2425 // shuffle (op C0, V), (op C1, V), M --> op C', V
2426 V = X;
2427 } else {
2428 // If there are 2 different variable operands, we must create a new shuffle
2429 // (select) first, so check uses to ensure that we don't end up with more
2430 // instructions than we started with.
2431 if (!B0->hasOneUse() && !B1->hasOneUse())
2432 return nullptr;
2433
2434 // If we use the original shuffle mask and op1 is *variable*, we would be
2435 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2436 // poison. We do not have to guard against UB when *constants* are op1
2437 // because safe constants guarantee that we do not overflow sdiv/srem (and
2438 // there's no danger for other opcodes).
2439 // TODO: To allow this case, create a new shuffle mask with no undefs.
2440 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2441 return nullptr;
2442
2443 // Note: In general, we do not create new shuffles in InstCombine because we
2444 // do not know if a target can lower an arbitrary shuffle optimally. In this
2445 // case, the shuffle uses the existing mask, so there is no additional risk.
2446
2447 // Select the variable vectors first, then perform the binop:
2448 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2449 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2450 V = Builder.CreateShuffleVector(X, Y, Mask);
2451 }
2452
2453 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2454 Builder.CreateBinOp(BOpc, NewC, V);
2455
2456 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2457 // 1. If we changed an opcode, poison conditions might have changed.
2458 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2459 // where the original code did not. But if we already made a safe constant,
2460 // then there's no danger.
2461 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2462 NewI->copyIRFlags(B0);
2463 NewI->andIRFlags(B1);
2464 if (DropNSW)
2465 NewI->setHasNoSignedWrap(false);
2466 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2467 NewI->dropPoisonGeneratingFlags();
2468 }
2469 return replaceInstUsesWith(Shuf, NewBO);
2470}
2471
2472/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2473/// Example (little endian):
2474/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2475static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2476 bool IsBigEndian) {
2477 // This must be a bitcasted shuffle of 1 vector integer operand.
2478 Type *DestType = Shuf.getType();
2479 Value *X;
2480 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2481 !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
2482 return nullptr;
2483
2484 // The source type must have the same number of elements as the shuffle,
2485 // and the source element type must be larger than the shuffle element type.
2486 Type *SrcType = X->getType();
2487 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2488 cast<FixedVectorType>(SrcType)->getNumElements() !=
2489 cast<FixedVectorType>(DestType)->getNumElements() ||
2490 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2491 return nullptr;
2492
2493 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2494 "Expected a shuffle that decreases length");
2495
2496 // Last, check that the mask chooses the correct low bits for each narrow
2497 // element in the result.
2498 uint64_t TruncRatio =
2499 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2500 ArrayRef<int> Mask = Shuf.getShuffleMask();
2501 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2502 if (Mask[i] == PoisonMaskElem)
2503 continue;
2504 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2505 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2506 if (Mask[i] != (int)LSBIndex)
2507 return nullptr;
2508 }
2509
2510 return new TruncInst(X, DestType);
2511}
2512
2513/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2514/// narrowing (concatenating with poison and extracting back to the original
2515/// length). This allows replacing the wide select with a narrow select.
2516static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2517 InstCombiner::BuilderTy &Builder) {
2518 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2519 // of the 1st vector operand of a shuffle.
2520 if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
2521 return nullptr;
2522
2523 // The vector being shuffled must be a vector select that we can eliminate.
2524 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2525 Value *Cond, *X, *Y;
2526 if (!match(Shuf.getOperand(0),
2527 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2528 return nullptr;
2529
2530 // We need a narrow condition value. It must be extended with poison elements
2531 // and have the same number of elements as this shuffle.
2532 unsigned NarrowNumElts =
2533 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2534 Value *NarrowCond;
2535 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2536 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2537 NarrowNumElts ||
2538 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2539 return nullptr;
2540
2541 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2542 // -->
2543 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2544 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2545 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2546 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2547}
2548
2549/// Canonicalize FP negate/abs after shuffle.
2550static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2551 InstCombiner::BuilderTy &Builder) {
2552 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2553 Value *X;
2554 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2555 return nullptr;
2556
2557 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2558
2559 // Match 2-input (binary) shuffle.
2560 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2561 Value *Y;
2562 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2563 S0->getOpcode() != S1->getOpcode() ||
2564 (!S0->hasOneUse() && !S1->hasOneUse()))
2565 return nullptr;
2566
2567 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2568 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2569 Instruction *NewF;
2570 if (IsFNeg) {
2571 NewF = UnaryOperator::CreateFNeg(NewShuf);
2572 } else {
2573 Function *FAbs = Intrinsic::getOrInsertDeclaration(
2574 Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
2575 NewF = CallInst::Create(FAbs, {NewShuf});
2576 }
2577 NewF->copyIRFlags(S0);
2578 NewF->andIRFlags(S1);
2579 return NewF;
2580}
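// An illustrative instance of the canonicalization above (hypothetical
// values %x and %y):
//   %nx = fneg <4 x float> %x
//   %ny = fneg <4 x float> %y
//   %s  = shufflevector <4 x float> %nx, <4 x float> %ny,
//                       <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// -->
//   %t  = shufflevector <4 x float> %x, <4 x float> %y,
//                       <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %s  = fneg <4 x float> %t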
2581
2582/// Canonicalize casts after shuffle.
2583static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2584 InstCombiner::BuilderTy &Builder) {
2585 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2586 if (!Cast0)
2587 return nullptr;
2588
2589 // TODO: Allow other opcodes? That would require easing the type restrictions
2590 // below here.
2591 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2592 switch (CastOpcode) {
2593 case Instruction::SExt:
2594 case Instruction::ZExt:
2595 case Instruction::FPToSI:
2596 case Instruction::FPToUI:
2597 case Instruction::SIToFP:
2598 case Instruction::UIToFP:
2599 break;
2600 default:
2601 return nullptr;
2602 }
2603
2604 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2605 VectorType *ShufTy = Shuf.getType();
2606 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2607
2608 // TODO: Allow length-increasing shuffles?
2609 if (ShufTy->getElementCount().getKnownMinValue() >
2610 ShufOpTy->getElementCount().getKnownMinValue())
2611 return nullptr;
2612
2613 // shuffle (cast X), Poison, identity-with-extract-mask -->
2614 // cast (shuffle X, Poison, identity-with-extract-mask).
2615 if (isa<PoisonValue>(Shuf.getOperand(1)) && Cast0->hasOneUse() &&
2616 Shuf.isIdentityWithExtract()) {
2617 auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
2618 PoisonValue::get(CastSrcTy),
2619 Shuf.getShuffleMask());
2620 return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
2621 }
2622
2623 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2624 // Do we have 2 matching cast operands?
2625 if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2626 Cast0->getSrcTy() != Cast1->getSrcTy())
2627 return nullptr;
2628
2629 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2630 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2631 "Expected fixed vector operands for casts and binary shuffle");
2632 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2633 return nullptr;
2634
2635 // At least one of the operands must have only one use (the shuffle).
2636 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2637 return nullptr;
2638
2639 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2640 Value *X = Cast0->getOperand(0);
2641 Value *Y = Cast1->getOperand(0);
2642 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2643 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2644}
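// An illustrative instance of the cast-after-shuffle canonicalization above
// (hypothetical values %x and %y; both casts share the same source type):
//   %wx = zext <4 x i16> %x to <4 x i32>
//   %wy = zext <4 x i16> %y to <4 x i32>
//   %s  = shufflevector <4 x i32> %wx, <4 x i32> %wy,
//                       <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// -->
//   %t  = shufflevector <4 x i16> %x, <4 x i16> %y,
//                       <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %s  = zext <4 x i16> %t to <4 x i32>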
2645
2646/// Try to fold an extract subvector operation.
2647static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2648 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2649 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
2650 return nullptr;
2651
2652 // Check if we are extracting all bits of an inserted scalar:
2653 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2654 Value *X;
2655 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2656 X->getType()->getPrimitiveSizeInBits() ==
2657 Shuf.getType()->getPrimitiveSizeInBits())
2658 return new BitCastInst(X, Shuf.getType());
2659
2660 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2661 Value *Y;
2662 ArrayRef<int> Mask;
2663 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2664 return nullptr;
2665
2666 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2667 // then combining may result in worse codegen.
2668 if (!Op0->hasOneUse())
2669 return nullptr;
2670
2671 // We are extracting a subvector from a shuffle. Remove excess elements from
2672 // the 1st shuffle mask to eliminate the extract.
2673 //
2674 // This transform is conservatively limited to identity extracts because we do
2675 // not allow arbitrary shuffle mask creation as a target-independent transform
2676 // (because we can't guarantee that will lower efficiently).
2677 //
2678 // If the extracting shuffle has a poison mask element, it transfers to the
2679 // new shuffle mask. Otherwise, copy the original mask element. Example:
2680 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2681 // shuf X, Y, <C0, poison, C2, poison>
2682 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2683 SmallVector<int, 16> NewMask(NumElts);
2684 assert(NumElts < Mask.size() &&
2685 "Identity with extract must have less elements than its inputs");
2686
2687 for (unsigned i = 0; i != NumElts; ++i) {
2688 int ExtractMaskElt = Shuf.getMaskValue(i);
2689 int MaskElt = Mask[i];
2690 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2691 }
2692 return new ShuffleVectorInst(X, Y, NewMask);
2693}
2694
2695/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2696/// operand with the operand of an insertelement.
2697static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2698 InstCombinerImpl &IC) {
2699 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2700 SmallVector<int, 16> Mask;
2701 Shuf.getShuffleMask(Mask);
2702
2703 int NumElts = Mask.size();
2704 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2705
2706 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2707 // not be able to handle it there if the insertelement has >1 use.
2708 // If the shuffle has an insertelement operand but does not choose the
2709 // inserted scalar element from that value, then we can replace that shuffle
2710 // operand with the source vector of the insertelement.
2711 Value *X;
2712 uint64_t IdxC;
2713 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2714 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2715 if (!is_contained(Mask, (int)IdxC))
2716 return IC.replaceOperand(Shuf, 0, X);
2717 }
2718 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2719 // Offset the index constant by the vector width because we are checking for
2720 // accesses to the 2nd vector input of the shuffle.
2721 IdxC += InpNumElts;
2722 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2723 if (!is_contained(Mask, (int)IdxC))
2724 return IC.replaceOperand(Shuf, 1, X);
2725 }
2726 // For the rest of the transform, the shuffle must not change vector sizes.
2727 // TODO: This restriction could be removed if the insert has only one use
2728 // (because the transform would require a new length-changing shuffle).
2729 if (NumElts != InpNumElts)
2730 return nullptr;
2731
2732 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2733 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2734 // We need an insertelement with a constant index.
2735 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2736 m_ConstantInt(IndexC))))
2737 return false;
2738
2739 // Test the shuffle mask to see if it splices the inserted scalar into the
2740 // operand 1 vector of the shuffle.
2741 int NewInsIndex = -1;
2742 for (int i = 0; i != NumElts; ++i) {
2743 // Ignore undef mask elements.
2744 if (Mask[i] == -1)
2745 continue;
2746
2747 // The shuffle takes elements of operand 1 without lane changes.
2748 if (Mask[i] == NumElts + i)
2749 continue;
2750
2751 // The shuffle must choose the inserted scalar exactly once.
2752 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2753 return false;
2754
2755 // The shuffle is placing the inserted scalar into element i.
2756 NewInsIndex = i;
2757 }
2758
2759 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2760
2761 // Index is updated to the potentially translated insertion lane.
2762 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2763 return true;
2764 };
2765
2766 // If the shuffle is unnecessary, insert the scalar operand directly into
2767 // operand 1 of the shuffle. Example:
2768 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2769 Value *Scalar;
2770 ConstantInt *IndexC;
2771 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2772 return InsertElementInst::Create(V1, Scalar, IndexC);
2773
2774 // Try again after commuting shuffle. Example:
2775 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2776 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2777 std::swap(V0, V1);
2778 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2779 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2780 return InsertElementInst::Create(V1, Scalar, IndexC);
2781
2782 return nullptr;
2783}
2784
2785static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2786 // Match the operands as identity with padding (also known as concatenation
2787 // with undef) shuffles of the same source type. The backend is expected to
2788 // recreate these concatenations from a shuffle of narrow operands.
2789 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2790 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2791 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2792 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2793 return nullptr;
2794
2795 // We limit this transform to power-of-2 types because we expect that the
2796 // backend can convert the simplified IR patterns to identical nodes as the
2797 // original IR.
2798 // TODO: If we can verify the same behavior for arbitrary types, the
2799 // power-of-2 checks can be removed.
2800 Value *X = Shuffle0->getOperand(0);
2801 Value *Y = Shuffle1->getOperand(0);
2802 if (X->getType() != Y->getType() ||
2803 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2804 !isPowerOf2_32(
2805 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2806 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2807 match(X, m_Undef()) || match(Y, m_Undef()))
2808 return nullptr;
2809 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2810 match(Shuffle1->getOperand(1), m_Undef()) &&
2811 "Unexpected operand for identity shuffle");
2812
2813 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2814 // operands directly by adjusting the shuffle mask to account for the narrower
2815 // types:
2816 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2817 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2818 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2819 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2820
2821 ArrayRef<int> Mask = Shuf.getShuffleMask();
2822 SmallVector<int, 16> NewMask(Mask.size(), -1);
2823 for (int i = 0, e = Mask.size(); i != e; ++i) {
2824 if (Mask[i] == -1)
2825 continue;
2826
2827 // If this shuffle is choosing an undef element from 1 of the sources, that
2828 // element is undef.
2829 if (Mask[i] < WideElts) {
2830 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2831 continue;
2832 } else {
2833 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2834 continue;
2835 }
2836
2837 // If this shuffle is choosing from the 1st narrow op, the mask element is
2838 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2839 // element is offset down to adjust for the narrow vector widths.
2840 if (Mask[i] < WideElts) {
2841 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2842 NewMask[i] = Mask[i];
2843 } else {
2844 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2845 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2846 }
2847 }
2848 return new ShuffleVectorInst(X, Y, NewMask);
2849}
2850
2851// Splatting the first element of the result of a BinOp, where any of the
2852 // BinOp's operands are the result of a first-element splat, can be simplified to
2853 // splatting the first element of the result of the BinOp.
2854Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2855 if (!match(SVI.getOperand(1), m_Poison()) ||
2856 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2857 !SVI.getOperand(0)->hasOneUse())
2858 return nullptr;
2859
2860 Value *Op0 = SVI.getOperand(0);
2861 Value *X, *Y;
2862 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Poison(), m_ZeroMask()),
2863 m_Value(Y))) &&
2864 !match(Op0, m_BinOp(m_Value(X),
2865 m_Shuffle(m_Value(Y), m_Poison(), m_ZeroMask()))))
2866 return nullptr;
2867 if (X->getType() != Y->getType())
2868 return nullptr;
2869
2870 auto *BinOp = cast<BinaryOperator>(Op0);
2871 if (!isSafeToSpeculativelyExecute(BinOp))
2872 return nullptr;
2873
2874 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2875 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2876 NewBOI->copyIRFlags(BinOp);
2877
2878 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2879}
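// An illustrative instance of the splat-of-binop fold above (hypothetical
// values %x and %y; the add is safe to speculate):
//   %xs = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
//   %b  = add <4 x i32> %xs, %y
//   %r  = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
// -->
//   %b2 = add <4 x i32> %x, %y
//   %r  = shufflevector <4 x i32> %b2, <4 x i32> poison, <4 x i32> zeroinitializer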
2880
2881Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2882 Value *LHS = SVI.getOperand(0);
2883 Value *RHS = SVI.getOperand(1);
2884 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2885 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2886 SVI.getType(), ShufQuery))
2887 return replaceInstUsesWith(SVI, V);
2888
2889 if (Instruction *I = simplifyBinOpSplats(SVI))
2890 return I;
2891
2892 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2893 // order to support scalable vectors.
2894 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2895 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2896
2897 if (isa<ScalableVectorType>(LHS->getType()))
2898 return nullptr;
2899
2900 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2901 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2902
2903 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2904 //
2905 // if X and Y are of the same (vector) type, and the element size is not
2906 // changed by the bitcasts, we can distribute the bitcasts through the
2907 // shuffle, hopefully reducing the number of instructions. We make sure that
2908 // at least one bitcast only has one use, so we don't *increase* the number of
2909 // instructions here.
2910 Value *X, *Y;
2911 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2912 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2913 X->getType()->getScalarSizeInBits() ==
2914 SVI.getType()->getScalarSizeInBits() &&
2915 (LHS->hasOneUse() || RHS->hasOneUse())) {
2916 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2917 SVI.getName() + ".uncasted");
2918 return new BitCastInst(V, SVI.getType());
2919 }
2920
2921 ArrayRef<int> Mask = SVI.getShuffleMask();
2922
2923 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2924 // simulated shuffle can simplify, then this shuffle is unnecessary:
2925 // shuf (bitcast X), undef, Mask --> bitcast X'
2926 // TODO: This could be extended to allow length-changing shuffles.
2927 // The transform might also be obsoleted if we allowed canonicalization
2928 // of bitcasted shuffles.
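  // For example (illustrative types):
  //   shuf (bitcast <2 x i64> %x to <4 x i32>), undef, <0, 1, 2, 3>
  //   scales to the <2 x i64> mask <0, 1>, which is an identity shuffle of %x,
  //   so the pair simplifies to: bitcast <2 x i64> %x to <4 x i32>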
2929 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2930 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2931 // Try to create a scaled mask constant.
2932 auto *XType = cast<FixedVectorType>(X->getType());
2933 unsigned XNumElts = XType->getNumElements();
2934 SmallVector<int, 16> ScaledMask;
2935 if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2936 // If the shuffled source vector simplifies, cast that value to this
2937 // shuffle's type.
2938 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2939 ScaledMask, XType, ShufQuery))
2940 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2941 }
2942 }
2943
2944 // shuffle x, x, mask --> shuffle x, undef, mask'
2945 if (LHS == RHS) {
2946 assert(!match(RHS, m_Undef()) &&
2947 "Shuffle with 2 undef ops not simplified?");
2948 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2949 }
2950
2951 // shuffle undef, x, mask --> shuffle x, undef, mask'
2952 if (match(LHS, m_Undef())) {
2953 SVI.commute();
2954 return &SVI;
2955 }
2956
2957  if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2958    return I;
2959
2960 if (Instruction *I = foldSelectShuffle(SVI))
2961 return I;
2962
2963 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2964 return I;
2965
2966  if (Instruction *I = narrowVectorSelect(SVI, Builder))
2967    return I;
2968
2969  if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2970    return I;
2971
2972 if (Instruction *I = foldCastShuffle(SVI, Builder))
2973 return I;
2974
2975 APInt PoisonElts(VWidth, 0);
2976 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2977 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2978 if (V != &SVI)
2979 return replaceInstUsesWith(SVI, V);
2980 return &SVI;
2981 }
2982
2983  if (Instruction *I = foldIdentityExtractShuffle(SVI))
2984    return I;
2985
2986 // These transforms have the potential to lose undef knowledge, so they are
2987 // intentionally placed after SimplifyDemandedVectorElts().
2988 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2989 return I;
2990  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2991    return I;
2992
2993 if (match(RHS, m_Constant())) {
2994 if (auto *SI = dyn_cast<SelectInst>(LHS)) {
2995 // We cannot do this fold for elementwise select since ShuffleVector is
2996 // not elementwise.
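      // (With a vector condition, the select chooses per lane, while the
      // shuffle may move values across lanes, so the condition lanes would no
      // longer line up with the shuffled results.)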
2997 if (SI->getCondition()->getType()->isIntegerTy() &&
2998 (isa<PoisonValue>(RHS) ||
2999 isGuaranteedNotToBePoison(SI->getCondition()))) {
3000 if (Instruction *I = FoldOpIntoSelect(SVI, SI))
3001 return I;
3002 }
3003 }
3004 if (auto *PN = dyn_cast<PHINode>(LHS)) {
3005 if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
3006 return I;
3007 }
3008 }
3009
3010 if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
3011    Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
3012    return replaceInstUsesWith(SVI, V);
3013 }
3014
3015 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
3016 // a non-vector type. We can instead bitcast the original vector followed by
3017 // an extract of the desired element:
3018 //
3019 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
3020 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3021 // %1 = bitcast <4 x i8> %sroa to i32
3022 // Becomes:
3023 // %bc = bitcast <16 x i8> %in to <4 x i32>
3024 // %ext = extractelement <4 x i32> %bc, i32 0
3025 //
3026 // If the shuffle is extracting a contiguous range of values from the input
3027 // vector then each use which is a bitcast of the extracted size can be
3028 // replaced. This will work if the vector types are compatible, and the begin
3029 // index is aligned to a value in the casted vector type. If the begin index
3030 // isn't aligned then we can shuffle the original vector (keeping the same
3031 // vector type) before extracting.
3032 //
3033 // This code will bail out if the target type is fundamentally incompatible
3034 // with vectors of the source type.
3035 //
3036 // Example of <16 x i8>, target type i32:
3037 // Index range [4,8): v-----------v Will work.
3038 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3039 // <16 x i8>: | | | | | | | | | | | | | | | | |
3040 // <4 x i32>: | | | | |
3041 // +-----------+-----------+-----------+-----------+
3042 // Index range [6,10): ^-----------^ Needs an extra shuffle.
3043 // Target type i40: ^--------------^ Won't work, bail.
3044 bool MadeChange = false;
3045 if (isShuffleExtractingFromLHS(SVI, Mask)) {
3046 Value *V = LHS;
3047 unsigned MaskElems = Mask.size();
3048 auto *SrcTy = cast<FixedVectorType>(V->getType());
3049 unsigned VecBitWidth = DL.getTypeSizeInBits(SrcTy);
3050 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
3051 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3052 unsigned SrcNumElems = SrcTy->getNumElements();
3053    SmallMapVector<Type *, Value *, 8> NewBCs;
3054    SmallVector<BitCastInst *, 8> BCs;
3055    for (User *U : SVI.users())
3056 if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
3057 // Only visit bitcasts that weren't previously handled.
3058 if (BC->use_empty())
3059 continue;
3060 // Prefer to combine bitcasts of bitcasts before attempting this fold.
3061 if (BC->hasOneUse()) {
3062 auto *BC2 = dyn_cast<BitCastInst>(BC->user_back());
3063 if (BC2 && isEliminableCastPair(BC, BC2))
3064 continue;
3065 }
3066 BCs.push_back(BC);
3067 }
3068 for (BitCastInst *BC : BCs) {
3069 unsigned BegIdx = Mask.front();
3070 Type *TgtTy = BC->getDestTy();
3071 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
3072 if (!TgtElemBitWidth)
3073 continue;
3074 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3075 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3076 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3077 if (!VecBitWidthsEqual)
3078 continue;
3079      if (!VectorType::isValidElementType(TgtTy))
3080        continue;
3081 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
3082 if (!BegIsAligned) {
3083        // Shuffle the input so [0,MaskElems) contains the output, and
3084        // [MaskElems,SrcNumElems) is undef.
3085 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3086 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3087 ShuffleMask[I] = Idx;
3088 V = Builder.CreateShuffleVector(V, ShuffleMask,
3089 SVI.getName() + ".extract");
3090 BegIdx = 0;
3091 }
3092 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3093 assert(SrcElemsPerTgtElem);
3094 BegIdx /= SrcElemsPerTgtElem;
3095 auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);
3096 if (Inserted)
3097 It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
3098 auto *Ext = Builder.CreateExtractElement(It->second, BegIdx,
3099 SVI.getName() + ".extract");
3100 // The shufflevector isn't being replaced: the bitcast that used it
3101 // is. InstCombine will visit the newly-created instructions.
3102 replaceInstUsesWith(*BC, Ext);
3103 MadeChange = true;
3104 }
3105 }
3106
3107 // If the LHS is a shufflevector itself, see if we can combine it with this
3108 // one without producing an unusual shuffle.
3109 // Cases that might be simplified:
3110 // 1.
3111 // x1=shuffle(v1,v2,mask1)
3112 // x=shuffle(x1,undef,mask)
3113 // ==>
3114 // x=shuffle(v1,undef,newMask)
3115 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3116 // 2.
3117 // x1=shuffle(v1,undef,mask1)
3118 // x=shuffle(x1,x2,mask)
3119 // where v1.size() == mask1.size()
3120 // ==>
3121 // x=shuffle(v1,x2,newMask)
3122 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3123 // 3.
3124 // x2=shuffle(v2,undef,mask2)
3125 // x=shuffle(x1,x2,mask)
3126 // where v2.size() == mask2.size()
3127 // ==>
3128 // x=shuffle(x1,v2,newMask)
3129 // newMask[i] = (mask[i] < x1.size())
3130 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3131 // 4.
3132 // x1=shuffle(v1,undef,mask1)
3133 // x2=shuffle(v2,undef,mask2)
3134 // x=shuffle(x1,x2,mask)
3135 // where v1.size() == v2.size()
3136 // ==>
3137 // x=shuffle(v1,v2,newMask)
3138 // newMask[i] = (mask[i] < x1.size())
3139 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3140 //
3141  // Here we are deliberately conservative:
3142  // we avoid producing a shuffle mask that was not in the input program,
3143  // because the code generator may not be smart enough to turn a merged
3144  // shuffle into two specific shuffles: it may produce worse code. As such,
3145 // we only merge two shuffles if the result is either a splat or one of the
3146 // input shuffle masks. In this case, merging the shuffles just removes
3147 // one instruction, which we know is safe. This is good for things like
3148 // turning: (splat(splat)) -> splat, or
3149 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
3150  ShuffleVectorInst *LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3151  ShuffleVectorInst *RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3152  if (LHSShuffle)
3153 if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3154 !match(RHS, m_Poison()))
3155 LHSShuffle = nullptr;
3156 if (RHSShuffle)
3157 if (!match(RHSShuffle->getOperand(1), m_Poison()))
3158 RHSShuffle = nullptr;
3159 if (!LHSShuffle && !RHSShuffle)
3160 return MadeChange ? &SVI : nullptr;
3161
3162 Value* LHSOp0 = nullptr;
3163 Value* LHSOp1 = nullptr;
3164 Value* RHSOp0 = nullptr;
3165 unsigned LHSOp0Width = 0;
3166 unsigned RHSOp0Width = 0;
3167 if (LHSShuffle) {
3168 LHSOp0 = LHSShuffle->getOperand(0);
3169 LHSOp1 = LHSShuffle->getOperand(1);
3170 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3171 }
3172 if (RHSShuffle) {
3173 RHSOp0 = RHSShuffle->getOperand(0);
3174 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3175 }
3176 Value* newLHS = LHS;
3177 Value* newRHS = RHS;
3178 if (LHSShuffle) {
3179 // case 1
3180 if (match(RHS, m_Poison())) {
3181 newLHS = LHSOp0;
3182 newRHS = LHSOp1;
3183 }
3184 // case 2 or 4
3185 else if (LHSOp0Width == LHSWidth) {
3186 newLHS = LHSOp0;
3187 }
3188 }
3189 // case 3 or 4
3190 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3191 newRHS = RHSOp0;
3192 }
3193 // case 4
3194 if (LHSOp0 == RHSOp0) {
3195 newLHS = LHSOp0;
3196 newRHS = nullptr;
3197 }
3198
3199 if (newLHS == LHS && newRHS == RHS)
3200 return MadeChange ? &SVI : nullptr;
3201
3202 ArrayRef<int> LHSMask;
3203 ArrayRef<int> RHSMask;
3204 if (newLHS != LHS)
3205 LHSMask = LHSShuffle->getShuffleMask();
3206 if (RHSShuffle && newRHS != RHS)
3207 RHSMask = RHSShuffle->getShuffleMask();
3208
3209 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3210 SmallVector<int, 16> newMask;
3211 bool isSplat = true;
3212 int SplatElt = -1;
3213 // Create a new mask for the new ShuffleVectorInst so that the new
3214 // ShuffleVectorInst is equivalent to the original one.
3215 for (unsigned i = 0; i < VWidth; ++i) {
3216 int eltMask;
3217 if (Mask[i] < 0) {
3218 // This element is a poison value.
3219 eltMask = -1;
3220 } else if (Mask[i] < (int)LHSWidth) {
3221      // This element is from the left-hand side vector operand.
3222 //
3223 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3224 // new mask value for the element.
3225 if (newLHS != LHS) {
3226 eltMask = LHSMask[Mask[i]];
3227 // If the value selected is an poison value, explicitly specify it
3228 // with a -1 mask value.
3229 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3230 eltMask = -1;
3231 } else
3232 eltMask = Mask[i];
3233 } else {
3234      // This element is from the right-hand side vector operand.
3235 //
3236 // If the value selected is a poison value, explicitly specify it
3237 // with a -1 mask value. (case 1)
3238 if (match(RHS, m_Poison()))
3239 eltMask = -1;
3240 // If RHS is going to be replaced (case 3 or 4), calculate the
3241 // new mask value for the element.
3242 else if (newRHS != RHS) {
3243 eltMask = RHSMask[Mask[i]-LHSWidth];
3244        // If the value selected is a poison value, explicitly specify it
3245 // with a -1 mask value.
3246 if (eltMask >= (int)RHSOp0Width) {
3247 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3248                 "should have been checked above");
3249 eltMask = -1;
3250 }
3251 } else
3252 eltMask = Mask[i]-LHSWidth;
3253
3254 // If LHS's width is changed, shift the mask value accordingly.
3255 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3256 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3257 // If newRHS == newLHS, we want to remap any references from newRHS to
3258 // newLHS so that we can properly identify splats that may occur due to
3259 // obfuscation across the two vectors.
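      // E.g. if newLHS is 8 elements wide and this element is lane 2 of the
      // (kept) RHS source, its value in the merged mask becomes 2 + 8 = 10.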
3260 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3261 eltMask += newLHSWidth;
3262 }
3263
3264 // Check if this could still be a splat.
3265 if (eltMask >= 0) {
3266 if (SplatElt >= 0 && SplatElt != eltMask)
3267 isSplat = false;
3268 SplatElt = eltMask;
3269 }
3270
3271 newMask.push_back(eltMask);
3272 }
3273
3274 // If the result mask is equal to one of the original shuffle masks,
3275 // or is a splat, do the replacement.
3276 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3277 if (!newRHS)
3278 newRHS = PoisonValue::get(newLHS->getType());
3279 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3280 }
3281
3282 return MadeChange ? &SVI : nullptr;
3283}