//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
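///
/// For example (an illustrative sketch, not an exhaustive list of the cheap
/// cases), a single-use vector load feeding the extract is considered cheap:
///   %v = load <4 x i32>, ptr %p            ; one use
///   %e = extractelement <4 x i32> %v, i64 1
/// since the vector operation need not survive scalarization.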
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // The index needs to be lower than the minimum size of the vector,
    // because for a scalable vector the actual length is only known at run
    // time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpPredicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
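//
// Illustrative sketch of the targeted pattern (names hypothetical):
//   loop:
//     %v.phi = phi <4 x i32> [ %init, %entry ], [ %v.next, %loop ]
//     %v.next = add <4 x i32> %v.phi, %step
//     %e = extractelement <4 x i32> %v.phi, i64 0
// which can roughly become:
//   loop:
//     %s.phi = phi i32 [ %init.e, %entry ], [ %s.next, %loop ]
//     %step.e = extractelement <4 x i32> %step, i64 0
//     %s.next = add i32 %s.phi, %step.e
// with %init.e extracting lane 0 of %init in the predecessor block.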
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise, return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          B0->getIterator());
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
                                                scalarPHI, Op, B0),
          B0->getIterator());
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}

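/// Try to fold an extractelement of a bitcast into a cheaper scalar operation,
/// e.g. (illustrative sketch, little-endian):
///   extractelement (bitcast i32 %x to <4 x i8>), i64 0 --> trunc i32 %x to i8
/// or, when the element counts match and the source lane is known (say %v was
/// built by inserting float %f at lane 1):
///   extractelement (bitcast <4 x float> %v to <4 x i32>), i64 1
///     --> bitcast float %f to i32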
Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if ((!ShiftAmountC ||
         isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
        Ext.getVectorOperand()->hasOneUse()) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
    // of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove insertelement, if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved into SimplifyDemandedVectorElts once
      // scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

/// Find elements of V demanded by UserInstr.
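/// For example (sketch): if V is <4 x i32> and UserInstr is
///   %e = extractelement <4 x i32> %V, i64 2
/// then only bit 2 is set in the returned mask (0b0100).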
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}

/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
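/// For example (sketch): if one user extracts lane 0 of a <4 x i32> value and
/// another user extracts lane 3, the returned union is 0b1001.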
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
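/// For example, "extractelement <4 x i32> %v, i32 1" gets its index rewritten
/// to "i64 1" by the caller; an index that is already i64, or wider than
/// 64 bits, is left alone (nullptr is returned).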
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If the extractelement will, for instance, try to do out-of-bounds
  // accesses because of the values of %c1 and/or %c2, the sequence could be
  // optimized early. This is currently not possible because constant folding
  // will reach an unreachable assertion if it doesn't find a constant operand.
  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<Constant>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  bool HasKnownValidIndex = false;
  if (IndexC) {
    // Canonicalize type of constant indices to i64 to simplify CSE
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    HasKnownValidIndex = IndexC->getValue().ult(NumElts);

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // The index needs to be lower than the minimum size of the vector,
      // because for a scalable vector the actual length is only known at run
      // time.
      if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return the index when its value does not exceed the allowed limit
        // for the element type of the vector, otherwise return poison.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = PoisonValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For a fixed-length vector, it's invalid to extract an out-of-range
    // element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // If SrcVec is a subvector starting at index 0, extract from the
  // wider source vector.
  Value *V;
  if (match(SrcVec,
            m_Intrinsic<Intrinsic::vector_extract>(m_Value(V), m_Zero())))
    return ExtractElementInst::Create(V, Index);

  // TODO: come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  // If the binop is not speculatable, we cannot hoist the extractelement if
  // it may make the operand poison.
  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
      (HasKnownValidIndex ||
       isSafeToSpeculativelyExecuteWithVariableReplaced(BO))) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpPredicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
    return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
                                          SrcCmpInst);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify already handled the case where the indices are constants
      // and equal in value. So if both are constants here, they must not be
      // the same value; extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      int SplatIndex = getSplatIndex(SVI->getShuffleMask());
      // We know the all-0 splat must be reading from the first operand, even
      // in the case of scalable vectors (vscale is always > 0).
      if (SplatIndex == 0)
        return ExtractElementInst::Create(SVI->getOperand(0),
                                          Builder.getInt64(0));

      if (isa<FixedVectorType>(SVI->getType())) {
        std::optional<int> SrcIdx;
        // getSplatIndex returns -1 to mean not-found.
        if (SplatIndex != -1)
          SrcIdx = SplatIndex;
        else if (ConstantInt *CI = dyn_cast<ConstantInt>(Index))
          SrcIdx = SVI->getMaskValue(CI->getZExtValue());

        if (SrcIdx) {
          Value *Src;
          unsigned LHSWidth =
              cast<FixedVectorType>(SVI->getOperand(0)->getType())
                  ->getNumElements();

          if (*SrcIdx < 0)
            return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
          if (*SrcIdx < (int)LHSWidth)
            Src = SVI->getOperand(0);
          else {
            *SrcIdx -= LHSWidth;
            Src = SVI->getOperand(1);
          }
          Type *Int64Ty = Type::getInt64Ty(EI.getContext());
          return ExtractElementInst::Create(
              Src, ConstantInt::get(Int64Ty, *SrcIdx, false));
        }
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable types; the number of elements is unknown at
    // compile time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt PoisonElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
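/// For example (sketch, <4 x i32> operands): a chain that inserts
/// (extractelement RHS, i64 2) into lane 0 of LHS collects to the mask
/// <6, 1, 2, 3>, i.e. lane 0 reads RHS element 2 and the remaining lanes are
/// the identity of LHS.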
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
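/// For example (sketch): extracts from a <2 x i8> %src feeding inserts into a
/// <4 x i8> can be preceded by widening the source:
///   %wide = shufflevector <2 x i8> %src, <2 x i8> poison,
///                         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
/// after which the insert/extract pairs can become a single shufflevector.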
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst->getIterator());
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}

/// We are building a shuffle to create V, which is a sequence of insertelement,
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
/// not rely on the second vector source. Return a std::pair containing the
/// left and right vectors of the proposed shuffle (or 0), and set the Mask
/// parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
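///
/// For example (sketch, <4 x i32> values): for the chain
///   %i0 = insertelement <4 x i32> poison, i32 %a0, i64 0
///   %i1 = insertelement <4 x i32> %i0, i32 %b2, i64 1
/// where %a0 = extractelement <4 x i32> %A, i64 0 and
///       %b2 = extractelement <4 x i32> %B, i64 2,
/// this returns the pair (%A, %B) with Mask = <0, 6, -1, -1>.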
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted from or inserted into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            if (replaceExtractElements(IEI, EI, IC))
              Rerun = true;

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}

/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted to see if they were all extracted
/// ('extractvalue'd) from the same source aggregate at the exact same element
/// indices. If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
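///
/// For example (sketch):
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } poison, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// simplifies to reusing %agg directly.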
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for a limit of 2 is to be
  // able to handle the clang C++ exception struct (which is hardcoded as
  // {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either
  // we've reconstructed the full initializer or we can't visit any more
  // `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                 ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find a defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find a defining `extractvalue` instruction, but there was
    /// a mismatch: either the type from which the value was extracted didn't
    /// match the aggregate type into which it was inserted,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // If an aggregate element is defined in UseBB, we can't use it in PredBB.
  bool EltDefinedInUseBB = false;

  // Given the value \p Elt that was being inserted into element \p EltIdx of an
  // aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB) {
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
      if (Elt && Elt->getParent() == *UseBB)
        EltDefinedInUseBB = true;
    }
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find an appropriate source aggregate for each of the
  // elements, and see that it's the same aggregate for each element.
  // If so, return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element,");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found a source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, pass through whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found a source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for the aggregate, where we'll insert the final PHI that will
  // be used instead of OrigIVI. The basic block of OrigIVI is *not* the right
  // choice. We should look at the blocks in which each of the AggElts is being
  // defined; they should all be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate
  // from which all the elements were originally extracted?
  // Note that we want the map to have a stable iteration order!
  SmallMapVector<BasicBlock *, Value *, 4> SourceAggregates;
  bool FoundSrcAgg = false;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.try_emplace(Pred);
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) == AggregateDescription::Found) {
      FoundSrcAgg = true;
      IV.first->second = *SourceAggregate;
    } else {
      // If UseBB is the single successor of Pred, we can add InsertValue to
      // Pred.
      auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
      if (!BI || !BI->isUnconditional())
        return nullptr;
    }
  }

  if (!FoundSrcAgg)
    return nullptr;

  // Do some sanity checks if we need to add insertvalue into predecessors.
  auto OrigBB = OrigIVI.getParent();
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    // The element is defined in UseBB, so it can't be used in predecessors.
    if (EltDefinedInUseBB)
      return nullptr;

    // Doing this transformation across a loop boundary may create a dead
    // loop, so we should avoid that situation. But LoopInfo is not generally
    // available, so we must be conservative here.
    // If OrigIVI is in UseBB and UseBB is the only successor of PredBB, then
    // PredBB can't be in an inner loop.
    if (UseBB != OrigBB)
      return nullptr;

    // Avoid constructing a constant aggregate, because a constant value may
    // expose more optimizations.
    bool ConstAgg = true;
    for (auto Val : AggElts) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
      if (!isa<Constant>(Elt)) {
        ConstAgg = false;
        break;
      }
    }
    if (ConstAgg)
      return nullptr;
  }

  // For predecessors without an appropriate source aggregate, create one in
  // the predecessor.
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    BasicBlock *Pred = It.first;
    Builder.SetInsertPoint(Pred->getTerminator());
    Value *V = PoisonValue::get(AggTy);
    for (auto [Idx, Val] : enumerate(AggElts)) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
      V = Builder.CreateInsertValue(V, Elt, Idx);
    }

    It.second = V;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0, i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

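/// Return true if the shuffle acts like a select between its two operands,
/// i.e. each mask element is poison or picks lane i from one of the operands.
/// For example (sketch, <4 x i32> operands): the mask <0, 5, 2, 7> is
/// select-equivalent, while <1, 5, 2, 7> is not (lane 0 would cross lanes).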
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an inf-loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check none of the intermediate steps have any additional uses, except
    // for the root insertelement instruction, which can be re-used, if it
    // inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
                                        InsElt.getIterator());

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle has a scalable type; the number of elements
  // in the shuffle mask is unknown at compile time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle has a scalable type; the number of elements
  // in the shuffle mask is unknown at compile time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

1459/// If we have an insertelement instruction feeding into another insertelement
1460/// and the 2nd is inserting a constant into the vector, canonicalize that
1461/// constant insertion before the insertion of a variable:
1462///
1463/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1464/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1465///
1466/// This has the potential of eliminating the 2nd insertelement instruction
1467/// via constant folding of the scalar constant into a vector constant.
1468static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1469 InstCombiner::BuilderTy &Builder) {
1470 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1471 if (!InsElt1 || !InsElt1->hasOneUse())
1472 return nullptr;
1473
1474 Value *X, *Y;
1475 Constant *ScalarC;
1476 ConstantInt *IdxC1, *IdxC2;
1477 if (match(InsElt1->getOperand(0), m_Value(X)) &&
1478 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1479 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1480 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1481 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1482 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1483 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1484 }
1485
1486 return nullptr;
1487}
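// For illustration only, a hypothetical example of the hoisting above. Given
//   %i1 = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %y, i64 1
//   %i2 = insertelement <4 x i32> %i1, i32 42, i64 3
// the constant insert is performed first, so it constant-folds away entirely:
//   %i2 = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 42>, i32 %y, i64 1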
1488
1489/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1490/// --> shufflevector X, CVec', Mask'
1491static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1492 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1493 // Bail out if the parent has more than one use. In that case, we'd be
1494 // replacing the insertelt with a shuffle, and that's not a clear win.
1495 if (!Inst || !Inst->hasOneUse())
1496 return nullptr;
1497 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1498 // The shuffle must have a constant vector operand. The insertelt must have
1499 // a constant scalar being inserted at a constant position in the vector.
1500 Constant *ShufConstVec, *InsEltScalar;
1501 uint64_t InsEltIndex;
1502 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1503 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1504 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1505 return nullptr;
1506
1507 // Adding an element to an arbitrary shuffle could be expensive, but a
1508 // shuffle that selects elements from vectors without crossing lanes is
1509 // assumed cheap.
1510 // If we're just adding a constant into that shuffle, it will still be
1511 // cheap.
1512 if (!isShuffleEquivalentToSelect(*Shuf))
1513 return nullptr;
1514
1515 // From the above 'select' check, we know that the mask has the same number
1516 // of elements as the vector input operands. We also know that each constant
1517 // input element is used in its lane and cannot be used more than once by
1518 // the shuffle. Therefore, replace the constant in the shuffle's constant
1519 // vector with the insertelt constant. Replace the constant in the shuffle's
1520 // mask vector with the insertelt index plus the length of the vector
1521 // (because the constant vector operand of a shuffle is always the 2nd
1522 // operand).
1523 ArrayRef<int> Mask = Shuf->getShuffleMask();
1524 unsigned NumElts = Mask.size();
1525 SmallVector<Constant *, 16> NewShufElts(NumElts);
1526 SmallVector<int, 16> NewMaskElts(NumElts);
1527 for (unsigned I = 0; I != NumElts; ++I) {
1528 if (I == InsEltIndex) {
1529 NewShufElts[I] = InsEltScalar;
1530 NewMaskElts[I] = InsEltIndex + NumElts;
1531 } else {
1532 // Copy over the existing values.
1533 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1534 NewMaskElts[I] = Mask[I];
1535 }
1536
1537 // Bail if we failed to find an element.
1538 if (!NewShufElts[I])
1539 return nullptr;
1540 }
1541
1542 // Create new operands for a shuffle that includes the constant of the
1543 // original insertelt. The old shuffle will be dead now.
1544 return new ShuffleVectorInst(Shuf->getOperand(0),
1545 ConstantVector::get(NewShufElts), NewMaskElts);
1546 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1547 // Transform sequences of insertelement ops with constant data/indexes into
1548 // a single shuffle op.
1549 // Cannot handle a scalable type; the number of elements needed to create
1550 // the shuffle mask is not a compile-time constant.
1551 if (isa<ScalableVectorType>(InsElt.getType()))
1552 return nullptr;
1553 unsigned NumElts =
1554 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1555
1556 uint64_t InsertIdx[2];
1557 Constant *Val[2];
1558 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1559 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1560 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1561 !match(IEI->getOperand(1), m_Constant(Val[1])))
1562 return nullptr;
1563 SmallVector<Constant *, 16> Values(NumElts);
1564 SmallVector<int, 16> Mask(NumElts);
1565 auto ValI = std::begin(Val);
1566 // Generate new constant vector and mask.
1567 // We have 2 values/masks from the insertelement instructions. Insert them
1568 // into new value/mask vectors.
1569 for (uint64_t I : InsertIdx) {
1570 if (!Values[I]) {
1571 Values[I] = *ValI;
1572 Mask[I] = NumElts + I;
1573 }
1574 ++ValI;
1575 }
1576 // Remaining values are filled with 'poison' values.
1577 for (unsigned I = 0; I < NumElts; ++I) {
1578 if (!Values[I]) {
1579 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1580 Mask[I] = I;
1581 }
1582 }
1583 // Create new operands for a shuffle that includes the constant of the
1584 // original insertelt.
1585 return new ShuffleVectorInst(IEI->getOperand(0),
1586 ConstantVector::get(Values), Mask);
1587 }
1588 return nullptr;
1589}
1590
1591/// If both the base vector and the inserted element are extended from the same
1592/// type, do the insert element in the narrow source type followed by extend.
1593/// TODO: This can be extended to include other cast opcodes, but particularly
1594/// if we create a wider insertelement, make sure codegen is not harmed.
1595static Instruction *narrowInsElt(InsertElementInst &InsElt,
1596 InstCombiner::BuilderTy &Builder) {
1597 // We are creating a vector extend. If the original vector extend has another
1598 // use, that would mean we end up with 2 vector extends, so avoid that.
1599 // TODO: We could relax the one-use check to "at least one op has one use"
1600 // (assuming that the source types match - see next TODO comment).
1601 Value *Vec = InsElt.getOperand(0);
1602 if (!Vec->hasOneUse())
1603 return nullptr;
1604
1605 Value *Scalar = InsElt.getOperand(1);
1606 Value *X, *Y;
1607 CastInst::CastOps CastOpcode;
1608 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1609 CastOpcode = Instruction::FPExt;
1610 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1611 CastOpcode = Instruction::SExt;
1612 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1613 CastOpcode = Instruction::ZExt;
1614 else
1615 return nullptr;
1616
1617 // TODO: We can allow mismatched types by creating an intermediate cast.
1618 if (X->getType()->getScalarType() != Y->getType())
1619 return nullptr;
1620
1621 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1622 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1623 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1624}
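// For illustration only, a hypothetical fpext case of the narrowing above:
//   %v = fpext <2 x float> %x to <2 x double>
//   %s = fpext float %y to double
//   %r = insertelement <2 x double> %v, double %s, i64 1
// becomes an insert in the narrow source type followed by one vector extend:
//   %i = insertelement <2 x float> %x, float %y, i64 1
//   %r = fpext <2 x float> %i to <2 x double>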
1625
1626/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1627/// try to convert to a single insert with appropriate bitcasts.
1628static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1629 bool IsBigEndian,
1630 InstCombiner::BuilderTy &Builder) {
1631 Value *VecOp = InsElt.getOperand(0);
1632 Value *ScalarOp = InsElt.getOperand(1);
1633 Value *IndexOp = InsElt.getOperand(2);
1634
1635 // Pattern depends on endianness because we expect the lower index to be inserted first.
1636 // Big endian:
1637 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1638 // Little endian:
1639 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1640 // Note: It is not safe to do this transform with an arbitrary base vector
1641 // because the bitcast of that vector to fewer/larger elements could
1642 // allow poison to spill into an element that was not poison before.
1643 // TODO: Detect smaller fractions of the scalar.
1644 // TODO: One-use checks are conservative.
1645 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1646 Value *Scalar0, *BaseVec;
1647 uint64_t Index0, Index1;
1648 if (!VTy || (VTy->getNumElements() & 1) ||
1649 !match(IndexOp, m_ConstantInt(Index1)) ||
1650 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1651 m_ConstantInt(Index0))) ||
1652 !match(BaseVec, m_Undef()))
1653 return nullptr;
1654
1655 // The first insert must be to the index one less than this one, and
1656 // it must be to an even index.
1657 if (Index0 + 1 != Index1 || Index0 & 1)
1658 return nullptr;
1659
1660 // For big endian, the high half of the value should be inserted first.
1661 // For little endian, the low half of the value should be inserted first.
1662 Value *X;
1663 uint64_t ShAmt;
1664 if (IsBigEndian) {
1665 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1666 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1667 return nullptr;
1668 } else {
1669 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1670 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1671 return nullptr;
1672 }
1673
1674 Type *SrcTy = X->getType();
1675 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1676 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1677 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1678 return nullptr;
1679
1680 // Bitcast the base vector to a vector type with the source element type.
1681 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1682 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1683
1684 // Scale the insert index for a vector with half as many elements.
1685 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1686 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1687 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1688 return new BitCastInst(NewInsert, VTy);
1689}
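// For illustration only, a hypothetical little-endian case of the fold above,
// where the two i16 halves of an i32 %x land in adjacent even/odd lanes:
//   %lo  = trunc i32 %x to i16
//   %sh  = lshr i32 %x, 16
//   %hi  = trunc i32 %sh to i16
//   %v0  = insertelement <4 x i16> undef, i16 %lo, i64 0
//   %v1  = insertelement <4 x i16> %v0, i16 %hi, i64 1
// becomes a single insert of the whole scalar through bitcasts:
//   %bc  = bitcast <4 x i16> undef to <2 x i32>
//   %ins = insertelement <2 x i32> %bc, i32 %x, i64 0
//   %r   = bitcast <2 x i32> %ins to <4 x i16>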
1690
1691Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1692 Value *VecOp = IE.getOperand(0);
1693 Value *ScalarOp = IE.getOperand(1);
1694 Value *IdxOp = IE.getOperand(2);
1695
1696 if (auto *V = simplifyInsertElementInst(
1697 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1698 return replaceInstUsesWith(IE, V);
1699
1700 // Canonicalize type of constant indices to i64 to simplify CSE
1701 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1702 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1703 return replaceOperand(IE, 2, NewIdx);
1704
1705 Value *BaseVec, *OtherScalar;
1706 uint64_t OtherIndexVal;
1707 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1708 m_Value(OtherScalar),
1709 m_ConstantInt(OtherIndexVal)))) &&
1710 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1711 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1712 return InsertElementInst::Create(NewIns, OtherScalar,
1713 Builder.getInt64(OtherIndexVal));
1714 }
1715 }
1716
1717 // If the scalar is bitcast and inserted into undef, do the insert in the
1718 // source type followed by bitcast.
1719 // TODO: Generalize for insert into any constant, not just undef?
1720 Value *ScalarSrc;
1721 if (match(VecOp, m_Undef()) &&
1722 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1723 (ScalarSrc->getType()->isIntegerTy() ||
1724 ScalarSrc->getType()->isFloatingPointTy())) {
1725 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1726 // bitcast (inselt undef, ScalarSrc, IdxOp)
1727 Type *ScalarTy = ScalarSrc->getType();
1728 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1729 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1730 : UndefValue::get(VecTy);
1731 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1732 return new BitCastInst(NewInsElt, IE.getType());
1733 }
1734
1735 // If the vector and scalar are both bitcast from the same element type, do
1736 // the insert in that source type followed by bitcast.
1737 Value *VecSrc;
1738 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1739 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1740 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1741 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1742 cast<VectorType>(VecSrc->getType())->getElementType() ==
1743 ScalarSrc->getType()) {
1744 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1745 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1746 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1747 return new BitCastInst(NewInsElt, IE.getType());
1748 }
1749
1750 // If the inserted element was extracted from some other fixed-length vector
1751 // and both indexes are valid constants, try to turn this into a shuffle.
1752 // Cannot handle a scalable vector type; the number of elements needed to
1753 // create the shuffle mask is not a compile-time constant.
1754 uint64_t InsertedIdx, ExtractedIdx;
1755 Value *ExtVecOp;
1756 if (isa<FixedVectorType>(IE.getType()) &&
1757 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1758 match(ScalarOp,
1759 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1760 isa<FixedVectorType>(ExtVecOp->getType()) &&
1761 ExtractedIdx <
1762 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1763 // TODO: Looking at the user(s) to determine if this insert is a
1764 // fold-to-shuffle opportunity does not match the usual instcombine
1765 // constraints. We should decide if the transform is worthy based only
1766 // on this instruction and its operands, but that may not work currently.
1767 //
1768 // Here, we are trying to avoid creating shuffles before reaching
1769 // the end of a chain of extract-insert pairs. This is complicated because
1770 // we do not generally form arbitrary shuffle masks in instcombine
1771 // (because those may codegen poorly), but collectShuffleElements() does
1772 // exactly that.
1773 //
1774 // The rules for determining what is an acceptable target-independent
1775 // shuffle mask are fuzzy because they evolve based on the backend's
1776 // capabilities and real-world impact.
1777 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1778 if (!Insert.hasOneUse())
1779 return true;
1780 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1781 if (!InsertUser)
1782 return true;
1783 return false;
1784 };
1785
1786 // Try to form a shuffle from a chain of extract-insert ops.
1787 if (isShuffleRootCandidate(IE)) {
1788 bool Rerun = true;
1789 while (Rerun) {
1790 Rerun = false;
1791
1792 SmallVector<int, 16> Mask;
1793 ShuffleOps LR =
1794 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1795
1796 // The proposed shuffle may be trivial, in which case we shouldn't
1797 // perform the combine.
1798 if (LR.first != &IE && LR.second != &IE) {
1799 // We now have a shuffle of LHS, RHS, Mask.
1800 if (LR.second == nullptr)
1801 LR.second = PoisonValue::get(LR.first->getType());
1802 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1803 }
1804 }
1805 }
1806 }
1807
1808 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1809 unsigned VWidth = VecTy->getNumElements();
1810 APInt PoisonElts(VWidth, 0);
1811 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1812 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1813 PoisonElts)) {
1814 if (V != &IE)
1815 return replaceInstUsesWith(IE, V);
1816 return &IE;
1817 }
1818 }
1819
1820 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1821 return Shuf;
1822
1823 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1824 return NewInsElt;
1825
1826 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1827 return Broadcast;
1828
1829 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1830 return Splat;
1831
1832 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1833 return IdentityShuf;
1834
1835 if (Instruction *Ext = narrowInsElt(IE, Builder))
1836 return Ext;
1837
1838 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1839 return Ext;
1840
1841 return nullptr;
1842}
1843
1844/// Return true if we can evaluate the specified expression tree when the
1845/// vector elements are shuffled into a different order.
1846static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1847 unsigned Depth = 5) {
1848 // We can always reorder the elements of a constant.
1849 if (isa<Constant>(V))
1850 return true;
1851
1852 // We won't reorder vector arguments. No IPO here.
1853 Instruction *I = dyn_cast<Instruction>(V);
1854 if (!I) return false;
1855
1856 // Two users may expect different orders of the elements. Don't try it.
1857 if (!I->hasOneUse())
1858 return false;
1859
1860 if (Depth == 0) return false;
1861
1862 switch (I->getOpcode()) {
1863 case Instruction::UDiv:
1864 case Instruction::SDiv:
1865 case Instruction::URem:
1866 case Instruction::SRem:
1867 // Propagating an undefined shuffle mask element to integer div/rem is not
1868 // allowed because those opcodes can create immediate undefined behavior
1869 // from an undefined element in an operand.
1870 if (llvm::is_contained(Mask, -1))
1871 return false;
1872 [[fallthrough]];
1873 case Instruction::Add:
1874 case Instruction::FAdd:
1875 case Instruction::Sub:
1876 case Instruction::FSub:
1877 case Instruction::Mul:
1878 case Instruction::FMul:
1879 case Instruction::FDiv:
1880 case Instruction::FRem:
1881 case Instruction::Shl:
1882 case Instruction::LShr:
1883 case Instruction::AShr:
1884 case Instruction::And:
1885 case Instruction::Or:
1886 case Instruction::Xor:
1887 case Instruction::ICmp:
1888 case Instruction::FCmp:
1889 case Instruction::Trunc:
1890 case Instruction::ZExt:
1891 case Instruction::SExt:
1892 case Instruction::FPToUI:
1893 case Instruction::FPToSI:
1894 case Instruction::UIToFP:
1895 case Instruction::SIToFP:
1896 case Instruction::FPTrunc:
1897 case Instruction::FPExt:
1898 case Instruction::GetElementPtr: {
1899 // Bail out if we would create longer vector ops. We could allow creating
1900 // longer vector ops, but that may result in more expensive codegen.
1901 Type *ITy = I->getType();
1902 if (ITy->isVectorTy() &&
1903 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1904 return false;
1905 for (Value *Operand : I->operands()) {
1906 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1907 return false;
1908 }
1909 return true;
1910 }
1911 case Instruction::InsertElement: {
1912 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1913 if (!CI) return false;
1914 int ElementNumber = CI->getLimitedValue();
1915
1916 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1917 // can't put an element into multiple indices.
1918 bool SeenOnce = false;
1919 for (int I : Mask) {
1920 if (I == ElementNumber) {
1921 if (SeenOnce)
1922 return false;
1923 SeenOnce = true;
1924 }
1925 }
1926 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1927 }
1928 }
1929 return false;
1930}
1931
1932/// Rebuild a new instruction just like 'I' but with the given new operands.
1933/// In the event of a type mismatch, the new operands' types take precedence.
1934static Value *buildNew(Instruction *I, ArrayRef<Value *> NewOps,
1935 IRBuilderBase &Builder) {
1936 Builder.SetInsertPoint(I);
1937 switch (I->getOpcode()) {
1938 case Instruction::Add:
1939 case Instruction::FAdd:
1940 case Instruction::Sub:
1941 case Instruction::FSub:
1942 case Instruction::Mul:
1943 case Instruction::FMul:
1944 case Instruction::UDiv:
1945 case Instruction::SDiv:
1946 case Instruction::FDiv:
1947 case Instruction::URem:
1948 case Instruction::SRem:
1949 case Instruction::FRem:
1950 case Instruction::Shl:
1951 case Instruction::LShr:
1952 case Instruction::AShr:
1953 case Instruction::And:
1954 case Instruction::Or:
1955 case Instruction::Xor: {
1956 BinaryOperator *BO = cast<BinaryOperator>(I);
1957 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1958 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1959 NewOps[0], NewOps[1]);
1960 if (auto *NewI = dyn_cast<Instruction>(New)) {
1961 if (isa<OverflowingBinaryOperator>(BO)) {
1962 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1963 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1964 }
1965 if (isa<PossiblyExactOperator>(BO)) {
1966 NewI->setIsExact(BO->isExact());
1967 }
1968 if (isa<FPMathOperator>(BO))
1969 NewI->copyFastMathFlags(I);
1970 }
1971 return New;
1972 }
1973 case Instruction::ICmp:
1974 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1975 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1976 NewOps[1]);
1977 case Instruction::FCmp:
1978 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1979 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1980 NewOps[1]);
1981 case Instruction::Trunc:
1982 case Instruction::ZExt:
1983 case Instruction::SExt:
1984 case Instruction::FPToUI:
1985 case Instruction::FPToSI:
1986 case Instruction::UIToFP:
1987 case Instruction::SIToFP:
1988 case Instruction::FPTrunc:
1989 case Instruction::FPExt: {
1990 // It's possible that the mask has a different number of elements from
1991 // the original cast. We recompute the destination type to match the mask.
1992 Type *DestTy = VectorType::get(
1993 I->getType()->getScalarType(),
1994 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1995 assert(NewOps.size() == 1 && "cast with #ops != 1");
1996 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
1997 DestTy);
1998 }
1999 case Instruction::GetElementPtr: {
2000 Value *Ptr = NewOps[0];
2001 ArrayRef<Value*> Idx = NewOps.slice(1);
2002 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
2003 Ptr, Idx, "",
2004 cast<GEPOperator>(I)->getNoWrapFlags());
2005 }
2006 }
2007 llvm_unreachable("failed to rebuild vector instructions");
2008}
2009
2010static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
2011 IRBuilderBase &Builder) {
2012 // Mask.size() does not need to be equal to the number of vector elements.
2013
2014 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
2015 Type *EltTy = V->getType()->getScalarType();
2016
2017 if (isa<PoisonValue>(V))
2018 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
2019
2020 if (match(V, m_Undef()))
2021 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
2022
2023 if (isa<ConstantAggregateZero>(V))
2024 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
2025
2026 if (Constant *C = dyn_cast<Constant>(V))
2027 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
2028 Mask);
2029
2030 Instruction *I = cast<Instruction>(V);
2031 switch (I->getOpcode()) {
2032 case Instruction::Add:
2033 case Instruction::FAdd:
2034 case Instruction::Sub:
2035 case Instruction::FSub:
2036 case Instruction::Mul:
2037 case Instruction::FMul:
2038 case Instruction::UDiv:
2039 case Instruction::SDiv:
2040 case Instruction::FDiv:
2041 case Instruction::URem:
2042 case Instruction::SRem:
2043 case Instruction::FRem:
2044 case Instruction::Shl:
2045 case Instruction::LShr:
2046 case Instruction::AShr:
2047 case Instruction::And:
2048 case Instruction::Or:
2049 case Instruction::Xor:
2050 case Instruction::ICmp:
2051 case Instruction::FCmp:
2052 case Instruction::Trunc:
2053 case Instruction::ZExt:
2054 case Instruction::SExt:
2055 case Instruction::FPToUI:
2056 case Instruction::FPToSI:
2057 case Instruction::UIToFP:
2058 case Instruction::SIToFP:
2059 case Instruction::FPTrunc:
2060 case Instruction::FPExt:
2061 case Instruction::Select:
2062 case Instruction::GetElementPtr: {
2063 SmallVector<Value *, 8> NewOps;
2064 bool NeedsRebuild =
2065 (Mask.size() !=
2066 cast<FixedVectorType>(I->getType())->getNumElements());
2067 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
2068 Value *V;
2069 // Recursively call evaluateInDifferentElementOrder on vector arguments
2070 // as well. E.g. GetElementPtr may have scalar operands even if the
2071 // return value is a vector, so we need to examine the operand type.
2072 if (I->getOperand(i)->getType()->isVectorTy())
2073 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
2074 else
2075 V = I->getOperand(i);
2076 NewOps.push_back(V);
2077 NeedsRebuild |= (V != I->getOperand(i));
2078 }
2079 if (NeedsRebuild)
2080 return buildNew(I, NewOps, Builder);
2081 return I;
2082 }
2083 case Instruction::InsertElement: {
2084 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
2085
2086 // The insertelement was inserting at Element. Figure out which element
2087 // that becomes after shuffling. The answer is guaranteed to be unique
2088 // by canEvaluateShuffled().
2089 bool Found = false;
2090 int Index = 0;
2091 for (int e = Mask.size(); Index != e; ++Index) {
2092 if (Mask[Index] == Element) {
2093 Found = true;
2094 break;
2095 }
2096 }
2097
2098 // If the element is not in Mask, no need to handle operand 1 (the element to
2099 // be inserted). Just evaluate values in operand 0 according to Mask.
2100 if (!Found)
2101 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2102
2103 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2104 Builder);
2105 Builder.SetInsertPoint(I);
2106 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2107 }
2108 }
2109 llvm_unreachable("failed to reorder elements of vector instruction!");
2110}
2111
2112// Returns true if the shuffle is extracting a contiguous range of values from
2113// LHS, for example:
2114// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2115// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2116// Shuffles to: |EE|FF|GG|HH|
2117// +--+--+--+--+
2118static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2119 ArrayRef<int> Mask) {
2120 unsigned LHSElems =
2121 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2122 unsigned MaskElems = Mask.size();
2123 unsigned BegIdx = Mask.front();
2124 unsigned EndIdx = Mask.back();
2125 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2126 return false;
2127 for (unsigned I = 0; I != MaskElems; ++I)
2128 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2129 return false;
2130 return true;
2131}
2132
2133/// These are the ingredients in an alternate form binary operator as described
2134/// below.
2135struct BinopElts {
2136 BinaryOperator::BinaryOps Opcode;
2137 Value *Op0;
2138 Value *Op1;
2139 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2140 Value *V0 = nullptr, Value *V1 = nullptr) :
2141 Opcode(Opc), Op0(V0), Op1(V1) {}
2142 operator bool() const { return Opcode != 0; }
2143};
2144
2145/// Binops may be transformed into binops with different opcodes and operands.
2146/// Reverse the usual canonicalization to enable folds with the non-canonical
2147/// form of the binop. If a transform is possible, return the elements of the
2148/// new binop. If not, return invalid elements.
2149static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2150 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2151 Type *Ty = BO->getType();
2152 switch (BO->getOpcode()) {
2153 case Instruction::Shl: {
2154 // shl X, C --> mul X, (1 << C)
2155 Constant *C;
2156 if (match(BO1, m_ImmConstant(C))) {
2157 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2158 Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2159 assert(ShlOne && "Constant folding of immediate constants failed");
2160 return {Instruction::Mul, BO0, ShlOne};
2161 }
2162 break;
2163 }
2164 case Instruction::Or: {
2165 // or disjoint X, C --> add X, C
2166 if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
2167 return {Instruction::Add, BO0, BO1};
2168 break;
2169 }
2170 case Instruction::Sub:
2171 // sub 0, X --> mul X, -1
2172 if (match(BO0, m_ZeroInt()))
2173 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2174 break;
2175 default:
2176 break;
2177 }
2178 return {};
2179}
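// For illustration only, hypothetical alternate forms produced above
// (vector constants are arbitrary):
//   shl X, <i32 1, i32 3>    --> mul X, <i32 2, i32 8>
//   or disjoint X, Y         --> add X, Y
//   sub <i32 0, i32 0>, X    --> mul X, <i32 -1, i32 -1>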
2180
2181/// A select shuffle of a select shuffle with a shared operand can be reduced
2182/// to a single select shuffle. This is an obvious improvement in IR, and the
2183/// backend is expected to lower select shuffles efficiently.
2184static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2185 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2186
2187 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2188 SmallVector<int, 16> Mask;
2189 Shuf.getShuffleMask(Mask);
2190 unsigned NumElts = Mask.size();
2191
2192 // Canonicalize a select shuffle with common operand as Op1.
2193 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2194 if (ShufOp && ShufOp->isSelect() &&
2195 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2196 std::swap(Op0, Op1);
2197 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2198 }
2199
2200 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2201 if (!ShufOp || !ShufOp->isSelect() ||
2202 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2203 return nullptr;
2204
2205 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2206 SmallVector<int, 16> Mask1;
2207 ShufOp->getShuffleMask(Mask1);
2208 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2209
2210 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2211 if (Y == Op0) {
2212 std::swap(X, Y);
2213 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2214 }
2215
2216 // If the mask chooses from X (operand 0), it stays the same.
2217 // If the mask chooses from the earlier shuffle, the other mask value is
2218 // transferred to the combined select shuffle:
2219 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2220 SmallVector<int, 16> NewMask(NumElts);
2221 for (unsigned i = 0; i != NumElts; ++i)
2222 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2223
2224 // A select mask with undef elements might look like an identity mask.
2225 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2226 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2227 "Unexpected shuffle mask");
2228 return new ShuffleVectorInst(X, Y, NewMask);
2229}
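// For illustration only, a hypothetical worked example of the fold above,
// where X is the shared operand of both select shuffles:
//   shuf X, (shuf X, Y, <0,5,2,7>), <0,5,6,3>
// Mask elements 0 and 3 choose from X directly; elements 1 and 2 choose from
// the inner shuffle, so its mask values (5 and 2) are transferred:
//   --> shuf X, Y, <0,5,2,3>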
2230
2231static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2232 const SimplifyQuery &SQ) {
2233 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2234
2235 // Are we shuffling together some value and that same value after it has been
2236 // modified by a binop with a constant?
2237 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2238 Constant *C;
2239 bool Op0IsBinop;
2240 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2241 Op0IsBinop = true;
2242 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2243 Op0IsBinop = false;
2244 else
2245 return nullptr;
2246
2247 // The identity constant for a binop leaves a variable operand unchanged. For
2248 // a vector, this is a splat of something like 0, -1, or 1.
2249 // If there's no identity constant for this binop, we're done.
2250 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2251 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2252 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2253 if (!IdC)
2254 return nullptr;
2255
2256 Value *X = Op0IsBinop ? Op1 : Op0;
2257
2258 // Prevent folding when the non-binop operand might have NaN values.
2259 // If X can have NaN elements, then the floating-point math operation in
2260 // the transformed code may not preserve the exact NaN bit-pattern, e.g.
2261 // `fadd sNaN, 0.0 -> qNaN`.
2262 // That would make the transformation incorrect, since the original
2263 // program would have preserved the exact NaN bit-pattern.
2264 // So avoid the fold if X can have NaN elements.
2265 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2266 !isKnownNeverNaN(X, SQ))
2267 return nullptr;
2268
2269 // Shuffle identity constants into the lanes that return the original value.
2270 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2271 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2272 // The existing binop constant vector remains in the same operand position.
2273 ArrayRef<int> Mask = Shuf.getShuffleMask();
2274 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2275 ConstantExpr::getShuffleVector(IdC, C, Mask);
2276
2277 bool MightCreatePoisonOrUB =
2278 is_contained(Mask, PoisonMaskElem) &&
2279 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2280 if (MightCreatePoisonOrUB)
2281 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2282
2283 // shuf (bop X, C), X, M --> bop X, C'
2284 // shuf X, (bop X, C), M --> bop X, C'
2285 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2286 NewBO->copyIRFlags(BO);
2287
2288 // An undef shuffle mask element may propagate as an undef constant element in
2289 // the new binop. That would produce poison where the original code might not.
2290 // If we already made a safe constant, then there's no danger.
2291 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2292 NewBO->dropPoisonGeneratingFlags();
2293 return NewBO;
2294}
2295
2296/// If we have an insert of a scalar to a non-zero element of an undefined
2297/// vector and then shuffle that value, that's the same as inserting to the zero
2298/// element and shuffling. Splatting from the zero element is recognized as the
2299/// canonical form of splat.
2300static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2301 InstCombiner::BuilderTy &Builder) {
2302 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2303 ArrayRef<int> Mask = Shuf.getShuffleMask();
2304 Value *X;
2305 uint64_t IndexC;
2306
2307 // Match a shuffle that is a splat to a non-zero element.
2308 if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
2309 m_ConstantInt(IndexC)))) ||
2310 !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2311 return nullptr;
2312
2313 // Insert into element 0 of a poison vector.
2314 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2315 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2316
2317 // Splat from element 0. Any mask element that is poison remains poison.
2318 // For example:
2319 // shuf (inselt poison, X, 2), _, <2,2,undef>
2320 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2321 unsigned NumMaskElts =
2322 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2323 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2324 for (unsigned i = 0; i != NumMaskElts; ++i)
2325 if (Mask[i] == PoisonMaskElem)
2326 NewMask[i] = Mask[i];
2327
2328 return new ShuffleVectorInst(NewIns, NewMask);
2329}
2330
2331/// Try to fold shuffles that are the equivalent of a vector select.
2332Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2333 if (!Shuf.isSelect())
2334 return nullptr;
2335
2336 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2337 // Commuting undef to operand 0 conflicts with another canonicalization.
2338 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2339 if (!match(Shuf.getOperand(1), m_Undef()) &&
2340 Shuf.getMaskValue(0) >= (int)NumElts) {
2341 // TODO: Can we assert that both operands of a shuffle-select are not undef
2342 // (otherwise, it would have been folded by instsimplify)?
2343 Shuf.commute();
2344 return &Shuf;
2345 }
2346
2347 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2348 return I;
2349
2350 if (Instruction *I = foldSelectShuffleWith1Binop(
2351 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2352 return I;
2353
2354 BinaryOperator *B0, *B1;
2355 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2356 !match(Shuf.getOperand(1), m_BinOp(B1)))
2357 return nullptr;
2358
2359 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2360 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2361 // with a multiply, we will exit because C0/C1 will not be set.
2362 Value *X, *Y;
2363 Constant *C0 = nullptr, *C1 = nullptr;
2364 bool ConstantsAreOp1;
2365 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2366 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2367 ConstantsAreOp1 = false;
2368 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2369 m_Neg(m_Value(X)))) &&
2370 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2371 m_Neg(m_Value(Y)))))
2372 ConstantsAreOp1 = true;
2373 else
2374 return nullptr;
2375
2376 // We need matching binops to fold the lanes together.
2377 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2378 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2379 bool DropNSW = false;
2380 if (ConstantsAreOp1 && Opc0 != Opc1) {
2381 // TODO: We drop "nsw" if shift is converted into multiply because it may
2382 // not be correct when the shift amount is BitWidth - 1. We could examine
2383 // each vector element to determine if it is safe to keep that flag.
2384 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2385 DropNSW = true;
2386 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2387 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2388 Opc0 = AltB0.Opcode;
2389 C0 = cast<Constant>(AltB0.Op1);
2390 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2391 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2392 Opc1 = AltB1.Opcode;
2393 C1 = cast<Constant>(AltB1.Op1);
2394 }
2395 }
2396
2397 if (Opc0 != Opc1 || !C0 || !C1)
2398 return nullptr;
2399
2400 // The opcodes must be the same. Use a new name to make that clear.
2401 BinaryOperator::BinaryOps BOpc = Opc0;
2402
2403 // Select the constant elements needed for the single binop.
2404 ArrayRef<int> Mask = Shuf.getShuffleMask();
2405 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2406
2407 // We are moving a binop after a shuffle. When a shuffle has an undefined
2408 // mask element, the result is undefined, but it is not poison or undefined
2409 // behavior. That is not necessarily true for div/rem/shift.
2410 bool MightCreatePoisonOrUB =
2411 is_contained(Mask, PoisonMaskElem) &&
2412 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2413 if (MightCreatePoisonOrUB)
2414 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2415 ConstantsAreOp1);
2416
2417 Value *V;
2418 if (X == Y) {
2419 // Remove a binop and the shuffle by rearranging the constant:
2420 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2421 // shuffle (op C0, V), (op C1, V), M --> op C', V
2422 V = X;
2423 } else {
2424 // If there are 2 different variable operands, we must create a new shuffle
2425 // (select) first, so check uses to ensure that we don't end up with more
2426 // instructions than we started with.
2427 if (!B0->hasOneUse() && !B1->hasOneUse())
2428 return nullptr;
2429
2430 // If we use the original shuffle mask and op1 is *variable*, we would be
2431 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2432 // poison. We do not have to guard against UB when *constants* are op1
2433 // because safe constants guarantee that we do not overflow sdiv/srem (and
2434 // there's no danger for other opcodes).
2435 // TODO: To allow this case, create a new shuffle mask with no undefs.
2436 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2437 return nullptr;
2438
2439 // Note: In general, we do not create new shuffles in InstCombine because we
2440 // do not know if a target can lower an arbitrary shuffle optimally. In this
2441 // case, the shuffle uses the existing mask, so there is no additional risk.
2442
2443 // Select the variable vectors first, then perform the binop:
2444 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2445 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2446 V = Builder.CreateShuffleVector(X, Y, Mask);
2447 }
2448
2449 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2450 Builder.CreateBinOp(BOpc, NewC, V);
2451
2452 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2453 // 1. If we changed an opcode, poison conditions might have changed.
2454 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2455 // where the original code did not. But if we already made a safe constant,
2456 // then there's no danger.
2457 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2458 NewI->copyIRFlags(B0);
2459 NewI->andIRFlags(B1);
2460 if (DropNSW)
2461 NewI->setHasNoSignedWrap(false);
2462 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2463 NewI->dropPoisonGeneratingFlags();
2464 }
2465 return replaceInstUsesWith(Shuf, NewBO);
2466}
2467
2468/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2469/// Example (little endian):
2470/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2471static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2472 bool IsBigEndian) {
2473 // This must be a bitcasted shuffle of 1 vector integer operand.
2474 Type *DestType = Shuf.getType();
2475 Value *X;
2476 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2477 !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
2478 return nullptr;
2479
2480 // The source type must have the same number of elements as the shuffle,
2481 // and the source element type must be larger than the shuffle element type.
2482 Type *SrcType = X->getType();
2483 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2484 cast<FixedVectorType>(SrcType)->getNumElements() !=
2485 cast<FixedVectorType>(DestType)->getNumElements() ||
2486 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2487 return nullptr;
2488
2489 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2490 "Expected a shuffle that decreases length");
2491
2492 // Last, check that the mask chooses the correct low bits for each narrow
2493 // element in the result.
2494 uint64_t TruncRatio =
2495 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2496 ArrayRef<int> Mask = Shuf.getShuffleMask();
2497 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2498 if (Mask[i] == PoisonMaskElem)
2499 continue;
2500 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2501 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2502 if (Mask[i] != (int)LSBIndex)
2503 return nullptr;
2504 }
2505
2506 return new TruncInst(X, DestType);
2507}
2508
2509/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2510/// narrowing (concatenating with poison and extracting back to the original
2511/// length). This allows replacing the wide select with a narrow select.
2512static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2513 InstCombiner::BuilderTy &Builder) {
2514 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2515 // of the 1st vector operand of a shuffle.
2516 if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
2517 return nullptr;
2518
2519 // The vector being shuffled must be a vector select that we can eliminate.
2520 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2521 Value *Cond, *X, *Y;
2522 if (!match(Shuf.getOperand(0),
2523 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2524 return nullptr;
2525
2526 // We need a narrow condition value. It must be extended with poison elements
2527 // and have the same number of elements as this shuffle.
2528 unsigned NarrowNumElts =
2529 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2530 Value *NarrowCond;
2531 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2532 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2533 NarrowNumElts ||
2534 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2535 return nullptr;
2536
2537 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2538 // -->
2539 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2540 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2541 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2542 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2543}
2544
2545/// Canonicalize FP negate/abs after shuffle.
2546static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2547 InstCombiner::BuilderTy &Builder) {
2548 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2549 Value *X;
2550 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2551 return nullptr;
2552
2553 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2554
2555 // Match 2-input (binary) shuffle.
2556 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2557 Value *Y;
2558 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2559 S0->getOpcode() != S1->getOpcode() ||
2560 (!S0->hasOneUse() && !S1->hasOneUse()))
2561 return nullptr;
2562
2563 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2564 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2565 Instruction *NewF;
2566 if (IsFNeg) {
2567 NewF = UnaryOperator::CreateFNeg(NewShuf);
2568 } else {
2569 Function *FAbs = Intrinsic::getOrInsertDeclaration(
2570 Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
2571 NewF = CallInst::Create(FAbs, {NewShuf});
2572 }
2573 NewF->copyIRFlags(S0);
2574 NewF->andIRFlags(S1);
2575 return NewF;
2576}
2577
2578/// Canonicalize casts after shuffle.
2579static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2580 InstCombiner::BuilderTy &Builder) {
2581 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2582 if (!Cast0)
2583 return nullptr;
2584
2585 // TODO: Allow other opcodes? That would require easing the type restrictions
2586 // below here.
2587 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2588 switch (CastOpcode) {
2589 case Instruction::SExt:
2590 case Instruction::ZExt:
2591 case Instruction::FPToSI:
2592 case Instruction::FPToUI:
2593 case Instruction::SIToFP:
2594 case Instruction::UIToFP:
2595 break;
2596 default:
2597 return nullptr;
2598 }
2599
2600 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2601 VectorType *ShufTy = Shuf.getType();
2602 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2603
2604 // TODO: Allow length-increasing shuffles?
2605 if (ShufTy->getElementCount().getKnownMinValue() >
2606 ShufOpTy->getElementCount().getKnownMinValue())
2607 return nullptr;
2608
2609 // shuffle (cast X), Poison, identity-with-extract-mask -->
2610 // cast (shuffle X, Poison, identity-with-extract-mask).
2611 if (isa<PoisonValue>(Shuf.getOperand(1)) && Cast0->hasOneUse() &&
2612 Shuf.isIdentityWithExtract()) {
2613 auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
2614 PoisonValue::get(CastSrcTy),
2615 Shuf.getShuffleMask());
2616 return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
2617 }
2618
2619 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2620 // Do we have 2 matching cast operands?
2621 if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2622 Cast0->getSrcTy() != Cast1->getSrcTy())
2623 return nullptr;
2624
2625 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2626 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2627 "Expected fixed vector operands for casts and binary shuffle");
2628 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2629 return nullptr;
2630
2631 // At least one of the operands must have only one use (the shuffle).
2632 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2633 return nullptr;
2634
2635 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2636 Value *X = Cast0->getOperand(0);
2637 Value *Y = Cast1->getOperand(0);
2638 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2639 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2640}
2641
2642/// Try to fold an extract subvector operation.
2643Instruction *InstCombinerImpl::foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2644 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2645 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
2646 return nullptr;
2647
2648 // Check if we are extracting all bits of an inserted scalar:
2649 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2650 Value *X;
2651 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2652 X->getType()->getPrimitiveSizeInBits() ==
2653 Shuf.getType()->getPrimitiveSizeInBits())
2654 return new BitCastInst(X, Shuf.getType());
2655
2656 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2657 Value *Y;
2658 ArrayRef<int> Mask;
2659 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2660 return nullptr;
2661
2662 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2663 // then combining may result in worse codegen.
2664 if (!Op0->hasOneUse())
2665 return nullptr;
2666
2667 // We are extracting a subvector from a shuffle. Remove excess elements from
2668 // the 1st shuffle mask to eliminate the extract.
2669 //
2670 // This transform is conservatively limited to identity extracts because we do
2671 // not allow arbitrary shuffle mask creation as a target-independent transform
2672 // (because we can't guarantee that will lower efficiently).
2673 //
2674 // If the extracting shuffle has a poison mask element, it transfers to the
2675 // new shuffle mask. Otherwise, copy the original mask element. Example:
2676 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2677 // shuf X, Y, <C0, poison, C2, poison>
2678 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2679 SmallVector<int, 16> NewMask(NumElts);
2680 assert(NumElts < Mask.size() &&
2681 "Identity with extract must have fewer elements than its inputs");
2682
2683 for (unsigned i = 0; i != NumElts; ++i) {
2684 int ExtractMaskElt = Shuf.getMaskValue(i);
2685 int MaskElt = Mask[i];
2686 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2687 }
2688 return new ShuffleVectorInst(X, Y, NewMask);
2689}
2690
2691/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2692/// operand with the operand of an insertelement.
2693static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2694 InstCombinerImpl &IC) {
2695 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2696 SmallVector<int, 16> Mask;
2697 Shuf.getShuffleMask(Mask);
2698
2699 int NumElts = Mask.size();
2700 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2701
2702 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2703 // not be able to handle it there if the insertelement has >1 use.
2704 // If the shuffle has an insertelement operand but does not choose the
2705 // inserted scalar element from that value, then we can replace that shuffle
2706 // operand with the source vector of the insertelement.
2707 Value *X;
2708 uint64_t IdxC;
2709 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2710 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2711 if (!is_contained(Mask, (int)IdxC))
2712 return IC.replaceOperand(Shuf, 0, X);
2713 }
2714 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2715 // Offset the index constant by the vector width because we are checking for
2716 // accesses to the 2nd vector input of the shuffle.
2717 IdxC += InpNumElts;
2718 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2719 if (!is_contained(Mask, (int)IdxC))
2720 return IC.replaceOperand(Shuf, 1, X);
2721 }
2722 // For the rest of the transform, the shuffle must not change vector sizes.
2723 // TODO: This restriction could be removed if the insert has only one use
2724 // (because the transform would require a new length-changing shuffle).
2725 if (NumElts != InpNumElts)
2726 return nullptr;
2727
2728 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2729 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2730 // We need an insertelement with a constant index.
2731 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2732 m_ConstantInt(IndexC))))
2733 return false;
2734
2735 // Test the shuffle mask to see if it splices the inserted scalar into the
2736 // operand 1 vector of the shuffle.
2737 int NewInsIndex = -1;
2738 for (int i = 0; i != NumElts; ++i) {
2739 // Ignore undef mask elements.
2740 if (Mask[i] == -1)
2741 continue;
2742
2743 // The shuffle takes elements of operand 1 without lane changes.
2744 if (Mask[i] == NumElts + i)
2745 continue;
2746
2747 // The shuffle must choose the inserted scalar exactly once.
2748 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2749 return false;
2750
2751 // The shuffle is placing the inserted scalar into element i.
2752 NewInsIndex = i;
2753 }
2754
2755 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2756
2757 // Index is updated to the potentially translated insertion lane.
2758 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2759 return true;
2760 };
2761
2762 // If the shuffle is unnecessary, insert the scalar operand directly into
2763 // operand 1 of the shuffle. Example:
2764 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2765 Value *Scalar;
2766 ConstantInt *IndexC;
2767 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2768 return InsertElementInst::Create(V1, Scalar, IndexC);
2769
2770 // Try again after commuting shuffle. Example:
2771 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2772 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2773 std::swap(V0, V1);
2774 Shuf.commute();
2775 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2776 return InsertElementInst::Create(V1, Scalar, IndexC);
2777
2778 return nullptr;
2779}
2780
2781static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2782 // Match the operands as identity with padding (also known as concatenation
2783 // with undef) shuffles of the same source type. The backend is expected to
2784 // recreate these concatenations from a shuffle of narrow operands.
2785 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2786 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2787 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2788 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2789 return nullptr;
2790
2791 // We limit this transform to power-of-2 types because we expect that the
2792 // backend can convert the simplified IR patterns to identical nodes as the
2793 // original IR.
2794 // TODO: If we can verify the same behavior for arbitrary types, the
2795 // power-of-2 checks can be removed.
2796 Value *X = Shuffle0->getOperand(0);
2797 Value *Y = Shuffle1->getOperand(0);
2798 if (X->getType() != Y->getType() ||
2799 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2800 !isPowerOf2_32(
2801 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2802 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2803 match(X, m_Undef()) || match(Y, m_Undef()))
2804 return nullptr;
2805 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2806 match(Shuffle1->getOperand(1), m_Undef()) &&
2807 "Unexpected operand for identity shuffle");
2808
2809 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2810 // operands directly by adjusting the shuffle mask to account for the narrower
2811 // types:
2812 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2813 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2814 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2815 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2816
2817 ArrayRef<int> Mask = Shuf.getShuffleMask();
2818 SmallVector<int, 16> NewMask(Mask.size(), -1);
2819 for (int i = 0, e = Mask.size(); i != e; ++i) {
2820 if (Mask[i] == -1)
2821 continue;
2822
2823 // If this shuffle is choosing an undef element from 1 of the sources, that
2824 // element is undef.
2825 if (Mask[i] < WideElts) {
2826 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2827 continue;
2828 } else {
2829 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2830 continue;
2831 }
2832
2833 // If this shuffle is choosing from the 1st narrow op, the mask element is
2834 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2835 // element is offset down to adjust for the narrow vector widths.
2836 if (Mask[i] < WideElts) {
2837 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2838 NewMask[i] = Mask[i];
2839 } else {
2840 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2841 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2842 }
2843 }
2844 return new ShuffleVectorInst(X, Y, NewMask);
2845}
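// For illustration only, a hypothetical example of the fold above: with
// <2 x i32> sources X and Y widened to <4 x i32> by poison padding,
//   shuf (widen X), (widen Y), <0, 1, 4, 5>
// picks lanes 0-1 of X and the wide lanes 4-5 (narrow lanes 0-1 of Y), so
// the mask is rebased onto the narrow operands:
//   --> shuf X, Y, <0, 1, 2, 3>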
2846
2847// Splatting the first element of the result of a BinOp, where any of the
2848// BinOp's operands are the result of a first-element splat, can be simplified
2849// to splatting the first element of the BinOp's result.
2850Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2851 if (!match(SVI.getOperand(1), m_Poison()) ||
2852 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2853 !SVI.getOperand(0)->hasOneUse())
2854 return nullptr;
2855
2856 Value *Op0 = SVI.getOperand(0);
2857 Value *X, *Y;
2858 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2859 m_Value(Y))) &&
2860 !match(Op0, m_BinOp(m_Value(X),
2861 m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2862 return nullptr;
2863 if (X->getType() != Y->getType())
2864 return nullptr;
2865
2866 auto *BinOp = cast<BinaryOperator>(Op0);
2867 if (!isSafeToSpeculativelyExecute(BinOp))
2868 return nullptr;
2869
2870 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2871 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2872 NewBOI->copyIRFlags(BinOp);
2873
2874 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2875}
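// For illustration only, a hypothetical example of the simplification above:
//   %sx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
//   %b  = add <4 x i32> %sx, %y
//   %s  = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
// Lane 0 of %b depends only on lane 0 of %x and %y, so this becomes:
//   %nb = add <4 x i32> %x, %y
//   %s  = shufflevector <4 x i32> %nb, <4 x i32> poison, <4 x i32> zeroinitializer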
2876
2877Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2878 Value *LHS = SVI.getOperand(0);
2879 Value *RHS = SVI.getOperand(1);
2880 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2881 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2882 SVI.getType(), ShufQuery))
2883 return replaceInstUsesWith(SVI, V);
2884
2885 if (Instruction *I = simplifyBinOpSplats(SVI))
2886 return I;
2887
2888 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2889 // order to support scalable vectors.
2890 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2891 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2892
2893 if (isa<ScalableVectorType>(LHS->getType()))
2894 return nullptr;
2895
2896 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2897 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2898
2899 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2900 //
2901 // if X and Y are of the same (vector) type, and the element size is not
2902 // changed by the bitcasts, we can distribute the bitcasts through the
2903 // shuffle, hopefully reducing the number of instructions. We make sure that
2904 // at least one bitcast only has one use, so we don't *increase* the number of
2905 // instructions here.
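// For illustration only (hypothetical IR, not part of the original source):
// both element types are 32 bits wide here, so the bitcasts can sink below
// the shuffle:
//   %bx = bitcast <4 x i32> %x to <4 x float>
//   %by = bitcast <4 x i32> %y to <4 x float>
//   %s = shufflevector <4 x float> %bx, <4 x float> %by,
//        <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// becomes
//   %s2 = shufflevector <4 x i32> %x, <4 x i32> %y,
//         <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %s = bitcast <4 x i32> %s2 to <4 x float>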
2906 Value *X, *Y;
2907 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2908 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2909 X->getType()->getScalarSizeInBits() ==
2910 SVI.getType()->getScalarSizeInBits() &&
2911 (LHS->hasOneUse() || RHS->hasOneUse())) {
2912 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2913 SVI.getName() + ".uncasted");
2914 return new BitCastInst(V, SVI.getType());
2915 }
2916
2917 ArrayRef<int> Mask = SVI.getShuffleMask();
2918
2919 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2920 // simulated shuffle can simplify, then this shuffle is unnecessary:
2921 // shuf (bitcast X), undef, Mask --> bitcast X'
2922 // TODO: This could be extended to allow length-changing shuffles.
2923 // The transform might also be obsoleted if we allowed canonicalization
2924 // of bitcasted shuffles.
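// For illustration only (hypothetical IR, not part of the original source):
// for
//   %bc = bitcast <2 x i64> %x to <4 x i32>
//   %s = shufflevector <4 x i32> %bc, <4 x i32> undef,
//        <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// the mask scales to <0, 1> over the <2 x i64> source, which simplifies to %x
// itself, so the whole sequence becomes a single bitcast of %x to <4 x i32>.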
2925 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2926 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2927 // Try to create a scaled mask constant.
2928 auto *XType = cast<FixedVectorType>(X->getType());
2929 unsigned XNumElts = XType->getNumElements();
2930 SmallVector<int, 16> ScaledMask;
2931 if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2932 // If the shuffled source vector simplifies, cast that value to this
2933 // shuffle's type.
2934 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2935 ScaledMask, XType, ShufQuery))
2936 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2937 }
2938 }
2939
2940 // shuffle x, x, mask --> shuffle x, undef, mask'
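// For illustration only (hypothetical values, not part of the original
// source): with 4-element operands, the binary mask <3, 4, 1, 6> applied to
// (x, x) selects the same lanes as the unary mask <3, 0, 1, 2> applied to x
// alone, because lane i >= 4 of the pair is lane i - 4 of x.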
2941 if (LHS == RHS) {
2942 assert(!match(RHS, m_Undef()) &&
2943 "Shuffle with 2 undef ops not simplified?");
2944 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2945 }
2946
2947 // shuffle undef, x, mask --> shuffle x, undef, mask'
2948 if (match(LHS, m_Undef())) {
2949 SVI.commute();
2950 return &SVI;
2951 }
2952
2953 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2954 return I;
2955
2956 if (Instruction *I = foldSelectShuffle(SVI))
2957 return I;
2958
2959 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2960 return I;
2961
2962 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2963 return I;
2964
2965 if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2966 return I;
2967
2968 if (Instruction *I = foldCastShuffle(SVI, Builder))
2969 return I;
2970
2971 APInt PoisonElts(VWidth, 0);
2972 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2973 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2974 if (V != &SVI)
2975 return replaceInstUsesWith(SVI, V);
2976 return &SVI;
2977 }
2978
2979 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2980 return I;
2981
2982 // These transforms have the potential to lose undef knowledge, so they are
2983 // intentionally placed after SimplifyDemandedVectorElts().
2984 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2985 return I;
2986 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2987 return I;
2988
2989 if (match(RHS, m_Constant())) {
2990 if (auto *SI = dyn_cast<SelectInst>(LHS)) {
2991 // We cannot do this fold for elementwise select since ShuffleVector is
2992 // not elementwise.
2993 if (SI->getCondition()->getType()->isIntegerTy() &&
2994 (isa<PoisonValue>(RHS) ||
2995 isGuaranteedNotToBePoison(SI->getCondition()))) {
2996 if (Instruction *I = FoldOpIntoSelect(SVI, SI))
2997 return I;
2998 }
2999 }
3000 if (auto *PN = dyn_cast<PHINode>(LHS)) {
3001 if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
3002 return I;
3003 }
3004 }
3005
3006 if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
3007 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
3008 return replaceInstUsesWith(SVI, V);
3009 }
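// For illustration only (hypothetical, not part of the original source): if
// LHS is (add %v, <i32 1, i32 2, i32 3, i32 4>) with one use and Mask
// reverses the vector, the add itself can be rebuilt in the shuffled order as
// (add (reverse %v), <i32 4, i32 3, i32 2, i32 1>), so this shuffle vanishes.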
3010
3011 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
3012 // a non-vector type. We can instead bitcast the original vector followed by
3013 // an extract of the desired element:
3014 //
3015 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
3016 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3017 // %1 = bitcast <4 x i8> %sroa to i32
3018 // Becomes:
3019 // %bc = bitcast <16 x i8> %in to <4 x i32>
3020 // %ext = extractelement <4 x i32> %bc, i32 0
3021 //
3022 // If the shuffle is extracting a contiguous range of values from the input
3023 // vector then each use which is a bitcast of the extracted size can be
3024 // replaced. This will work if the vector types are compatible, and the begin
3025 // index is aligned to a value in the casted vector type. If the begin index
3026 // isn't aligned then we can shuffle the original vector (keeping the same
3027 // vector type) before extracting.
3028 //
3029 // This code will bail out if the target type is fundamentally incompatible
3030 // with vectors of the source type.
3031 //
3032 // Example of <16 x i8>, target type i32:
3033 // Index range [4,8): v-----------v Will work.
3034 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3035 // <16 x i8>: | | | | | | | | | | | | | | | | |
3036 // <4 x i32>: | | | | |
3037 // +-----------+-----------+-----------+-----------+
3038 // Index range [6,10): ^-----------^ Needs an extra shuffle.
3039 // Target type i40: ^--------------^ Won't work, bail.
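// For illustration only (hypothetical IR, not part of the original source),
// the aligned case from the diagram, extracting range [4,8) of <16 x i8> for
// an i32 use:
//   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
//           <4 x i32> <i32 4, i32 5, i32 6, i32 7>
//   %val = bitcast <4 x i8> %sroa to i32
// becomes
//   %bc = bitcast <16 x i8> %in to <4 x i32>
//   %val = extractelement <4 x i32> %bc, i32 1
// where the new index is BegIdx / SrcElemsPerTgtElem == 4 / 4 == 1.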
3040 bool MadeChange = false;
3041 if (isShuffleExtractingFromLHS(SVI, Mask)) {
3042 Value *V = LHS;
3043 unsigned MaskElems = Mask.size();
3044 auto *SrcTy = cast<FixedVectorType>(V->getType());
3045 unsigned VecBitWidth = DL.getTypeSizeInBits(SrcTy);
3046 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
3047 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3048 unsigned SrcNumElems = SrcTy->getNumElements();
3049 SmallVector<BitCastInst *, 8> BCs;
3050 DenseMap<Type *, Value *> NewBCs;
3051 for (User *U : SVI.users())
3052 if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
3053 // Only visit bitcasts that weren't previously handled.
3054 if (BC->use_empty())
3055 continue;
3056 // Prefer to combine bitcasts of bitcasts before attempting this fold.
3057 if (BC->hasOneUse()) {
3058 auto *BC2 = dyn_cast<BitCastInst>(BC->user_back());
3059 if (BC2 && isEliminableCastPair(BC, BC2))
3060 continue;
3061 }
3062 BCs.push_back(BC);
3063 }
3064 for (BitCastInst *BC : BCs) {
3065 unsigned BegIdx = Mask.front();
3066 Type *TgtTy = BC->getDestTy();
3067 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
3068 if (!TgtElemBitWidth)
3069 continue;
3070 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3071 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3072 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3073 if (!VecBitWidthsEqual)
3074 continue;
3075 if (!VectorType::isValidElementType(TgtTy))
3076 continue;
3077 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
3078 if (!BegIsAligned) {
3079 // Shuffle the input so [0,NumElements) contains the output, and
3080 // [NumElems,SrcNumElems) is undef.
3081 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3082 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3083 ShuffleMask[I] = Idx;
3084 V = Builder.CreateShuffleVector(V, ShuffleMask,
3085 SVI.getName() + ".extract");
3086 BegIdx = 0;
3087 }
3088 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3089 assert(SrcElemsPerTgtElem);
3090 BegIdx /= SrcElemsPerTgtElem;
3091 auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);
3092 if (Inserted)
3093 It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
3094 auto *Ext = Builder.CreateExtractElement(It->second, BegIdx,
3095 SVI.getName() + ".extract");
3096 // The shufflevector isn't being replaced: the bitcast that used it
3097 // is. InstCombine will visit the newly-created instructions.
3098 replaceInstUsesWith(*BC, Ext);
3099 MadeChange = true;
3100 }
3101 }
3102
3103 // If the LHS is a shufflevector itself, see if we can combine it with this
3104 // one without producing an unusual shuffle.
3105 // Cases that might be simplified:
3106 // 1.
3107 // x1=shuffle(v1,v2,mask1)
3108 // x=shuffle(x1,undef,mask)
3109 // ==>
3110 // x=shuffle(v1,undef,newMask)
3111 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3112 // 2.
3113 // x1=shuffle(v1,undef,mask1)
3114 // x=shuffle(x1,x2,mask)
3115 // where v1.size() == mask1.size()
3116 // ==>
3117 // x=shuffle(v1,x2,newMask)
3118 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3119 // 3.
3120 // x2=shuffle(v2,undef,mask2)
3121 // x=shuffle(x1,x2,mask)
3122 // where v2.size() == mask2.size()
3123 // ==>
3124 // x=shuffle(x1,v2,newMask)
3125 // newMask[i] = (mask[i] < x1.size())
3126 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3127 // 4.
3128 // x1=shuffle(v1,undef,mask1)
3129 // x2=shuffle(v2,undef,mask2)
3130 // x=shuffle(x1,x2,mask)
3131 // where v1.size() == v2.size()
3132 // ==>
3133 // x=shuffle(v1,v2,newMask)
3134 // newMask[i] = (mask[i] < x1.size())
3135 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3136 //
3137 // Here we are really conservative:
3138 // we are absolutely afraid of producing a shuffle mask not in the input
3139 // program, because the code gen may not be smart enough to turn a merged
3140 // shuffle into two specific shuffles: it may produce worse code. As such,
3141 // we only merge two shuffles if the result is either a splat or one of the
3142 // input shuffle masks. In this case, merging the shuffles just removes
3143 // one instruction, which we know is safe. This is good for things like
3144 // turning: (splat(splat)) -> splat, or
3145 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
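// For illustration only (hypothetical values, not part of the original
// source), case 1 with 4-element vectors:
//   x1 = shuffle(v1, v2, <0, 5, 2, 7>)
//   x = shuffle(x1, poison, <2, 2, 2, 2>)
// Every lane of x is x1[2] == v1[2], so newMask is the splat <2, 2, 2, 2> and
// the merge is performed:
//   x = shuffle(v1, v2, <2, 2, 2, 2>)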
3146 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3147 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3148 if (LHSShuffle)
3149 if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3150 !match(RHS, m_Poison()))
3151 LHSShuffle = nullptr;
3152 if (RHSShuffle)
3153 if (!match(RHSShuffle->getOperand(1), m_Poison()))
3154 RHSShuffle = nullptr;
3155 if (!LHSShuffle && !RHSShuffle)
3156 return MadeChange ? &SVI : nullptr;
3157
3158 Value* LHSOp0 = nullptr;
3159 Value* LHSOp1 = nullptr;
3160 Value* RHSOp0 = nullptr;
3161 unsigned LHSOp0Width = 0;
3162 unsigned RHSOp0Width = 0;
3163 if (LHSShuffle) {
3164 LHSOp0 = LHSShuffle->getOperand(0);
3165 LHSOp1 = LHSShuffle->getOperand(1);
3166 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3167 }
3168 if (RHSShuffle) {
3169 RHSOp0 = RHSShuffle->getOperand(0);
3170 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3171 }
3172 Value* newLHS = LHS;
3173 Value* newRHS = RHS;
3174 if (LHSShuffle) {
3175 // case 1
3176 if (match(RHS, m_Poison())) {
3177 newLHS = LHSOp0;
3178 newRHS = LHSOp1;
3179 }
3180 // case 2 or 4
3181 else if (LHSOp0Width == LHSWidth) {
3182 newLHS = LHSOp0;
3183 }
3184 }
3185 // case 3 or 4
3186 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3187 newRHS = RHSOp0;
3188 }
3189 // case 4
3190 if (LHSOp0 == RHSOp0) {
3191 newLHS = LHSOp0;
3192 newRHS = nullptr;
3193 }
3194
3195 if (newLHS == LHS && newRHS == RHS)
3196 return MadeChange ? &SVI : nullptr;
3197
3198 ArrayRef<int> LHSMask;
3199 ArrayRef<int> RHSMask;
3200 if (newLHS != LHS)
3201 LHSMask = LHSShuffle->getShuffleMask();
3202 if (RHSShuffle && newRHS != RHS)
3203 RHSMask = RHSShuffle->getShuffleMask();
3204
3205 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3206 SmallVector<int, 16> newMask;
3207 bool isSplat = true;
3208 int SplatElt = -1;
3209 // Create a new mask for the new ShuffleVectorInst so that the new
3210 // ShuffleVectorInst is equivalent to the original one.
3211 for (unsigned i = 0; i < VWidth; ++i) {
3212 int eltMask;
3213 if (Mask[i] < 0) {
3214 // This element is a poison value.
3215 eltMask = -1;
3216 } else if (Mask[i] < (int)LHSWidth) {
3217 // This element is from left hand side vector operand.
3218 //
3219 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3220 // new mask value for the element.
3221 if (newLHS != LHS) {
3222 eltMask = LHSMask[Mask[i]];
3223 // If the value selected is a poison value, explicitly specify it
3224 // with a -1 mask value.
3225 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3226 eltMask = -1;
3227 } else
3228 eltMask = Mask[i];
3229 } else {
3230 // This element is from right hand side vector operand
3231 //
3232 // If the value selected is a poison value, explicitly specify it
3233 // with a -1 mask value. (case 1)
3234 if (match(RHS, m_Poison()))
3235 eltMask = -1;
3236 // If RHS is going to be replaced (case 3 or 4), calculate the
3237 // new mask value for the element.
3238 else if (newRHS != RHS) {
3239 eltMask = RHSMask[Mask[i]-LHSWidth];
3240 // If the value selected is a poison value, explicitly specify it
3241 // with a -1 mask value.
3242 if (eltMask >= (int)RHSOp0Width) {
3243 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3244 "should have been check above");
3245 eltMask = -1;
3246 }
3247 } else
3248 eltMask = Mask[i]-LHSWidth;
3249
3250 // If LHS's width is changed, shift the mask value accordingly.
3251 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3252 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3253 // If newRHS == newLHS, we want to remap any references from newRHS to
3254 // newLHS so that we can properly identify splats that may occur due to
3255 // obfuscation across the two vectors.
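// For illustration only (hypothetical values): if newLHSWidth == 4 and the
// RHS-side mask selects lane 1 of newRHS, the merged binary mask encodes that
// lane as 1 + 4 == 5.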
3256 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3257 eltMask += newLHSWidth;
3258 }
3259
3260 // Check if this could still be a splat.
3261 if (eltMask >= 0) {
3262 if (SplatElt >= 0 && SplatElt != eltMask)
3263 isSplat = false;
3264 SplatElt = eltMask;
3265 }
3266
3267 newMask.push_back(eltMask);
3268 }
3269
3270 // If the result mask is equal to one of the original shuffle masks,
3271 // or is a splat, do the replacement.
3272 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3273 if (!newRHS)
3274 newRHS = PoisonValue::get(newLHS->getType());
3275 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3276 }
3277
3278 return MadeChange ? &SVI : nullptr;
3279}