LLVM: lib/Transforms/InstCombine/InstCombineVectorOps.cpp Source File

//===- InstCombineVectorOps.cpp -------------------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file implements instcombine for ExtractElement, InsertElement and

// ShuffleVector.

//

//===----------------------------------------------------------------------===//


#include "InstCombineInternal.h"

#include "llvm/ADT/APInt.h"

#include "llvm/ADT/ArrayRef.h"

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallBitVector.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/Analysis/InstructionSimplify.h"

#include "llvm/Analysis/VectorUtils.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Constant.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Operator.h"

#include "llvm/IR/PatternMatch.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/User.h"

#include "llvm/IR/Value.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Transforms/InstCombine/InstCombiner.h"

#include <cassert>

#include <cstdint>

#include <iterator>

#include <utility>


#define DEBUG_TYPE "instcombine"


using namespace llvm;

using namespace PatternMatch;


STATISTIC(NumAggregateReconstructionsSimplified,

          "Number of aggregate reconstructions turned into reuse of the "

          "original aggregate");


/// Return true if the value is cheaper to scalarize than it is to leave as a

/// vector operation. If the extract index \p EI is a constant integer then

/// some operations may be cheap to scalarize.

///

/// FIXME: It's possible to create more instructions than previously existed.

static bool cheapToScalarize(Value *V, Value *EI) {

  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);


  // If we can pick a scalar constant value out of a vector, that is free.

  if (auto *C = dyn_cast<Constant>(V))

    return CEI || C->getSplatValue();


  if (CEI && match(V, m_Intrinsic<Intrinsic::stepvector>())) {

    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();

    // Index needs to be lower than the minimum size of the vector, because

    // for scalable vector, the vector size is known at run time.

    return CEI->getValue().ult(EC.getKnownMinValue());

  }


  // An insertelement to the same constant index as our extract will simplify

  // to the scalar inserted element. An insertelement to a different constant

  // index is irrelevant to our extract.

  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))

    return CEI;


  if (match(V, m_OneUse(m_Load(m_Value()))))

    return true;


  if (match(V, m_OneUse(m_UnOp())))

    return true;


  Value *V0, *V1;

  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))

    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))

      return true;


  CmpPredicate UnusedPred;

  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))

    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))

      return true;


  return false;

}


// If we have a PHI node with a vector type that is only used to feed

// itself and be an operand of extractelement at a constant location,

// try to replace the PHI of the vector type with a PHI of a scalar type.

Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,

                                            PHINode *PN) {

  SmallVector<Instruction *, 2> Extracts;

  // The users we want the PHI to have are:

  // 1) The EI ExtractElement (we already know this)

  // 2) Possibly more ExtractElements with the same index.

  // 3) Another operand, which will feed back into the PHI.

  Instruction *PHIUser = nullptr;

  for (auto *U : PN->users()) {

    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {

      if (EI.getIndexOperand() == EU->getIndexOperand())

        Extracts.push_back(EU);

      else

        return nullptr;

    } else if (!PHIUser) {

      PHIUser = cast<Instruction>(U);

    } else {

      return nullptr;

    }

  }


  if (!PHIUser)

    return nullptr;


  // Verify that this PHI user has one use, which is the PHI itself,

  // and that it is a binary operation which is cheap to scalarize.

  // otherwise return nullptr.

  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||

      !(isa<BinaryOperator>(PHIUser)) ||

      !cheapToScalarize(PHIUser, EI.getIndexOperand()))

    return nullptr;


  // Create a scalar PHI node that will replace the vector PHI node

  // just before the current PHI node.

  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(

      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), PN->getIterator()));

  // Scalarize each PHI operand.

  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {

    Value *PHIInVal = PN->getIncomingValue(i);

    BasicBlock *inBB = PN->getIncomingBlock(i);

    Value *Elt = EI.getIndexOperand();

    // If the operand is the PHI induction variable:

    if (PHIInVal == PHIUser) {

      // Scalarize the binary operation. Its first operand is the

      // scalar PHI, and the second operand is extracted from the other

      // vector operand.

      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);

      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;

      Value *Op = InsertNewInstWith(

          ExtractElementInst::Create(B0->getOperand(opId), Elt,

                                     B0->getOperand(opId)->getName() + ".Elt"),

          B0->getIterator());

      Value *newPHIUser = InsertNewInstWith(

          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),

                                                scalarPHI, Op, B0), B0->getIterator());

      scalarPHI->addIncoming(newPHIUser, inBB);

    } else {

      // Scalarize PHI input:

      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");

      // Insert the new instruction into the predecessor basic block.

      Instruction *pos = dyn_cast<Instruction>(PHIInVal);

      BasicBlock::iterator InsertPos;

      if (pos && !isa<PHINode>(pos)) {

        InsertPos = ++pos->getIterator();

      } else {

        InsertPos = inBB->getFirstInsertionPt();

      }


      InsertNewInstWith(newEI, InsertPos);


      scalarPHI->addIncoming(newEI, inBB);

    }

  }


  for (auto *E : Extracts) {

    replaceInstUsesWith(*E, scalarPHI);

    // Add old extract to worklist for DCE.

    addToWorklist(E);

  }


  return &EI;

}


Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {

  Value *X;

  uint64_t ExtIndexC;

  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||

      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))

    return nullptr;


  ElementCount NumElts =

      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();

  Type *DestTy = Ext.getType();

  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();

  bool IsBigEndian = DL.isBigEndian();


  // If we are casting an integer to vector and extracting a portion, that is

  // a shift-right and truncate.

  if (X->getType()->isIntegerTy()) {

    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&

           "Expected fixed vector type for bitcast from scalar integer");


    // Big endian requires adjusting the extract index since MSB is at index 0.

    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8

    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8

    if (IsBigEndian)

      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;

    unsigned ShiftAmountC = ExtIndexC * DestWidth;

    if ((!ShiftAmountC ||

         isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&

        Ext.getVectorOperand()->hasOneUse()) {

      if (ShiftAmountC)

        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");

      if (DestTy->isFloatingPointTy()) {

        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);

        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);

        return new BitCastInst(Trunc, DestTy);

      }

      return new TruncInst(X, DestTy);

    }

  }


  if (!X->getType()->isVectorTy())

    return nullptr;


  // If this extractelement is using a bitcast from a vector of the same number

  // of elements, see if we can find the source element from the source vector:

  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]

  auto *SrcTy = cast<VectorType>(X->getType());

  ElementCount NumSrcElts = SrcTy->getElementCount();

  if (NumSrcElts == NumElts)

    if (Value *Elt = findScalarElement(X, ExtIndexC))

      return new BitCastInst(Elt, DestTy);


  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&

         "Src and Dst must be the same sort of vector type");


  // If the source elements are wider than the destination, try to shift and

  // truncate a subset of scalar bits of an insert op.

  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {

    Value *Scalar;

    Value *Vec;

    uint64_t InsIndexC;

    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),

                              m_ConstantInt(InsIndexC))))

      return nullptr;


    // The extract must be from the subset of vector elements that we inserted

    // into. Example: if we inserted element 1 of a <2 x i64> and we are

    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1

    // of elements 4-7 of the bitcasted vector.

    unsigned NarrowingRatio =

        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();


    if (ExtIndexC / NarrowingRatio != InsIndexC) {

      // Remove insertelement, if we don't use the inserted element.

      // extractelement (bitcast (insertelement (Vec, b)), a) ->

      // extractelement (bitcast (Vec), a)

      // FIXME: this should be removed to SimplifyDemandedVectorElts,

      // once scale vectors are supported.

      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {

        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());

        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());

      }

      return nullptr;

    }


    // We are extracting part of the original scalar. How that scalar is

    // inserted into the vector depends on the endian-ness. Example:

    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7

    //                                       +--+--+--+--+--+--+--+--+

    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|

    // extelt <4 x i16> V', 3:               |                 |S2|S3|

    //                                       +--+--+--+--+--+--+--+--+

    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.

    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.

    // In this example, we must right-shift little-endian. Big-endian is just a

    // truncate.

    unsigned Chunk = ExtIndexC % NarrowingRatio;

    if (IsBigEndian)

      Chunk = NarrowingRatio - 1 - Chunk;


    // Bail out if this is an FP vector to FP vector sequence. That would take

    // more instructions than we started with unless there is no shift, and it

    // may not be handled as well in the backend.

    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();

    bool NeedDestBitcast = DestTy->isFloatingPointTy();

    if (NeedSrcBitcast && NeedDestBitcast)

      return nullptr;


    unsigned SrcWidth = SrcTy->getScalarSizeInBits();

    unsigned ShAmt = Chunk * DestWidth;


    // TODO: This limitation is more strict than necessary. We could sum the

    // number of new instructions and subtract the number eliminated to know if

    // we can proceed.

    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())

      if (NeedSrcBitcast || NeedDestBitcast)

        return nullptr;


    if (NeedSrcBitcast) {

      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);

      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);

    }


    if (ShAmt) {

      // Bail out if we could end with more instructions than we started with.

      if (!Ext.getVectorOperand()->hasOneUse())

        return nullptr;

      Scalar = Builder.CreateLShr(Scalar, ShAmt);

    }


    if (NeedDestBitcast) {

      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);

      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);

    }

    return new TruncInst(Scalar, DestTy);

  }


  return nullptr;

}


/// Find elements of V demanded by UserInstr.

static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {

  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();


  // Conservatively assume that all elements are needed.

  APInt UsedElts(APInt::getAllOnes(VWidth));


  switch (UserInstr->getOpcode()) {

  case Instruction::ExtractElement: {

    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);

    assert(EEI->getVectorOperand() == V);

    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());

    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {

      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());

    }

    break;

  }

  case Instruction::ShuffleVector: {

    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);

    unsigned MaskNumElts =

        cast<FixedVectorType>(UserInstr->getType())->getNumElements();


    UsedElts = APInt(VWidth, 0);

    for (unsigned i = 0; i < MaskNumElts; i++) {

      unsigned MaskVal = Shuffle->getMaskValue(i);

      if (MaskVal == -1u || MaskVal >= 2 * VWidth)

        continue;

      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))

        UsedElts.setBit(MaskVal);

      if (Shuffle->getOperand(1) == V &&

          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))

        UsedElts.setBit(MaskVal - VWidth);

    }

    break;

  }

  default:

    break;

  }

  return UsedElts;

}


/// Find union of elements of V demanded by all its users.

/// If it is known by querying findDemandedEltsBySingleUser that

/// no user demands an element of V, then the corresponding bit

/// remains unset in the returned value.

static APInt findDemandedEltsByAllUsers(Value *V) {

  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();


  APInt UnionUsedElts(VWidth, 0);

  for (const Use &U : V->uses()) {

    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {

      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);

    } else {

      UnionUsedElts = APInt::getAllOnes(VWidth);

      break;

    }


    if (UnionUsedElts.isAllOnes())

      break;

  }


  return UnionUsedElts;

}


/// Given a constant index for a extractelement or insertelement instruction,

/// return it with the canonical type if it isn't already canonical.  We

/// arbitrarily pick 64 bit as our canonical type.  The actual bitwidth doesn't

/// matter, we just want a consistent type to simplify CSE.

static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {

  const unsigned IndexBW = IndexC->getBitWidth();

  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)

    return nullptr;

  return ConstantInt::get(IndexC->getContext(),

                          IndexC->getValue().zextOrTrunc(64));

}


Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {

  Value *SrcVec = EI.getVectorOperand();

  Value *Index = EI.getIndexOperand();

  if (Value *V = simplifyExtractElementInst(SrcVec, Index,

                                            SQ.getWithInstruction(&EI)))

    return replaceInstUsesWith(EI, V);


  // extractelt (select %x, %vec1, %vec2), %const ->

  // select %x, %vec1[%const], %vec2[%const]

  // TODO: Support constant folding of multiple select operands:

  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)

  // If the extractelement will for instance try to do out of bounds accesses

  // because of the values of %c1 and/or %c2, the sequence could be optimized

  // early. This is currently not possible because constant folding will reach

  // an unreachable assertion if it doesn't find a constant operand.

  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))

    if (SI->getCondition()->getType()->isIntegerTy() &&

        isa<Constant>(EI.getIndexOperand()))

      if (Instruction *R = FoldOpIntoSelect(EI, SI))

        return R;


  // If extracting a specified index from the vector, see if we can recursively

  // find a previously computed scalar that was inserted into the vector.

  auto *IndexC = dyn_cast<ConstantInt>(Index);

  bool HasKnownValidIndex = false;

  if (IndexC) {

    // Canonicalize type of constant indices to i64 to simplify CSE

    if (auto *NewIdx = getPreferredVectorIndex(IndexC))

      return replaceOperand(EI, 1, NewIdx);


    ElementCount EC = EI.getVectorOperandType()->getElementCount();

    unsigned NumElts = EC.getKnownMinValue();

    HasKnownValidIndex = IndexC->getValue().ult(NumElts);


    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {

      Intrinsic::ID IID = II->getIntrinsicID();

      // Index needs to be lower than the minimum size of the vector, because

      // for scalable vector, the vector size is known at run time.

      if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {

        Type *Ty = EI.getType();

        unsigned BitWidth = Ty->getIntegerBitWidth();

        Value *Idx;

        // Return index when its value does not exceed the allowed limit

        // for the element type of the vector, otherwise return undefined.

        if (IndexC->getValue().getActiveBits() <= BitWidth)

          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));

        else

          Idx = PoisonValue::get(Ty);

        return replaceInstUsesWith(EI, Idx);

      }

    }


    // InstSimplify should handle cases where the index is invalid.

    // For fixed-length vector, it's invalid to extract out-of-range element.

    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))

      return nullptr;


    if (Instruction *I = foldBitcastExtElt(EI))

      return I;


    // If there's a vector PHI feeding a scalar use through this extractelement

    // instruction, try to scalarize the PHI.

    if (auto *Phi = dyn_cast<PHINode>(SrcVec))

      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))

        return ScalarPHI;

  }


  // If SrcVec is a subvector starting at index 0, extract from the

  // wider source vector

  Value *V;

  if (match(SrcVec,

            m_Intrinsic<Intrinsic::vector_extract>(m_Value(V), m_Zero())))

    return ExtractElementInst::Create(V, Index);


  // TODO come up with a n-ary matcher that subsumes both unary and

  // binary matchers.

  UnaryOperator *UO;

  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {

    // extelt (unop X), Index --> unop (extelt X, Index)

    Value *X = UO->getOperand(0);

    Value *E = Builder.CreateExtractElement(X, Index);

    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);

  }


  // If the binop is not speculatable, we cannot hoist the extractelement if

  // it may make the operand poison.

  BinaryOperator *BO;

  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&

      (HasKnownValidIndex ||

       isSafeToSpeculativelyExecuteWithVariableReplaced(BO))) {

    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)

    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);

    Value *E0 = Builder.CreateExtractElement(X, Index);

    Value *E1 = Builder.CreateExtractElement(Y, Index);

    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);

  }


  Value *X, *Y;

  CmpPredicate Pred;

  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&

      cheapToScalarize(SrcVec, Index)) {

    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)

    Value *E0 = Builder.CreateExtractElement(X, Index);

    Value *E1 = Builder.CreateExtractElement(Y, Index);

    CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);

    return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,

                                          SrcCmpInst);

  }


  if (auto *I = dyn_cast<Instruction>(SrcVec)) {

    if (auto *IE = dyn_cast<InsertElementInst>(I)) {

      // instsimplify already handled the case where the indices are constants

      // and equal by value, if both are constants, they must not be the same

      // value, extract from the pre-inserted value instead.

      if (isa<Constant>(IE->getOperand(2)) && IndexC)

        return replaceOperand(EI, 0, IE->getOperand(0));

    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {

      auto *VecType = cast<VectorType>(GEP->getType());

      ElementCount EC = VecType->getElementCount();

      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;

      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {

        // Find out why we have a vector result - these are a few examples:

        //  1. We have a scalar pointer and a vector of indices, or

        //  2. We have a vector of pointers and a scalar index, or

        //  3. We have a vector of pointers and a vector of indices, etc.

        // Here we only consider combining when there is exactly one vector

        // operand, since the optimization is less obviously a win due to

        // needing more than one extractelements.


        unsigned VectorOps =

            llvm::count_if(GEP->operands(), [](const Value *V) {

              return isa<VectorType>(V->getType());

            });

        if (VectorOps == 1) {

          Value *NewPtr = GEP->getPointerOperand();

          if (isa<VectorType>(NewPtr->getType()))

            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);


          SmallVector<Value *> NewOps;

          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {

            Value *Op = GEP->getOperand(I);

            if (isa<VectorType>(Op->getType()))

              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));

            else

              NewOps.push_back(Op);

          }


          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(

              GEP->getSourceElementType(), NewPtr, NewOps);

          NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());

          return NewGEP;

        }

      }

    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {

      int SplatIndex = getSplatIndex(SVI->getShuffleMask());

      // We know the all-0 splat must be reading from the first operand, even

      // in the case of scalable vectors (vscale is always > 0).

      if (SplatIndex == 0)

        return ExtractElementInst::Create(SVI->getOperand(0),

                                          Builder.getInt64(0));


      if (isa<FixedVectorType>(SVI->getType())) {

        std::optional<int> SrcIdx;

        // getSplatIndex returns -1 to mean not-found.

        if (SplatIndex != -1)

          SrcIdx = SplatIndex;

        else if (ConstantInt *CI = dyn_cast<ConstantInt>(Index))

          SrcIdx = SVI->getMaskValue(CI->getZExtValue());


        if (SrcIdx) {

          Value *Src;

          unsigned LHSWidth =

              cast<FixedVectorType>(SVI->getOperand(0)->getType())

                  ->getNumElements();


          if (*SrcIdx < 0)

            return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));

          if (*SrcIdx < (int)LHSWidth)

            Src = SVI->getOperand(0);

          else {

            *SrcIdx -= LHSWidth;

            Src = SVI->getOperand(1);

          }

          Type *Int64Ty = Type::getInt64Ty(EI.getContext());

          return ExtractElementInst::Create(

              Src, ConstantInt::get(Int64Ty, *SrcIdx, false));

        }

      }

    } else if (auto *CI = dyn_cast<CastInst>(I)) {

      // Canonicalize extractelement(cast) -> cast(extractelement).

      // Bitcasts can change the number of vector elements, and they cost

      // nothing.

      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {

        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);

        return CastInst::Create(CI->getOpcode(), EE, EI.getType());

      }

    }

  }


  // Run demanded elements after other transforms as this can drop flags on

  // binops.  If there's two paths to the same final result, we prefer the

  // one which doesn't force us to drop flags.

  if (IndexC) {

    ElementCount EC = EI.getVectorOperandType()->getElementCount();

    unsigned NumElts = EC.getKnownMinValue();

    // This instruction only demands the single element from the input vector.

    // Skip for scalable type, the number of elements is unknown at

    // compile-time.

    if (!EC.isScalable() && NumElts != 1) {

      // If the input vector has a single use, simplify it based on this use

      // property.

      if (SrcVec->hasOneUse()) {

        APInt PoisonElts(NumElts, 0);

        APInt DemandedElts(NumElts, 0);

        DemandedElts.setBit(IndexC->getZExtValue());

        if (Value *V =

                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))

          return replaceOperand(EI, 0, V);

      } else {

        // If the input vector has multiple uses, simplify it based on a union

        // of all elements used.

        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);

        if (!DemandedElts.isAllOnes()) {

          APInt PoisonElts(NumElts, 0);

          if (Value *V = SimplifyDemandedVectorElts(

                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,

                  true /* AllowMultipleUsers */)) {

            if (V != SrcVec) {

              Worklist.addValue(SrcVec);

              SrcVec->replaceAllUsesWith(V);

              return &EI;

            }

          }

        }

      }

    }

  }

  return nullptr;

}


/// If V is a shuffle of values that ONLY returns elements from either LHS or

/// RHS, return the shuffle mask and true. Otherwise, return false.

static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,

                                         SmallVectorImpl<int> &Mask) {

  assert(LHS->getType() == RHS->getType() &&

         "Invalid CollectSingleShuffleElements");

  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();


  if (match(V, m_Poison())) {

    Mask.assign(NumElts, -1);

    return true;

  }


  if (V == LHS) {

    for (unsigned i = 0; i != NumElts; ++i)

      Mask.push_back(i);

    return true;

  }


  if (V == RHS) {

    for (unsigned i = 0; i != NumElts; ++i)

      Mask.push_back(i + NumElts);

    return true;

  }


  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {

    // If this is an insert of an extract from some other vector, include it.

    Value *VecOp    = IEI->getOperand(0);

    Value *ScalarOp = IEI->getOperand(1);

    Value *IdxOp    = IEI->getOperand(2);


    if (!isa<ConstantInt>(IdxOp))

      return false;

    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();


    if (isa<PoisonValue>(ScalarOp)) {  // inserting poison into vector.

      // We can handle this if the vector we are inserting into is

      // transitively ok.

      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {

        // If so, update the mask to reflect the inserted poison.

        Mask[InsertedIdx] = -1;

        return true;

      }

    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){

      if (isa<ConstantInt>(EI->getOperand(1))) {

        unsigned ExtractedIdx =

        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();

        unsigned NumLHSElts =

            cast<FixedVectorType>(LHS->getType())->getNumElements();


        // This must be extracting from either LHS or RHS.

        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {

          // We can handle this if the vector we are inserting into is

          // transitively ok.

          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {

            // If so, update the mask to reflect the inserted value.

            if (EI->getOperand(0) == LHS) {

              Mask[InsertedIdx % NumElts] = ExtractedIdx;

            } else {

              assert(EI->getOperand(0) == RHS);

              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;

            }

            return true;

          }

        }

      }

    }

  }


  return false;

}


/// If we have insertion into a vector that is wider than the vector that we

/// are extracting from, try to widen the source vector to allow a single

/// shufflevector to replace one or more insert/extract pairs.

static bool replaceExtractElements(InsertElementInst *InsElt,

                                   ExtractElementInst *ExtElt,

                                   InstCombinerImpl &IC) {

  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());

  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());

  unsigned NumInsElts = InsVecType->getNumElements();

  unsigned NumExtElts = ExtVecType->getNumElements();


  // The inserted-to vector must be wider than the extracted-from vector.

  if (InsVecType->getElementType() != ExtVecType->getElementType() ||

      NumExtElts >= NumInsElts)

    return false;


  // Create a shuffle mask to widen the extended-from vector using poison

  // values. The mask selects all of the values of the original vector followed

  // by as many poison values as needed to create a vector of the same length

  // as the inserted-to vector.

  SmallVector<int, 16> ExtendMask;

  for (unsigned i = 0; i < NumExtElts; ++i)

    ExtendMask.push_back(i);

  for (unsigned i = NumExtElts; i < NumInsElts; ++i)

    ExtendMask.push_back(-1);


  Value *ExtVecOp = ExtElt->getVectorOperand();

  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);

  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))

                                   ? ExtVecOpInst->getParent()

                                   : ExtElt->getParent();


  // TODO: This restriction matches the basic block check below when creating

  // new extractelement instructions. If that limitation is removed, this one

  // could also be removed. But for now, we just bail out to ensure that we

  // will replace the extractelement instruction that is feeding our

  // insertelement instruction. This allows the insertelement to then be

  // replaced by a shufflevector. If the insertelement is not replaced, we can

  // induce infinite looping because there's an optimization for extractelement

  // that will delete our widening shuffle. This would trigger another attempt

  // here to create that shuffle, and we spin forever.

  if (InsertionBlock != InsElt->getParent())

    return false;


  // TODO: This restriction matches the check in visitInsertElementInst() and

  // prevents an infinite loop caused by not turning the extract/insert pair

  // into a shuffle. We really should not need either check, but we're lacking

  // folds for shufflevectors because we're afraid to generate shuffle masks

  // that the backend can't handle.

  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))

    return false;


  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);


  // Insert the new shuffle after the vector operand of the extract is defined

  // (as long as it's not a PHI) or at the start of the basic block of the

  // extract, so any subsequent extracts in the same basic block can use it.

  // TODO: Insert before the earliest ExtractElementInst that is replaced.

  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))

    WideVec->insertAfter(ExtVecOpInst->getIterator());

  else

    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());


  // Replace extracts from the original narrow vector with extracts from the new

  // wide vector.

  for (User *U : ExtVecOp->users()) {

    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);

    if (!OldExt || OldExt->getParent() != WideVec->getParent())

      continue;

    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));

    IC.InsertNewInstWith(NewExt, OldExt->getIterator());

    IC.replaceInstUsesWith(*OldExt, NewExt);

    // Add the old extracts to the worklist for DCE. We can't remove the

    // extracts directly, because they may still be used by the calling code.

    IC.addToWorklist(OldExt);

  }


  return true;

}


/// We are building a shuffle to create V, which is a sequence of insertelement,

/// extractelement pairs. If PermittedRHS is set, then we must either use it or

/// not rely on the second vector source. Return a std::pair containing the

/// left and right vectors of the proposed shuffle (or 0), and set the Mask

/// parameter as required.

///

/// Note: we intentionally don't try to fold earlier shuffles since they have

/// often been chosen carefully to be efficiently implementable on the target.

using ShuffleOps = std::pair<Value *, Value *>;


static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,

                                         Value *PermittedRHS,

                                         InstCombinerImpl &IC, bool &Rerun) {

  assert(V->getType()->isVectorTy() && "Invalid shuffle!");

  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();


  if (match(V, m_Poison())) {

    Mask.assign(NumElts, -1);

    return std::make_pair(

        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);

  }


  if (isa<ConstantAggregateZero>(V)) {

    Mask.assign(NumElts, 0);

    return std::make_pair(V, nullptr);

  }


  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {

    // If this is an insert of an extract from some other vector, include it.

    Value *VecOp    = IEI->getOperand(0);

    Value *ScalarOp = IEI->getOperand(1);

    Value *IdxOp    = IEI->getOperand(2);


    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {

      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {

        unsigned ExtractedIdx =

          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();

        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();


        // Either the extracted from or inserted into vector must be RHSVec,

        // otherwise we'd end up with a shuffle of three inputs.

        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {

          Value *RHS = EI->getOperand(0);

          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);

          assert(LR.second == nullptr || LR.second == RHS);


          if (LR.first->getType() != RHS->getType()) {

            // Although we are giving up for now, see if we can create extracts

            // that match the inserts for another round of combining.

            if (replaceExtractElements(IEI, EI, IC))

              Rerun = true;


            // We tried our best, but we can't find anything compatible with RHS

            // further up the chain. Return a trivial shuffle.

            for (unsigned i = 0; i < NumElts; ++i)

              Mask[i] = i;

            return std::make_pair(V, nullptr);

          }


          unsigned NumLHSElts =

              cast<FixedVectorType>(RHS->getType())->getNumElements();

          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;

          return std::make_pair(LR.first, RHS);

        }


        if (VecOp == PermittedRHS) {

          // We've gone as far as we can: anything on the other side of the

          // extractelement will already have been converted into a shuffle.

          unsigned NumLHSElts =

              cast<FixedVectorType>(EI->getOperand(0)->getType())

                  ->getNumElements();

          for (unsigned i = 0; i != NumElts; ++i)

            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);

          return std::make_pair(EI->getOperand(0), PermittedRHS);

        }


        // If this insertelement is a chain that comes from exactly these two

        // vectors, return the vector and the effective shuffle.

        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&

            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,

                                         Mask))

          return std::make_pair(EI->getOperand(0), PermittedRHS);

      }

    }

  }


  // Otherwise, we can't do anything fancy. Return an identity vector.

  for (unsigned i = 0; i != NumElts; ++i)

    Mask.push_back(i);

  return std::make_pair(V, nullptr);

}


/// Look for chain of insertvalue's that fully define an aggregate, and trace

/// back the values inserted, see if they are all were extractvalue'd from

/// the same source aggregate from the exact same element indexes.

/// If they were, just reuse the source aggregate.

/// This potentially deals with PHI indirections.

Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(

    InsertValueInst &OrigIVI) {

  Type *AggTy = OrigIVI.getType();

  unsigned NumAggElts;

  switch (AggTy->getTypeID()) {

  case Type::StructTyID:

    NumAggElts = AggTy->getStructNumElements();

    break;

  case Type::ArrayTyID:

    NumAggElts = AggTy->getArrayNumElements();

    break;

  default:

    llvm_unreachable("Unhandled aggregate type?");

  }


  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able

  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),

  // FIXME: any interesting patterns to be caught with larger limit?

  assert(NumAggElts > 0 && "Aggregate should have elements.");

  if (NumAggElts > 2)

    return nullptr;


  static constexpr auto NotFound = std::nullopt;

  static constexpr auto FoundMismatch = nullptr;


  // Try to find a value of each element of an aggregate.

  // FIXME: deal with more complex, not one-dimensional, aggregate types

  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);


  // Do we know values for each element of the aggregate?

  auto KnowAllElts = [&AggElts]() {

    return !llvm::is_contained(AggElts, NotFound);

  };


  int Depth = 0;


  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with

  // every element being overwritten twice, which should never happen.

  static const int DepthLimit = 2 * NumAggElts;


  // Recurse up the chain of `insertvalue` aggregate operands until either we've

  // reconstructed full initializer or can't visit any more `insertvalue`'s.

  for (InsertValueInst *CurrIVI = &OrigIVI;

       Depth < DepthLimit && CurrIVI && !KnowAllElts();

       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),

                       ++Depth) {

    auto *InsertedValue =

        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());

    if (!InsertedValue)

      return nullptr; // Inserted value must be produced by an instruction.


    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();


    // Don't bother with more than single-level aggregates.

    if (Indices.size() != 1)

      return nullptr; // FIXME: deal with more complex aggregates?


    // Now, we may have already previously recorded the value for this element

    // of an aggregate. If we did, that means the CurrIVI will later be

    // overwritten with the already-recorded value. But if not, let's record it!

    std::optional<Instruction *> &Elt = AggElts[Indices.front()];

    Elt = Elt.value_or(InsertedValue);


    // FIXME: should we handle chain-terminating undef base operand?

  }


  // Was that sufficient to deduce the full initializer for the aggregate?

  if (!KnowAllElts())

    return nullptr; // Give up then.


  // We now want to find the source[s] of the aggregate elements we've found.

  // And with "source" we mean the original aggregate[s] from which

  // the inserted elements were extracted. This may require PHI translation.


  enum class AggregateDescription {

    /// When analyzing the value that was inserted into an aggregate, we did

    /// not manage to find defining `extractvalue` instruction to analyze.

    NotFound,

    /// When analyzing the value that was inserted into an aggregate, we did

    /// manage to find defining `extractvalue` instruction[s], and everything

    /// matched perfectly - aggregate type, element insertion/extraction index.

    Found,

    /// When analyzing the value that was inserted into an aggregate, we did

    /// manage to find defining `extractvalue` instruction, but there was

    /// a mismatch: either the source type from which the extraction was didn't

    /// match the aggregate type into which the insertion was,

    /// or the extraction/insertion channels mismatched,

    /// or different elements had different source aggregates.

    FoundMismatch

  };

  auto Describe = [](std::optional<Value *> SourceAggregate) {

    if (SourceAggregate == NotFound)

      return AggregateDescription::NotFound;

    if (*SourceAggregate == FoundMismatch)

      return AggregateDescription::FoundMismatch;

    return AggregateDescription::Found;

  };


  // If an aggregate element is defined in UseBB, we can't use it in PredBB.

  bool EltDefinedInUseBB = false;


  // Given the value \p Elt that was being inserted into element \p EltIdx of an

  // aggregate AggTy, see if \p Elt was originally defined by an

  // appropriate extractvalue (same element index, same aggregate type).

  // If found, return the source aggregate from which the extraction was.

  // If \p PredBB is provided, does PHI translation of an \p Elt first.

  auto FindSourceAggregate =

      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,

          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {

    // For now(?), only deal with, at most, a single level of PHI indirection.

    if (UseBB && PredBB) {

      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));

      if (Elt && Elt->getParent() == *UseBB)

        EltDefinedInUseBB = true;

    }

    // FIXME: deal with multiple levels of PHI indirection?


    // Did we find an extraction?

    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);

    if (!EVI)

      return NotFound;


    Value *SourceAggregate = EVI->getAggregateOperand();


    // Is the extraction from the same type into which the insertion was?

    if (SourceAggregate->getType() != AggTy)

      return FoundMismatch;

    // And the element index doesn't change between extraction and insertion?

    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())

      return FoundMismatch;


    return SourceAggregate; // AggregateDescription::Found

  };


  // Given elements AggElts that were constructing an aggregate OrigIVI,

  // see if we can find appropriate source aggregate for each of the elements,

  // and see it's the same aggregate for each element. If so, return it.

  auto FindCommonSourceAggregate =

      [&](std::optional<BasicBlock *> UseBB,

          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {

    std::optional<Value *> SourceAggregate;


    for (auto I : enumerate(AggElts)) {

      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&

             "We don't store nullptr in SourceAggregate!");

      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==

                 (I.index() != 0) &&

             "SourceAggregate should be valid after the first element,");


      // For this element, is there a plausible source aggregate?

      // FIXME: we could special-case undef element, IFF we know that in the

      //        source aggregate said element isn't poison.

      std::optional<Value *> SourceAggregateForElement =

          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);


      // Okay, what have we found? Does that correlate with previous findings?


      // Regardless of whether or not we have previously found source

      // aggregate for previous elements (if any), if we didn't find one for

      // this element, passthrough whatever we have just found.

      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)

        return SourceAggregateForElement;


      // Okay, we have found source aggregate for this element.

      // Let's see what we already know from previous elements, if any.

      switch (Describe(SourceAggregate)) {

      case AggregateDescription::NotFound:

        // This is apparently the first element that we have examined.

        SourceAggregate = SourceAggregateForElement; // Record the aggregate!

        continue; // Great, now look at next element.

      case AggregateDescription::Found:

        // We have previously already successfully examined other elements.

        // Is this the same source aggregate we've found for other elements?

        if (*SourceAggregateForElement != *SourceAggregate)

          return FoundMismatch;

        continue; // Still the same aggregate, look at next element.

      case AggregateDescription::FoundMismatch:

        llvm_unreachable("Can't happen. We would have early-exited then.");

      };

    }


    assert(Describe(SourceAggregate) == AggregateDescription::Found &&

           "Must be a valid Value");

    return *SourceAggregate;

  };


  std::optional<Value *> SourceAggregate;


  // Can we find the source aggregate without looking at predecessors?

  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,

                                              /*PredBB=*/std::nullopt);

  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {

    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)

      return nullptr; // Conflicting source aggregates!

    ++NumAggregateReconstructionsSimplified;

    return replaceInstUsesWith(OrigIVI, *SourceAggregate);

  }


  // Okay, apparently we need to look at predecessors.


  // We should be smart about picking the "use" basic block, which will be the

  // merge point for aggregate, where we'll insert the final PHI that will be

  // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.

  // We should look in which blocks each of the AggElts is being defined,

  // they all should be defined in the same basic block.

  BasicBlock *UseBB = nullptr;


  for (const std::optional<Instruction *> &I : AggElts) {

    BasicBlock *BB = (*I)->getParent();

    // If it's the first instruction we've encountered, record the basic block.

    if (!UseBB) {

      UseBB = BB;

      continue;

    }

    // Otherwise, this must be the same basic block we've seen previously.

    if (UseBB != BB)

      return nullptr;

  }


  // If *all* of the elements are basic-block-independent, meaning they are

  // either function arguments, or constant expressions, then if we didn't

  // handle them without predecessor-aware handling, we won't handle them now.

  if (!UseBB)

    return nullptr;


  // If we didn't manage to find source aggregate without looking at

  // predecessors, and there are no predecessors to look at, then we're done.

  if (pred_empty(UseBB))

    return nullptr;


  // Arbitrary predecessor count limit.

  static const int PredCountLimit = 64;


  // Cache the (non-uniqified!) list of predecessors in a vector,

  // checking the limit at the same time for efficiency.

  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!

  for (BasicBlock *Pred : predecessors(UseBB)) {

    // Don't bother if there are too many predecessors.

    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?

      return nullptr;

    Preds.emplace_back(Pred);

  }


  // For each predecessor, what is the source aggregate,

  // from which all the elements were originally extracted from?

  // Note that we want for the map to have stable iteration order!

  SmallMapVector<BasicBlock *, Value *, 4> SourceAggregates;

  bool FoundSrcAgg = false;

  for (BasicBlock *Pred : Preds) {

    std::pair<decltype(SourceAggregates)::iterator, bool> IV =

        SourceAggregates.try_emplace(Pred);

    // Did we already evaluate this predecessor?

    if (!IV.second)

      continue;


    // Let's hope that when coming from predecessor Pred, all elements of the

    // aggregate produced by OrigIVI must have been originally extracted from

    // the same aggregate. Is that so? Can we find said original aggregate?

    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);

    if (Describe(SourceAggregate) == AggregateDescription::Found) {

      FoundSrcAgg = true;

      IV.first->second = *SourceAggregate;

    } else {

      // If UseBB is the single successor of Pred, we can add InsertValue to

      // Pred.

      auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());

      if (!BI || !BI->isUnconditional())

        return nullptr;

    }

  }


  if (!FoundSrcAgg)

    return nullptr;


  // Do some sanity check if we need to add insertvalue into predecessors.

  auto OrigBB = OrigIVI.getParent();

  for (auto &It : SourceAggregates) {

    if (Describe(It.second) == AggregateDescription::Found)

      continue;


    // Element is defined in UseBB, so it can't be used in predecessors.

    if (EltDefinedInUseBB)

      return nullptr;


    // Do this transformation cross loop boundary may cause dead loop. So we

    // should avoid this situation. But LoopInfo is not generally available, we

    // must be conservative here.

    // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB

    // can't be in inner loop.

    if (UseBB != OrigBB)

      return nullptr;


    // Avoid constructing constant aggregate because constant value may expose

    // more optimizations.

    bool ConstAgg = true;

    for (auto Val : AggElts) {

      Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);

      if (!isa<Constant>(Elt)) {

        ConstAgg = false;

        break;

      }

    }

    if (ConstAgg)

      return nullptr;

  }


  // For predecessors without appropriate source aggregate, create one in the

  // predecessor.

  for (auto &It : SourceAggregates) {

    if (Describe(It.second) == AggregateDescription::Found)

      continue;


    BasicBlock *Pred = It.first;

    Builder.SetInsertPoint(Pred->getTerminator());

    Value *V = PoisonValue::get(AggTy);

    for (auto [Idx, Val] : enumerate(AggElts)) {

      Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);

      V = Builder.CreateInsertValue(V, Elt, Idx);

    }


    It.second = V;

  }


  // All good! Now we just need to thread the source aggregates here.

  // Note that we have to insert the new PHI here, ourselves, because we can't

  // rely on InstCombinerImpl::run() inserting it into the right basic block.

  // Note that the same block can be a predecessor more than once,

  // and we need to preserve that invariant for the PHI node.

  BuilderTy::InsertPointGuard Guard(Builder);

  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());

  auto *PHI =

      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");

  for (BasicBlock *Pred : Preds)

    PHI->addIncoming(SourceAggregates[Pred], Pred);


  ++NumAggregateReconstructionsSimplified;

  return replaceInstUsesWith(OrigIVI, PHI);

}


/// Try to find redundant insertvalue instructions, like the following ones:

///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0

///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0

/// Here the second instruction inserts values at the same indices, as the

/// first one, making the first one redundant.

/// It should be transformed to:

///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0

Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {

  if (Value *V = simplifyInsertValueInst(

          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),

          SQ.getWithInstruction(&I)))

    return replaceInstUsesWith(I, V);


  bool IsRedundant = false;

  ArrayRef<unsigned int> FirstIndices = I.getIndices();


  // If there is a chain of insertvalue instructions (each of them except the

  // last one has only one use and it's another insertvalue insn from this

  // chain), check if any of the 'children' uses the same indices as the first

  // instruction. In this case, the first one is redundant.

  Value *V = &I;

  unsigned Depth = 0;

  while (V->hasOneUse() && Depth < 10) {

    User *U = V->user_back();

    auto UserInsInst = dyn_cast<InsertValueInst>(U);

    if (!UserInsInst || U->getOperand(0) != V)

      break;

    if (UserInsInst->getIndices() == FirstIndices) {

      IsRedundant = true;

      break;

    }

    V = UserInsInst;

    Depth++;

  }


  if (IsRedundant)

    return replaceInstUsesWith(I, I.getOperand(0));


  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))

    return NewI;


  return nullptr;

}


static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {

  // Can not analyze scalable type, the number of elements is not a compile-time

  // constant.

  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))

    return false;


  int MaskSize = Shuf.getShuffleMask().size();

  int VecSize =

      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();


  // A vector select does not change the size of the operands.

  if (MaskSize != VecSize)

    return false;


  // Each mask element must be undefined or choose a vector element from one of

  // the source operands without crossing vector lanes.

  for (int i = 0; i != MaskSize; ++i) {

    int Elt = Shuf.getMaskValue(i);

    if (Elt != -1 && Elt != i && Elt != i + VecSize)

      return false;

  }


  return true;

}


/// Turn a chain of inserts that splats a value into an insert + shuffle:

/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->

/// shufflevector(insertelt(X, %k, 0), poison, zero)

static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {

  // We are interested in the last insert in a chain. So if this insert has a

  // single user and that user is an insert, bail.

  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))

    return nullptr;


  VectorType *VecTy = InsElt.getType();

  // Can not handle scalable type, the number of elements is not a compile-time

  // constant.

  if (isa<ScalableVectorType>(VecTy))

    return nullptr;

  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();


  // Do not try to do this for a one-element vector, since that's a nop,

  // and will cause an inf-loop.

  if (NumElements == 1)

    return nullptr;


  Value *SplatVal = InsElt.getOperand(1);

  InsertElementInst *CurrIE = &InsElt;

  SmallBitVector ElementPresent(NumElements, false);

  InsertElementInst *FirstIE = nullptr;


  // Walk the chain backwards, keeping track of which indices we inserted into,

  // until we hit something that isn't an insert of the splatted value.

  while (CurrIE) {

    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));

    if (!Idx || CurrIE->getOperand(1) != SplatVal)

      return nullptr;


    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));

    // Check none of the intermediate steps have any additional uses, except

    // for the root insertelement instruction, which can be re-used, if it

    // inserts at position 0.

    if (CurrIE != &InsElt &&

        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))

      return nullptr;


    ElementPresent[Idx->getZExtValue()] = true;

    FirstIE = CurrIE;

    CurrIE = NextIE;

  }


  // If this is just a single insertelement (not a sequence), we are done.

  if (FirstIE == &InsElt)

    return nullptr;


  // If we are not inserting into a poison vector, make sure we've seen an

  // insert into every element.

  // TODO: If the base vector is not undef, it might be better to create a splat

  //       and then a select-shuffle (blend) with the base vector.

  if (!match(FirstIE->getOperand(0), m_Poison()))

    if (!ElementPresent.all())

      return nullptr;


  // Create the insert + shuffle.

  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());

  PoisonValue *PoisonVec = PoisonValue::get(VecTy);

  Constant *Zero = ConstantInt::get(Int64Ty, 0);

  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())

    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",

                                        InsElt.getIterator());


  // Splat from element 0, but replace absent elements with poison in the mask.

  SmallVector<int, 16> Mask(NumElements, 0);

  for (unsigned i = 0; i != NumElements; ++i)

    if (!ElementPresent[i])

      Mask[i] = -1;


  return new ShuffleVectorInst(FirstIE, Mask);

}


/// Try to fold an insert element into an existing splat shuffle by changing

/// the shuffle's mask to include the index of this insert element.

static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {

  // Check if the vector operand of this insert is a canonical splat shuffle.

  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));

  if (!Shuf || !Shuf->isZeroEltSplat())

    return nullptr;


  // Bail out early if shuffle is scalable type. The number of elements in

  // shuffle mask is unknown at compile-time.

  if (isa<ScalableVectorType>(Shuf->getType()))

    return nullptr;


  // Check for a constant insertion index.

  uint64_t IdxC;

  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))

    return nullptr;


  // Check if the splat shuffle's input is the same as this insert's scalar op.

  Value *X = InsElt.getOperand(1);

  Value *Op0 = Shuf->getOperand(0);

  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))

    return nullptr;


  // Replace the shuffle mask element at the index of this insert with a zero.

  // For example:

  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1

  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>

  unsigned NumMaskElts =

      cast<FixedVectorType>(Shuf->getType())->getNumElements();

  SmallVector<int, 16> NewMask(NumMaskElts);

  for (unsigned i = 0; i != NumMaskElts; ++i)

    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);


  return new ShuffleVectorInst(Op0, NewMask);

}


/// Try to fold an extract+insert element into an existing identity shuffle by

/// changing the shuffle's mask to include the index of this insert element.

static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {

  // Check if the vector operand of this insert is an identity shuffle.

  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));

  if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||

      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))

    return nullptr;


  // Bail out early if shuffle is scalable type. The number of elements in

  // shuffle mask is unknown at compile-time.

  if (isa<ScalableVectorType>(Shuf->getType()))

    return nullptr;


  // Check for a constant insertion index.

  uint64_t IdxC;

  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))

    return nullptr;


  // Check if this insert's scalar op is extracted from the identity shuffle's

  // input vector.

  Value *Scalar = InsElt.getOperand(1);

  Value *X = Shuf->getOperand(0);

  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))

    return nullptr;


  // Replace the shuffle mask element at the index of this extract+insert with

  // that same index value.

  // For example:

  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'

  unsigned NumMaskElts =

      cast<FixedVectorType>(Shuf->getType())->getNumElements();

  SmallVector<int, 16> NewMask(NumMaskElts);

  ArrayRef<int> OldMask = Shuf->getShuffleMask();

  for (unsigned i = 0; i != NumMaskElts; ++i) {

    if (i != IdxC) {

      // All mask elements besides the inserted element remain the same.

      NewMask[i] = OldMask[i];

    } else if (OldMask[i] == (int)IdxC) {

      // If the mask element was already set, there's nothing to do

      // (demanded elements analysis may unset it later).

      return nullptr;

    } else {

      assert(OldMask[i] == PoisonMaskElem &&

             "Unexpected shuffle mask element for identity shuffle");

      NewMask[i] = IdxC;

    }

  }


  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);

}


/// If we have an insertelement instruction feeding into another insertelement

/// and the 2nd is inserting a constant into the vector, canonicalize that

/// constant insertion before the insertion of a variable:

///

/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->

/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1

///

/// This has the potential of eliminating the 2nd insertelement instruction

/// via constant folding of the scalar constant into a vector constant.

static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,

                                     InstCombiner::BuilderTy &Builder) {

  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));

  if (!InsElt1 || !InsElt1->hasOneUse())

    return nullptr;


  Value *X, *Y;

  Constant *ScalarC;

  ConstantInt *IdxC1, *IdxC2;

  if (match(InsElt1->getOperand(0), m_Value(X)) &&

      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&

      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&

      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&

      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {

    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);

    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);

  }


  return nullptr;

}


/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex

/// --> shufflevector X, CVec', Mask'

static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {

  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));

  // Bail out if the parent has more than one use. In that case, we'd be

  // replacing the insertelt with a shuffle, and that's not a clear win.

  if (!Inst || !Inst->hasOneUse())

    return nullptr;

  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {

    // The shuffle must have a constant vector operand. The insertelt must have

    // a constant scalar being inserted at a constant position in the vector.

    Constant *ShufConstVec, *InsEltScalar;

    uint64_t InsEltIndex;

    if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||

        !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||

        !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))

      return nullptr;


    // Adding an element to an arbitrary shuffle could be expensive, but a

    // shuffle that selects elements from vectors without crossing lanes is

    // assumed cheap.

    // If we're just adding a constant into that shuffle, it will still be

    // cheap.

    if (!isShuffleEquivalentToSelect(*Shuf))

      return nullptr;


    // From the above 'select' check, we know that the mask has the same number

    // of elements as the vector input operands. We also know that each constant

    // input element is used in its lane and can not be used more than once by

    // the shuffle. Therefore, replace the constant in the shuffle's constant

    // vector with the insertelt constant. Replace the constant in the shuffle's

    // mask vector with the insertelt index plus the length of the vector

    // (because the constant vector operand of a shuffle is always the 2nd

    // operand).

    ArrayRef<int> Mask = Shuf->getShuffleMask();

    unsigned NumElts = Mask.size();

    SmallVector<Constant *, 16> NewShufElts(NumElts);

    SmallVector<int, 16> NewMaskElts(NumElts);

    for (unsigned I = 0; I != NumElts; ++I) {

      if (I == InsEltIndex) {

        NewShufElts[I] = InsEltScalar;

        NewMaskElts[I] = InsEltIndex + NumElts;

      } else {

        // Copy over the existing values.

        NewShufElts[I] = ShufConstVec->getAggregateElement(I);

        NewMaskElts[I] = Mask[I];

      }


      // Bail if we failed to find an element.

      if (!NewShufElts[I])

        return nullptr;

    }


    // Create new operands for a shuffle that includes the constant of the

    // original insertelt. The old shuffle will be dead now.

    return new ShuffleVectorInst(Shuf->getOperand(0),

                                 ConstantVector::get(NewShufElts), NewMaskElts);

  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {

    // Transform sequences of insertelements ops with constant data/indexes into

    // a single shuffle op.

    // Can not handle scalable type, the number of elements needed to create

    // shuffle mask is not a compile-time constant.

    if (isa<ScalableVectorType>(InsElt.getType()))

      return nullptr;

    unsigned NumElts =

        cast<FixedVectorType>(InsElt.getType())->getNumElements();


    uint64_t InsertIdx[2];

    Constant *Val[2];

    if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||

        !match(InsElt.getOperand(1), m_Constant(Val[0])) ||

        !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||

        !match(IEI->getOperand(1), m_Constant(Val[1])))

      return nullptr;

    SmallVector<Constant *, 16> Values(NumElts);

    SmallVector<int, 16> Mask(NumElts);

    auto ValI = std::begin(Val);

    // Generate new constant vector and mask.

    // We have 2 values/masks from the insertelements instructions. Insert them

    // into new value/mask vectors.

    for (uint64_t I : InsertIdx) {

      if (!Values[I]) {

        Values[I] = *ValI;

        Mask[I] = NumElts + I;

      }

      ++ValI;

    }

    // Remaining values are filled with 'poison' values.

    for (unsigned I = 0; I < NumElts; ++I) {

      if (!Values[I]) {

        Values[I] = PoisonValue::get(InsElt.getType()->getElementType());

        Mask[I] = I;

      }

    }

    // Create new operands for a shuffle that includes the constant of the

    // original insertelt.

    return new ShuffleVectorInst(IEI->getOperand(0),

                                 ConstantVector::get(Values), Mask);

  }

  return nullptr;

}


/// If both the base vector and the inserted element are extended from the same

/// type, do the insert element in the narrow source type followed by extend.

/// TODO: This can be extended to include other cast opcodes, but particularly

///       if we create a wider insertelement, make sure codegen is not harmed.

static Instruction *narrowInsElt(InsertElementInst &InsElt,

                                 InstCombiner::BuilderTy &Builder) {

  // We are creating a vector extend. If the original vector extend has another

  // use, that would mean we end up with 2 vector extends, so avoid that.

  // TODO: We could ease the use-clause to "if at least one op has one use"

  //       (assuming that the source types match - see next TODO comment).

  Value *Vec = InsElt.getOperand(0);

  if (!Vec->hasOneUse())

    return nullptr;


  Value *Scalar = InsElt.getOperand(1);

  Value *X, *Y;

  CastInst::CastOps CastOpcode;

  if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))

    CastOpcode = Instruction::FPExt;

  else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))

    CastOpcode = Instruction::SExt;

  else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))

    CastOpcode = Instruction::ZExt;

  else

    return nullptr;


  // TODO: We can allow mismatched types by creating an intermediate cast.

  if (X->getType()->getScalarType() != Y->getType())

    return nullptr;


  // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)

  Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));

  return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());

}


/// If we are inserting 2 halves of a value into adjacent elements of a vector,

/// try to convert to a single insert with appropriate bitcasts.

static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,

                                        bool IsBigEndian,

                                        InstCombiner::BuilderTy &Builder) {

  Value *VecOp    = InsElt.getOperand(0);

  Value *ScalarOp = InsElt.getOperand(1);

  Value *IndexOp  = InsElt.getOperand(2);


  // Pattern depends on endian because we expect lower index is inserted first.

  // Big endian:

  // inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1

  // Little endian:

  // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1

  // Note: It is not safe to do this transform with an arbitrary base vector

  //       because the bitcast of that vector to fewer/larger elements could

  //       allow poison to spill into an element that was not poison before.

  // TODO: Detect smaller fractions of the scalar.

  // TODO: One-use checks are conservative.

  auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());

  Value *Scalar0, *BaseVec;

  uint64_t Index0, Index1;

  if (!VTy || (VTy->getNumElements() & 1) ||

      !match(IndexOp, m_ConstantInt(Index1)) ||

      !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),

                                m_ConstantInt(Index0))) ||

      !match(BaseVec, m_Undef()))

    return nullptr;


  // The first insert must be to the index one less than this one, and

  // the first insert must be to an even index.

  if (Index0 + 1 != Index1 || Index0 & 1)

    return nullptr;


  // For big endian, the high half of the value should be inserted first.

  // For little endian, the low half of the value should be inserted first.

  Value *X;

  uint64_t ShAmt;

  if (IsBigEndian) {

    if (!match(ScalarOp, m_Trunc(m_Value(X))) ||

        !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))

      return nullptr;

  } else {

    if (!match(Scalar0, m_Trunc(m_Value(X))) ||

        !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))

      return nullptr;

  }


  Type *SrcTy = X->getType();

  unsigned ScalarWidth = SrcTy->getScalarSizeInBits();

  unsigned VecEltWidth = VTy->getScalarSizeInBits();

  if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)

    return nullptr;


  // Bitcast the base vector to a vector type with the source element type.

  Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);

  Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);


  // Scale the insert index for a vector with half as many elements.

  // bitcast (inselt (bitcast BaseVec), X, NewIndex)

  uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;

  Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);

  return new BitCastInst(NewInsert, VTy);

}


Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {

  Value *VecOp    = IE.getOperand(0);

  Value *ScalarOp = IE.getOperand(1);

  Value *IdxOp    = IE.getOperand(2);


  if (auto *V = simplifyInsertElementInst(

          VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))

    return replaceInstUsesWith(IE, V);


  // Canonicalize type of constant indices to i64 to simplify CSE

  if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {

    if (auto *NewIdx = getPreferredVectorIndex(IndexC))

      return replaceOperand(IE, 2, NewIdx);


    Value *BaseVec, *OtherScalar;

    uint64_t OtherIndexVal;

    if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),

                                          m_Value(OtherScalar),

                                          m_ConstantInt(OtherIndexVal)))) &&

        !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {

      Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);

      return InsertElementInst::Create(NewIns, OtherScalar,

                                       Builder.getInt64(OtherIndexVal));

    }

  }


  // If the scalar is bitcast and inserted into undef, do the insert in the

  // source type followed by bitcast.

  // TODO: Generalize for insert into any constant, not just undef?

  Value *ScalarSrc;

  if (match(VecOp, m_Undef()) &&

      match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&

      (ScalarSrc->getType()->isIntegerTy() ||

       ScalarSrc->getType()->isFloatingPointTy())) {

    // inselt undef, (bitcast ScalarSrc), IdxOp -->

    //   bitcast (inselt undef, ScalarSrc, IdxOp)

    Type *ScalarTy = ScalarSrc->getType();

    Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());

    Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)

                                                 : UndefValue::get(VecTy);

    Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);

    return new BitCastInst(NewInsElt, IE.getType());

  }


  // If the vector and scalar are both bitcast from the same element type, do

  // the insert in that source type followed by bitcast.

  Value *VecSrc;

  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&

      match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&

      (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&

      VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&

      cast<VectorType>(VecSrc->getType())->getElementType() ==

          ScalarSrc->getType()) {

    // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->

    //   bitcast (inselt VecSrc, ScalarSrc, IdxOp)

    Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);

    return new BitCastInst(NewInsElt, IE.getType());

  }


  // If the inserted element was extracted from some other fixed-length vector

  // and both indexes are valid constants, try to turn this into a shuffle.

  // Can not handle scalable vector type, the number of elements needed to

  // create shuffle mask is not a compile-time constant.

  uint64_t InsertedIdx, ExtractedIdx;

  Value *ExtVecOp;

  if (isa<FixedVectorType>(IE.getType()) &&

      match(IdxOp, m_ConstantInt(InsertedIdx)) &&

      match(ScalarOp,

            m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&

      isa<FixedVectorType>(ExtVecOp->getType()) &&

      ExtractedIdx <

          cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {

    // TODO: Looking at the user(s) to determine if this insert is a

    // fold-to-shuffle opportunity does not match the usual instcombine

    // constraints. We should decide if the transform is worthy based only

    // on this instruction and its operands, but that may not work currently.

    //

    // Here, we are trying to avoid creating shuffles before reaching

    // the end of a chain of extract-insert pairs. This is complicated because

    // we do not generally form arbitrary shuffle masks in instcombine

    // (because those may codegen poorly), but collectShuffleElements() does

    // exactly that.

    //

    // The rules for determining what is an acceptable target-independent

    // shuffle mask are fuzzy because they evolve based on the backend's

    // capabilities and real-world impact.

    auto isShuffleRootCandidate = [](InsertElementInst &Insert) {

      if (!Insert.hasOneUse())

        return true;

      auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());

      if (!InsertUser)

        return true;

      return false;

    };


    // Try to form a shuffle from a chain of extract-insert ops.

    if (isShuffleRootCandidate(IE)) {

      bool Rerun = true;

      while (Rerun) {

        Rerun = false;


        SmallVector<int, 16> Mask;

        ShuffleOps LR =

            collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);


        // The proposed shuffle may be trivial, in which case we shouldn't

        // perform the combine.

        if (LR.first != &IE && LR.second != &IE) {

          // We now have a shuffle of LHS, RHS, Mask.

          if (LR.second == nullptr)

            LR.second = PoisonValue::get(LR.first->getType());

          return new ShuffleVectorInst(LR.first, LR.second, Mask);

        }

      }

    }

  }


  if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {

    unsigned VWidth = VecTy->getNumElements();

    APInt PoisonElts(VWidth, 0);

    APInt AllOnesEltMask(APInt::getAllOnes(VWidth));

    if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,

                                              PoisonElts)) {

      if (V != &IE)

        return replaceInstUsesWith(IE, V);

      return &IE;

    }

  }


  if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))

    return Shuf;


  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))

    return NewInsElt;


  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))

    return Broadcast;


  if (Instruction *Splat = foldInsEltIntoSplat(IE))

    return Splat;


  if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))

    return IdentityShuf;


  if (Instruction *Ext = narrowInsElt(IE, Builder))

    return Ext;


  if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))

    return Ext;


  return nullptr;

}


/// Return true if we can evaluate the specified expression tree if the vector

/// elements were shuffled in a different order.

static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,

                                unsigned Depth = 5) {

  // We can always reorder the elements of a constant.

  if (isa<Constant>(V))

    return true;


  // We won't reorder vector arguments. No IPO here.

  Instruction *I = dyn_cast<Instruction>(V);

  if (!I) return false;


  // Two users may expect different orders of the elements. Don't try it.

  if (!I->hasOneUse())

    return false;


  if (Depth == 0) return false;


  switch (I->getOpcode()) {

    case Instruction::UDiv:

    case Instruction::SDiv:

    case Instruction::URem:

    case Instruction::SRem:

      // Propagating an undefined shuffle mask element to integer div/rem is not

      // allowed because those opcodes can create immediate undefined behavior

      // from an undefined element in an operand.

      if (llvm::is_contained(Mask, -1))

        return false;

      [[fallthrough]];

    case Instruction::Add:

    case Instruction::FAdd:

    case Instruction::Sub:

    case Instruction::FSub:

    case Instruction::Mul:

    case Instruction::FMul:

    case Instruction::FDiv:

    case Instruction::FRem:

    case Instruction::Shl:

    case Instruction::LShr:

    case Instruction::AShr:

    case Instruction::And:

    case Instruction::Or:

    case Instruction::Xor:

    case Instruction::ICmp:

    case Instruction::FCmp:

    case Instruction::Trunc:

    case Instruction::ZExt:

    case Instruction::SExt:

    case Instruction::FPToUI:

    case Instruction::FPToSI:

    case Instruction::UIToFP:

    case Instruction::SIToFP:

    case Instruction::FPTrunc:

    case Instruction::FPExt:

    case Instruction::GetElementPtr: {

      // Bail out if we would create longer vector ops. We could allow creating

      // longer vector ops, but that may result in more expensive codegen.

      Type *ITy = I->getType();

      if (ITy->isVectorTy() &&

          Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())

        return false;

      for (Value *Operand : I->operands()) {

        if (!canEvaluateShuffled(Operand, Mask, Depth - 1))

          return false;

      }

      return true;

    }

    case Instruction::InsertElement: {

      ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));

      if (!CI) return false;

      int ElementNumber = CI->getLimitedValue();


      // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'

      // can't put an element into multiple indices.

      bool SeenOnce = false;

      for (int I : Mask) {

        if (I == ElementNumber) {

          if (SeenOnce)

            return false;

          SeenOnce = true;

        }

      }

      return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);

    }

  }

  return false;

}


/// Rebuild a new instruction just like 'I' but with the new operands given.

/// In the event of type mismatch, the type of the operands is correct.

static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,

                       IRBuilderBase &Builder) {

  Builder.SetInsertPoint(I);

  switch (I->getOpcode()) {

    case Instruction::Add:

    case Instruction::FAdd:

    case Instruction::Sub:

    case Instruction::FSub:

    case Instruction::Mul:

    case Instruction::FMul:

    case Instruction::UDiv:

    case Instruction::SDiv:

    case Instruction::FDiv:

    case Instruction::URem:

    case Instruction::SRem:

    case Instruction::FRem:

    case Instruction::Shl:

    case Instruction::LShr:

    case Instruction::AShr:

    case Instruction::And:

    case Instruction::Or:

    case Instruction::Xor: {

      BinaryOperator *BO = cast<BinaryOperator>(I);

      assert(NewOps.size() == 2 && "binary operator with #ops != 2");

      Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),

                                       NewOps[0], NewOps[1]);

      if (auto *NewI = dyn_cast<Instruction>(New)) {

        if (isa<OverflowingBinaryOperator>(BO)) {

          NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());

          NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());

        }

        if (isa<PossiblyExactOperator>(BO)) {

          NewI->setIsExact(BO->isExact());

        }

        if (isa<FPMathOperator>(BO))

          NewI->copyFastMathFlags(I);

      }

      return New;

    }

    case Instruction::ICmp:

      assert(NewOps.size() == 2 && "icmp with #ops != 2");

      return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],

                                NewOps[1]);

    case Instruction::FCmp:

      assert(NewOps.size() == 2 && "fcmp with #ops != 2");

      return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],

                                NewOps[1]);

    case Instruction::Trunc:

    case Instruction::ZExt:

    case Instruction::SExt:

    case Instruction::FPToUI:

    case Instruction::FPToSI:

    case Instruction::UIToFP:

    case Instruction::SIToFP:

    case Instruction::FPTrunc:

    case Instruction::FPExt: {

      // It's possible that the mask has a different number of elements from

      // the original cast. We recompute the destination type to match the mask.

      Type *DestTy = VectorType::get(

          I->getType()->getScalarType(),

          cast<VectorType>(NewOps[0]->getType())->getElementCount());

      assert(NewOps.size() == 1 && "cast with #ops != 1");

      return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],

                                DestTy);

    }

    case Instruction::GetElementPtr: {

      Value *Ptr = NewOps[0];

      ArrayRef<Value*> Idx = NewOps.slice(1);

      return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),

                               Ptr, Idx, "",

                               cast<GEPOperator>(I)->getNoWrapFlags());

    }

  }

  llvm_unreachable("failed to rebuild vector instructions");

}


static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,

                                              IRBuilderBase &Builder) {

  // Mask.size() does not need to be equal to the number of vector elements.


  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");

  Type *EltTy = V->getType()->getScalarType();


  if (isa<PoisonValue>(V))

    return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));


  if (match(V, m_Undef()))

    return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));


  if (isa<ConstantAggregateZero>(V))

    return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));


  if (Constant *C = dyn_cast<Constant>(V))

    return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),

                                          Mask);


  Instruction *I = cast<Instruction>(V);

  switch (I->getOpcode()) {

    case Instruction::Add:

    case Instruction::FAdd:

    case Instruction::Sub:

    case Instruction::FSub:

    case Instruction::Mul:

    case Instruction::FMul:

    case Instruction::UDiv:

    case Instruction::SDiv:

    case Instruction::FDiv:

    case Instruction::URem:

    case Instruction::SRem:

    case Instruction::FRem:

    case Instruction::Shl:

    case Instruction::LShr:

    case Instruction::AShr:

    case Instruction::And:

    case Instruction::Or:

    case Instruction::Xor:

    case Instruction::ICmp:

    case Instruction::FCmp:

    case Instruction::Trunc:

    case Instruction::ZExt:

    case Instruction::SExt:

    case Instruction::FPToUI:

    case Instruction::FPToSI:

    case Instruction::UIToFP:

    case Instruction::SIToFP:

    case Instruction::FPTrunc:

    case Instruction::FPExt:

    case Instruction::Select:

    case Instruction::GetElementPtr: {

      SmallVector<Value*, 8> NewOps;

      bool NeedsRebuild =

          (Mask.size() !=

           cast<FixedVectorType>(I->getType())->getNumElements());

      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {

        Value *V;

        // Recursively call evaluateInDifferentElementOrder on vector arguments

        // as well. E.g. GetElementPtr may have scalar operands even if the

        // return value is a vector, so we need to examine the operand type.

        if (I->getOperand(i)->getType()->isVectorTy())

          V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);

        else

          V = I->getOperand(i);

        NewOps.push_back(V);

        NeedsRebuild |= (V != I->getOperand(i));

      }

      if (NeedsRebuild)

        return buildNew(I, NewOps, Builder);

      return I;

    }

    case Instruction::InsertElement: {

      int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();


      // The insertelement was inserting at Element. Figure out which element

      // that becomes after shuffling. The answer is guaranteed to be unique

      // by CanEvaluateShuffled.

      bool Found = false;

      int Index = 0;

      for (int e = Mask.size(); Index != e; ++Index) {

        if (Mask[Index] == Element) {

          Found = true;

          break;

        }

      }


      // If element is not in Mask, no need to handle the operand 1 (element to

      // be inserted). Just evaluate values in operand 0 according to Mask.

      if (!Found)

        return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);


      Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,

                                                 Builder);

      Builder.SetInsertPoint(I);

      return Builder.CreateInsertElement(V, I->getOperand(1), Index);

    }

  }

  llvm_unreachable("failed to reorder elements of vector instruction!");

}


// Returns true if the shuffle is extracting a contiguous range of values from

// LHS, for example:

//                 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

//   Input:        |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|

//   Shuffles to:  |EE|FF|GG|HH|

//                 +--+--+--+--+

static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,

                                       ArrayRef<int> Mask) {

  unsigned LHSElems =

      cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();

  unsigned MaskElems = Mask.size();

  unsigned BegIdx = Mask.front();

  unsigned EndIdx = Mask.back();

  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)

    return false;

  for (unsigned I = 0; I != MaskElems; ++I)

    if (static_cast<unsigned>(Mask[I]) != BegIdx + I)

      return false;

  return true;

}


/// These are the ingredients in an alternate form binary operator as described

/// below.

struct BinopElts {

  BinaryOperator::BinaryOps Opcode;

  Value *Op0;

  Value *Op1;

  BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,

            Value *V0 = nullptr, Value *V1 = nullptr) :

      Opcode(Opc), Op0(V0), Op1(V1) {}

  operator bool() const { return Opcode != 0; }

};


/// Binops may be transformed into binops with different opcodes and operands.

/// Reverse the usual canonicalization to enable folds with the non-canonical

/// form of the binop. If a transform is possible, return the elements of the

/// new binop. If not, return invalid elements.

static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {

  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);

  Type *Ty = BO->getType();

  switch (BO->getOpcode()) {

  case Instruction::Shl: {

    // shl X, C --> mul X, (1 << C)

    Constant *C;

    if (match(BO1, m_ImmConstant(C))) {

      Constant *ShlOne = ConstantFoldBinaryOpOperands(

          Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);

      assert(ShlOne && "Constant folding of immediate constants failed");

      return {Instruction::Mul, BO0, ShlOne};

    }

    break;

  }

  case Instruction::Or: {

    // or disjoin X, C --> add X, C

    if (cast<PossiblyDisjointInst>(BO)->isDisjoint())

      return {Instruction::Add, BO0, BO1};

    break;

  }

  case Instruction::Sub:

    // sub 0, X --> mul X, -1

    if (match(BO0, m_ZeroInt()))

      return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};

    break;

  default:

    break;

  }

  return {};

}


/// A select shuffle of a select shuffle with a shared operand can be reduced

/// to a single select shuffle. This is an obvious improvement in IR, and the

/// backend is expected to lower select shuffles efficiently.

static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {

  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");


  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);

  SmallVector<int, 16> Mask;

  Shuf.getShuffleMask(Mask);

  unsigned NumElts = Mask.size();


  // Canonicalize a select shuffle with common operand as Op1.

  auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);

  if (ShufOp && ShufOp->isSelect() &&

      (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {

    std::swap(Op0, Op1);

    ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);

  }


  ShufOp = dyn_cast<ShuffleVectorInst>(Op1);

  if (!ShufOp || !ShufOp->isSelect() ||

      (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))

    return nullptr;


  Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);

  SmallVector<int, 16> Mask1;

  ShufOp->getShuffleMask(Mask1);

  assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");


  // Canonicalize common operand (Op0) as X (first operand of first shuffle).

  if (Y == Op0) {

    std::swap(X, Y);

    ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);

  }


  // If the mask chooses from X (operand 0), it stays the same.

  // If the mask chooses from the earlier shuffle, the other mask value is

  // transferred to the combined select shuffle:

  // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'

  SmallVector<int, 16> NewMask(NumElts);

  for (unsigned i = 0; i != NumElts; ++i)

    NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];


  // A select mask with undef elements might look like an identity mask.

  assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||

          ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&

         "Unexpected shuffle mask");

  return new ShuffleVectorInst(X, Y, NewMask);

}


static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,

                                                const SimplifyQuery &SQ) {

  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");


  // Are we shuffling together some value and that same value after it has been

  // modified by a binop with a constant?

  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);

  Constant *C;

  bool Op0IsBinop;

  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))

    Op0IsBinop = true;

  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))

    Op0IsBinop = false;

  else

    return nullptr;


  // The identity constant for a binop leaves a variable operand unchanged. For

  // a vector, this is a splat of something like 0, -1, or 1.

  // If there's no identity constant for this binop, we're done.

  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);

  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();

  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);

  if (!IdC)

    return nullptr;


  Value *X = Op0IsBinop ? Op1 : Op0;


  // Prevent folding in the case the non-binop operand might have NaN values.

  // If X can have NaN elements then we have that the floating point math

  // operation in the transformed code may not preserve the exact NaN

  // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.

  // This makes the transformation incorrect since the original program would

  // have preserved the exact NaN bit-pattern.

  // Avoid the folding if X can have NaN elements.

  if (Shuf.getType()->getElementType()->isFloatingPointTy() &&

      !isKnownNeverNaN(X, SQ))

    return nullptr;


  // Shuffle identity constants into the lanes that return the original value.

  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}

  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}

  // The existing binop constant vector remains in the same operand position.

  ArrayRef<int> Mask = Shuf.getShuffleMask();

  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :

                                ConstantExpr::getShuffleVector(IdC, C, Mask);


  bool MightCreatePoisonOrUB =

      is_contained(Mask, PoisonMaskElem) &&

      (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));

  if (MightCreatePoisonOrUB)

    NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);


  // shuf (bop X, C), X, M --> bop X, C'

  // shuf X, (bop X, C), M --> bop X, C'

  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);

  NewBO->copyIRFlags(BO);


  // An undef shuffle mask element may propagate as an undef constant element in

  // the new binop. That would produce poison where the original code might not.

  // If we already made a safe constant, then there's no danger.

  if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)

    NewBO->dropPoisonGeneratingFlags();

  return NewBO;

}


/// If we have an insert of a scalar to a non-zero element of an undefined

/// vector and then shuffle that value, that's the same as inserting to the zero

/// element and shuffling. Splatting from the zero element is recognized as the

/// canonical form of splat.

static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,

                                            InstCombiner::BuilderTy &Builder) {

  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);

  ArrayRef<int> Mask = Shuf.getShuffleMask();

  Value *X;

  uint64_t IndexC;


  // Match a shuffle that is a splat to a non-zero element.

  if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),

                                       m_ConstantInt(IndexC)))) ||

      !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)

    return nullptr;


  // Insert into element 0 of a poison vector.

  PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());

  Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);


  // Splat from element 0. Any mask element that is poison remains poison.

  // For example:

  // shuf (inselt poison, X, 2), _, <2,2,undef>

  //   --> shuf (inselt poison, X, 0), poison, <0,0,undef>

  unsigned NumMaskElts =

      cast<FixedVectorType>(Shuf.getType())->getNumElements();

  SmallVector<int, 16> NewMask(NumMaskElts, 0);

  for (unsigned i = 0; i != NumMaskElts; ++i)

    if (Mask[i] == PoisonMaskElem)

      NewMask[i] = Mask[i];


  return new ShuffleVectorInst(NewIns, NewMask);

}


/// Try to fold shuffles that are the equivalent of a vector select.

Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {

  if (!Shuf.isSelect())

    return nullptr;


  // Canonicalize to choose from operand 0 first unless operand 1 is undefined.

  // Commuting undef to operand 0 conflicts with another canonicalization.

  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();

  if (!match(Shuf.getOperand(1), m_Undef()) &&

      Shuf.getMaskValue(0) >= (int)NumElts) {

    // TODO: Can we assert that both operands of a shuffle-select are not undef

    // (otherwise, it would have been folded by instsimplify?

    Shuf.commute();

    return &Shuf;

  }


  if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))

    return I;


  if (Instruction *I = foldSelectShuffleWith1Binop(

          Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))

    return I;


  BinaryOperator *B0, *B1;

  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||

      !match(Shuf.getOperand(1), m_BinOp(B1)))

    return nullptr;


  // If one operand is "0 - X", allow that to be viewed as "X * -1"

  // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired

  // with a multiply, we will exit because C0/C1 will not be set.

  Value *X, *Y;

  Constant *C0 = nullptr, *C1 = nullptr;

  bool ConstantsAreOp1;

  if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&

      match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))

    ConstantsAreOp1 = false;

  else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),

                                 m_Neg(m_Value(X)))) &&

           match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),

                                 m_Neg(m_Value(Y)))))

    ConstantsAreOp1 = true;

  else

    return nullptr;


  // We need matching binops to fold the lanes together.

  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();

  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();

  bool DropNSW = false;

  if (ConstantsAreOp1 && Opc0 != Opc1) {

    // TODO: We drop "nsw" if shift is converted into multiply because it may

    // not be correct when the shift amount is BitWidth - 1. We could examine

    // each vector element to determine if it is safe to keep that flag.

    if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)

      DropNSW = true;

    if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {

      assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");

      Opc0 = AltB0.Opcode;

      C0 = cast<Constant>(AltB0.Op1);

    } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {

      assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");

      Opc1 = AltB1.Opcode;

      C1 = cast<Constant>(AltB1.Op1);

    }

  }


  if (Opc0 != Opc1 || !C0 || !C1)

    return nullptr;


  // The opcodes must be the same. Use a new name to make that clear.

  BinaryOperator::BinaryOps BOpc = Opc0;


  // Select the constant elements needed for the single binop.

  ArrayRef<int> Mask = Shuf.getShuffleMask();

  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);


  // We are moving a binop after a shuffle. When a shuffle has an undefined

  // mask element, the result is undefined, but it is not poison or undefined

  // behavior. That is not necessarily true for div/rem/shift.

  bool MightCreatePoisonOrUB =

      is_contained(Mask, PoisonMaskElem) &&

      (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));

  if (MightCreatePoisonOrUB)

    NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,

                                                       ConstantsAreOp1);


  Value *V;

  if (X == Y) {

    // Remove a binop and the shuffle by rearranging the constant:

    // shuffle (op V, C0), (op V, C1), M --> op V, C'

    // shuffle (op C0, V), (op C1, V), M --> op C', V

    V = X;

  } else {

    // If there are 2 different variable operands, we must create a new shuffle

    // (select) first, so check uses to ensure that we don't end up with more

    // instructions than we started with.

    if (!B0->hasOneUse() && !B1->hasOneUse())

      return nullptr;


    // If we use the original shuffle mask and op1 is *variable*, we would be

    // putting an undef into operand 1 of div/rem/shift. This is either UB or

    // poison. We do not have to guard against UB when *constants* are op1

    // because safe constants guarantee that we do not overflow sdiv/srem (and

    // there's no danger for other opcodes).

    // TODO: To allow this case, create a new shuffle mask with no undefs.

    if (MightCreatePoisonOrUB && !ConstantsAreOp1)

      return nullptr;


    // Note: In general, we do not create new shuffles in InstCombine because we

    // do not know if a target can lower an arbitrary shuffle optimally. In this

    // case, the shuffle uses the existing mask, so there is no additional risk.


    // Select the variable vectors first, then perform the binop:

    // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'

    // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)

    V = Builder.CreateShuffleVector(X, Y, Mask);

  }


  Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :

                                   Builder.CreateBinOp(BOpc, NewC, V);


  // Flags are intersected from the 2 source binops. But there are 2 exceptions:

  // 1. If we changed an opcode, poison conditions might have changed.

  // 2. If the shuffle had undef mask elements, the new binop might have undefs

  //    where the original code did not. But if we already made a safe constant,

  //    then there's no danger.

  if (auto *NewI = dyn_cast<Instruction>(NewBO)) {

    NewI->copyIRFlags(B0);

    NewI->andIRFlags(B1);

    if (DropNSW)

      NewI->setHasNoSignedWrap(false);

    if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)

      NewI->dropPoisonGeneratingFlags();

  }

  return replaceInstUsesWith(Shuf, NewBO);

}


/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.

/// Example (little endian):

/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>

static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,

                                     bool IsBigEndian) {

  // This must be a bitcasted shuffle of 1 vector integer operand.

  Type *DestType = Shuf.getType();

  Value *X;

  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||

      !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())

    return nullptr;


  // The source type must have the same number of elements as the shuffle,

  // and the source element type must be larger than the shuffle element type.

  Type *SrcType = X->getType();

  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||

      cast<FixedVectorType>(SrcType)->getNumElements() !=

          cast<FixedVectorType>(DestType)->getNumElements() ||

      SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)

    return nullptr;


  assert(Shuf.changesLength() && !Shuf.increasesLength() &&

         "Expected a shuffle that decreases length");


  // Last, check that the mask chooses the correct low bits for each narrow

  // element in the result.

  uint64_t TruncRatio =

      SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();

  ArrayRef<int> Mask = Shuf.getShuffleMask();

  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {

    if (Mask[i] == PoisonMaskElem)

      continue;

    uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;

    assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");

    if (Mask[i] != (int)LSBIndex)

      return nullptr;

  }


  return new TruncInst(X, DestType);

}


/// Match a shuffle-select-shuffle pattern where the shuffles are widening and

/// narrowing (concatenating with poison and extracting back to the original

/// length). This allows replacing the wide select with a narrow select.

static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,

                                       InstCombiner::BuilderTy &Builder) {

  // This must be a narrowing identity shuffle. It extracts the 1st N elements

  // of the 1st vector operand of a shuffle.

  if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())

    return nullptr;


  // The vector being shuffled must be a vector select that we can eliminate.

  // TODO: The one-use requirement could be eased if X and/or Y are constants.

  Value *Cond, *X, *Y;

  if (!match(Shuf.getOperand(0),

             m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))

    return nullptr;


  // We need a narrow condition value. It must be extended with poison elements

  // and have the same number of elements as this shuffle.

  unsigned NarrowNumElts =

      cast<FixedVectorType>(Shuf.getType())->getNumElements();

  Value *NarrowCond;

  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||

      cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=

          NarrowNumElts ||

      !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())

    return nullptr;


  // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)

  // -->

  // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)

  Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());

  Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());

  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);

}


/// Canonicalize FP negate/abs after shuffle.

static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,

                                          InstCombiner::BuilderTy &Builder) {

  auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));

  Value *X;

  if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))

    return nullptr;


  bool IsFNeg = S0->getOpcode() == Instruction::FNeg;


  // Match 2-input (binary) shuffle.

  auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));

  Value *Y;

  if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||

      S0->getOpcode() != S1->getOpcode() ||

      (!S0->hasOneUse() && !S1->hasOneUse()))

    return nullptr;


  // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)

  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());

  Instruction *NewF;

  if (IsFNeg) {

    NewF = UnaryOperator::CreateFNeg(NewShuf);

  } else {

    Function *FAbs = Intrinsic::getOrInsertDeclaration(

        Shuf.getModule(), Intrinsic::fabs, Shuf.getType());

    NewF = CallInst::Create(FAbs, {NewShuf});

  }

  NewF->copyIRFlags(S0);

  NewF->andIRFlags(S1);

  return NewF;

}


/// Canonicalize casts after shuffle.

static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,

                                    InstCombiner::BuilderTy &Builder) {

  auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));

  if (!Cast0)

    return nullptr;


  // TODO: Allow other opcodes? That would require easing the type restrictions

  //       below here.

  CastInst::CastOps CastOpcode = Cast0->getOpcode();

  switch (CastOpcode) {

  case Instruction::SExt:

  case Instruction::ZExt:

  case Instruction::FPToSI:

  case Instruction::FPToUI:

  case Instruction::SIToFP:

  case Instruction::UIToFP:

    break;

  default:

    return nullptr;

  }


  VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());

  VectorType *ShufTy = Shuf.getType();

  VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());


  // TODO: Allow length-increasing shuffles?

  if (ShufTy->getElementCount().getKnownMinValue() >

      ShufOpTy->getElementCount().getKnownMinValue())

    return nullptr;


  // shuffle (cast X), Poison, identity-with-extract-mask -->

  // cast (shuffle X, Poison, identity-with-extract-mask).

  if (isa<PoisonValue>(Shuf.getOperand(1)) && Cast0->hasOneUse() &&

      Shuf.isIdentityWithExtract()) {

    auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),

                                               PoisonValue::get(CastSrcTy),

                                               Shuf.getShuffleMask());

    return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());

  }


  auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));

  // Do we have 2 matching cast operands?

  if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||

      Cast0->getSrcTy() != Cast1->getSrcTy())

    return nullptr;


  // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?

  assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&

         "Expected fixed vector operands for casts and binary shuffle");

  if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())

    return nullptr;


  // At least one of the operands must have only one use (the shuffle).

  if (!Cast0->hasOneUse() && !Cast1->hasOneUse())

    return nullptr;


  // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)

  Value *X = Cast0->getOperand(0);

  Value *Y = Cast1->getOperand(0);

  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());

  return CastInst::Create(CastOpcode, NewShuf, ShufTy);

}


/// Try to fold an extract subvector operation.

static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {

  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);

  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))

    return nullptr;


  // Check if we are extracting all bits of an inserted scalar:

  // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type

  Value *X;

  if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&

      X->getType()->getPrimitiveSizeInBits() ==

          Shuf.getType()->getPrimitiveSizeInBits())

    return new BitCastInst(X, Shuf.getType());


  // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.

  Value *Y;

  ArrayRef<int> Mask;

  if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))

    return nullptr;


  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,

  // then combining may result in worse codegen.

  if (!Op0->hasOneUse())

    return nullptr;


  // We are extracting a subvector from a shuffle. Remove excess elements from

  // the 1st shuffle mask to eliminate the extract.

  //

  // This transform is conservatively limited to identity extracts because we do

  // not allow arbitrary shuffle mask creation as a target-independent transform

  // (because we can't guarantee that will lower efficiently).

  //

  // If the extracting shuffle has an poison mask element, it transfers to the

  // new shuffle mask. Otherwise, copy the original mask element. Example:

  //   shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->

  //   shuf X, Y, <C0, poison, C2, poison>

  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();

  SmallVector<int, 16> NewMask(NumElts);

  assert(NumElts < Mask.size() &&

         "Identity with extract must have less elements than its inputs");


  for (unsigned i = 0; i != NumElts; ++i) {

    int ExtractMaskElt = Shuf.getMaskValue(i);

    int MaskElt = Mask[i];

    NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;

  }

  return new ShuffleVectorInst(X, Y, NewMask);

}


/// Try to replace a shuffle with an insertelement or try to replace a shuffle

/// operand with the operand of an insertelement.

static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,

                                          InstCombinerImpl &IC) {

  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);

  SmallVector<int, 16> Mask;

  Shuf.getShuffleMask(Mask);


  int NumElts = Mask.size();

  int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();


  // This is a specialization of a fold in SimplifyDemandedVectorElts. We may

  // not be able to handle it there if the insertelement has >1 use.

  // If the shuffle has an insertelement operand but does not choose the

  // inserted scalar element from that value, then we can replace that shuffle

  // operand with the source vector of the insertelement.

  Value *X;

  uint64_t IdxC;

  if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {

    // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask

    if (!is_contained(Mask, (int)IdxC))

      return IC.replaceOperand(Shuf, 0, X);

  }

  if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {

    // Offset the index constant by the vector width because we are checking for

    // accesses to the 2nd vector input of the shuffle.

    IdxC += InpNumElts;

    // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask

    if (!is_contained(Mask, (int)IdxC))

      return IC.replaceOperand(Shuf, 1, X);

  }

  // For the rest of the transform, the shuffle must not change vector sizes.

  // TODO: This restriction could be removed if the insert has only one use

  //       (because the transform would require a new length-changing shuffle).

  if (NumElts != InpNumElts)

    return nullptr;


  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'

  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {

    // We need an insertelement with a constant index.

    if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),

                               m_ConstantInt(IndexC))))

      return false;


    // Test the shuffle mask to see if it splices the inserted scalar into the

    // operand 1 vector of the shuffle.

    int NewInsIndex = -1;

    for (int i = 0; i != NumElts; ++i) {

      // Ignore undef mask elements.

      if (Mask[i] == -1)

        continue;


      // The shuffle takes elements of operand 1 without lane changes.

      if (Mask[i] == NumElts + i)

        continue;


      // The shuffle must choose the inserted scalar exactly once.

      if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())

        return false;


      // The shuffle is placing the inserted scalar into element i.

      NewInsIndex = i;

    }


    assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");


    // Index is updated to the potentially translated insertion lane.

    IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);

    return true;

  };


  // If the shuffle is unnecessary, insert the scalar operand directly into

  // operand 1 of the shuffle. Example:

  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0

  Value *Scalar;

  ConstantInt *IndexC;

  if (isShufflingScalarIntoOp1(Scalar, IndexC))

    return InsertElementInst::Create(V1, Scalar, IndexC);


  // Try again after commuting shuffle. Example:

  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->

  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3

  std::swap(V0, V1);

  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);

  if (isShufflingScalarIntoOp1(Scalar, IndexC))

    return InsertElementInst::Create(V1, Scalar, IndexC);


  return nullptr;

}


static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {

  // Match the operands as identity with padding (also known as concatenation

  // with undef) shuffles of the same source type. The backend is expected to

  // recreate these concatenations from a shuffle of narrow operands.

  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));

  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));

  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||

      !Shuffle1 || !Shuffle1->isIdentityWithPadding())

    return nullptr;


  // We limit this transform to power-of-2 types because we expect that the

  // backend can convert the simplified IR patterns to identical nodes as the

  // original IR.

  // TODO: If we can verify the same behavior for arbitrary types, the

  //       power-of-2 checks can be removed.

  Value *X = Shuffle0->getOperand(0);

  Value *Y = Shuffle1->getOperand(0);

  if (X->getType() != Y->getType() ||

      !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||

      !isPowerOf2_32(

          cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||

      !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||

      match(X, m_Undef()) || match(Y, m_Undef()))

    return nullptr;

  assert(match(Shuffle0->getOperand(1), m_Undef()) &&

         match(Shuffle1->getOperand(1), m_Undef()) &&

         "Unexpected operand for identity shuffle");


  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source

  // operands directly by adjusting the shuffle mask to account for the narrower

  // types:

  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'

  int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();

  int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();

  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");


  ArrayRef<int> Mask = Shuf.getShuffleMask();

  SmallVector<int, 16> NewMask(Mask.size(), -1);

  for (int i = 0, e = Mask.size(); i != e; ++i) {

    if (Mask[i] == -1)

      continue;


    // If this shuffle is choosing an undef element from 1 of the sources, that

    // element is undef.

    if (Mask[i] < WideElts) {

      if (Shuffle0->getMaskValue(Mask[i]) == -1)

        continue;

    } else {

      if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)

        continue;

    }


    // If this shuffle is choosing from the 1st narrow op, the mask element is

    // the same. If this shuffle is choosing from the 2nd narrow op, the mask

    // element is offset down to adjust for the narrow vector widths.

    if (Mask[i] < WideElts) {

      assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");

      NewMask[i] = Mask[i];

    } else {

      assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");

      NewMask[i] = Mask[i] - (WideElts - NarrowElts);

    }

  }

  return new ShuffleVectorInst(X, Y, NewMask);

}


// Splatting the first element of the result of a BinOp, where any of the

// BinOp's operands are the result of a first element splat can be simplified to

// splatting the first element of the result of the BinOp

Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {

  if (!match(SVI.getOperand(1), m_Poison()) ||

      !match(SVI.getShuffleMask(), m_ZeroMask()) ||

      !SVI.getOperand(0)->hasOneUse())

    return nullptr;


  Value *Op0 = SVI.getOperand(0);

  Value *X, *Y;

  if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Poison(), m_ZeroMask()),

                          m_Value(Y))) &&

      !match(Op0, m_BinOp(m_Value(X),

                          m_Shuffle(m_Value(Y), m_Poison(), m_ZeroMask()))))

    return nullptr;

  if (X->getType() != Y->getType())

    return nullptr;


  auto *BinOp = cast<BinaryOperator>(Op0);

  if (!isSafeToSpeculativelyExecuteWithVariableReplaced(BinOp))

    return nullptr;


  Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);

  if (auto NewBOI = dyn_cast<Instruction>(NewBO))

    NewBOI->copyIRFlags(BinOp);


  return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());

}


Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {

  Value *LHS = SVI.getOperand(0);

  Value *RHS = SVI.getOperand(1);

  SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);

  if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),

                                          SVI.getType(), ShufQuery))

    return replaceInstUsesWith(SVI, V);


  if (Instruction *I = simplifyBinOpSplats(SVI))

    return I;


  // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in

  // order to support scalable vectors.

  if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))

    return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));


  if (isa<ScalableVectorType>(LHS->getType()))

    return nullptr;


  unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();

  unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();


  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)

  //

  // if X and Y are of the same (vector) type, and the element size is not

  // changed by the bitcasts, we can distribute the bitcasts through the

  // shuffle, hopefully reducing the number of instructions. We make sure that

  // at least one bitcast only has one use, so we don't *increase* the number of

  // instructions here.

  Value *X, *Y;

  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&

      X->getType()->isVectorTy() && X->getType() == Y->getType() &&

      X->getType()->getScalarSizeInBits() ==

          SVI.getType()->getScalarSizeInBits() &&

      (LHS->hasOneUse() || RHS->hasOneUse())) {

    Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),

                                           SVI.getName() + ".uncasted");

    return new BitCastInst(V, SVI.getType());

  }


  ArrayRef<int> Mask = SVI.getShuffleMask();


  // Peek through a bitcasted shuffle operand by scaling the mask. If the

  // simulated shuffle can simplify, then this shuffle is unnecessary:

  // shuf (bitcast X), undef, Mask --> bitcast X'

  // TODO: This could be extended to allow length-changing shuffles.

  //       The transform might also be obsoleted if we allowed canonicalization

  //       of bitcasted shuffles.

  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&

      X->getType()->isVectorTy() && VWidth == LHSWidth) {

    // Try to create a scaled mask constant.

    auto *XType = cast<FixedVectorType>(X->getType());

    unsigned XNumElts = XType->getNumElements();

    SmallVector<int, 16> ScaledMask;

    if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {

      // If the shuffled source vector simplifies, cast that value to this

      // shuffle's type.

      if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),

                                              ScaledMask, XType, ShufQuery))

        return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());

    }

  }


  // shuffle x, x, mask --> shuffle x, undef, mask'

  if (LHS == RHS) {

    assert(!match(RHS, m_Undef()) &&

           "Shuffle with 2 undef ops not simplified?");

    return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));

  }


  // shuffle undef, x, mask --> shuffle x, undef, mask'

  if (match(LHS, m_Undef())) {

    SVI.commute();

    return &SVI;

  }


  if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))

    return I;


  if (Instruction *I = foldSelectShuffle(SVI))

    return I;


  if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))

    return I;


  if (Instruction *I = narrowVectorSelect(SVI, Builder))

    return I;


  if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))

    return I;


  if (Instruction *I = foldCastShuffle(SVI, Builder))

    return I;


  APInt PoisonElts(VWidth, 0);

  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));

  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {

    if (V != &SVI)

      return replaceInstUsesWith(SVI, V);

    return &SVI;

  }


  if (Instruction *I = foldIdentityExtractShuffle(SVI))

    return I;


  // These transforms have the potential to lose undef knowledge, so they are

  // intentionally placed after SimplifyDemandedVectorElts().

  if (Instruction *I = foldShuffleWithInsert(SVI, *this))

    return I;

  if (Instruction *I = foldIdentityPaddedShuffles(SVI))

    return I;


  if (match(RHS, m_Constant())) {

    if (auto *SI = dyn_cast<SelectInst>(LHS)) {

      // We cannot do this fold for elementwise select since ShuffleVector is

      // not elementwise.

      if (SI->getCondition()->getType()->isIntegerTy() &&

          (isa<PoisonValue>(RHS) ||

           isGuaranteedNotToBePoison(SI->getCondition()))) {

        if (Instruction *I = FoldOpIntoSelect(SVI, SI))

          return I;

      }

    }

    if (auto *PN = dyn_cast<PHINode>(LHS)) {

      if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))

        return I;

    }

  }


  if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {

    Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);

    return replaceInstUsesWith(SVI, V);

  }


  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to

  // a non-vector type. We can instead bitcast the original vector followed by

  // an extract of the desired element:

  //

  //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,

  //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>

  //   %1 = bitcast <4 x i8> %sroa to i32

  // Becomes:

  //   %bc = bitcast <16 x i8> %in to <4 x i32>

  //   %ext = extractelement <4 x i32> %bc, i32 0

  //

  // If the shuffle is extracting a contiguous range of values from the input

  // vector then each use which is a bitcast of the extracted size can be

  // replaced. This will work if the vector types are compatible, and the begin

  // index is aligned to a value in the casted vector type. If the begin index

  // isn't aligned then we can shuffle the original vector (keeping the same

  // vector type) before extracting.

  //

  // This code will bail out if the target type is fundamentally incompatible

  // with vectors of the source type.

  //

  // Example of <16 x i8>, target type i32:

  // Index range [4,8):         v-----------v Will work.

  //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

  //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |

  //     <4 x i32>: |           |           |           |           |

  //                +-----------+-----------+-----------+-----------+

  // Index range [6,10):              ^-----------^ Needs an extra shuffle.

  // Target type i40:           ^--------------^ Won't work, bail.

  bool MadeChange = false;

  if (isShuffleExtractingFromLHS(SVI, Mask)) {

    Value *V = LHS;

    unsigned MaskElems = Mask.size();

    auto *SrcTy = cast<FixedVectorType>(V->getType());

    unsigned VecBitWidth = DL.getTypeSizeInBits(SrcTy);

    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());

    assert(SrcElemBitWidth && "vector elements must have a bitwidth");

    unsigned SrcNumElems = SrcTy->getNumElements();

    SmallVector<BitCastInst *, 8> BCs;

    DenseMap<Type *, Value *> NewBCs;

    for (User *U : SVI.users())

      if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {

        // Only visit bitcasts that weren't previously handled.

        if (BC->use_empty())

          continue;

        // Prefer to combine bitcasts of bitcasts before attempting this fold.

        if (BC->hasOneUse()) {

          auto *BC2 = dyn_cast<BitCastInst>(BC->user_back());

          if (BC2 && isEliminableCastPair(BC, BC2))

            continue;

        }

        BCs.push_back(BC);

      }

    for (BitCastInst *BC : BCs) {

      unsigned BegIdx = Mask.front();

      Type *TgtTy = BC->getDestTy();

      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);

      if (!TgtElemBitWidth)

        continue;

      unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;

      bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;

      bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);

      if (!VecBitWidthsEqual)

        continue;

      if (!VectorType::isValidElementType(TgtTy))

        continue;

      auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);

      if (!BegIsAligned) {

        // Shuffle the input so [0,NumElements) contains the output, and

        // [NumElems,SrcNumElems) is undef.

        SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);

        for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)

          ShuffleMask[I] = Idx;

        V = Builder.CreateShuffleVector(V, ShuffleMask,

                                        SVI.getName() + ".extract");

        BegIdx = 0;

      }

      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;

      assert(SrcElemsPerTgtElem);

      BegIdx /= SrcElemsPerTgtElem;

      auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);

      if (Inserted)

        It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");

      auto *Ext = Builder.CreateExtractElement(It->second, BegIdx,

                                               SVI.getName() + ".extract");

      // The shufflevector isn't being replaced: the bitcast that used it

      // is. InstCombine will visit the newly-created instructions.

      replaceInstUsesWith(*BC, Ext);

      MadeChange = true;

    }

  }


  // If the LHS is a shufflevector itself, see if we can combine it with this

  // one without producing an unusual shuffle.

  // Cases that might be simplified:

  // 1.

  // x1=shuffle(v1,v2,mask1)

  //  x=shuffle(x1,undef,mask)

  //        ==>

  //  x=shuffle(v1,undef,newMask)

  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1

  // 2.

  // x1=shuffle(v1,undef,mask1)

  //  x=shuffle(x1,x2,mask)

  // where v1.size() == mask1.size()

  //        ==>

  //  x=shuffle(v1,x2,newMask)

  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]

  // 3.

  // x2=shuffle(v2,undef,mask2)

  //  x=shuffle(x1,x2,mask)

  // where v2.size() == mask2.size()

  //        ==>

  //  x=shuffle(x1,v2,newMask)

  // newMask[i] = (mask[i] < x1.size())

  //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()

  // 4.

  // x1=shuffle(v1,undef,mask1)

  // x2=shuffle(v2,undef,mask2)

  //  x=shuffle(x1,x2,mask)

  // where v1.size() == v2.size()

  //        ==>

  //  x=shuffle(v1,v2,newMask)

  // newMask[i] = (mask[i] < x1.size())

  //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()

  //

  // Here we are really conservative:

  // we are absolutely afraid of producing a shuffle mask not in the input

  // program, because the code gen may not be smart enough to turn a merged

  // shuffle into two specific shuffles: it may produce worse code.  As such,

  // we only merge two shuffles if the result is either a splat or one of the

  // input shuffle masks.  In this case, merging the shuffles just removes

  // one instruction, which we know is safe.  This is good for things like

  // turning: (splat(splat)) -> splat, or

  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]

  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);

  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);

  if (LHSShuffle)

    if (!match(LHSShuffle->getOperand(1), m_Poison()) &&

        !match(RHS, m_Poison()))

      LHSShuffle = nullptr;

  if (RHSShuffle)

    if (!match(RHSShuffle->getOperand(1), m_Poison()))

      RHSShuffle = nullptr;

  if (!LHSShuffle && !RHSShuffle)

    return MadeChange ? &SVI : nullptr;


  Value* LHSOp0 = nullptr;

  Value* LHSOp1 = nullptr;

  Value* RHSOp0 = nullptr;

  unsigned LHSOp0Width = 0;

  unsigned RHSOp0Width = 0;

  if (LHSShuffle) {

    LHSOp0 = LHSShuffle->getOperand(0);

    LHSOp1 = LHSShuffle->getOperand(1);

    LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();

  }

  if (RHSShuffle) {

    RHSOp0 = RHSShuffle->getOperand(0);

    RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();

  }

  Value* newLHS = LHS;

  Value* newRHS = RHS;

  if (LHSShuffle) {

    // case 1

    if (match(RHS, m_Poison())) {

      newLHS = LHSOp0;

      newRHS = LHSOp1;

    }

    // case 2 or 4

    else if (LHSOp0Width == LHSWidth) {

      newLHS = LHSOp0;

    }

  }

  // case 3 or 4

  if (RHSShuffle && RHSOp0Width == LHSWidth) {

    newRHS = RHSOp0;

  }

  // case 4

  if (LHSOp0 == RHSOp0) {

    newLHS = LHSOp0;

    newRHS = nullptr;

  }


  if (newLHS == LHS && newRHS == RHS)

    return MadeChange ? &SVI : nullptr;


  ArrayRef<int> LHSMask;

  ArrayRef<int> RHSMask;

  if (newLHS != LHS)

    LHSMask = LHSShuffle->getShuffleMask();

  if (RHSShuffle && newRHS != RHS)

    RHSMask = RHSShuffle->getShuffleMask();


  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;

  SmallVector<int, 16> newMask;

  bool isSplat = true;

  int SplatElt = -1;

  // Create a new mask for the new ShuffleVectorInst so that the new

  // ShuffleVectorInst is equivalent to the original one.

  for (unsigned i = 0; i < VWidth; ++i) {

    int eltMask;

    if (Mask[i] < 0) {

      // This element is a poison value.

      eltMask = -1;

    } else if (Mask[i] < (int)LHSWidth) {

      // This element is from left hand side vector operand.

      //

      // If LHS is going to be replaced (case 1, 2, or 4), calculate the

      // new mask value for the element.

      if (newLHS != LHS) {

        eltMask = LHSMask[Mask[i]];

        // If the value selected is an poison value, explicitly specify it

        // with a -1 mask value.

        if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))

          eltMask = -1;

      } else

        eltMask = Mask[i];

    } else {

      // This element is from right hand side vector operand

      //

      // If the value selected is a poison value, explicitly specify it

      // with a -1 mask value. (case 1)

      if (match(RHS, m_Poison()))

        eltMask = -1;

      // If RHS is going to be replaced (case 3 or 4), calculate the

      // new mask value for the element.

      else if (newRHS != RHS) {

        eltMask = RHSMask[Mask[i]-LHSWidth];

        // If the value selected is an poison value, explicitly specify it

        // with a -1 mask value.

        if (eltMask >= (int)RHSOp0Width) {

          assert(match(RHSShuffle->getOperand(1), m_Poison()) &&

                 "should have been check above");

          eltMask = -1;

        }

      } else

        eltMask = Mask[i]-LHSWidth;


      // If LHS's width is changed, shift the mask value accordingly.

      // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any

      // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.

      // If newRHS == newLHS, we want to remap any references from newRHS to

      // newLHS so that we can properly identify splats that may occur due to

      // obfuscation across the two vectors.

      if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)

        eltMask += newLHSWidth;

    }


    // Check if this could still be a splat.

    if (eltMask >= 0) {

      if (SplatElt >= 0 && SplatElt != eltMask)

        isSplat = false;

      SplatElt = eltMask;

    }


    newMask.push_back(eltMask);

  }


  // If the result mask is equal to one of the original shuffle masks,

  // or is a splat, do the replacement.

  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {

    if (!newRHS)

      newRHS = PoisonValue::get(newLHS->getType());

    return new ShuffleVectorInst(newLHS, newRHS, newMask);

  }


  return MadeChange ? &SVI : nullptr;

}

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

S1
constexpr LLT S1
Definition: AMDGPULegalizerInfo.cpp:294

PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:98

APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: ARMSLSHardening.cpp:73

ArrayRef.h

Casting.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition: DeadArgumentElimination.cpp:347

DenseMap.h
This file defines the DenseMap class.

DerivedTypes.h

X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:164

BasicBlock.h

Constant.h

Instruction.h

Operator.h

Type.h

User.h

Value.h

InstCombineInternal.h
This file provides internal interfaces used to implement the InstCombine.

foldConstantInsEltIntoShuffle
static Instruction * foldConstantInsEltIntoShuffle(InsertElementInst &InsElt)
insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex --> shufflevector X,...
Definition: InstCombineVectorOps.cpp:1491

evaluateInDifferentElementOrder
static Value * evaluateInDifferentElementOrder(Value *V, ArrayRef< int > Mask, IRBuilderBase &Builder)
Definition: InstCombineVectorOps.cpp:2010

collectSingleShuffleElements
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl< int > &Mask)
If V is a shuffle of values that ONLY returns elements from either LHS or RHS, return the shuffle mas...
Definition: InstCombineVectorOps.cpp:640

collectShuffleElements
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl< int > &Mask, Value *PermittedRHS, InstCombinerImpl &IC, bool &Rerun)
Definition: InstCombineVectorOps.cpp:800

findDemandedEltsByAllUsers
static APInt findDemandedEltsByAllUsers(Value *V)
Find union of elements of V demanded by all its users.
Definition: InstCombineVectorOps.cpp:367

foldTruncInsEltPair
static Instruction * foldTruncInsEltPair(InsertElementInst &InsElt, bool IsBigEndian, InstCombiner::BuilderTy &Builder)
If we are inserting 2 halves of a value into adjacent elements of a vector, try to convert to a singl...
Definition: InstCombineVectorOps.cpp:1628

foldSelectShuffleWith1Binop
static Instruction * foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf, const SimplifyQuery &SQ)
Definition: InstCombineVectorOps.cpp:2231

foldIdentityPaddedShuffles
static Instruction * foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:2781

foldIdentityExtractShuffle
static Instruction * foldIdentityExtractShuffle(ShuffleVectorInst &Shuf)
Try to fold an extract subvector operation.
Definition: InstCombineVectorOps.cpp:2643

foldInsEltIntoSplat
static Instruction * foldInsEltIntoSplat(InsertElementInst &InsElt)
Try to fold an insert element into an existing splat shuffle by changing the shuffle's mask to includ...
Definition: InstCombineVectorOps.cpp:1372

ShuffleOps
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
Definition: InstCombineVectorOps.cpp:798

foldShuffleWithInsert
static Instruction * foldShuffleWithInsert(ShuffleVectorInst &Shuf, InstCombinerImpl &IC)
Try to replace a shuffle with an insertelement or try to replace a shuffle operand with the operand o...
Definition: InstCombineVectorOps.cpp:2693

canonicalizeInsertSplat
static Instruction * canonicalizeInsertSplat(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
If we have an insert of a scalar to a non-zero element of an undefined vector and then shuffle that v...
Definition: InstCombineVectorOps.cpp:2300

foldTruncShuffle
static Instruction * foldTruncShuffle(ShuffleVectorInst &Shuf, bool IsBigEndian)
Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
Definition: InstCombineVectorOps.cpp:2471

replaceExtractElements
static bool replaceExtractElements(InsertElementInst *InsElt, ExtractElementInst *ExtElt, InstCombinerImpl &IC)
If we have insertion into a vector that is wider than the vector that we are extracting from,...
Definition: InstCombineVectorOps.cpp:713

cheapToScalarize
static bool cheapToScalarize(Value *V, Value *EI)
Return true if the value is cheaper to scalarize than it is to leave as a vector operation.
Definition: InstCombineVectorOps.cpp:58

buildNew
static Value * buildNew(Instruction *I, ArrayRef< Value * > NewOps, IRBuilderBase &Builder)
Rebuild a new instruction just like 'I' but with the new operands given.
Definition: InstCombineVectorOps.cpp:1934

canEvaluateShuffled
static bool canEvaluateShuffled(Value *V, ArrayRef< int > Mask, unsigned Depth=5)
Return true if we can evaluate the specified expression tree if the vector elements were shuffled in ...
Definition: InstCombineVectorOps.cpp:1846

foldSelectShuffleOfSelectShuffle
static Instruction * foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf)
A select shuffle of a select shuffle with a shared operand can be reduced to a single select shuffle.
Definition: InstCombineVectorOps.cpp:2184

hoistInsEltConst
static Instruction * hoistInsEltConst(InsertElementInst &InsElt2, InstCombiner::BuilderTy &Builder)
If we have an insertelement instruction feeding into another insertelement and the 2nd is inserting a...
Definition: InstCombineVectorOps.cpp:1468

findDemandedEltsBySingleUser
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr)
Find elements of V demanded by UserInstr.
Definition: InstCombineVectorOps.cpp:323

foldShuffleOfUnaryOps
static Instruction * foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Canonicalize FP negate/abs after shuffle.
Definition: InstCombineVectorOps.cpp:2546

foldCastShuffle
static Instruction * foldCastShuffle(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Canonicalize casts after shuffle.
Definition: InstCombineVectorOps.cpp:2579

narrowInsElt
static Instruction * narrowInsElt(InsertElementInst &InsElt, InstCombiner::BuilderTy &Builder)
If both the base vector and the inserted element are extended from the same type, do the insert eleme...
Definition: InstCombineVectorOps.cpp:1595

isShuffleEquivalentToSelect
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:1270

foldInsSequenceIntoSplat
static Instruction * foldInsSequenceIntoSplat(InsertElementInst &InsElt)
Turn a chain of inserts that splats a value into an insert + shuffle: insertelt(insertelt(insertelt(i...
Definition: InstCombineVectorOps.cpp:1298

foldInsEltIntoIdentityShuffle
static Instruction * foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt)
Try to fold an extract+insert element into an existing identity shuffle by changing the shuffle's mas...
Definition: InstCombineVectorOps.cpp:1409

getPreferredVectorIndex
static ConstantInt * getPreferredVectorIndex(ConstantInt *IndexC)
Given a constant index for a extractelement or insertelement instruction, return it with the canonica...
Definition: InstCombineVectorOps.cpp:390

isShuffleExtractingFromLHS
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, ArrayRef< int > Mask)
Definition: InstCombineVectorOps.cpp:2118

getAlternateBinop
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL)
Binops may be transformed into binops with different opcodes and operands.
Definition: InstCombineVectorOps.cpp:2149

InstCombiner.h
This file provides the interface for the instcombine pass implementation.

InstrTypes.h

InstructionSimplify.h

Instructions.h

isSplat
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
Definition: LowerMatrixIntrinsics.cpp:110

I
#define I(x, y, z)
Definition: MD5.cpp:58

II
uint64_t IntrinsicInst * II
Definition: NVVMIntrRange.cpp:46

Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")

PatternMatch.h

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition: RISCVRedundantCopyElimination.cpp:71

Opc
auto Opc
Definition: RISCVRedundantCopyElimination.cpp:75

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

SmallBitVector.h
This file implements the SmallBitVector class.

SmallVector.h
This file defines the SmallVector class.

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167

Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:77

getOpcode
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:247

VectorUtils.h

narrowVectorSelect
static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget)
If both arms of a vector select are concatenated vectors, split the select, and concatenate the resul...
Definition: X86ISelLowering.cpp:47354

RHS
Value * RHS
Definition: X86PartialReduction.cpp:74

LHS
Value * LHS
Definition: X86PartialReduction.cpp:73

IV
static const uint32_t IV[8]
Definition: blake3_impl.h:83

VectorType
Definition: ItaniumDemangle.h:1189

bool

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:78

llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234

llvm::APInt::zextOrTrunc
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1033

llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1512

llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330

llvm::APInt::isAllOnes
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371

llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111

llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:150

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147

llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:191

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:62

llvm::BasicBlock::getFirstInsertionPt
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393

llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213

llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233

llvm::BinaryOperator
Definition: InstrTypes.h:171

llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:374

llvm::BinaryOperator::Create
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition: Instructions.cpp:2703

llvm::BinaryOperator::CreateWithCopiedFlags
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition: InstrTypes.h:219

llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:4997

llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:1545

llvm::CastInst::Create
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3039

llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:666

llvm::CmpInst::CreateWithCopiedFlags
static LLVM_ABI CmpInst * CreateWithCopiedFlags(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Instruction *FlagsSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate, the two operands and the instructio...
Definition: Instructions.cpp:3528

llvm::CmpInst::getOpcode
OtherOps getOpcode() const
Get the opcode casted to the right type.
Definition: InstrTypes.h:762

llvm::CmpPredicate
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:23

llvm::ConstantAggregateZero::get
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1677

llvm::ConstantExpr::getShuffleVector
static LLVM_ABI Constant * getShuffleVector(Constant *V1, Constant *V2, ArrayRef< int > Mask, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2609

llvm::ConstantExpr::getBinOpIdentity
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2694

llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:87

llvm::ConstantInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:264

llvm::ConstantInt::getBitWidth
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:157

llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163

llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:154

llvm::ConstantVector::get
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1423

llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:43

llvm::Constant::getAllOnesValue
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420

llvm::Constant::getAggregateElement
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:435

llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition: DWARFExpression.h:33

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63

llvm::DataLayout::isBigEndian
bool isBigEndian() const
Definition: DataLayout.h:199

llvm::DataLayout::getTypeSizeInBits
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:674

llvm::DenseMapBase::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:245

llvm::DenseMap
Definition: DenseMap.h:730

llvm::ElementCount
Definition: TypeSize.h:301

llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1808

llvm::ExtractElementInst::Create
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:1821

llvm::ExtractElementInst::getVectorOperand
Value * getVectorOperand()
Definition: Instructions.h:1832

llvm::ExtractElementInst::getIndexOperand
Value * getIndexOperand()
Definition: Instructions.h:1833

llvm::ExtractElementInst::getVectorOperandType
VectorType * getVectorOperandType() const
Definition: Instructions.h:1837

llvm::FixedVectorType::get
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:803

llvm::Function
Definition: Function.h:64

llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:949

llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:973

llvm::GetElementPtrInst::setNoWrapFlags
LLVM_ABI void setNoWrapFlags(GEPNoWrapFlags NW)
Set nowrap flags for GEP instruction.
Definition: Instructions.cpp:1621

llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:409

llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114

llvm::IRBuilderBase::CreateFCmp
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449

llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2571

llvm::IRBuilderBase::CreateInsertValue
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2625

llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2559

llvm::IRBuilderBase::CreateLShr
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1513

llvm::IRBuilderBase::CreateCast
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2238

llvm::IRBuilderBase::CreateGEP
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1923

llvm::IRBuilderBase::getInt64
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition: IRBuilder.h:527

llvm::IRBuilderBase::CreatePHI
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2494

llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2204

llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2593

llvm::IRBuilderBase::CreateTrunc
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068

llvm::IRBuilderBase::CreateBinOp
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1708

llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:207

llvm::IRBuilderBase::CreateICmp
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2439

llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >

llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1867

llvm::InsertElementInst::Create
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:1881

llvm::InsertElementInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:1895

llvm::InsertValueInst
This instruction inserts a struct field of array element value into an aggregate value.
Definition: Instructions.h:2523

llvm::InstCombinerImpl
Definition: InstCombineInternal.h:62

llvm::InstCombinerImpl::FoldOpIntoSelect
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Definition: InstructionCombining.cpp:1767

llvm::InstCombinerImpl::foldOpIntoPhi
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Definition: InstructionCombining.cpp:1860

llvm::InstCombinerImpl::SimplifyDemandedVectorElts
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Definition: InstCombineSimplifyDemanded.cpp:1395

llvm::InstCombinerImpl::foldSelectShuffle
Instruction * foldSelectShuffle(ShuffleVectorInst &Shuf)
Try to fold shuffles that are the equivalent of a vector select.
Definition: InstCombineVectorOps.cpp:2332

llvm::InstCombinerImpl::visitInsertValueInst
Instruction * visitInsertValueInst(InsertValueInst &IV)
Try to find redundant insertvalue instructions, like the following ones: %0 = insertvalue { i8,...
Definition: InstCombineVectorOps.cpp:1233

llvm::InstCombinerImpl::visitInsertElementInst
Instruction * visitInsertElementInst(InsertElementInst &IE)
Definition: InstCombineVectorOps.cpp:1691

llvm::InstCombinerImpl::visitExtractElementInst
Instruction * visitExtractElementInst(ExtractElementInst &EI)
Definition: InstCombineVectorOps.cpp:398

llvm::InstCombinerImpl::simplifyBinOpSplats
Instruction * simplifyBinOpSplats(ShuffleVectorInst &SVI)
Definition: InstCombineVectorOps.cpp:2850

llvm::InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse
Instruction * foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI)
Look for chain of insertvalue's that fully define an aggregate, and trace back the values inserted,...
Definition: InstCombineVectorOps.cpp:887

llvm::InstCombinerImpl::visitShuffleVectorInst
Instruction * visitShuffleVectorInst(ShuffleVectorInst &SVI)
Definition: InstCombineVectorOps.cpp:2877

llvm::InstCombiner::SQ
SimplifyQuery SQ
Definition: InstCombiner.h:77

llvm::InstCombiner::replaceInstUsesWith
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:388

llvm::InstCombiner::Worklist
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:65

llvm::InstCombiner::InsertNewInstWith
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:377

llvm::InstCombiner::DL
const DataLayout & DL
Definition: InstCombiner.h:76

llvm::InstCombiner::addToWorklist
void addToWorklist(Instruction *I)
Definition: InstCombiner.h:332

llvm::InstCombiner::replaceOperand
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:412

llvm::InstCombiner::getSafeVectorConstantForBinop
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
Definition: InstCombiner.h:280

llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:61

llvm::InstCombiner::getSimplifyQuery
const SimplifyQuery & getSimplifyQuery() const
Definition: InstCombiner.h:338

llvm::InstructionWorklist::addValue
void addValue(Value *V)
Add value to the worklist if it is an instruction.
Definition: InstructionWorklist.h:51

llvm::Instruction
Definition: Instruction.h:69

llvm::Instruction::hasNoUnsignedWrap
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
Definition: Instruction.cpp:411

llvm::Instruction::hasNoSignedWrap
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
Definition: Instruction.cpp:418

llvm::Instruction::copyIRFlags
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
Definition: Instruction.cpp:687

llvm::Instruction::getModule
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:78

llvm::Instruction::andIRFlags
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
Definition: Instruction.cpp:731

llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:171

llvm::Instruction::isExact
LLVM_ABI bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
Definition: Instruction.cpp:584

llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312

llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:998

llvm::Instruction::isShift
bool isShift() const
Definition: Instruction.h:320

llvm::Instruction::dropPoisonGeneratingFlags
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
Definition: Instruction.cpp:434

llvm::Instruction::isIntDivRem
bool isIntDivRem() const
Definition: Instruction.h:318

llvm::Instruction::CastOps
CastOps
Definition: Instruction.h:1012

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49

llvm::MapVector::try_emplace
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Definition: MapVector.h:107

llvm::PHINode
Definition: Instructions.h:2638

llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2773

llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2733

llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2713

llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2709

llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2673

llvm::PoisonValue
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition: Constants.h:1468

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885

llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1689

llvm::SelectInst::Create
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
Definition: Instructions.h:1714

llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:1934

llvm::ShuffleVectorInst::changesLength
bool changesLength() const
Return true if this shuffle returns a vector with a different number of elements than its source vect...
Definition: Instructions.h:2014

llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition: Instructions.h:1984

llvm::ShuffleVectorInst::isSelectMask
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
Definition: Instructions.cpp:1989

llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:1975

llvm::ShuffleVectorInst::increasesLength
bool increasesLength() const
Return true if this shuffle returns a vector with a greater number of elements than its source vector...
Definition: Instructions.h:2025

llvm::ShuffleVectorInst::isIdentityWithExtract
LLVM_ABI bool isIdentityWithExtract() const
Return true if this shuffle extracts the first N elements of exactly one source vector.
Definition: Instructions.cpp:2200

llvm::ShuffleVectorInst::getShuffleMask
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Definition: Instructions.cpp:1857

llvm::ShuffleVectorInst::isSelect
bool isSelect() const
Return true if this shuffle chooses elements from its source vectors without lane crossings and all o...
Definition: Instructions.h:2124

llvm::ShuffleVectorInst::isIdentityMask
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
Definition: Instructions.cpp:1947

llvm::ShuffleVectorInst::commuteShuffleMask
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
Definition: Instructions.h:2343

llvm::ShuffleVectorInst::commute
LLVM_ABI void commute()
Swap the operands and adjust the mask to preserve the semantics of the instruction.
Definition: Instructions.cpp:1769

llvm::SmallBitVector
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
Definition: SmallBitVector.h:35

llvm::SmallBitVector::all
bool all() const
Returns true if all bits are set.
Definition: SmallBitVector.h:216

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:79

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574

llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:414

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197

llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4555

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Type::getPrimitiveSizeInBits
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.

llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273

llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:246

llvm::Type::getIntNTy
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)

llvm::Type::ArrayTyID
@ ArrayTyID
Arrays.
Definition: Type.h:74

llvm::Type::StructTyID
@ StructTyID
Structures.
Definition: Type.h:73

llvm::Type::getInt64Ty
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)

llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184

llvm::Type::getScalarSizeInBits
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240

llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136

llvm::Type::getStructNumElements
LLVM_ABI unsigned getStructNumElements() const

llvm::Type::getIntegerBitWidth
LLVM_ABI unsigned getIntegerBitWidth() const

llvm::Type::getArrayNumElements
LLVM_ABI uint64_t getArrayNumElements() const

llvm::UnaryOperator
Definition: InstrTypes.h:101

llvm::UnaryOperator::CreateWithCopiedFlags
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition: InstrTypes.h:139

llvm::UnaryOperator::getOpcode
UnaryOps getOpcode() const
Definition: InstrTypes.h:154

llvm::UndefValue::get
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1866

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35

llvm::User
Definition: User.h:44

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:232

llvm::Value
LLVM Value Representation.
Definition: Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256

llvm::Value::DoPHITranslation
LLVM_ABI const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:1090

llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439

llvm::Value::replaceAllUsesWith
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546

llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:426

llvm::Value::getContext
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098

llvm::Value::getName
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322

llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:695

llvm::VectorType::get
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.

llvm::VectorType::isValidElementType
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.

llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:463

llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172

llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition: ilist_node.h:34

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:134

uint64_t

unsigned

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:164

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::Intrinsic::getOrInsertDeclaration
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751

llvm::MIPatternMatch::m_ZeroInt
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
Definition: MIPatternMatch.h:277

llvm::MIPatternMatch::m_Neg
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
Definition: MIPatternMatch.h:928

llvm::MIPatternMatch::m_OneUse
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Definition: MIPatternMatch.h:56

llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:157

llvm::PatternMatch::m_Poison
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
Definition: PatternMatch.h:160

llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100

llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165

llvm::PatternMatch::m_Trunc
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:2211

llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:1060

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49

llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:962

llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1966

llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168

llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1928

llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:2040

llvm::PatternMatch::m_FPExt
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
Definition: PatternMatch.h:2312

llvm::PatternMatch::m_Load
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
Definition: PatternMatch.h:2052

llvm::PatternMatch::m_ZExt
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:2243

llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:105

llvm::PatternMatch::m_ImmConstant
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:931

llvm::PatternMatch::m_BitCast
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
Definition: PatternMatch.h:2150

llvm::PatternMatch::m_UnOp
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
Definition: PatternMatch.h:95

llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92

llvm::PatternMatch::m_LShr
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1320

llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition: PatternMatch.h:1236

llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152

llvm::PatternMatch::m_SExt
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:2237

llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612

llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1958

llvm::PatternMatch::m_FAbs
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
Definition: PatternMatch.h:2838

llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36

llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491

llvm::createUnaryMask
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition: VectorUtils.cpp:1184

llvm::simplifyShuffleVectorInst
LLVM_ABI Value * simplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef< int > Mask, Type *RetTy, const SimplifyQuery &Q)
Given operands for a ShuffleVectorInst, fold the result or return null.
Definition: InstructionSimplify.cpp:5569

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288

llvm::ComplexDeinterleavingOperation::Splat
@ Splat

llvm::simplifyInsertValueInst
LLVM_ABI Value * simplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an InsertValueInst, fold the result or return null.
Definition: InstructionSimplify.cpp:5178

llvm::ConstantFoldBinaryOpOperands
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
Definition: ConstantFolding.cpp:1317

llvm::PoisonMaskElem
constexpr int PoisonMaskElem
Definition: Instructions.h:1922

llvm::findScalarElement
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition: VectorUtils.cpp:303

llvm::isSafeToSpeculativelyExecuteWithVariableReplaced
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
Definition: ValueTracking.h:560

llvm::simplifyInsertElementInst
LLVM_ABI Value * simplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, const SimplifyQuery &Q)
Given operands for an InsertElement, fold the result or return null.
Definition: InstructionSimplify.cpp:5184

llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223

llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1980

llvm::isKnownNeverNaN
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Definition: ValueTracking.cpp:5964

llvm::predecessors
auto predecessors(const MachineBasicBlock *BB)
Definition: MachineBasicBlock.h:1422

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916

llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119

llvm::isGuaranteedNotToBePoison
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Definition: ValueTracking.cpp:7733

llvm::simplifyExtractElementInst
LLVM_ABI Value * simplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q)
Given operands for an ExtractElementInst, fold the result or return null.
Definition: InstructionSimplify.cpp:5300

llvm::scaleShuffleMaskElts
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
Definition: VectorUtils.cpp:624

llvm::getSplatIndex
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition: VectorUtils.cpp:367

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858

BinopElts
These are the ingredients in an alternate form binary operator as described below.
Definition: InstCombineVectorOps.cpp:2135

BinopElts::BinopElts
BinopElts(BinaryOperator::BinaryOps Opc=(BinaryOperator::BinaryOps) 0, Value *V0=nullptr, Value *V1=nullptr)
Definition: InstCombineVectorOps.cpp:2139

BinopElts::Opcode
BinaryOperator::BinaryOps Opcode
Definition: InstCombineVectorOps.cpp:2136

BinopElts::Op1
Value * Op1
Definition: InstCombineVectorOps.cpp:2138

BinopElts::Op0
Value * Op0
Definition: InstCombineVectorOps.cpp:2137

llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1988

llvm::PatternMatch::m_ZeroMask
Definition: PatternMatch.h:1997

llvm::SimplifyQuery
Definition: SimplifyQuery.h:71

llvm::SimplifyQuery::getWithInstruction
SimplifyQuery getWithInstruction(const Instruction *I) const
Definition: SimplifyQuery.h:108

llvm::SmallMapVector
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:249