//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantFoldIntegerCast(C, Ty, isSigned, DL);

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    if (Opc == Instruction::LShr || Opc == Instruction::AShr)
      Res->setIsExact(I->isExact());
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source. There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    Res = CastInst::Create(
        static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
    break;
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default:
        llvm_unreachable("Unsupported call!");
      case Intrinsic::vscale: {
        Function *Fn = Intrinsic::getOrInsertDeclaration(
            I->getModule(), Intrinsic::vscale, {Ty});
        Res = CallInst::Create(Fn->getFunctionType(), Fn);
        break;
      }
      }
    }
    break;
  case Instruction::ShuffleVector: {
    auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
    auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
    auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
    Value *Op0 = EvaluateInDifferentType(I->getOperand(0), FixedTy, isSigned);
    Value *Op1 = EvaluateInDifferentType(I->getOperand(1), FixedTy, isSigned);
    Res = new ShuffleVectorInst(Op0, Op1,
                                cast<ShuffleVectorInst>(I)->getShuffleMask());
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, I->getIterator());
}

Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}
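
// Illustrative IR (hand-written sketch, not taken from the regression
// tests): a zext followed by a trunc that still ends wider than the
// original source composes into a single zext, e.g.
//   %m = zext i8 %x to i64
//   %r = trunc i64 %m to i32
// is eliminable as
//   %r = zext i8 %x to i32
// commonCastTransforms() below performs the actual rewrite.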

/// Implement the transforms common to all CastInst visitors.
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  if (auto *SrcC = dyn_cast<Constant>(Src))
    if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
      return replaceInstUsesWith(CI, Res);

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands of different sizes than its condition
    // may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {

      // If it's a bitcast involving vectors, make sure it has the same number
      // of elements on both sides.
      if (CI.getOpcode() != Instruction::BitCast ||
          match(&CI, m_ElementWiseBitCast(m_Value()))) {
        if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
          replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
          return NV;
        }
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}
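
// Illustrative IR for the select fold above (hand-written sketch): with
// both select arms constant, FoldOpIntoSelect moves the cast into the arms:
//   %s = select i1 %c, i32 255, i32 0
//   %t = trunc i32 %s to i8
// -->
//   %t = select i1 %c, i8 -1, i8 0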

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return match(V, m_ImmConstant());

  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we can not evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V. We should return true if trunc(V)
/// can be computed by computing V in the smaller type. If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    // Do not preserve the original context instruction. Simplifying div/rem
    // based on later context may introduce a trap.
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, I) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, I)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, I) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, I);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // in-range amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), CxtI);
    APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (MaxShiftAmt.ult(BitWidth)) {
      // If the only user is a trunc then we can narrow the shift if any new
      // MSBs are not going to be used.
      if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
        auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
        if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
          return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
                 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
      }
      if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, CxtI))
        return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
               canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to the sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI: {
    // If the integer type can hold the max FP value, it is safe to cast
    // directly to that type. Otherwise, we may create poison via overflow
    // that did not exist in the original code.
    Type *InputTy = I->getOperand(0)->getType()->getScalarType();
    const fltSemantics &Semantics = InputTy->getFltSemantics();
    uint32_t MinBitWidth =
        APFloatBase::semanticsIntSizeInBits(Semantics,
            I->getOpcode() == Instruction::FPToSI);
    return Ty->getScalarSizeInBits() >= MinBitWidth;
  }
  case Instruction::ShuffleVector:
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}
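
// Illustrative IR (hand-written sketch, assuming i8 is a legal integer
// width in the target's data layout): both add operands are zexts from the
// destination type, so the whole tree can be evaluated narrowly:
//   %xw = zext i8 %x to i32
//   %yw = zext i8 %y to i32
//   %a  = add i32 %xw, %yw
//   %t  = trunc i32 %a to i8
// -->
//   %t = add i8 %x, %y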

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Whenever an element is extracted from a vector, optionally shifted down, and
/// then truncated, canonicalize by converting it to a bitcast followed by an
/// extractelement.
///
/// Examples (little endian):
///   trunc (extractelement <4 x i64> %X, 0) to i32
///   --->
///   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
///
///   trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
///   --->
///   extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
static Instruction *foldVecExtTruncToExtElt(TruncInst &Trunc,
                                            InstCombinerImpl &IC) {
  Value *Src = Trunc.getOperand(0);
  Type *SrcType = Src->getType();
  Type *DstType = Trunc.getType();

  // Only attempt this if we have simple aliasing of the vector elements.
  // A badly fit destination size would result in an invalid cast.
  unsigned SrcBits = SrcType->getScalarSizeInBits();
  unsigned DstBits = DstType->getScalarSizeInBits();
  unsigned TruncRatio = SrcBits / DstBits;
  if ((SrcBits % DstBits) != 0)
    return nullptr;

  Value *VecOp;
  ConstantInt *Cst;
  const APInt *ShiftAmount = nullptr;
  if (!match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst)))) &&
      !match(Src,
             m_OneUse(m_LShr(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst)),
                             m_APInt(ShiftAmount)))))
    return nullptr;

  auto *VecOpTy = cast<VectorType>(VecOp->getType());
  auto VecElts = VecOpTy->getElementCount();

  uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
  uint64_t VecOpIdx = Cst->getZExtValue();
  uint64_t NewIdx = IC.getDataLayout().isBigEndian()
                        ? (VecOpIdx + 1) * TruncRatio - 1
                        : VecOpIdx * TruncRatio;

  // Adjust index by the whole number of truncated elements.
  if (ShiftAmount) {
    // Check shift amount is in range and shifts a whole number of truncated
    // elements.
    if (ShiftAmount->uge(SrcBits) || ShiftAmount->urem(DstBits) != 0)
      return nullptr;

    uint64_t IdxOfs = ShiftAmount->udiv(DstBits).getZExtValue();
    NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
                                              : (NewIdx + IdxOfs);
  }

  assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
         NewIdx <= std::numeric_limits<uint32_t>::max() && "overflow 32-bits");

  auto *BitCastTo =
      VectorType::get(DstType, BitCastNumElts, VecElts.isScalable());
  Value *BitCast = IC.Builder.CreateBitCast(VecOp, BitCastTo);
  return ExtractElementInst::Create(BitCast, IC.Builder.getInt32(NewIdx));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount can not over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, &Trunc))
    return nullptr;

  // Adjust the width of ShAmt for narrowed funnel shift operation:
  // - Zero-extend if ShAmt is narrower than the destination type.
  // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
  // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
  // zext/trunc(ShAmt)).
  Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);

  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F =
      Intrinsic::getOrInsertDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}
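
// Illustrative IR (hand-written sketch, assuming i8 is a legal integer
// width and %n is known to be in [0, 7]): an i8 rotate-left emulated in i32
// narrows to a funnel-shift intrinsic:
//   %zx  = zext i8 %x to i32
//   %shl = shl i32 %zx, %n
//   %sub = sub i32 8, %n
//   %shr = lshr i32 %zx, %sub
//   %or  = or i32 %shl, %shr
//   %r   = trunc i32 %or to i8
// -->
//   %n8 = trunc i32 %n to i8
//   %r  = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 %n8)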

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
  unsigned DestWidth = DestTy->getScalarSizeInBits();

  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
    Value *A;
    Constant *C;
    if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
      unsigned MaxShiftAmt = SrcWidth - DestWidth;
      // If the shift is small enough, all zero/sign bits created by the shift
      // are removed by the trunc.
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                      APInt(SrcWidth, MaxShiftAmt)))) {
        auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
        bool IsExact = OldShift->isExact();
        if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
                                                      /*IsSigned*/ true, DL)) {
          ShAmt = Constant::mergeUndefsWith(ShAmt, C);
          Value *Shift =
              OldShift->getOpcode() == Instruction::AShr
                  ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
                  : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
          return CastInst::CreateTruncOrBitCast(Shift, DestTy);
        }
      }
    }
    break;
  }
  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}
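
// Illustrative IR for the constant-operand case above (hand-written sketch;
// %a is assumed to have no other uses):
//   %a = mul i64 %x, 15
//   %t = trunc i64 %a to i32
// -->
//   %x32 = trunc i64 %x to i32
//   %t   = mul i32 %x32, 15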

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      all_equal(Shuf->getShuffleMask()) &&
      ElementCount::isKnownGE(Shuf->getType()->getElementCount(),
                              cast<VectorType>(Shuf->getOperand(0)->getType())
                                  ->getElementCount())) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Type *NewTruncTy = Shuf->getOperand(0)->getType()->getWithNewType(
        Trunc.getType()->getScalarType());
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), NewTruncTy);
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}
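
// Illustrative IR (hand-written sketch): truncating a splat becomes a splat
// of the truncated source vector:
//   %s = shufflevector <2 x i32> %x, <2 x i32> poison, <4 x i32> zeroinitializer
//   %t = trunc <4 x i32> %s to <4 x i16>
// -->
//   %n = trunc <2 x i32> %x to <2 x i16>
//   %t = shufflevector <2 x i16> %n, <2 x i16> poison, <4 x i32> zeroinitializer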

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}
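
// Illustrative IR (hand-written sketch): with an undef base vector, the
// truncate moves onto the inserted scalar:
//   %v = insertelement <4 x i32> undef, i32 %x, i64 0
//   %t = trunc <4 x i32> %v to <4 x i8>
// -->
//   %x8 = trunc i32 %x to i8
//   %t  = insertelement <4 x i8> undef, i8 %x8, i64 0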
756
758 if (Instruction *Result = commonCastTransforms(Trunc))
759 return Result;
760
761 Value *Src = Trunc.getOperand(0);
762 Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
763 unsigned DestWidth = DestTy->getScalarSizeInBits();
764 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
765
766 // Attempt to truncate the entire input expression tree to the destination
767 // type. Only do this if the dest type is a simple type, don't convert the
768 // expression tree to something weird like i93 unless the source is also
769 // strange.
770 if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
771 canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
772
773 // If this cast is a truncate, evaluting in a different type always
774 // eliminates the cast, so it is always a win.
776 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
777 " to avoid cast: "
778 << Trunc << '\n');
779 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
780 assert(Res->getType() == DestTy);
781 return replaceInstUsesWith(Trunc, Res);
782 }
783
784 // For integer types, check if we can shorten the entire input expression to
785 // DestWidth * 2, which won't allow removing the truncate, but reducing the
786 // width may enable further optimizations, e.g. allowing for larger
787 // vectorization factors.
788 if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
789 if (DestWidth * 2 < SrcWidth) {
790 auto *NewDestTy = DestITy->getExtendedType();
791 if (shouldChangeType(SrcTy, NewDestTy) &&
792 canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
794 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
795 " to reduce the width of operand of"
796 << Trunc << '\n');
797 Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
798 return new TruncInst(Res, DestTy);
799 }
800 }
801 }
802
803 // See if we can simplify any instructions used by the input whose sole
804 // purpose is to compute bits we don't care about.
806 return &Trunc;
807
808 if (DestWidth == 1) {
809 Value *Zero = Constant::getNullValue(SrcTy);
810
811 Value *X;
812 const APInt *C1;
813 Constant *C2;
814 if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
815 m_ImmConstant(C2))))) {
816 // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
817 Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
818 Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
819 return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
820 }
821
822 if (match(Src, m_Shr(m_Value(X), m_SpecificInt(SrcWidth - 1)))) {
823 // trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
824 // trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
825 return new ICmpInst(ICmpInst::ICMP_SLT, X, Zero);
826 }
827
828 Constant *C;
829 if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
830 // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
831 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
832 Value *MaskC = Builder.CreateShl(One, C);
833 Value *And = Builder.CreateAnd(X, MaskC);
834 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
835 }
837 m_Deferred(X))))) {
838 // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
839 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
840 Value *MaskC = Builder.CreateShl(One, C);
841 Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
842 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
843 }
844
845 {
846 const APInt *C;
847 if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
848 // trunc (C << X) to i1 --> X == 0, where C is odd
849 return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
850 }
851 }
852
853 if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
854 Value *X, *Y;
855 if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
856 return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
857 }
858 }
859
860 Value *A, *B;
861 Constant *C;
862 if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
863 unsigned AWidth = A->getType()->getScalarSizeInBits();
864 unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
865 auto *OldSh = cast<Instruction>(Src);
866 bool IsExact = OldSh->isExact();
867
868 // If the shift is small enough, all zero bits created by the shift are
869 // removed by the trunc.
871 APInt(SrcWidth, MaxShiftAmt)))) {
872 auto GetNewShAmt = [&](unsigned Width) {
873 Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
874 Constant *Cmp =
876 Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
877 return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
878 DL);
879 };
880
881 // trunc (lshr (sext A), C) --> ashr A, C
882 if (A->getType() == DestTy) {
883 Constant *ShAmt = GetNewShAmt(DestWidth);
884 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
885 return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
886 : BinaryOperator::CreateAShr(A, ShAmt);
887 }
888 // The types are mismatched, so create a cast after shifting:
889 // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
890 if (Src->hasOneUse()) {
891 Constant *ShAmt = GetNewShAmt(AWidth);
892 Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
893 return CastInst::CreateIntegerCast(Shift, DestTy, true);
894 }
895 }
896 // TODO: Mask high bits with 'and'.
897 }
898
899 if (Instruction *I = narrowBinOp(Trunc))
900 return I;
901
903 return I;
904
905 if (Instruction *I = shrinkInsertElt(Trunc, Builder))
906 return I;
907
908 if (Src->hasOneUse() &&
909 (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
910 // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
911 // dest type is native and cst < dest size.
912 if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
913 !match(A, m_Shr(m_Value(), m_Constant()))) {
914 // Skip shifts of shift by constants. It undoes a combine in
915 // FoldShiftByConstant and is the extend in reg pattern.
916 APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
917 if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
918 Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
919 return BinaryOperator::Create(Instruction::Shl, NewTrunc,
920 ConstantExpr::getTrunc(C, DestTy));
921 }
922 }
923 }
924
925 if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
926 return I;
927
928 if (Instruction *I = foldVecExtTruncToExtElt(Trunc, *this))
929 return I;
930
931 // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
932 if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
933 m_Value(B))))) {
934 unsigned AWidth = A->getType()->getScalarSizeInBits();
935 if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
936 Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
937 Value *NarrowCtlz =
938 Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
939 return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
940 }
941 }
942
943 if (match(Src, m_VScale())) {
944 if (Trunc.getFunction() &&
945 Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
946 Attribute Attr =
947 Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
948 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
949 if (Log2_32(*MaxVScale) < DestWidth)
950 return replaceInstUsesWith(Trunc, Builder.CreateVScale(DestTy));
951 }
952 }
953
954 if (DestWidth == 1 &&
955 (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) &&
957 return replaceInstUsesWith(Trunc, ConstantInt::getTrue(DestTy));
958
959 bool Changed = false;
960 if (!Trunc.hasNoSignedWrap() &&
961 ComputeMaxSignificantBits(Src, &Trunc) <= DestWidth) {
962 Trunc.setHasNoSignedWrap(true);
963 Changed = true;
964 }
965 if (!Trunc.hasNoUnsignedWrap() &&
966 MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
967 &Trunc)) {
968 Trunc.setHasNoUnsignedWrap(true);
969 Changed = true;
970 }
971
972 return Changed ? &Trunc : nullptr;
973}
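
// Illustrative IR for one of the DestWidth == 1 folds above (hand-written
// sketch): truncating out the sign bit becomes a signed compare with zero:
//   %s = lshr i8 %x, 7
//   %t = trunc i8 %s to i1
// -->
//   %t = icmp slt i8 %x, 0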

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
                                                 ZExtInst &Zext) {
  // If we are just checking for an icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.

  // FIXME: This set of transforms does not check for extra uses and/or creates
  //        an extra instruction (an optional final cast is not included
  //        in the transform comments). We may also want to favor icmp over
  //        shifts in cases of equal instructions because icmp has better
  //        analysis in general (invert the transform).

  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
    if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.

    if (Op1CV->isZero() && Cmp->isEquality()) {
      // Exactly 1 possible 1? But not the high-bit because that is
      // canonicalized to this form.
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), &Zext);
      APInt KnownZeroMask(~Known.Zero);
      uint32_t ShAmt = KnownZeroMask.logBase2();
      bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
                           (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
      if (IsExpectShAmt &&
          (Cmp->getOperand(0)->getType() == Zext.getType() ||
           Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        // Toggle the low bit for "X == 0".
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      auto *And = cast<BinaryOperator>(Cmp->getOperand(0));
      Value *Shift = And->getOperand(X == And->getOperand(0) ? 1 : 0);
      if (Zext.getType() == And->getType() ||
          Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          X = Builder.CreateNot(X);
        Value *Lshr = Builder.CreateLShr(X, ShAmt);
        Value *And1 =
            Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
        return replaceInstUsesWith(
            Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
      }
    }
  }

  return nullptr;
}
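
// Illustrative IR for the known-bits path above (hand-written sketch): only
// bit 3 of %a can be set, so the icmp+zext pair becomes a shift that moves
// that bit into the low position:
//   %a = and i32 %x, 8
//   %c = icmp ne i32 %a, 0
//   %r = zext i1 %c to i32
// -->
//   %r = lshr i32 %a, 3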

/// Determine if the specified value can be computed in the specified wider type
/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// the zero extend, but that the additional BitsToClear bits need to be zero'd
/// out. For example, to promote something like:
///
///   %B = trunc i64 %A to i32
///   %C = lshr i32 %B, 8
///   %E = zext i32 %C to i64
///
/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
/// set to 8 to indicate that the promoted value needs to have bits 24-31
/// cleared in addition to bits 32-63. Since an 'and' will be generated to
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
                             InstCombinerImpl &IC, Instruction *CxtI) {
  BitsToClear = 0;
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  unsigned Tmp;
  switch (I->getOpcode()) {
  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
      return false;
    // These can all be promoted if neither operand has 'bits to clear'.
    if (BitsToClear == 0 && Tmp == 0)
      return true;

    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
    // other side, BitsToClear is ok.
    if (Tmp == 0 && I->isBitwiseLogicOp()) {
      // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
      unsigned VSize = V->getType()->getScalarSizeInBits();
      if (IC.MaskedValueIsZero(I->getOperand(1),
                               APInt::getHighBitsSet(VSize, BitsToClear),
                               CxtI)) {
        // If this is an And instruction and all of the BitsToClear are
        // known to be zero we can reset BitsToClear.
        if (I->getOpcode() == Instruction::And)
          BitsToClear = 0;
        return true;
      }
    }

    // Otherwise, we don't know how to analyze this BitsToClear case yet.
    return false;

  case Instruction::Shl: {
    // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
    uint64_t ShiftAmt;
    if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      return true;
    }
    return false;
  }
  case Instruction::LShr: {
    // We can promote lshr(x, cst) if we can promote x. This requires the
    // ultimate 'and' to clear out the high zero bits we're clearing out though.
    uint64_t ShiftAmt;
    if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear += ShiftAmt;
      if (BitsToClear > V->getType()->getScalarSizeInBits())
        BitsToClear = V->getType()->getScalarSizeInBits();
      return true;
    }
    // Cannot promote variable LSHR.
    return false;
  }
  case Instruction::Select:
    if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
        // TODO: If important, we could handle the case when the BitsToClear are
        // known zero in the disagreeing side.
        Tmp != BitsToClear)
      return false;
    return true;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
      return false;
    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
        return false;
    return true;
  }
  case Instruction::Call:
    // llvm.vscale() can always be executed in larger type, because the
    // value is automatically zero-extended.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::vscale)
        return true;
    return false;
  default:
    // TODO: Can handle more cases here.
    return false;
  }
}

Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
  // If this zero extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this zext.
  if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
      !isa<Constant>(Zext.getOperand(0)))
    return nullptr;

  // If one of the common conversions will work, do it.
  if (Instruction *Result = commonCastTransforms(Zext))
    return Result;

  Value *Src = Zext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Zext.getType();

  // zext nneg bool x -> 0
  if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
    return replaceInstUsesWith(Zext, Constant::getNullValue(Zext.getType()));

  // Try to extend the entire expression tree to the wide destination type.
  unsigned BitsToClear;
  if (shouldChangeType(SrcTy, DestTy) &&
      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
    assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
           "Can't clear more bits than in SrcTy");

    // Okay, we can transform this! Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid zero extend: "
               << Zext << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);

    // Preserve debug values referring to Src if the zext is its last use.
    if (auto *SrcOp = dyn_cast<Instruction>(Src))
      if (SrcOp->hasOneUse())
        replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);

    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
    uint32_t DestBitSize = DestTy->getScalarSizeInBits();

    // If the high bits are already filled with zeros, just replace this
    // cast with the result.
    if (MaskedValueIsZero(
            Res, APInt::getHighBitsSet(DestBitSize, DestBitSize - SrcBitsKept),
            &Zext))
      return replaceInstUsesWith(Zext, Res);

    // We need to emit an AND to clear the high bits.
    Constant *C = ConstantInt::get(Res->getType(),
                                   APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
    return BinaryOperator::CreateAnd(Res, C);
  }

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
  if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
    // TODO: Subsume this into EvaluateInDifferentType.

    // Get the sizes of the types involved. We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = DestTy->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    // SrcSize <  DstSize: zext(a & mask)
    // SrcSize == DstSize: a & mask
    // SrcSize  > DstSize: trunc(a) & mask
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
      return new ZExtInst(And, DestTy);
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder.CreateTrunc(A, DestTy);
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(Cmp, Zext);

  // zext(trunc(X) & C) -> (X & zext(C)).
  Constant *C;
  Value *X;
  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
      X->getType() == DestTy)
    return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));

  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  Value *And;
  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
      match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
      X->getType() == DestTy) {
    Value *ZC = Builder.CreateZExt(C, DestTy);
    return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  }

  // If we are truncating, masking, and then zexting back to the original type,
  // that's just a mask. This is not handled by canEvaluateZextd if the
  // intermediate values have extra uses. This could be generalized further for
  // a non-constant mask operand.
  // zext (and (trunc X), C) --> and X, (zext C)
  if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
      X->getType() == DestTy) {
    Value *ZextC = Builder.CreateZExt(C, DestTy);
    return BinaryOperator::CreateAnd(X, ZextC);
  }

  if (match(Src, m_VScale())) {
    if (Zext.getFunction() &&
        Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
        if (Log2_32(*MaxVScale) < TypeWidth)
          return replaceInstUsesWith(Zext, Builder.CreateVScale(DestTy));
      }
    }
  }

  if (!Zext.hasNonNeg()) {
    // If this zero extend is only used by a shift, add nneg flag.
    if (Zext.hasOneUse() &&
        SrcTy->getScalarSizeInBits() >
            Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
        match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
      Zext.setNonNeg();
      return &Zext;
    }

    if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
      Zext.setNonNeg();
      return &Zext;
    }
  }

  return nullptr;
}
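
// Illustrative IR for the trunc-of-equal-width case above (hand-written
// sketch): a trunc/zext round trip through i8 collapses to a mask:
//   %t = trunc i32 %x to i8
//   %z = zext i8 %t to i32
// -->
//   %z = and i32 %x, 255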

/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
                                                 SExtInst &Sext) {
  Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
  ICmpInst::Predicate Pred = Cmp->getPredicate();

  // Don't bother if Op1 isn't of vector or integer type.
  if (!Op1->getType()->isIntOrIntVectorTy())
    return nullptr;

  if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
    // sext (x <s 0) --> ashr x, 31 (all ones if negative)
    Value *Sh = ConstantInt::get(Op0->getType(),
                                 Op0->getType()->getScalarSizeInBits() - 1);
    Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
    if (In->getType() != Sext.getType())
      In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);

    return replaceInstUsesWith(Sext, In);
  }

  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    // If we know that only one bit of the LHS of the icmp can be set and we
    // have an equality comparison with zero or a power of 2, we can transform
    // the icmp and sext into bitwise/integer operations.
    if (Cmp->hasOneUse() &&
        Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
      KnownBits Known = computeKnownBits(Op0, &Sext);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) {
        Value *In = Cmp->getOperand(0);

        // If the icmp tests for a known zero bit we can constant fold it.
        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
          Value *V = Pred == ICmpInst::ICMP_NE ?
                       ConstantInt::getAllOnesValue(Sext.getType()) :
                       ConstantInt::getNullValue(Sext.getType());
          return replaceInstUsesWith(Sext, V);
        }

        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
          unsigned ShiftAmt = KnownZeroMask.countr_zero();
          // Perform a right shift to place the desired bit in the LSB.
          if (ShiftAmt)
            In = Builder.CreateLShr(In,
                                    ConstantInt::get(In->getType(), ShiftAmt));

          // At this point "In" is either 1 or 0. Subtract 1 to turn
          // {1, 0} -> {0, -1}.
          In = Builder.CreateAdd(In,
                                 ConstantInt::getAllOnesValue(In->getType()),
                                 "sext");
        } else {
          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
          unsigned ShiftAmt = KnownZeroMask.countl_zero();
          // Perform a left shift to place the desired bit in the MSB.
          if (ShiftAmt)
            In = Builder.CreateShl(In,
                                   ConstantInt::get(In->getType(), ShiftAmt));

          // Distribute the bit over the whole bit width.
          In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
                                  KnownZeroMask.getBitWidth() - 1), "sext");
        }

        if (Sext.getType() == In->getType())
          return replaceInstUsesWith(Sext, In);
        return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
      }
    }
  }

  return nullptr;
}
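
// Illustrative IR for the sign-test fold above (hand-written sketch):
//   %c = icmp slt i32 %x, 0
//   %s = sext i1 %c to i32
// -->
//   %s = ashr i32 %x, 31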

/// Return true if we can take the specified value and return it as type Ty
/// without inserting any new casts and without changing the value of the common
/// low bits. This is used by code that tries to promote integer operations to
/// a wider type, which will allow us to eliminate the extension.
///
/// This function works on both vectors and scalars.
///
static bool canEvaluateSExtd(Value *V, Type *Ty) {
  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
         "Can't sign extend type to a smaller type");
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::SExt:  // sext(sext(x)) -> sext(x)
  case Instruction::ZExt:  // sext(zext(x)) -> zext(x)
  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // These operators can all arbitrarily be extended if their inputs can.
    return canEvaluateSExtd(I->getOperand(0), Ty) &&
           canEvaluateSExtd(I->getOperand(1), Ty);

  //case Instruction::Shl:   TODO
  //case Instruction::LShr:  TODO

  case Instruction::Select:
    return canEvaluateSExtd(I->getOperand(1), Ty) &&
           canEvaluateSExtd(I->getOperand(2), Ty);

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateSExtd(IncValue, Ty)) return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}
1481
1482 Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
1483 // If this sign extend is only used by a truncate, let the truncate be
1484 // eliminated before we try to optimize this sext.
1485 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1486 return nullptr;
1487
1488 if (Instruction *I = commonCastTransforms(Sext))
1489 return I;
1490
1491 Value *Src = Sext.getOperand(0);
1492 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1493 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1494 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1495
1496 // If the value being extended is zero or positive, use a zext instead.
1497 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1498 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1499 CI->setNonNeg(true);
1500 return CI;
1501 }
1502
1503 // Try to extend the entire expression tree to the wide destination type.
1504 if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1505 // Okay, we can transform this! Insert the new expression now.
1506 LLVM_DEBUG(
1507 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1508 " to avoid sign extend: "
1509 << Sext << '\n');
1510 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1511 assert(Res->getType() == DestTy);
1512
1513 // If the high bits are already filled with sign bit, just replace this
1514 // cast with the result.
1515 if (ComputeNumSignBits(Res, &Sext) > DestBitSize - SrcBitSize)
1516 return replaceInstUsesWith(Sext, Res);
1517
1518 // We need to emit a shl + ashr to do the sign extend.
1519 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1520 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1521 ShAmt);
1522 }
1523
1524 Value *X;
1525 if (match(Src, m_Trunc(m_Value(X)))) {
1526 // If the input has more sign bits than bits truncated, then convert
1527 // directly to final type.
1528 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1529 if (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)
1530 return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1531
1532 // If input is a trunc from the destination type, then convert into shifts.
1533 if (Src->hasOneUse() && X->getType() == DestTy) {
1534 // sext (trunc X) --> ashr (shl X, C), C
1535 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1536 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1537 }
1538
1539 // If we are replacing shifted-in high zero bits with sign bits, convert
1540 // the logic shift to arithmetic shift and eliminate the cast to
1541 // intermediate type:
1542 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1543 Value *Y;
1544 if (Src->hasOneUse() &&
1545 match(X, m_LShr(m_Value(Y),
1546 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1547 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1548 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1549 }
1550 }
1551
1552 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1553 return transformSExtICmp(Cmp, Sext);
1554
1555 // If the input is a shl/ashr pair by the same constant, then this is a sign
1556 // extension from a smaller value. If we could trust arbitrary bitwidth
1557 // integers, we could turn this into a truncate to the smaller bit and then
1558 // use a sext for the whole extension. Since we don't, look deeper and check
1559 // for a truncate. If the source and dest are the same type, eliminate the
1560 // trunc and extend and just do shifts. For example, turn:
1561 // %a = trunc i32 %i to i8
1562 // %b = shl i8 %a, C
1563 // %c = ashr i8 %b, C
1564 // %d = sext i8 %c to i32
1565 // into:
1566 // %a = shl i32 %i, 32-(8-C)
1567 // %d = ashr i32 %a, 32-(8-C)
1568 Value *A = nullptr;
1569 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1570 Constant *BA = nullptr, *CA = nullptr;
1571 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1572 m_ImmConstant(CA))) &&
1573 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1574 Constant *WideCurrShAmt =
1575 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1576 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1577 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1578 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1579 Constant *NewShAmt = ConstantExpr::getSub(
1580 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1581 NumLowbitsLeft);
1582 NewShAmt =
1583 Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1584 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1585 return BinaryOperator::CreateAShr(A, NewShAmt);
1586 }
1587
1588 // Splatting a bit of constant-index across a value:
1589 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1590 // If the dest type is different, use a cast (adjust use check).
1591 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1592 m_SpecificInt(SrcBitSize - 1))))) {
1593 Type *XTy = X->getType();
1594 unsigned XBitSize = XTy->getScalarSizeInBits();
1595 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1596 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1597 if (XTy == DestTy)
1598 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1599 AshrAmtC);
1600 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1601 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1602 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1603 }
1604 }
1605
1606 if (match(Src, m_VScale())) {
1607 if (Sext.getFunction() &&
1608 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1609 Attribute Attr =
1610 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1611 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1612 if (Log2_32(*MaxVScale) < (SrcBitSize - 1))
1613 return replaceInstUsesWith(Sext, Builder.CreateVScale(DestTy));
1614 }
1615 }
1616
1617 return nullptr;
1618}
1619
1620 /// Return true if the specified floating-point constant fits in the given
1621 /// FP type without changing its value.
1622static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1623 bool losesInfo;
1624 APFloat F = CFP->getValueAPF();
1625 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1626 return !losesInfo;
1627}
1628
1629static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
1630 if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1631 return nullptr; // No constant folding of this.
1632 // See if the value can be truncated to bfloat and then reextended.
1633 if (PreferBFloat && fitsInFPType(CFP, APFloat::BFloat()))
1634 return Type::getBFloatTy(CFP->getContext());
1635 // See if the value can be truncated to half and then reextended.
1636 if (!PreferBFloat && fitsInFPType(CFP, APFloat::IEEEhalf()))
1637 return Type::getHalfTy(CFP->getContext());
1638 // See if the value can be truncated to float and then reextended.
1639 if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1640 return Type::getFloatTy(CFP->getContext());
1641 if (CFP->getType()->isDoubleTy())
1642 return nullptr; // Won't shrink.
1643 if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1644 return Type::getDoubleTy(CFP->getContext());
1645 // Don't try to shrink to various long double types.
1646 return nullptr;
1647}
1648
1649// Determine if this is a vector of ConstantFPs and if so, return the minimal
1650// type we can safely truncate all elements to.
1651static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
1652 auto *CV = dyn_cast<Constant>(V);
1653 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1654 if (!CV || !CVVTy)
1655 return nullptr;
1656
1657 Type *MinType = nullptr;
1658
1659 unsigned NumElts = CVVTy->getNumElements();
1660
1661 // For fixed-width vectors we find the minimal type by looking
1662 // through the constant values of the vector.
1663 for (unsigned i = 0; i != NumElts; ++i) {
1664 if (isa<UndefValue>(CV->getAggregateElement(i)))
1665 continue;
1666
1667 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1668 if (!CFP)
1669 return nullptr;
1670
1671 Type *T = shrinkFPConstant(CFP, PreferBFloat);
1672 if (!T)
1673 return nullptr;
1674
1675 // If we haven't found a type yet or this type has a larger mantissa than
1676 // our previous type, this is our new minimal type.
1677 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1678 MinType = T;
1679 }
1680
1681 // Make a vector type from the minimal type.
1682 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
1683}
1684
1685/// Find the minimum FP type we can safely truncate to.
1686static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
1687 if (auto *FPExt = dyn_cast<FPExtInst>(V))
1688 return FPExt->getOperand(0)->getType();
1689
1690 // If this value is a constant, return the constant in the smallest FP type
1691 // that can accurately represent it. This allows us to turn
1692 // (float)((double)X+2.0) into x+2.0f.
1693 if (auto *CFP = dyn_cast<ConstantFP>(V))
1694 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
1695 return T;
1696
1697 // Try to shrink scalable and fixed splat vectors.
1698 if (auto *FPC = dyn_cast<Constant>(V))
1699 if (isa<VectorType>(V->getType()))
1700 if (auto *Splat = dyn_cast_or_null<ConstantFP>(FPC->getSplatValue()))
1701 if (Type *T = shrinkFPConstant(Splat, PreferBFloat))
1702 return T;
1703
1704 // Try to shrink a vector of FP constants. This returns nullptr on scalable
1705 // vectors.
1706 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
1707 return T;
1708
1709 return V->getType();
1710}
1711
1712/// Return true if the cast from integer to FP can be proven to be exact for all
1713/// possible inputs (the conversion does not lose any precision).
1714 static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
1715 CastInst::CastOps Opcode = I.getOpcode();
1716 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1717 "Unexpected cast");
1718 Value *Src = I.getOperand(0);
1719 Type *SrcTy = Src->getType();
1720 Type *FPTy = I.getType();
1721 bool IsSigned = Opcode == Instruction::SIToFP;
1722 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1723
1724 // Easy case - if the source integer type has less bits than the FP mantissa,
1725 // then the cast must be exact.
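// E.g. (illustrative): uitofp i24 -> float is always exact (24 <= 24
// significand bits), while uitofp i32 -> float is not (32 > 24).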
1726 int DestNumSigBits = FPTy->getFPMantissaWidth();
1727 if (SrcSize <= DestNumSigBits)
1728 return true;
1729
1730 // Cast from FP to integer and back to FP is independent of the intermediate
1731 // integer width because of poison on overflow.
1732 Value *F;
1733 if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1734 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1735 // potential rounding of negative FP input values.
1736 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1737 if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1738 SrcNumSigBits++;
1739
1740 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1741 // significant bits than the destination (and make sure neither type is
1742 // weird -- ppc_fp128).
1743 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1744 SrcNumSigBits <= DestNumSigBits)
1745 return true;
1746 }
1747
1748 // TODO:
1749 // Try harder to find if the source integer type has less significant bits.
1750 // For example, compute number of sign bits.
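// E.g. (illustrative): an i32 known to have at least 8 leading and 16
// trailing zero bits carries at most 32 - 8 - 16 = 8 significant bits and
// therefore converts to float exactly.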
1751 KnownBits SrcKnown = IC.computeKnownBits(Src, &I);
1752 int SigBits = (int)SrcTy->getScalarSizeInBits() -
1753 SrcKnown.countMinLeadingZeros() -
1754 SrcKnown.countMinTrailingZeros();
1755 if (SigBits <= DestNumSigBits)
1756 return true;
1757
1758 return false;
1759}
1760
1761 Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1762 if (Instruction *I = commonCastTransforms(FPT))
1763 return I;
1764
1765 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1766 // simplify this expression to avoid one or more of the trunc/extend
1767 // operations if we can do so without changing the numerical results.
1768 //
1769 // The exact manner in which the widths of the operands interact to limit
1770 // what we can and cannot do safely varies from operation to operation, and
1771 // is explained below in the various case statements.
1772 Type *Ty = FPT.getType();
1773 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1774 if (BO && BO->hasOneUse()) {
1775 Type *LHSMinType =
1776 getMinimumFPType(BO->getOperand(0), /*PreferBFloat=*/Ty->isBFloatTy());
1777 Type *RHSMinType =
1778 getMinimumFPType(BO->getOperand(1), /*PreferBFloat=*/Ty->isBFloatTy());
1779 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1780 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1781 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1782 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1783 unsigned DstWidth = Ty->getFPMantissaWidth();
1784 switch (BO->getOpcode()) {
1785 default: break;
1786 case Instruction::FAdd:
1787 case Instruction::FSub:
1788 // For addition and subtraction, the infinitely precise result can
1789 // essentially be arbitrarily wide; proving that double rounding
1790 // will not occur because the result of OpI is exact (as we will for
1791 // FMul, for example) is hopeless. However, we *can* nonetheless
1792 // frequently know that double rounding cannot occur (or that it is
1793 // innocuous) by taking advantage of the specific structure of
1794 // infinitely-precise results that admit double rounding.
1795 //
1796 // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1797 // to represent both sources, we can guarantee that the double
1798 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1799 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1800 // for proof of this fact).
1801 //
1802 // Note: Figueroa does not consider the case where DstFormat !=
1803 // SrcFormat. It's possible (likely even!) that this analysis
1804 // could be tightened for those cases, but they are rare (the main
1805 // case of interest here is (float)((double)float + float)).
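// Worked example (illustrative): for (float)((double)a + (double)b) with
// float a and b, OpWidth = 53 >= 2 * 24 + 1 = 49 and DstWidth = 24 >=
// SrcWidth = 24, so the addition can be done directly in float.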
1806 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1807 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1808 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1809 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1810 RI->copyFastMathFlags(BO);
1811 return RI;
1812 }
1813 break;
1814 case Instruction::FMul:
1815 // For multiplication, the infinitely precise result has at most
1816 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1817 // that such a value can be exactly represented, then no double
1818 // rounding can possibly occur; we can safely perform the operation
1819 // in the destination format if it can represent both sources.
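// E.g. (illustrative): (float)((double)a * (double)b) with float a and b
// is safe because OpWidth = 53 >= LHSWidth + RHSWidth = 24 + 24 = 48.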
1820 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1821 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1822 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1823 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1824 }
1825 break;
1826 case Instruction::FDiv:
1827 // For division, we again use the bound from Figueroa's
1828 // dissertation. I am entirely certain that this bound can be
1829 // tightened in the unbalanced operand case by an analysis based on
1830 // the diophantine rational approximation bound, but the well-known
1831 // condition used here is a good conservative first pass.
1832 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
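// E.g. (illustrative): (float)((double)a / (double)b) with float a and b
// is safe because OpWidth = 53 >= 2 * DstWidth = 48.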
1833 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1834 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1835 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1836 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1837 }
1838 break;
1839 case Instruction::FRem: {
1840 // Remainder is straightforward. Remainder is always exact, so the
1841 // type of OpI doesn't enter into things at all. We simply evaluate
1842 // in whichever source type is larger, then convert to the
1843 // destination type.
1844 if (SrcWidth == OpWidth)
1845 break;
1846 Value *LHS, *RHS;
1847 if (LHSWidth == SrcWidth) {
1848 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1849 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1850 } else {
1851 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1852 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1853 }
1854
1855 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1856 return CastInst::CreateFPCast(ExactResult, Ty);
1857 }
1858 }
1859 }
1860
1861 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1862 Value *X;
1863 Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1864 if (Op && Op->hasOneUse()) {
1865 FastMathFlags FMF = FPT.getFastMathFlags();
1866 if (auto *FPMO = dyn_cast<FPMathOperator>(Op))
1867 FMF &= FPMO->getFastMathFlags();
1868
1869 if (match(Op, m_FNeg(m_Value(X)))) {
1870 Value *InnerTrunc = Builder.CreateFPTruncFMF(X, Ty, FMF);
1871 Value *Neg = Builder.CreateFNegFMF(InnerTrunc, FMF);
1872 return replaceInstUsesWith(FPT, Neg);
1873 }
1874
1875 // If we are truncating a select that has an extended operand, we can
1876 // narrow the other operand and do the select as a narrow op.
1877 Value *Cond, *X, *Y;
1878 if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1879 X->getType() == Ty) {
1880 // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
1881 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
1882 Value *Sel =
1883 Builder.CreateSelectFMF(Cond, X, NarrowY, FMF, "narrow.sel", Op);
1884 return replaceInstUsesWith(FPT, Sel);
1885 }
1886 if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1887 X->getType() == Ty) {
1888 // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
1889 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
1890 Value *Sel =
1891 Builder.CreateSelectFMF(Cond, NarrowY, X, FMF, "narrow.sel", Op);
1892 return replaceInstUsesWith(FPT, Sel);
1893 }
1894 }
1895
1896 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1897 switch (II->getIntrinsicID()) {
1898 default: break;
1899 case Intrinsic::ceil:
1900 case Intrinsic::fabs:
1901 case Intrinsic::floor:
1902 case Intrinsic::nearbyint:
1903 case Intrinsic::rint:
1904 case Intrinsic::round:
1905 case Intrinsic::roundeven:
1906 case Intrinsic::trunc: {
1907 Value *Src = II->getArgOperand(0);
1908 if (!Src->hasOneUse())
1909 break;
1910
1911 // Except for fabs, this transformation requires the input of the unary FP
1912 // operation to be itself an fpext from the type to which we're
1913 // truncating.
1914 if (II->getIntrinsicID() != Intrinsic::fabs) {
1915 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1916 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1917 break;
1918 }
1919
1920 // Do unary FP operation on smaller type.
1921 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1922 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1923 Function *Overload = Intrinsic::getOrInsertDeclaration(
1924 FPT.getModule(), II->getIntrinsicID(), Ty);
1925 SmallVector<OperandBundleDef, 1> OpBundles;
1926 II->getOperandBundlesAsDefs(OpBundles);
1927 CallInst *NewCI =
1928 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1929 // A normal value may be converted to an infinity. It means that we cannot
1930 // propagate ninf from the intrinsic. So we propagate FMF from fptrunc.
1931 NewCI->copyFastMathFlags(&FPT);
1932 return NewCI;
1933 }
1934 }
1935 }
1936
1937 if (Instruction *I = shrinkInsertElt(FPT, Builder))
1938 return I;
1939
1940 Value *Src = FPT.getOperand(0);
1941 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1942 auto *FPCast = cast<CastInst>(Src);
1943 if (isKnownExactCastIntToFP(*FPCast, *this))
1944 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1945 }
1946
1947 return nullptr;
1948}
1949
1950 Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1951 // If the source operand is a cast from integer to FP and known exact, then
1952 // cast the integer operand directly to the destination type.
1953 Type *Ty = FPExt.getType();
1954 Value *Src = FPExt.getOperand(0);
1955 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1956 auto *FPCast = cast<CastInst>(Src);
1957 if (isKnownExactCastIntToFP(*FPCast, *this))
1958 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1959 }
1960
1961 return commonCastTransforms(FPExt);
1962}
1963
1964/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1965/// This is safe if the intermediate type has enough bits in its mantissa to
1966/// accurately represent all values of X. For example, this won't work with
1967/// i64 -> float -> i64.
1968 Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
1969 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1970 return nullptr;
1971
1972 auto *OpI = cast<CastInst>(FI.getOperand(0));
1973 Value *X = OpI->getOperand(0);
1974 Type *XType = X->getType();
1975 Type *DestType = FI.getType();
1976 bool IsOutputSigned = isa<FPToSIInst>(FI);
1977
1978 // Since we can assume the conversion won't overflow, our decision as to
1979 // whether the input will fit in the float should depend on the minimum
1980 // of the input range and output range.
1981
1982 // This means this is also safe for a signed input and unsigned output, since
1983 // a negative input would lead to undefined behavior.
1984 if (!isKnownExactCastIntToFP(*OpI, *this)) {
1985 // The first cast may not round exactly based on the source integer width
1986 // and FP width, but the overflow UB rules can still allow this to fold.
1987 // If the destination type is narrow, that means the intermediate FP value
1988 // must be large enough to hold the source value exactly.
1989 // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior.
1990 int OutputSize = (int)DestType->getScalarSizeInBits();
1991 if (OutputSize > OpI->getType()->getFPMantissaWidth())
1992 return nullptr;
1993 }
1994
1995 if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1996 bool IsInputSigned = isa<SIToFPInst>(OpI);
1997 if (IsInputSigned && IsOutputSigned)
1998 return new SExtInst(X, DestType);
1999 return new ZExtInst(X, DestType);
2000 }
2001 if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
2002 return new TruncInst(X, DestType);
2003
2004 assert(XType == DestType && "Unexpected types for int to FP to int casts");
2005 return replaceInstUsesWith(FI, X);
2006}
2007
2008 static Instruction *foldFPtoI(Instruction &FI, InstCombiner &IC) {
2009 // fpto{u/s}i non-norm --> 0
2010 FPClassTest Mask =
2011 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2012 KnownFPClass FPClass = computeKnownFPClass(
2013 FI.getOperand(0), Mask, IC.getSimplifyQuery().getWithInstruction(&FI));
2014 if (FPClass.isKnownNever(Mask))
2015 return IC.replaceInstUsesWith(FI, ConstantInt::getNullValue(FI.getType()));
2016
2017 return nullptr;
2018}
2019
2020 Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
2021 if (Instruction *I = foldItoFPtoI(FI))
2022 return I;
2023
2024 if (Instruction *I = foldFPtoI(FI, *this))
2025 return I;
2026
2027 return commonCastTransforms(FI);
2028}
2029
2030 Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
2031 if (Instruction *I = foldItoFPtoI(FI))
2032 return I;
2033
2034 if (Instruction *I = foldFPtoI(FI, *this))
2035 return I;
2036
2037 return commonCastTransforms(FI);
2038}
2039
2040 Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
2041 if (Instruction *R = commonCastTransforms(CI))
2042 return R;
2043 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
2044 CI.setNonNeg();
2045 return &CI;
2046 }
2047 return nullptr;
2048}
2049
2050 Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
2051 if (Instruction *R = commonCastTransforms(CI))
2052 return R;
2053 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
2054 auto *UI =
2055 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2056 UI->setNonNeg(true);
2057 return UI;
2058 }
2059 return nullptr;
2060}
2061
2062 Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
2063 // If the source integer type is not the intptr_t type for this target, do a
2064 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2065 // cast to be exposed to other transforms.
2066 unsigned AS = CI.getAddressSpace();
2067 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2068 DL.getPointerSizeInBits(AS)) {
2069 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2070 DL.getIntPtrType(CI.getContext(), AS));
2071 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2072 return new IntToPtrInst(P, CI.getType());
2073 }
2074
2075 // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
2076 // (getelementptr i8, %Base, %Offset) if the pointer is only used as integer
2077 // value.
2078 Value *Base;
2079 Value *Offset;
2080 auto UsesPointerAsInt = [](User *U) {
2081 if (isa<ICmpInst, PtrToIntInst>(U))
2082 return true;
2083 if (auto *P = dyn_cast<PHINode>(U))
2084 return P->hasOneUse() && isa<ICmpInst, PtrToIntInst>(*P->user_begin());
2085 return false;
2086 };
2087 if (match(CI.getOperand(0),
2088 m_OneUse(m_Add(m_PtrToInt(m_Value(Base)),
2089 m_Value(Offset)))) &&
2090 CI.getAddressSpace() ==
2091 Base->getType()->getPointerAddressSpace() &&
2092 all_of(CI.users(), UsesPointerAsInt)) {
2093 return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
2094 }
2095
2096 if (Instruction *I = commonCastTransforms(CI))
2097 return I;
2098
2099 return nullptr;
2100}
2101
2102 Value *InstCombinerImpl::foldPtrToIntOfGEP(Type *IntTy, Value *Ptr) {
2103 // Look through chain of one-use GEPs.
2104 Type *PtrTy = Ptr->getType();
2105 SmallVector<GEPOperator *> GEPs;
2106 while (true) {
2107 auto *GEP = dyn_cast<GEPOperator>(Ptr);
2108 if (!GEP || !GEP->hasOneUse())
2109 break;
2110 GEPs.push_back(GEP);
2111 Ptr = GEP->getPointerOperand();
2112 }
2113
2114 // Don't handle case where GEP converts from pointer to vector.
2115 if (GEPs.empty() || PtrTy != Ptr->getType())
2116 return nullptr;
2117
2118 // Check whether we know the integer value of the base pointer.
2119 Value *Res;
2120 Type *IdxTy = DL.getIndexType(PtrTy);
2121 if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
2122 Res->getType() == IntTy && IntTy == IdxTy) {
2123 // pass
2124 } else if (isa<ConstantPointerNull>(Ptr)) {
2125 Res = Constant::getNullValue(IdxTy);
2126 } else {
2127 return nullptr;
2128 }
2129
2130 // Perform the entire operation on integers instead.
2131 for (GEPOperator *GEP : reverse(GEPs)) {
2132 Value *Offset = EmitGEPOffset(GEP);
2133 Res = Builder.CreateAdd(Res, Offset, "", GEP->hasNoUnsignedWrap());
2134 }
2135 return Builder.CreateZExtOrTrunc(Res, IntTy);
2136}
2137
2138 Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
2139 // If the destination integer type is not the intptr_t type for this target,
2140 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2141 // to be exposed to other transforms.
2142 Value *SrcOp = CI.getPointerOperand();
2143 Type *SrcTy = SrcOp->getType();
2144 Type *Ty = CI.getType();
2145 unsigned AS = CI.getPointerAddressSpace();
2146 unsigned TySize = Ty->getScalarSizeInBits();
2147 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2148 if (TySize != PtrSize) {
2149 Type *IntPtrTy =
2150 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2151 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2152 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2153 }
2154
2155 // (ptrtoint (ptrmask P, M))
2156 // -> (and (ptrtoint P), M)
2157 // This is generally beneficial as `and` is better supported than `ptrmask`.
2158 Value *Ptr, *Mask;
2159 if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
2160 m_Value(Mask)))) &&
2161 Mask->getType() == Ty)
2162 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2163
2164 if (Value *V = foldPtrToIntOfGEP(Ty, SrcOp))
2165 return replaceInstUsesWith(CI, V);
2166
2167 Value *Vec, *Scalar, *Index;
2168 if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
2169 m_Value(Scalar), m_Value(Index)))) &&
2170 Vec->getType() == Ty) {
2171 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2172 // Convert the scalar to int followed by insert to eliminate one cast:
2173 // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2174 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2175 return InsertElementInst::Create(Vec, NewCast, Index);
2176 }
2177
2178 return commonCastTransforms(CI);
2179}
2180
2181/// This input value (which is known to have vector type) is being zero extended
2182/// or truncated to the specified vector type. Since the zext/trunc is done
2183 /// using an integer type, we have a (bitcast(cast(bitcast))) pattern, and
2184 /// endianness will impact which end of the vector is extended or
2185 /// truncated.
2186///
2187/// A vector is always stored with index 0 at the lowest address, which
2188/// corresponds to the most significant bits for a big endian stored integer and
2189/// the least significant bits for little endian. A trunc/zext of an integer
2190/// impacts the big end of the integer. Thus, we need to add/remove elements at
2191/// the front of the vector for big endian targets, and the back of the vector
2192/// for little endian targets.
2193///
2194/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2195///
2196/// The source and destination vector types may have different element types.
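/// E.g. (illustrative, little endian, %v is a placeholder):
///   %i = bitcast <2 x i16> %v to i32
///   %z = zext i32 %i to i64
///   %r = bitcast i64 %z to <4 x i16>
/// becomes
///   %r = shufflevector <2 x i16> %v, <2 x i16> zeroinitializer,
///                      <4 x i32> <i32 0, i32 1, i32 2, i32 2>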
2197static Instruction *
2198 optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
2199 InstCombinerImpl &IC) {
2200 // We can only do this optimization if the output is a multiple of the input
2201 // element size, or the input is a multiple of the output element size.
2202 // Convert the input type to have the same element type as the output.
2203 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2204
2205 if (SrcTy->getElementType() != DestTy->getElementType()) {
2206 // The input types don't need to be identical, but for now they must be the
2207 // same size. There is no specific reason we couldn't handle things like
2208 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2209 // there yet.
2210 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2211 DestTy->getElementType()->getPrimitiveSizeInBits())
2212 return nullptr;
2213
2214 SrcTy =
2215 FixedVectorType::get(DestTy->getElementType(),
2216 cast<FixedVectorType>(SrcTy)->getNumElements());
2217 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2218 }
2219
2220 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2221 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2222 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2223
2224 assert(SrcElts != DestElts && "Element counts should be different.");
2225
2226 // Now that the element types match, get the shuffle mask and RHS of the
2227 // shuffle to use, which depends on whether we're increasing or decreasing the
2228 // size of the input.
2229 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2230 ArrayRef<int> ShuffleMask;
2231 Value *V2;
2232
2233 if (SrcElts > DestElts) {
2234 // If we're shrinking the number of elements (rewriting an integer
2235 // truncate), just shuffle in the elements corresponding to the least
2236 // significant bits from the input and use poison as the second shuffle
2237 // input.
2238 V2 = PoisonValue::get(SrcTy);
2239 // Make sure the shuffle mask selects the "least significant bits" by
2240 // keeping elements from back of the src vector for big endian, and from the
2241 // front for little endian.
2242 ShuffleMask = ShuffleMaskStorage;
2243 if (IsBigEndian)
2244 ShuffleMask = ShuffleMask.take_back(DestElts);
2245 else
2246 ShuffleMask = ShuffleMask.take_front(DestElts);
2247 } else {
2248 // If we're increasing the number of elements (rewriting an integer zext),
2249 // shuffle in all of the elements from InVal. Fill the rest of the result
2250 // elements with zeros from a constant zero.
2251 V2 = Constant::getNullValue(SrcTy);
2252 // Use first elt from V2 when indicating zero in the shuffle mask.
2253 uint32_t NullElt = SrcElts;
2254 // Extend with null values in the "most significant bits" by adding elements
2255 // in front of the src vector for big endian, and at the back for little
2256 // endian.
2257 unsigned DeltaElts = DestElts - SrcElts;
2258 if (IsBigEndian)
2259 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2260 else
2261 ShuffleMaskStorage.append(DeltaElts, NullElt);
2262 ShuffleMask = ShuffleMaskStorage;
2263 }
2264
2265 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2266}
2267
2268static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2269 return Value % Ty->getPrimitiveSizeInBits() == 0;
2270}
2271
2272static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2273 return Value / Ty->getPrimitiveSizeInBits();
2274}
2275
2276/// V is a value which is inserted into a vector of VecEltTy.
2277/// Look through the value to see if we can decompose it into
2278/// insertions into the vector. See the example in the comment for
2279/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2280/// The type of V is always a non-zero multiple of VecEltTy's size.
2281/// Shift is the number of bits between the lsb of V and the lsb of
2282/// the vector.
2283///
2284/// This returns false if the pattern can't be matched or true if it can,
2285/// filling in Elements with the elements found here.
2286static bool collectInsertionElements(Value *V, unsigned Shift,
2287 SmallVectorImpl<Value *> &Elements,
2288 Type *VecEltTy, bool isBigEndian) {
2289 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2290 "Shift should be a multiple of the element type size");
2291
2292 // Undef values never contribute useful bits to the result.
2293 if (isa<UndefValue>(V)) return true;
2294
2295 // If we got down to a value of the right type, we win, try inserting into the
2296 // right element.
2297 if (V->getType() == VecEltTy) {
2298 // Inserting null doesn't actually insert any elements.
2299 if (Constant *C = dyn_cast<Constant>(V))
2300 if (C->isNullValue())
2301 return true;
2302
2303 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2304 if (isBigEndian)
2305 ElementIndex = Elements.size() - ElementIndex - 1;
2306
2307 // Fail if multiple elements are inserted into this slot.
2308 if (Elements[ElementIndex])
2309 return false;
2310
2311 Elements[ElementIndex] = V;
2312 return true;
2313 }
2314
2315 if (Constant *C = dyn_cast<Constant>(V)) {
2316 // Figure out the # elements this provides, and bitcast it or slice it up
2317 // as required.
2318 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2319 VecEltTy);
2320 // If the constant is the size of a vector element, we just need to bitcast
2321 // it to the right type so it gets properly inserted.
2322 if (NumElts == 1)
2323 return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
2324 Shift, Elements, VecEltTy, isBigEndian);
2325
2326 // Okay, this is a constant that covers multiple elements. Slice it up into
2327 // pieces and insert each element-sized piece into the vector.
2328 if (!isa<IntegerType>(C->getType()))
2329 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2330 C->getType()->getPrimitiveSizeInBits()));
2331 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2332 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2333
2334 for (unsigned i = 0; i != NumElts; ++i) {
2335 unsigned ShiftI = i * ElementSize;
2336 Constant *Piece = ConstantFoldBinaryInstruction(
2337 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2338 if (!Piece)
2339 return false;
2340
2341 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2342 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2343 isBigEndian))
2344 return false;
2345 }
2346 return true;
2347 }
2348
2349 if (!V->hasOneUse()) return false;
2350
2351 Instruction *I = dyn_cast<Instruction>(V);
2352 if (!I) return false;
2353 switch (I->getOpcode()) {
2354 default: return false; // Unhandled case.
2355 case Instruction::BitCast:
2356 if (I->getOperand(0)->getType()->isVectorTy())
2357 return false;
2358 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2359 isBigEndian);
2360 case Instruction::ZExt:
2361 if (!isMultipleOfTypeSize(
2362 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2363 VecEltTy))
2364 return false;
2365 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2366 isBigEndian);
2367 case Instruction::Or:
2368 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2369 isBigEndian) &&
2370 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2371 isBigEndian);
2372 case Instruction::Shl: {
2373 // Must be shifting by a constant that is a multiple of the element size.
2374 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2375 if (!CI) return false;
2376 Shift += CI->getZExtValue();
2377 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2378 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2379 isBigEndian);
2380 }
2381
2382 }
2383}
2384
2385
2386/// If the input is an 'or' instruction, we may be doing shifts and ors to
2387/// assemble the elements of the vector manually.
2388/// Try to rip the code out and replace it with insertelements. This is to
2389/// optimize code like this:
2390///
2391/// %tmp37 = bitcast float %inc to i32
2392/// %tmp38 = zext i32 %tmp37 to i64
2393/// %tmp31 = bitcast float %inc5 to i32
2394/// %tmp32 = zext i32 %tmp31 to i64
2395/// %tmp33 = shl i64 %tmp32, 32
2396/// %ins35 = or i64 %tmp33, %tmp38
2397/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2398///
2399/// Into two insertelements that do "buildvector{%inc, %inc5}".
2400 static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2401 InstCombinerImpl &IC) {
2402 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2403 Value *IntInput = CI.getOperand(0);
2404
2405 // If the int input is just an undef value, do not try to optimize to vector
2406 // insertions, as that would prevent undef propagation.
2407 if (isa<UndefValue>(IntInput))
2408 return nullptr;
2409
2410 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2411 if (!collectInsertionElements(IntInput, 0, Elements,
2412 DestVecTy->getElementType(),
2413 IC.getDataLayout().isBigEndian()))
2414 return nullptr;
2415
2416 // If we succeeded, we know that all of the element are specified by Elements
2417 // or are zero if Elements has a null entry. Recast this as a set of
2418 // insertions.
2419 Value *Result = Constant::getNullValue(CI.getType());
2420 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2421 if (!Elements[i]) continue; // Unset element.
2422
2423 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2424 IC.Builder.getInt32(i));
2425 }
2426
2427 return Result;
2428}
2429
2430/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2431/// vector followed by extract element. The backend tends to handle bitcasts of
2432/// vectors better than bitcasts of scalars because vector registers are
2433/// usually not type-specific like scalar integer or scalar floating-point.
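/// E.g. (illustrative): bitcast (extractelement <4 x i32> %v, i64 %i) to float
/// --> extractelement (bitcast <4 x i32> %v to <4 x float>), i64 %i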
2434 static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2435 InstCombinerImpl &IC) {
2436 Value *VecOp, *Index;
2437 if (!match(BitCast.getOperand(0),
2438 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2439 return nullptr;
2440
2441 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2442 // type to extract from.
2443 Type *DestType = BitCast.getType();
2444 VectorType *VecType = cast<VectorType>(VecOp->getType());
2445 if (VectorType::isValidElementType(DestType)) {
2446 auto *NewVecType = VectorType::get(DestType, VecType);
2447 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2448 return ExtractElementInst::Create(NewBC, Index);
2449 }
2450
2451 // Only handle a vector DestType to avoid the inverse transform in visitBitCast.
2452 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2453 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2454 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2455 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2456
2457 return nullptr;
2458}
2459
2460/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
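/// E.g. (illustrative): for %b = bitcast <4 x i32> %x to <2 x i64>,
/// bitcast (and <2 x i64> %b, %y) to <4 x i32>
/// --> and <4 x i32> %x, (bitcast <2 x i64> %y to <4 x i32>)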
2461 static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2462 InstCombiner::BuilderTy &Builder) {
2463 Type *DestTy = BitCast.getType();
2464 BinaryOperator *BO;
2465
2466 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2467 !BO->isBitwiseLogicOp())
2468 return nullptr;
2469
2470 // FIXME: This transform is restricted to vector types to avoid backend
2471 // problems caused by creating potentially illegal operations. If a fix-up is
2472 // added to handle that situation, we can remove this check.
2473 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2474 return nullptr;
2475
2476 if (DestTy->isFPOrFPVectorTy()) {
2477 Value *X, *Y;
2478 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2479 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2480 match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
2481 if (X->getType()->isFPOrFPVectorTy() &&
2482 Y->getType()->isIntOrIntVectorTy()) {
2483 Value *CastedOp =
2484 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2485 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2486 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2487 }
2488 if (X->getType()->isIntOrIntVectorTy() &&
2489 Y->getType()->isFPOrFPVectorTy()) {
2490 Value *CastedOp =
2491 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2492 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2493 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2494 }
2495 }
2496 return nullptr;
2497 }
2498
2499 if (!DestTy->isIntOrIntVectorTy())
2500 return nullptr;
2501
2502 Value *X;
2503 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2504 X->getType() == DestTy && !isa<Constant>(X)) {
2505 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2506 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2507 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2508 }
2509
2510 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2511 X->getType() == DestTy && !isa<Constant>(X)) {
2512 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2513 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2514 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2515 }
2516
2517 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2518 // constant. This eases recognition of special constants for later ops.
2519 // Example:
2520 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2521 Constant *C;
2522 if (match(BO->getOperand(1), m_Constant(C))) {
2523 // bitcast (logic X, C) --> logic (bitcast X, C')
2524 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2525 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2526 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2527 }
2528
2529 return nullptr;
2530}
2531
2532/// Change the type of a select if we can eliminate a bitcast.
2533 static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2534 InstCombiner::BuilderTy &Builder) {
2535 Value *Cond, *TVal, *FVal;
2536 if (!match(BitCast.getOperand(0),
2537 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2538 return nullptr;
2539
2540 // A vector select must maintain the same number of elements in its operands.
2541 Type *CondTy = Cond->getType();
2542 Type *DestTy = BitCast.getType();
2543 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2544 if (!DestTy->isVectorTy() ||
2545 CondVTy->getElementCount() !=
2546 cast<VectorType>(DestTy)->getElementCount())
2547 return nullptr;
2548
2549 // FIXME: This transform is restricted from changing the select between
2550 // scalars and vectors to avoid backend problems caused by creating
2551 // potentially illegal operations. If a fix-up is added to handle that
2552 // situation, we can remove this check.
2553 if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2554 return nullptr;
2555
2556 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2557 Value *X;
2558 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2559 !isa<Constant>(X)) {
2560 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2561 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2562 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2563 }
2564
2565 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2566 !isa<Constant>(X)) {
2567 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2568 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2569 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2570 }
2571
2572 return nullptr;
2573}
2574
2575/// Check if all users of CI are StoreInsts.
2576static bool hasStoreUsersOnly(CastInst &CI) {
2577 for (User *U : CI.users()) {
2578 if (!isa<StoreInst>(U))
2579 return false;
2580 }
2581 return true;
2582}
2583
2584 /// This function handles the following case:
2585///
2586/// A -> B cast
2587/// PHI
2588/// B -> A cast
2589///
2590/// All the related PHI nodes can be replaced by new PHI nodes with type A.
2591/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2592Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2593 PHINode *PN) {
2594 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2595 if (hasStoreUsersOnly(CI))
2596 return nullptr;
2597
2598 Value *Src = CI.getOperand(0);
2599 Type *SrcTy = Src->getType(); // Type B
2600 Type *DestTy = CI.getType(); // Type A
2601
2602 SmallVector<PHINode *, 4> PhiWorklist;
2603 SmallSetVector<PHINode *, 4> OldPhiNodes;
2604
2605 // Find all of the A->B casts and PHI nodes.
2606 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2607 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2608 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2609 PhiWorklist.push_back(PN);
2610 OldPhiNodes.insert(PN);
2611 while (!PhiWorklist.empty()) {
2612 auto *OldPN = PhiWorklist.pop_back_val();
2613 for (Value *IncValue : OldPN->incoming_values()) {
2614 if (isa<Constant>(IncValue))
2615 continue;
2616
2617 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2618 // If there is a sequence of one or more load instructions, where each
2619 // loaded value is used as the address of a later load, a bitcast is
2620 // necessary to change the value type; don't optimize that case. For
2621 // simplicity we give up if the load address comes from another load.
2622 Value *Addr = LI->getOperand(0);
2623 if (Addr == &CI || isa<LoadInst>(Addr))
2624 return nullptr;
2625 // Don't transform "load <256 x i32>, <256 x i32>*" to
2626 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2627 // TODO: Remove this check when bitcast between vector and x86_amx
2628 // is replaced with a specific intrinsic.
2629 if (DestTy->isX86_AMXTy())
2630 return nullptr;
2631 if (LI->hasOneUse() && LI->isSimple())
2632 continue;
2633 // If a LoadInst has more than one use, changing the type of loaded
2634 // value may create another bitcast.
2635 return nullptr;
2636 }
2637
2638 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2639 if (OldPhiNodes.insert(PNode))
2640 PhiWorklist.push_back(PNode);
2641 continue;
2642 }
2643
2644 auto *BCI = dyn_cast<BitCastInst>(IncValue);
2645 // We can't handle other instructions.
2646 if (!BCI)
2647 return nullptr;
2648
2649 // Verify it's a A->B cast.
2650 Type *TyA = BCI->getOperand(0)->getType();
2651 Type *TyB = BCI->getType();
2652 if (TyA != DestTy || TyB != SrcTy)
2653 return nullptr;
2654 }
2655 }
2656
2657 // Check that each user of each old PHI node is something that we can
2658 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2659 for (auto *OldPN : OldPhiNodes) {
2660 for (User *V : OldPN->users()) {
2661 if (auto *SI = dyn_cast<StoreInst>(V)) {
2662 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2663 return nullptr;
2664 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2665 // Verify it's a B->A cast.
2666 Type *TyB = BCI->getOperand(0)->getType();
2667 Type *TyA = BCI->getType();
2668 if (TyA != DestTy || TyB != SrcTy)
2669 return nullptr;
2670 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2671 // As long as the user is another old PHI node, then even if we don't
2672 // rewrite it, the PHI web we're considering won't have any users
2673 // outside itself, so it'll be dead.
2674 if (!OldPhiNodes.contains(PHI))
2675 return nullptr;
2676 } else {
2677 return nullptr;
2678 }
2679 }
2680 }
2681
2682 // For each old PHI node, create a corresponding new PHI node with a type A.
2683 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
2684 for (auto *OldPN : OldPhiNodes) {
2685 Builder.SetInsertPoint(OldPN);
2686 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2687 NewPNodes[OldPN] = NewPN;
2688 }
2689
2690 // Fill in the operands of new PHI nodes.
2691 for (auto *OldPN : OldPhiNodes) {
2692 PHINode *NewPN = NewPNodes[OldPN];
2693 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2694 Value *V = OldPN->getOperand(j);
2695 Value *NewV = nullptr;
2696 if (auto *C = dyn_cast<Constant>(V)) {
2697 NewV = ConstantExpr::getBitCast(C, DestTy);
2698 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2699 // Explicitly perform load combine to make sure no opposing transform
2700 // can remove the bitcast in the meantime and trigger an infinite loop.
2701 Builder.SetInsertPoint(LI);
2702 NewV = combineLoadToNewType(*LI, DestTy);
2703 // Remove the old load and its use in the old phi, which itself becomes
2704 // dead once the whole transform finishes.
2705 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
2706 eraseInstFromFunction(*LI);
2707 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2708 NewV = BCI->getOperand(0);
2709 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2710 NewV = NewPNodes[PrevPN];
2711 }
2712 assert(NewV);
2713 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2714 }
2715 }
2716
2717 // Traverse all accumulated PHI nodes and process their users,
2718 // which are Stores and BitCasts. Without this processing
2719 // NewPHI nodes could be replicated and could lead to extra
2720 // moves generated after DeSSA.
2721 // If there is a store with type B, change it to type A.
2722
2723
2724 // Replace users of BitCast B->A with NewPHI. These will help
2725 // later to get rid of a closure formed by OldPHI nodes.
2726 Instruction *RetVal = nullptr;
2727 for (auto *OldPN : OldPhiNodes) {
2728 PHINode *NewPN = NewPNodes[OldPN];
2729 for (User *V : make_early_inc_range(OldPN->users())) {
2730 if (auto *SI = dyn_cast<StoreInst>(V)) {
2731 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2732 Builder.SetInsertPoint(SI);
2733 auto *NewBC =
2734 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2735 SI->setOperand(0, NewBC);
2736 Worklist.push(SI);
2737 assert(hasStoreUsersOnly(*NewBC));
2738 }
2739 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2740 Type *TyB = BCI->getOperand(0)->getType();
2741 Type *TyA = BCI->getType();
2742 assert(TyA == DestTy && TyB == SrcTy);
2743 (void) TyA;
2744 (void) TyB;
2745 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2746 if (BCI == &CI)
2747 RetVal = I;
2748 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2749 assert(OldPhiNodes.contains(PHI));
2750 (void) PHI;
2751 } else {
2752 llvm_unreachable("all uses should be handled");
2753 }
2754 }
2755 }
2756
2757 return RetVal;
2758}
2759
2760/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
2761/// copysign((bitcast Y to fp), X)
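/// E.g. (illustrative): with i32 %y known non-negative,
///   bitcast (or (and (bitcast float %x to i32), 0x80000000), %y) to float
///   --> copysign((bitcast i32 %y to float), %x)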
2762 static Value *foldCopySignIdioms(BitCastInst &CI,
2763 InstCombiner::BuilderTy &Builder,
2764 const SimplifyQuery &SQ) {
2765 Value *X, *Y;
2766 Type *FTy = CI.getType();
2767 if (!FTy->isFPOrFPVectorTy())
2768 return nullptr;
2769 if (!match(&CI, m_ElementWiseBitCast(m_c_DisjointOr(
2770 m_And(m_ElementWiseBitCast(m_Value(X)), m_SignMask()),
2771 m_Value(Y)))))
2772 return nullptr;
2773 if (X->getType() != FTy)
2774 return nullptr;
2775 if (!isKnownNonNegative(Y, SQ))
2776 return nullptr;
2777
2778 return Builder.CreateCopySign(Builder.CreateBitCast(Y, FTy), X);
2779}
2780
2781 Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2782 // If the operands are integer typed then apply the integer transforms,
2783 // otherwise just apply the common ones.
2784 Value *Src = CI.getOperand(0);
2785 Type *SrcTy = Src->getType();
2786 Type *DestTy = CI.getType();
2787
2788 // Get rid of casts from one type to the same type. These are useless and can
2789 // be replaced by the operand.
2790 if (DestTy == Src->getType())
2791 return replaceInstUsesWith(CI, Src);
2792
2793 if (isa<FixedVectorType>(DestTy)) {
2794 if (isa<IntegerType>(SrcTy)) {
2795 // If this is a cast from an integer to vector, check to see if the input
2796 // is a trunc or zext of a bitcast from vector. If so, we can replace all
2797 // the casts with a shuffle and (potentially) a bitcast.
2798 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2799 CastInst *SrcCast = cast<CastInst>(Src);
2800 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2801 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2802 if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2803 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2804 return I;
2805 }
2806
2807 // If the input is an 'or' instruction, we may be doing shifts and ors to
2808 // assemble the elements of the vector manually. Try to rip the code out
2809 // and replace it with insertelements.
2810 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2811 return replaceInstUsesWith(CI, V);
2812 }
2813 }
2814
2815 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2816 if (SrcVTy->getNumElements() == 1) {
2817 // If our destination is not a vector, then make this a straight
2818 // scalar-scalar cast.
2819 if (!DestTy->isVectorTy()) {
2820 Value *Elem =
2821 Builder.CreateExtractElement(Src,
2822 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2823 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2824 }
2825
2826 // Otherwise, see if our source is an insert. If so, then use the scalar
2827 // component directly:
2828 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2829 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2830 return new BitCastInst(InsElt->getOperand(1), DestTy);
2831 }
2832
2833 // Convert an artificial vector insert into more analyzable bitwise logic.
2834 unsigned BitWidth = DestTy->getScalarSizeInBits();
2835 Value *X, *Y;
2836 uint64_t IndexC;
2837 if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
2838 m_Value(Y), m_ConstantInt(IndexC)))) &&
2839 DestTy->isIntegerTy() && X->getType() == DestTy &&
2840 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2841 // Adjust for big endian - the LSBs are at the high index.
2842 if (DL.isBigEndian())
2843 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2844
2845 // We only handle (endian-normalized) insert to index 0. Any other insert
2846 // would require a left-shift, so that is an extra instruction.
2847 if (IndexC == 0) {
2848 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2849 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2850 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
2851 Value *AndX = Builder.CreateAnd(X, MaskC);
2852 Value *ZextY = Builder.CreateZExt(Y, DestTy);
2853 return BinaryOperator::CreateOr(AndX, ZextY);
2854 }
2855 }
2856 }
2857
2858 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2859 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2860 // a bitcast to a vector with the same # elts.
2861 Value *ShufOp0 = Shuf->getOperand(0);
2862 Value *ShufOp1 = Shuf->getOperand(1);
2863 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2864 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2865 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2866 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2867 ShufElts == SrcVecElts) {
2868 BitCastInst *Tmp;
2869 // If either of the operands is a cast from CI.getType(), then
2870 // evaluating the shuffle in the casted destination's type will allow
2871 // us to eliminate at least one cast.
2872 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2873 Tmp->getOperand(0)->getType() == DestTy) ||
2874 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2875 Tmp->getOperand(0)->getType() == DestTy)) {
2876 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2877 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2878 // Return a new shuffle vector. Use the same element ID's, as we
2879 // know the vector types match #elts.
2880 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2881 }
2882 }
2883
2884 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
2885 // as a byte/bit swap:
2886 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
2887 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
2888 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
2889 Shuf->hasOneUse() && Shuf->isReverse()) {
2890 unsigned IntrinsicNum = 0;
2891 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2892 SrcTy->getScalarSizeInBits() == 8) {
2893 IntrinsicNum = Intrinsic::bswap;
2894 } else if (SrcTy->getScalarSizeInBits() == 1) {
2895 IntrinsicNum = Intrinsic::bitreverse;
2896 }
2897 if (IntrinsicNum != 0) {
2898 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2899 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2900 Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
2901 CI.getModule(), IntrinsicNum, DestTy);
2902 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2903 return CallInst::Create(BswapOrBitreverse, {ScalarX});
2904 }
2905 }
2906 }
2907
2908 // Handle the A->B->A cast, and there is an intervening PHI node.
2909 if (PHINode *PN = dyn_cast<PHINode>(Src))
2910 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2911 return I;
2912
2913 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2914 return I;
2915
2916 if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
2917 return I;
2918
2919 if (Instruction *I = foldBitCastSelect(CI, Builder))
2920 return I;
2921
2922 if (Value *V = foldCopySignIdioms(CI, Builder, SQ))
2923 return replaceInstUsesWith(CI, V);
2924
2925 return commonCastTransforms(CI);
2926}
2927
2928 Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2929 return commonCastTransforms(CI);
2930}
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, Instruction *CxtI)
Return true if we can evaluate the specified expression tree as type Ty instead of its larger type,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1573
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1666
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111
int32_t exactLogBase2() const
Definition: APInt.h:1783
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
This class represents a conversion between pointers from one address space to another.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:224
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:231
LLVM_ABI std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
Definition: Attributes.cpp:474
BinaryOps getOpcode() const
Definition: InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition: InstrTypes.h:244
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition: InstrTypes.h:248
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:448
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:617
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:612
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static LLVM_ABI unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, Type *DstIntPtrTy)
Determine how a pair of casts can be eliminated, if they can be at all.
static LLVM_ABI CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:619
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:707
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:703
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
@ ICMP_NE
not equal
Definition: InstrTypes.h:700
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:704
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2654
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2328
static LLVM_ABI Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2272
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
const APFloat & getValueAPF() const
Definition: Constants.h:320
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
This is an important base class in LLVM.
Definition: Constant.h:43
static LLVM_ABI Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
Definition: Constants.cpp:808
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
LLVM_ABI bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
Definition: Constants.cpp:298
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition: DataLayout.h:390
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:220
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:850
bool isBigEndian() const
Definition: DataLayout.h:199
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
Definition: DataLayout.cpp:877
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:22
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:592
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:803
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:209
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:762
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:973
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateFPTruncFMF(Value *V, Type *DestTy, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2167
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2571
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2559
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2100
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1010
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2162
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1513
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2238
Value * CreateVScale(Type *Ty, const Twine &Name="")
Create a call to llvm.vscale.<Ty>().
Definition: IRBuilder.h:958
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:522
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2494
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1805
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2204
Value * CreateCopySign(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the copysign intrinsic.
Definition: IRBuilder.h:1058
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1492
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2082
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1551
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2194
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1708
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2277
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:207
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1532
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1795
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1599
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:552
Value * CreateFRemFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1694
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * visitSIToFP(CastInst &CI)
Value * foldPtrToIntOfGEP(Type *IntTy, Value *Ptr)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * foldItoFPtoI(CastInst &FI)
fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate ty...
Instruction * visitFPTrunc(FPTruncInst &CI)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
The core instruction combiner logic.
Definition: InstCombiner.h:48
SimplifyQuery SQ
Definition: InstCombiner.h:77
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:337
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Definition: InstCombiner.h:462
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Definition: InstCombiner.h:456
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:388
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:65
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:377
const DataLayout & DL
Definition: InstCombiner.h:76
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
Definition: InstCombiner.h:433
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Definition: InstCombiner.h:450
DominatorTree & DT
Definition: InstCombiner.h:75
BuilderTy & Builder
Definition: InstCombiner.h:61
const SimplifyQuery & getSimplifyQuery() const
Definition: InstCombiner.h:338
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:366
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:78
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:171
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:82
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool hasNonNeg() const LLVM_READONLY
Determine whether the the nneg flag is set.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:319
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
This instruction constructs a fixed permutation of two input vectors.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
bool empty() const
Definition: SmallVector.h:82
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
This class represents a truncation of integer types.
void setHasNoSignedWrap(bool B)
void setHasNoUnsignedWrap(bool B)
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
static LLVM_ABI Type * getPPC_FP128Ty(LLVMContext &C)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:246
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:270
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:200
LLVM_ABI const fltSemantics & getFltSemantics() const
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:225
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
'undef' values are things that do not have specified contents.
Definition: Constants.h:1420
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1866
Value * getOperand(unsigned i) const
Definition: User.h:232
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
iterator_range< user_iterator > users()
Definition: Value.h:426
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:396
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
This class represents zero extension of integer types.
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:240
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrToIntSameSize_match< OpTy > m_PtrToIntSameSize(const DataLayout &DL, const OpTy &Op)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
cst_pred_ty< is_sign_mask > m_SignMask()
Match an integer or vector with only the sign bit(s) set.
Definition: PatternMatch.h:664
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:619
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:962
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
specific_intval< true > m_SpecificIntAllowPoison(const APInt &V)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:980
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:931
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
Definition: PatternMatch.h:700
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition: MathExtras.h:355
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2414
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition: STLExtras.h:2127
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:267
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:264
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
Definition: APFloat.cpp:265
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:338
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:235
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:241
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:138
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
Definition: KnownFPClass.h:36
Matching combinators.
SimplifyQuery getWithInstruction(const Instruction *I) const