1//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines routines for folding instructions into constants.
10//
11// Also, to supplement the basic IR ConstantExpr simplifications,
12// this file defines some additional folding routines that can make use of
13// DataLayout information. These functions cannot go in IR due to library
14// dependency issues.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/Analysis/ConstantFolding.h"
19#include "llvm/ADT/APFloat.h"
20#include "llvm/ADT/APInt.h"
21#include "llvm/ADT/APSInt.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/StringRef.h"
31#include "llvm/Config/config.h"
32#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/GlobalValue.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
44#include "llvm/IR/Intrinsics.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/IntrinsicsAMDGPU.h"
47#include "llvm/IR/IntrinsicsARM.h"
48#include "llvm/IR/IntrinsicsNVPTX.h"
49#include "llvm/IR/IntrinsicsWebAssembly.h"
50#include "llvm/IR/IntrinsicsX86.h"
52#include "llvm/IR/Operator.h"
53#include "llvm/IR/Type.h"
54#include "llvm/IR/Value.h"
59#include <cassert>
60#include <cerrno>
61#include <cfenv>
62#include <cmath>
63#include <cstdint>
64
65using namespace llvm;
66
67static cl::opt<bool> DisableFPCallFolding(
68 "disable-fp-call-folding",
69 cl::desc("Disable constant-folding of FP intrinsics and libcalls."),
70 cl::init(false), cl::Hidden);
71
72namespace {
73
74//===----------------------------------------------------------------------===//
75// Constant Folding internal helper functions
76//===----------------------------------------------------------------------===//
77
78static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
79 Constant *C, Type *SrcEltTy,
80 unsigned NumSrcElts,
81 const DataLayout &DL) {
82 // Now that we know that the input value is a vector of integers, just shift
83 // and insert them into our result.
84 unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
85 for (unsigned i = 0; i != NumSrcElts; ++i) {
86 Constant *Element;
87 if (DL.isLittleEndian())
88 Element = C->getAggregateElement(NumSrcElts - i - 1);
89 else
90 Element = C->getAggregateElement(i);
91
92 if (isa_and_nonnull<UndefValue>(Element)) {
93 Result <<= BitShift;
94 continue;
95 }
96
97 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
98 if (!ElementCI)
99 return ConstantExpr::getBitCast(C, DestTy);
100
101 Result <<= BitShift;
102 Result |= ElementCI->getValue().zext(Result.getBitWidth());
103 }
104
105 return nullptr;
106}
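// As a rough illustration (assuming a <2 x i16> source vector):
//   bitcast (<2 x i16> <i16 1, i16 2> to i32)
// packs the elements into a single APInt and folds to i32 131073 (0x00020001)
// on a little-endian target, or i32 65538 (0x00010002) on a big-endian one.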
107
108/// Constant fold bitcast, symbolically evaluating it with DataLayout.
109/// This always returns a non-null constant, but it may be a
110/// ConstantExpr if unfoldable.
111Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
112 assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
113 "Invalid constantexpr bitcast!");
114
115 // Catch the obvious splat cases.
116 if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
117 return Res;
118
119 if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
120 // Handle a vector->scalar integer/fp cast.
121 if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
122 unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
123 Type *SrcEltTy = VTy->getElementType();
124
125 // If the vector is a vector of floating point, convert it to vector of int
126 // to simplify things.
127 if (SrcEltTy->isFloatingPointTy()) {
128 unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
129 auto *SrcIVTy = FixedVectorType::get(
130 IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
131 // Ask IR to do the conversion now that #elts line up.
132 C = ConstantExpr::getBitCast(C, SrcIVTy);
133 }
134
135 APInt Result(DL.getTypeSizeInBits(DestTy), 0);
136 if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
137 SrcEltTy, NumSrcElts, DL))
138 return CE;
139
140 if (isa<IntegerType>(DestTy))
141 return ConstantInt::get(DestTy, Result);
142
143 APFloat FP(DestTy->getFltSemantics(), Result);
144 return ConstantFP::get(DestTy->getContext(), FP);
145 }
146 }
147
148 // The code below only handles casts to vectors currently.
149 auto *DestVTy = dyn_cast<VectorType>(DestTy);
150 if (!DestVTy)
151 return ConstantExpr::getBitCast(C, DestTy);
152
153 // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
154 // vector so the code below can handle it uniformly.
155 if (!isa<VectorType>(C->getType()) &&
156 (isa<ConstantFP>(C) || isa<ConstantInt>(C))) {
157 Constant *Ops = C; // don't take the address of C!
158 return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
159 }
160
161 // Some of what follows may extend to cover scalable vectors but the current
162 // implementation is fixed length specific.
163 if (!isa<FixedVectorType>(C->getType()))
164 return ConstantExpr::getBitCast(C, DestTy);
165
166 // If this is a bitcast from constant vector -> vector, fold it.
167 if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C) &&
168 !isa<ConstantInt>(C) && !isa<ConstantFP>(C))
169 return ConstantExpr::getBitCast(C, DestTy);
170
171 // If the element types match, IR can fold it.
172 unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
173 unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
174 if (NumDstElt == NumSrcElt)
175 return ConstantExpr::getBitCast(C, DestTy);
176
177 Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
178 Type *DstEltTy = DestVTy->getElementType();
179
180 // Otherwise, we're changing the number of elements in a vector, which
181 // requires endianness information to do the right thing. For example,
182 // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
183 // folds to (little endian):
184 // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
185 // and to (big endian):
186 // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
187
188 // First things first: we only want to think about integers here, so if
189 // we have something in FP form, recast it as integer.
190 if (DstEltTy->isFloatingPointTy()) {
191 // Fold to a vector of integers with the same size as our FP type.
192 unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
193 auto *DestIVTy = FixedVectorType::get(
194 IntegerType::get(C->getContext(), FPWidth), NumDstElt);
195 // Recursively handle this integer conversion, if possible.
196 C = FoldBitCast(C, DestIVTy, DL);
197
198 // Finally, IR can handle this now that #elts line up.
199 return ConstantExpr::getBitCast(C, DestTy);
200 }
201
202 // Okay, we know the destination is integer, if the input is FP, convert
203 // it to integer first.
204 if (SrcEltTy->isFloatingPointTy()) {
205 unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
206 auto *SrcIVTy = FixedVectorType::get(
207 IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
208 // Ask IR to do the conversion now that #elts line up.
209 C = ConstantExpr::getBitCast(C, SrcIVTy);
210 assert((isa<ConstantVector>(C) || // FIXME: Remove ConstantVector.
211 isa<ConstantDataVector>(C) || isa<ConstantInt>(C)) &&
212 "Constant folding cannot fail for plain fp->int bitcast!");
213 }
214
215 // Now we know that the input and output vectors are both integer vectors
216 // of the same size, and that their #elements is not the same. Do the
217 // conversion here, which depends on whether the input or output has
218 // more elements.
219 bool isLittleEndian = DL.isLittleEndian();
220
221 SmallVector<Constant*, 32> Result;
222 if (NumDstElt < NumSrcElt) {
223 // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
224 Constant *Zero = Constant::getNullValue(DstEltTy);
225 unsigned Ratio = NumSrcElt/NumDstElt;
226 unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
227 unsigned SrcElt = 0;
228 for (unsigned i = 0; i != NumDstElt; ++i) {
229 // Build each element of the result.
230 Constant *Elt = Zero;
231 unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
232 for (unsigned j = 0; j != Ratio; ++j) {
233 Constant *Src = C->getAggregateElement(SrcElt++);
234 if (isa_and_nonnull<UndefValue>(Src))
235 Src = Constant::getNullValue(
236 cast<VectorType>(C->getType())->getElementType());
237 else
238 Src = dyn_cast_or_null<ConstantInt>(Src);
239 if (!Src) // Reject constantexpr elements.
240 return ConstantExpr::getBitCast(C, DestTy);
241
242 // Zero extend the element to the right size.
243 Src = ConstantFoldCastOperand(Instruction::ZExt, Src, Elt->getType(),
244 DL);
245 assert(Src && "Constant folding cannot fail on plain integers");
246
247 // Shift it to the right place, depending on endianness.
248 Src = ConstantFoldBinaryOpOperands(
249 Instruction::Shl, Src, ConstantInt::get(Src->getType(), ShiftAmt),
250 DL);
251 assert(Src && "Constant folding cannot fail on plain integers");
252
253 ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
254
255 // Mix it in.
256 Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL);
257 assert(Elt && "Constant folding cannot fail on plain integers");
258 }
259 Result.push_back(Elt);
260 }
261 return ConstantVector::get(Result);
262 }
263
264 // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
265 unsigned Ratio = NumDstElt/NumSrcElt;
266 unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
267
268 // Loop over each source value, expanding into multiple results.
269 for (unsigned i = 0; i != NumSrcElt; ++i) {
270 auto *Element = C->getAggregateElement(i);
271
272 if (!Element) // Reject constantexpr elements.
273 return ConstantExpr::getBitCast(C, DestTy);
274
275 if (isa<UndefValue>(Element)) {
276 // Correctly propagate undef values.
277 Result.append(Ratio, UndefValue::get(DstEltTy));
278 continue;
279 }
280
281 auto *Src = dyn_cast<ConstantInt>(Element);
282 if (!Src)
283 return ConstantExpr::getBitCast(C, DestTy);
284
285 unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
286 for (unsigned j = 0; j != Ratio; ++j) {
287 // Shift the piece of the value into the right place, depending on
288 // endianness.
289 APInt Elt = Src->getValue().lshr(ShiftAmt);
290 ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
291
292 // Truncate and remember this piece.
293 Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
294 }
295 }
296
297 return ConstantVector::get(Result);
298}
299
300} // end anonymous namespace
301
302/// If this constant is a constant offset from a global, return the global and
303/// the constant. Because of constantexprs, this function is recursive.
304bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
305 APInt &Offset, const DataLayout &DL,
306 DSOLocalEquivalent **DSOEquiv) {
307 if (DSOEquiv)
308 *DSOEquiv = nullptr;
309
310 // Trivial case, constant is the global.
311 if ((GV = dyn_cast<GlobalValue>(C))) {
312 unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
313 Offset = APInt(BitWidth, 0);
314 return true;
315 }
316
317 if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
318 if (DSOEquiv)
319 *DSOEquiv = FoundDSOEquiv;
320 GV = FoundDSOEquiv->getGlobalValue();
321 unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
322 Offset = APInt(BitWidth, 0);
323 return true;
324 }
325
326 // Otherwise, if this isn't a constant expr, bail out.
327 auto *CE = dyn_cast<ConstantExpr>(C);
328 if (!CE) return false;
329
330 // Look through ptr->int and ptr->ptr casts.
331 if (CE->getOpcode() == Instruction::PtrToInt ||
332 CE->getOpcode() == Instruction::BitCast)
333 return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
334 DSOEquiv);
335
336 // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
337 auto *GEP = dyn_cast<GEPOperator>(CE);
338 if (!GEP)
339 return false;
340
341 unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
342 APInt TmpOffset(BitWidth, 0);
343
344 // If the base isn't a global+constant, we aren't either.
345 if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
346 DSOEquiv))
347 return false;
348
349 // Otherwise, add any offset that our operands provide.
350 if (!GEP->accumulateConstantOffset(DL, TmpOffset))
351 return false;
352
353 Offset = TmpOffset;
354 return true;
355}
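// As a rough illustration (assuming a 64-bit index type): for
//   getelementptr inbounds ([5 x i32], ptr @a, i64 0, i64 3)
// this returns true with GV = @a and Offset = 12 (three i32 elements).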
356
357Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
358 const DataLayout &DL) {
359 do {
360 Type *SrcTy = C->getType();
361 if (SrcTy == DestTy)
362 return C;
363
364 TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
365 TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
366 if (!TypeSize::isKnownGE(SrcSize, DestSize))
367 return nullptr;
368
369 // Catch the obvious splat cases (since all-zeros can coerce non-integral
370 // pointers legally).
371 if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
372 return Res;
373
374 // If the type sizes are the same and a cast is legal, just directly
375 // cast the constant.
376 // But be careful not to coerce non-integral pointers illegally.
377 if (SrcSize == DestSize &&
378 DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
379 DL.isNonIntegralPointerType(DestTy->getScalarType())) {
380 Instruction::CastOps Cast = Instruction::BitCast;
381 // If we are going from a pointer to int or vice versa, we spell the cast
382 // differently.
383 if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
384 Cast = Instruction::IntToPtr;
385 else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
386 Cast = Instruction::PtrToInt;
387
388 if (CastInst::castIsValid(Cast, C, DestTy))
389 return ConstantFoldCastOperand(Cast, C, DestTy, DL);
390 }
391
392 // If this isn't an aggregate type, there is nothing we can do to drill down
393 // and find a bitcastable constant.
394 if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
395 return nullptr;
396
397 // We're simulating a load through a pointer that was bitcast to point to
398 // a different type, so we can try to walk down through the initial
399 // elements of an aggregate to see if some part of the aggregate is
400 // castable to implement the "load" semantic model.
401 if (SrcTy->isStructTy()) {
402 // Struct types might have leading zero-length elements like [0 x i32],
403 // which are certainly not what we are looking for, so skip them.
404 unsigned Elem = 0;
405 Constant *ElemC;
406 do {
407 ElemC = C->getAggregateElement(Elem++);
408 } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
409 C = ElemC;
410 } else {
411 // For non-byte-sized vector elements, the first element is not
412 // necessarily located at the vector base address.
413 if (auto *VT = dyn_cast<VectorType>(SrcTy))
414 if (!DL.typeSizeEqualsStoreSize(VT->getElementType()))
415 return nullptr;
416
417 C = C->getAggregateElement(0u);
418 }
419 } while (C);
420
421 return nullptr;
422}
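// As a rough illustration: for a constant of type { [0 x i32], i64 } whose
// i64 member is 42, simulating an i64 load through a bitcast pointer skips
// the zero-length leading element and folds to i64 42.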
423
424namespace {
425
426/// Recursive helper to read bits out of global. C is the constant being copied
427/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
428/// results into and BytesLeft is the number of bytes left in
429/// the CurPtr buffer. DL is the DataLayout.
430bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
431 unsigned BytesLeft, const DataLayout &DL) {
432 assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
433 "Out of range access");
434
435 // Reading type padding, return zero.
436 if (ByteOffset >= DL.getTypeStoreSize(C->getType()))
437 return true;
438
439 // If this element is zero or undefined, we can just return since *CurPtr is
440 // zero initialized.
441 if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
442 return true;
443
444 if (auto *CI = dyn_cast<ConstantInt>(C)) {
445 if ((CI->getBitWidth() & 7) != 0)
446 return false;
447 const APInt &Val = CI->getValue();
448 unsigned IntBytes = unsigned(CI->getBitWidth()/8);
449
450 for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
451 unsigned n = ByteOffset;
452 if (!DL.isLittleEndian())
453 n = IntBytes - n - 1;
454 CurPtr[i] = Val.extractBits(8, n * 8).getZExtValue();
455 ++ByteOffset;
456 }
457 return true;
458 }
459
460 if (auto *CFP = dyn_cast<ConstantFP>(C)) {
461 if (CFP->getType()->isDoubleTy()) {
462 C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
463 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
464 }
465 if (CFP->getType()->isFloatTy()){
466 C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
467 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
468 }
469 if (CFP->getType()->isHalfTy()){
470 C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
471 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
472 }
473 return false;
474 }
475
476 if (auto *CS = dyn_cast<ConstantStruct>(C)) {
477 const StructLayout *SL = DL.getStructLayout(CS->getType());
478 unsigned Index = SL->getElementContainingOffset(ByteOffset);
479 uint64_t CurEltOffset = SL->getElementOffset(Index);
480 ByteOffset -= CurEltOffset;
481
482 while (true) {
483 // If the element access is to the element itself and not to tail padding,
484 // read the bytes from the element.
485 uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
486
487 if (ByteOffset < EltSize &&
488 !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
489 BytesLeft, DL))
490 return false;
491
492 ++Index;
493
494 // Check to see if we read from the last struct element, if so we're done.
495 if (Index == CS->getType()->getNumElements())
496 return true;
497
498 // If we read all of the bytes we needed from this element we're done.
499 uint64_t NextEltOffset = SL->getElementOffset(Index);
500
501 if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
502 return true;
503
504 // Move to the next element of the struct.
505 CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
506 BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
507 ByteOffset = 0;
508 CurEltOffset = NextEltOffset;
509 }
510 // not reached.
511 }
512
513 if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
514 isa<ConstantDataSequential>(C)) {
515 uint64_t NumElts, EltSize;
516 Type *EltTy;
517 if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
518 NumElts = AT->getNumElements();
519 EltTy = AT->getElementType();
520 EltSize = DL.getTypeAllocSize(EltTy);
521 } else {
522 NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
523 EltTy = cast<FixedVectorType>(C->getType())->getElementType();
524 // TODO: For non-byte-sized vectors, current implementation assumes there is
525 // padding to the next byte boundary between elements.
526 if (!DL.typeSizeEqualsStoreSize(EltTy))
527 return false;
528
529 EltSize = DL.getTypeStoreSize(EltTy);
530 }
531 uint64_t Index = ByteOffset / EltSize;
532 uint64_t Offset = ByteOffset - Index * EltSize;
533
534 for (; Index != NumElts; ++Index) {
535 if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
536 BytesLeft, DL))
537 return false;
538
539 uint64_t BytesWritten = EltSize - Offset;
540 assert(BytesWritten <= EltSize && "Not indexing into this element?");
541 if (BytesWritten >= BytesLeft)
542 return true;
543
544 Offset = 0;
545 BytesLeft -= BytesWritten;
546 CurPtr += BytesWritten;
547 }
548 return true;
549 }
550
551 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
552 if (CE->getOpcode() == Instruction::IntToPtr &&
553 CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
554 return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
555 BytesLeft, DL);
556 }
557 }
558
559 // Otherwise, unknown initializer type.
560 return false;
561}
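// As a rough illustration: reading 4 bytes at offset 0 from the constant
// i32 0x01020304 fills CurPtr with {0x04, 0x03, 0x02, 0x01} on a
// little-endian target and with {0x01, 0x02, 0x03, 0x04} on a big-endian one.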
562
563Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
564 int64_t Offset, const DataLayout &DL) {
565 // Bail out early. We do not expect to load from a scalable global variable.
566 if (isa<ScalableVectorType>(LoadTy))
567 return nullptr;
568
569 auto *IntType = dyn_cast<IntegerType>(LoadTy);
570
571 // If this isn't an integer load we can't fold it directly.
572 if (!IntType) {
573 // If this is a non-integer load, we can try folding it as an int load and
574 // then bitcast the result. This can be useful for union cases. Note
575 // that address spaces don't matter here since we're not going to produce
576 // an actual new load.
577 if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
578 !LoadTy->isVectorTy())
579 return nullptr;
580
581 Type *MapTy = Type::getIntNTy(C->getContext(),
582 DL.getTypeSizeInBits(LoadTy).getFixedValue());
583 if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
584 if (Res->isNullValue() && !LoadTy->isX86_AMXTy())
585 // Materializing a zero can be done trivially without a bitcast
586 return Constant::getNullValue(LoadTy);
587 Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
588 Res = FoldBitCast(Res, CastTy, DL);
589 if (LoadTy->isPtrOrPtrVectorTy()) {
590 // For a vector of pointers, we first need to convert to a vector of integers, then do a vector inttoptr.
591 if (Res->isNullValue() && !LoadTy->isX86_AMXTy())
592 return Constant::getNullValue(LoadTy);
593 if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
594 // Be careful not to replace a load of an addrspace value with an inttoptr here
595 return nullptr;
596 Res = ConstantExpr::getIntToPtr(Res, LoadTy);
597 }
598 return Res;
599 }
600 return nullptr;
601 }
602
603 unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
604 if (BytesLoaded > 32 || BytesLoaded == 0)
605 return nullptr;
606
607 // If we're not accessing anything in this constant, the result is undefined.
608 if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
609 return PoisonValue::get(IntType);
610
611 // TODO: We should be able to support scalable types.
612 TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
613 if (InitializerSize.isScalable())
614 return nullptr;
615
616 // If we're not accessing anything in this constant, the result is undefined.
617 if (Offset >= (int64_t)InitializerSize.getFixedValue())
618 return PoisonValue::get(IntType);
619
620 unsigned char RawBytes[32] = {0};
621 unsigned char *CurPtr = RawBytes;
622 unsigned BytesLeft = BytesLoaded;
623
624 // If we're loading off the beginning of the global, some bytes may be valid.
625 if (Offset < 0) {
626 CurPtr += -Offset;
627 BytesLeft += Offset;
628 Offset = 0;
629 }
630
631 if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
632 return nullptr;
633
634 APInt ResultVal = APInt(IntType->getBitWidth(), 0);
635 if (DL.isLittleEndian()) {
636 ResultVal = RawBytes[BytesLoaded - 1];
637 for (unsigned i = 1; i != BytesLoaded; ++i) {
638 ResultVal <<= 8;
639 ResultVal |= RawBytes[BytesLoaded - 1 - i];
640 }
641 } else {
642 ResultVal = RawBytes[0];
643 for (unsigned i = 1; i != BytesLoaded; ++i) {
644 ResultVal <<= 8;
645 ResultVal |= RawBytes[i];
646 }
647 }
648
649 return ConstantInt::get(IntType->getContext(), ResultVal);
650}
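// As a rough illustration (little-endian target assumed): an i16 load at byte
// offset 2 from a global initialized to i32 0x11223344 reads the two
// high-order bytes and folds to i16 0x1122.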
651
652} // anonymous namespace
653
654// If GV is a constant with an initializer, read its representation starting
655// at Offset and return it as a constant array of unsigned char. Otherwise
656// return null.
657Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,
658 uint64_t Offset) {
659 if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
660 return nullptr;
661
662 const DataLayout &DL = GV->getDataLayout();
663 Constant *Init = const_cast<Constant *>(GV->getInitializer());
664 TypeSize InitSize = DL.getTypeAllocSize(Init->getType());
665 if (InitSize < Offset)
666 return nullptr;
667
668 uint64_t NBytes = InitSize - Offset;
669 if (NBytes > UINT16_MAX)
670 // Bail for large initializers in excess of 64K to avoid allocating
671 // too much memory.
672 // Offset is assumed to be less than or equal to InitSize (this
673 // is enforced in ReadDataFromGlobal).
674 return nullptr;
675
676 SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));
677 unsigned char *CurPtr = RawBytes.data();
678
679 if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL))
680 return nullptr;
681
682 return ConstantDataArray::get(GV->getContext(), RawBytes);
683}
684
685/// If this Offset points exactly to the start of an aggregate element, return
686/// that element, otherwise return nullptr.
687static Constant *getConstantAtOffset(Constant *Base, APInt Offset,
688 const DataLayout &DL) {
689 if (Offset.isZero())
690 return Base;
691
692 if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
693 return nullptr;
694
695 Type *ElemTy = Base->getType();
696 SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
697 if (!Offset.isZero() || !Indices[0].isZero())
698 return nullptr;
699
700 Constant *C = Base;
701 for (const APInt &Index : drop_begin(Indices)) {
702 if (Index.isNegative() || Index.getActiveBits() >= 32)
703 return nullptr;
704
705 C = C->getAggregateElement(Index.getZExtValue());
706 if (!C)
707 return nullptr;
708 }
709
710 return C;
711}
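// As a rough illustration: with Base = [4 x i32] [i32 1, i32 2, i32 3, i32 4],
// an Offset of 8 returns the element i32 3, while an Offset of 6 returns
// nullptr because it does not point at the start of an element.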
712
713Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
714 const APInt &Offset,
715 const DataLayout &DL) {
716 if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
717 if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
718 return Result;
719
720 // Explicitly check for out-of-bounds access, so we return poison even if the
721 // constant is a uniform value.
722 TypeSize Size = DL.getTypeAllocSize(C->getType());
723 if (!Size.isScalable() && Offset.sge(Size.getFixedValue()))
724 return PoisonValue::get(Ty);
725
726 // Try an offset-independent fold of a uniform value.
727 if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty, DL))
728 return Result;
729
730 // Try hard to fold loads from bitcasted strange and non-type-safe things.
731 if (Offset.getSignificantBits() <= 64)
732 if (Constant *Result =
733 FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))
734 return Result;
735
736 return nullptr;
737}
738
739Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
740 const DataLayout &DL) {
741 return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL);
742}
743
744Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
745 APInt Offset,
746 const DataLayout &DL) {
747 // We can only fold loads from constant globals with a definitive initializer.
748 // Check this upfront, to skip expensive offset calculations.
749 auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
750 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
751 return nullptr;
752
753 C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
754 DL, Offset, /* AllowNonInbounds */ true));
755
756 if (C == GV)
757 if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
758 Offset, DL))
759 return Result;
760
761 // If this load comes from anywhere in a uniform constant global, the value
762 // is always the same, regardless of the loaded offset.
763 return ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty, DL);
764}
765
766Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
767 const DataLayout &DL) {
768 APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
769 return ConstantFoldLoadFromConstPtr(C, Ty, std::move(Offset), DL);
770}
771
772Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty,
773 const DataLayout &DL) {
774 if (isa<PoisonValue>(C))
775 return PoisonValue::get(Ty);
776 if (isa<UndefValue>(C))
777 return UndefValue::get(Ty);
778 // If padding is needed when storing C to memory, then it isn't considered
779 // uniform.
780 if (!DL.typeSizeEqualsStoreSize(C->getType()))
781 return nullptr;
782 if (C->isNullValue() && !Ty->isX86_AMXTy())
783 return Constant::getNullValue(Ty);
784 if (C->isAllOnesValue() &&
785 (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))
786 return Constant::getAllOnesValue(Ty);
787 return nullptr;
788}
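// As a rough illustration: a load at any offset from a zeroinitializer global
// folds to the zero value of the loaded type, and a load of an integer or FP
// type from an all-ones initializer folds to the all-ones value of that type.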
789
790namespace {
791
792/// One of Op0/Op1 is a constant expression.
793/// Attempt to symbolically evaluate the result of a binary operator merging
794/// these together, symbolically evaluating the operation with the help of
795/// the provided DataLayout.
796Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
797 const DataLayout &DL) {
798 // SROA
799
800 // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
801 // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
802 // bits.
803
804 if (Opc == Instruction::And) {
805 KnownBits Known0 = computeKnownBits(Op0, DL);
806 KnownBits Known1 = computeKnownBits(Op1, DL);
807 if ((Known1.One | Known0.Zero).isAllOnes()) {
808 // All the bits of Op0 that the 'and' could be masking are already zero.
809 return Op0;
810 }
811 if ((Known0.One | Known1.Zero).isAllOnes()) {
812 // All the bits of Op1 that the 'and' could be masking are already zero.
813 return Op1;
814 }
815
816 Known0 &= Known1;
817 if (Known0.isConstant())
818 return ConstantInt::get(Op0->getType(), Known0.getConstant());
819 }
820
821 // If the constant expr is something like &A[123] - &A[4].f, fold this into a
822 // constant. This happens frequently when iterating over a global array.
823 if (Opc == Instruction::Sub) {
824 GlobalValue *GV1, *GV2;
825 APInt Offs1, Offs2;
826
827 if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
828 if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
829 unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());
830
831 // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
832 // PtrToInt may change the bitwidth so we have to convert to the right size
833 // first.
834 return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
835 Offs2.zextOrTrunc(OpSize));
836 }
837 }
838
839 return nullptr;
840}
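// As a rough illustration: subtracting ptrtoint(@g) from
// ptrtoint(getelementptr inbounds (i8, ptr @g, i64 24)) folds to 24, because
// both operands are recognized as constant offsets from the same global @g.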
841
842/// If array indices are not pointer-sized integers, explicitly cast them so
843/// that they aren't implicitly cast by the getelementptr.
844Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
845 Type *ResultTy, GEPNoWrapFlags NW,
846 std::optional<ConstantRange> InRange,
847 const DataLayout &DL, const TargetLibraryInfo *TLI) {
848 Type *IntIdxTy = DL.getIndexType(ResultTy);
849 Type *IntIdxScalarTy = IntIdxTy->getScalarType();
850
851 bool Any = false;
852 SmallVector<Constant *, 32> NewIdxs;
853 for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
854 if ((i == 1 ||
855 !isa<StructType>(GetElementPtrInst::getIndexedType(
856 SrcElemTy, Ops.slice(1, i - 1)))) &&
857 Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
858 Any = true;
859 Type *NewType =
860 Ops[i]->getType()->isVectorTy() ? IntIdxTy : IntIdxScalarTy;
861 Constant *NewIdx = ConstantFoldCastOperand(
862 CastInst::getCastOpcode(Ops[i], true, NewType, true), Ops[i], NewType,
863 DL);
864 if (!NewIdx)
865 return nullptr;
866 NewIdxs.push_back(NewIdx);
867 } else
868 NewIdxs.push_back(Ops[i]);
869 }
870
871 if (!Any)
872 return nullptr;
873
874 Constant *C =
875 ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs, NW, InRange);
876 return ConstantFoldConstant(C, DL, TLI);
877}
878
879/// If we can symbolically evaluate the GEP constant expression, do so.
880Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
881 ArrayRef<Constant *> Ops,
882 const DataLayout &DL,
883 const TargetLibraryInfo *TLI) {
884 Type *SrcElemTy = GEP->getSourceElementType();
885 Type *ResTy = GEP->getType();
886 if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
887 return nullptr;
888
889 if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, GEP->getNoWrapFlags(),
890 GEP->getInRange(), DL, TLI))
891 return C;
892
893 Constant *Ptr = Ops[0];
894 if (!Ptr->getType()->isPointerTy())
895 return nullptr;
896
897 Type *IntIdxTy = DL.getIndexType(Ptr->getType());
898
899 for (unsigned i = 1, e = Ops.size(); i != e; ++i)
900 if (!isa<ConstantInt>(Ops[i]) || !Ops[i]->getType()->isIntegerTy())
901 return nullptr;
902
903 unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
904 APInt Offset = APInt(
905 BitWidth,
906 DL.getIndexedOffsetInType(
907 SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)),
908 /*isSigned=*/true, /*implicitTrunc=*/true);
909
910 std::optional<ConstantRange> InRange = GEP->getInRange();
911 if (InRange)
912 InRange = InRange->sextOrTrunc(BitWidth);
913
914 // If this is a GEP of a GEP, fold it all into a single GEP.
915 GEPNoWrapFlags NW = GEP->getNoWrapFlags();
916 bool Overflow = false;
917 while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
918 NW &= GEP->getNoWrapFlags();
919
920 SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));
921
922 // Do not try to incorporate the sub-GEP if some index is not a number.
923 bool AllConstantInt = true;
924 for (Value *NestedOp : NestedOps)
925 if (!isa<ConstantInt>(NestedOp)) {
926 AllConstantInt = false;
927 break;
928 }
929 if (!AllConstantInt)
930 break;
931
932 // Adjust inrange offset and intersect inrange attributes
933 if (auto GEPRange = GEP->getInRange()) {
934 auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset);
935 InRange =
936 InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange;
937 }
938
939 Ptr = cast<Constant>(GEP->getOperand(0));
940 SrcElemTy = GEP->getSourceElementType();
941 Offset = Offset.sadd_ov(
942 APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps),
943 /*isSigned=*/true, /*implicitTrunc=*/true),
944 Overflow);
945 }
946
947 // Preserving nusw (without inbounds) also requires that the offset
948 // additions did not overflow.
949 if (NW.hasNoUnsignedSignedWrap() && !NW.isInBounds() && Overflow)
950 NW = NW.withoutNoUnsignedSignedWrap();
951
952 // If the base value for this address is a literal integer value, fold the
953 // getelementptr to the resulting integer value casted to the pointer type.
954 APInt BasePtr(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
955 if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {
956 if (CE->getOpcode() == Instruction::IntToPtr) {
957 if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
958 BasePtr = Base->getValue().zextOrTrunc(BasePtr.getBitWidth());
959 }
960 }
961
962 auto *PTy = cast<PointerType>(Ptr->getType());
963 if ((Ptr->isNullValue() || BasePtr != 0) &&
964 !DL.isNonIntegralPointerType(PTy)) {
965 // If the index size is smaller than the pointer size, add to the low
966 // bits only.
967 BasePtr.insertBits(BasePtr.trunc(BitWidth) + Offset, 0);
968 Constant *C = ConstantInt::get(Ptr->getContext(), BasePtr);
969 return ConstantExpr::getIntToPtr(C, ResTy);
970 }
971
972 // Try to infer inbounds for GEPs of globals.
973 if (!NW.isInBounds() && Offset.isNonNegative()) {
974 bool CanBeNull, CanBeFreed;
975 uint64_t DerefBytes =
976 Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
977 if (DerefBytes != 0 && !CanBeNull && Offset.sle(DerefBytes))
978 NW |= GEPNoWrapFlags::inBounds();
979 }
980
981 // nusw + nneg -> nuw
982 if (NW.hasNoUnsignedSignedWrap() && Offset.isNonNegative())
983 NW |= GEPNoWrapFlags::noUnsignedWrap();
984
985 // Otherwise canonicalize this to a single ptradd.
986 LLVMContext &Ctx = Ptr->getContext();
987 return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), Ptr,
988 ConstantInt::get(Ctx, Offset), NW,
989 InRange);
990}
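// As a rough illustration: a GEP of a GEP such as
//   getelementptr i32, ptr getelementptr (i32, ptr @g, i64 2), i64 3
// accumulates both offsets (2*4 + 3*4 bytes) and is canonicalized to a single
// byte-based form, roughly: getelementptr i8, ptr @g, i64 20.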
991
992/// Attempt to constant fold an instruction with the
993/// specified opcode and operands. If successful, the constant result is
994/// returned, if not, null is returned. Note that this function can fail when
995/// attempting to fold instructions like loads and stores, which have no
996/// constant expression form.
997Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
998 ArrayRef<Constant *> Ops,
999 const DataLayout &DL,
1000 const TargetLibraryInfo *TLI,
1001 bool AllowNonDeterministic) {
1002 Type *DestTy = InstOrCE->getType();
1003
1004 if (Instruction::isUnaryOp(Opcode))
1005 return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);
1006
1007 if (Instruction::isBinaryOp(Opcode)) {
1008 switch (Opcode) {
1009 default:
1010 break;
1011 case Instruction::FAdd:
1012 case Instruction::FSub:
1013 case Instruction::FMul:
1014 case Instruction::FDiv:
1015 case Instruction::FRem:
1016 // Handle floating point instructions separately to account for denormals
1017 // TODO: If a constant expression is being folded rather than an
1018 // instruction, denormals will not be flushed/treated as zero
1019 if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
1020 return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I,
1021 AllowNonDeterministic);
1022 }
1023 }
1024 return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
1025 }
1026
1027 if (Instruction::isCast(Opcode))
1028 return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);
1029
1030 if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
1031 Type *SrcElemTy = GEP->getSourceElementType();
1032 if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
1033 return nullptr;
1034
1035 if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
1036 return C;
1037
1038 return ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], Ops.slice(1),
1039 GEP->getNoWrapFlags(),
1040 GEP->getInRange());
1041 }
1042
1043 if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
1044 return CE->getWithOperands(Ops);
1045
1046 switch (Opcode) {
1047 default: return nullptr;
1048 case Instruction::ICmp:
1049 case Instruction::FCmp: {
1050 auto *C = cast<CmpInst>(InstOrCE);
1051 return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1],
1052 DL, TLI, C);
1053 }
1054 case Instruction::Freeze:
1055 return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
1056 case Instruction::Call:
1057 if (auto *F = dyn_cast<Function>(Ops.back())) {
1058 const auto *Call = cast<CallBase>(InstOrCE);
1059 if (canConstantFoldCallTo(Call, F))
1060 return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI,
1061 AllowNonDeterministic);
1062 }
1063 return nullptr;
1064 case Instruction::Select:
1065 return ConstantFoldSelectInstruction(Ops[0], Ops[1], Ops[2]);
1066 case Instruction::ExtractElement:
1067 return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
1068 case Instruction::ExtractValue:
1069 return ConstantFoldExtractValueInstruction(
1070 Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
1071 case Instruction::InsertElement:
1072 return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
1073 case Instruction::InsertValue:
1074 return ConstantFoldInsertValueInstruction(
1075 Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices());
1076 case Instruction::ShuffleVector:
1077 return ConstantExpr::getShuffleVector(
1078 Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
1079 case Instruction::Load: {
1080 const auto *LI = dyn_cast<LoadInst>(InstOrCE);
1081 if (LI->isVolatile())
1082 return nullptr;
1083 return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
1084 }
1085 }
1086}
1087
1088} // end anonymous namespace
1089
1090//===----------------------------------------------------------------------===//
1091// Constant Folding public APIs
1092//===----------------------------------------------------------------------===//
1093
1094namespace {
1095
1096Constant *
1097ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
1098 const TargetLibraryInfo *TLI,
1099 SmallDenseMap<Constant *, Constant *> &FoldedOps) {
1100 if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
1101 return const_cast<Constant *>(C);
1102
1103 SmallVector<Constant *, 8> Ops;
1104 for (const Use &OldU : C->operands()) {
1105 Constant *OldC = cast<Constant>(&OldU);
1106 Constant *NewC = OldC;
1107 // Recursively fold the ConstantExpr's operands. If we have already folded
1108 // a ConstantExpr, we don't have to process it again.
1109 if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
1110 auto It = FoldedOps.find(OldC);
1111 if (It == FoldedOps.end()) {
1112 NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
1113 FoldedOps.insert({OldC, NewC});
1114 } else {
1115 NewC = It->second;
1116 }
1117 }
1118 Ops.push_back(NewC);
1119 }
1120
1121 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
1122 if (Constant *Res = ConstantFoldInstOperandsImpl(
1123 CE, CE->getOpcode(), Ops, DL, TLI, /*AllowNonDeterministic=*/true))
1124 return Res;
1125 return const_cast<Constant *>(C);
1126 }
1127
1128 assert(isa<ConstantVector>(C));
1129 return ConstantVector::get(Ops);
1130}
1131
1132} // end anonymous namespace
1133
1134Constant *llvm::ConstantFoldInstruction(Instruction *I,
1135 const DataLayout &DL,
1136 const TargetLibraryInfo *TLI) {
1137 // Handle PHI nodes quickly here...
1138 if (auto *PN = dyn_cast<PHINode>(I)) {
1139 Constant *CommonValue = nullptr;
1140
1141 SmallDenseMap<Constant *, Constant *> FoldedOps;
1142 for (Value *Incoming : PN->incoming_values()) {
1143 // If the incoming value is undef then skip it. Note that while we could
1144 // skip the value if it is equal to the phi node itself we choose not to
1145 // because that would break the rule that constant folding only applies if
1146 // all operands are constants.
1147 if (isa<UndefValue>(Incoming))
1148 continue;
1149 // If the incoming value is not a constant, then give up.
1150 auto *C = dyn_cast<Constant>(Incoming);
1151 if (!C)
1152 return nullptr;
1153 // Fold the PHI's operands.
1154 C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
1155 // If the incoming value is a different constant to
1156 // the one we saw previously, then give up.
1157 if (CommonValue && C != CommonValue)
1158 return nullptr;
1159 CommonValue = C;
1160 }
1161
1162 // If we reach here, all incoming values are the same constant or undef.
1163 return CommonValue ? CommonValue : UndefValue::get(PN->getType());
1164 }
1165
1166 // Scan the operand list, checking to see if they are all constants, if so,
1167 // hand off to ConstantFoldInstOperandsImpl.
1168 if (!all_of(I->operands(), [](const Use &U) { return isa<Constant>(U); }))
1169 return nullptr;
1170
1171 SmallDenseMap<Constant *, Constant *> FoldedOps;
1172 SmallVector<Constant *, 8> Ops;
1173 for (const Use &OpU : I->operands()) {
1174 auto *Op = cast<Constant>(&OpU);
1175 // Fold the Instruction's operands.
1176 Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
1177 Ops.push_back(Op);
1178 }
1179
1180 return ConstantFoldInstOperands(I, Ops, DL, TLI);
1181}
1182
1183Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
1184 const TargetLibraryInfo *TLI) {
1185 SmallDenseMap<Constant *, Constant *> FoldedOps;
1186 return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
1187}
1188
1189Constant *llvm::ConstantFoldInstOperands(const Instruction *I,
1190 ArrayRef<Constant *> Ops,
1191 const DataLayout &DL,
1192 const TargetLibraryInfo *TLI,
1193 bool AllowNonDeterministic) {
1194 return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI,
1195 AllowNonDeterministic);
1196}
1197
1198Constant *llvm::ConstantFoldCompareInstOperands(
1199 unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,
1200 const TargetLibraryInfo *TLI, const Instruction *I) {
1201 CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
1202 // fold: icmp (inttoptr x), null -> icmp x, 0
1203 // fold: icmp null, (inttoptr x) -> icmp 0, x
1204 // fold: icmp (ptrtoint x), 0 -> icmp x, null
1205 // fold: icmp 0, (ptrtoint x) -> icmp null, x
1206 // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
1207 // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
1208 //
1209 // FIXME: The following comment is out of date and the DataLayout is here now.
1210 // ConstantExpr::getCompare cannot do this, because it doesn't have DL
1211 // around to know if bit truncation is happening.
1212 if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
1213 if (Ops1->isNullValue()) {
1214 if (CE0->getOpcode() == Instruction::IntToPtr) {
1215 Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
1216 // Convert the integer value to the right size to ensure we get the
1217 // proper extension or truncation.
1218 if (Constant *C = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
1219 /*IsSigned*/ false, DL)) {
1220 Constant *Null = Constant::getNullValue(C->getType());
1221 return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
1222 }
1223 }
1224
1225 // Only do this transformation if the int is intptrty in size, otherwise
1226 // there is a truncation or extension that we aren't modeling.
1227 if (CE0->getOpcode() == Instruction::PtrToInt) {
1228 Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
1229 if (CE0->getType() == IntPtrTy) {
1230 Constant *C = CE0->getOperand(0);
1231 Constant *Null = Constant::getNullValue(C->getType());
1232 return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
1233 }
1234 }
1235 }
1236
1237 if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
1238 if (CE0->getOpcode() == CE1->getOpcode()) {
1239 if (CE0->getOpcode() == Instruction::IntToPtr) {
1240 Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
1241
1242 // Convert the integer value to the right size to ensure we get the
1243 // proper extension or truncation.
1244 Constant *C0 = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
1245 /*IsSigned*/ false, DL);
1246 Constant *C1 = ConstantFoldIntegerCast(CE1->getOperand(0), IntPtrTy,
1247 /*IsSigned*/ false, DL);
1248 if (C0 && C1)
1249 return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
1250 }
1251
1252 // Only do this transformation if the int is intptrty in size, otherwise
1253 // there is a truncation or extension that we aren't modeling.
1254 if (CE0->getOpcode() == Instruction::PtrToInt) {
1255 Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
1256 if (CE0->getType() == IntPtrTy &&
1257 CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
1258 return ConstantFoldCompareInstOperands(
1259 Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
1260 }
1261 }
1262 }
1263 }
1264
1265 // Convert pointer comparison (base+offset1) pred (base+offset2) into
1266 // offset1 pred offset2, for the case where the offset is inbounds. This
1267 // only works for equality and unsigned comparison, as inbounds permits
1268 // crossing the sign boundary. However, the offset comparison itself is
1269 // signed.
1270 if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) {
1271 unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType());
1272 APInt Offset0(IndexWidth, 0);
1273 bool IsEqPred = ICmpInst::isEquality(Predicate);
1274 Value *Stripped0 = Ops0->stripAndAccumulateConstantOffsets(
1275 DL, Offset0, /*AllowNonInbounds=*/IsEqPred,
1276 /*AllowInvariantGroup=*/false, /*ExternalAnalysis=*/nullptr,
1277 /*LookThroughIntToPtr=*/IsEqPred);
1278 APInt Offset1(IndexWidth, 0);
1279 Value *Stripped1 = Ops1->stripAndAccumulateConstantOffsets(
1280 DL, Offset1, /*AllowNonInbounds=*/IsEqPred,
1281 /*AllowInvariantGroup=*/false, /*ExternalAnalysis=*/nullptr,
1282 /*LookThroughIntToPtr=*/IsEqPred);
1283 if (Stripped0 == Stripped1)
1284 return ConstantInt::getBool(
1285 Ops0->getContext(),
1286 ICmpInst::compare(Offset0, Offset1,
1287 ICmpInst::getSignedPredicate(Predicate)));
1288 }
1289 } else if (isa<ConstantExpr>(Ops1)) {
1290 // If RHS is a constant expression, but the left side isn't, swap the
1291 // operands and try again.
1292 Predicate = ICmpInst::getSwappedPredicate(Predicate);
1293 return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
1294 }
1295
1296 if (CmpInst::isFPPredicate(Predicate)) {
1297 // Flush any denormal constant float input according to denormal handling
1298 // mode.
1299 Ops0 = FlushFPConstant(Ops0, I, /*IsOutput=*/false);
1300 if (!Ops0)
1301 return nullptr;
1302 Ops1 = FlushFPConstant(Ops1, I, /*IsOutput=*/false);
1303 if (!Ops1)
1304 return nullptr;
1305 }
1306
1307 return ConstantFoldCompareInstruction(Predicate, Ops0, Ops1);
1308}
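// As a rough illustration:
//   icmp eq (ptr inttoptr (i64 4 to ptr)), null
// folds to false, because the inttoptr operand is converted back to a
// pointer-sized integer and compared against zero.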
1309
1310Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
1311 const DataLayout &DL) {
1312 assert(Instruction::isUnaryOp(Opcode) && "Not a unary op!");
1313
1314 return ConstantFoldUnaryInstruction(Opcode, Op);
1315}
1316
1317Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
1318 Constant *RHS,
1319 const DataLayout &DL) {
1320 assert(Instruction::isBinaryOp(Opcode) && "Not a binary op!");
1321 if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))
1322 if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
1323 return C;
1324
1325 if (ConstantExpr::isDesirableBinOp(Opcode))
1326 return ConstantExpr::get(Opcode, LHS, RHS);
1327 return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
1328}
1329
1330static ConstantFP *flushDenormalConstant(Type *Ty, const APFloat &APF,
1331 DenormalMode::DenormalModeKind Mode) {
1332 switch (Mode) {
1333 case DenormalMode::Dynamic:
1334 return nullptr;
1335 case DenormalMode::IEEE:
1336 return ConstantFP::get(Ty->getContext(), APF);
1337 case DenormalMode::PreserveSign:
1338 return ConstantFP::get(
1339 Ty->getContext(),
1340 APFloat::getZero(APF.getSemantics(), APF.isNegative()));
1341 case DenormalMode::PositiveZero:
1342 return ConstantFP::get(Ty->getContext(),
1343 APFloat::getZero(APF.getSemantics(), false));
1344 default:
1345 break;
1346 }
1347
1348 llvm_unreachable("unknown denormal mode");
1349}
1350
1351/// Return the denormal mode that can be assumed when executing a floating point
1352/// operation at \p CtxI.
1353static DenormalMode getInstrDenormalMode(const Instruction *CtxI, Type *Ty) {
1354 if (!CtxI || !CtxI->getParent() || !CtxI->getFunction())
1355 return DenormalMode::getDynamic();
1356 return CtxI->getFunction()->getDenormalMode(Ty->getFltSemantics());
1357}
1358
1359static ConstantFP *flushDenormalConstantFP(ConstantFP *CFP,
1360 const Instruction *Inst,
1361 bool IsOutput) {
1362 const APFloat &APF = CFP->getValueAPF();
1363 if (!APF.isDenormal())
1364 return CFP;
1365
1366 DenormalMode Mode = getInstrDenormalMode(Inst, CFP->getType());
1367 return flushDenormalConstant(CFP->getType(), APF,
1368 IsOutput ? Mode.Output : Mode.Input);
1369}
1370
1371Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *Inst,
1372 bool IsOutput) {
1373 if (ConstantFP *CFP = dyn_cast<ConstantFP>(Operand))
1374 return flushDenormalConstantFP(CFP, Inst, IsOutput);
1375
1376 if (isa<ConstantAggregateZero, UndefValue>(Operand))
1377 return Operand;
1378
1379 Type *Ty = Operand->getType();
1380 VectorType *VecTy = dyn_cast<VectorType>(Ty);
1381 if (VecTy) {
1382 if (auto *Splat = dyn_cast_or_null<ConstantFP>(Operand->getSplatValue())) {
1383 ConstantFP *Folded = flushDenormalConstantFP(Splat, Inst, IsOutput);
1384 if (!Folded)
1385 return nullptr;
1386 return ConstantVector::getSplat(VecTy->getElementCount(), Folded);
1387 }
1388
1389 Ty = VecTy->getElementType();
1390 }
1391
1392 if (isa<ConstantExpr>(Operand))
1393 return Operand;
1394
1395 if (const auto *CV = dyn_cast<ConstantVector>(Operand)) {
1396 SmallVector<Constant *, 16> NewElts;
1397 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1398 Constant *Element = CV->getAggregateElement(i);
1399 if (isa<UndefValue>(Element)) {
1400 NewElts.push_back(Element);
1401 continue;
1402 }
1403
1404 ConstantFP *CFP = dyn_cast<ConstantFP>(Element);
1405 if (!CFP)
1406 return nullptr;
1407
1408 ConstantFP *Folded = flushDenormalConstantFP(CFP, Inst, IsOutput);
1409 if (!Folded)
1410 return nullptr;
1411 NewElts.push_back(Folded);
1412 }
1413
1414 return ConstantVector::get(NewElts);
1415 }
1416
1417 if (const auto *CDV = dyn_cast<ConstantDataVector>(Operand)) {
1418 SmallVector<Constant *, 16> NewElts;
1419 for (unsigned I = 0, E = CDV->getNumElements(); I < E; ++I) {
1420 const APFloat &Elt = CDV->getElementAsAPFloat(I);
1421 if (!Elt.isDenormal()) {
1422 NewElts.push_back(ConstantFP::get(Ty, Elt));
1423 } else {
1424 DenormalMode Mode = getInstrDenormalMode(Inst, Ty);
1425 ConstantFP *Folded =
1426 flushDenormalConstant(Ty, Elt, IsOutput ? Mode.Output : Mode.Input);
1427 if (!Folded)
1428 return nullptr;
1429 NewElts.push_back(Folded);
1430 }
1431 }
1432
1433 return ConstantVector::get(NewElts);
1434 }
1435
1436 return nullptr;
1437}
1438
1439Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
1440 Constant *RHS, const DataLayout &DL,
1441 const Instruction *I,
1442 bool AllowNonDeterministic) {
1443 if (Instruction::isBinaryOp(Opcode)) {
1444 // Flush denormal inputs if needed.
1445 Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
1446 if (!Op0)
1447 return nullptr;
1448 Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);
1449 if (!Op1)
1450 return nullptr;
1451
1452 // If nsz or an algebraic FMF flag is set, the result of the FP operation
1453 // may change due to future optimization. Don't constant fold them if
1454 // non-deterministic results are not allowed.
1455 if (!AllowNonDeterministic)
1456 if (auto *FP = dyn_cast_or_null<FPMathOperator>(I))
1457 if (FP->hasNoSignedZeros() || FP->hasAllowReassoc() ||
1458 FP->hasAllowContract() || FP->hasAllowReciprocal())
1459 return nullptr;
1460
1461 // Calculate constant result.
1462 Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
1463 if (!C)
1464 return nullptr;
1465
1466 // Flush denormal output if needed.
1467 C = FlushFPConstant(C, I, /* IsOutput */ true);
1468 if (!C)
1469 return nullptr;
1470
1471 // The precise NaN value is non-deterministic.
1472 if (!AllowNonDeterministic && C->isNaN())
1473 return nullptr;
1474
1475 return C;
1476 }
1477 // If instruction lacks a parent/function and the denormal mode cannot be
1478 // determined, use the default (IEEE).
1479 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
1480}
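// As a rough illustration (assuming the enclosing function carries
// "denormal-fp-math"="positive-zero,positive-zero"): folding an fadd whose
// LHS is a denormal float constant first flushes that input to +0.0, so the
// result is computed from the flushed value rather than the denormal one.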
1481
1482Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
1483 Type *DestTy, const DataLayout &DL) {
1484 assert(Instruction::isCast(Opcode));
1485 switch (Opcode) {
1486 default:
1487 llvm_unreachable("Missing case");
1488 case Instruction::PtrToAddr:
1489 // TODO: Add some of the ptrtoint folds here as well.
1490 break;
1491 case Instruction::PtrToInt:
1492 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
1493 Constant *FoldedValue = nullptr;
1494 // If the input is an inttoptr, eliminate the pair. This requires knowing
1495 // the width of a pointer, so it can't be done in ConstantExpr::getCast.
1496 if (CE->getOpcode() == Instruction::IntToPtr) {
1497 // zext/trunc the inttoptr to pointer size.
1498 FoldedValue = ConstantFoldIntegerCast(CE->getOperand(0),
1499 DL.getIntPtrType(CE->getType()),
1500 /*IsSigned=*/false, DL);
1501 } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
1502 // If we have GEP, we can perform the following folds:
1503 // (ptrtoint (gep null, x)) -> x
1504 // (ptrtoint (gep (gep null, x), y) -> x + y, etc.
1505 unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
1506 APInt BaseOffset(BitWidth, 0);
1507 auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
1508 DL, BaseOffset, /*AllowNonInbounds=*/true));
1509 if (Base->isNullValue()) {
1510 FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
1511 } else {
1512 // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V
1513 if (GEP->getNumIndices() == 1 &&
1514 GEP->getSourceElementType()->isIntegerTy(8)) {
1515 auto *Ptr = cast<Constant>(GEP->getPointerOperand());
1516 auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
1517 Type *IntIdxTy = DL.getIndexType(Ptr->getType());
1518 if (Sub && Sub->getType() == IntIdxTy &&
1519 Sub->getOpcode() == Instruction::Sub &&
1520 Sub->getOperand(0)->isNullValue())
1521 FoldedValue = ConstantExpr::getSub(
1522 ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));
1523 }
1524 }
1525 }
1526 if (FoldedValue) {
1527 // Do a zext or trunc to get to the ptrtoint dest size.
1528 return ConstantFoldIntegerCast(FoldedValue, DestTy, /*IsSigned=*/false,
1529 DL);
1530 }
1531 }
1532 break;
1533 case Instruction::IntToPtr:
1534 // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
1535 // the int size is >= the ptr size and the address spaces are the same.
1536 // This requires knowing the width of a pointer, so it can't be done in
1537 // ConstantExpr::getCast.
1538 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
1539 if (CE->getOpcode() == Instruction::PtrToInt) {
1540 Constant *SrcPtr = CE->getOperand(0);
1541 unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
1542 unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
1543
1544 if (MidIntSize >= SrcPtrSize) {
1545 unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
1546 if (SrcAS == DestTy->getPointerAddressSpace())
1547 return FoldBitCast(CE->getOperand(0), DestTy, DL);
1548 }
1549 }
1550 }
1551 break;
1552 case Instruction::Trunc:
1553 case Instruction::ZExt:
1554 case Instruction::SExt:
1555 case Instruction::FPTrunc:
1556 case Instruction::FPExt:
1557 case Instruction::UIToFP:
1558 case Instruction::SIToFP:
1559 case Instruction::FPToUI:
1560 case Instruction::FPToSI:
1561 case Instruction::AddrSpaceCast:
1562 break;
1563 case Instruction::BitCast:
1564 return FoldBitCast(C, DestTy, DL);
1565 }
1566
1567 if (ConstantExpr::isDesirableCastOp(Opcode))
1568 return ConstantExpr::getCast(Opcode, C, DestTy);
1569 return ConstantFoldCastInstruction(Opcode, C, DestTy);
1570}
1571
1572Constant *llvm::ConstantFoldIntegerCast(Constant *C, Type *DestTy,
1573 bool IsSigned, const DataLayout &DL) {
1574 Type *SrcTy = C->getType();
1575 if (SrcTy == DestTy)
1576 return C;
1577 if (SrcTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits())
1578 return ConstantFoldCastOperand(Instruction::Trunc, C, DestTy, DL);
1579 if (IsSigned)
1580 return ConstantFoldCastOperand(Instruction::SExt, C, DestTy, DL);
1581 return ConstantFoldCastOperand(Instruction::ZExt, C, DestTy, DL);
1582}
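// As a rough illustration: casting i8 -1 to i32 yields i32 255 with
// IsSigned = false (zext) and i32 -1 with IsSigned = true (sext), while
// casting i32 300 down to i8 truncates to i8 44 regardless of IsSigned.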
1583
1584//===----------------------------------------------------------------------===//
1585// Constant Folding for Calls
1586//
1587
1588bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
1589 if (Call->isNoBuiltin())
1590 return false;
1591 if (Call->getFunctionType() != F->getFunctionType())
1592 return false;
1593
1594 // Allow FP calls (both libcalls and intrinsics) to avoid being folded.
1595 // This can be useful for GPU targets or in cross-compilation scenarios
1596 // when the exact target FP behaviour is required, and the host compiler's
1597 // behaviour may be slightly different from the device's run-time behaviour.
1598 if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() ||
1599 any_of(F->args(), [](const Argument &Arg) {
1600 return Arg.getType()->isFloatingPointTy();
1601 })))
1602 return false;
1603
1604 switch (F->getIntrinsicID()) {
1605 // Operations that do not operate on floating-point numbers and do not depend
1606 // on the FP environment can be folded even in strictfp functions.
1607 case Intrinsic::bswap:
1608 case Intrinsic::ctpop:
1609 case Intrinsic::ctlz:
1610 case Intrinsic::cttz:
1611 case Intrinsic::fshl:
1612 case Intrinsic::fshr:
1613 case Intrinsic::launder_invariant_group:
1614 case Intrinsic::strip_invariant_group:
1615 case Intrinsic::masked_load:
1616 case Intrinsic::get_active_lane_mask:
1617 case Intrinsic::abs:
1618 case Intrinsic::smax:
1619 case Intrinsic::smin:
1620 case Intrinsic::umax:
1621 case Intrinsic::umin:
1622 case Intrinsic::scmp:
1623 case Intrinsic::ucmp:
1624 case Intrinsic::sadd_with_overflow:
1625 case Intrinsic::uadd_with_overflow:
1626 case Intrinsic::ssub_with_overflow:
1627 case Intrinsic::usub_with_overflow:
1628 case Intrinsic::smul_with_overflow:
1629 case Intrinsic::umul_with_overflow:
1630 case Intrinsic::sadd_sat:
1631 case Intrinsic::uadd_sat:
1632 case Intrinsic::ssub_sat:
1633 case Intrinsic::usub_sat:
1634 case Intrinsic::smul_fix:
1635 case Intrinsic::smul_fix_sat:
1636 case Intrinsic::bitreverse:
1637 case Intrinsic::is_constant:
1638 case Intrinsic::vector_reduce_add:
1639 case Intrinsic::vector_reduce_mul:
1640 case Intrinsic::vector_reduce_and:
1641 case Intrinsic::vector_reduce_or:
1642 case Intrinsic::vector_reduce_xor:
1643 case Intrinsic::vector_reduce_smin:
1644 case Intrinsic::vector_reduce_smax:
1645 case Intrinsic::vector_reduce_umin:
1646 case Intrinsic::vector_reduce_umax:
1647 case Intrinsic::vector_extract:
1648 case Intrinsic::vector_insert:
1649 case Intrinsic::vector_interleave2:
1650 case Intrinsic::vector_deinterleave2:
1651 // Target intrinsics
1652 case Intrinsic::amdgcn_perm:
1653 case Intrinsic::amdgcn_wave_reduce_umin:
1654 case Intrinsic::amdgcn_wave_reduce_umax:
1655 case Intrinsic::amdgcn_s_wqm:
1656 case Intrinsic::amdgcn_s_quadmask:
1657 case Intrinsic::amdgcn_s_bitreplicate:
1658 case Intrinsic::arm_mve_vctp8:
1659 case Intrinsic::arm_mve_vctp16:
1660 case Intrinsic::arm_mve_vctp32:
1661 case Intrinsic::arm_mve_vctp64:
1662 case Intrinsic::aarch64_sve_convert_from_svbool:
1663 case Intrinsic::wasm_alltrue:
1664 case Intrinsic::wasm_anytrue:
1665 case Intrinsic::wasm_dot:
1666 // WebAssembly float semantics are always known
1667 case Intrinsic::wasm_trunc_signed:
1668 case Intrinsic::wasm_trunc_unsigned:
1669 return true;
1670
1671 // Floating-point operations cannot be folded in strictfp functions in the
1672 // general case. They can be folded if the FP environment is known to the compiler.
1673 case Intrinsic::minnum:
1674 case Intrinsic::maxnum:
1675 case Intrinsic::minimum:
1676 case Intrinsic::maximum:
1677 case Intrinsic::minimumnum:
1678 case Intrinsic::maximumnum:
1679 case Intrinsic::log:
1680 case Intrinsic::log2:
1681 case Intrinsic::log10:
1682 case Intrinsic::exp:
1683 case Intrinsic::exp2:
1684 case Intrinsic::exp10:
1685 case Intrinsic::sqrt:
1686 case Intrinsic::sin:
1687 case Intrinsic::cos:
1688 case Intrinsic::sincos:
1689 case Intrinsic::sinh:
1690 case Intrinsic::cosh:
1691 case Intrinsic::atan:
1692 case Intrinsic::pow:
1693 case Intrinsic::powi:
1694 case Intrinsic::ldexp:
1695 case Intrinsic::fma:
1696 case Intrinsic::fmuladd:
1697 case Intrinsic::frexp:
1698 case Intrinsic::fptoui_sat:
1699 case Intrinsic::fptosi_sat:
1700 case Intrinsic::convert_from_fp16:
1701 case Intrinsic::convert_to_fp16:
1702 case Intrinsic::amdgcn_cos:
1703 case Intrinsic::amdgcn_cubeid:
1704 case Intrinsic::amdgcn_cubema:
1705 case Intrinsic::amdgcn_cubesc:
1706 case Intrinsic::amdgcn_cubetc:
1707 case Intrinsic::amdgcn_fmul_legacy:
1708 case Intrinsic::amdgcn_fma_legacy:
1709 case Intrinsic::amdgcn_fract:
1710 case Intrinsic::amdgcn_sin:
1711 // The intrinsics below depend on rounding mode in MXCSR.
1712 case Intrinsic::x86_sse_cvtss2si:
1713 case Intrinsic::x86_sse_cvtss2si64:
1714 case Intrinsic::x86_sse_cvttss2si:
1715 case Intrinsic::x86_sse_cvttss2si64:
1716 case Intrinsic::x86_sse2_cvtsd2si:
1717 case Intrinsic::x86_sse2_cvtsd2si64:
1718 case Intrinsic::x86_sse2_cvttsd2si:
1719 case Intrinsic::x86_sse2_cvttsd2si64:
1720 case Intrinsic::x86_avx512_vcvtss2si32:
1721 case Intrinsic::x86_avx512_vcvtss2si64:
1722 case Intrinsic::x86_avx512_cvttss2si:
1723 case Intrinsic::x86_avx512_cvttss2si64:
1724 case Intrinsic::x86_avx512_vcvtsd2si32:
1725 case Intrinsic::x86_avx512_vcvtsd2si64:
1726 case Intrinsic::x86_avx512_cvttsd2si:
1727 case Intrinsic::x86_avx512_cvttsd2si64:
1728 case Intrinsic::x86_avx512_vcvtss2usi32:
1729 case Intrinsic::x86_avx512_vcvtss2usi64:
1730 case Intrinsic::x86_avx512_cvttss2usi:
1731 case Intrinsic::x86_avx512_cvttss2usi64:
1732 case Intrinsic::x86_avx512_vcvtsd2usi32:
1733 case Intrinsic::x86_avx512_vcvtsd2usi64:
1734 case Intrinsic::x86_avx512_cvttsd2usi:
1735 case Intrinsic::x86_avx512_cvttsd2usi64:
1736
1737 // NVVM FMax intrinsics
1738 case Intrinsic::nvvm_fmax_d:
1739 case Intrinsic::nvvm_fmax_f:
1740 case Intrinsic::nvvm_fmax_ftz_f:
1741 case Intrinsic::nvvm_fmax_ftz_nan_f:
1742 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
1743 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
1744 case Intrinsic::nvvm_fmax_nan_f:
1745 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
1746 case Intrinsic::nvvm_fmax_xorsign_abs_f:
1747
1748 // NVVM FMin intrinsics
1749 case Intrinsic::nvvm_fmin_d:
1750 case Intrinsic::nvvm_fmin_f:
1751 case Intrinsic::nvvm_fmin_ftz_f:
1752 case Intrinsic::nvvm_fmin_ftz_nan_f:
1753 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
1754 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
1755 case Intrinsic::nvvm_fmin_nan_f:
1756 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
1757 case Intrinsic::nvvm_fmin_xorsign_abs_f:
1758
1759 // NVVM float/double to int32/uint32 conversion intrinsics
1760 case Intrinsic::nvvm_f2i_rm:
1761 case Intrinsic::nvvm_f2i_rn:
1762 case Intrinsic::nvvm_f2i_rp:
1763 case Intrinsic::nvvm_f2i_rz:
1764 case Intrinsic::nvvm_f2i_rm_ftz:
1765 case Intrinsic::nvvm_f2i_rn_ftz:
1766 case Intrinsic::nvvm_f2i_rp_ftz:
1767 case Intrinsic::nvvm_f2i_rz_ftz:
1768 case Intrinsic::nvvm_f2ui_rm:
1769 case Intrinsic::nvvm_f2ui_rn:
1770 case Intrinsic::nvvm_f2ui_rp:
1771 case Intrinsic::nvvm_f2ui_rz:
1772 case Intrinsic::nvvm_f2ui_rm_ftz:
1773 case Intrinsic::nvvm_f2ui_rn_ftz:
1774 case Intrinsic::nvvm_f2ui_rp_ftz:
1775 case Intrinsic::nvvm_f2ui_rz_ftz:
1776 case Intrinsic::nvvm_d2i_rm:
1777 case Intrinsic::nvvm_d2i_rn:
1778 case Intrinsic::nvvm_d2i_rp:
1779 case Intrinsic::nvvm_d2i_rz:
1780 case Intrinsic::nvvm_d2ui_rm:
1781 case Intrinsic::nvvm_d2ui_rn:
1782 case Intrinsic::nvvm_d2ui_rp:
1783 case Intrinsic::nvvm_d2ui_rz:
1784
1785 // NVVM float/double to int64/uint64 conversion intrinsics
1786 case Intrinsic::nvvm_f2ll_rm:
1787 case Intrinsic::nvvm_f2ll_rn:
1788 case Intrinsic::nvvm_f2ll_rp:
1789 case Intrinsic::nvvm_f2ll_rz:
1790 case Intrinsic::nvvm_f2ll_rm_ftz:
1791 case Intrinsic::nvvm_f2ll_rn_ftz:
1792 case Intrinsic::nvvm_f2ll_rp_ftz:
1793 case Intrinsic::nvvm_f2ll_rz_ftz:
1794 case Intrinsic::nvvm_f2ull_rm:
1795 case Intrinsic::nvvm_f2ull_rn:
1796 case Intrinsic::nvvm_f2ull_rp:
1797 case Intrinsic::nvvm_f2ull_rz:
1798 case Intrinsic::nvvm_f2ull_rm_ftz:
1799 case Intrinsic::nvvm_f2ull_rn_ftz:
1800 case Intrinsic::nvvm_f2ull_rp_ftz:
1801 case Intrinsic::nvvm_f2ull_rz_ftz:
1802 case Intrinsic::nvvm_d2ll_rm:
1803 case Intrinsic::nvvm_d2ll_rn:
1804 case Intrinsic::nvvm_d2ll_rp:
1805 case Intrinsic::nvvm_d2ll_rz:
1806 case Intrinsic::nvvm_d2ull_rm:
1807 case Intrinsic::nvvm_d2ull_rn:
1808 case Intrinsic::nvvm_d2ull_rp:
1809 case Intrinsic::nvvm_d2ull_rz:
1810
1811 // NVVM math intrinsics:
1812 case Intrinsic::nvvm_ceil_d:
1813 case Intrinsic::nvvm_ceil_f:
1814 case Intrinsic::nvvm_ceil_ftz_f:
1815
1816 case Intrinsic::nvvm_fabs:
1817 case Intrinsic::nvvm_fabs_ftz:
1818
1819 case Intrinsic::nvvm_floor_d:
1820 case Intrinsic::nvvm_floor_f:
1821 case Intrinsic::nvvm_floor_ftz_f:
1822
1823 case Intrinsic::nvvm_rcp_rm_d:
1824 case Intrinsic::nvvm_rcp_rm_f:
1825 case Intrinsic::nvvm_rcp_rm_ftz_f:
1826 case Intrinsic::nvvm_rcp_rn_d:
1827 case Intrinsic::nvvm_rcp_rn_f:
1828 case Intrinsic::nvvm_rcp_rn_ftz_f:
1829 case Intrinsic::nvvm_rcp_rp_d:
1830 case Intrinsic::nvvm_rcp_rp_f:
1831 case Intrinsic::nvvm_rcp_rp_ftz_f:
1832 case Intrinsic::nvvm_rcp_rz_d:
1833 case Intrinsic::nvvm_rcp_rz_f:
1834 case Intrinsic::nvvm_rcp_rz_ftz_f:
1835
1836 case Intrinsic::nvvm_round_d:
1837 case Intrinsic::nvvm_round_f:
1838 case Intrinsic::nvvm_round_ftz_f:
1839
1840 case Intrinsic::nvvm_saturate_d:
1841 case Intrinsic::nvvm_saturate_f:
1842 case Intrinsic::nvvm_saturate_ftz_f:
1843
1844 case Intrinsic::nvvm_sqrt_f:
1845 case Intrinsic::nvvm_sqrt_rn_d:
1846 case Intrinsic::nvvm_sqrt_rn_f:
1847 case Intrinsic::nvvm_sqrt_rn_ftz_f:
1848 return !Call->isStrictFP();
1849
1850 // NVVM add intrinsics with explicit rounding modes
1851 case Intrinsic::nvvm_add_rm_d:
1852 case Intrinsic::nvvm_add_rn_d:
1853 case Intrinsic::nvvm_add_rp_d:
1854 case Intrinsic::nvvm_add_rz_d:
1855 case Intrinsic::nvvm_add_rm_f:
1856 case Intrinsic::nvvm_add_rn_f:
1857 case Intrinsic::nvvm_add_rp_f:
1858 case Intrinsic::nvvm_add_rz_f:
1859 case Intrinsic::nvvm_add_rm_ftz_f:
1860 case Intrinsic::nvvm_add_rn_ftz_f:
1861 case Intrinsic::nvvm_add_rp_ftz_f:
1862 case Intrinsic::nvvm_add_rz_ftz_f:
1863
1864 // NVVM div intrinsics with explicit rounding modes
1865 case Intrinsic::nvvm_div_rm_d:
1866 case Intrinsic::nvvm_div_rn_d:
1867 case Intrinsic::nvvm_div_rp_d:
1868 case Intrinsic::nvvm_div_rz_d:
1869 case Intrinsic::nvvm_div_rm_f:
1870 case Intrinsic::nvvm_div_rn_f:
1871 case Intrinsic::nvvm_div_rp_f:
1872 case Intrinsic::nvvm_div_rz_f:
1873 case Intrinsic::nvvm_div_rm_ftz_f:
1874 case Intrinsic::nvvm_div_rn_ftz_f:
1875 case Intrinsic::nvvm_div_rp_ftz_f:
1876 case Intrinsic::nvvm_div_rz_ftz_f:
1877
1878 // NVVM mul intrinsics with explicit rounding modes
1879 case Intrinsic::nvvm_mul_rm_d:
1880 case Intrinsic::nvvm_mul_rn_d:
1881 case Intrinsic::nvvm_mul_rp_d:
1882 case Intrinsic::nvvm_mul_rz_d:
1883 case Intrinsic::nvvm_mul_rm_f:
1884 case Intrinsic::nvvm_mul_rn_f:
1885 case Intrinsic::nvvm_mul_rp_f:
1886 case Intrinsic::nvvm_mul_rz_f:
1887 case Intrinsic::nvvm_mul_rm_ftz_f:
1888 case Intrinsic::nvvm_mul_rn_ftz_f:
1889 case Intrinsic::nvvm_mul_rp_ftz_f:
1890 case Intrinsic::nvvm_mul_rz_ftz_f:
1891
1892 // NVVM fma intrinsics with explicit rounding modes
1893 case Intrinsic::nvvm_fma_rm_d:
1894 case Intrinsic::nvvm_fma_rn_d:
1895 case Intrinsic::nvvm_fma_rp_d:
1896 case Intrinsic::nvvm_fma_rz_d:
1897 case Intrinsic::nvvm_fma_rm_f:
1898 case Intrinsic::nvvm_fma_rn_f:
1899 case Intrinsic::nvvm_fma_rp_f:
1900 case Intrinsic::nvvm_fma_rz_f:
1901 case Intrinsic::nvvm_fma_rm_ftz_f:
1902 case Intrinsic::nvvm_fma_rn_ftz_f:
1903 case Intrinsic::nvvm_fma_rp_ftz_f:
1904 case Intrinsic::nvvm_fma_rz_ftz_f:
1905
1906 // Sign operations are actually bitwise operations, they do not raise
1907 // exceptions even for SNANs.
1908 case Intrinsic::fabs:
1909 case Intrinsic::copysign:
1910 case Intrinsic::is_fpclass:
1911 // Non-constrained variants of rounding operations assume the default FP
1912 // environment, so they can be folded in any case.
1913 case Intrinsic::ceil:
1914 case Intrinsic::floor:
1915 case Intrinsic::round:
1916 case Intrinsic::roundeven:
1917 case Intrinsic::trunc:
1918 case Intrinsic::nearbyint:
1919 case Intrinsic::rint:
1920 case Intrinsic::canonicalize:
1921
1922 // Constrained intrinsics can be folded if the FP environment is known
1923 // to the compiler.
1924 case Intrinsic::experimental_constrained_fma:
1925 case Intrinsic::experimental_constrained_fmuladd:
1926 case Intrinsic::experimental_constrained_fadd:
1927 case Intrinsic::experimental_constrained_fsub:
1928 case Intrinsic::experimental_constrained_fmul:
1929 case Intrinsic::experimental_constrained_fdiv:
1930 case Intrinsic::experimental_constrained_frem:
1931 case Intrinsic::experimental_constrained_ceil:
1932 case Intrinsic::experimental_constrained_floor:
1933 case Intrinsic::experimental_constrained_round:
1934 case Intrinsic::experimental_constrained_roundeven:
1935 case Intrinsic::experimental_constrained_trunc:
1936 case Intrinsic::experimental_constrained_nearbyint:
1937 case Intrinsic::experimental_constrained_rint:
1938 case Intrinsic::experimental_constrained_fcmp:
1939 case Intrinsic::experimental_constrained_fcmps:
1940 return true;
1941 default:
1942 return false;
1943 case Intrinsic::not_intrinsic: break;
1944 }
1945
1946 if (!F->hasName() || Call->isStrictFP())
1947 return false;
1948
1949 // In these cases, checking the length is required. We don't want to return
1950 // true for a name like "cos\0blah", which strcmp would consider equal to
1951 // "cos" even though it has length 8.
1952 StringRef Name = F->getName();
1953 switch (Name[0]) {
1954 default:
1955 return false;
1956 case 'a':
1957 return Name == "acos" || Name == "acosf" ||
1958 Name == "asin" || Name == "asinf" ||
1959 Name == "atan" || Name == "atanf" ||
1960 Name == "atan2" || Name == "atan2f";
1961 case 'c':
1962 return Name == "ceil" || Name == "ceilf" ||
1963 Name == "cos" || Name == "cosf" ||
1964 Name == "cosh" || Name == "coshf";
1965 case 'e':
1966 return Name == "exp" || Name == "expf" || Name == "exp2" ||
1967 Name == "exp2f" || Name == "erf" || Name == "erff";
1968 case 'f':
1969 return Name == "fabs" || Name == "fabsf" ||
1970 Name == "floor" || Name == "floorf" ||
1971 Name == "fmod" || Name == "fmodf";
1972 case 'i':
1973 return Name == "ilogb" || Name == "ilogbf";
1974 case 'l':
1975 return Name == "log" || Name == "logf" || Name == "logl" ||
1976 Name == "log2" || Name == "log2f" || Name == "log10" ||
1977 Name == "log10f" || Name == "logb" || Name == "logbf" ||
1978 Name == "log1p" || Name == "log1pf";
1979 case 'n':
1980 return Name == "nearbyint" || Name == "nearbyintf";
1981 case 'p':
1982 return Name == "pow" || Name == "powf";
1983 case 'r':
1984 return Name == "remainder" || Name == "remainderf" ||
1985 Name == "rint" || Name == "rintf" ||
1986 Name == "round" || Name == "roundf";
1987 case 's':
1988 return Name == "sin" || Name == "sinf" ||
1989 Name == "sinh" || Name == "sinhf" ||
1990 Name == "sqrt" || Name == "sqrtf";
1991 case 't':
1992 return Name == "tan" || Name == "tanf" ||
1993 Name == "tanh" || Name == "tanhf" ||
1994 Name == "trunc" || Name == "truncf";
1995 case '_':
1996 // Check for various function names that get used for the math functions
1997 // when the header files are preprocessed with the macro
1998 // __FINITE_MATH_ONLY__ enabled.
1999 // The '12' here is the length of the shortest name that can match.
2000 // We need to check the size before looking at Name[1] and Name[2]
2001 // so we may as well check a limit that will eliminate mismatches.
2002 if (Name.size() < 12 || Name[1] != '_')
2003 return false;
2004 switch (Name[2]) {
2005 default:
2006 return false;
2007 case 'a':
2008 return Name == "__acos_finite" || Name == "__acosf_finite" ||
2009 Name == "__asin_finite" || Name == "__asinf_finite" ||
2010 Name == "__atan2_finite" || Name == "__atan2f_finite";
2011 case 'c':
2012 return Name == "__cosh_finite" || Name == "__coshf_finite";
2013 case 'e':
2014 return Name == "__exp_finite" || Name == "__expf_finite" ||
2015 Name == "__exp2_finite" || Name == "__exp2f_finite";
2016 case 'l':
2017 return Name == "__log_finite" || Name == "__logf_finite" ||
2018 Name == "__log10_finite" || Name == "__log10f_finite";
2019 case 'p':
2020 return Name == "__pow_finite" || Name == "__powf_finite";
2021 case 's':
2022 return Name == "__sinh_finite" || Name == "__sinhf_finite";
2023 }
2024 }
2025}
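// A sketch of how the dispatch above plays out: llvm.bswap.i32 falls into the
// first group and is foldable regardless of strictfp; llvm.sqrt.f64 falls into
// the FP group and is foldable only when the call is not strictfp; a libcall
// such as "cosf" reaches the name-based check, which additionally requires a
// non-strictfp call and a recognized libm-style name.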
2026
2027namespace {
2028
2029Constant *GetConstantFoldFPValue(double V, Type *Ty) {
2030 if (Ty->isHalfTy() || Ty->isFloatTy()) {
2031 APFloat APF(V);
2032 bool unused;
2033 APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
2034 return ConstantFP::get(Ty->getContext(), APF);
2035 }
2036 if (Ty->isDoubleTy())
2037 return ConstantFP::get(Ty->getContext(), APFloat(V));
2038 llvm_unreachable("Can only constant fold half/float/double");
2039}
2040
2041#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2042Constant *GetConstantFoldFPValue128(float128 V, Type *Ty) {
2043 if (Ty->isFP128Ty())
2044 return ConstantFP::get(Ty, V);
2045 llvm_unreachable("Can only constant fold fp128");
2046}
2047#endif
2048
2049/// Clear the floating-point exception state.
2050inline void llvm_fenv_clearexcept() {
2051#if HAVE_DECL_FE_ALL_EXCEPT
2052 feclearexcept(FE_ALL_EXCEPT);
2053#endif
2054 errno = 0;
2055}
2056
2057/// Test if a floating-point exception was raised.
2058inline bool llvm_fenv_testexcept() {
2059 int errno_val = errno;
2060 if (errno_val == ERANGE || errno_val == EDOM)
2061 return true;
2062#if HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
2063 if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
2064 return true;
2065#endif
2066 return false;
2067}
2068
2069static APFloat FTZPreserveSign(const APFloat &V) {
2070 if (V.isDenormal())
2071 return APFloat::getZero(V.getSemantics(), V.isNegative());
2072 return V;
2073}
2074
2075static APFloat FlushToPositiveZero(const APFloat &V) {
2076 if (V.isDenormal())
2077 return APFloat::getZero(V.getSemantics(), false);
2078 return V;
2079}
2080
2081static APFloat FlushWithDenormKind(const APFloat &V,
2082 DenormalMode::DenormalModeKind DenormKind) {
2083 assert(DenormKind != DenormalMode::DenormalModeKind::Invalid &&
2084 DenormKind != DenormalMode::DenormalModeKind::Dynamic);
2085 switch (DenormKind) {
2086 case DenormalMode::DenormalModeKind::IEEE:
2087 return V;
2088 case DenormalMode::DenormalModeKind::PreserveSign:
2089 return FTZPreserveSign(V);
2090 case DenormalMode::DenormalModeKind::PositiveZero:
2091 return FlushToPositiveZero(V);
2092 default:
2093 llvm_unreachable("Invalid denormal mode!");
2094 }
2095}
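// For illustration, how the flushing helpers above treat a single-precision
// denormal such as 0x1p-140f: PreserveSign maps +/-0x1p-140f to +/-0.0f,
// PositiveZero maps both signs to +0.0f, and IEEE leaves the value unchanged.
// Zeros, normals, infinities and NaNs pass through all three modes untouched.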
2096
2097Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty,
2098 DenormalMode DenormMode = DenormalMode::getIEEE()) {
2099 if (!DenormMode.isValid() ||
2100 DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic ||
2101 DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic)
2102 return nullptr;
2103
2104 llvm_fenv_clearexcept();
2105 auto Input = FlushWithDenormKind(V, DenormMode.Input);
2106 double Result = NativeFP(Input.convertToDouble());
2107 if (llvm_fenv_testexcept()) {
2108 llvm_fenv_clearexcept();
2109 return nullptr;
2110 }
2111
2112 Constant *Output = GetConstantFoldFPValue(Result, Ty);
2113 if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE)
2114 return Output;
2115 const auto *CFP = static_cast<ConstantFP *>(Output);
2116 const auto Res = FlushWithDenormKind(CFP->getValueAPF(), DenormMode.Output);
2117 return ConstantFP::get(Ty->getContext(), Res);
2118}
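// Sketch of the typical call pattern for the helper above, as used later in
// this file: ConstantFoldFP(sin, Op->getValueAPF(), Ty) evaluates the host
// sin() in double precision, gives up if errno reports EDOM/ERANGE or a
// non-inexact FP exception was raised, and otherwise rounds the result back
// to Ty (half/float/double), applying the requested denormal flushing to the
// input and output.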
2119
2120#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2121Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V,
2122 Type *Ty) {
2123 llvm_fenv_clearexcept();
2124 float128 Result = NativeFP(V.convertToQuad());
2125 if (llvm_fenv_testexcept()) {
2126 llvm_fenv_clearexcept();
2127 return nullptr;
2128 }
2129
2130 return GetConstantFoldFPValue128(Result, Ty);
2131}
2132#endif
2133
2134Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
2135 const APFloat &V, const APFloat &W, Type *Ty) {
2136 llvm_fenv_clearexcept();
2137 double Result = NativeFP(V.convertToDouble(), W.convertToDouble());
2138 if (llvm_fenv_testexcept()) {
2139 llvm_fenv_clearexcept();
2140 return nullptr;
2141 }
2142
2143 return GetConstantFoldFPValue(Result, Ty);
2144}
2145
2146Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
2147 FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType());
2148 if (!VT)
2149 return nullptr;
2150
2151 // This isn't strictly necessary, but handle the special/common case of zero:
2152 // all integer reductions of a zero input produce zero.
2153 if (isa<ConstantAggregateZero>(Op))
2154 return ConstantInt::get(VT->getElementType(), 0);
2155
2156 // This is the same as the underlying binops - poison propagates.
2157 if (isa<PoisonValue>(Op) || Op->containsPoisonElement())
2158 return PoisonValue::get(VT->getElementType());
2159
2160 // TODO: Handle undef.
2161 if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op))
2162 return nullptr;
2163
2164 auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
2165 if (!EltC)
2166 return nullptr;
2167
2168 APInt Acc = EltC->getValue();
2169 for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
2170 if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
2171 return nullptr;
2172 const APInt &X = EltC->getValue();
2173 switch (IID) {
2174 case Intrinsic::vector_reduce_add:
2175 Acc = Acc + X;
2176 break;
2177 case Intrinsic::vector_reduce_mul:
2178 Acc = Acc * X;
2179 break;
2180 case Intrinsic::vector_reduce_and:
2181 Acc = Acc & X;
2182 break;
2183 case Intrinsic::vector_reduce_or:
2184 Acc = Acc | X;
2185 break;
2186 case Intrinsic::vector_reduce_xor:
2187 Acc = Acc ^ X;
2188 break;
2189 case Intrinsic::vector_reduce_smin:
2190 Acc = APIntOps::smin(Acc, X);
2191 break;
2192 case Intrinsic::vector_reduce_smax:
2193 Acc = APIntOps::smax(Acc, X);
2194 break;
2195 case Intrinsic::vector_reduce_umin:
2196 Acc = APIntOps::umin(Acc, X);
2197 break;
2198 case Intrinsic::vector_reduce_umax:
2199 Acc = APIntOps::umax(Acc, X);
2200 break;
2201 }
2202 }
2203
2204 return ConstantInt::get(Op->getContext(), Acc);
2205}
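// Worked examples for the reduction folder above:
//   vector_reduce_add(<4 x i32> <i32 1, i32 2, i32 3, i32 4>) folds to i32 10,
//   vector_reduce_umax(<2 x i8> <i8 7, i8 9>) folds to i8 9, and any reduction
//   of a vector containing a poison element folds to poison. Scalable vectors
//   and vectors with undef elements are left unfolded here.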
2206
2207/// Attempt to fold an SSE floating point to integer conversion of a constant
2208/// floating point. If roundTowardZero is false, the default IEEE rounding is
2209/// used (toward nearest, ties to even). This matches the behavior of the
2210/// non-truncating SSE instructions in the default rounding mode. The desired
2211/// integer type Ty is used to select how many bits are available for the
2212/// result. Returns null if the conversion cannot be performed, otherwise
2213/// returns the Constant value resulting from the conversion.
2214Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
2215 Type *Ty, bool IsSigned) {
2216 // All of these conversion intrinsics form an integer of at most 64 bits.
2217 unsigned ResultWidth = Ty->getIntegerBitWidth();
2218 assert(ResultWidth <= 64 &&
2219 "Can only constant fold conversions to 64 and 32 bit ints");
2220
2221 uint64_t UIntVal;
2222 bool isExact = false;
2223 APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
2224 : APFloat::rmNearestTiesToEven;
2225 APFloat::opStatus status =
2226 Val.convertToInteger(MutableArrayRef(UIntVal), ResultWidth,
2227 IsSigned, mode, &isExact);
2228 if (status != APFloat::opOK &&
2229 (!roundTowardZero || status != APFloat::opInexact))
2230 return nullptr;
2231 return ConstantInt::get(Ty, UIntVal, IsSigned);
2232}
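// Behavioral sketch for the conversion helper above: with roundTowardZero
// (the cvtt* forms), 3.9f folds to 3 even though the conversion is inexact;
// for the non-truncating forms an inexact result blocks the fold, since the
// runtime result would depend on the dynamic MXCSR rounding mode. NaN and
// out-of-range inputs yield opInvalidOp and are never folded.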
2233
2234double getValueAsDouble(ConstantFP *Op) {
2235 Type *Ty = Op->getType();
2236
2237 if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
2238 return Op->getValueAPF().convertToDouble();
2239
2240 bool unused;
2241 APFloat APF = Op->getValueAPF();
2242 APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
2243 return APF.convertToDouble();
2244}
2245
2246static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
2247 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
2248 C = &CI->getValue();
2249 return true;
2250 }
2251 if (isa<UndefValue>(Op)) {
2252 C = nullptr;
2253 return true;
2254 }
2255 return false;
2256}
2257
2258/// Checks if the given intrinsic call, which evaluates to constant, is allowed
2259/// to be folded.
2260///
2261/// \param CI Constrained intrinsic call.
2262/// \param St Exception flags raised during constant evaluation.
2263static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
2264 APFloat::opStatus St) {
2265 std::optional<RoundingMode> ORM = CI->getRoundingMode();
2266 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2267
2268 // If the operation does not change exception status flags, it is safe
2269 // to fold.
2270 if (St == APFloat::opStatus::opOK)
2271 return true;
2272
2273 // If evaluation raised FP exception, the result can depend on rounding
2274 // mode. If the latter is unknown, folding is not possible.
2275 if (ORM == RoundingMode::Dynamic)
2276 return false;
2277
2278 // If FP exceptions are ignored, fold the call, even if such exception is
2279 // raised.
2280 if (EB && *EB != fp::ExceptionBehavior::ebStrict)
2281 return true;
2282
2283 // Leave the calculation for runtime so that the exception flags are
2284 // correctly set in hardware.
2285 return false;
2286}
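// For example, a constrained fadd whose evaluation raises only the inexact
// flag may still be folded when the call's exception behavior is
// fpexcept.ignore or fpexcept.maytrap, but not with fpexcept.strict (the flag
// must then be observable at run time) and not when the rounding mode is
// dynamic.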
2287
2288/// Returns the rounding mode that should be used for constant evaluation.
2289static RoundingMode
2290getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
2291 std::optional<RoundingMode> ORM = CI->getRoundingMode();
2292 if (!ORM || *ORM == RoundingMode::Dynamic)
2293 // Even if the rounding mode is unknown, try evaluating the operation.
2294 // If it does not raise an inexact exception, rounding was not applied, so
2295 // the result is exact and does not depend on the rounding mode. Whether
2296 // other FP exceptions are raised does not depend on the rounding mode either.
2297 return RoundingMode::NearestTiesToEven;
2298 return *ORM;
2299}
2300
2301/// Try to constant fold llvm.canonicalize for the given caller and value.
2302static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI,
2303 const APFloat &Src) {
2304 // Zero, positive and negative, is always OK to fold.
2305 if (Src.isZero()) {
2306 // Get a fresh 0, since ppc_fp128 does have non-canonical zeros.
2307 return ConstantFP::get(
2308 CI->getContext(),
2309 APFloat::getZero(Src.getSemantics(), Src.isNegative()));
2310 }
2311
2312 if (!Ty->isIEEELikeFPTy())
2313 return nullptr;
2314
2315 // Zero is always canonical and the sign must be preserved.
2316 //
2317 // Denorms and nans may have special encodings, but it should be OK to fold a
2318 // totally average number.
2319 if (Src.isNormal() || Src.isInfinity())
2320 return ConstantFP::get(CI->getContext(), Src);
2321
2322 if (Src.isDenormal() && CI->getParent() && CI->getFunction()) {
2323 DenormalMode DenormMode =
2324 CI->getFunction()->getDenormalMode(Src.getSemantics());
2325
2326 if (DenormMode == DenormalMode::getIEEE())
2327 return ConstantFP::get(CI->getContext(), Src);
2328
2329 if (DenormMode.Input == DenormalMode::Dynamic)
2330 return nullptr;
2331
2332 // We can fold only if we know whether the input and output are flushed.
2333 if ((DenormMode.Input == DenormalMode::Dynamic &&
2334 DenormMode.Output == DenormalMode::IEEE) ||
2335 (DenormMode.Input == DenormalMode::IEEE &&
2336 DenormMode.Output == DenormalMode::Dynamic))
2337 return nullptr;
2338
2339 bool IsPositive =
2340 (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero ||
2341 (DenormMode.Output == DenormalMode::PositiveZero &&
2342 DenormMode.Input == DenormalMode::IEEE));
2343
2344 return ConstantFP::get(CI->getContext(),
2345 APFloat::getZero(Src.getSemantics(), !IsPositive));
2346 }
2347
2348 return nullptr;
2349}
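// Worked examples for the canonicalize folder above: canonicalize(-0.0) and
// canonicalize(2.0) fold to themselves; a single-precision denormal inside a
// function with "denormal-fp-math"="preserve-sign,preserve-sign" folds to a
// zero of the same sign; and a denormal under a dynamic denormal mode is left
// unfolded.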
2350
2351static Constant *ConstantFoldScalarCall1(StringRef Name,
2352 Intrinsic::ID IntrinsicID,
2353 Type *Ty,
2354 ArrayRef<Constant *> Operands,
2355 const TargetLibraryInfo *TLI,
2356 const CallBase *Call) {
2357 assert(Operands.size() == 1 && "Wrong number of operands.");
2358
2359 if (IntrinsicID == Intrinsic::is_constant) {
2360 // We know we have a "Constant" argument. But we want to only
2361 // return true for manifest constants, not those that depend on
2362 // constants with unknowable values, e.g. GlobalValue or BlockAddress.
2363 if (Operands[0]->isManifestConstant())
2364 return ConstantInt::getTrue(Ty->getContext());
2365 return nullptr;
2366 }
2367
2368 if (isa<UndefValue>(Operands[0])) {
2369 // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
2370 // ctpop() is between 0 and bitwidth, pick 0 for undef.
2371 // fptoui.sat and fptosi.sat can always fold to zero (for a zero input).
2372 if (IntrinsicID == Intrinsic::cos ||
2373 IntrinsicID == Intrinsic::ctpop ||
2374 IntrinsicID == Intrinsic::fptoui_sat ||
2375 IntrinsicID == Intrinsic::fptosi_sat ||
2376 IntrinsicID == Intrinsic::canonicalize)
2377 return Constant::getNullValue(Ty);
2378 if (IntrinsicID == Intrinsic::bswap ||
2379 IntrinsicID == Intrinsic::bitreverse ||
2380 IntrinsicID == Intrinsic::launder_invariant_group ||
2381 IntrinsicID == Intrinsic::strip_invariant_group)
2382 return Operands[0];
2383 }
2384
2385 if (isa<ConstantPointerNull>(Operands[0])) {
2386 // launder(null) == null == strip(null) iff in addrspace 0
2387 if (IntrinsicID == Intrinsic::launder_invariant_group ||
2388 IntrinsicID == Intrinsic::strip_invariant_group) {
2389 // If the instruction has not yet been placed in a basic block (e.g. when
2390 // cloning a function during inlining), Call's caller may not be available,
2391 // so check Call's parent block before querying Call->getCaller.
2392 const Function *Caller =
2393 Call->getParent() ? Call->getCaller() : nullptr;
2394 if (Caller &&
2395 !NullPointerIsDefined(
2396 Caller, Operands[0]->getType()->getPointerAddressSpace())) {
2397 return Operands[0];
2398 }
2399 return nullptr;
2400 }
2401 }
2402
2403 if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
2404 if (IntrinsicID == Intrinsic::convert_to_fp16) {
2405 APFloat Val(Op->getValueAPF());
2406
2407 bool lost = false;
2408 Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
2409
2410 return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
2411 }
2412
2413 APFloat U = Op->getValueAPF();
2414
2415 if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
2416 IntrinsicID == Intrinsic::wasm_trunc_unsigned) {
2417 bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;
2418
2419 if (U.isNaN())
2420 return nullptr;
2421
2422 unsigned Width = Ty->getIntegerBitWidth();
2423 APSInt Int(Width, !Signed);
2424 bool IsExact = false;
2425 APFloat::opStatus Status =
2426 U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
2427
2428 if (Status == APFloat::opOK || Status == APFloat::opInexact)
2429 return ConstantInt::get(Ty, Int);
2430
2431 return nullptr;
2432 }
2433
2434 if (IntrinsicID == Intrinsic::fptoui_sat ||
2435 IntrinsicID == Intrinsic::fptosi_sat) {
2436 // convertToInteger() already has the desired saturation semantics.
2437 APSInt Int(Ty->getIntegerBitWidth(),
2438 IntrinsicID == Intrinsic::fptoui_sat);
2439 bool IsExact;
2440 U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
2441 return ConstantInt::get(Ty, Int);
2442 }
2443
2444 if (IntrinsicID == Intrinsic::canonicalize)
2445 return constantFoldCanonicalize(Ty, Call, U);
2446
2447#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2448 if (Ty->isFP128Ty()) {
2449 if (IntrinsicID == Intrinsic::log) {
2450 float128 Result = logf128(Op->getValueAPF().convertToQuad());
2451 return GetConstantFoldFPValue128(Result, Ty);
2452 }
2453
2454 LibFunc Fp128Func = NotLibFunc;
2455 if (TLI && TLI->getLibFunc(Name, Fp128Func) && TLI->has(Fp128Func) &&
2456 Fp128Func == LibFunc_logl)
2457 return ConstantFoldFP128(logf128, Op->getValueAPF(), Ty);
2458 }
2459#endif
2460
2461 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy() &&
2462 !Ty->isIntegerTy())
2463 return nullptr;
2464
2465 // Use internal versions of these intrinsics.
2466
2467 if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
2468 U.roundToIntegral(APFloat::rmNearestTiesToEven);
2469 return ConstantFP::get(Ty->getContext(), U);
2470 }
2471
2472 if (IntrinsicID == Intrinsic::round) {
2473 U.roundToIntegral(APFloat::rmNearestTiesToAway);
2474 return ConstantFP::get(Ty->getContext(), U);
2475 }
2476
2477 if (IntrinsicID == Intrinsic::roundeven) {
2478 U.roundToIntegral(APFloat::rmNearestTiesToEven);
2479 return ConstantFP::get(Ty->getContext(), U);
2480 }
2481
2482 if (IntrinsicID == Intrinsic::ceil) {
2483 U.roundToIntegral(APFloat::rmTowardPositive);
2484 return ConstantFP::get(Ty->getContext(), U);
2485 }
2486
2487 if (IntrinsicID == Intrinsic::floor) {
2488 U.roundToIntegral(APFloat::rmTowardNegative);
2489 return ConstantFP::get(Ty->getContext(), U);
2490 }
2491
2492 if (IntrinsicID == Intrinsic::trunc) {
2493 U.roundToIntegral(APFloat::rmTowardZero);
2494 return ConstantFP::get(Ty->getContext(), U);
2495 }
2496
2497 if (IntrinsicID == Intrinsic::fabs) {
2498 U.clearSign();
2499 return ConstantFP::get(Ty->getContext(), U);
2500 }
2501
2502 if (IntrinsicID == Intrinsic::amdgcn_fract) {
2503 // The v_fract instruction behaves like the OpenCL spec, which defines
2504 // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is
2505 // there to prevent fract(-small) from returning 1.0. It returns the
2506 // largest positive floating-point number less than 1.0."
2507 APFloat FloorU(U);
2508 FloorU.roundToIntegral(APFloat::rmTowardNegative);
2509 APFloat FractU(U - FloorU);
2510 APFloat AlmostOne(U.getSemantics(), 1);
2511 AlmostOne.next(/*nextDown*/ true);
2512 return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
2513 }
2514
2515 // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
2516 // raise FP exceptions, unless the argument is a signaling NaN.
2517
2518 std::optional<APFloat::roundingMode> RM;
2519 switch (IntrinsicID) {
2520 default:
2521 break;
2522 case Intrinsic::experimental_constrained_nearbyint:
2523 case Intrinsic::experimental_constrained_rint: {
2524 auto CI = cast<ConstrainedFPIntrinsic>(Call);
2525 RM = CI->getRoundingMode();
2526 if (!RM || *RM == RoundingMode::Dynamic)
2527 return nullptr;
2528 break;
2529 }
2530 case Intrinsic::experimental_constrained_round:
2531 RM = APFloat::rmNearestTiesToAway;
2532 break;
2533 case Intrinsic::experimental_constrained_ceil:
2534 RM = APFloat::rmTowardPositive;
2535 break;
2536 case Intrinsic::experimental_constrained_floor:
2537 RM = APFloat::rmTowardNegative;
2538 break;
2539 case Intrinsic::experimental_constrained_trunc:
2540 RM = APFloat::rmTowardZero;
2541 break;
2542 }
2543 if (RM) {
2544 auto CI = cast<ConstrainedFPIntrinsic>(Call);
2545 if (U.isFinite()) {
2546 APFloat::opStatus St = U.roundToIntegral(*RM);
2547 if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
2548 St == APFloat::opInexact) {
2549 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2550 if (EB == fp::ebStrict)
2551 return nullptr;
2552 }
2553 } else if (U.isSignaling()) {
2554 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2555 if (EB && *EB != fp::ebIgnore)
2556 return nullptr;
2557 U = APFloat::getQNaN(U.getSemantics());
2558 }
2559 return ConstantFP::get(Ty->getContext(), U);
2560 }
2561
2562 // NVVM float/double to signed/unsigned int32/int64 conversions:
2563 switch (IntrinsicID) {
2564 // f2i
2565 case Intrinsic::nvvm_f2i_rm:
2566 case Intrinsic::nvvm_f2i_rn:
2567 case Intrinsic::nvvm_f2i_rp:
2568 case Intrinsic::nvvm_f2i_rz:
2569 case Intrinsic::nvvm_f2i_rm_ftz:
2570 case Intrinsic::nvvm_f2i_rn_ftz:
2571 case Intrinsic::nvvm_f2i_rp_ftz:
2572 case Intrinsic::nvvm_f2i_rz_ftz:
2573 // f2ui
2574 case Intrinsic::nvvm_f2ui_rm:
2575 case Intrinsic::nvvm_f2ui_rn:
2576 case Intrinsic::nvvm_f2ui_rp:
2577 case Intrinsic::nvvm_f2ui_rz:
2578 case Intrinsic::nvvm_f2ui_rm_ftz:
2579 case Intrinsic::nvvm_f2ui_rn_ftz:
2580 case Intrinsic::nvvm_f2ui_rp_ftz:
2581 case Intrinsic::nvvm_f2ui_rz_ftz:
2582 // d2i
2583 case Intrinsic::nvvm_d2i_rm:
2584 case Intrinsic::nvvm_d2i_rn:
2585 case Intrinsic::nvvm_d2i_rp:
2586 case Intrinsic::nvvm_d2i_rz:
2587 // d2ui
2588 case Intrinsic::nvvm_d2ui_rm:
2589 case Intrinsic::nvvm_d2ui_rn:
2590 case Intrinsic::nvvm_d2ui_rp:
2591 case Intrinsic::nvvm_d2ui_rz:
2592 // f2ll
2593 case Intrinsic::nvvm_f2ll_rm:
2594 case Intrinsic::nvvm_f2ll_rn:
2595 case Intrinsic::nvvm_f2ll_rp:
2596 case Intrinsic::nvvm_f2ll_rz:
2597 case Intrinsic::nvvm_f2ll_rm_ftz:
2598 case Intrinsic::nvvm_f2ll_rn_ftz:
2599 case Intrinsic::nvvm_f2ll_rp_ftz:
2600 case Intrinsic::nvvm_f2ll_rz_ftz:
2601 // f2ull
2602 case Intrinsic::nvvm_f2ull_rm:
2603 case Intrinsic::nvvm_f2ull_rn:
2604 case Intrinsic::nvvm_f2ull_rp:
2605 case Intrinsic::nvvm_f2ull_rz:
2606 case Intrinsic::nvvm_f2ull_rm_ftz:
2607 case Intrinsic::nvvm_f2ull_rn_ftz:
2608 case Intrinsic::nvvm_f2ull_rp_ftz:
2609 case Intrinsic::nvvm_f2ull_rz_ftz:
2610 // d2ll
2611 case Intrinsic::nvvm_d2ll_rm:
2612 case Intrinsic::nvvm_d2ll_rn:
2613 case Intrinsic::nvvm_d2ll_rp:
2614 case Intrinsic::nvvm_d2ll_rz:
2615 // d2ull
2616 case Intrinsic::nvvm_d2ull_rm:
2617 case Intrinsic::nvvm_d2ull_rn:
2618 case Intrinsic::nvvm_d2ull_rp:
2619 case Intrinsic::nvvm_d2ull_rz: {
2620 // In float-to-integer conversion, NaN inputs are converted to 0.
2621 if (U.isNaN())
2622 return ConstantInt::get(Ty, 0);
2623
2624 APFloat::roundingMode RMode =
2625 nvvm::GetFPToIntegerRoundingMode(IntrinsicID);
2626 bool IsFTZ = nvvm::FPToIntegerIntrinsicShouldFTZ(IntrinsicID);
2627 bool IsSigned = nvvm::FPToIntegerIntrinsicResultIsSigned(IntrinsicID);
2628
2629 APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
2630 auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;
2631
2632 bool IsExact = false;
2633 APFloat::opStatus Status =
2634 FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
2635
2636 if (Status != APFloat::opInvalidOp)
2637 return ConstantInt::get(Ty, ResInt);
2638 return nullptr;
2639 }
2640 }
2641
2642 /// We only fold functions with finite arguments. Folding NaN and inf is
2643 /// likely to be aborted with an exception anyway, and some host libms
2644 /// have known errors raising exceptions.
2645 if (!U.isFinite())
2646 return nullptr;
2647
2648 /// Currently APFloat versions of these functions do not exist, so we use
2649 /// the host native double versions. Float versions are not called
2650 /// directly; for all of these it holds that (float)(f((double)arg)) ==
2651 /// f(arg). Long double is not supported yet.
2652 const APFloat &APF = Op->getValueAPF();
2653
2654 switch (IntrinsicID) {
2655 default: break;
2656 case Intrinsic::log:
2657 return ConstantFoldFP(log, APF, Ty);
2658 case Intrinsic::log2:
2659 // TODO: What about hosts that lack a C99 library?
2660 return ConstantFoldFP(log2, APF, Ty);
2661 case Intrinsic::log10:
2662 // TODO: What about hosts that lack a C99 library?
2663 return ConstantFoldFP(log10, APF, Ty);
2664 case Intrinsic::exp:
2665 return ConstantFoldFP(exp, APF, Ty);
2666 case Intrinsic::exp2:
2667 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
2668 return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
2669 case Intrinsic::exp10:
2670 // Fold exp10(x) as pow(10, x), in case the host lacks a C99 library.
2671 return ConstantFoldBinaryFP(pow, APFloat(10.0), APF, Ty);
2672 case Intrinsic::sin:
2673 return ConstantFoldFP(sin, APF, Ty);
2674 case Intrinsic::cos:
2675 return ConstantFoldFP(cos, APF, Ty);
2676 case Intrinsic::sinh:
2677 return ConstantFoldFP(sinh, APF, Ty);
2678 case Intrinsic::cosh:
2679 return ConstantFoldFP(cosh, APF, Ty);
2680 case Intrinsic::atan:
2681 // Implement optional behavior from C's Annex F for +/-0.0.
2682 if (U.isZero())
2683 return ConstantFP::get(Ty->getContext(), U);
2684 return ConstantFoldFP(atan, APF, Ty);
2685 case Intrinsic::sqrt:
2686 return ConstantFoldFP(sqrt, APF, Ty);
2687
2688 // NVVM Intrinsics:
2689 case Intrinsic::nvvm_ceil_ftz_f:
2690 case Intrinsic::nvvm_ceil_f:
2691 case Intrinsic::nvvm_ceil_d:
2692 return ConstantFoldFP(
2693 ceil, APF, Ty,
2694 nvvm::GetNVVMDenormMode(
2695 nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2696
2697 case Intrinsic::nvvm_fabs_ftz:
2698 case Intrinsic::nvvm_fabs:
2699 return ConstantFoldFP(
2700 fabs, APF, Ty,
2701 nvvm::GetNVVMDenormMode(
2702 nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2703
2704 case Intrinsic::nvvm_floor_ftz_f:
2705 case Intrinsic::nvvm_floor_f:
2706 case Intrinsic::nvvm_floor_d:
2707 return ConstantFoldFP(
2708 floor, APF, Ty,
2709 nvvm::GetNVVMDenormMode(
2710 nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2711
2712 case Intrinsic::nvvm_rcp_rm_ftz_f:
2713 case Intrinsic::nvvm_rcp_rn_ftz_f:
2714 case Intrinsic::nvvm_rcp_rp_ftz_f:
2715 case Intrinsic::nvvm_rcp_rz_ftz_f:
2716 case Intrinsic::nvvm_rcp_rm_d:
2717 case Intrinsic::nvvm_rcp_rm_f:
2718 case Intrinsic::nvvm_rcp_rn_d:
2719 case Intrinsic::nvvm_rcp_rn_f:
2720 case Intrinsic::nvvm_rcp_rp_d:
2721 case Intrinsic::nvvm_rcp_rp_f:
2722 case Intrinsic::nvvm_rcp_rz_d:
2723 case Intrinsic::nvvm_rcp_rz_f: {
2724 APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID);
2725 bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID);
2726
2727 auto Denominator = IsFTZ ? FTZPreserveSign(APF) : APF;
2728 APFloat Res = APFloat::getOne(APF.getSemantics());
2729 APFloat::opStatus Status = Res.divide(Denominator, RoundMode);
2730
2731 if (Status == APFloat::opOK || Status == APFloat::opInexact) {
2732 if (IsFTZ)
2733 Res = FTZPreserveSign(Res);
2734 return ConstantFP::get(Ty->getContext(), Res);
2735 }
2736 return nullptr;
2737 }
2738
2739 case Intrinsic::nvvm_round_ftz_f:
2740 case Intrinsic::nvvm_round_f:
2741 case Intrinsic::nvvm_round_d: {
2742 // nvvm_round is lowered to PTX cvt.rni, which rounds to the nearest
2743 // integer, choosing the even integer if the source is equidistant between
2744 // two integers, so the semantics are closer to "rint" than to "round".
2745 bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
2746 auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
2747 V.roundToIntegral(APFloat::rmNearestTiesToEven);
2748 return ConstantFP::get(Ty->getContext(), V);
2749 }
2750
2751 case Intrinsic::nvvm_saturate_ftz_f:
2752 case Intrinsic::nvvm_saturate_d:
2753 case Intrinsic::nvvm_saturate_f: {
2754 bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
2755 auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
2756 if (V.isNegative() || V.isZero() || V.isNaN())
2757 return ConstantFP::getZero(Ty);
2758 APFloat One(V.getSemantics(), "1.0");
2759 if (V > One)
2760 return ConstantFP::get(Ty->getContext(), One);
2761 return ConstantFP::get(Ty->getContext(), APF);
2762 }
2763
2764 case Intrinsic::nvvm_sqrt_rn_ftz_f:
2765 case Intrinsic::nvvm_sqrt_f:
2766 case Intrinsic::nvvm_sqrt_rn_d:
2767 case Intrinsic::nvvm_sqrt_rn_f:
2768 if (APF.isNegative())
2769 return nullptr;
2770 return ConstantFoldFP(
2771 sqrt, APF, Ty,
2772 nvvm::GetNVVMDenormMode(
2773 nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2774
2775 // AMDGCN Intrinsics:
2776 case Intrinsic::amdgcn_cos:
2777 case Intrinsic::amdgcn_sin: {
2778 double V = getValueAsDouble(Op);
2779 if (V < -256.0 || V > 256.0)
2780 // The gfx8 and gfx9 architectures handle arguments outside the range
2781 // [-256, 256] differently. This should be a rare case so bail out
2782 // rather than trying to handle the difference.
2783 return nullptr;
2784 bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;
2785 double V4 = V * 4.0;
2786 if (V4 == floor(V4)) {
2787 // Force exact results for quarter-integer inputs.
2788 const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };
2789 V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
2790 } else {
2791 if (IsCos)
2792 V = cos(V * 2.0 * numbers::pi);
2793 else
2794 V = sin(V * 2.0 * numbers::pi);
2795 }
2796 return GetConstantFoldFPValue(V, Ty);
2797 }
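// As folded here, the argument of amdgcn.cos/amdgcn.sin is in units of full
// rotations (the code evaluates cos/sin of V * 2 * pi), so for example
// amdgcn.cos(0.25) takes the exact quarter-integer path above and folds to
// 0.0, while amdgcn.sin(0.25) folds to 1.0.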
2798 }
2799
2800 if (!TLI)
2801 return nullptr;
2802
2803 LibFunc Func = NotLibFunc;
2804 if (!TLI->getLibFunc(Name, Func))
2805 return nullptr;
2806
2807 switch (Func) {
2808 default:
2809 break;
2810 case LibFunc_acos:
2811 case LibFunc_acosf:
2812 case LibFunc_acos_finite:
2813 case LibFunc_acosf_finite:
2814 if (TLI->has(Func))
2815 return ConstantFoldFP(acos, APF, Ty);
2816 break;
2817 case LibFunc_asin:
2818 case LibFunc_asinf:
2819 case LibFunc_asin_finite:
2820 case LibFunc_asinf_finite:
2821 if (TLI->has(Func))
2822 return ConstantFoldFP(asin, APF, Ty);
2823 break;
2824 case LibFunc_atan:
2825 case LibFunc_atanf:
2826 // Implement optional behavior from C's Annex F for +/-0.0.
2827 if (U.isZero())
2828 return ConstantFP::get(Ty->getContext(), U);
2829 if (TLI->has(Func))
2830 return ConstantFoldFP(atan, APF, Ty);
2831 break;
2832 case LibFunc_ceil:
2833 case LibFunc_ceilf:
2834 if (TLI->has(Func)) {
2835 U.roundToIntegral(APFloat::rmTowardPositive);
2836 return ConstantFP::get(Ty->getContext(), U);
2837 }
2838 break;
2839 case LibFunc_cos:
2840 case LibFunc_cosf:
2841 if (TLI->has(Func))
2842 return ConstantFoldFP(cos, APF, Ty);
2843 break;
2844 case LibFunc_cosh:
2845 case LibFunc_coshf:
2846 case LibFunc_cosh_finite:
2847 case LibFunc_coshf_finite:
2848 if (TLI->has(Func))
2849 return ConstantFoldFP(cosh, APF, Ty);
2850 break;
2851 case LibFunc_exp:
2852 case LibFunc_expf:
2853 case LibFunc_exp_finite:
2854 case LibFunc_expf_finite:
2855 if (TLI->has(Func))
2856 return ConstantFoldFP(exp, APF, Ty);
2857 break;
2858 case LibFunc_exp2:
2859 case LibFunc_exp2f:
2860 case LibFunc_exp2_finite:
2861 case LibFunc_exp2f_finite:
2862 if (TLI->has(Func))
2863 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
2864 return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
2865 break;
2866 case LibFunc_fabs:
2867 case LibFunc_fabsf:
2868 if (TLI->has(Func)) {
2869 U.clearSign();
2870 return ConstantFP::get(Ty->getContext(), U);
2871 }
2872 break;
2873 case LibFunc_floor:
2874 case LibFunc_floorf:
2875 if (TLI->has(Func)) {
2876 U.roundToIntegral(APFloat::rmTowardNegative);
2877 return ConstantFP::get(Ty->getContext(), U);
2878 }
2879 break;
2880 case LibFunc_log:
2881 case LibFunc_logf:
2882 case LibFunc_log_finite:
2883 case LibFunc_logf_finite:
2884 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
2885 return ConstantFoldFP(log, APF, Ty);
2886 break;
2887 case LibFunc_log2:
2888 case LibFunc_log2f:
2889 case LibFunc_log2_finite:
2890 case LibFunc_log2f_finite:
2891 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
2892 // TODO: What about hosts that lack a C99 library?
2893 return ConstantFoldFP(log2, APF, Ty);
2894 break;
2895 case LibFunc_log10:
2896 case LibFunc_log10f:
2897 case LibFunc_log10_finite:
2898 case LibFunc_log10f_finite:
2899 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
2900 // TODO: What about hosts that lack a C99 library?
2901 return ConstantFoldFP(log10, APF, Ty);
2902 break;
2903 case LibFunc_ilogb:
2904 case LibFunc_ilogbf:
2905 if (!APF.isZero() && TLI->has(Func))
2906 return ConstantInt::get(Ty, ilogb(APF), true);
2907 break;
2908 case LibFunc_logb:
2909 case LibFunc_logbf:
2910 if (!APF.isZero() && TLI->has(Func))
2911 return ConstantFoldFP(logb, APF, Ty);
2912 break;
2913 case LibFunc_log1p:
2914 case LibFunc_log1pf:
2915 // Implement optional behavior from C's Annex F for +/-0.0.
2916 if (U.isZero())
2917 return ConstantFP::get(Ty->getContext(), U);
2918 if (APF > APFloat::getOne(APF.getSemantics(), true) && TLI->has(Func))
2919 return ConstantFoldFP(log1p, APF, Ty);
2920 break;
2921 case LibFunc_logl:
2922 return nullptr;
2923 case LibFunc_erf:
2924 case LibFunc_erff:
2925 if (TLI->has(Func))
2926 return ConstantFoldFP(erf, APF, Ty);
2927 break;
2928 case LibFunc_nearbyint:
2929 case LibFunc_nearbyintf:
2930 case LibFunc_rint:
2931 case LibFunc_rintf:
2932 if (TLI->has(Func)) {
2933 U.roundToIntegral(APFloat::rmNearestTiesToEven);
2934 return ConstantFP::get(Ty->getContext(), U);
2935 }
2936 break;
2937 case LibFunc_round:
2938 case LibFunc_roundf:
2939 if (TLI->has(Func)) {
2940 U.roundToIntegral(APFloat::rmNearestTiesToAway);
2941 return ConstantFP::get(Ty->getContext(), U);
2942 }
2943 break;
2944 case LibFunc_sin:
2945 case LibFunc_sinf:
2946 if (TLI->has(Func))
2947 return ConstantFoldFP(sin, APF, Ty);
2948 break;
2949 case LibFunc_sinh:
2950 case LibFunc_sinhf:
2951 case LibFunc_sinh_finite:
2952 case LibFunc_sinhf_finite:
2953 if (TLI->has(Func))
2954 return ConstantFoldFP(sinh, APF, Ty);
2955 break;
2956 case LibFunc_sqrt:
2957 case LibFunc_sqrtf:
2958 if (!APF.isNegative() && TLI->has(Func))
2959 return ConstantFoldFP(sqrt, APF, Ty);
2960 break;
2961 case LibFunc_tan:
2962 case LibFunc_tanf:
2963 if (TLI->has(Func))
2964 return ConstantFoldFP(tan, APF, Ty);
2965 break;
2966 case LibFunc_tanh:
2967 case LibFunc_tanhf:
2968 if (TLI->has(Func))
2969 return ConstantFoldFP(tanh, APF, Ty);
2970 break;
2971 case LibFunc_trunc:
2972 case LibFunc_truncf:
2973 if (TLI->has(Func)) {
2974 U.roundToIntegral(APFloat::rmTowardZero);
2975 return ConstantFP::get(Ty->getContext(), U);
2976 }
2977 break;
2978 }
2979 return nullptr;
2980 }
2981
2982 if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
2983 switch (IntrinsicID) {
2984 case Intrinsic::bswap:
2985 return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
2986 case Intrinsic::ctpop:
2987 return ConstantInt::get(Ty, Op->getValue().popcount());
2988 case Intrinsic::bitreverse:
2989 return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
2990 case Intrinsic::convert_from_fp16: {
2991 APFloat Val(APFloat::IEEEhalf(), Op->getValue());
2992
2993 bool lost = false;
2994 APFloat::opStatus status = Val.convert(
2995 Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
2996
2997 // Conversion is always precise.
2998 (void)status;
2999 assert(status != APFloat::opInexact && !lost &&
3000 "Precision lost during fp16 constfolding");
3001
3002 return ConstantFP::get(Ty->getContext(), Val);
3003 }
3004
3005 case Intrinsic::amdgcn_s_wqm: {
3006 uint64_t Val = Op->getZExtValue();
3007 Val |= (Val & 0x5555555555555555ULL) << 1 |
3008 ((Val >> 1) & 0x5555555555555555ULL);
3009 Val |= (Val & 0x3333333333333333ULL) << 2 |
3010 ((Val >> 2) & 0x3333333333333333ULL);
3011 return ConstantInt::get(Ty, Val);
3012 }
3013
3014 case Intrinsic::amdgcn_s_quadmask: {
3015 uint64_t Val = Op->getZExtValue();
3016 uint64_t QuadMask = 0;
3017 for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) {
3018 if (!(Val & 0xF))
3019 continue;
3020
3021 QuadMask |= (1ULL << I);
3022 }
3023 return ConstantInt::get(Ty, QuadMask);
3024 }
3025
3026 case Intrinsic::amdgcn_s_bitreplicate: {
3027 uint64_t Val = Op->getZExtValue();
3028 Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;
3029 Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8;
3030 Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4;
3031 Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2;
3032 Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1;
3033 Val = Val | Val << 1;
3034 return ConstantInt::get(Ty, Val);
3035 }
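// For example, amdgcn.s.bitreplicate applied to the 32-bit value 0b101 (0x5)
// folds to the 64-bit value 0b110011 (0x33): the masked shifts above spread
// bit i to position 2*i, and the final "Val | Val << 1" duplicates each bit.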
3036
3037 default:
3038 return nullptr;
3039 }
3040 }
3041
3042 switch (IntrinsicID) {
3043 default: break;
3044 case Intrinsic::vector_reduce_add:
3045 case Intrinsic::vector_reduce_mul:
3046 case Intrinsic::vector_reduce_and:
3047 case Intrinsic::vector_reduce_or:
3048 case Intrinsic::vector_reduce_xor:
3049 case Intrinsic::vector_reduce_smin:
3050 case Intrinsic::vector_reduce_smax:
3051 case Intrinsic::vector_reduce_umin:
3052 case Intrinsic::vector_reduce_umax:
3053 if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))
3054 return C;
3055 break;
3056 }
3057
3058 // Support ConstantVector in case we have an Undef in the top.
3059 if (isa<ConstantVector>(Operands[0]) ||
3060 isa<ConstantDataVector>(Operands[0]) ||
3061 isa<ConstantAggregateZero>(Operands[0])) {
3062 auto *Op = cast<Constant>(Operands[0]);
3063 switch (IntrinsicID) {
3064 default: break;
3065 case Intrinsic::x86_sse_cvtss2si:
3066 case Intrinsic::x86_sse_cvtss2si64:
3067 case Intrinsic::x86_sse2_cvtsd2si:
3068 case Intrinsic::x86_sse2_cvtsd2si64:
3069 if (ConstantFP *FPOp =
3070 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3071 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3072 /*roundTowardZero=*/false, Ty,
3073 /*IsSigned*/true);
3074 break;
3075 case Intrinsic::x86_sse_cvttss2si:
3076 case Intrinsic::x86_sse_cvttss2si64:
3077 case Intrinsic::x86_sse2_cvttsd2si:
3078 case Intrinsic::x86_sse2_cvttsd2si64:
3079 if (ConstantFP *FPOp =
3080 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3081 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3082 /*roundTowardZero=*/true, Ty,
3083 /*IsSigned*/true);
3084 break;
3085
3086 case Intrinsic::wasm_anytrue:
3087 return Op->isZeroValue() ? ConstantInt::get(Ty, 0)
3088 : ConstantInt::get(Ty, 1);
3089
3090 case Intrinsic::wasm_alltrue:
3091 // Check each element individually
3092 unsigned E = cast<FixedVectorType>(Op->getType())->getNumElements();
3093 for (unsigned I = 0; I != E; ++I)
3094 if (Constant *Elt = Op->getAggregateElement(I))
3095 if (Elt->isZeroValue())
3096 return ConstantInt::get(Ty, 0);
3097
3098 return ConstantInt::get(Ty, 1);
3099 }
3100 }
3101
3102 return nullptr;
3103}
3104
3105static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2,
3106 const ConstrainedFPIntrinsic *Call) {
3107 APFloat::opStatus St = APFloat::opOK;
3108 auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call);
3109 FCmpInst::Predicate Cond = FCmp->getPredicate();
3110 if (FCmp->isSignaling()) {
3111 if (Op1.isNaN() || Op2.isNaN())
3112 St = APFloat::opInvalidOp;
3113 } else {
3114 if (Op1.isSignaling() || Op2.isSignaling())
3115 St = APFloat::opInvalidOp;
3116 }
3117 bool Result = FCmpInst::compare(Op1, Op2, Cond);
3118 if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St))
3119 return ConstantInt::get(Call->getType()->getScalarType(), Result);
3120 return nullptr;
3121}
3122
3123static Constant *ConstantFoldLibCall2(StringRef Name, Type *Ty,
3124 ArrayRef<Constant *> Operands,
3125 const TargetLibraryInfo *TLI) {
3126 if (!TLI)
3127 return nullptr;
3128
3129 LibFunc Func = NotLibFunc;
3130 if (!TLI->getLibFunc(Name, Func))
3131 return nullptr;
3132
3133 const auto *Op1 = dyn_cast<ConstantFP>(Operands[0]);
3134 if (!Op1)
3135 return nullptr;
3136
3137 const auto *Op2 = dyn_cast<ConstantFP>(Operands[1]);
3138 if (!Op2)
3139 return nullptr;
3140
3141 const APFloat &Op1V = Op1->getValueAPF();
3142 const APFloat &Op2V = Op2->getValueAPF();
3143
3144 switch (Func) {
3145 default:
3146 break;
3147 case LibFunc_pow:
3148 case LibFunc_powf:
3149 case LibFunc_pow_finite:
3150 case LibFunc_powf_finite:
3151 if (TLI->has(Func))
3152 return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
3153 break;
3154 case LibFunc_fmod:
3155 case LibFunc_fmodf:
3156 if (TLI->has(Func)) {
3157 APFloat V = Op1->getValueAPF();
3158 if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
3159 return ConstantFP::get(Ty->getContext(), V);
3160 }
3161 break;
3162 case LibFunc_remainder:
3163 case LibFunc_remainderf:
3164 if (TLI->has(Func)) {
3165 APFloat V = Op1->getValueAPF();
3166 if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
3167 return ConstantFP::get(Ty->getContext(), V);
3168 }
3169 break;
3170 case LibFunc_atan2:
3171 case LibFunc_atan2f:
3172 // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm
3173 // implementations (e.g. Solaris), so we do not assume a known result.
3174 if (Op1V.isZero() && Op2V.isZero())
3175 return nullptr;
3176 [[fallthrough]];
3177 case LibFunc_atan2_finite:
3178 case LibFunc_atan2f_finite:
3179 if (TLI->has(Func))
3180 return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
3181 break;
3182 }
3183
3184 return nullptr;
3185}
3186
3187static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
3188 ArrayRef<Constant *> Operands,
3189 const CallBase *Call) {
3190 assert(Operands.size() == 2 && "Wrong number of operands.");
3191
3192 if (Ty->isFloatingPointTy()) {
3193 // TODO: We should have undef handling for all of the FP intrinsics that
3194 // are attempted to be folded in this function.
3195 bool IsOp0Undef = isa<UndefValue>(Operands[0]);
3196 bool IsOp1Undef = isa<UndefValue>(Operands[1]);
3197 switch (IntrinsicID) {
3198 case Intrinsic::maxnum:
3199 case Intrinsic::minnum:
3200 case Intrinsic::maximum:
3201 case Intrinsic::minimum:
3202 case Intrinsic::maximumnum:
3203 case Intrinsic::minimumnum:
3204 case Intrinsic::nvvm_fmax_d:
3205 case Intrinsic::nvvm_fmin_d:
3206 // If one argument is undef, return the other argument.
3207 if (IsOp0Undef)
3208 return Operands[1];
3209 if (IsOp1Undef)
3210 return Operands[0];
3211 break;
3212
3213 case Intrinsic::nvvm_fmax_f:
3214 case Intrinsic::nvvm_fmax_ftz_f:
3215 case Intrinsic::nvvm_fmax_ftz_nan_f:
3216 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3217 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3218 case Intrinsic::nvvm_fmax_nan_f:
3219 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3220 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3221
3222 case Intrinsic::nvvm_fmin_f:
3223 case Intrinsic::nvvm_fmin_ftz_f:
3224 case Intrinsic::nvvm_fmin_ftz_nan_f:
3225 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
3226 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
3227 case Intrinsic::nvvm_fmin_nan_f:
3228 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
3229 case Intrinsic::nvvm_fmin_xorsign_abs_f:
3230 // If one arg is undef, the other arg can be returned only if it is
3231 // constant, as we may need to flush it to sign-preserving zero or
3232 // canonicalize the NaN.
3233 if (!IsOp0Undef && !IsOp1Undef)
3234 break;
3235 if (auto *Op = dyn_cast<ConstantFP>(Operands[IsOp0Undef ? 1 : 0])) {
3236 if (Op->isNaN()) {
3237 APInt NVCanonicalNaN(32, 0x7fffffff);
3238 return ConstantFP::get(
3239 Ty, APFloat(Ty->getFltSemantics(), NVCanonicalNaN));
3240 }
3241 if (nvvm::FMinFMaxShouldFTZ(IntrinsicID))
3242 return ConstantFP::get(Ty, FTZPreserveSign(Op->getValueAPF()));
3243 else
3244 return Op;
3245 }
3246 break;
3247 }
3248 }
3249
3250 if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
3251 const APFloat &Op1V = Op1->getValueAPF();
3252
3253 if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
3254 if (Op2->getType() != Op1->getType())
3255 return nullptr;
3256 const APFloat &Op2V = Op2->getValueAPF();
3257
3258 if (const auto *ConstrIntr =
3259 dyn_cast_if_present<ConstrainedFPIntrinsic>(Call)) {
3260 RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
3261 APFloat Res = Op1V;
3262 APFloat::opStatus St;
3263 switch (IntrinsicID) {
3264 default:
3265 return nullptr;
3266 case Intrinsic::experimental_constrained_fadd:
3267 St = Res.add(Op2V, RM);
3268 break;
3269 case Intrinsic::experimental_constrained_fsub:
3270 St = Res.subtract(Op2V, RM);
3271 break;
3272 case Intrinsic::experimental_constrained_fmul:
3273 St = Res.multiply(Op2V, RM);
3274 break;
3275 case Intrinsic::experimental_constrained_fdiv:
3276 St = Res.divide(Op2V, RM);
3277 break;
3278 case Intrinsic::experimental_constrained_frem:
3279 St = Res.mod(Op2V);
3280 break;
3281 case Intrinsic::experimental_constrained_fcmp:
3282 case Intrinsic::experimental_constrained_fcmps:
3283 return evaluateCompare(Op1V, Op2V, ConstrIntr);
3284 }
3285 if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
3286 St))
3287 return ConstantFP::get(Ty->getContext(), Res);
3288 return nullptr;
3289 }
3290
3291 switch (IntrinsicID) {
3292 default:
3293 break;
3294 case Intrinsic::copysign:
3295 return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V));
3296 case Intrinsic::minnum:
3297 return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V));
3298 case Intrinsic::maxnum:
3299 return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V));
3300 case Intrinsic::minimum:
3301 return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
3302 case Intrinsic::maximum:
3303 return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
3304 case Intrinsic::minimumnum:
3305 return ConstantFP::get(Ty->getContext(), minimumnum(Op1V, Op2V));
3306 case Intrinsic::maximumnum:
3307 return ConstantFP::get(Ty->getContext(), maximumnum(Op1V, Op2V));
3308
3309 case Intrinsic::nvvm_fmax_d:
3310 case Intrinsic::nvvm_fmax_f:
3311 case Intrinsic::nvvm_fmax_ftz_f:
3312 case Intrinsic::nvvm_fmax_ftz_nan_f:
3313 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3314 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3315 case Intrinsic::nvvm_fmax_nan_f:
3316 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3317 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3318
3319 case Intrinsic::nvvm_fmin_d:
3320 case Intrinsic::nvvm_fmin_f:
3321 case Intrinsic::nvvm_fmin_ftz_f:
3322 case Intrinsic::nvvm_fmin_ftz_nan_f:
3323 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
3324 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
3325 case Intrinsic::nvvm_fmin_nan_f:
3326 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
3327 case Intrinsic::nvvm_fmin_xorsign_abs_f: {
3328
3329 bool ShouldCanonicalizeNaNs = !(IntrinsicID == Intrinsic::nvvm_fmax_d ||
3330 IntrinsicID == Intrinsic::nvvm_fmin_d);
3331 bool IsFTZ = nvvm::FMinFMaxShouldFTZ(IntrinsicID);
3332 bool IsNaNPropagating = nvvm::FMinFMaxPropagatesNaNs(IntrinsicID);
3333 bool IsXorSignAbs = nvvm::FMinFMaxIsXorSignAbs(IntrinsicID);
3334
3335 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3336 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3337
3338 bool XorSign = false;
3339 if (IsXorSignAbs) {
3340 XorSign = A.isNegative() ^ B.isNegative();
3341 A = abs(A);
3342 B = abs(B);
3343 }
3344
3345 bool IsFMax = false;
3346 switch (IntrinsicID) {
3347 case Intrinsic::nvvm_fmax_d:
3348 case Intrinsic::nvvm_fmax_f:
3349 case Intrinsic::nvvm_fmax_ftz_f:
3350 case Intrinsic::nvvm_fmax_ftz_nan_f:
3351 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3352 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3353 case Intrinsic::nvvm_fmax_nan_f:
3354 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3355 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3356 IsFMax = true;
3357 break;
3358 }
3359 APFloat Res = IsFMax ? maximum(A, B) : minimum(A, B);
3360
3361 if (ShouldCanonicalizeNaNs) {
3362 APFloat NVCanonicalNaN(Res.getSemantics(), APInt(32, 0x7fffffff));
3363 if (A.isNaN() && B.isNaN())
3364 return ConstantFP::get(Ty, NVCanonicalNaN);
3365 else if (IsNaNPropagating && (A.isNaN() || B.isNaN()))
3366 return ConstantFP::get(Ty, NVCanonicalNaN);
3367 }
3368
3369 if (A.isNaN() && B.isNaN())
3370 return Operands[1];
3371 else if (A.isNaN())
3372 Res = B;
3373 else if (B.isNaN())
3374 Res = A;
3375
3376 if (IsXorSignAbs && XorSign != Res.isNegative())
3377 Res.changeSign();
3378
3379 return ConstantFP::get(Ty->getContext(), Res);
3380 }
3381
3382 case Intrinsic::nvvm_add_rm_f:
3383 case Intrinsic::nvvm_add_rn_f:
3384 case Intrinsic::nvvm_add_rp_f:
3385 case Intrinsic::nvvm_add_rz_f:
3386 case Intrinsic::nvvm_add_rm_d:
3387 case Intrinsic::nvvm_add_rn_d:
3388 case Intrinsic::nvvm_add_rp_d:
3389 case Intrinsic::nvvm_add_rz_d:
3390 case Intrinsic::nvvm_add_rm_ftz_f:
3391 case Intrinsic::nvvm_add_rn_ftz_f:
3392 case Intrinsic::nvvm_add_rp_ftz_f:
3393 case Intrinsic::nvvm_add_rz_ftz_f: {
3394
3395 bool IsFTZ = nvvm::FAddShouldFTZ(IntrinsicID);
3396 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3397 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3398
3399 APFloat::roundingMode RoundMode =
3400 nvvm::GetFAddRoundingMode(IntrinsicID);
3401
3402 APFloat Res = A;
3403 APFloat::opStatus Status = Res.add(B, RoundMode);
3404
3405 if (!Res.isNaN() &&
3406 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3407 Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3408 return ConstantFP::get(Ty->getContext(), Res);
3409 }
3410 return nullptr;
3411 }
3412
3413 case Intrinsic::nvvm_mul_rm_f:
3414 case Intrinsic::nvvm_mul_rn_f:
3415 case Intrinsic::nvvm_mul_rp_f:
3416 case Intrinsic::nvvm_mul_rz_f:
3417 case Intrinsic::nvvm_mul_rm_d:
3418 case Intrinsic::nvvm_mul_rn_d:
3419 case Intrinsic::nvvm_mul_rp_d:
3420 case Intrinsic::nvvm_mul_rz_d:
3421 case Intrinsic::nvvm_mul_rm_ftz_f:
3422 case Intrinsic::nvvm_mul_rn_ftz_f:
3423 case Intrinsic::nvvm_mul_rp_ftz_f:
3424 case Intrinsic::nvvm_mul_rz_ftz_f: {
3425
3426 bool IsFTZ = nvvm::FMulShouldFTZ(IntrinsicID);
3427 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3428 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3429
3430 APFloat::roundingMode RoundMode =
3431 nvvm::GetFMulRoundingMode(IntrinsicID);
3432
3433 APFloat Res = A;
3434 APFloat::opStatus Status = Res.multiply(B, RoundMode);
3435
3436 if (!Res.isNaN() &&
3437 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3438 Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3439 return ConstantFP::get(Ty->getContext(), Res);
3440 }
3441 return nullptr;
3442 }
3443
3444 case Intrinsic::nvvm_div_rm_f:
3445 case Intrinsic::nvvm_div_rn_f:
3446 case Intrinsic::nvvm_div_rp_f:
3447 case Intrinsic::nvvm_div_rz_f:
3448 case Intrinsic::nvvm_div_rm_d:
3449 case Intrinsic::nvvm_div_rn_d:
3450 case Intrinsic::nvvm_div_rp_d:
3451 case Intrinsic::nvvm_div_rz_d:
3452 case Intrinsic::nvvm_div_rm_ftz_f:
3453 case Intrinsic::nvvm_div_rn_ftz_f:
3454 case Intrinsic::nvvm_div_rp_ftz_f:
3455 case Intrinsic::nvvm_div_rz_ftz_f: {
3456 bool IsFTZ = nvvm::FDivShouldFTZ(IntrinsicID);
3457 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3458 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3459 APFloat::roundingMode RoundMode =
3460 nvvm::GetFDivRoundingMode(IntrinsicID);
3461
3462 APFloat Res = A;
3463 APFloat::opStatus Status = Res.divide(B, RoundMode);
3464 if (!Res.isNaN() &&
3465 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3466 Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3467 return ConstantFP::get(Ty->getContext(), Res);
3468 }
3469 return nullptr;
3470 }
3471 }
3472
3473 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
3474 return nullptr;
3475
3476 switch (IntrinsicID) {
3477 default:
3478 break;
3479 case Intrinsic::pow:
3480 return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
3481 case Intrinsic::amdgcn_fmul_legacy:
3482 // The legacy behaviour is that multiplying +/- 0.0 by anything, even
3483 // NaN or infinity, gives +0.0.
3484 if (Op1V.isZero() || Op2V.isZero())
3485 return ConstantFP::getZero(Ty);
3486 return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
3487 }
3488
3489 } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
3490 switch (IntrinsicID) {
3491 case Intrinsic::ldexp: {
3492 return ConstantFP::get(
3493 Ty->getContext(),
3494 scalbn(Op1V, Op2C->getSExtValue(), APFloat::rmNearestTiesToEven));
3495 }
3496 case Intrinsic::is_fpclass: {
3497 FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue());
3498 bool Result =
3499 ((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) ||
3500 ((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) ||
3501 ((Mask & fcNegInf) && Op1V.isNegInfinity()) ||
3502 ((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) ||
3503 ((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) ||
3504 ((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) ||
3505 ((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) ||
3506 ((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) ||
3507 ((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) ||
3508 ((Mask & fcPosInf) && Op1V.isPosInfinity());
3509 return ConstantInt::get(Ty, Result);
3510 }
3511 case Intrinsic::powi: {
3512 int Exp = static_cast<int>(Op2C->getSExtValue());
3513 switch (Ty->getTypeID()) {
3514 case Type::HalfTyID:
3515 case Type::FloatTyID: {
3516 APFloat Res(static_cast<float>(std::pow(Op1V.convertToFloat(), Exp)));
3517 if (Ty->isHalfTy()) {
3518 bool Unused;
3519 Res.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
3520 &Unused);
3521 }
3522 return ConstantFP::get(Ty->getContext(), Res);
3523 }
3524 case Type::DoubleTyID:
3525 return ConstantFP::get(Ty, std::pow(Op1V.convertToDouble(), Exp));
3526 default:
3527 return nullptr;
3528 }
3529 }
3530 default:
3531 break;
3532 }
3533 }
3534 return nullptr;
3535 }
3536
3537 if (Operands[0]->getType()->isIntegerTy() &&
3538 Operands[1]->getType()->isIntegerTy()) {
3539 const APInt *C0, *C1;
3540 if (!getConstIntOrUndef(Operands[0], C0) ||
3541 !getConstIntOrUndef(Operands[1], C1))
3542 return nullptr;
3543
3544 switch (IntrinsicID) {
3545 default: break;
3546 case Intrinsic::smax:
3547 case Intrinsic::smin:
3548 case Intrinsic::umax:
3549 case Intrinsic::umin:
3550 if (!C0 && !C1)
3551 return UndefValue::get(Ty);
3552 if (!C0 || !C1)
3553 return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty);
3554 return ConstantInt::get(
3555 Ty, ICmpInst::compare(*C0, *C1,
3556 MinMaxIntrinsic::getPredicate(IntrinsicID))
3557 ? *C0
3558 : *C1);
3559
3560 case Intrinsic::scmp:
3561 case Intrinsic::ucmp:
3562 if (!C0 || !C1)
3563 return ConstantInt::get(Ty, 0);
3564
3565 int Res;
3566 if (IntrinsicID == Intrinsic::scmp)
3567 Res = C0->sgt(*C1) ? 1 : C0->slt(*C1) ? -1 : 0;
3568 else
3569 Res = C0->ugt(*C1) ? 1 : C0->ult(*C1) ? -1 : 0;
3570 return ConstantInt::get(Ty, Res, /*IsSigned=*/true);
3571
3572 case Intrinsic::usub_with_overflow:
3573 case Intrinsic::ssub_with_overflow:
3574 // X - undef -> { 0, false }
3575 // undef - X -> { 0, false }
3576 if (!C0 || !C1)
3577 return Constant::getNullValue(Ty);
3578 [[fallthrough]];
3579 case Intrinsic::uadd_with_overflow:
3580 case Intrinsic::sadd_with_overflow:
3581 // X + undef -> { -1, false }
3582 // undef + X -> { -1, false }
3583 if (!C0 || !C1) {
3584 return ConstantStruct::get(
3585 cast<StructType>(Ty),
3586 {Constant::getAllOnesValue(Ty->getStructElementType(0)),
3587 Constant::getNullValue(Ty->getStructElementType(1))});
3588 }
3589 [[fallthrough]];
3590 case Intrinsic::smul_with_overflow:
3591 case Intrinsic::umul_with_overflow: {
3592 // undef * X -> { 0, false }
3593 // X * undef -> { 0, false }
3594 if (!C0 || !C1)
3595 return Constant::getNullValue(Ty);
3596
3597 APInt Res;
3598 bool Overflow;
3599 switch (IntrinsicID) {
3600 default: llvm_unreachable("Invalid case");
3601 case Intrinsic::sadd_with_overflow:
3602 Res = C0->sadd_ov(*C1, Overflow);
3603 break;
3604 case Intrinsic::uadd_with_overflow:
3605 Res = C0->uadd_ov(*C1, Overflow);
3606 break;
3607 case Intrinsic::ssub_with_overflow:
3608 Res = C0->ssub_ov(*C1, Overflow);
3609 break;
3610 case Intrinsic::usub_with_overflow:
3611 Res = C0->usub_ov(*C1, Overflow);
3612 break;
3613 case Intrinsic::smul_with_overflow:
3614 Res = C0->smul_ov(*C1, Overflow);
3615 break;
3616 case Intrinsic::umul_with_overflow:
3617 Res = C0->umul_ov(*C1, Overflow);
3618 break;
3619 }
3620 Constant *Ops[] = {
3621 ConstantInt::get(Ty->getContext(), Res),
3622 ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
3623 };
3624 return ConstantStruct::get(cast<StructType>(Ty), Ops);
3625 }
3626 case Intrinsic::uadd_sat:
3627 case Intrinsic::sadd_sat:
3628 if (!C0 && !C1)
3629 return UndefValue::get(Ty);
3630 if (!C0 || !C1)
3631 return Constant::getAllOnesValue(Ty);
3632 if (IntrinsicID == Intrinsic::uadd_sat)
3633 return ConstantInt::get(Ty, C0->uadd_sat(*C1));
3634 else
3635 return ConstantInt::get(Ty, C0->sadd_sat(*C1));
3636 case Intrinsic::usub_sat:
3637 case Intrinsic::ssub_sat:
3638 if (!C0 && !C1)
3639 return UndefValue::get(Ty);
3640 if (!C0 || !C1)
3641 return Constant::getNullValue(Ty);
3642 if (IntrinsicID == Intrinsic::usub_sat)
3643 return ConstantInt::get(Ty, C0->usub_sat(*C1));
3644 else
3645 return ConstantInt::get(Ty, C0->ssub_sat(*C1));
3646 case Intrinsic::cttz:
3647 case Intrinsic::ctlz:
3648 assert(C1 && "Must be constant int");
3649
3650 // cttz(0, 1) and ctlz(0, 1) are poison.
3651 if (C1->isOne() && (!C0 || C0->isZero()))
3652 return PoisonValue::get(Ty);
3653 if (!C0)
3654 return Constant::getNullValue(Ty);
3655 if (IntrinsicID == Intrinsic::cttz)
3656 return ConstantInt::get(Ty, C0->countr_zero());
3657 else
3658 return ConstantInt::get(Ty, C0->countl_zero());
3659
3660 case Intrinsic::abs:
3661 assert(C1 && "Must be constant int");
3662 assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1");
3663
3664 // Undef or minimum val operand with poison min --> poison
3665 if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
3666 return PoisonValue::get(Ty);
3667
3668 // Undef operand with no poison min --> 0 (sign bit must be clear)
3669 if (!C0)
3670 return Constant::getNullValue(Ty);
3671
3672 return ConstantInt::get(Ty, C0->abs());
3673 case Intrinsic::amdgcn_wave_reduce_umin:
3674 case Intrinsic::amdgcn_wave_reduce_umax:
3675 return dyn_cast<Constant>(Operands[0]);
3676 }
3677
3678 return nullptr;
3679 }
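// Worked example for the integer folds above (values chosen for illustration):
// scmp on constants 3 and 5 folds to -1 because 3 is signed-less-than 5, and
// umax on 3 and 5 folds to 5 because the unsigned compare selects the larger
// operand.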
3680
3681 // Support ConstantVector in case we have an Undef at the top level.
3682 if ((isa<ConstantVector>(Operands[0]) ||
3683 isa<ConstantDataVector>(Operands[0])) &&
3684 // Check for default rounding mode.
3685 // FIXME: Support other rounding modes?
3686 isa<ConstantInt>(Operands[1]) &&
3687 cast<ConstantInt>(Operands[1])->getValue() == 4) {
3688 auto *Op = cast<Constant>(Operands[0]);
3689 switch (IntrinsicID) {
3690 default: break;
3691 case Intrinsic::x86_avx512_vcvtss2si32:
3692 case Intrinsic::x86_avx512_vcvtss2si64:
3693 case Intrinsic::x86_avx512_vcvtsd2si32:
3694 case Intrinsic::x86_avx512_vcvtsd2si64:
3695 if (ConstantFP *FPOp =
3696 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3697 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3698 /*roundTowardZero=*/false, Ty,
3699 /*IsSigned*/true);
3700 break;
3701 case Intrinsic::x86_avx512_vcvtss2usi32:
3702 case Intrinsic::x86_avx512_vcvtss2usi64:
3703 case Intrinsic::x86_avx512_vcvtsd2usi32:
3704 case Intrinsic::x86_avx512_vcvtsd2usi64:
3705 if (ConstantFP *FPOp =
3706 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3707 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3708 /*roundTowardZero=*/false, Ty,
3709 /*IsSigned*/false);
3710 break;
3711 case Intrinsic::x86_avx512_cvttss2si:
3712 case Intrinsic::x86_avx512_cvttss2si64:
3713 case Intrinsic::x86_avx512_cvttsd2si:
3714 case Intrinsic::x86_avx512_cvttsd2si64:
3715 if (ConstantFP *FPOp =
3716 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3717 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3718 /*roundTowardZero=*/true, Ty,
3719 /*IsSigned*/true);
3720 break;
3721 case Intrinsic::x86_avx512_cvttss2usi:
3722 case Intrinsic::x86_avx512_cvttss2usi64:
3723 case Intrinsic::x86_avx512_cvttsd2usi:
3724 case Intrinsic::x86_avx512_cvttsd2usi64:
3725 if (ConstantFP *FPOp =
3726 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3727 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3728 /*roundTowardZero=*/true, Ty,
3729 /*IsSigned*/false);
3730 break;
3731 }
3732 }
3733 return nullptr;
3734}
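// Example of the FP min/max folds handled above: maxnum on constants 1.0 and
// a quiet NaN folds to 1.0, since the maxnum helper used here follows
// IEEE-754 maxNum semantics and returns the non-NaN operand when exactly one
// operand is NaN.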
3735
3736static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
3737 const APFloat &S0,
3738 const APFloat &S1,
3739 const APFloat &S2) {
3740 unsigned ID;
3741 const fltSemantics &Sem = S0.getSemantics();
3742 APFloat MA(Sem), SC(Sem), TC(Sem);
3743 if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {
3744 if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
3745 // S2 < 0
3746 ID = 5;
3747 SC = -S0;
3748 } else {
3749 ID = 4;
3750 SC = S0;
3751 }
3752 MA = S2;
3753 TC = -S1;
3754 } else if (abs(S1) >= abs(S0)) {
3755 if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
3756 // S1 < 0
3757 ID = 3;
3758 TC = -S2;
3759 } else {
3760 ID = 2;
3761 TC = S2;
3762 }
3763 MA = S1;
3764 SC = S0;
3765 } else {
3766 if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
3767 // S0 < 0
3768 ID = 1;
3769 SC = S2;
3770 } else {
3771 ID = 0;
3772 SC = -S2;
3773 }
3774 MA = S0;
3775 TC = -S1;
3776 }
3777 switch (IntrinsicID) {
3778 default:
3779 llvm_unreachable("unhandled amdgcn cube intrinsic");
3780 case Intrinsic::amdgcn_cubeid:
3781 return APFloat(Sem, ID);
3782 case Intrinsic::amdgcn_cubema:
3783 return MA + MA;
3784 case Intrinsic::amdgcn_cubesc:
3785 return SC;
3786 case Intrinsic::amdgcn_cubetc:
3787 return TC;
3788 }
3789}
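// Worked example for the cube helper above: with (S0, S1, S2) = (1.0, 2.0, 3.0)
// the +Z axis is the major axis, so ID = 4, MA = 3.0, SC = 1.0 and TC = -2.0;
// amdgcn.cubeid therefore folds to 4.0 and amdgcn.cubema to MA + MA = 6.0.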
3790
3791static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
3792 Type *Ty) {
3793 const APInt *C0, *C1, *C2;
3794 if (!getConstIntOrUndef(Operands[0], C0) ||
3795 !getConstIntOrUndef(Operands[1], C1) ||
3796 !getConstIntOrUndef(Operands[2], C2))
3797 return nullptr;
3798
3799 if (!C2)
3800 return UndefValue::get(Ty);
3801
3802 APInt Val(32, 0);
3803 unsigned NumUndefBytes = 0;
3804 for (unsigned I = 0; I < 32; I += 8) {
3805 unsigned Sel = C2->extractBitsAsZExtValue(8, I);
3806 unsigned B = 0;
3807
3808 if (Sel >= 13)
3809 B = 0xff;
3810 else if (Sel == 12)
3811 B = 0x00;
3812 else {
3813 const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
3814 if (!Src)
3815 ++NumUndefBytes;
3816 else if (Sel < 8)
3817 B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
3818 else
3819 B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
3820 }
3821
3822 Val.insertBits(B, I, 8);
3823 }
3824
3825 if (NumUndefBytes == 4)
3826 return UndefValue::get(Ty);
3827
3828 return ConstantInt::get(Ty, Val);
3829}
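// Byte-selection semantics implemented above: a selector byte of 0-3 picks
// that byte of Operands[1], 4-7 picks byte (Sel & 3) of Operands[0], 8-11
// replicates bit 15 or bit 31 of the chosen source across the byte, 12 yields
// 0x00 and 13-15 yield 0xff. For example, a selector of 0x0c0c0504 copies the
// two low bytes of Operands[0] into the low result bytes and zero-fills the
// upper two.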
3830
3831static Constant *ConstantFoldScalarCall3(StringRef Name,
3832 Intrinsic::ID IntrinsicID,
3833 Type *Ty,
3834 ArrayRef<Constant *> Operands,
3835 const TargetLibraryInfo *TLI,
3836 const CallBase *Call) {
3837 assert(Operands.size() == 3 && "Wrong number of operands.");
3838
3839 if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
3840 if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
3841 if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
3842 const APFloat &C1 = Op1->getValueAPF();
3843 const APFloat &C2 = Op2->getValueAPF();
3844 const APFloat &C3 = Op3->getValueAPF();
3845
3846 if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
3847 RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
3848 APFloat Res = C1;
3849 APFloat::opStatus St;
3850 switch (IntrinsicID) {
3851 default:
3852 return nullptr;
3853 case Intrinsic::experimental_constrained_fma:
3854 case Intrinsic::experimental_constrained_fmuladd:
3855 St = Res.fusedMultiplyAdd(C2, C3, RM);
3856 break;
3857 }
3858 if (mayFoldConstrained(
3859 const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
3860 return ConstantFP::get(Ty->getContext(), Res);
3861 return nullptr;
3862 }
3863
3864 switch (IntrinsicID) {
3865 default: break;
3866 case Intrinsic::amdgcn_fma_legacy: {
3867 // The legacy behaviour is that multiplying +/- 0.0 by anything, even
3868 // NaN or infinity, gives +0.0.
3869 if (C1.isZero() || C2.isZero()) {
3870 // It's tempting to just return C3 here, but that would give the
3871 // wrong result if C3 was -0.0.
3872 return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
3873 }
3874 [[fallthrough]];
3875 }
3876 case Intrinsic::fma:
3877 case Intrinsic::fmuladd: {
3878 APFloat V = C1;
3879 V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);
3880 return ConstantFP::get(Ty->getContext(), V);
3881 }
3882
3883 case Intrinsic::nvvm_fma_rm_f:
3884 case Intrinsic::nvvm_fma_rn_f:
3885 case Intrinsic::nvvm_fma_rp_f:
3886 case Intrinsic::nvvm_fma_rz_f:
3887 case Intrinsic::nvvm_fma_rm_d:
3888 case Intrinsic::nvvm_fma_rn_d:
3889 case Intrinsic::nvvm_fma_rp_d:
3890 case Intrinsic::nvvm_fma_rz_d:
3891 case Intrinsic::nvvm_fma_rm_ftz_f:
3892 case Intrinsic::nvvm_fma_rn_ftz_f:
3893 case Intrinsic::nvvm_fma_rp_ftz_f:
3894 case Intrinsic::nvvm_fma_rz_ftz_f: {
3895 bool IsFTZ = nvvm::FMAShouldFTZ(IntrinsicID);
3896 APFloat A = IsFTZ ? FTZPreserveSign(C1) : C1;
3897 APFloat B = IsFTZ ? FTZPreserveSign(C2) : C2;
3898 APFloat C = IsFTZ ? FTZPreserveSign(C3) : C3;
3899
3900 APFloat::roundingMode RoundMode =
3901 nvvm::GetFMARoundingMode(IntrinsicID);
3902
3903 APFloat Res = A;
3904 APFloat::opStatus Status = Res.fusedMultiplyAdd(B, C, RoundMode);
3905
3906 if (!Res.isNaN() &&
3907 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3908 Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3909 return ConstantFP::get(Ty->getContext(), Res);
3910 }
3911 return nullptr;
3912 }
3913
3914 case Intrinsic::amdgcn_cubeid:
3915 case Intrinsic::amdgcn_cubema:
3916 case Intrinsic::amdgcn_cubesc:
3917 case Intrinsic::amdgcn_cubetc: {
3918 APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3);
3919 return ConstantFP::get(Ty->getContext(), V);
3920 }
3921 }
3922 }
3923 }
3924 }
3925
3926 if (IntrinsicID == Intrinsic::smul_fix ||
3927 IntrinsicID == Intrinsic::smul_fix_sat) {
3928 const APInt *C0, *C1;
3929 if (!getConstIntOrUndef(Operands[0], C0) ||
3930 !getConstIntOrUndef(Operands[1], C1))
3931 return nullptr;
3932
3933 // undef * C -> 0
3934 // C * undef -> 0
3935 if (!C0 || !C1)
3936 return Constant::getNullValue(Ty);
3937
3938 // This code performs rounding towards negative infinity in case the result
3939 // cannot be represented exactly for the given scale. Targets that do care
3940 // about rounding should use a target hook for specifying how rounding
3941 // should be done, and provide their own folding to be consistent with
3942 // rounding. This is the same approach as used by
3943 // DAGTypeLegalizer::ExpandIntRes_MULFIX.
3944 unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue();
3945 unsigned Width = C0->getBitWidth();
3946 assert(Scale < Width && "Illegal scale.");
3947 unsigned ExtendedWidth = Width * 2;
3948 APInt Product =
3949 (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale);
3950 if (IntrinsicID == Intrinsic::smul_fix_sat) {
3951 APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth);
3952 APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth);
3953 Product = APIntOps::smin(Product, Max);
3954 Product = APIntOps::smax(Product, Min);
3955 }
3956 return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width));
3957 }
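// Worked example for the fixed-point fold above: with Scale = 2 (two
// fractional bits), operands 6 and 10 encode 1.5 and 2.5; the sign-extended
// product is 60, and 60 ashr 2 == 15, which encodes 3.75 in the same format.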
3958
3959 if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
3960 const APInt *C0, *C1, *C2;
3961 if (!getConstIntOrUndef(Operands[0], C0) ||
3962 !getConstIntOrUndef(Operands[1], C1) ||
3963 !getConstIntOrUndef(Operands[2], C2))
3964 return nullptr;
3965
3966 bool IsRight = IntrinsicID == Intrinsic::fshr;
3967 if (!C2)
3968 return Operands[IsRight ? 1 : 0];
3969 if (!C0 && !C1)
3970 return UndefValue::get(Ty);
3971
3972 // The shift amount is interpreted as modulo the bitwidth. If the shift
3973 // amount is effectively 0, avoid UB due to oversized inverse shift below.
3974 unsigned BitWidth = C2->getBitWidth();
3975 unsigned ShAmt = C2->urem(BitWidth);
3976 if (!ShAmt)
3977 return Operands[IsRight ? 1 : 0];
3978
3979 // (C0 << ShlAmt) | (C1 >> LshrAmt)
3980 unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
3981 unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
3982 if (!C0)
3983 return ConstantInt::get(Ty, C1->lshr(LshrAmt));
3984 if (!C1)
3985 return ConstantInt::get(Ty, C0->shl(ShlAmt));
3986 return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
3987 }
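// Worked example for the funnel-shift fold above: fshl on i8 constants 0xAB
// and 0xCD with a shift amount of 4 folds to (0xAB << 4) | (0xCD >> 4)
// == 0xB0 | 0x0C == 0xBC.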
3988
3989 if (IntrinsicID == Intrinsic::amdgcn_perm)
3990 return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);
3991
3992 return nullptr;
3993}
3994
3995static Constant *ConstantFoldScalarCall(StringRef Name,
3996 Intrinsic::ID IntrinsicID,
3997 Type *Ty,
3998 ArrayRef<Constant *> Operands,
3999 const TargetLibraryInfo *TLI,
4000 const CallBase *Call) {
4001 if (IntrinsicID != Intrinsic::not_intrinsic &&
4002 any_of(Operands, IsaPred<PoisonValue>) &&
4003 intrinsicPropagatesPoison(IntrinsicID))
4004 return PoisonValue::get(Ty);
4005
4006 if (Operands.size() == 1)
4007 return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);
4008
4009 if (Operands.size() == 2) {
4010 if (Constant *FoldedLibCall =
4011 ConstantFoldLibCall2(Name, Ty, Operands, TLI)) {
4012 return FoldedLibCall;
4013 }
4014 return ConstantFoldIntrinsicCall2(IntrinsicID, Ty, Operands, Call);
4015 }
4016
4017 if (Operands.size() == 3)
4018 return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);
4019
4020 return nullptr;
4021}
4022
4023static Constant *ConstantFoldFixedVectorCall(
4024 StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,
4025 ArrayRef<Constant *> Operands, const DataLayout &DL,
4026 const TargetLibraryInfo *TLI, const CallBase *Call) {
4027 SmallVector<Constant *, 4> Result(FVTy->getNumElements());
4028 SmallVector<Constant *, 4> Lane(Operands.size());
4029 Type *Ty = FVTy->getElementType();
4030
4031 switch (IntrinsicID) {
4032 case Intrinsic::masked_load: {
4033 auto *SrcPtr = Operands[0];
4034 auto *Mask = Operands[2];
4035 auto *Passthru = Operands[3];
4036
4037 Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);
4038
4039 SmallVector<Constant *, 32> NewElements;
4040 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4041 auto *MaskElt = Mask->getAggregateElement(I);
4042 if (!MaskElt)
4043 break;
4044 auto *PassthruElt = Passthru->getAggregateElement(I);
4045 auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr;
4046 if (isa<UndefValue>(MaskElt)) {
4047 if (PassthruElt)
4048 NewElements.push_back(PassthruElt);
4049 else if (VecElt)
4050 NewElements.push_back(VecElt);
4051 else
4052 return nullptr;
4053 }
4054 if (MaskElt->isNullValue()) {
4055 if (!PassthruElt)
4056 return nullptr;
4057 NewElements.push_back(PassthruElt);
4058 } else if (MaskElt->isOneValue()) {
4059 if (!VecElt)
4060 return nullptr;
4061 NewElements.push_back(VecElt);
4062 } else {
4063 return nullptr;
4064 }
4065 }
4066 if (NewElements.size() != FVTy->getNumElements())
4067 return nullptr;
4068 return ConstantVector::get(NewElements);
4069 }
4070 case Intrinsic::arm_mve_vctp8:
4071 case Intrinsic::arm_mve_vctp16:
4072 case Intrinsic::arm_mve_vctp32:
4073 case Intrinsic::arm_mve_vctp64: {
4074 if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
4075 unsigned Lanes = FVTy->getNumElements();
4076 uint64_t Limit = Op->getZExtValue();
4077
4078 SmallVector<Constant *, 16> NCs;
4079 for (unsigned i = 0; i < Lanes; i++) {
4080 if (i < Limit)
4081 NCs.push_back(ConstantInt::getTrue(Ty));
4082 else
4083 NCs.push_back(ConstantInt::getFalse(Ty));
4084 }
4085 return ConstantVector::get(NCs);
4086 }
4087 return nullptr;
4088 }
4089 case Intrinsic::get_active_lane_mask: {
4090 auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
4091 auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
4092 if (Op0 && Op1) {
4093 unsigned Lanes = FVTy->getNumElements();
4094 uint64_t Base = Op0->getZExtValue();
4095 uint64_t Limit = Op1->getZExtValue();
4096
4097 SmallVector<Constant *, 16> NCs;
4098 for (unsigned i = 0; i < Lanes; i++) {
4099 if (Base + i < Limit)
4100 NCs.push_back(ConstantInt::getTrue(Ty));
4101 else
4102 NCs.push_back(ConstantInt::getFalse(Ty));
4103 }
4104 return ConstantVector::get(NCs);
4105 }
4106 return nullptr;
4107 }
4108 case Intrinsic::vector_extract: {
4109 auto *Idx = dyn_cast<ConstantInt>(Operands[1]);
4110 Constant *Vec = Operands[0];
4111 if (!Idx || !isa<FixedVectorType>(Vec->getType()))
4112 return nullptr;
4113
4114 unsigned NumElements = FVTy->getNumElements();
4115 unsigned VecNumElements =
4116 cast<FixedVectorType>(Vec->getType())->getNumElements();
4117 unsigned StartingIndex = Idx->getZExtValue();
4118
4119 // Extracting the entire vector is a nop.
4120 if (NumElements == VecNumElements && StartingIndex == 0)
4121 return Vec;
4122
4123 for (unsigned I = StartingIndex, E = StartingIndex + NumElements; I < E;
4124 ++I) {
4125 Constant *Elt = Vec->getAggregateElement(I);
4126 if (!Elt)
4127 return nullptr;
4128 Result[I - StartingIndex] = Elt;
4129 }
4130
4131 return ConstantVector::get(Result);
4132 }
4133 case Intrinsic::vector_insert: {
4134 Constant *Vec = Operands[0];
4135 Constant *SubVec = Operands[1];
4136 auto *Idx = dyn_cast<ConstantInt>(Operands[2]);
4137 if (!Idx || !isa<FixedVectorType>(Vec->getType()))
4138 return nullptr;
4139
4140 unsigned SubVecNumElements =
4141 cast<FixedVectorType>(SubVec->getType())->getNumElements();
4142 unsigned VecNumElements =
4143 cast<FixedVectorType>(Vec->getType())->getNumElements();
4144 unsigned IdxN = Idx->getZExtValue();
4145 // Replacing the entire vector with the subvector is a nop.
4146 if (SubVecNumElements == VecNumElements && IdxN == 0)
4147 return SubVec;
4148
4149 for (unsigned I = 0; I < VecNumElements; ++I) {
4150 Constant *Elt;
4151 if (I < IdxN + SubVecNumElements)
4152 Elt = SubVec->getAggregateElement(I - IdxN);
4153 else
4154 Elt = Vec->getAggregateElement(I);
4155 if (!Elt)
4156 return nullptr;
4157 Result[I] = Elt;
4158 }
4159 return ConstantVector::get(Result);
4160 }
4161 case Intrinsic::vector_interleave2: {
4162 unsigned NumElements =
4163 cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
4164 for (unsigned I = 0; I < NumElements; ++I) {
4165 Constant *Elt0 = Operands[0]->getAggregateElement(I);
4166 Constant *Elt1 = Operands[1]->getAggregateElement(I);
4167 if (!Elt0 || !Elt1)
4168 return nullptr;
4169 Result[2 * I] = Elt0;
4170 Result[2 * I + 1] = Elt1;
4171 }
4172 return ConstantVector::get(Result);
4173 }
4174 case Intrinsic::wasm_dot: {
4175 unsigned NumElements =
4176 cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
4177
4178 assert(NumElements == 8 && Result.size() == 4 &&
4179 "wasm dot takes i16x8 and produces i32x4");
4180 assert(Ty->isIntegerTy());
4181 int32_t MulVector[8];
4182
4183 for (unsigned I = 0; I < NumElements; ++I) {
4184 ConstantInt *Elt0 =
4185 cast<ConstantInt>(Operands[0]->getAggregateElement(I));
4186 ConstantInt *Elt1 =
4187 cast<ConstantInt>(Operands[1]->getAggregateElement(I));
4188
4189 MulVector[I] = Elt0->getSExtValue() * Elt1->getSExtValue();
4190 }
4191 for (unsigned I = 0; I < Result.size(); I++) {
4192 int64_t IAdd = (int64_t)MulVector[I * 2] + (int64_t)MulVector[I * 2 + 1];
4193 Result[I] = ConstantInt::get(Ty, IAdd);
4194 }
4195
4196 return ConstantVector::get(Result);
4197 }
4198 default:
4199 break;
4200 }
4201
4202 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4203 // Gather a column of constants.
4204 for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
4205 // Some intrinsics use a scalar type for certain arguments.
4206 if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J, /*TTI=*/nullptr)) {
4207 Lane[J] = Operands[J];
4208 continue;
4209 }
4210
4211 Constant *Agg = Operands[J]->getAggregateElement(I);
4212 if (!Agg)
4213 return nullptr;
4214
4215 Lane[J] = Agg;
4216 }
4217
4218 // Use the regular scalar folding to simplify this column.
4219 Constant *Folded =
4220 ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
4221 if (!Folded)
4222 return nullptr;
4223 Result[I] = Folded;
4224 }
4225
4226 return ConstantVector::get(Result);
4227}
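// The generic path above folds a fixed vector lane by lane: for example, a
// umax call on constant operands <1, 8> and <5, 2> gathers one column of
// scalars at a time, folds each column through ConstantFoldScalarCall, and
// rebuilds the result vector <5, 8>.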
4228
4229static Constant *ConstantFoldScalableVectorCall(
4230 StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
4231 ArrayRef<Constant *> Operands, const DataLayout &DL,
4232 const TargetLibraryInfo *TLI, const CallBase *Call) {
4233 switch (IntrinsicID) {
4234 case Intrinsic::aarch64_sve_convert_from_svbool: {
4235 auto *Src = dyn_cast<Constant>(Operands[0]);
4236 if (!Src || !Src->isNullValue())
4237 break;
4238
4239 return ConstantInt::getFalse(SVTy);
4240 }
4241 default:
4242 break;
4243 }
4244
4245 // If trivially vectorizable, try folding it via the scalar call if all
4246 // operands are splats.
4247
4248 // TODO: ConstantFoldFixedVectorCall should probably check this too?
4249 if (!isTriviallyVectorizable(IntrinsicID))
4250 return nullptr;
4251
4252 SmallVector<Constant *, 4> SplatOps;
4253 for (auto [I, Op] : enumerate(Operands)) {
4254 if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, I, /*TTI=*/nullptr)) {
4255 SplatOps.push_back(Op);
4256 continue;
4257 }
4258 Constant *Splat = Op->getSplatValue();
4259 if (!Splat)
4260 return nullptr;
4261 SplatOps.push_back(Splat);
4262 }
4263 Constant *Folded = ConstantFoldScalarCall(
4264 Name, IntrinsicID, SVTy->getElementType(), SplatOps, TLI, Call);
4265 if (!Folded)
4266 return nullptr;
4267 return ConstantVector::getSplat(SVTy->getElementCount(), Folded);
4268}
4269
4270static std::pair<Constant *, Constant *>
4271ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) {
4272 if (isa<PoisonValue>(Op))
4273 return {Op, PoisonValue::get(IntTy)};
4274
4275 auto *ConstFP = dyn_cast<ConstantFP>(Op);
4276 if (!ConstFP)
4277 return {};
4278
4279 const APFloat &U = ConstFP->getValueAPF();
4280 int FrexpExp;
4281 APFloat FrexpMant = frexp(U, FrexpExp, APFloat::rmNearestTiesToEven);
4282 Constant *Result0 = ConstantFP::get(ConstFP->getType(), FrexpMant);
4283
4284 // The exponent is an "unspecified value" for inf/nan. We use zero to avoid
4285 // using undef.
4286 Constant *Result1 = FrexpMant.isFinite()
4287 ? ConstantInt::getSigned(IntTy, FrexpExp)
4288 : ConstantInt::getNullValue(IntTy);
4289 return {Result0, Result1};
4290}
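// Worked example: frexp on 8.0 folds to a mantissa of 0.5 and an exponent of
// 4, since 8.0 == 0.5 * 2^4; for inf/nan inputs the folded exponent is 0, per
// the comment above.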
4291
4292/// Handle intrinsics that return tuples, which may be tuples of vectors.
4293static Constant *
4294ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
4295 StructType *StTy, ArrayRef<Constant *> Operands,
4296 const DataLayout &DL, const TargetLibraryInfo *TLI,
4297 const CallBase *Call) {
4298
4299 switch (IntrinsicID) {
4300 case Intrinsic::frexp: {
4301 Type *Ty0 = StTy->getContainedType(0);
4302 Type *Ty1 = StTy->getContainedType(1)->getScalarType();
4303
4304 if (auto *FVTy0 = dyn_cast<FixedVectorType>(Ty0)) {
4305 SmallVector<Constant *, 4> Results0(FVTy0->getNumElements());
4306 SmallVector<Constant *, 4> Results1(FVTy0->getNumElements());
4307
4308 for (unsigned I = 0, E = FVTy0->getNumElements(); I != E; ++I) {
4309 Constant *Lane = Operands[0]->getAggregateElement(I);
4310 std::tie(Results0[I], Results1[I]) =
4311 ConstantFoldScalarFrexpCall(Lane, Ty1);
4312 if (!Results0[I])
4313 return nullptr;
4314 }
4315
4316 return ConstantStruct::get(StTy, ConstantVector::get(Results0),
4317 ConstantVector::get(Results1));
4318 }
4319
4320 auto [Result0, Result1] = ConstantFoldScalarFrexpCall(Operands[0], Ty1);
4321 if (!Result0)
4322 return nullptr;
4323 return ConstantStruct::get(StTy, Result0, Result1);
4324 }
4325 case Intrinsic::sincos: {
4326 Type *Ty = StTy->getContainedType(0);
4327 Type *TyScalar = Ty->getScalarType();
4328
4329 auto ConstantFoldScalarSincosCall =
4330 [&](Constant *Op) -> std::pair<Constant *, Constant *> {
4331 Constant *SinResult =
4332 ConstantFoldScalarCall(Name, Intrinsic::sin, TyScalar, Op, TLI, Call);
4333 Constant *CosResult =
4334 ConstantFoldScalarCall(Name, Intrinsic::cos, TyScalar, Op, TLI, Call);
4335 return std::make_pair(SinResult, CosResult);
4336 };
4337
4338 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
4339 SmallVector<Constant *> SinResults(FVTy->getNumElements());
4340 SmallVector<Constant *> CosResults(FVTy->getNumElements());
4341
4342 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4343 Constant *Lane = Operands[0]->getAggregateElement(I);
4344 std::tie(SinResults[I], CosResults[I]) =
4345 ConstantFoldScalarSincosCall(Lane);
4346 if (!SinResults[I] || !CosResults[I])
4347 return nullptr;
4348 }
4349
4350 return ConstantStruct::get(StTy, ConstantVector::get(SinResults),
4351 ConstantVector::get(CosResults));
4352 }
4353
4354 auto [SinResult, CosResult] = ConstantFoldScalarSincosCall(Operands[0]);
4355 if (!SinResult || !CosResult)
4356 return nullptr;
4357 return ConstantStruct::get(StTy, SinResult, CosResult);
4358 }
4359 case Intrinsic::vector_deinterleave2: {
4360 auto *Vec = Operands[0];
4361 auto *VecTy = cast<VectorType>(Vec->getType());
4362
4363 if (auto *EltC = Vec->getSplatValue()) {
4364 ElementCount HalfEC = VecTy->getElementCount().divideCoefficientBy(2);
4365 auto *HalfVec = ConstantVector::getSplat(HalfEC, EltC);
4366 return ConstantStruct::get(StTy, HalfVec, HalfVec);
4367 }
4368
4369 if (!isa<FixedVectorType>(Vec->getType()))
4370 return nullptr;
4371
4372 unsigned NumElements = VecTy->getElementCount().getFixedValue() / 2;
4373 SmallVector<Constant *, 4> Res0(NumElements), Res1(NumElements);
4374 for (unsigned I = 0; I < NumElements; ++I) {
4375 Constant *Elt0 = Vec->getAggregateElement(2 * I);
4376 Constant *Elt1 = Vec->getAggregateElement(2 * I + 1);
4377 if (!Elt0 || !Elt1)
4378 return nullptr;
4379 Res0[I] = Elt0;
4380 Res1[I] = Elt1;
4381 }
4382 return ConstantStruct::get(StTy, ConstantVector::get(Res0),
4383 ConstantVector::get(Res1));
4384 }
4385 default:
4386 // TODO: Constant folding of vector intrinsics that fall through here does
4387 // not work (e.g. overflow intrinsics)
4388 return ConstantFoldScalarCall(Name, IntrinsicID, StTy, Operands, TLI, Call);
4389 }
4390
4391 return nullptr;
4392}
4393
4394} // end anonymous namespace
4395
4396Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
4397 Constant *RHS, Type *Ty,
4398 Instruction *FMFSource) {
4399 auto *Call = dyn_cast_if_present<CallBase>(FMFSource);
4400 // Ensure we check flags like StrictFP that might prevent this from getting
4401 // folded before generating a result.
4402 if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction()))
4403 return nullptr;
4404 return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call);
4405}
4406
4407Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
4408 ArrayRef<Constant *> Operands,
4409 const TargetLibraryInfo *TLI,
4410 bool AllowNonDeterministic) {
4411 if (Call->isNoBuiltin())
4412 return nullptr;
4413 if (!F->hasName())
4414 return nullptr;
4415
4416 // If this is not an intrinsic and not recognized as a library call, bail out.
4417 Intrinsic::ID IID = F->getIntrinsicID();
4418 if (IID == Intrinsic::not_intrinsic) {
4419 if (!TLI)
4420 return nullptr;
4421 LibFunc LibF;
4422 if (!TLI->getLibFunc(*F, LibF))
4423 return nullptr;
4424 }
4425
4426 // Conservatively assume that floating-point libcalls may be
4427 // non-deterministic.
4428 Type *Ty = F->getReturnType();
4429 if (!AllowNonDeterministic && Ty->isFPOrFPVectorTy())
4430 return nullptr;
4431
4432 StringRef Name = F->getName();
4433 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
4434 return ConstantFoldFixedVectorCall(
4435 Name, IID, FVTy, Operands, F->getDataLayout(), TLI, Call);
4436
4437 if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
4438 return ConstantFoldScalableVectorCall(
4439 Name, IID, SVTy, Operands, F->getDataLayout(), TLI, Call);
4440
4441 if (auto *StTy = dyn_cast<StructType>(Ty))
4442 return ConstantFoldStructCall(Name, IID, StTy, Operands,
4443 F->getDataLayout(), TLI, Call);
4444
4445 // TODO: If this is a library function, we already discovered that above,
4446 // so we should pass the LibFunc, not the name (and it might be better
4447 // still to separate intrinsic handling from libcalls).
4448 return ConstantFoldScalarCall(Name, IID, Ty, Operands, TLI, Call);
4449}
4450
4451bool llvm::isMathLibCallNoop(const CallBase *Call,
4452 const TargetLibraryInfo *TLI) {
4453 // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
4454 // (and to some extent ConstantFoldScalarCall).
4455 if (Call->isNoBuiltin() || Call->isStrictFP())
4456 return false;
4457 Function *F = Call->getCalledFunction();
4458 if (!F)
4459 return false;
4460
4461 LibFunc Func;
4462 if (!TLI || !TLI->getLibFunc(*F, Func))
4463 return false;
4464
4465 if (Call->arg_size() == 1) {
4466 if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
4467 const APFloat &Op = OpC->getValueAPF();
4468 switch (Func) {
4469 case LibFunc_logl:
4470 case LibFunc_log:
4471 case LibFunc_logf:
4472 case LibFunc_log2l:
4473 case LibFunc_log2:
4474 case LibFunc_log2f:
4475 case LibFunc_log10l:
4476 case LibFunc_log10:
4477 case LibFunc_log10f:
4478 return Op.isNaN() || (!Op.isZero() && !Op.isNegative());
4479
4480 case LibFunc_ilogb:
4481 return !Op.isNaN() && !Op.isZero() && !Op.isInfinity();
4482
4483 case LibFunc_expl:
4484 case LibFunc_exp:
4485 case LibFunc_expf:
4486 // FIXME: These boundaries are slightly conservative.
4487 if (OpC->getType()->isDoubleTy())
4488 return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
4489 if (OpC->getType()->isFloatTy())
4490 return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
4491 break;
4492
4493 case LibFunc_exp2l:
4494 case LibFunc_exp2:
4495 case LibFunc_exp2f:
4496 // FIXME: These boundaries are slightly conservative.
4497 if (OpC->getType()->isDoubleTy())
4498 return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
4499 if (OpC->getType()->isFloatTy())
4500 return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
4501 break;
4502
4503 case LibFunc_sinl:
4504 case LibFunc_sin:
4505 case LibFunc_sinf:
4506 case LibFunc_cosl:
4507 case LibFunc_cos:
4508 case LibFunc_cosf:
4509 return !Op.isInfinity();
4510
4511 case LibFunc_tanl:
4512 case LibFunc_tan:
4513 case LibFunc_tanf: {
4514 // FIXME: Stop using the host math library.
4515 // FIXME: The computation isn't done in the right precision.
4516 Type *Ty = OpC->getType();
4517 if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
4518 return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
4519 break;
4520 }
4521
4522 case LibFunc_atan:
4523 case LibFunc_atanf:
4524 case LibFunc_atanl:
4525 // Per POSIX, this MAY fail if Op is denormal. We choose not to fail.
4526 return true;
4527
4528 case LibFunc_asinl:
4529 case LibFunc_asin:
4530 case LibFunc_asinf:
4531 case LibFunc_acosl:
4532 case LibFunc_acos:
4533 case LibFunc_acosf:
4534 return !(Op < APFloat::getOne(Op.getSemantics(), true) ||
4535 Op > APFloat::getOne(Op.getSemantics()));
4536
4537 case LibFunc_sinh:
4538 case LibFunc_cosh:
4539 case LibFunc_sinhf:
4540 case LibFunc_coshf:
4541 case LibFunc_sinhl:
4542 case LibFunc_coshl:
4543 // FIXME: These boundaries are slightly conservative.
4544 if (OpC->getType()->isDoubleTy())
4545 return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
4546 if (OpC->getType()->isFloatTy())
4547 return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
4548 break;
4549
4550 case LibFunc_sqrtl:
4551 case LibFunc_sqrt:
4552 case LibFunc_sqrtf:
4553 return Op.isNaN() || Op.isZero() || !Op.isNegative();
4554
4555 // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
4556 // maybe others?
4557 default:
4558 break;
4559 }
4560 }
4561 }
4562
4563 if (Call->arg_size() == 2) {
4564 ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
4565 ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
4566 if (Op0C && Op1C) {
4567 const APFloat &Op0 = Op0C->getValueAPF();
4568 const APFloat &Op1 = Op1C->getValueAPF();
4569
4570 switch (Func) {
4571 case LibFunc_powl:
4572 case LibFunc_pow:
4573 case LibFunc_powf: {
4574 // FIXME: Stop using the host math library.
4575 // FIXME: The computation isn't done in the right precision.
4576 Type *Ty = Op0C->getType();
4577 if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
4578 if (Ty == Op1C->getType())
4579 return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
4580 }
4581 break;
4582 }
4583
4584 case LibFunc_fmodl:
4585 case LibFunc_fmod:
4586 case LibFunc_fmodf:
4587 case LibFunc_remainderl:
4588 case LibFunc_remainder:
4589 case LibFunc_remainderf:
4590 return Op0.isNaN() || Op1.isNaN() ||
4591 (!Op0.isInfinity() && !Op1.isZero());
4592
4593 case LibFunc_atan2:
4594 case LibFunc_atan2f:
4595 case LibFunc_atan2l:
4596 // Although IEEE-754 says atan2(+/-0.0, +/-0.0) are well-defined, and
4597 // GLIBC and MSVC do not appear to raise an error on those, we
4598 // cannot rely on that behavior. POSIX and C11 say that a domain error
4599 // may occur, so allow for that possibility.
4600 return !Op0.isZero() || !Op1.isZero();
4601
4602 default:
4603 break;
4604 }
4605 }
4606 }
4607
4608 return false;
4609}
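// Example of the range checks above: a dead call to sqrt(4.0) is a noop
// because the argument is non-negative, while sqrt(-1.0) may raise a domain
// error and is conservatively kept; exp(1000.0) is likewise rejected because
// it falls outside the conservative [-745, 709] window used for double.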
4610
4611void TargetFolder::anchor() {}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static Constant * FoldBitCast(Constant *V, Type *DestTy)
static ConstantFP * flushDenormalConstant(Type *Ty, const APFloat &APF, DenormalMode::DenormalModeKind Mode)
Constant * getConstantAtOffset(Constant *Base, APInt Offset, const DataLayout &DL)
If this Offset points exactly to the start of an aggregate element, return that element,...
static cl::opt< bool > DisableFPCallFolding("disable-fp-call-folding", cl::desc("Disable constant-folding of FP intrinsics and libcalls."), cl::init(false), cl::Hidden)
static ConstantFP * flushDenormalConstantFP(ConstantFP *CFP, const Instruction *Inst, bool IsOutput)
static DenormalMode getInstrDenormalMode(const Instruction *CtxI, Type *Ty)
Return the denormal mode that can be assumed when executing a floating point operation at CtxI.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
amode Optimize addressing mode
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static bool InRange(int64_t Value, unsigned short Shift, int LBound, int HBound)
This file contains the definitions of the enumerations and flags associated with NVVM Intrinsics,...
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
Value * RHS
Value * LHS
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:1120
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1208
void copySign(const APFloat &RHS)
Definition: APFloat.h:1302
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:6057
opStatus subtract(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1190
bool isNegative() const
Definition: APFloat.h:1449
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition: APFloat.cpp:6115
bool isPosInfinity() const
Definition: APFloat.h:1462
bool isNormal() const
Definition: APFloat.h:1453
bool isDenormal() const
Definition: APFloat.h:1450
opStatus add(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1181
const fltSemantics & getSemantics() const
Definition: APFloat.h:1457
bool isNonZero() const
Definition: APFloat.h:1458
bool isFinite() const
Definition: APFloat.h:1454
bool isNaN() const
Definition: APFloat.h:1447
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition: APFloat.h:1088
opStatus multiply(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1199
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:6143
bool isSignaling() const
Definition: APFloat.h:1451
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1235
bool isZero() const
Definition: APFloat.h:1445
APInt bitcastToAPInt() const
Definition: APFloat.h:1353
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1332
opStatus mod(const APFloat &RHS)
Definition: APFloat.h:1226
bool isNegInfinity() const
Definition: APFloat.h:1463
void changeSign()
Definition: APFloat.h:1297
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:1079
bool isInfinity() const
Definition: APFloat.h:1446
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM_ABI APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1971
LLVM_ABI APInt usub_sat(const APInt &RHS) const
Definition: APInt.cpp:2055
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:520
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:936
APInt abs() const
Get the absolute value.
Definition: APInt.h:1795
LLVM_ABI APInt sadd_sat(const APInt &RHS) const
Definition: APInt.cpp:2026
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1948
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1928
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1935
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1598
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1041
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition: APInt.cpp:2036
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1960
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:873
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:482
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:389
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
LLVM_ABI APInt ssub_sat(const APInt &RHS) const
Definition: APInt.cpp:2045
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:24
Definition: Any.h:28
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:156
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
const T * data() const
Definition: ArrayRef.h:144
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:191
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
bool isFPPredicate() const
Definition: InstrTypes.h:784
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:715
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2314
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2564
static LLVM_ABI bool isDesirableCastOp(unsigned Opcode)
Whether creating a constant expression for this cast is desirable.
Definition: Constants.cpp:2443
static LLVM_ABI Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:2213
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2654
static LLVM_ABI Constant * getInsertElement(Constant *Vec, Constant *Elt, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2586
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2300
static LLVM_ABI Constant * getShuffleVector(Constant *V1, Constant *V2, ArrayRef< int > Mask, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2609
static bool isSupportedGetElementPtr(const Type *SrcElemTy)
Whether creating a constant expression for this getelementptr type is supported.
Definition: Constants.h:1387
static LLVM_ABI Constant * get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags=0, Type *OnlyIfReducedTy=nullptr)
get - Return a binary or shift operator constant expression, folding if possible.
Definition: Constants.cpp:2347
static LLVM_ABI bool isDesirableBinOp(unsigned Opcode)
Whether creating a constant expression for this binary operator is desirable.
Definition: Constants.cpp:2389
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1274
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2328
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
const APFloat & getValueAPF() const
Definition: Constants.h:320
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
Definition: Constants.cpp:1059
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:131
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:875
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:169
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:882
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1380
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1474
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1423
This is an important base class in LLVM.
Definition: Constant.h:43
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
Definition: Constants.cpp:1713
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:435
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Constrained floating point compare intrinsics.
This is the common base class for constrained floating point intrinsics.
LLVM_ABI std::optional< fp::ExceptionBehavior > getExceptionBehavior() const
LLVM_ABI std::optional< RoundingMode > getRoundingMode() const
Wrapper for a value that functionally represents the original function.
Definition: Constants.h:952
This class represents an Operation in the Expression.
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:63
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
iterator end()
Definition: DenseMap.h:87
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
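A hedged sketch of the DenseMap find/insert pattern implied above, as used for memoizing work on constants; the cache and the stand-in "folding" step are illustrative, not this file's actual cache.

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Constant.h"
using namespace llvm;

Constant *lookupOrRemember(DenseMap<Constant *, Constant *> &Cache, Constant *C) {
  auto It = Cache.find(C);
  if (It != Cache.end())
    return It->second;          // cache hit
  Constant *Result = C;         // stand-in for the real folding work
  Cache.insert({C, Result});    // remember for the next query
  return Result;
}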
static LLVM_ABI bool compare(const APFloat &LHS, const APFloat &RHS, FCmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
This provides a helper for copying FMF from an instruction or setting specified flags.
Definition: IRBuilder.h:93
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:592
unsigned getNumElements() const
Definition: DerivedTypes.h:635
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:803
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
Definition: Function.cpp:803
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags inBounds()
GEPNoWrapFlags withoutNoUnsignedSignedWrap() const
static GEPNoWrapFlags noUnsignedWrap()
bool hasNoUnsignedSignedWrap() const
bool isInBounds() const
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:296
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:132
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
bool hasDefinitiveInitializer() const
hasDefinitiveInitializer - Whether the global variable has an initializer, and any other instances of...
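A hedged sketch of the guard implied by the GlobalVariable queries above: only a constant global with a definitive initializer can safely be read at compile time. The helper name is illustrative.

#include "llvm/IR/GlobalVariable.h"
using namespace llvm;

const Constant *readableInitializer(const GlobalVariable *GV) {
  if (GV->isConstant() && GV->hasDefinitiveInitializer())
    return GV->getInitializer(); // safe to fold loads from this global
  return nullptr;                // value may differ at run time
}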
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
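A hedged one-liner showing the static ICmpInst::compare helper above evaluating a predicate directly on APInt values; the wrapper name is illustrative.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

bool isUnsignedLess(const APInt &A, const APInt &B) {
  return ICmpInst::compare(A, B, ICmpInst::ICMP_ULT); // A u< B
}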
Predicate getSignedPredicate() const
Return the signed version of the predicate: for example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
bool isCast() const
Definition: Instruction.h:321
bool isBinaryOp() const
Definition: Instruction.h:317
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:82
bool isUnaryOp() const
Definition: Instruction.h:316
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
static APInt getSaturationPoint(Intrinsic::ID ID, unsigned numBits)
Min/max intrinsics are monotonic; they operate on fixed-bitwidth values, so there is a certain thre...
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
Class to represent scalable SIMD vectors.
Definition: DerivedTypes.h:639
size_t size() const
Definition: SmallVector.h:79
void push_back(const T &Elt)
Definition: SmallVector.h:414
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:287
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:626
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the index of the element that contains it.
Definition: DataLayout.cpp:92
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:657
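A hedged sketch tying the StructLayout queries above to DataLayout::getStructLayout: given a byte offset into a struct, find the containing field and the offset that remains within it. The helper name is illustrative.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

unsigned fieldAtOffset(const DataLayout &DL, StructType *STy,
                       uint64_t ByteOffset, uint64_t &Remainder) {
  const StructLayout *SL = DL.getStructLayout(STy);
  unsigned Idx = SL->getElementContainingOffset(ByteOffset);
  // Offset left over inside the containing field, for further descent.
  Remainder = ByteOffset - SL->getElementOffset(Idx).getFixedValue();
  return Idx;
}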
Class to represent struct types.
Definition: DerivedTypes.h:218
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
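A hedged sketch of the usual two-step TargetLibraryInfo check implied above: map the callee name to a LibFunc, then confirm that function is actually available on the target. The wrapper name is illustrative.

#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;

bool isKnownLibCall(const TargetLibraryInfo &TLI, StringRef Name) {
  LibFunc Func;
  return TLI.getLibFunc(Name, Func) && TLI.has(Func);
}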
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:246
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:162
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:261
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:311
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:304
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:270
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:200
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
LLVM_ABI const fltSemantics & getFltSemantics() const
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
LLVM_ABI Type * getStructElementType(unsigned N) const
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:225
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:381
bool isIEEELikeFPTy() const
Return true if this is a well-behaved IEEE-like type, which has an IEEE-compatible layout,...
Definition: Type.h:170
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1866
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
Base class of all SIMD vector types.
Definition: DerivedTypes.h:430
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:695
Type * getElementType() const
Definition: DerivedTypes.h:463
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:255
const ParentTy * getParent() const
Definition: ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2248
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2253
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2258
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2263
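A hedged sketch using the signed APIntOps helpers above to clamp a value into a range; all three APInts are assumed to share a bit width, and the helper name is illustrative.

#include "llvm/ADT/APInt.h"
using namespace llvm;

APInt clampSigned(const APInt &X, const APInt &Lo, const APInt &Hi) {
  return APIntOps::smax(Lo, APIntOps::smin(X, Hi)); // max(Lo, min(X, Hi))
}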
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
@ ebStrict
This corresponds to "fpexcept.strict".
Definition: FPEnv.h:42
@ ebIgnore
This corresponds to "fpexcept.ignore".
Definition: FPEnv.h:40
constexpr double pi
Definition: MathExtras.h:53
APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID)
DenormalMode GetNVVMDenormMode(bool ShouldFTZ)
APFloat::roundingMode GetFDivRoundingMode(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID)
bool RCPShouldFTZ(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FDivShouldFTZ(Intrinsic::ID IntrinsicID)
bool FAddShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFMulRoundingMode(Intrinsic::ID IntrinsicID)
bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFAddRoundingMode(Intrinsic::ID IntrinsicID)
bool FMAShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMulShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID)
bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID)
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Offset
Definition: DWP.cpp:477
LLVM_ABI Constant * ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS, Constant *RHS, Type *Ty, Instruction *FMFSource)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
LLVM_ABI Constant * ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, const DataLayout &DL)
ConstantFoldLoadThroughBitcast - try to cast constant to destination type returning null if unsuccess...
static double log2(double V)
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
LLVM_ABI Constant * ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL, const Instruction *I, bool AllowNonDeterministic=true)
Attempt to constant fold a floating point binary operation with the specified operands,...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if it's even possible to fold a call to the specified function.
unsigned getPointerAddressSpace(const Type *T)
Definition: SPIRVUtils.h:294
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1563
LLVM_ABI Constant * ConstantFoldCompareInstruction(CmpInst::Predicate Predicate, Constant *C1, Constant *C2)
LLVM_ABI Constant * ConstantFoldUnaryInstruction(unsigned Opcode, Constant *V)
LLVM_ABI bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, const DataLayout &DL, DSOLocalEquivalent **DSOEquiv=nullptr)
If this constant is a constant offset from a global, return the global and the constant.
LLVM_ABI bool isMathLibCallNoop(const CallBase *Call, const TargetLibraryInfo *TLI)
Check whether the given call has no side-effects.
LLVM_ABI Constant * ReadByteArrayFromGlobal(const GlobalVariable *GV, uint64_t Offset)
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition: APFloat.h:1643
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition: APFloat.h:1534
LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
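A hedged sketch of the intended pairing of the two entry points above: check foldability first, then ask for the folded result. The wrapper name and the pre-collected argument constants are assumptions.

#include "llvm/Analysis/ConstantFolding.h"
using namespace llvm;

Constant *tryFoldCall(const CallBase *Call, Function *F,
                      ArrayRef<Constant *> ArgConstants,
                      const TargetLibraryInfo *TLI) {
  if (!canConstantFoldCallTo(Call, F))
    return nullptr;                      // folding this callee is not supported
  return ConstantFoldCall(Call, F, ArgConstants, TLI); // null if not evaluatable
}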
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1555
LLVM_ABI Constant * ConstantFoldExtractValueInstruction(Constant *Agg, ArrayRef< unsigned > Idxs)
Attempt to constant fold an extractvalue instruction with the specified operands and indices.
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition: APFloat.h:1598
LLVM_ABI Constant * ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty, const DataLayout &DL)
If C is a uniform value where all bits are the same (either all zero, all ones, all undef or all pois...
LLVM_ABI Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
LLVM_ABI Constant * FlushFPConstant(Constant *Operand, const Instruction *I, bool IsOutput)
Attempt to flush a floating-point constant according to the denormal mode set in the instruction's parent func...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition: APFloat.h:1629
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition: APFloat.h:1543
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1172
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
LLVM_ABI bool intrinsicPropagatesPoison(Intrinsic::ID IID)
Return whether this intrinsic propagates poison for all operands.
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition: APFloat.h:1579
@ Sub
Subtraction of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI Constant * ConstantFoldCastInstruction(unsigned opcode, Constant *V, Type *DestTy)
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
LLVM_ABI Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
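A hedged sketch of the driver loop that typically precedes ConstantFoldInstOperands: gather each operand as a Constant and bail out if any operand is not constant. The wrapper name is illustrative.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

Constant *tryFoldOperands(const Instruction *I, const DataLayout &DL,
                          const TargetLibraryInfo *TLI) {
  SmallVector<Constant *, 8> Ops;
  for (const Use &U : I->operands()) {
    Constant *C = dyn_cast<Constant>(U.get());
    if (!C)
      return nullptr;                    // a non-constant operand blocks folding
    Ops.push_back(C);
  }
  return ConstantFoldInstOperands(I, Ops, DL, TLI);
}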
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition: APFloat.h:1616
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition: APFloat.h:1656
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:46
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:320
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
DenormalModeKind
Represent handled modes for denormal (aka subnormal) modes in the floating point environment.
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ Dynamic
Denormals have unknown treatment.
@ IEEE
IEEE-754 denormal numbers preserved.
DenormalModeKind Output
Denormal flushing mode for floating point instruction results in the default floating point environme...
static constexpr DenormalMode getDynamic()
static constexpr DenormalMode getIEEE()
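A hedged sketch of how the DenormalMode fields above drive flushing decisions: a denormal input is treated as zero when the input mode is PreserveSign or PositiveZero. Mode is assumed to come from the parent function's denormal settings; the helper name is illustrative.

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/FloatingPointMode.h"
using namespace llvm;

bool inputTreatedAsZero(const APFloat &V, DenormalMode Mode) {
  if (!V.isDenormal())
    return false;
  return Mode.Input == DenormalMode::PreserveSign ||
         Mode.Input == DenormalMode::PositiveZero;
}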
Incoming for lane mask phi as machine instruction; incoming register Reg and incoming block Block are...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:54
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:60
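A hedged sketch combining computeKnownBits with the KnownBits queries above: if every bit of V is known, recover the value as an APInt. Assumes the value-returning computeKnownBits overload; the helper name is illustrative.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

bool getKnownConstant(const Value *V, const DataLayout &DL, APInt &Out) {
  KnownBits Known = computeKnownBits(V, DL);
  if (!Known.isConstant())
    return false;              // at least one bit is still unknown
  Out = Known.getConstant();
  return true;
}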