1//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines routines for folding instructions into constants.
10//
11// Also, to supplement the basic IR ConstantExpr simplifications,
12// this file defines some additional folding routines that can make use of
13// DataLayout information. These functions cannot go in IR due to library
14// dependency issues.
15//
16//===----------------------------------------------------------------------===//
17
19#include "llvm/ADT/APFloat.h"
20#include "llvm/ADT/APInt.h"
21#include "llvm/ADT/APSInt.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/StringRef.h"
31#include "llvm/Config/config.h"
32#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/GlobalValue.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
44#include "llvm/IR/Intrinsics.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/IntrinsicsAMDGPU.h"
47#include "llvm/IR/IntrinsicsARM.h"
48#include "llvm/IR/IntrinsicsNVPTX.h"
49#include "llvm/IR/IntrinsicsWebAssembly.h"
50#include "llvm/IR/IntrinsicsX86.h"
52#include "llvm/IR/Operator.h"
53#include "llvm/IR/Type.h"
54#include "llvm/IR/Value.h"
59#include <cassert>
60#include <cerrno>
61#include <cfenv>
62#include <cmath>
63#include <cstdint>
64
65using namespace llvm;
66
67static cl::opt<bool> DisableFPCallFolding(
68 "disable-fp-call-folding",
69 cl::desc("Disable constant-folding of FP intrinsics and libcalls."),
70 cl::init(false), cl::Hidden);
71
72namespace {
73
74//===----------------------------------------------------------------------===//
75// Constant Folding internal helper functions
76//===----------------------------------------------------------------------===//
77
78static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
79 Constant *C, Type *SrcEltTy,
80 unsigned NumSrcElts,
81 const DataLayout &DL) {
82 // Now that we know that the input value is a vector of integers, just shift
83 // and insert them into our result.
84 unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
85 for (unsigned i = 0; i != NumSrcElts; ++i) {
86 Constant *Element;
87 if (DL.isLittleEndian())
88 Element = C->getAggregateElement(NumSrcElts - i - 1);
89 else
90 Element = C->getAggregateElement(i);
91
92 if (isa_and_nonnull<UndefValue>(Element)) {
93 Result <<= BitShift;
94 continue;
95 }
96
97 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
98 if (!ElementCI)
99 return ConstantExpr::getBitCast(C, DestTy);
100
101 Result <<= BitShift;
102 Result |= ElementCI->getValue().zext(Result.getBitWidth());
103 }
104
105 return nullptr;
106}
107
108/// Constant fold bitcast, symbolically evaluating it with DataLayout.
109/// This always returns a non-null constant, but it may be a
110/// ConstantExpr if unfoldable.
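///
/// Illustrative example (values chosen for exposition, not taken from this
/// file): on a little-endian target,
///   bitcast (<2 x i32> <i32 1, i32 2> to i64)
/// folds to i64 0x0000000200000001 (element 0 lands in the low bits); a
/// big-endian target instead yields i64 0x0000000100000002.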
111Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
112 assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
113 "Invalid constantexpr bitcast!");
114
115 // Catch the obvious splat cases.
116 if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
117 return Res;
118
119 if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
120 // Handle a vector->scalar integer/fp cast.
121 if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
122 unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
123 Type *SrcEltTy = VTy->getElementType();
124
125 // If the vector is a vector of floating-point values, convert it to a
126 // vector of integers to simplify things.
127 if (SrcEltTy->isFloatingPointTy()) {
128 unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
129 auto *SrcIVTy = FixedVectorType::get(
130 IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
131 // Ask IR to do the conversion now that #elts line up.
132 C = ConstantExpr::getBitCast(C, SrcIVTy);
133 }
134
135 APInt Result(DL.getTypeSizeInBits(DestTy), 0);
136 if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
137 SrcEltTy, NumSrcElts, DL))
138 return CE;
139
140 if (isa<IntegerType>(DestTy))
141 return ConstantInt::get(DestTy, Result);
142
143 APFloat FP(DestTy->getFltSemantics(), Result);
144 return ConstantFP::get(DestTy->getContext(), FP);
145 }
146 }
147
148 // The code below only handles casts to vectors currently.
149 auto *DestVTy = dyn_cast<VectorType>(DestTy);
150 if (!DestVTy)
151 return ConstantExpr::getBitCast(C, DestTy);
152
153 // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
154 // vector so the code below can handle it uniformly.
155 if (!isa<VectorType>(C->getType()) &&
156 (isa<ConstantFP>(C) || isa<ConstantInt>(C))) {
157 Constant *Ops = C; // don't take the address of C!
158 return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
159 }
160
161 // Some of what follows may extend to cover scalable vectors but the current
162 // implementation is fixed length specific.
163 if (!isa<FixedVectorType>(C->getType()))
164 return ConstantExpr::getBitCast(C, DestTy);
165
166 // If this is a bitcast from constant vector -> vector, fold it.
167 if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C) &&
168 !isa<ConstantInt>(C) && !isa<ConstantFP>(C))
169 return ConstantExpr::getBitCast(C, DestTy);
170
171 // If the element types match, IR can fold it.
172 unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
173 unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
174 if (NumDstElt == NumSrcElt)
175 return ConstantExpr::getBitCast(C, DestTy);
176
177 Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
178 Type *DstEltTy = DestVTy->getElementType();
179
180 // Otherwise, we're changing the number of elements in a vector, which
181 // requires endianness information to do the right thing. For example,
182 // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
183 // folds to (little endian):
184 // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
185 // and to (big endian):
186 // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
187
188 // First things first: we only want to think about integers here, so if
189 // we have something in FP form, recast it as integer.
190 if (DstEltTy->isFloatingPointTy()) {
191 // Fold to a vector of integers with the same size as our FP type.
192 unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
193 auto *DestIVTy = FixedVectorType::get(
194 IntegerType::get(C->getContext(), FPWidth), NumDstElt);
195 // Recursively handle this integer conversion, if possible.
196 C = FoldBitCast(C, DestIVTy, DL);
197
198 // Finally, IR can handle this now that #elts line up.
199 return ConstantExpr::getBitCast(C, DestTy);
200 }
201
202 // Okay, we know the destination is integer; if the input is FP, convert
203 // it to integer first.
204 if (SrcEltTy->isFloatingPointTy()) {
205 unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
206 auto *SrcIVTy = FixedVectorType::get(
207 IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
208 // Ask IR to do the conversion now that #elts line up.
209 C = ConstantExpr::getBitCast(C, SrcIVTy);
210 assert((isa<ConstantVector>(C) || // FIXME: Remove ConstantVector.
211 isa<ConstantDataVector>(C)) &&
212 "Constant folding cannot fail for plain fp->int bitcast!");
213 }
214
215 // Now we know that the input and output vectors are both integer vectors
216 // of the same size, and that their #elements is not the same. Do the
217 // conversion here, which depends on whether the input or output has
218 // more elements.
219 bool isLittleEndian = DL.isLittleEndian();
220
221 SmallVector<Constant *, 32> Result;
222 if (NumDstElt < NumSrcElt) {
223 // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
224 Constant *Zero = Constant::getNullValue(DstEltTy);
225 unsigned Ratio = NumSrcElt/NumDstElt;
226 unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
227 unsigned SrcElt = 0;
228 for (unsigned i = 0; i != NumDstElt; ++i) {
229 // Build each element of the result.
230 Constant *Elt = Zero;
231 unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
232 for (unsigned j = 0; j != Ratio; ++j) {
233 Constant *Src = C->getAggregateElement(SrcElt++);
234 if (isa_and_nonnull<UndefValue>(Src))
235 Src = Constant::getNullValue(
236 cast<VectorType>(C->getType())->getElementType());
237 else
238 Src = dyn_cast_or_null<ConstantInt>(Src);
239 if (!Src) // Reject constantexpr elements.
240 return ConstantExpr::getBitCast(C, DestTy);
241
242 // Zero extend the element to the right size.
243 Src = ConstantFoldCastOperand(Instruction::ZExt, Src, Elt->getType(),
244 DL);
245 assert(Src && "Constant folding cannot fail on plain integers");
246
247 // Shift it to the right place, depending on endianness.
248 Src = ConstantFoldBinaryOpOperands(
249 Instruction::Shl, Src, ConstantInt::get(Src->getType(), ShiftAmt),
250 DL);
251 assert(Src && "Constant folding cannot fail on plain integers");
252
253 ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
254
255 // Mix it in.
256 Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL);
257 assert(Elt && "Constant folding cannot fail on plain integers");
258 }
259 Result.push_back(Elt);
260 }
261 return ConstantVector::get(Result);
262 }
263
264 // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
265 unsigned Ratio = NumDstElt/NumSrcElt;
266 unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
267
268 // Loop over each source value, expanding into multiple results.
269 for (unsigned i = 0; i != NumSrcElt; ++i) {
270 auto *Element = C->getAggregateElement(i);
271
272 if (!Element) // Reject constantexpr elements.
273 return ConstantExpr::getBitCast(C, DestTy);
274
275 if (isa<UndefValue>(Element)) {
276 // Correctly propagate undef values.
277 Result.append(Ratio, UndefValue::get(DstEltTy));
278 continue;
279 }
280
281 auto *Src = dyn_cast<ConstantInt>(Element);
282 if (!Src)
283 return ConstantExpr::getBitCast(C, DestTy);
284
285 unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
286 for (unsigned j = 0; j != Ratio; ++j) {
287 // Shift the piece of the value into the right place, depending on
288 // endianness.
289 APInt Elt = Src->getValue().lshr(ShiftAmt);
290 ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
291
292 // Truncate and remember this piece.
293 Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
294 }
295 }
296
297 return ConstantVector::get(Result);
298}
299
300} // end anonymous namespace
301
302/// If this constant is a constant offset from a global, return the global and
303/// the constant. Because of constantexprs, this function is recursive.
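///
/// Illustrative example (values chosen for exposition): for a global @a of
/// type [5 x i32], the constant expression
///   getelementptr inbounds ([5 x i32], ptr @a, i64 0, i64 3)
/// is reported as GV = @a with Offset = 12 (three 4-byte elements).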
304bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
305 APInt &Offset, const DataLayout &DL,
306 DSOLocalEquivalent **DSOEquiv) {
307 if (DSOEquiv)
308 *DSOEquiv = nullptr;
309
310 // Trivial case, constant is the global.
311 if ((GV = dyn_cast<GlobalValue>(C))) {
312 unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
313 Offset = APInt(BitWidth, 0);
314 return true;
315 }
316
317 if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
318 if (DSOEquiv)
319 *DSOEquiv = FoundDSOEquiv;
320 GV = FoundDSOEquiv->getGlobalValue();
321 unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
322 Offset = APInt(BitWidth, 0);
323 return true;
324 }
325
326 // Otherwise, if this isn't a constant expr, bail out.
327 auto *CE = dyn_cast<ConstantExpr>(C);
328 if (!CE) return false;
329
330 // Look through ptr->int and ptr->ptr casts.
331 if (CE->getOpcode() == Instruction::PtrToInt ||
332 CE->getOpcode() == Instruction::BitCast)
333 return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
334 DSOEquiv);
335
336 // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
337 auto *GEP = dyn_cast<GEPOperator>(CE);
338 if (!GEP)
339 return false;
340
341 unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
342 APInt TmpOffset(BitWidth, 0);
343
344 // If the base isn't a global+constant, we aren't either.
345 if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
346 DSOEquiv))
347 return false;
348
349 // Otherwise, add any offset that our operands provide.
350 if (!GEP->accumulateConstantOffset(DL, TmpOffset))
351 return false;
352
353 Offset = TmpOffset;
354 return true;
355}
356
357Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
358 const DataLayout &DL) {
359 do {
360 Type *SrcTy = C->getType();
361 if (SrcTy == DestTy)
362 return C;
363
364 TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
365 TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
366 if (!TypeSize::isKnownGE(SrcSize, DestSize))
367 return nullptr;
368
369 // Catch the obvious splat cases (since all-zeros can coerce non-integral
370 // pointers legally).
371 if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
372 return Res;
373
374 // If the type sizes are the same and a cast is legal, just directly
375 // cast the constant.
376 // But be careful not to coerce non-integral pointers illegally.
377 if (SrcSize == DestSize &&
378 DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
379 DL.isNonIntegralPointerType(DestTy->getScalarType())) {
380 Instruction::CastOps Cast = Instruction::BitCast;
381 // If we are going from a pointer to int or vice versa, we spell the cast
382 // differently.
383 if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
384 Cast = Instruction::IntToPtr;
385 else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
386 Cast = Instruction::PtrToInt;
387
388 if (CastInst::castIsValid(Cast, C, DestTy))
389 return ConstantFoldCastOperand(Cast, C, DestTy, DL);
390 }
391
392 // If this isn't an aggregate type, there is nothing we can do to drill down
393 // and find a bitcastable constant.
394 if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
395 return nullptr;
396
397 // We're simulating a load through a pointer that was bitcast to point to
398 // a different type, so we can try to walk down through the initial
399 // elements of an aggregate to see if some part of the aggregate is
400 // castable to implement the "load" semantic model.
401 if (SrcTy->isStructTy()) {
402 // Struct types might have leading zero-length elements like [0 x i32],
403 // which are certainly not what we are looking for, so skip them.
404 unsigned Elem = 0;
405 Constant *ElemC;
406 do {
407 ElemC = C->getAggregateElement(Elem++);
408 } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
409 C = ElemC;
410 } else {
411 // For non-byte-sized vector elements, the first element is not
412 // necessarily located at the vector base address.
413 if (auto *VT = dyn_cast<VectorType>(SrcTy))
414 if (!DL.typeSizeEqualsStoreSize(VT->getElementType()))
415 return nullptr;
416
417 C = C->getAggregateElement(0u);
418 }
419 } while (C);
420
421 return nullptr;
422}
423
424namespace {
425
426/// Recursive helper to read bits out of global. C is the constant being copied
427/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
428/// results into and BytesLeft is the number of bytes left in
429/// the CurPtr buffer. DL is the DataLayout.
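///
/// Illustrative example (values chosen for exposition): copying a ConstantInt
/// i32 42 on a little-endian target writes the bytes
/// { 0x2A, 0x00, 0x00, 0x00 } into CurPtr; a big-endian target writes
/// { 0x00, 0x00, 0x00, 0x2A }.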
430bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
431 unsigned BytesLeft, const DataLayout &DL) {
432 assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
433 "Out of range access");
434
435 // Reading type padding, return zero.
436 if (ByteOffset >= DL.getTypeStoreSize(C->getType()))
437 return true;
438
439 // If this element is zero or undefined, we can just return since *CurPtr is
440 // zero initialized.
441 if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
442 return true;
443
444 if (auto *CI = dyn_cast<ConstantInt>(C)) {
445 if ((CI->getBitWidth() & 7) != 0)
446 return false;
447 const APInt &Val = CI->getValue();
448 unsigned IntBytes = unsigned(CI->getBitWidth()/8);
449
450 for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
451 unsigned n = ByteOffset;
452 if (!DL.isLittleEndian())
453 n = IntBytes - n - 1;
454 CurPtr[i] = Val.extractBits(8, n * 8).getZExtValue();
455 ++ByteOffset;
456 }
457 return true;
458 }
459
460 if (auto *CFP = dyn_cast<ConstantFP>(C)) {
461 if (CFP->getType()->isDoubleTy()) {
462 C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
463 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
464 }
465 if (CFP->getType()->isFloatTy()){
466 C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
467 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
468 }
469 if (CFP->getType()->isHalfTy()){
470 C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
471 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
472 }
473 return false;
474 }
475
476 if (auto *CS = dyn_cast<ConstantStruct>(C)) {
477 const StructLayout *SL = DL.getStructLayout(CS->getType());
478 unsigned Index = SL->getElementContainingOffset(ByteOffset);
479 uint64_t CurEltOffset = SL->getElementOffset(Index);
480 ByteOffset -= CurEltOffset;
481
482 while (true) {
483 // If the element access is to the element itself and not to tail padding,
484 // read the bytes from the element.
485 uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
486
487 if (ByteOffset < EltSize &&
488 !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
489 BytesLeft, DL))
490 return false;
491
492 ++Index;
493
494 // Check to see if we read from the last struct element, if so we're done.
495 if (Index == CS->getType()->getNumElements())
496 return true;
497
498 // If we read all of the bytes we needed from this element we're done.
499 uint64_t NextEltOffset = SL->getElementOffset(Index);
500
501 if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
502 return true;
503
504 // Move to the next element of the struct.
505 CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
506 BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
507 ByteOffset = 0;
508 CurEltOffset = NextEltOffset;
509 }
510 // not reached.
511 }
512
513 if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
514 isa<ConstantDataSequential>(C)) {
515 uint64_t NumElts, EltSize;
516 Type *EltTy;
517 if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
518 NumElts = AT->getNumElements();
519 EltTy = AT->getElementType();
520 EltSize = DL.getTypeAllocSize(EltTy);
521 } else {
522 NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
523 EltTy = cast<FixedVectorType>(C->getType())->getElementType();
524 // TODO: For non-byte-sized vectors, current implementation assumes there is
525 // padding to the next byte boundary between elements.
526 if (!DL.typeSizeEqualsStoreSize(EltTy))
527 return false;
528
529 EltSize = DL.getTypeStoreSize(EltTy);
530 }
531 uint64_t Index = ByteOffset / EltSize;
532 uint64_t Offset = ByteOffset - Index * EltSize;
533
534 for (; Index != NumElts; ++Index) {
535 if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
536 BytesLeft, DL))
537 return false;
538
539 uint64_t BytesWritten = EltSize - Offset;
540 assert(BytesWritten <= EltSize && "Not indexing into this element?");
541 if (BytesWritten >= BytesLeft)
542 return true;
543
544 Offset = 0;
545 BytesLeft -= BytesWritten;
546 CurPtr += BytesWritten;
547 }
548 return true;
549 }
550
551 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
552 if (CE->getOpcode() == Instruction::IntToPtr &&
553 CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
554 return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
555 BytesLeft, DL);
556 }
557 }
558
559 // Otherwise, unknown initializer type.
560 return false;
561}
562
563Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
564 int64_t Offset, const DataLayout &DL) {
565 // Bail out early. We do not expect to load from a scalable global variable.
566 if (isa<ScalableVectorType>(LoadTy))
567 return nullptr;
568
569 auto *IntType = dyn_cast<IntegerType>(LoadTy);
570
571 // If this isn't an integer load we can't fold it directly.
572 if (!IntType) {
573 // If this is a non-integer load, we can try folding it as an int load and
574 // then bitcast the result. This can be useful for union cases. Note
575 // that address spaces don't matter here since we're not actually
576 // creating a new load.
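// Illustrative example (values chosen for exposition): loading a float from
// a constant that holds the i32 bit pattern 0x3F800000 is handled by folding
// the load as an i32 and bitcasting the result, yielding float 1.0.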
577 if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
578 !LoadTy->isVectorTy())
579 return nullptr;
580
581 Type *MapTy = Type::getIntNTy(C->getContext(),
582 DL.getTypeSizeInBits(LoadTy).getFixedValue());
583 if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
584 if (Res->isNullValue() && !LoadTy->isX86_AMXTy())
585 // Materializing a zero can be done trivially without a bitcast
586 return Constant::getNullValue(LoadTy);
587 Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
588 Res = FoldBitCast(Res, CastTy, DL);
589 if (LoadTy->isPtrOrPtrVectorTy()) {
590 // For a vector of pointers, we first convert to a vector of integers and then do a vector inttoptr.
591 if (Res->isNullValue() && !LoadTy->isX86_AMXTy())
592 return Constant::getNullValue(LoadTy);
593 if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
594 // Be careful not to replace a load of an addrspace value with an inttoptr here
595 return nullptr;
596 Res = ConstantExpr::getIntToPtr(Res, LoadTy);
597 }
598 return Res;
599 }
600 return nullptr;
601 }
602
603 unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
604 if (BytesLoaded > 32 || BytesLoaded == 0)
605 return nullptr;
606
607 // If we're not accessing anything in this constant, the result is undefined.
608 if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
609 return PoisonValue::get(IntType);
610
611 // TODO: We should be able to support scalable types.
612 TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
613 if (InitializerSize.isScalable())
614 return nullptr;
615
616 // If we're not accessing anything in this constant, the result is undefined.
617 if (Offset >= (int64_t)InitializerSize.getFixedValue())
618 return PoisonValue::get(IntType);
619
620 unsigned char RawBytes[32] = {0};
621 unsigned char *CurPtr = RawBytes;
622 unsigned BytesLeft = BytesLoaded;
623
624 // If we're loading off the beginning of the global, some bytes may be valid.
625 if (Offset < 0) {
626 CurPtr += -Offset;
627 BytesLeft += Offset;
628 Offset = 0;
629 }
630
631 if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
632 return nullptr;
633
634 APInt ResultVal = APInt(IntType->getBitWidth(), 0);
635 if (DL.isLittleEndian()) {
636 ResultVal = RawBytes[BytesLoaded - 1];
637 for (unsigned i = 1; i != BytesLoaded; ++i) {
638 ResultVal <<= 8;
639 ResultVal |= RawBytes[BytesLoaded - 1 - i];
640 }
641 } else {
642 ResultVal = RawBytes[0];
643 for (unsigned i = 1; i != BytesLoaded; ++i) {
644 ResultVal <<= 8;
645 ResultVal |= RawBytes[i];
646 }
647 }
648
649 return ConstantInt::get(IntType->getContext(), ResultVal);
650}
651
652} // anonymous namespace
653
654// If GV is a constant with an initializer, read its representation starting
655// at Offset and return it as a constant array of unsigned char. Otherwise
656// return null.
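//
// Illustrative example (values chosen for exposition): for
//   @g = constant [4 x i8] c"\01\02\03\04"
// and Offset = 1, the result is a constant byte array holding { 2, 3, 4 }.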
657Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,
658 uint64_t Offset) {
659 if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
660 return nullptr;
661
662 const DataLayout &DL = GV->getDataLayout();
663 Constant *Init = const_cast<Constant *>(GV->getInitializer());
664 TypeSize InitSize = DL.getTypeAllocSize(Init->getType());
665 if (InitSize < Offset)
666 return nullptr;
667
668 uint64_t NBytes = InitSize - Offset;
669 if (NBytes > UINT16_MAX)
670 // Bail for large initializers in excess of 64K to avoid allocating
671 // too much memory.
672 // Offset is assumed to be less than or equal to InitSize (this
673 // is enforced in ReadDataFromGlobal).
674 return nullptr;
675
676 SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));
677 unsigned char *CurPtr = RawBytes.data();
678
679 if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL))
680 return nullptr;
681
682 return ConstantDataArray::get(GV->getContext(), RawBytes);
683}
684
685/// If this Offset points exactly to the start of an aggregate element, return
686/// that element, otherwise return nullptr.
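///
/// Illustrative example (values chosen for exposition): with
/// Base = { i32 1, i32 2, i32 3 }, an Offset of 4 returns the element i32 2,
/// while an Offset of 5 (which points into the middle of an element) returns
/// nullptr.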
687Constant *getConstantAtOffset(Constant *Base, APInt Offset,
688 const DataLayout &DL) {
689 if (Offset.isZero())
690 return Base;
691
692 if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
693 return nullptr;
694
695 Type *ElemTy = Base->getType();
696 SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
697 if (!Offset.isZero() || !Indices[0].isZero())
698 return nullptr;
699
700 Constant *C = Base;
701 for (const APInt &Index : drop_begin(Indices)) {
702 if (Index.isNegative() || Index.getActiveBits() >= 32)
703 return nullptr;
704
705 C = C->getAggregateElement(Index.getZExtValue());
706 if (!C)
707 return nullptr;
708 }
709
710 return C;
711}
712
713Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
714 const APInt &Offset,
715 const DataLayout &DL) {
716 if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
717 if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
718 return Result;
719
720 // Explicitly check for out-of-bounds access, so we return poison even if the
721 // constant is a uniform value.
722 TypeSize Size = DL.getTypeAllocSize(C->getType());
723 if (!Size.isScalable() && Offset.sge(Size.getFixedValue()))
724 return PoisonValue::get(Ty);
725
726 // Try an offset-independent fold of a uniform value.
727 if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty, DL))
728 return Result;
729
730 // Try hard to fold loads from bitcasted strange and non-type-safe things.
731 if (Offset.getSignificantBits() <= 64)
732 if (Constant *Result =
733 FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))
734 return Result;
735
736 return nullptr;
737}
738
744Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
745 APInt Offset,
746 const DataLayout &DL) {
747 // We can only fold loads from constant globals with a definitive initializer.
748 // Check this upfront, to skip expensive offset calculations.
749 auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
750 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
751 return nullptr;
752
753 C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
754 DL, Offset, /* AllowNonInbounds */ true));
755
756 if (C == GV)
757 if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
758 Offset, DL))
759 return Result;
760
761 // If this load comes from anywhere in a uniform constant global, the value
762 // is always the same, regardless of the loaded offset.
763 return ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty, DL);
764}
765
766Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
767 const DataLayout &DL) {
768 APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
769 return ConstantFoldLoadFromConstPtr(C, Ty, std::move(Offset), DL);
770}
771
772Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty,
773 const DataLayout &DL) {
774 if (isa<PoisonValue>(C))
775 return PoisonValue::get(Ty);
776 if (isa<UndefValue>(C))
777 return UndefValue::get(Ty);
778 // If padding is needed when storing C to memory, then it isn't considered as
779 // uniform.
780 if (!DL.typeSizeEqualsStoreSize(C->getType()))
781 return nullptr;
782 if (C->isNullValue() && !Ty->isX86_AMXTy())
783 return Constant::getNullValue(Ty);
784 if (C->isAllOnesValue() &&
785 (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))
786 return Constant::getAllOnesValue(Ty);
787 return nullptr;
788}
789
790namespace {
791
792/// One of Op0/Op1 is a constant expression.
793/// Attempt to symbolically evaluate the result of a binary operator merging
794/// these together. If target data info is available, it is provided as DL,
795/// otherwise DL is null.
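///
/// Illustrative example (values chosen for exposition): if both operands are
/// ptrtoint expressions over the same global, e.g. the addresses of @g[8] and
/// @g[0], their subtraction folds to the constant 8.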
796Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
797 const DataLayout &DL) {
798 // SROA
799
800 // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
801 // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
802 // bits.
803
804 if (Opc == Instruction::And) {
805 KnownBits Known0 = computeKnownBits(Op0, DL);
806 KnownBits Known1 = computeKnownBits(Op1, DL);
807 if ((Known1.One | Known0.Zero).isAllOnes()) {
808 // All the bits of Op0 that the 'and' could be masking are already zero.
809 return Op0;
810 }
811 if ((Known0.One | Known1.Zero).isAllOnes()) {
812 // All the bits of Op1 that the 'and' could be masking are already zero.
813 return Op1;
814 }
815
816 Known0 &= Known1;
817 if (Known0.isConstant())
818 return ConstantInt::get(Op0->getType(), Known0.getConstant());
819 }
820
821 // If the constant expr is something like &A[123] - &A[4].f, fold this into a
822 // constant. This happens frequently when iterating over a global array.
823 if (Opc == Instruction::Sub) {
824 GlobalValue *GV1, *GV2;
825 APInt Offs1, Offs2;
826
827 if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
828 if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
829 unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());
830
831 // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
832 // PtrToInt may change the bitwidth so we have to convert to the right size
833 // first.
834 return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
835 Offs2.zextOrTrunc(OpSize));
836 }
837 }
838
839 return nullptr;
840}
841
842/// If array indices are not pointer-sized integers, explicitly cast them so
843/// that they aren't implicitly casted by the getelementptr.
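///
/// Illustrative example (values chosen for exposition): with a 64-bit index
/// type, the i16 index in
///   getelementptr i32, ptr %p, i16 3
/// is rewritten as an i64 index so that it matches DL.getIndexType() and is
/// not implicitly extended by the GEP itself.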
844Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
845 Type *ResultTy, GEPNoWrapFlags NW,
846 std::optional<ConstantRange> InRange,
847 const DataLayout &DL, const TargetLibraryInfo *TLI) {
848 Type *IntIdxTy = DL.getIndexType(ResultTy);
849 Type *IntIdxScalarTy = IntIdxTy->getScalarType();
850
851 bool Any = false;
852 SmallVector<Constant *, 32> NewIdxs;
853 for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
854 if ((i == 1 ||
855 !isa<StructType>(GetElementPtrInst::getIndexedType(
856 SrcElemTy, Ops.slice(1, i - 1)))) &&
857 Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
858 Any = true;
859 Type *NewType =
860 Ops[i]->getType()->isVectorTy() ? IntIdxTy : IntIdxScalarTy;
861 Constant *NewIdx = ConstantFoldCastOperand(
862 CastInst::getCastOpcode(Ops[i], true, NewType, true), Ops[i], NewType,
863 DL);
864 if (!NewIdx)
865 return nullptr;
866 NewIdxs.push_back(NewIdx);
867 } else
868 NewIdxs.push_back(Ops[i]);
869 }
870
871 if (!Any)
872 return nullptr;
873
874 Constant *C =
875 ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs, NW, InRange);
876 return ConstantFoldConstant(C, DL, TLI);
877}
878
879/// If we can symbolically evaluate the GEP constant expression, do so.
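/// For instance (illustrative), a constant expression such as
///   getelementptr i32, ptr @g, i64 2
/// is canonicalized here to a single byte-based GEP,
///   getelementptr i8, ptr @g, i64 8.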
880Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
881 ArrayRef<Constant *> Ops,
882 const DataLayout &DL,
883 const TargetLibraryInfo *TLI) {
884 Type *SrcElemTy = GEP->getSourceElementType();
885 Type *ResTy = GEP->getType();
886 if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
887 return nullptr;
888
889 if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, GEP->getNoWrapFlags(),
890 GEP->getInRange(), DL, TLI))
891 return C;
892
893 Constant *Ptr = Ops[0];
894 if (!Ptr->getType()->isPointerTy())
895 return nullptr;
896
897 Type *IntIdxTy = DL.getIndexType(Ptr->getType());
898
899 for (unsigned i = 1, e = Ops.size(); i != e; ++i)
900 if (!isa<ConstantInt>(Ops[i]) || !Ops[i]->getType()->isIntegerTy())
901 return nullptr;
902
903 unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
904 APInt Offset = APInt(
905 BitWidth,
906 DL.getIndexedOffsetInType(
907 SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)),
908 /*isSigned=*/true, /*implicitTrunc=*/true);
909
910 std::optional<ConstantRange> InRange = GEP->getInRange();
911 if (InRange)
912 InRange = InRange->sextOrTrunc(BitWidth);
913
914 // If this is a GEP of a GEP, fold it all into a single GEP.
915 GEPNoWrapFlags NW = GEP->getNoWrapFlags();
916 bool Overflow = false;
917 while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
918 NW &= GEP->getNoWrapFlags();
919
920 SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));
921
922 // Do not try to incorporate the sub-GEP if some index is not a number.
923 bool AllConstantInt = true;
924 for (Value *NestedOp : NestedOps)
925 if (!isa<ConstantInt>(NestedOp)) {
926 AllConstantInt = false;
927 break;
928 }
929 if (!AllConstantInt)
930 break;
931
932 // Adjust inrange offset and intersect inrange attributes
933 if (auto GEPRange = GEP->getInRange()) {
934 auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset);
935 InRange =
936 InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange;
937 }
938
939 Ptr = cast<Constant>(GEP->getOperand(0));
940 SrcElemTy = GEP->getSourceElementType();
941 Offset = Offset.sadd_ov(
942 APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps),
943 /*isSigned=*/true, /*implicitTrunc=*/true),
944 Overflow);
945 }
946
947 // Preserving nusw (without inbounds) also requires that the offset
948 // additions did not overflow.
949 if (NW.hasNoUnsignedSignedWrap() && !NW.isInBounds() && Overflow)
950 NW = NW.withoutNoUnsignedSignedWrap();
951
952 // If the base value for this address is a literal integer value, fold the
953 // getelementptr to the resulting integer value casted to the pointer type.
954 APInt BaseIntVal(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
955 if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {
956 if (CE->getOpcode() == Instruction::IntToPtr) {
957 if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
958 BaseIntVal = Base->getValue().zextOrTrunc(BaseIntVal.getBitWidth());
959 }
960 }
961
962 if ((Ptr->isNullValue() || BaseIntVal != 0) &&
963 !DL.mustNotIntroduceIntToPtr(Ptr->getType())) {
964
965 // If the index size is smaller than the pointer size, add to the low
966 // bits only.
967 BaseIntVal.insertBits(BaseIntVal.trunc(BitWidth) + Offset, 0);
968 Constant *C = ConstantInt::get(Ptr->getContext(), BaseIntVal);
969 return ConstantExpr::getIntToPtr(C, ResTy);
970 }
971
972 // Try to infer inbounds for GEPs of globals.
973 if (!NW.isInBounds() && Offset.isNonNegative()) {
974 bool CanBeNull, CanBeFreed;
975 uint64_t DerefBytes =
976 Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
977 if (DerefBytes != 0 && !CanBeNull && Offset.sle(DerefBytes))
978 NW |= GEPNoWrapFlags::inBounds();
979 }
980
981 // nusw + nneg -> nuw
982 if (NW.hasNoUnsignedSignedWrap() && Offset.isNonNegative())
983 NW |= GEPNoWrapFlags::noUnsignedWrap();
984
985 // Otherwise canonicalize this to a single ptradd.
986 LLVMContext &Ctx = Ptr->getContext();
987 return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), Ptr,
988 ConstantInt::get(Ctx, Offset), NW,
989 InRange);
990}
991
992/// Attempt to constant fold an instruction with the
993/// specified opcode and operands. If successful, the constant result is
994/// returned; if not, null is returned. Note that this function can fail when
995/// attempting to fold instructions like loads and stores, which have no
996/// constant expression form.
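///
/// Illustrative example (values chosen for exposition): Instruction::Add with
/// Ops = {i32 2, i32 3} yields i32 5, whereas a volatile load or a call that
/// cannot be folded yields nullptr.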
997Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
998 ArrayRef<Constant *> Ops,
999 const DataLayout &DL,
1000 const TargetLibraryInfo *TLI,
1001 bool AllowNonDeterministic) {
1002 Type *DestTy = InstOrCE->getType();
1003
1004 if (Instruction::isUnaryOp(Opcode))
1005 return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);
1006
1007 if (Instruction::isBinaryOp(Opcode)) {
1008 switch (Opcode) {
1009 default:
1010 break;
1011 case Instruction::FAdd:
1012 case Instruction::FSub:
1013 case Instruction::FMul:
1014 case Instruction::FDiv:
1015 case Instruction::FRem:
1016 // Handle floating point instructions separately to account for denormals
1017 // TODO: If a constant expression is being folded rather than an
1018 // instruction, denormals will not be flushed/treated as zero
1019 if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
1020 return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I,
1021 AllowNonDeterministic);
1022 }
1023 }
1024 return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
1025 }
1026
1027 if (Instruction::isCast(Opcode))
1028 return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);
1029
1030 if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
1031 Type *SrcElemTy = GEP->getSourceElementType();
1032 if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
1033 return nullptr;
1034
1035 if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
1036 return C;
1037
1038 return ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], Ops.slice(1),
1039 GEP->getNoWrapFlags(),
1040 GEP->getInRange());
1041 }
1042
1043 if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
1044 return CE->getWithOperands(Ops);
1045
1046 switch (Opcode) {
1047 default: return nullptr;
1048 case Instruction::ICmp:
1049 case Instruction::FCmp: {
1050 auto *C = cast<CmpInst>(InstOrCE);
1051 return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1],
1052 DL, TLI, C);
1053 }
1054 case Instruction::Freeze:
1055 return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
1056 case Instruction::Call:
1057 if (auto *F = dyn_cast<Function>(Ops.back())) {
1058 const auto *Call = cast<CallBase>(InstOrCE);
1059 if (canConstantFoldCallTo(Call, F))
1060 return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI,
1061 AllowNonDeterministic);
1062 }
1063 return nullptr;
1064 case Instruction::Select:
1065 return ConstantFoldSelectInstruction(Ops[0], Ops[1], Ops[2]);
1066 case Instruction::ExtractElement:
1067 return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
1068 case Instruction::ExtractValue:
1069 return ConstantFoldExtractValueInstruction(
1070 Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
1071 case Instruction::InsertElement:
1072 return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
1073 case Instruction::InsertValue:
1074 return ConstantFoldInsertValueInstruction(
1075 Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices());
1076 case Instruction::ShuffleVector:
1077 return ConstantExpr::getShuffleVector(
1078 Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
1079 case Instruction::Load: {
1080 const auto *LI = dyn_cast<LoadInst>(InstOrCE);
1081 if (LI->isVolatile())
1082 return nullptr;
1083 return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
1084 }
1085 }
1086}
1087
1088} // end anonymous namespace
1089
1090//===----------------------------------------------------------------------===//
1091// Constant Folding public APIs
1092//===----------------------------------------------------------------------===//
1093
1094namespace {
1095
1096Constant *
1097ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
1098 const TargetLibraryInfo *TLI,
1099 SmallDenseMap<Constant *, Constant *> &FoldedOps) {
1100 if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
1101 return const_cast<Constant *>(C);
1102
1103 SmallVector<Constant *, 8> Ops;
1104 for (const Use &OldU : C->operands()) {
1105 Constant *OldC = cast<Constant>(&OldU);
1106 Constant *NewC = OldC;
1107 // Recursively fold the ConstantExpr's operands. If we have already folded
1108 // a ConstantExpr, we don't have to process it again.
1109 if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
1110 auto It = FoldedOps.find(OldC);
1111 if (It == FoldedOps.end()) {
1112 NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
1113 FoldedOps.insert({OldC, NewC});
1114 } else {
1115 NewC = It->second;
1116 }
1117 }
1118 Ops.push_back(NewC);
1119 }
1120
1121 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
1122 if (Constant *Res = ConstantFoldInstOperandsImpl(
1123 CE, CE->getOpcode(), Ops, DL, TLI, /*AllowNonDeterministic=*/true))
1124 return Res;
1125 return const_cast<Constant *>(C);
1126 }
1127
1128 assert(isa<ConstantVector>(C));
1129 return ConstantVector::get(Ops);
1130}
1131
1132} // end anonymous namespace
1133
1134Constant *llvm::ConstantFoldInstruction(const Instruction *I,
1135 const DataLayout &DL,
1136 const TargetLibraryInfo *TLI) {
1137 // Handle PHI nodes quickly here...
1138 if (auto *PN = dyn_cast<PHINode>(I)) {
1139 Constant *CommonValue = nullptr;
1140
1141 SmallDenseMap<Constant *, Constant *> FoldedOps;
1142 for (Value *Incoming : PN->incoming_values()) {
1143 // If the incoming value is undef then skip it. Note that while we could
1144 // skip the value if it is equal to the phi node itself we choose not to
1145 // because that would break the rule that constant folding only applies if
1146 // all operands are constants.
1147 if (isa<UndefValue>(Incoming))
1148 continue;
1149 // If the incoming value is not a constant, then give up.
1150 auto *C = dyn_cast<Constant>(Incoming);
1151 if (!C)
1152 return nullptr;
1153 // Fold the PHI's operands.
1154 C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
1155 // If the incoming value is a different constant to
1156 // the one we saw previously, then give up.
1157 if (CommonValue && C != CommonValue)
1158 return nullptr;
1159 CommonValue = C;
1160 }
1161
1162 // If we reach here, all incoming values are the same constant or undef.
1163 return CommonValue ? CommonValue : UndefValue::get(PN->getType());
1164 }
1165
1166 // Scan the operand list, checking to see if they are all constants, if so,
1167 // hand off to ConstantFoldInstOperandsImpl.
1168 if (!all_of(I->operands(), [](const Use &U) { return isa<Constant>(U); }))
1169 return nullptr;
1170
1171 SmallDenseMap<Constant *, Constant *> FoldedOps;
1172 SmallVector<Constant *, 8> Ops;
1173 for (const Use &OpU : I->operands()) {
1174 auto *Op = cast<Constant>(&OpU);
1175 // Fold the Instruction's operands.
1176 Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
1177 Ops.push_back(Op);
1178 }
1179
1180 return ConstantFoldInstOperands(I, Ops, DL, TLI);
1181}
1182
1183Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
1184 const TargetLibraryInfo *TLI) {
1185 SmallDenseMap<Constant *, Constant *> FoldedOps;
1186 return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
1187}
1188
1189Constant *llvm::ConstantFoldInstOperands(const Instruction *I,
1190 ArrayRef<Constant *> Ops,
1191 const DataLayout &DL,
1192 const TargetLibraryInfo *TLI,
1193 bool AllowNonDeterministic) {
1194 return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI,
1195 AllowNonDeterministic);
1196}
1197
1198Constant *llvm::ConstantFoldCompareInstOperands(
1199 unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,
1200 const TargetLibraryInfo *TLI, const Instruction *I) {
1201 CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
1202 // fold: icmp (inttoptr x), null -> icmp x, 0
1203 // fold: icmp null, (inttoptr x) -> icmp 0, x
1204 // fold: icmp (ptrtoint x), 0 -> icmp x, null
1205 // fold: icmp 0, (ptrtoint x) -> icmp null, x
1206 // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
1207 // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
1208 //
1209 // FIXME: The following comment is out of date and the DataLayout is here now.
1210 // ConstantExpr::getCompare cannot do this, because it doesn't have DL
1211 // around to know if bit truncation is happening.
1212 if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
1213 if (Ops1->isNullValue()) {
1214 if (CE0->getOpcode() == Instruction::IntToPtr) {
1215 Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
1216 // Convert the integer value to the right size to ensure we get the
1217 // proper extension or truncation.
1218 if (Constant *C = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
1219 /*IsSigned*/ false, DL)) {
1220 Constant *Null = Constant::getNullValue(C->getType());
1221 return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
1222 }
1223 }
1224
1225 // Only do this transformation if the int is intptrty in size, otherwise
1226 // there is a truncation or extension that we aren't modeling.
1227 if (CE0->getOpcode() == Instruction::PtrToInt) {
1228 Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
1229 if (CE0->getType() == IntPtrTy) {
1230 Constant *C = CE0->getOperand(0);
1231 Constant *Null = Constant::getNullValue(C->getType());
1232 return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
1233 }
1234 }
1235 }
1236
1237 if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
1238 if (CE0->getOpcode() == CE1->getOpcode()) {
1239 if (CE0->getOpcode() == Instruction::IntToPtr) {
1240 Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
1241
1242 // Convert the integer value to the right size to ensure we get the
1243 // proper extension or truncation.
1244 Constant *C0 = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
1245 /*IsSigned*/ false, DL);
1246 Constant *C1 = ConstantFoldIntegerCast(CE1->getOperand(0), IntPtrTy,
1247 /*IsSigned*/ false, DL);
1248 if (C0 && C1)
1249 return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
1250 }
1251
1252 // Only do this transformation if the int is intptrty in size, otherwise
1253 // there is a truncation or extension that we aren't modeling.
1254 if (CE0->getOpcode() == Instruction::PtrToInt) {
1255 Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
1256 if (CE0->getType() == IntPtrTy &&
1257 CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
1258 return ConstantFoldCompareInstOperands(
1259 Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
1260 }
1261 }
1262 }
1263 }
1264
1265 // Convert pointer comparison (base+offset1) pred (base+offset2) into
1266 // offset1 pred offset2, for the case where the offset is inbounds. This
1267 // only works for equality and unsigned comparison, as inbounds permits
1268 // crossing the sign boundary. However, the offset comparison itself is
1269 // signed.
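// Illustrative example (values chosen for exposition):
//   icmp ult (gep inbounds (i8, ptr @g, i64 4)),
//            (gep inbounds (i8, ptr @g, i64 8))
// strips both operands down to @g and compares the offsets 4 and 8 using the
// signed form of the predicate, folding to i1 true.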
1270 if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) {
1271 unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType());
1272 APInt Offset0(IndexWidth, 0);
1273 bool IsEqPred = ICmpInst::isEquality(Predicate);
1274 Value *Stripped0 = Ops0->stripAndAccumulateConstantOffsets(
1275 DL, Offset0, /*AllowNonInbounds=*/IsEqPred,
1276 /*AllowInvariantGroup=*/false, /*ExternalAnalysis=*/nullptr,
1277 /*LookThroughIntToPtr=*/IsEqPred);
1278 APInt Offset1(IndexWidth, 0);
1279 Value *Stripped1 = Ops1->stripAndAccumulateConstantOffsets(
1280 DL, Offset1, /*AllowNonInbounds=*/IsEqPred,
1281 /*AllowInvariantGroup=*/false, /*ExternalAnalysis=*/nullptr,
1282 /*LookThroughIntToPtr=*/IsEqPred);
1283 if (Stripped0 == Stripped1)
1284 return ConstantInt::getBool(
1285 Ops0->getContext(),
1286 ICmpInst::compare(Offset0, Offset1,
1287 ICmpInst::getSignedPredicate(Predicate)));
1288 }
1289 } else if (isa<ConstantExpr>(Ops1)) {
1290 // If RHS is a constant expression, but the left side isn't, swap the
1291 // operands and try again.
1292 Predicate = ICmpInst::getSwappedPredicate(Predicate);
1293 return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
1294 }
1295
1296 if (CmpInst::isFPPredicate(Predicate)) {
1297 // Flush any denormal constant float input according to denormal handling
1298 // mode.
1299 Ops0 = FlushFPConstant(Ops0, I, /*IsOutput=*/false);
1300 if (!Ops0)
1301 return nullptr;
1302 Ops1 = FlushFPConstant(Ops1, I, /*IsOutput=*/false);
1303 if (!Ops1)
1304 return nullptr;
1305 }
1306
1307 return ConstantFoldCompareInstruction(Predicate, Ops0, Ops1);
1308}
1309
1310Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
1311 const DataLayout &DL) {
1312 assert(Instruction::isUnaryOp(Opcode));
1313
1314 return ConstantFoldUnaryInstruction(Opcode, Op);
1315}
1316
1317Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
1318 Constant *RHS,
1319 const DataLayout &DL) {
1320 assert(Instruction::isBinaryOp(Opcode));
1321 if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))
1322 if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
1323 return C;
1324
1325 if (ConstantExpr::isDesirableBinOp(Opcode))
1326 return ConstantExpr::get(Opcode, LHS, RHS);
1327 return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
1328}
1329
1330static ConstantFP *flushDenormalConstant(Type *Ty, const APFloat &APF,
1331 DenormalMode::DenormalModeKind Mode) {
1332 switch (Mode) {
1333 case DenormalMode::Dynamic:
1334 return nullptr;
1335 case DenormalMode::IEEE:
1336 return ConstantFP::get(Ty->getContext(), APF);
1337 case DenormalMode::PreserveSign:
1338 return ConstantFP::get(
1339 Ty->getContext(),
1340 APFloat::getZero(APF.getSemantics(), APF.isNegative()));
1341 case DenormalMode::PositiveZero:
1342 return ConstantFP::get(Ty->getContext(),
1343 APFloat::getZero(APF.getSemantics(), false));
1344 default:
1345 break;
1346 }
1347
1348 llvm_unreachable("unknown denormal mode");
1349}
1350
1351/// Return the denormal mode that can be assumed when executing a floating point
1352/// operation at \p CtxI.
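///
/// Illustrative example (attribute string chosen for exposition): inside a
/// function carrying "denormal-fp-math"="preserve-sign,preserve-sign", both
/// the Input and Output kinds of the returned mode are
/// DenormalMode::PreserveSign.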
1353static DenormalMode getInstrDenormalMode(const Instruction *CtxI, Type *Ty) {
1354 if (!CtxI || !CtxI->getParent() || !CtxI->getFunction())
1355 return DenormalMode::getDynamic();
1356 return CtxI->getFunction()->getDenormalMode(Ty->getFltSemantics());
1357}
1358
1359static ConstantFP *flushDenormalConstantFP(ConstantFP *CFP,
1360 const Instruction *Inst,
1361 bool IsOutput) {
1362 const APFloat &APF = CFP->getValueAPF();
1363 if (!APF.isDenormal())
1364 return CFP;
1365
1366 DenormalMode Mode = getInstrDenormalMode(Inst, CFP->getType());
1367 return flushDenormalConstant(CFP->getType(), APF,
1368 IsOutput ? Mode.Output : Mode.Input);
1369}
1370
1371Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *Inst,
1372 bool IsOutput) {
1373 if (ConstantFP *CFP = dyn_cast<ConstantFP>(Operand))
1374 return flushDenormalConstantFP(CFP, Inst, IsOutput);
1375
1376 if (isa<ConstantAggregateZero>(Operand) || isa<UndefValue>(Operand))
1377 return Operand;
1378
1379 Type *Ty = Operand->getType();
1380 VectorType *VecTy = dyn_cast<VectorType>(Ty);
1381 if (VecTy) {
1382 if (auto *Splat = dyn_cast_or_null<ConstantFP>(Operand->getSplatValue())) {
1383 ConstantFP *Folded = flushDenormalConstantFP(Splat, Inst, IsOutput);
1384 if (!Folded)
1385 return nullptr;
1386 return ConstantVector::getSplat(VecTy->getElementCount(), Folded);
1387 }
1388
1389 Ty = VecTy->getElementType();
1390 }
1391
1392 if (isa<ConstantExpr>(Operand))
1393 return Operand;
1394
1395 if (const auto *CV = dyn_cast<ConstantVector>(Operand)) {
1396 SmallVector<Constant *, 16> NewElts;
1397 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1398 Constant *Element = CV->getAggregateElement(i);
1399 if (isa<UndefValue>(Element)) {
1400 NewElts.push_back(Element);
1401 continue;
1402 }
1403
1404 ConstantFP *CFP = dyn_cast<ConstantFP>(Element);
1405 if (!CFP)
1406 return nullptr;
1407
1408 ConstantFP *Folded = flushDenormalConstantFP(CFP, Inst, IsOutput);
1409 if (!Folded)
1410 return nullptr;
1411 NewElts.push_back(Folded);
1412 }
1413
1414 return ConstantVector::get(NewElts);
1415 }
1416
1417 if (const auto *CDV = dyn_cast<ConstantDataVector>(Operand)) {
1418 SmallVector<Constant *, 16> NewElts;
1419 for (unsigned I = 0, E = CDV->getNumElements(); I < E; ++I) {
1420 const APFloat &Elt = CDV->getElementAsAPFloat(I);
1421 if (!Elt.isDenormal()) {
1422 NewElts.push_back(ConstantFP::get(Ty, Elt));
1423 } else {
1424 DenormalMode Mode = getInstrDenormalMode(Inst, Ty);
1425 ConstantFP *Folded =
1426 flushDenormalConstant(Ty, Elt, IsOutput ? Mode.Output : Mode.Input);
1427 if (!Folded)
1428 return nullptr;
1429 NewElts.push_back(Folded);
1430 }
1431 }
1432
1433 return ConstantVector::get(NewElts);
1434 }
1435
1436 return nullptr;
1437}
1438
1439Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
1440 Constant *RHS, const DataLayout &DL,
1441 const Instruction *I,
1442 bool AllowNonDeterministic) {
1443 if (Instruction::isBinaryOp(Opcode)) {
1444 // Flush denormal inputs if needed.
1445 Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
1446 if (!Op0)
1447 return nullptr;
1448 Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);
1449 if (!Op1)
1450 return nullptr;
1451
1452 // If nsz or an algebraic FMF flag is set, the result of the FP operation
1453 // may change due to future optimization. Don't constant fold them if
1454 // non-deterministic results are not allowed.
1455 if (!AllowNonDeterministic)
1456 if (auto *FP = dyn_cast_or_null<FPMathOperator>(I))
1457 if (FP->hasNoSignedZeros() || FP->hasAllowReassoc() ||
1458 FP->hasAllowContract() || FP->hasAllowReciprocal())
1459 return nullptr;
1460
1461 // Calculate constant result.
1462 Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
1463 if (!C)
1464 return nullptr;
1465
1466 // Flush denormal output if needed.
1467 C = FlushFPConstant(C, I, /* IsOutput */ true);
1468 if (!C)
1469 return nullptr;
1470
1471 // The precise NaN value is non-deterministic.
1472 if (!AllowNonDeterministic && C->isNaN())
1473 return nullptr;
1474
1475 return C;
1476 }
1477 // If instruction lacks a parent/function and the denormal mode cannot be
1478 // determined, use the default (IEEE).
1479 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
1480}
1481
1482Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
1483 Type *DestTy, const DataLayout &DL) {
1484 assert(Instruction::isCast(Opcode));
1485
1486 if (auto *CE = dyn_cast<ConstantExpr>(C))
1487 if (CE->isCast())
1488 if (unsigned NewOp = CastInst::isEliminableCastPair(
1489 Instruction::CastOps(CE->getOpcode()),
1490 Instruction::CastOps(Opcode), CE->getOperand(0)->getType(),
1491 C->getType(), DestTy, &DL))
1492 return ConstantFoldCastOperand(NewOp, CE->getOperand(0), DestTy, DL);
1493
1494 switch (Opcode) {
1495 default:
1496 llvm_unreachable("Missing case");
1497 case Instruction::PtrToAddr:
1498 // TODO: Add some of the ptrtoint folds here as well.
1499 break;
1500 case Instruction::PtrToInt:
1501 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
1502 Constant *FoldedValue = nullptr;
1503 // If the input is an inttoptr, eliminate the pair. This requires knowing
1504 // the width of a pointer, so it can't be done in ConstantExpr::getCast.
1505 if (CE->getOpcode() == Instruction::IntToPtr) {
1506 // zext/trunc the inttoptr to pointer size.
1507 FoldedValue = ConstantFoldIntegerCast(CE->getOperand(0),
1508 DL.getIntPtrType(CE->getType()),
1509 /*IsSigned=*/false, DL);
1510 } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
1511 // If we have GEP, we can perform the following folds:
1512 // (ptrtoint (gep null, x)) -> x
1513 // (ptrtoint (gep (gep null, x), y) -> x + y, etc.
1514 unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
1515 APInt BaseOffset(BitWidth, 0);
1516 auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
1517 DL, BaseOffset, /*AllowNonInbounds=*/true));
1518 if (Base->isNullValue()) {
1519 FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
1520 } else {
1521 // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V
1522 if (GEP->getNumIndices() == 1 &&
1523 GEP->getSourceElementType()->isIntegerTy(8)) {
1524 auto *Ptr = cast<Constant>(GEP->getPointerOperand());
1525 auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
1526 Type *IntIdxTy = DL.getIndexType(Ptr->getType());
1527 if (Sub && Sub->getType() == IntIdxTy &&
1528 Sub->getOpcode() == Instruction::Sub &&
1529 Sub->getOperand(0)->isNullValue())
1530 FoldedValue = ConstantExpr::getSub(
1531 ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));
1532 }
1533 }
1534 }
1535 if (FoldedValue) {
1536 // Do a zext or trunc to get to the ptrtoint dest size.
1537 return ConstantFoldIntegerCast(FoldedValue, DestTy, /*IsSigned=*/false,
1538 DL);
1539 }
1540 }
1541 break;
1542 case Instruction::IntToPtr:
1543 // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
1544 // the int size is >= the ptr size and the address spaces are the same.
1545 // This requires knowing the width of a pointer, so it can't be done in
1546 // ConstantExpr::getCast.
1547 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
1548 if (CE->getOpcode() == Instruction::PtrToInt) {
1549 Constant *SrcPtr = CE->getOperand(0);
1550 unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
1551 unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
1552
1553 if (MidIntSize >= SrcPtrSize) {
1554 unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
1555 if (SrcAS == DestTy->getPointerAddressSpace())
1556 return FoldBitCast(CE->getOperand(0), DestTy, DL);
1557 }
1558 }
1559 }
1560 break;
1561 case Instruction::Trunc:
1562 case Instruction::ZExt:
1563 case Instruction::SExt:
1564 case Instruction::FPTrunc:
1565 case Instruction::FPExt:
1566 case Instruction::UIToFP:
1567 case Instruction::SIToFP:
1568 case Instruction::FPToUI:
1569 case Instruction::FPToSI:
1570 case Instruction::AddrSpaceCast:
1571 break;
1572 case Instruction::BitCast:
1573 return FoldBitCast(C, DestTy, DL);
1574 }
1575
1576 if (ConstantExpr::isDesirableCastOp(Opcode))
1577 return ConstantExpr::getCast(Opcode, C, DestTy);
1578 return ConstantFoldCastInstruction(Opcode, C, DestTy);
1579}
1580
1581Constant *llvm::ConstantFoldIntegerCast(Constant *C, Type *DestTy,
1582 bool IsSigned, const DataLayout &DL) {
1583 Type *SrcTy = C->getType();
1584 if (SrcTy == DestTy)
1585 return C;
1586 if (SrcTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits())
1587 return ConstantFoldCastOperand(Instruction::Trunc, C, DestTy, DL);
1588 if (IsSigned)
1589 return ConstantFoldCastOperand(Instruction::SExt, C, DestTy, DL);
1590 return ConstantFoldCastOperand(Instruction::ZExt, C, DestTy, DL);
1591}
1592
1593//===----------------------------------------------------------------------===//
1594// Constant Folding for Calls
1595//
1596
1597bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
1598 if (Call->isNoBuiltin())
1599 return false;
1600 if (Call->getFunctionType() != F->getFunctionType())
1601 return false;
1602
1603 // Allow FP calls (both libcalls and intrinsics) to avoid being folded.
1604 // This can be useful for GPU targets or in cross-compilation scenarios
1605 // when the exact target FP behaviour is required, and the host compiler's
1606 // behaviour may be slightly different from the device's run-time behaviour.
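// Illustrative example (flag usage, not tied to a specific test): running
// opt with -disable-fp-call-folding keeps a call such as
//   call float @llvm.sqrt.f32(float 4.0)
// from being folded to a constant, which matters when host and target FP
// behaviour differ.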
1607 if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() ||
1608 any_of(F->args(), [](const Argument &Arg) {
1609 return Arg.getType()->isFloatingPointTy();
1610 })))
1611 return false;
1612
1613 switch (F->getIntrinsicID()) {
1614 // Operations that do not operate on floating-point numbers and do not depend on
1615 // FP environment can be folded even in strictfp functions.
1616 case Intrinsic::bswap:
1617 case Intrinsic::ctpop:
1618 case Intrinsic::ctlz:
1619 case Intrinsic::cttz:
1620 case Intrinsic::fshl:
1621 case Intrinsic::fshr:
1622 case Intrinsic::launder_invariant_group:
1623 case Intrinsic::strip_invariant_group:
1624 case Intrinsic::masked_load:
1625 case Intrinsic::get_active_lane_mask:
1626 case Intrinsic::abs:
1627 case Intrinsic::smax:
1628 case Intrinsic::smin:
1629 case Intrinsic::umax:
1630 case Intrinsic::umin:
1631 case Intrinsic::scmp:
1632 case Intrinsic::ucmp:
1633 case Intrinsic::sadd_with_overflow:
1634 case Intrinsic::uadd_with_overflow:
1635 case Intrinsic::ssub_with_overflow:
1636 case Intrinsic::usub_with_overflow:
1637 case Intrinsic::smul_with_overflow:
1638 case Intrinsic::umul_with_overflow:
1639 case Intrinsic::sadd_sat:
1640 case Intrinsic::uadd_sat:
1641 case Intrinsic::ssub_sat:
1642 case Intrinsic::usub_sat:
1643 case Intrinsic::smul_fix:
1644 case Intrinsic::smul_fix_sat:
1645 case Intrinsic::bitreverse:
1646 case Intrinsic::is_constant:
1647 case Intrinsic::vector_reduce_add:
1648 case Intrinsic::vector_reduce_mul:
1649 case Intrinsic::vector_reduce_and:
1650 case Intrinsic::vector_reduce_or:
1651 case Intrinsic::vector_reduce_xor:
1652 case Intrinsic::vector_reduce_smin:
1653 case Intrinsic::vector_reduce_smax:
1654 case Intrinsic::vector_reduce_umin:
1655 case Intrinsic::vector_reduce_umax:
1656 case Intrinsic::vector_extract:
1657 case Intrinsic::vector_insert:
1658 case Intrinsic::vector_interleave2:
1659 case Intrinsic::vector_deinterleave2:
1660 // Target intrinsics
1661 case Intrinsic::amdgcn_perm:
1662 case Intrinsic::amdgcn_wave_reduce_umin:
1663 case Intrinsic::amdgcn_wave_reduce_umax:
1664 case Intrinsic::amdgcn_wave_reduce_max:
1665 case Intrinsic::amdgcn_wave_reduce_min:
1666 case Intrinsic::amdgcn_wave_reduce_add:
1667 case Intrinsic::amdgcn_wave_reduce_sub:
1668 case Intrinsic::amdgcn_wave_reduce_and:
1669 case Intrinsic::amdgcn_wave_reduce_or:
1670 case Intrinsic::amdgcn_wave_reduce_xor:
1671 case Intrinsic::amdgcn_s_wqm:
1672 case Intrinsic::amdgcn_s_quadmask:
1673 case Intrinsic::amdgcn_s_bitreplicate:
1674 case Intrinsic::arm_mve_vctp8:
1675 case Intrinsic::arm_mve_vctp16:
1676 case Intrinsic::arm_mve_vctp32:
1677 case Intrinsic::arm_mve_vctp64:
1678 case Intrinsic::aarch64_sve_convert_from_svbool:
1679 case Intrinsic::wasm_alltrue:
1680 case Intrinsic::wasm_anytrue:
1681 case Intrinsic::wasm_dot:
1682 // WebAssembly float semantics are always known
1683 case Intrinsic::wasm_trunc_signed:
1684 case Intrinsic::wasm_trunc_unsigned:
1685 return true;
1686
1687 // Floating-point operations cannot be folded in strictfp functions in the
1688 // general case. They can be folded if the FP environment is known to the compiler.
1689 case Intrinsic::minnum:
1690 case Intrinsic::maxnum:
1691 case Intrinsic::minimum:
1692 case Intrinsic::maximum:
1693 case Intrinsic::minimumnum:
1694 case Intrinsic::maximumnum:
1695 case Intrinsic::log:
1696 case Intrinsic::log2:
1697 case Intrinsic::log10:
1698 case Intrinsic::exp:
1699 case Intrinsic::exp2:
1700 case Intrinsic::exp10:
1701 case Intrinsic::sqrt:
1702 case Intrinsic::sin:
1703 case Intrinsic::cos:
1704 case Intrinsic::sincos:
1705 case Intrinsic::sinh:
1706 case Intrinsic::cosh:
1707 case Intrinsic::atan:
1708 case Intrinsic::pow:
1709 case Intrinsic::powi:
1710 case Intrinsic::ldexp:
1711 case Intrinsic::fma:
1712 case Intrinsic::fmuladd:
1713 case Intrinsic::frexp:
1714 case Intrinsic::fptoui_sat:
1715 case Intrinsic::fptosi_sat:
1716 case Intrinsic::convert_from_fp16:
1717 case Intrinsic::convert_to_fp16:
1718 case Intrinsic::amdgcn_cos:
1719 case Intrinsic::amdgcn_cubeid:
1720 case Intrinsic::amdgcn_cubema:
1721 case Intrinsic::amdgcn_cubesc:
1722 case Intrinsic::amdgcn_cubetc:
1723 case Intrinsic::amdgcn_fmul_legacy:
1724 case Intrinsic::amdgcn_fma_legacy:
1725 case Intrinsic::amdgcn_fract:
1726 case Intrinsic::amdgcn_sin:
1727 // The intrinsics below depend on rounding mode in MXCSR.
1728 case Intrinsic::x86_sse_cvtss2si:
1729 case Intrinsic::x86_sse_cvtss2si64:
1730 case Intrinsic::x86_sse_cvttss2si:
1731 case Intrinsic::x86_sse_cvttss2si64:
1732 case Intrinsic::x86_sse2_cvtsd2si:
1733 case Intrinsic::x86_sse2_cvtsd2si64:
1734 case Intrinsic::x86_sse2_cvttsd2si:
1735 case Intrinsic::x86_sse2_cvttsd2si64:
1736 case Intrinsic::x86_avx512_vcvtss2si32:
1737 case Intrinsic::x86_avx512_vcvtss2si64:
1738 case Intrinsic::x86_avx512_cvttss2si:
1739 case Intrinsic::x86_avx512_cvttss2si64:
1740 case Intrinsic::x86_avx512_vcvtsd2si32:
1741 case Intrinsic::x86_avx512_vcvtsd2si64:
1742 case Intrinsic::x86_avx512_cvttsd2si:
1743 case Intrinsic::x86_avx512_cvttsd2si64:
1744 case Intrinsic::x86_avx512_vcvtss2usi32:
1745 case Intrinsic::x86_avx512_vcvtss2usi64:
1746 case Intrinsic::x86_avx512_cvttss2usi:
1747 case Intrinsic::x86_avx512_cvttss2usi64:
1748 case Intrinsic::x86_avx512_vcvtsd2usi32:
1749 case Intrinsic::x86_avx512_vcvtsd2usi64:
1750 case Intrinsic::x86_avx512_cvttsd2usi:
1751 case Intrinsic::x86_avx512_cvttsd2usi64:
1752
1753 // NVVM FMax intrinsics
1754 case Intrinsic::nvvm_fmax_d:
1755 case Intrinsic::nvvm_fmax_f:
1756 case Intrinsic::nvvm_fmax_ftz_f:
1757 case Intrinsic::nvvm_fmax_ftz_nan_f:
1758 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
1759 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
1760 case Intrinsic::nvvm_fmax_nan_f:
1761 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
1762 case Intrinsic::nvvm_fmax_xorsign_abs_f:
1763
1764 // NVVM FMin intrinsics
1765 case Intrinsic::nvvm_fmin_d:
1766 case Intrinsic::nvvm_fmin_f:
1767 case Intrinsic::nvvm_fmin_ftz_f:
1768 case Intrinsic::nvvm_fmin_ftz_nan_f:
1769 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
1770 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
1771 case Intrinsic::nvvm_fmin_nan_f:
1772 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
1773 case Intrinsic::nvvm_fmin_xorsign_abs_f:
1774
1775 // NVVM float/double to int32/uint32 conversion intrinsics
1776 case Intrinsic::nvvm_f2i_rm:
1777 case Intrinsic::nvvm_f2i_rn:
1778 case Intrinsic::nvvm_f2i_rp:
1779 case Intrinsic::nvvm_f2i_rz:
1780 case Intrinsic::nvvm_f2i_rm_ftz:
1781 case Intrinsic::nvvm_f2i_rn_ftz:
1782 case Intrinsic::nvvm_f2i_rp_ftz:
1783 case Intrinsic::nvvm_f2i_rz_ftz:
1784 case Intrinsic::nvvm_f2ui_rm:
1785 case Intrinsic::nvvm_f2ui_rn:
1786 case Intrinsic::nvvm_f2ui_rp:
1787 case Intrinsic::nvvm_f2ui_rz:
1788 case Intrinsic::nvvm_f2ui_rm_ftz:
1789 case Intrinsic::nvvm_f2ui_rn_ftz:
1790 case Intrinsic::nvvm_f2ui_rp_ftz:
1791 case Intrinsic::nvvm_f2ui_rz_ftz:
1792 case Intrinsic::nvvm_d2i_rm:
1793 case Intrinsic::nvvm_d2i_rn:
1794 case Intrinsic::nvvm_d2i_rp:
1795 case Intrinsic::nvvm_d2i_rz:
1796 case Intrinsic::nvvm_d2ui_rm:
1797 case Intrinsic::nvvm_d2ui_rn:
1798 case Intrinsic::nvvm_d2ui_rp:
1799 case Intrinsic::nvvm_d2ui_rz:
1800
1801 // NVVM float/double to int64/uint64 conversion intrinsics
1802 case Intrinsic::nvvm_f2ll_rm:
1803 case Intrinsic::nvvm_f2ll_rn:
1804 case Intrinsic::nvvm_f2ll_rp:
1805 case Intrinsic::nvvm_f2ll_rz:
1806 case Intrinsic::nvvm_f2ll_rm_ftz:
1807 case Intrinsic::nvvm_f2ll_rn_ftz:
1808 case Intrinsic::nvvm_f2ll_rp_ftz:
1809 case Intrinsic::nvvm_f2ll_rz_ftz:
1810 case Intrinsic::nvvm_f2ull_rm:
1811 case Intrinsic::nvvm_f2ull_rn:
1812 case Intrinsic::nvvm_f2ull_rp:
1813 case Intrinsic::nvvm_f2ull_rz:
1814 case Intrinsic::nvvm_f2ull_rm_ftz:
1815 case Intrinsic::nvvm_f2ull_rn_ftz:
1816 case Intrinsic::nvvm_f2ull_rp_ftz:
1817 case Intrinsic::nvvm_f2ull_rz_ftz:
1818 case Intrinsic::nvvm_d2ll_rm:
1819 case Intrinsic::nvvm_d2ll_rn:
1820 case Intrinsic::nvvm_d2ll_rp:
1821 case Intrinsic::nvvm_d2ll_rz:
1822 case Intrinsic::nvvm_d2ull_rm:
1823 case Intrinsic::nvvm_d2ull_rn:
1824 case Intrinsic::nvvm_d2ull_rp:
1825 case Intrinsic::nvvm_d2ull_rz:
1826
1827 // NVVM math intrinsics:
1828 case Intrinsic::nvvm_ceil_d:
1829 case Intrinsic::nvvm_ceil_f:
1830 case Intrinsic::nvvm_ceil_ftz_f:
1831
1832 case Intrinsic::nvvm_fabs:
1833 case Intrinsic::nvvm_fabs_ftz:
1834
1835 case Intrinsic::nvvm_floor_d:
1836 case Intrinsic::nvvm_floor_f:
1837 case Intrinsic::nvvm_floor_ftz_f:
1838
1839 case Intrinsic::nvvm_rcp_rm_d:
1840 case Intrinsic::nvvm_rcp_rm_f:
1841 case Intrinsic::nvvm_rcp_rm_ftz_f:
1842 case Intrinsic::nvvm_rcp_rn_d:
1843 case Intrinsic::nvvm_rcp_rn_f:
1844 case Intrinsic::nvvm_rcp_rn_ftz_f:
1845 case Intrinsic::nvvm_rcp_rp_d:
1846 case Intrinsic::nvvm_rcp_rp_f:
1847 case Intrinsic::nvvm_rcp_rp_ftz_f:
1848 case Intrinsic::nvvm_rcp_rz_d:
1849 case Intrinsic::nvvm_rcp_rz_f:
1850 case Intrinsic::nvvm_rcp_rz_ftz_f:
1851
1852 case Intrinsic::nvvm_round_d:
1853 case Intrinsic::nvvm_round_f:
1854 case Intrinsic::nvvm_round_ftz_f:
1855
1856 case Intrinsic::nvvm_saturate_d:
1857 case Intrinsic::nvvm_saturate_f:
1858 case Intrinsic::nvvm_saturate_ftz_f:
1859
1860 case Intrinsic::nvvm_sqrt_f:
1861 case Intrinsic::nvvm_sqrt_rn_d:
1862 case Intrinsic::nvvm_sqrt_rn_f:
1863 case Intrinsic::nvvm_sqrt_rn_ftz_f:
1864 return !Call->isStrictFP();
1865
1866 // NVVM add intrinsics with explicit rounding modes
1867 case Intrinsic::nvvm_add_rm_d:
1868 case Intrinsic::nvvm_add_rn_d:
1869 case Intrinsic::nvvm_add_rp_d:
1870 case Intrinsic::nvvm_add_rz_d:
1871 case Intrinsic::nvvm_add_rm_f:
1872 case Intrinsic::nvvm_add_rn_f:
1873 case Intrinsic::nvvm_add_rp_f:
1874 case Intrinsic::nvvm_add_rz_f:
1875 case Intrinsic::nvvm_add_rm_ftz_f:
1876 case Intrinsic::nvvm_add_rn_ftz_f:
1877 case Intrinsic::nvvm_add_rp_ftz_f:
1878 case Intrinsic::nvvm_add_rz_ftz_f:
1879
1880 // NVVM div intrinsics with explicit rounding modes
1881 case Intrinsic::nvvm_div_rm_d:
1882 case Intrinsic::nvvm_div_rn_d:
1883 case Intrinsic::nvvm_div_rp_d:
1884 case Intrinsic::nvvm_div_rz_d:
1885 case Intrinsic::nvvm_div_rm_f:
1886 case Intrinsic::nvvm_div_rn_f:
1887 case Intrinsic::nvvm_div_rp_f:
1888 case Intrinsic::nvvm_div_rz_f:
1889 case Intrinsic::nvvm_div_rm_ftz_f:
1890 case Intrinsic::nvvm_div_rn_ftz_f:
1891 case Intrinsic::nvvm_div_rp_ftz_f:
1892 case Intrinsic::nvvm_div_rz_ftz_f:
1893
1894 // NVVM mul intrinsics with explicit rounding modes
1895 case Intrinsic::nvvm_mul_rm_d:
1896 case Intrinsic::nvvm_mul_rn_d:
1897 case Intrinsic::nvvm_mul_rp_d:
1898 case Intrinsic::nvvm_mul_rz_d:
1899 case Intrinsic::nvvm_mul_rm_f:
1900 case Intrinsic::nvvm_mul_rn_f:
1901 case Intrinsic::nvvm_mul_rp_f:
1902 case Intrinsic::nvvm_mul_rz_f:
1903 case Intrinsic::nvvm_mul_rm_ftz_f:
1904 case Intrinsic::nvvm_mul_rn_ftz_f:
1905 case Intrinsic::nvvm_mul_rp_ftz_f:
1906 case Intrinsic::nvvm_mul_rz_ftz_f:
1907
1908 // NVVM fma intrinsics with explicit rounding modes
1909 case Intrinsic::nvvm_fma_rm_d:
1910 case Intrinsic::nvvm_fma_rn_d:
1911 case Intrinsic::nvvm_fma_rp_d:
1912 case Intrinsic::nvvm_fma_rz_d:
1913 case Intrinsic::nvvm_fma_rm_f:
1914 case Intrinsic::nvvm_fma_rn_f:
1915 case Intrinsic::nvvm_fma_rp_f:
1916 case Intrinsic::nvvm_fma_rz_f:
1917 case Intrinsic::nvvm_fma_rm_ftz_f:
1918 case Intrinsic::nvvm_fma_rn_ftz_f:
1919 case Intrinsic::nvvm_fma_rp_ftz_f:
1920 case Intrinsic::nvvm_fma_rz_ftz_f:
1921
1922 // Sign operations are actually bitwise operations, they do not raise
1923 // exceptions even for SNANs.
1924 case Intrinsic::fabs:
1925 case Intrinsic::copysign:
1926 case Intrinsic::is_fpclass:
1927 // Non-constrained variants of rounding operations means default FP
1928 // environment, they can be folded in any case.
1929 case Intrinsic::ceil:
1930 case Intrinsic::floor:
1931 case Intrinsic::round:
1932 case Intrinsic::roundeven:
1933 case Intrinsic::trunc:
1934 case Intrinsic::nearbyint:
1935 case Intrinsic::rint:
1936 case Intrinsic::canonicalize:
1937
1938 // Constrained intrinsics can be folded if FP environment is known
1939 // to compiler.
1940 case Intrinsic::experimental_constrained_fma:
1941 case Intrinsic::experimental_constrained_fmuladd:
1942 case Intrinsic::experimental_constrained_fadd:
1943 case Intrinsic::experimental_constrained_fsub:
1944 case Intrinsic::experimental_constrained_fmul:
1945 case Intrinsic::experimental_constrained_fdiv:
1946 case Intrinsic::experimental_constrained_frem:
1947 case Intrinsic::experimental_constrained_ceil:
1948 case Intrinsic::experimental_constrained_floor:
1949 case Intrinsic::experimental_constrained_round:
1950 case Intrinsic::experimental_constrained_roundeven:
1951 case Intrinsic::experimental_constrained_trunc:
1952 case Intrinsic::experimental_constrained_nearbyint:
1953 case Intrinsic::experimental_constrained_rint:
1954 case Intrinsic::experimental_constrained_fcmp:
1955 case Intrinsic::experimental_constrained_fcmps:
1956 return true;
1957 default:
1958 return false;
1959 case Intrinsic::not_intrinsic: break;
1960 }
1961
1962 if (!F->hasName() || Call->isStrictFP())
1963 return false;
1964
1965 // In these cases, the check of the length is required. We don't want to
1966 // return true for a name like "cos\0blah" which strcmp would return equal to
1967 // "cos", but has length 8.
1968 StringRef Name = F->getName();
1969 switch (Name[0]) {
1970 default:
1971 return false;
1972 case 'a':
1973 return Name == "acos" || Name == "acosf" ||
1974 Name == "asin" || Name == "asinf" ||
1975 Name == "atan" || Name == "atanf" ||
1976 Name == "atan2" || Name == "atan2f";
1977 case 'c':
1978 return Name == "ceil" || Name == "ceilf" ||
1979 Name == "cos" || Name == "cosf" ||
1980 Name == "cosh" || Name == "coshf";
1981 case 'e':
1982 return Name == "exp" || Name == "expf" || Name == "exp2" ||
1983 Name == "exp2f" || Name == "erf" || Name == "erff";
1984 case 'f':
1985 return Name == "fabs" || Name == "fabsf" ||
1986 Name == "floor" || Name == "floorf" ||
1987 Name == "fmod" || Name == "fmodf";
1988 case 'i':
1989 return Name == "ilogb" || Name == "ilogbf";
1990 case 'l':
1991 return Name == "log" || Name == "logf" || Name == "logl" ||
1992 Name == "log2" || Name == "log2f" || Name == "log10" ||
1993 Name == "log10f" || Name == "logb" || Name == "logbf" ||
1994 Name == "log1p" || Name == "log1pf";
1995 case 'n':
1996 return Name == "nearbyint" || Name == "nearbyintf";
1997 case 'p':
1998 return Name == "pow" || Name == "powf";
1999 case 'r':
2000 return Name == "remainder" || Name == "remainderf" ||
2001 Name == "rint" || Name == "rintf" ||
2002 Name == "round" || Name == "roundf";
2003 case 's':
2004 return Name == "sin" || Name == "sinf" ||
2005 Name == "sinh" || Name == "sinhf" ||
2006 Name == "sqrt" || Name == "sqrtf";
2007 case 't':
2008 return Name == "tan" || Name == "tanf" ||
2009 Name == "tanh" || Name == "tanhf" ||
2010 Name == "trunc" || Name == "truncf";
2011 case '_':
2012 // Check for various function names that get used for the math functions
2013 // when the header files are preprocessed with the macro
2014 // __FINITE_MATH_ONLY__ enabled.
2015 // The '12' here is the length of the shortest name that can match.
2016 // We need to check the size before looking at Name[1] and Name[2]
2017 // so we may as well check a limit that will eliminate mismatches.
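 // (Illustrative example: "__exp_finite" and "__pow_finite" are both exactly
 // 12 characters, the shortest names matched in the cases below.)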
2018 if (Name.size() < 12 || Name[1] != '_')
2019 return false;
2020 switch (Name[2]) {
2021 default:
2022 return false;
2023 case 'a':
2024 return Name == "__acos_finite" || Name == "__acosf_finite" ||
2025 Name == "__asin_finite" || Name == "__asinf_finite" ||
2026 Name == "__atan2_finite" || Name == "__atan2f_finite";
2027 case 'c':
2028 return Name == "__cosh_finite" || Name == "__coshf_finite";
2029 case 'e':
2030 return Name == "__exp_finite" || Name == "__expf_finite" ||
2031 Name == "__exp2_finite" || Name == "__exp2f_finite";
2032 case 'l':
2033 return Name == "__log_finite" || Name == "__logf_finite" ||
2034 Name == "__log10_finite" || Name == "__log10f_finite";
2035 case 'p':
2036 return Name == "__pow_finite" || Name == "__powf_finite";
2037 case 's':
2038 return Name == "__sinh_finite" || Name == "__sinhf_finite";
2039 }
2040 }
2041}
2042
2043namespace {
2044
2045Constant *GetConstantFoldFPValue(double V, Type *Ty) {
2046 if (Ty->isHalfTy() || Ty->isFloatTy()) {
2047 APFloat APF(V);
2048 bool unused;
2049 APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
2050 return ConstantFP::get(Ty->getContext(), APF);
2051 }
2052 if (Ty->isDoubleTy())
2053 return ConstantFP::get(Ty->getContext(), APFloat(V));
2054 llvm_unreachable("Can only constant fold half/float/double");
2055}
2056
2057#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2058Constant *GetConstantFoldFPValue128(float128 V, Type *Ty) {
2059 if (Ty->isFP128Ty())
2060 return ConstantFP::get(Ty, V);
2061 llvm_unreachable("Can only constant fold fp128");
2062}
2063#endif
2064
2065/// Clear the floating-point exception state.
2066inline void llvm_fenv_clearexcept() {
2067#if HAVE_DECL_FE_ALL_EXCEPT
2068 feclearexcept(FE_ALL_EXCEPT);
2069#endif
2070 errno = 0;
2071}
2072
2073/// Test if a floating-point exception was raised.
2074inline bool llvm_fenv_testexcept() {
2075 int errno_val = errno;
2076 if (errno_val == ERANGE || errno_val == EDOM)
2077 return true;
2078#if HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
2079 if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
2080 return true;
2081#endif
2082 return false;
2083}
2084
2085static APFloat FTZPreserveSign(const APFloat &V) {
2086 if (V.isDenormal())
2087 return APFloat::getZero(V.getSemantics(), V.isNegative());
2088 return V;
2089}
2090
2091static APFloat FlushToPositiveZero(const APFloat &V) {
2092 if (V.isDenormal())
2093 return APFloat::getZero(V.getSemantics(), false);
2094 return V;
2095}
2096
2097static APFloat FlushWithDenormKind(const APFloat &V,
2098 DenormalMode::DenormalModeKind DenormKind) {
2101 switch (DenormKind) {
2102 case DenormalMode::DenormalModeKind::IEEE:
2103 return V;
2104 case DenormalMode::DenormalModeKind::PreserveSign:
2105 return FTZPreserveSign(V);
2106 case DenormalMode::DenormalModeKind::PositiveZero:
2107 return FlushToPositiveZero(V);
2108 default:
2109 llvm_unreachable("Invalid denormal mode!");
2110 }
2111}
2112
2113Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty,
2114 DenormalMode DenormMode = DenormalMode::getIEEE()) {
2115 if (!DenormMode.isValid() ||
2116 DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic ||
2117 DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic)
2118 return nullptr;
2119
2120 llvm_fenv_clearexcept();
2121 auto Input = FlushWithDenormKind(V, DenormMode.Input);
2122 double Result = NativeFP(Input.convertToDouble());
2123 if (llvm_fenv_testexcept()) {
2124 llvm_fenv_clearexcept();
2125 return nullptr;
2126 }
2127
2128 Constant *Output = GetConstantFoldFPValue(Result, Ty);
2129 if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE)
2130 return Output;
2131 const auto *CFP = static_cast<ConstantFP *>(Output);
2132 const auto Res = FlushWithDenormKind(CFP->getValueAPF(), DenormMode.Output);
2133 return ConstantFP::get(Ty->getContext(), Res);
2134}
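// Illustrative note on the flushing above: with an input mode of PreserveSign,
// a denormal argument such as -1e-310 (double) is flushed to -0.0 before
// NativeFP runs; with an output mode of PositiveZero, a denormal result is
// flushed to +0.0 before being returned.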
2135
2136#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2137Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V,
2138 Type *Ty) {
2139 llvm_fenv_clearexcept();
2140 float128 Result = NativeFP(V.convertToQuad());
2141 if (llvm_fenv_testexcept()) {
2142 llvm_fenv_clearexcept();
2143 return nullptr;
2144 }
2145
2146 return GetConstantFoldFPValue128(Result, Ty);
2147}
2148#endif
2149
2150Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
2151 const APFloat &V, const APFloat &W, Type *Ty) {
2152 llvm_fenv_clearexcept();
2153 double Result = NativeFP(V.convertToDouble(), W.convertToDouble());
2154 if (llvm_fenv_testexcept()) {
2155 llvm_fenv_clearexcept();
2156 return nullptr;
2157 }
2158
2159 return GetConstantFoldFPValue(Result, Ty);
2160}
2161
2162Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
2163 auto *VT = dyn_cast<FixedVectorType>(Op->getType());
2164 if (!VT)
2165 return nullptr;
2166
2167 // This isn't strictly necessary, but handle the special/common case of zero:
2168 // all integer reductions of a zero input produce zero.
2169 if (isa<ConstantAggregateZero>(Op))
2170 return ConstantInt::get(VT->getElementType(), 0);
2171
2172 // This is the same as the underlying binops - poison propagates.
2173 if (isa<PoisonValue>(Op) || Op->containsPoisonElement())
2174 return PoisonValue::get(VT->getElementType());
2175
2176 // TODO: Handle undef.
2177 if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op))
2178 return nullptr;
2179
2180 auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
2181 if (!EltC)
2182 return nullptr;
2183
2184 APInt Acc = EltC->getValue();
2185 for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
2186 if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
2187 return nullptr;
2188 const APInt &X = EltC->getValue();
2189 switch (IID) {
2190 case Intrinsic::vector_reduce_add:
2191 Acc = Acc + X;
2192 break;
2193 case Intrinsic::vector_reduce_mul:
2194 Acc = Acc * X;
2195 break;
2196 case Intrinsic::vector_reduce_and:
2197 Acc = Acc & X;
2198 break;
2199 case Intrinsic::vector_reduce_or:
2200 Acc = Acc | X;
2201 break;
2202 case Intrinsic::vector_reduce_xor:
2203 Acc = Acc ^ X;
2204 break;
2205 case Intrinsic::vector_reduce_smin:
2206 Acc = APIntOps::smin(Acc, X);
2207 break;
2208 case Intrinsic::vector_reduce_smax:
2209 Acc = APIntOps::smax(Acc, X);
2210 break;
2211 case Intrinsic::vector_reduce_umin:
2212 Acc = APIntOps::umin(Acc, X);
2213 break;
2214 case Intrinsic::vector_reduce_umax:
2215 Acc = APIntOps::umax(Acc, X);
2216 break;
2217 }
2218 }
2219
2220 return ConstantInt::get(Op->getContext(), Acc);
2221}
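// Illustrative example: vector_reduce_add of <4 x i32> <1, 2, 3, 4> folds to
// i32 10, and vector_reduce_umax of the same vector folds to i32 4.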
2222
2223/// Attempt to fold an SSE floating point to integer conversion of a constant
2224/// floating point. If roundTowardZero is false, the default IEEE rounding is
2225/// used (toward nearest, ties to even). This matches the behavior of the
2226/// non-truncating SSE instructions in the default rounding mode. The desired
2227/// integer type Ty is used to select how many bits are available for the
2228/// result. Returns null if the conversion cannot be performed, otherwise
2229/// returns the Constant value resulting from the conversion.
2230Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
2231 Type *Ty, bool IsSigned) {
2232 // All of these conversion intrinsics form an integer of at most 64bits.
2233 unsigned ResultWidth = Ty->getIntegerBitWidth();
2234 assert(ResultWidth <= 64 &&
2235 "Can only constant fold conversions to 64 and 32 bit ints");
2236
2237 uint64_t UIntVal;
2238 bool isExact = false;
2239 APFloat::roundingMode mode = roundTowardZero ? APFloat::rmTowardZero
2240 : APFloat::rmNearestTiesToEven;
2241 APFloat::opStatus status =
2242 Val.convertToInteger(MutableArrayRef(UIntVal), ResultWidth,
2243 IsSigned, mode, &isExact);
2244 if (status != APFloat::opOK &&
2245 (!roundTowardZero || status != APFloat::opInexact))
2246 return nullptr;
2247 return ConstantInt::get(Ty, UIntVal, IsSigned);
2248}
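// Illustrative example: the truncating conversions (cvttsd2si and friends)
// fold 2.7 to 2 and -2.7 to -2 because opInexact is tolerated, while the
// non-truncating forms only fold when the conversion is exact, e.g. 4.0 to 4.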
2249
2250double getValueAsDouble(ConstantFP *Op) {
2251 Type *Ty = Op->getType();
2252
2253 if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
2254 return Op->getValueAPF().convertToDouble();
2255
2256 bool unused;
2257 APFloat APF = Op->getValueAPF();
2258 APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
2259 return APF.convertToDouble();
2260}
2261
2262static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
2263 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
2264 C = &CI->getValue();
2265 return true;
2266 }
2267 if (isa<UndefValue>(Op)) {
2268 C = nullptr;
2269 return true;
2270 }
2271 return false;
2272}
2273
2274/// Checks if the given intrinsic call, which evaluates to constant, is allowed
2275/// to be folded.
2276///
2277/// \param CI Constrained intrinsic call.
2278/// \param St Exception flags raised during constant evaluation.
2279static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
2280 APFloat::opStatus St) {
2281 std::optional<RoundingMode> ORM = CI->getRoundingMode();
2282 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2283
2284 // If the operation does not change exception status flags, it is safe
2285 // to fold.
2286 if (St == APFloat::opStatus::opOK)
2287 return true;
2288
2289 // If evaluation raised FP exception, the result can depend on rounding
2290 // mode. If the latter is unknown, folding is not possible.
2291 if (ORM == RoundingMode::Dynamic)
2292 return false;
2293
2294 // If FP exceptions are ignored, fold the call, even if such exception is
2295 // raised.
2296 if (EB && *EB != fp::ExceptionBehavior::ebStrict)
2297 return true;
2298
2299 // Leave the calculation for runtime so that exception flags be correctly set
2300 // in hardware.
2301 return false;
2302}
2303
2304/// Returns the rounding mode that should be used for constant evaluation.
2305static RoundingMode
2306getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
2307 std::optional<RoundingMode> ORM = CI->getRoundingMode();
2308 if (!ORM || *ORM == RoundingMode::Dynamic)
2309 // Even if the rounding mode is unknown, try evaluating the operation.
2310 // If it does not raise inexact exception, rounding was not applied,
2311 // so the result is exact and does not depend on rounding mode. Whether
2312 // other FP exceptions are raised, it does not depend on rounding mode.
2313 return RoundingMode::NearestTiesToEven;
2314 return *ORM;
2315}
2316
2317/// Try to constant fold llvm.canonicalize for the given caller and value.
2318static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI,
2319 const APFloat &Src) {
2320 // Zero, positive and negative, is always OK to fold.
2321 if (Src.isZero()) {
2322 // Get a fresh 0, since ppc_fp128 does have non-canonical zeros.
2323 return ConstantFP::get(
2324 CI->getContext(),
2325 APFloat::getZero(Src.getSemantics(), Src.isNegative()));
2326 }
2327
2328 if (!Ty->isIEEELikeFPTy())
2329 return nullptr;
2330
2331 // Zero is always canonical and the sign must be preserved.
2332 //
2333 // Denorms and nans may have special encodings, but it should be OK to fold a
2334 // totally average number.
2335 if (Src.isNormal() || Src.isInfinity())
2336 return ConstantFP::get(CI->getContext(), Src);
2337
2338 if (Src.isDenormal() && CI->getParent() && CI->getFunction()) {
2339 DenormalMode DenormMode =
2340 CI->getFunction()->getDenormalMode(Src.getSemantics());
2341
2342 if (DenormMode == DenormalMode::getIEEE())
2343 return ConstantFP::get(CI->getContext(), Src);
2344
2345 if (DenormMode.Input == DenormalMode::Dynamic)
2346 return nullptr;
2347
2348 // If we know if either input or output is flushed, we can fold.
2349 if ((DenormMode.Input == DenormalMode::Dynamic &&
2350 DenormMode.Output == DenormalMode::IEEE) ||
2351 (DenormMode.Input == DenormalMode::IEEE &&
2352 DenormMode.Output == DenormalMode::Dynamic))
2353 return nullptr;
2354
2355 bool IsPositive =
2356 (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero ||
2357 (DenormMode.Output == DenormalMode::PositiveZero &&
2358 DenormMode.Input == DenormalMode::IEEE));
2359
2360 return ConstantFP::get(CI->getContext(),
2361 APFloat::getZero(Src.getSemantics(), !IsPositive));
2362 }
2363
2364 return nullptr;
2365}
2366
2367static Constant *ConstantFoldScalarCall1(StringRef Name,
2368 Intrinsic::ID IntrinsicID,
2369 Type *Ty,
2370 ArrayRef<Constant *> Operands,
2371 const TargetLibraryInfo *TLI,
2372 const CallBase *Call) {
2373 assert(Operands.size() == 1 && "Wrong number of operands.");
2374
2375 if (IntrinsicID == Intrinsic::is_constant) {
2376 // We know we have a "Constant" argument. But we want to only
2377 // return true for manifest constants, not those that depend on
2378 // constants with unknowable values, e.g. GlobalValue or BlockAddress.
2379 if (Operands[0]->isManifestConstant())
2380 return ConstantInt::getTrue(Ty->getContext());
2381 return nullptr;
2382 }
2383
2384 if (isa<UndefValue>(Operands[0])) {
2385 // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
2386 // ctpop() is between 0 and bitwidth, pick 0 for undef.
2387 // fptoui.sat and fptosi.sat can always fold to zero (for a zero input).
2388 if (IntrinsicID == Intrinsic::cos ||
2389 IntrinsicID == Intrinsic::ctpop ||
2390 IntrinsicID == Intrinsic::fptoui_sat ||
2391 IntrinsicID == Intrinsic::fptosi_sat ||
2392 IntrinsicID == Intrinsic::canonicalize)
2393 return Constant::getNullValue(Ty);
2394 if (IntrinsicID == Intrinsic::bswap ||
2395 IntrinsicID == Intrinsic::bitreverse ||
2396 IntrinsicID == Intrinsic::launder_invariant_group ||
2397 IntrinsicID == Intrinsic::strip_invariant_group)
2398 return Operands[0];
2399 }
2400
2401 if (isa<ConstantPointerNull>(Operands[0])) {
2402 // launder(null) == null == strip(null) iff in addrspace 0
2403 if (IntrinsicID == Intrinsic::launder_invariant_group ||
2404 IntrinsicID == Intrinsic::strip_invariant_group) {
2405 // If instruction is not yet put in a basic block (e.g. when cloning
2406 // a function during inlining), Call's caller may not be available.
2407 // So check Call's BB first before querying Call->getCaller.
2408 const Function *Caller =
2409 Call->getParent() ? Call->getCaller() : nullptr;
2410 if (Caller &&
2411 !NullPointerIsDefined(
2412 Caller, Operands[0]->getType()->getPointerAddressSpace())) {
2413 return Operands[0];
2414 }
2415 return nullptr;
2416 }
2417 }
2418
2419 if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
2420 if (IntrinsicID == Intrinsic::convert_to_fp16) {
2421 APFloat Val(Op->getValueAPF());
2422
2423 bool lost = false;
2424 Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
2425
2426 return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
2427 }
2428
2429 APFloat U = Op->getValueAPF();
2430
2431 if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
2432 IntrinsicID == Intrinsic::wasm_trunc_unsigned) {
2433 bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;
2434
2435 if (U.isNaN())
2436 return nullptr;
2437
2438 unsigned Width = Ty->getIntegerBitWidth();
2439 APSInt Int(Width, !Signed);
2440 bool IsExact = false;
2441 APFloat::opStatus Status =
2442 U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
2443
2444 if (Status == APFloat::opOK || Status == APFloat::opInexact)
2445 return ConstantInt::get(Ty, Int);
2446
2447 return nullptr;
2448 }
2449
2450 if (IntrinsicID == Intrinsic::fptoui_sat ||
2451 IntrinsicID == Intrinsic::fptosi_sat) {
2452 // convertToInteger() already has the desired saturation semantics.
2453 APSInt Int(Ty->getIntegerBitWidth(),
2454 IntrinsicID == Intrinsic::fptoui_sat);
2455 bool IsExact;
2456 U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
2457 return ConstantInt::get(Ty, Int);
2458 }
2459
2460 if (IntrinsicID == Intrinsic::canonicalize)
2461 return constantFoldCanonicalize(Ty, Call, U);
2462
2463#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2464 if (Ty->isFP128Ty()) {
2465 if (IntrinsicID == Intrinsic::log) {
2466 float128 Result = logf128(Op->getValueAPF().convertToQuad());
2467 return GetConstantFoldFPValue128(Result, Ty);
2468 }
2469
2470 LibFunc Fp128Func = NotLibFunc;
2471 if (TLI && TLI->getLibFunc(Name, Fp128Func) && TLI->has(Fp128Func) &&
2472 Fp128Func == LibFunc_logl)
2473 return ConstantFoldFP128(logf128, Op->getValueAPF(), Ty);
2474 }
2475#endif
2476
2477 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy() &&
2478 !Ty->isIntegerTy())
2479 return nullptr;
2480
2481 // Use internal versions of these intrinsics.
2482
2483 if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
2484 U.roundToIntegral(APFloat::rmNearestTiesToEven);
2485 return ConstantFP::get(Ty->getContext(), U);
2486 }
2487
2488 if (IntrinsicID == Intrinsic::round) {
2489 U.roundToIntegral(APFloat::rmNearestTiesToAway);
2490 return ConstantFP::get(Ty->getContext(), U);
2491 }
2492
2493 if (IntrinsicID == Intrinsic::roundeven) {
2494 U.roundToIntegral(APFloat::rmNearestTiesToEven);
2495 return ConstantFP::get(Ty->getContext(), U);
2496 }
2497
2498 if (IntrinsicID == Intrinsic::ceil) {
2499 U.roundToIntegral(APFloat::rmTowardPositive);
2500 return ConstantFP::get(Ty->getContext(), U);
2501 }
2502
2503 if (IntrinsicID == Intrinsic::floor) {
2504 U.roundToIntegral(APFloat::rmTowardNegative);
2505 return ConstantFP::get(Ty->getContext(), U);
2506 }
2507
2508 if (IntrinsicID == Intrinsic::trunc) {
2509 U.roundToIntegral(APFloat::rmTowardZero);
2510 return ConstantFP::get(Ty->getContext(), U);
2511 }
2512
2513 if (IntrinsicID == Intrinsic::fabs) {
2514 U.clearSign();
2515 return ConstantFP::get(Ty->getContext(), U);
2516 }
2517
2518 if (IntrinsicID == Intrinsic::amdgcn_fract) {
2519 // The v_fract instruction behaves like the OpenCL spec, which defines
2520 // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is
2521 // there to prevent fract(-small) from returning 1.0. It returns the
2522 // largest positive floating-point number less than 1.0."
2523 APFloat FloorU(U);
2524 FloorU.roundToIntegral(APFloat::rmTowardNegative);
2525 APFloat FractU(U - FloorU);
2526 APFloat AlmostOne(U.getSemantics(), 1);
2527 AlmostOne.next(/*nextDown*/ true);
2528 return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
2529 }
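 // Illustrative example: fract(2.25) folds to 0.25, while for a tiny negative
 // input such as -0x1p-25 the difference x - floor(x) rounds up to 1.0 in
 // float and the min() clamp keeps the folded result at 0x1.fffffep-1.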
2530
2531 // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
2532 // raise FP exceptions, unless the argument is signaling NaN.
2533
2534 std::optional<APFloat::roundingMode> RM;
2535 switch (IntrinsicID) {
2536 default:
2537 break;
2538 case Intrinsic::experimental_constrained_nearbyint:
2539 case Intrinsic::experimental_constrained_rint: {
2540 auto CI = cast<ConstrainedFPIntrinsic>(Call);
2541 RM = CI->getRoundingMode();
2542 if (!RM || *RM == RoundingMode::Dynamic)
2543 return nullptr;
2544 break;
2545 }
2546 case Intrinsic::experimental_constrained_round:
2547 RM = APFloat::rmNearestTiesToAway;
2548 break;
2549 case Intrinsic::experimental_constrained_ceil:
2550 RM = APFloat::rmTowardPositive;
2551 break;
2552 case Intrinsic::experimental_constrained_floor:
2553 RM = APFloat::rmTowardNegative;
2554 break;
2555 case Intrinsic::experimental_constrained_trunc:
2556 RM = APFloat::rmTowardZero;
2557 break;
2558 }
2559 if (RM) {
2560 auto CI = cast<ConstrainedFPIntrinsic>(Call);
2561 if (U.isFinite()) {
2562 APFloat::opStatus St = U.roundToIntegral(*RM);
2563 if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
2564 St == APFloat::opInexact) {
2565 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2566 if (EB == fp::ebStrict)
2567 return nullptr;
2568 }
2569 } else if (U.isSignaling()) {
2570 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2571 if (EB && *EB != fp::ebIgnore)
2572 return nullptr;
2573 U = APFloat::getQNaN(U.getSemantics());
2574 }
2575 return ConstantFP::get(Ty->getContext(), U);
2576 }
2577
2578 // NVVM float/double to signed/unsigned int32/int64 conversions:
2579 switch (IntrinsicID) {
2580 // f2i
2581 case Intrinsic::nvvm_f2i_rm:
2582 case Intrinsic::nvvm_f2i_rn:
2583 case Intrinsic::nvvm_f2i_rp:
2584 case Intrinsic::nvvm_f2i_rz:
2585 case Intrinsic::nvvm_f2i_rm_ftz:
2586 case Intrinsic::nvvm_f2i_rn_ftz:
2587 case Intrinsic::nvvm_f2i_rp_ftz:
2588 case Intrinsic::nvvm_f2i_rz_ftz:
2589 // f2ui
2590 case Intrinsic::nvvm_f2ui_rm:
2591 case Intrinsic::nvvm_f2ui_rn:
2592 case Intrinsic::nvvm_f2ui_rp:
2593 case Intrinsic::nvvm_f2ui_rz:
2594 case Intrinsic::nvvm_f2ui_rm_ftz:
2595 case Intrinsic::nvvm_f2ui_rn_ftz:
2596 case Intrinsic::nvvm_f2ui_rp_ftz:
2597 case Intrinsic::nvvm_f2ui_rz_ftz:
2598 // d2i
2599 case Intrinsic::nvvm_d2i_rm:
2600 case Intrinsic::nvvm_d2i_rn:
2601 case Intrinsic::nvvm_d2i_rp:
2602 case Intrinsic::nvvm_d2i_rz:
2603 // d2ui
2604 case Intrinsic::nvvm_d2ui_rm:
2605 case Intrinsic::nvvm_d2ui_rn:
2606 case Intrinsic::nvvm_d2ui_rp:
2607 case Intrinsic::nvvm_d2ui_rz:
2608 // f2ll
2609 case Intrinsic::nvvm_f2ll_rm:
2610 case Intrinsic::nvvm_f2ll_rn:
2611 case Intrinsic::nvvm_f2ll_rp:
2612 case Intrinsic::nvvm_f2ll_rz:
2613 case Intrinsic::nvvm_f2ll_rm_ftz:
2614 case Intrinsic::nvvm_f2ll_rn_ftz:
2615 case Intrinsic::nvvm_f2ll_rp_ftz:
2616 case Intrinsic::nvvm_f2ll_rz_ftz:
2617 // f2ull
2618 case Intrinsic::nvvm_f2ull_rm:
2619 case Intrinsic::nvvm_f2ull_rn:
2620 case Intrinsic::nvvm_f2ull_rp:
2621 case Intrinsic::nvvm_f2ull_rz:
2622 case Intrinsic::nvvm_f2ull_rm_ftz:
2623 case Intrinsic::nvvm_f2ull_rn_ftz:
2624 case Intrinsic::nvvm_f2ull_rp_ftz:
2625 case Intrinsic::nvvm_f2ull_rz_ftz:
2626 // d2ll
2627 case Intrinsic::nvvm_d2ll_rm:
2628 case Intrinsic::nvvm_d2ll_rn:
2629 case Intrinsic::nvvm_d2ll_rp:
2630 case Intrinsic::nvvm_d2ll_rz:
2631 // d2ull
2632 case Intrinsic::nvvm_d2ull_rm:
2633 case Intrinsic::nvvm_d2ull_rn:
2634 case Intrinsic::nvvm_d2ull_rp:
2635 case Intrinsic::nvvm_d2ull_rz: {
2636 // NaN inputs need special handling in these float-to-integer conversions.
2637 if (U.isNaN()) {
2638 // In float-to-integer conversion, NaN inputs are converted to 0
2639 // when the source and destination bitwidths are both less than 64.
2640 if (nvvm::FPToIntegerIntrinsicNaNZero(IntrinsicID))
2641 return ConstantInt::get(Ty, 0);
2642
2643 // Otherwise, the most significant bit is set.
2644 unsigned BitWidth = Ty->getIntegerBitWidth();
2645 uint64_t Val = 1ULL << (BitWidth - 1);
2646 return ConstantInt::get(Ty, APInt(BitWidth, Val, /*IsSigned=*/false));
2647 }
2648
2649 APFloat::roundingMode RMode =
2650 nvvm::GetFPToIntegerRoundingMode(IntrinsicID);
2651 bool IsFTZ = nvvm::FPToIntegerIntrinsicShouldFTZ(IntrinsicID);
2652 bool IsSigned = nvvm::FPToIntegerIntrinsicResultIsSigned(IntrinsicID);
2653
2654 APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
2655 auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;
2656
2657 // Return max/min value for integers if the result is +/-inf or
2658 // is too large to fit in the result's integer bitwidth.
2659 bool IsExact = false;
2660 FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
2661 return ConstantInt::get(Ty, ResInt);
2662 }
2663 }
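 // Illustrative example: nvvm_f2i_rz(-1.5f) folds to -1 and nvvm_f2i_rn(2.5f)
 // folds to 2 (ties to even); a NaN input folds to 0 for the 32-bit
 // conversions and to a value with only the sign bit set for the 64-bit ones,
 // as described above.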
2664
2665 /// We only fold functions with finite arguments. Folding NaN and inf is
2666 /// likely to be aborted with an exception anyway, and some host libms
2667 /// have known errors raising exceptions.
2668 if (!U.isFinite())
2669 return nullptr;
2670
2671 /// Currently APFloat versions of these functions do not exist, so we use
2672 /// the host native double versions. Float versions are not called
2673 /// directly but for all these it is true (float)(f((double)arg)) ==
2674 /// f(arg). Long double not supported yet.
2675 const APFloat &APF = Op->getValueAPF();
2676
2677 switch (IntrinsicID) {
2678 default: break;
2679 case Intrinsic::log:
2680 return ConstantFoldFP(log, APF, Ty);
2681 case Intrinsic::log2:
2682 // TODO: What about hosts that lack a C99 library?
2683 return ConstantFoldFP(log2, APF, Ty);
2684 case Intrinsic::log10:
2685 // TODO: What about hosts that lack a C99 library?
2686 return ConstantFoldFP(log10, APF, Ty);
2687 case Intrinsic::exp:
2688 return ConstantFoldFP(exp, APF, Ty);
2689 case Intrinsic::exp2:
2690 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
2691 return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
2692 case Intrinsic::exp10:
2693 // Fold exp10(x) as pow(10, x), in case the host lacks a C99 library.
2694 return ConstantFoldBinaryFP(pow, APFloat(10.0), APF, Ty);
2695 case Intrinsic::sin:
2696 return ConstantFoldFP(sin, APF, Ty);
2697 case Intrinsic::cos:
2698 return ConstantFoldFP(cos, APF, Ty);
2699 case Intrinsic::sinh:
2700 return ConstantFoldFP(sinh, APF, Ty);
2701 case Intrinsic::cosh:
2702 return ConstantFoldFP(cosh, APF, Ty);
2703 case Intrinsic::atan:
2704 // Implement optional behavior from C's Annex F for +/-0.0.
2705 if (U.isZero())
2706 return ConstantFP::get(Ty->getContext(), U);
2707 return ConstantFoldFP(atan, APF, Ty);
2708 case Intrinsic::sqrt:
2709 return ConstantFoldFP(sqrt, APF, Ty);
2710
2711 // NVVM Intrinsics:
2712 case Intrinsic::nvvm_ceil_ftz_f:
2713 case Intrinsic::nvvm_ceil_f:
2714 case Intrinsic::nvvm_ceil_d:
2715 return ConstantFoldFP(
2716 ceil, APF, Ty,
2717 nvvm::GetNVVMDenormMode(
2718 nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2719
2720 case Intrinsic::nvvm_fabs_ftz:
2721 case Intrinsic::nvvm_fabs:
2722 return ConstantFoldFP(
2723 fabs, APF, Ty,
2724 nvvm::GetNVVMDenormMode(
2725 nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2726
2727 case Intrinsic::nvvm_floor_ftz_f:
2728 case Intrinsic::nvvm_floor_f:
2729 case Intrinsic::nvvm_floor_d:
2730 return ConstantFoldFP(
2731 floor, APF, Ty,
2732 nvvm::GetNVVMDenormMode(
2733 nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2734
2735 case Intrinsic::nvvm_rcp_rm_ftz_f:
2736 case Intrinsic::nvvm_rcp_rn_ftz_f:
2737 case Intrinsic::nvvm_rcp_rp_ftz_f:
2738 case Intrinsic::nvvm_rcp_rz_ftz_f:
2739 case Intrinsic::nvvm_rcp_rm_d:
2740 case Intrinsic::nvvm_rcp_rm_f:
2741 case Intrinsic::nvvm_rcp_rn_d:
2742 case Intrinsic::nvvm_rcp_rn_f:
2743 case Intrinsic::nvvm_rcp_rp_d:
2744 case Intrinsic::nvvm_rcp_rp_f:
2745 case Intrinsic::nvvm_rcp_rz_d:
2746 case Intrinsic::nvvm_rcp_rz_f: {
2747 APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID);
2748 bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID);
2749
2750 auto Denominator = IsFTZ ? FTZPreserveSign(APF) : APF;
2751 APFloat Res = APFloat::getOne(APF.getSemantics());
2752 APFloat::opStatus Status = Res.divide(Denominator, RoundMode);
2753
2754 if (Status == APFloat::opOK || Status == APFloat::opInexact) {
2755 if (IsFTZ)
2756 Res = FTZPreserveSign(Res);
2757 return ConstantFP::get(Ty->getContext(), Res);
2758 }
2759 return nullptr;
2760 }
2761
2762 case Intrinsic::nvvm_round_ftz_f:
2763 case Intrinsic::nvvm_round_f:
2764 case Intrinsic::nvvm_round_d: {
2765 // nvvm_round is lowered to PTX cvt.rni, which will round to nearest
2766 // integer, choosing even integer if source is equidistant between two
2767 // integers, so the semantics are closer to "rint" rather than "round".
2768 bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
2769 auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
2770 V.roundToIntegral(APFloat::rmNearestTiesToEven);
2771 return ConstantFP::get(Ty->getContext(), V);
2772 }
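 // Illustrative example: nvvm_round_f(2.5f) folds to 2.0 and
 // nvvm_round_f(3.5f) folds to 4.0 (ties to even), unlike llvm.round, which
 // would give 3.0 and 4.0.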
2773
2774 case Intrinsic::nvvm_saturate_ftz_f:
2775 case Intrinsic::nvvm_saturate_d:
2776 case Intrinsic::nvvm_saturate_f: {
2777 bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
2778 auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
2779 if (V.isNegative() || V.isZero() || V.isNaN())
2780 return ConstantFP::getZero(Ty);
2781 APFloat One = APFloat::getOne(V.getSemantics());
2782 if (V > One)
2783 return ConstantFP::get(Ty->getContext(), One);
2784 return ConstantFP::get(Ty->getContext(), APF);
2785 }
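 // Illustrative example: nvvm_saturate_f(1.5f) folds to 1.0,
 // nvvm_saturate_f(-0.25f) folds to 0.0, and a NaN input also folds to 0.0.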
2786
2787 case Intrinsic::nvvm_sqrt_rn_ftz_f:
2788 case Intrinsic::nvvm_sqrt_f:
2789 case Intrinsic::nvvm_sqrt_rn_d:
2790 case Intrinsic::nvvm_sqrt_rn_f:
2791 if (APF.isNegative())
2792 return nullptr;
2793 return ConstantFoldFP(
2794 sqrt, APF, Ty,
2795 nvvm::GetNVVMDenormMode(
2796 nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2797
2798 // AMDGCN Intrinsics:
2799 case Intrinsic::amdgcn_cos:
2800 case Intrinsic::amdgcn_sin: {
2801 double V = getValueAsDouble(Op);
2802 if (V < -256.0 || V > 256.0)
2803 // The gfx8 and gfx9 architectures handle arguments outside the range
2804 // [-256, 256] differently. This should be a rare case so bail out
2805 // rather than trying to handle the difference.
2806 return nullptr;
2807 bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;
2808 double V4 = V * 4.0;
2809 if (V4 == floor(V4)) {
2810 // Force exact results for quarter-integer inputs.
2811 const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };
2812 V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
2813 } else {
2814 if (IsCos)
2815 V = cos(V * 2.0 * numbers::pi);
2816 else
2817 V = sin(V * 2.0 * numbers::pi);
2818 }
2819 return GetConstantFoldFPValue(V, Ty);
2820 }
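 // Illustrative example: the argument is in units of full turns (2*pi), so
 // amdgcn.sin(0.25) folds to 1.0 and amdgcn.cos(0.25) folds to 0.0 via the
 // quarter-integer table above.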
2821 }
2822
2823 if (!TLI)
2824 return nullptr;
2825
2826 LibFunc Func = NotLibFunc;
2827 if (!TLI->getLibFunc(Name, Func))
2828 return nullptr;
2829
2830 switch (Func) {
2831 default:
2832 break;
2833 case LibFunc_acos:
2834 case LibFunc_acosf:
2835 case LibFunc_acos_finite:
2836 case LibFunc_acosf_finite:
2837 if (TLI->has(Func))
2838 return ConstantFoldFP(acos, APF, Ty);
2839 break;
2840 case LibFunc_asin:
2841 case LibFunc_asinf:
2842 case LibFunc_asin_finite:
2843 case LibFunc_asinf_finite:
2844 if (TLI->has(Func))
2845 return ConstantFoldFP(asin, APF, Ty);
2846 break;
2847 case LibFunc_atan:
2848 case LibFunc_atanf:
2849 // Implement optional behavior from C's Annex F for +/-0.0.
2850 if (U.isZero())
2851 return ConstantFP::get(Ty->getContext(), U);
2852 if (TLI->has(Func))
2853 return ConstantFoldFP(atan, APF, Ty);
2854 break;
2855 case LibFunc_ceil:
2856 case LibFunc_ceilf:
2857 if (TLI->has(Func)) {
2858 U.roundToIntegral(APFloat::rmTowardPositive);
2859 return ConstantFP::get(Ty->getContext(), U);
2860 }
2861 break;
2862 case LibFunc_cos:
2863 case LibFunc_cosf:
2864 if (TLI->has(Func))
2865 return ConstantFoldFP(cos, APF, Ty);
2866 break;
2867 case LibFunc_cosh:
2868 case LibFunc_coshf:
2869 case LibFunc_cosh_finite:
2870 case LibFunc_coshf_finite:
2871 if (TLI->has(Func))
2872 return ConstantFoldFP(cosh, APF, Ty);
2873 break;
2874 case LibFunc_exp:
2875 case LibFunc_expf:
2876 case LibFunc_exp_finite:
2877 case LibFunc_expf_finite:
2878 if (TLI->has(Func))
2879 return ConstantFoldFP(exp, APF, Ty);
2880 break;
2881 case LibFunc_exp2:
2882 case LibFunc_exp2f:
2883 case LibFunc_exp2_finite:
2884 case LibFunc_exp2f_finite:
2885 if (TLI->has(Func))
2886 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
2887 return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
2888 break;
2889 case LibFunc_fabs:
2890 case LibFunc_fabsf:
2891 if (TLI->has(Func)) {
2892 U.clearSign();
2893 return ConstantFP::get(Ty->getContext(), U);
2894 }
2895 break;
2896 case LibFunc_floor:
2897 case LibFunc_floorf:
2898 if (TLI->has(Func)) {
2899 U.roundToIntegral(APFloat::rmTowardNegative);
2900 return ConstantFP::get(Ty->getContext(), U);
2901 }
2902 break;
2903 case LibFunc_log:
2904 case LibFunc_logf:
2905 case LibFunc_log_finite:
2906 case LibFunc_logf_finite:
2907 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
2908 return ConstantFoldFP(log, APF, Ty);
2909 break;
2910 case LibFunc_log2:
2911 case LibFunc_log2f:
2912 case LibFunc_log2_finite:
2913 case LibFunc_log2f_finite:
2914 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
2915 // TODO: What about hosts that lack a C99 library?
2916 return ConstantFoldFP(log2, APF, Ty);
2917 break;
2918 case LibFunc_log10:
2919 case LibFunc_log10f:
2920 case LibFunc_log10_finite:
2921 case LibFunc_log10f_finite:
2922 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
2923 // TODO: What about hosts that lack a C99 library?
2924 return ConstantFoldFP(log10, APF, Ty);
2925 break;
2926 case LibFunc_ilogb:
2927 case LibFunc_ilogbf:
2928 if (!APF.isZero() && TLI->has(Func))
2929 return ConstantInt::get(Ty, ilogb(APF), true);
2930 break;
2931 case LibFunc_logb:
2932 case LibFunc_logbf:
2933 if (!APF.isZero() && TLI->has(Func))
2934 return ConstantFoldFP(logb, APF, Ty);
2935 break;
2936 case LibFunc_log1p:
2937 case LibFunc_log1pf:
2938 // Implement optional behavior from C's Annex F for +/-0.0.
2939 if (U.isZero())
2940 return ConstantFP::get(Ty->getContext(), U);
2941 if (APF > APFloat::getOne(APF.getSemantics(), true) && TLI->has(Func))
2942 return ConstantFoldFP(log1p, APF, Ty);
2943 break;
2944 case LibFunc_logl:
2945 return nullptr;
2946 case LibFunc_erf:
2947 case LibFunc_erff:
2948 if (TLI->has(Func))
2949 return ConstantFoldFP(erf, APF, Ty);
2950 break;
2951 case LibFunc_nearbyint:
2952 case LibFunc_nearbyintf:
2953 case LibFunc_rint:
2954 case LibFunc_rintf:
2955 if (TLI->has(Func)) {
2956 U.roundToIntegral(APFloat::rmNearestTiesToEven);
2957 return ConstantFP::get(Ty->getContext(), U);
2958 }
2959 break;
2960 case LibFunc_round:
2961 case LibFunc_roundf:
2962 if (TLI->has(Func)) {
2963 U.roundToIntegral(APFloat::rmNearestTiesToAway);
2964 return ConstantFP::get(Ty->getContext(), U);
2965 }
2966 break;
2967 case LibFunc_sin:
2968 case LibFunc_sinf:
2969 if (TLI->has(Func))
2970 return ConstantFoldFP(sin, APF, Ty);
2971 break;
2972 case LibFunc_sinh:
2973 case LibFunc_sinhf:
2974 case LibFunc_sinh_finite:
2975 case LibFunc_sinhf_finite:
2976 if (TLI->has(Func))
2977 return ConstantFoldFP(sinh, APF, Ty);
2978 break;
2979 case LibFunc_sqrt:
2980 case LibFunc_sqrtf:
2981 if (!APF.isNegative() && TLI->has(Func))
2982 return ConstantFoldFP(sqrt, APF, Ty);
2983 break;
2984 case LibFunc_tan:
2985 case LibFunc_tanf:
2986 if (TLI->has(Func))
2987 return ConstantFoldFP(tan, APF, Ty);
2988 break;
2989 case LibFunc_tanh:
2990 case LibFunc_tanhf:
2991 if (TLI->has(Func))
2992 return ConstantFoldFP(tanh, APF, Ty);
2993 break;
2994 case LibFunc_trunc:
2995 case LibFunc_truncf:
2996 if (TLI->has(Func)) {
2997 U.roundToIntegral(APFloat::rmTowardZero);
2998 return ConstantFP::get(Ty->getContext(), U);
2999 }
3000 break;
3001 }
3002 return nullptr;
3003 }
3004
3005 if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
3006 switch (IntrinsicID) {
3007 case Intrinsic::bswap:
3008 return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
3009 case Intrinsic::ctpop:
3010 return ConstantInt::get(Ty, Op->getValue().popcount());
3011 case Intrinsic::bitreverse:
3012 return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
3013 case Intrinsic::convert_from_fp16: {
3014 APFloat Val(APFloat::IEEEhalf(), Op->getValue());
3015
3016 bool lost = false;
3017 APFloat::opStatus status = Val.convert(
3018 Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
3019
3020 // Conversion is always precise.
3021 (void)status;
3022 assert(status != APFloat::opInexact && !lost &&
3023 "Precision lost during fp16 constfolding");
3024
3025 return ConstantFP::get(Ty->getContext(), Val);
3026 }
3027
3028 case Intrinsic::amdgcn_s_wqm: {
3029 uint64_t Val = Op->getZExtValue();
3030 Val |= (Val & 0x5555555555555555ULL) << 1 |
3031 ((Val >> 1) & 0x5555555555555555ULL);
3032 Val |= (Val & 0x3333333333333333ULL) << 2 |
3033 ((Val >> 2) & 0x3333333333333333ULL);
3034 return ConstantInt::get(Ty, Val);
3035 }
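 // Illustrative example: s_wqm turns every 4-bit group ("quad") that contains
 // any set bit into all ones, so an input of 0x12 folds to 0xFF.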
3036
3037 case Intrinsic::amdgcn_s_quadmask: {
3038 uint64_t Val = Op->getZExtValue();
3039 uint64_t QuadMask = 0;
3040 for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) {
3041 if (!(Val & 0xF))
3042 continue;
3043
3044 QuadMask |= (1ULL << I);
3045 }
3046 return ConstantInt::get(Ty, QuadMask);
3047 }
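 // Illustrative example: s_quadmask sets one output bit per non-zero 4-bit
 // group of the input, so 0x00F2 folds to 0b0011 (quads 0 and 1 each contain
 // a set bit).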
3048
3049 case Intrinsic::amdgcn_s_bitreplicate: {
3050 uint64_t Val = Op->getZExtValue();
3051 Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;
3052 Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8;
3053 Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4;
3054 Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2;
3055 Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1;
3056 Val = Val | Val << 1;
3057 return ConstantInt::get(Ty, Val);
3058 }
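 // Illustrative example: s_bitreplicate doubles each bit of the 32-bit input
 // into two adjacent bits of the 64-bit result, so 0b101 folds to 0b110011.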
3059
3060 default:
3061 return nullptr;
3062 }
3063 }
3064
3065 switch (IntrinsicID) {
3066 default: break;
3067 case Intrinsic::vector_reduce_add:
3068 case Intrinsic::vector_reduce_mul:
3069 case Intrinsic::vector_reduce_and:
3070 case Intrinsic::vector_reduce_or:
3071 case Intrinsic::vector_reduce_xor:
3072 case Intrinsic::vector_reduce_smin:
3073 case Intrinsic::vector_reduce_smax:
3074 case Intrinsic::vector_reduce_umin:
3075 case Intrinsic::vector_reduce_umax:
3076 if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))
3077 return C;
3078 break;
3079 }
3080
3081 // Support ConstantVector in case we have an Undef in the top.
3082 if (isa<ConstantVector>(Operands[0]) ||
3083 isa<ConstantDataVector>(Operands[0]) ||
3084 isa<ConstantAggregateZero>(Operands[0])) {
3085 auto *Op = cast<Constant>(Operands[0]);
3086 switch (IntrinsicID) {
3087 default: break;
3088 case Intrinsic::x86_sse_cvtss2si:
3089 case Intrinsic::x86_sse_cvtss2si64:
3090 case Intrinsic::x86_sse2_cvtsd2si:
3091 case Intrinsic::x86_sse2_cvtsd2si64:
3092 if (ConstantFP *FPOp =
3093 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3094 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3095 /*roundTowardZero=*/false, Ty,
3096 /*IsSigned*/true);
3097 break;
3098 case Intrinsic::x86_sse_cvttss2si:
3099 case Intrinsic::x86_sse_cvttss2si64:
3100 case Intrinsic::x86_sse2_cvttsd2si:
3101 case Intrinsic::x86_sse2_cvttsd2si64:
3102 if (ConstantFP *FPOp =
3103 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3104 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3105 /*roundTowardZero=*/true, Ty,
3106 /*IsSigned*/true);
3107 break;
3108
3109 case Intrinsic::wasm_anytrue:
3110 return Op->isZeroValue() ? ConstantInt::get(Ty, 0)
3111 : ConstantInt::get(Ty, 1);
3112
3113 case Intrinsic::wasm_alltrue:
3114 // Check each element individually
3115 unsigned E = cast<FixedVectorType>(Op->getType())->getNumElements();
3116 for (unsigned I = 0; I != E; ++I)
3117 if (Constant *Elt = Op->getAggregateElement(I))
3118 if (Elt->isZeroValue())
3119 return ConstantInt::get(Ty, 0);
3120
3121 return ConstantInt::get(Ty, 1);
3122 }
3123 }
3124
3125 return nullptr;
3126}
3127
3128static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2,
3129 const ConstrainedFPIntrinsic *Call) {
3130 APFloat::opStatus St = APFloat::opOK;
3131 auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call);
3132 FCmpInst::Predicate Cond = FCmp->getPredicate();
3133 if (FCmp->isSignaling()) {
3134 if (Op1.isNaN() || Op2.isNaN())
3135 St = APFloat::opInvalidOp;
3136 } else {
3137 if (Op1.isSignaling() || Op2.isSignaling())
3138 St = APFloat::opInvalidOp;
3139 }
3140 bool Result = FCmpInst::compare(Op1, Op2, Cond);
3141 if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St))
3142 return ConstantInt::get(Call->getType()->getScalarType(), Result);
3143 return nullptr;
3144}
3145
3146static Constant *ConstantFoldLibCall2(StringRef Name, Type *Ty,
3147 ArrayRef<Constant *> Operands,
3148 const TargetLibraryInfo *TLI) {
3149 if (!TLI)
3150 return nullptr;
3151
3152 LibFunc Func = NotLibFunc;
3153 if (!TLI->getLibFunc(Name, Func))
3154 return nullptr;
3155
3156 const auto *Op1 = dyn_cast<ConstantFP>(Operands[0]);
3157 if (!Op1)
3158 return nullptr;
3159
3160 const auto *Op2 = dyn_cast<ConstantFP>(Operands[1]);
3161 if (!Op2)
3162 return nullptr;
3163
3164 const APFloat &Op1V = Op1->getValueAPF();
3165 const APFloat &Op2V = Op2->getValueAPF();
3166
3167 switch (Func) {
3168 default:
3169 break;
3170 case LibFunc_pow:
3171 case LibFunc_powf:
3172 case LibFunc_pow_finite:
3173 case LibFunc_powf_finite:
3174 if (TLI->has(Func))
3175 return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
3176 break;
3177 case LibFunc_fmod:
3178 case LibFunc_fmodf:
3179 if (TLI->has(Func)) {
3180 APFloat V = Op1->getValueAPF();
3181 if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
3182 return ConstantFP::get(Ty->getContext(), V);
3183 }
3184 break;
3185 case LibFunc_remainder:
3186 case LibFunc_remainderf:
3187 if (TLI->has(Func)) {
3188 APFloat V = Op1->getValueAPF();
3189 if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
3190 return ConstantFP::get(Ty->getContext(), V);
3191 }
3192 break;
3193 case LibFunc_atan2:
3194 case LibFunc_atan2f:
3195 // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm
3196 // (Solaris), so we do not assume a known result for that.
3197 if (Op1V.isZero() && Op2V.isZero())
3198 return nullptr;
3199 [[fallthrough]];
3200 case LibFunc_atan2_finite:
3201 case LibFunc_atan2f_finite:
3202 if (TLI->has(Func))
3203 return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
3204 break;
3205 }
3206
3207 return nullptr;
3208}
3209
3210static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
3211 ArrayRef<Constant *> Operands,
3212 const CallBase *Call) {
3213 assert(Operands.size() == 2 && "Wrong number of operands.");
3214
3215 if (Ty->isFloatingPointTy()) {
3216 // TODO: We should have undef handling for all of the FP intrinsics that
3217 // are attempted to be folded in this function.
3218 bool IsOp0Undef = isa<UndefValue>(Operands[0]);
3219 bool IsOp1Undef = isa<UndefValue>(Operands[1]);
3220 switch (IntrinsicID) {
3221 case Intrinsic::maxnum:
3222 case Intrinsic::minnum:
3223 case Intrinsic::maximum:
3224 case Intrinsic::minimum:
3225 case Intrinsic::maximumnum:
3226 case Intrinsic::minimumnum:
3227 case Intrinsic::nvvm_fmax_d:
3228 case Intrinsic::nvvm_fmin_d:
3229 // If one argument is undef, return the other argument.
3230 if (IsOp0Undef)
3231 return Operands[1];
3232 if (IsOp1Undef)
3233 return Operands[0];
3234 break;
3235
3236 case Intrinsic::nvvm_fmax_f:
3237 case Intrinsic::nvvm_fmax_ftz_f:
3238 case Intrinsic::nvvm_fmax_ftz_nan_f:
3239 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3240 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3241 case Intrinsic::nvvm_fmax_nan_f:
3242 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3243 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3244
3245 case Intrinsic::nvvm_fmin_f:
3246 case Intrinsic::nvvm_fmin_ftz_f:
3247 case Intrinsic::nvvm_fmin_ftz_nan_f:
3248 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
3249 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
3250 case Intrinsic::nvvm_fmin_nan_f:
3251 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
3252 case Intrinsic::nvvm_fmin_xorsign_abs_f:
3253 // If one arg is undef, the other arg can be returned only if it is
3254 // constant, as we may need to flush it to sign-preserving zero or
3255 // canonicalize the NaN.
3256 if (!IsOp0Undef && !IsOp1Undef)
3257 break;
3258 if (auto *Op = dyn_cast<ConstantFP>(Operands[IsOp0Undef ? 1 : 0])) {
3259 if (Op->isNaN()) {
3260 APInt NVCanonicalNaN(32, 0x7fffffff);
3261 return ConstantFP::get(
3262 Ty, APFloat(Ty->getFltSemantics(), NVCanonicalNaN));
3263 }
3264 if (nvvm::FMinFMaxShouldFTZ(IntrinsicID))
3265 return ConstantFP::get(Ty, FTZPreserveSign(Op->getValueAPF()));
3266 else
3267 return Op;
3268 }
3269 break;
3270 }
3271 }
3272
3273 if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
3274 const APFloat &Op1V = Op1->getValueAPF();
3275
3276 if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
3277 if (Op2->getType() != Op1->getType())
3278 return nullptr;
3279 const APFloat &Op2V = Op2->getValueAPF();
3280
3281 if (const auto *ConstrIntr =
3282 dyn_cast_if_present<ConstrainedFPIntrinsic>(Call)) {
3283 RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
3284 APFloat Res = Op1V;
3285 APFloat::opStatus St;
3286 switch (IntrinsicID) {
3287 default:
3288 return nullptr;
3289 case Intrinsic::experimental_constrained_fadd:
3290 St = Res.add(Op2V, RM);
3291 break;
3292 case Intrinsic::experimental_constrained_fsub:
3293 St = Res.subtract(Op2V, RM);
3294 break;
3295 case Intrinsic::experimental_constrained_fmul:
3296 St = Res.multiply(Op2V, RM);
3297 break;
3298 case Intrinsic::experimental_constrained_fdiv:
3299 St = Res.divide(Op2V, RM);
3300 break;
3301 case Intrinsic::experimental_constrained_frem:
3302 St = Res.mod(Op2V);
3303 break;
3304 case Intrinsic::experimental_constrained_fcmp:
3305 case Intrinsic::experimental_constrained_fcmps:
3306 return evaluateCompare(Op1V, Op2V, ConstrIntr);
3307 }
3308 if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
3309 St))
3310 return ConstantFP::get(Ty->getContext(), Res);
3311 return nullptr;
3312 }
3313
3314 switch (IntrinsicID) {
3315 default:
3316 break;
3317 case Intrinsic::copysign:
3318 return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V));
3319 case Intrinsic::minnum:
3320 return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V));
3321 case Intrinsic::maxnum:
3322 return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V));
3323 case Intrinsic::minimum:
3324 return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
3325 case Intrinsic::maximum:
3326 return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
3327 case Intrinsic::minimumnum:
3328 return ConstantFP::get(Ty->getContext(), minimumnum(Op1V, Op2V));
3329 case Intrinsic::maximumnum:
3330 return ConstantFP::get(Ty->getContext(), maximumnum(Op1V, Op2V));
3331
3332 case Intrinsic::nvvm_fmax_d:
3333 case Intrinsic::nvvm_fmax_f:
3334 case Intrinsic::nvvm_fmax_ftz_f:
3335 case Intrinsic::nvvm_fmax_ftz_nan_f:
3336 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3337 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3338 case Intrinsic::nvvm_fmax_nan_f:
3339 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3340 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3341
3342 case Intrinsic::nvvm_fmin_d:
3343 case Intrinsic::nvvm_fmin_f:
3344 case Intrinsic::nvvm_fmin_ftz_f:
3345 case Intrinsic::nvvm_fmin_ftz_nan_f:
3346 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
3347 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
3348 case Intrinsic::nvvm_fmin_nan_f:
3349 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
3350 case Intrinsic::nvvm_fmin_xorsign_abs_f: {
3351
3352 bool ShouldCanonicalizeNaNs = !(IntrinsicID == Intrinsic::nvvm_fmax_d ||
3353 IntrinsicID == Intrinsic::nvvm_fmin_d);
3354 bool IsFTZ = nvvm::FMinFMaxShouldFTZ(IntrinsicID);
3355 bool IsNaNPropagating = nvvm::FMinFMaxPropagatesNaNs(IntrinsicID);
3356 bool IsXorSignAbs = nvvm::FMinFMaxIsXorSignAbs(IntrinsicID);
3357
3358 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3359 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3360
3361 bool XorSign = false;
3362 if (IsXorSignAbs) {
3363 XorSign = A.isNegative() ^ B.isNegative();
3364 A = abs(A);
3365 B = abs(B);
3366 }
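 // e.g. fmax.xorsign.abs(-3.0, 2.0): max(|-3.0|, |2.0|) is 3.0 and the
 // input signs differ, so the folded result is -3.0.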
3367
3368 bool IsFMax = false;
3369 switch (IntrinsicID) {
3370 case Intrinsic::nvvm_fmax_d:
3371 case Intrinsic::nvvm_fmax_f:
3372 case Intrinsic::nvvm_fmax_ftz_f:
3373 case Intrinsic::nvvm_fmax_ftz_nan_f:
3374 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3375 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3376 case Intrinsic::nvvm_fmax_nan_f:
3377 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3378 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3379 IsFMax = true;
3380 break;
3381 }
3382 APFloat Res = IsFMax ? maximum(A, B) : minimum(A, B);
3383
3384 if (ShouldCanonicalizeNaNs) {
3385 APFloat NVCanonicalNaN(Res.getSemantics(), APInt(32, 0x7fffffff));
3386 if (A.isNaN() && B.isNaN())
3387 return ConstantFP::get(Ty, NVCanonicalNaN);
3388 else if (IsNaNPropagating && (A.isNaN() || B.isNaN()))
3389 return ConstantFP::get(Ty, NVCanonicalNaN);
3390 }
3391
3392 if (A.isNaN() && B.isNaN())
3393 return Operands[1];
3394 else if (A.isNaN())
3395 Res = B;
3396 else if (B.isNaN())
3397 Res = A;
3398
3399 if (IsXorSignAbs && XorSign != Res.isNegative())
3400 Res.changeSign();
3401
3402 return ConstantFP::get(Ty->getContext(), Res);
3403 }
3404
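 // For the explicitly rounded NVVM arithmetic below, the suffix selects the
 // IEEE-754 rounding mode: ".rn" rounds to nearest even, ".rz" toward zero,
 // ".rm" toward negative infinity and ".rp" toward positive infinity;
 // ".ftz" variants additionally flush subnormals.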
3405 case Intrinsic::nvvm_add_rm_f:
3406 case Intrinsic::nvvm_add_rn_f:
3407 case Intrinsic::nvvm_add_rp_f:
3408 case Intrinsic::nvvm_add_rz_f:
3409 case Intrinsic::nvvm_add_rm_d:
3410 case Intrinsic::nvvm_add_rn_d:
3411 case Intrinsic::nvvm_add_rp_d:
3412 case Intrinsic::nvvm_add_rz_d:
3413 case Intrinsic::nvvm_add_rm_ftz_f:
3414 case Intrinsic::nvvm_add_rn_ftz_f:
3415 case Intrinsic::nvvm_add_rp_ftz_f:
3416 case Intrinsic::nvvm_add_rz_ftz_f: {
3417
3418 bool IsFTZ = nvvm::FAddShouldFTZ(IntrinsicID);
3419 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3420 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3421
3422 APFloat::roundingMode RoundMode =
3423 nvvm::GetFAddRoundingMode(IntrinsicID);
3424
3425 APFloat Res = A;
3426 APFloat::opStatus Status = Res.add(B, RoundMode);
3427
3428 if (!Res.isNaN() &&
3429 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3430 Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3431 return ConstantFP::get(Ty->getContext(), Res);
3432 }
3433 return nullptr;
3434 }
3435
3436 case Intrinsic::nvvm_mul_rm_f:
3437 case Intrinsic::nvvm_mul_rn_f:
3438 case Intrinsic::nvvm_mul_rp_f:
3439 case Intrinsic::nvvm_mul_rz_f:
3440 case Intrinsic::nvvm_mul_rm_d:
3441 case Intrinsic::nvvm_mul_rn_d:
3442 case Intrinsic::nvvm_mul_rp_d:
3443 case Intrinsic::nvvm_mul_rz_d:
3444 case Intrinsic::nvvm_mul_rm_ftz_f:
3445 case Intrinsic::nvvm_mul_rn_ftz_f:
3446 case Intrinsic::nvvm_mul_rp_ftz_f:
3447 case Intrinsic::nvvm_mul_rz_ftz_f: {
3448
3449 bool IsFTZ = nvvm::FMulShouldFTZ(IntrinsicID);
3450 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3451 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3452
3453 APFloat::roundingMode RoundMode =
3454 nvvm::GetFMulRoundingMode(IntrinsicID);
3455
3456 APFloat Res = A;
3457 APFloat::opStatus Status = Res.multiply(B, RoundMode);
3458
3459 if (!Res.isNaN() &&
3460 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3461 Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3462 return ConstantFP::get(Ty->getContext(), Res);
3463 }
3464 return nullptr;
3465 }
3466
3467 case Intrinsic::nvvm_div_rm_f:
3468 case Intrinsic::nvvm_div_rn_f:
3469 case Intrinsic::nvvm_div_rp_f:
3470 case Intrinsic::nvvm_div_rz_f:
3471 case Intrinsic::nvvm_div_rm_d:
3472 case Intrinsic::nvvm_div_rn_d:
3473 case Intrinsic::nvvm_div_rp_d:
3474 case Intrinsic::nvvm_div_rz_d:
3475 case Intrinsic::nvvm_div_rm_ftz_f:
3476 case Intrinsic::nvvm_div_rn_ftz_f:
3477 case Intrinsic::nvvm_div_rp_ftz_f:
3478 case Intrinsic::nvvm_div_rz_ftz_f: {
3479 bool IsFTZ = nvvm::FDivShouldFTZ(IntrinsicID);
3480 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3481 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3482 APFloat::roundingMode RoundMode =
3483 nvvm::GetFDivRoundingMode(IntrinsicID);
3484
3485 APFloat Res = A;
3486 APFloat::opStatus Status = Res.divide(B, RoundMode);
3487 if (!Res.isNaN() &&
3488 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3489 Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3490 return ConstantFP::get(Ty->getContext(), Res);
3491 }
3492 return nullptr;
3493 }
3494 }
3495
3496 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
3497 return nullptr;
3498
3499 switch (IntrinsicID) {
3500 default:
3501 break;
3502 case Intrinsic::pow:
3503 return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
3504 case Intrinsic::amdgcn_fmul_legacy:
3505 // The legacy behaviour is that multiplying +/- 0.0 by anything, even
3506 // NaN or infinity, gives +0.0.
3507 if (Op1V.isZero() || Op2V.isZero())
3508 return ConstantFP::getZero(Ty);
3509 return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
3510 }
3511
3512 } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
3513 switch (IntrinsicID) {
3514 case Intrinsic::ldexp: {
3515 return ConstantFP::get(
3516 Ty->getContext(),
3517 scalbn(Op1V, Op2C->getSExtValue(), APFloat::rmNearestTiesToEven));
3518 }
3519 case Intrinsic::is_fpclass: {
3520 FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue());
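 // Each mask bit selects one floating-point class; e.g. a mask of 0x60
 // (fcNegZero | fcPosZero) folds is.fpclass(-0.0, 0x60) to true.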
3521 bool Result =
3522 ((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) ||
3523 ((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) ||
3524 ((Mask & fcNegInf) && Op1V.isNegInfinity()) ||
3525 ((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) ||
3526 ((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) ||
3527 ((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) ||
3528 ((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) ||
3529 ((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) ||
3530 ((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) ||
3531 ((Mask & fcPosInf) && Op1V.isPosInfinity());
3532 return ConstantInt::get(Ty, Result);
3533 }
3534 case Intrinsic::powi: {
3535 int Exp = static_cast<int>(Op2C->getSExtValue());
3536 switch (Ty->getTypeID()) {
3537 case Type::HalfTyID:
3538 case Type::FloatTyID: {
3539 APFloat Res(static_cast<float>(std::pow(Op1V.convertToFloat(), Exp)));
3540 if (Ty->isHalfTy()) {
3541 bool Unused;
3542 Res.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
3543 &Unused);
3544 }
3545 return ConstantFP::get(Ty->getContext(), Res);
3546 }
3547 case Type::DoubleTyID:
3548 return ConstantFP::get(Ty, std::pow(Op1V.convertToDouble(), Exp));
3549 default:
3550 return nullptr;
3551 }
3552 }
3553 default:
3554 break;
3555 }
3556 }
3557 return nullptr;
3558 }
3559
3560 if (Operands[0]->getType()->isIntegerTy() &&
3561 Operands[1]->getType()->isIntegerTy()) {
3562 const APInt *C0, *C1;
3563 if (!getConstIntOrUndef(Operands[0], C0) ||
3564 !getConstIntOrUndef(Operands[1], C1))
3565 return nullptr;
3566
3567 switch (IntrinsicID) {
3568 default: break;
3569 case Intrinsic::smax:
3570 case Intrinsic::smin:
3571 case Intrinsic::umax:
3572 case Intrinsic::umin:
3573 if (!C0 && !C1)
3574 return UndefValue::get(Ty);
3575 if (!C0 || !C1)
3576 return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty);
3577 return ConstantInt::get(
3578 Ty, ICmpInst::compare(*C0, *C1,
3579 MinMaxIntrinsic::getPredicate(IntrinsicID))
3580 ? *C0
3581 : *C1);
3582
3583 case Intrinsic::scmp:
3584 case Intrinsic::ucmp:
3585 if (!C0 || !C1)
3586 return ConstantInt::get(Ty, 0);
3587
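 // e.g. scmp(i8 -1, i8 2) folds to -1, while ucmp(i8 -1, i8 2) folds to 1
 // because 0xFF compares as 255 when unsigned.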
3588 int Res;
3589 if (IntrinsicID == Intrinsic::scmp)
3590 Res = C0->sgt(*C1) ? 1 : C0->slt(*C1) ? -1 : 0;
3591 else
3592 Res = C0->ugt(*C1) ? 1 : C0->ult(*C1) ? -1 : 0;
3593 return ConstantInt::get(Ty, Res, /*IsSigned=*/true);
3594
3595 case Intrinsic::usub_with_overflow:
3596 case Intrinsic::ssub_with_overflow:
3597 // X - undef -> { 0, false }
3598 // undef - X -> { 0, false }
3599 if (!C0 || !C1)
3600 return Constant::getNullValue(Ty);
3601 [[fallthrough]];
3602 case Intrinsic::uadd_with_overflow:
3603 case Intrinsic::sadd_with_overflow:
3604 // X + undef -> { -1, false }
3605 // undef + x -> { -1, false }
3606 if (!C0 || !C1) {
3607 return ConstantStruct::get(
3608 cast<StructType>(Ty),
3609 {Constant::getAllOnesValue(Ty->getStructElementType(0)),
3610 Constant::getNullValue(Ty->getStructElementType(1))});
3611 }
3612 [[fallthrough]];
3613 case Intrinsic::smul_with_overflow:
3614 case Intrinsic::umul_with_overflow: {
3615 // undef * X -> { 0, false }
3616 // X * undef -> { 0, false }
3617 if (!C0 || !C1)
3618 return Constant::getNullValue(Ty);
3619
3620 APInt Res;
3621 bool Overflow;
3622 switch (IntrinsicID) {
3623 default: llvm_unreachable("Invalid case");
3624 case Intrinsic::sadd_with_overflow:
3625 Res = C0->sadd_ov(*C1, Overflow);
3626 break;
3627 case Intrinsic::uadd_with_overflow:
3628 Res = C0->uadd_ov(*C1, Overflow);
3629 break;
3630 case Intrinsic::ssub_with_overflow:
3631 Res = C0->ssub_ov(*C1, Overflow);
3632 break;
3633 case Intrinsic::usub_with_overflow:
3634 Res = C0->usub_ov(*C1, Overflow);
3635 break;
3636 case Intrinsic::smul_with_overflow:
3637 Res = C0->smul_ov(*C1, Overflow);
3638 break;
3639 case Intrinsic::umul_with_overflow:
3640 Res = C0->umul_ov(*C1, Overflow);
3641 break;
3642 }
3643 Constant *Ops[] = {
3644 ConstantInt::get(Ty->getContext(), Res),
3645 ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
3646 };
3647 return ConstantStruct::get(cast<StructType>(Ty), Ops);
3648 }
3649 case Intrinsic::uadd_sat:
3650 case Intrinsic::sadd_sat:
3651 if (!C0 && !C1)
3652 return UndefValue::get(Ty);
3653 if (!C0 || !C1)
3654 return Constant::getAllOnesValue(Ty);
3655 if (IntrinsicID == Intrinsic::uadd_sat)
3656 return ConstantInt::get(Ty, C0->uadd_sat(*C1));
3657 else
3658 return ConstantInt::get(Ty, C0->sadd_sat(*C1));
3659 case Intrinsic::usub_sat:
3660 case Intrinsic::ssub_sat:
3661 if (!C0 && !C1)
3662 return UndefValue::get(Ty);
3663 if (!C0 || !C1)
3664 return Constant::getNullValue(Ty);
3665 if (IntrinsicID == Intrinsic::usub_sat)
3666 return ConstantInt::get(Ty, C0->usub_sat(*C1));
3667 else
3668 return ConstantInt::get(Ty, C0->ssub_sat(*C1));
3669 case Intrinsic::cttz:
3670 case Intrinsic::ctlz:
3671 assert(C1 && "Must be constant int");
3672
3673 // cttz(0, 1) and ctlz(0, 1) are poison.
3674 if (C1->isOne() && (!C0 || C0->isZero()))
3675 return PoisonValue::get(Ty);
3676 if (!C0)
3677 return Constant::getNullValue(Ty);
3678 if (IntrinsicID == Intrinsic::cttz)
3679 return ConstantInt::get(Ty, C0->countr_zero());
3680 else
3681 return ConstantInt::get(Ty, C0->countl_zero());
3682
3683 case Intrinsic::abs:
3684 assert(C1 && "Must be constant int");
3685 assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1");
3686
3687 // Undef or minimum val operand with poison min --> poison
3688 if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
3689 return PoisonValue::get(Ty);
3690
3691 // Undef operand with no poison min --> 0 (sign bit must be clear)
3692 if (!C0)
3693 return Constant::getNullValue(Ty);
3694
3695 return ConstantInt::get(Ty, C0->abs());
3696 case Intrinsic::amdgcn_wave_reduce_umin:
3697 case Intrinsic::amdgcn_wave_reduce_umax:
3698 case Intrinsic::amdgcn_wave_reduce_max:
3699 case Intrinsic::amdgcn_wave_reduce_min:
3700 case Intrinsic::amdgcn_wave_reduce_add:
3701 case Intrinsic::amdgcn_wave_reduce_sub:
3702 case Intrinsic::amdgcn_wave_reduce_and:
3703 case Intrinsic::amdgcn_wave_reduce_or:
3704 case Intrinsic::amdgcn_wave_reduce_xor:
3705 return dyn_cast<Constant>(Operands[0]);
3706 }
3707
3708 return nullptr;
3709 }
3710
3711 // Support ConstantVector in case we have an Undef in the top.
3712 if ((isa<ConstantVector>(Operands[0]) ||
3713 isa<ConstantDataVector>(Operands[0])) &&
3714 // Check for default rounding mode.
3715 // FIXME: Support other rounding modes?
3716 isa<ConstantInt>(Operands[1]) &&
3717 cast<ConstantInt>(Operands[1])->getValue() == 4) {
3718 auto *Op = cast<Constant>(Operands[0]);
3719 switch (IntrinsicID) {
3720 default: break;
3721 case Intrinsic::x86_avx512_vcvtss2si32:
3722 case Intrinsic::x86_avx512_vcvtss2si64:
3723 case Intrinsic::x86_avx512_vcvtsd2si32:
3724 case Intrinsic::x86_avx512_vcvtsd2si64:
3725 if (ConstantFP *FPOp =
3726 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3727 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3728 /*roundTowardZero=*/false, Ty,
3729 /*IsSigned*/true);
3730 break;
3731 case Intrinsic::x86_avx512_vcvtss2usi32:
3732 case Intrinsic::x86_avx512_vcvtss2usi64:
3733 case Intrinsic::x86_avx512_vcvtsd2usi32:
3734 case Intrinsic::x86_avx512_vcvtsd2usi64:
3735 if (ConstantFP *FPOp =
3736 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3737 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3738 /*roundTowardZero=*/false, Ty,
3739 /*IsSigned*/false);
3740 break;
3741 case Intrinsic::x86_avx512_cvttss2si:
3742 case Intrinsic::x86_avx512_cvttss2si64:
3743 case Intrinsic::x86_avx512_cvttsd2si:
3744 case Intrinsic::x86_avx512_cvttsd2si64:
3745 if (ConstantFP *FPOp =
3746 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3747 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3748 /*roundTowardZero=*/true, Ty,
3749 /*IsSigned*/true);
3750 break;
3751 case Intrinsic::x86_avx512_cvttss2usi:
3752 case Intrinsic::x86_avx512_cvttss2usi64:
3753 case Intrinsic::x86_avx512_cvttsd2usi:
3754 case Intrinsic::x86_avx512_cvttsd2usi64:
3755 if (ConstantFP *FPOp =
3756 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3757 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3758 /*roundTowardZero=*/true, Ty,
3759 /*IsSigned*/false);
3760 break;
3761 }
3762 }
3763 return nullptr;
3764}
3765
3766static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
3767 const APFloat &S0,
3768 const APFloat &S1,
3769 const APFloat &S2) {
3770 unsigned ID;
3771 const fltSemantics &Sem = S0.getSemantics();
3772 APFloat MA(Sem), SC(Sem), TC(Sem);
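 // The cube face is chosen from the coordinate with the largest magnitude:
 // ID is the face index (0-5), MA the major-axis coordinate (cubema returns
 // 2 * MA), and SC/TC the per-face s/t coordinates.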
3773 if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {
3774 if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
3775 // S2 < 0
3776 ID = 5;
3777 SC = -S0;
3778 } else {
3779 ID = 4;
3780 SC = S0;
3781 }
3782 MA = S2;
3783 TC = -S1;
3784 } else if (abs(S1) >= abs(S0)) {
3785 if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
3786 // S1 < 0
3787 ID = 3;
3788 TC = -S2;
3789 } else {
3790 ID = 2;
3791 TC = S2;
3792 }
3793 MA = S1;
3794 SC = S0;
3795 } else {
3796 if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
3797 // S0 < 0
3798 ID = 1;
3799 SC = S2;
3800 } else {
3801 ID = 0;
3802 SC = -S2;
3803 }
3804 MA = S0;
3805 TC = -S1;
3806 }
3807 switch (IntrinsicID) {
3808 default:
3809 llvm_unreachable("unhandled amdgcn cube intrinsic");
3810 case Intrinsic::amdgcn_cubeid:
3811 return APFloat(Sem, ID);
3812 case Intrinsic::amdgcn_cubema:
3813 return MA + MA;
3814 case Intrinsic::amdgcn_cubesc:
3815 return SC;
3816 case Intrinsic::amdgcn_cubetc:
3817 return TC;
3818 }
3819}
3820
3821static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
3822 Type *Ty) {
3823 const APInt *C0, *C1, *C2;
3824 if (!getConstIntOrUndef(Operands[0], C0) ||
3825 !getConstIntOrUndef(Operands[1], C1) ||
3826 !getConstIntOrUndef(Operands[2], C2))
3827 return nullptr;
3828
3829 if (!C2)
3830 return UndefValue::get(Ty);
3831
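 // Each selector byte of C2 produces one result byte: values 0-7 pick a
 // byte from one of the two source operands, 8-11 replicate a source sign
 // bit across the byte, 12 yields 0x00 and anything from 13 up yields 0xff.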
3832 APInt Val(32, 0);
3833 unsigned NumUndefBytes = 0;
3834 for (unsigned I = 0; I < 32; I += 8) {
3835 unsigned Sel = C2->extractBitsAsZExtValue(8, I);
3836 unsigned B = 0;
3837
3838 if (Sel >= 13)
3839 B = 0xff;
3840 else if (Sel == 12)
3841 B = 0x00;
3842 else {
3843 const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
3844 if (!Src)
3845 ++NumUndefBytes;
3846 else if (Sel < 8)
3847 B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
3848 else
3849 B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
3850 }
3851
3852 Val.insertBits(B, I, 8);
3853 }
3854
3855 if (NumUndefBytes == 4)
3856 return UndefValue::get(Ty);
3857
3858 return ConstantInt::get(Ty, Val);
3859}
3860
3861static Constant *ConstantFoldScalarCall3(StringRef Name,
3862 Intrinsic::ID IntrinsicID,
3863 Type *Ty,
3864 ArrayRef<Constant *> Operands,
3865 const TargetLibraryInfo *TLI,
3866 const CallBase *Call) {
3867 assert(Operands.size() == 3 && "Wrong number of operands.");
3868
3869 if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
3870 if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
3871 if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
3872 const APFloat &C1 = Op1->getValueAPF();
3873 const APFloat &C2 = Op2->getValueAPF();
3874 const APFloat &C3 = Op3->getValueAPF();
3875
3876 if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
3877 RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
3878 APFloat Res = C1;
3879 APFloat::opStatus St;
3880 switch (IntrinsicID) {
3881 default:
3882 return nullptr;
3883 case Intrinsic::experimental_constrained_fma:
3884 case Intrinsic::experimental_constrained_fmuladd:
3885 St = Res.fusedMultiplyAdd(C2, C3, RM);
3886 break;
3887 }
3888 if (mayFoldConstrained(
3889 const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
3890 return ConstantFP::get(Ty->getContext(), Res);
3891 return nullptr;
3892 }
3893
3894 switch (IntrinsicID) {
3895 default: break;
3896 case Intrinsic::amdgcn_fma_legacy: {
3897 // The legacy behaviour is that multiplying +/- 0.0 by anything, even
3898 // NaN or infinity, gives +0.0.
3899 if (C1.isZero() || C2.isZero()) {
3900 // It's tempting to just return C3 here, but that would give the
3901 // wrong result if C3 was -0.0.
3902 return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
3903 }
3904 [[fallthrough]];
3905 }
3906 case Intrinsic::fma:
3907 case Intrinsic::fmuladd: {
3908 APFloat V = C1;
3909 V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);
3910 return ConstantFP::get(Ty->getContext(), V);
3911 }
3912
3913 case Intrinsic::nvvm_fma_rm_f:
3914 case Intrinsic::nvvm_fma_rn_f:
3915 case Intrinsic::nvvm_fma_rp_f:
3916 case Intrinsic::nvvm_fma_rz_f:
3917 case Intrinsic::nvvm_fma_rm_d:
3918 case Intrinsic::nvvm_fma_rn_d:
3919 case Intrinsic::nvvm_fma_rp_d:
3920 case Intrinsic::nvvm_fma_rz_d:
3921 case Intrinsic::nvvm_fma_rm_ftz_f:
3922 case Intrinsic::nvvm_fma_rn_ftz_f:
3923 case Intrinsic::nvvm_fma_rp_ftz_f:
3924 case Intrinsic::nvvm_fma_rz_ftz_f: {
3925 bool IsFTZ = nvvm::FMAShouldFTZ(IntrinsicID);
3926 APFloat A = IsFTZ ? FTZPreserveSign(C1) : C1;
3927 APFloat B = IsFTZ ? FTZPreserveSign(C2) : C2;
3928 APFloat C = IsFTZ ? FTZPreserveSign(C3) : C3;
3929
3930 APFloat::roundingMode RoundMode =
3931 nvvm::GetFMARoundingMode(IntrinsicID);
3932
3933 APFloat Res = A;
3934 APFloat::opStatus Status = Res.fusedMultiplyAdd(B, C, RoundMode);
3935
3936 if (!Res.isNaN() &&
3937 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3938 Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3939 return ConstantFP::get(Ty->getContext(), Res);
3940 }
3941 return nullptr;
3942 }
3943
3944 case Intrinsic::amdgcn_cubeid:
3945 case Intrinsic::amdgcn_cubema:
3946 case Intrinsic::amdgcn_cubesc:
3947 case Intrinsic::amdgcn_cubetc: {
3948 APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3);
3949 return ConstantFP::get(Ty->getContext(), V);
3950 }
3951 }
3952 }
3953 }
3954 }
3955
3956 if (IntrinsicID == Intrinsic::smul_fix ||
3957 IntrinsicID == Intrinsic::smul_fix_sat) {
3958 const APInt *C0, *C1;
3959 if (!getConstIntOrUndef(Operands[0], C0) ||
3960 !getConstIntOrUndef(Operands[1], C1))
3961 return nullptr;
3962
3963 // undef * C -> 0
3964 // C * undef -> 0
3965 if (!C0 || !C1)
3966 return Constant::getNullValue(Ty);
3967
3968 // This code performs rounding towards negative infinity in case the result
3969 // cannot be represented exactly for the given scale. Targets that do care
3970 // about rounding should use a target hook for specifying how rounding
3971 // should be done, and provide their own folding to be consistent with
3972 // rounding. This is the same approach as used by
3973 // DAGTypeLegalizer::ExpandIntRes_MULFIX.
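 // e.g. smul.fix(i8 6, i8 3, 2) computes (6 * 3) >> 2 = 4; with two
 // fractional bits that is 1.5 * 0.75 rounded down to 1.0.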
3974 unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue();
3975 unsigned Width = C0->getBitWidth();
3976 assert(Scale < Width && "Illegal scale.");
3977 unsigned ExtendedWidth = Width * 2;
3978 APInt Product =
3979 (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale);
3980 if (IntrinsicID == Intrinsic::smul_fix_sat) {
3981 APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth);
3982 APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth);
3983 Product = APIntOps::smin(Product, Max);
3984 Product = APIntOps::smax(Product, Min);
3985 }
3986 return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width));
3987 }
3988
3989 if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
3990 const APInt *C0, *C1, *C2;
3991 if (!getConstIntOrUndef(Operands[0], C0) ||
3992 !getConstIntOrUndef(Operands[1], C1) ||
3993 !getConstIntOrUndef(Operands[2], C2))
3994 return nullptr;
3995
3996 bool IsRight = IntrinsicID == Intrinsic::fshr;
3997 if (!C2)
3998 return Operands[IsRight ? 1 : 0];
3999 if (!C0 && !C1)
4000 return UndefValue::get(Ty);
4001
4002 // The shift amount is interpreted as modulo the bitwidth. If the shift
4003 // amount is effectively 0, avoid UB due to oversized inverse shift below.
4004 unsigned BitWidth = C2->getBitWidth();
4005 unsigned ShAmt = C2->urem(BitWidth);
4006 if (!ShAmt)
4007 return Operands[IsRight ? 1 : 0];
4008
4009 // (C0 << ShlAmt) | (C1 >> LshrAmt)
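 // e.g. fshl(i8 0x12, i8 0x34, 3) folds to (0x12 << 3) | (0x34 >> 5) = 0x91.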
4010 unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
4011 unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
4012 if (!C0)
4013 return ConstantInt::get(Ty, C1->lshr(LshrAmt));
4014 if (!C1)
4015 return ConstantInt::get(Ty, C0->shl(ShlAmt));
4016 return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
4017 }
4018
4019 if (IntrinsicID == Intrinsic::amdgcn_perm)
4020 return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);
4021
4022 return nullptr;
4023}
4024
4025static Constant *ConstantFoldScalarCall(StringRef Name,
4026 Intrinsic::ID IntrinsicID,
4027 Type *Ty,
4028 ArrayRef<Constant *> Operands,
4029 const TargetLibraryInfo *TLI,
4030 const CallBase *Call) {
4031 if (IntrinsicID != Intrinsic::not_intrinsic &&
4032 any_of(Operands, IsaPred<PoisonValue>) &&
4033 intrinsicPropagatesPoison(IntrinsicID))
4034 return PoisonValue::get(Ty);
4035
4036 if (Operands.size() == 1)
4037 return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);
4038
4039 if (Operands.size() == 2) {
4040 if (Constant *FoldedLibCall =
4041 ConstantFoldLibCall2(Name, Ty, Operands, TLI)) {
4042 return FoldedLibCall;
4043 }
4044 return ConstantFoldIntrinsicCall2(IntrinsicID, Ty, Operands, Call);
4045 }
4046
4047 if (Operands.size() == 3)
4048 return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);
4049
4050 return nullptr;
4051}
4052
4053static Constant *ConstantFoldFixedVectorCall(
4054 StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,
4055 ArrayRef<Constant *> Operands, const DataLayout &DL,
4056 const TargetLibraryInfo *TLI, const CallBase *Call) {
4057 SmallVector<Constant *, 4> Result(FVTy->getNumElements());
4058 SmallVector<Constant *, 4> Lane(Operands.size());
4059 Type *Ty = FVTy->getElementType();
4060
4061 switch (IntrinsicID) {
4062 case Intrinsic::masked_load: {
4063 auto *SrcPtr = Operands[0];
4064 auto *Mask = Operands[2];
4065 auto *Passthru = Operands[3];
4066
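 // Each result lane takes the loaded element when its mask lane is 1 and
 // the passthru element when it is 0; if a needed lane is not a known
 // constant, the fold is abandoned.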
4067 Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);
4068
4069 SmallVector<Constant *, 32> NewElements;
4070 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4071 auto *MaskElt = Mask->getAggregateElement(I);
4072 if (!MaskElt)
4073 break;
4074 auto *PassthruElt = Passthru->getAggregateElement(I);
4075 auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr;
4076 if (isa<UndefValue>(MaskElt)) {
4077 if (PassthruElt)
4078 NewElements.push_back(PassthruElt);
4079 else if (VecElt)
4080 NewElements.push_back(VecElt);
4081 else
4082 return nullptr;
4083 }
4084 if (MaskElt->isNullValue()) {
4085 if (!PassthruElt)
4086 return nullptr;
4087 NewElements.push_back(PassthruElt);
4088 } else if (MaskElt->isOneValue()) {
4089 if (!VecElt)
4090 return nullptr;
4091 NewElements.push_back(VecElt);
4092 } else {
4093 return nullptr;
4094 }
4095 }
4096 if (NewElements.size() != FVTy->getNumElements())
4097 return nullptr;
4098 return ConstantVector::get(NewElements);
4099 }
4100 case Intrinsic::arm_mve_vctp8:
4101 case Intrinsic::arm_mve_vctp16:
4102 case Intrinsic::arm_mve_vctp32:
4103 case Intrinsic::arm_mve_vctp64: {
4104 if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
4105 unsigned Lanes = FVTy->getNumElements();
4106 uint64_t Limit = Op->getZExtValue();
4107
4108 SmallVector<Constant *, 16> NCs;
4109 for (unsigned i = 0; i < Lanes; i++) {
4110 if (i < Limit)
4111 NCs.push_back(ConstantInt::getTrue(Ty));
4112 else
4113 NCs.push_back(ConstantInt::getFalse(Ty));
4114 }
4115 return ConstantVector::get(NCs);
4116 }
4117 return nullptr;
4118 }
4119 case Intrinsic::get_active_lane_mask: {
4120 auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
4121 auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
4122 if (Op0 && Op1) {
4123 unsigned Lanes = FVTy->getNumElements();
4124 uint64_t Base = Op0->getZExtValue();
4125 uint64_t Limit = Op1->getZExtValue();
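 // Lane i of the mask is (Base + i < Limit); e.g. Base = 2 and Limit = 5
 // fold a 4-lane mask to <1, 1, 1, 0>.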
4126
4127 SmallVector<Constant *, 16> NCs;
4128 for (unsigned i = 0; i < Lanes; i++) {
4129 if (Base + i < Limit)
4130 NCs.push_back(ConstantInt::getTrue(Ty));
4131 else
4132 NCs.push_back(ConstantInt::getFalse(Ty));
4133 }
4134 return ConstantVector::get(NCs);
4135 }
4136 return nullptr;
4137 }
4138 case Intrinsic::vector_extract: {
4139 auto *Idx = dyn_cast<ConstantInt>(Operands[1]);
4140 Constant *Vec = Operands[0];
4141 if (!Idx || !isa<FixedVectorType>(Vec->getType()))
4142 return nullptr;
4143
4144 unsigned NumElements = FVTy->getNumElements();
4145 unsigned VecNumElements =
4146 cast<FixedVectorType>(Vec->getType())->getNumElements();
4147 unsigned StartingIndex = Idx->getZExtValue();
4148
4149 // Extracting the entire vector is a no-op.
4150 if (NumElements == VecNumElements && StartingIndex == 0)
4151 return Vec;
4152
4153 for (unsigned I = StartingIndex, E = StartingIndex + NumElements; I < E;
4154 ++I) {
4155 Constant *Elt = Vec->getAggregateElement(I);
4156 if (!Elt)
4157 return nullptr;
4158 Result[I - StartingIndex] = Elt;
4159 }
4160
4161 return ConstantVector::get(Result);
4162 }
4163 case Intrinsic::vector_insert: {
4164 Constant *Vec = Operands[0];
4165 Constant *SubVec = Operands[1];
4166 auto *Idx = dyn_cast<ConstantInt>(Operands[2]);
4167 if (!Idx || !isa<FixedVectorType>(Vec->getType()))
4168 return nullptr;
4169
4170 unsigned SubVecNumElements =
4171 cast<FixedVectorType>(SubVec->getType())->getNumElements();
4172 unsigned VecNumElements =
4173 cast<FixedVectorType>(Vec->getType())->getNumElements();
4174 unsigned IdxN = Idx->getZExtValue();
4175 // Replacing the entire vector with a subvector is a no-op.
4176 if (SubVecNumElements == VecNumElements && IdxN == 0)
4177 return SubVec;
4178
4179 for (unsigned I = 0; I < VecNumElements; ++I) {
4180 Constant *Elt;
4181 if (I < IdxN + SubVecNumElements)
4182 Elt = SubVec->getAggregateElement(I - IdxN);
4183 else
4184 Elt = Vec->getAggregateElement(I);
4185 if (!Elt)
4186 return nullptr;
4187 Result[I] = Elt;
4188 }
4189 return ConstantVector::get(Result);
4190 }
4191 case Intrinsic::vector_interleave2: {
4192 unsigned NumElements =
4193 cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
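 // interleave2(<a0, a1, ...>, <b0, b1, ...>) folds to <a0, b0, a1, b1, ...>.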
4194 for (unsigned I = 0; I < NumElements; ++I) {
4195 Constant *Elt0 = Operands[0]->getAggregateElement(I);
4196 Constant *Elt1 = Operands[1]->getAggregateElement(I);
4197 if (!Elt0 || !Elt1)
4198 return nullptr;
4199 Result[2 * I] = Elt0;
4200 Result[2 * I + 1] = Elt1;
4201 }
4202 return ConstantVector::get(Result);
4203 }
4204 case Intrinsic::wasm_dot: {
4205 unsigned NumElements =
4206 cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
4207
4208 assert(NumElements == 8 && Result.size() == 4 &&
4209 "wasm dot takes i16x8 and produces i32x4");
4210 assert(Ty->isIntegerTy());
4211 int32_t MulVector[8];
4212
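 // Multiply the eight i16 lanes pairwise, then sum adjacent products into
 // four i32 lanes: Result[i] = a[2*i] * b[2*i] + a[2*i+1] * b[2*i+1].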
4213 for (unsigned I = 0; I < NumElements; ++I) {
4214 ConstantInt *Elt0 =
4215 cast<ConstantInt>(Operands[0]->getAggregateElement(I));
4216 ConstantInt *Elt1 =
4217 cast<ConstantInt>(Operands[1]->getAggregateElement(I));
4218
4219 MulVector[I] = Elt0->getSExtValue() * Elt1->getSExtValue();
4220 }
4221 for (unsigned I = 0; I < Result.size(); I++) {
4222 int64_t IAdd = (int64_t)MulVector[I * 2] + (int64_t)MulVector[I * 2 + 1];
4223 Result[I] = ConstantInt::get(Ty, IAdd);
4224 }
4225
4226 return ConstantVector::get(Result);
4227 }
4228 default:
4229 break;
4230 }
4231
4232 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4233 // Gather a column of constants.
4234 for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
4235 // Some intrinsics use a scalar type for certain arguments.
4236 if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J, /*TTI=*/nullptr)) {
4237 Lane[J] = Operands[J];
4238 continue;
4239 }
4240
4241 Constant *Agg = Operands[J]->getAggregateElement(I);
4242 if (!Agg)
4243 return nullptr;
4244
4245 Lane[J] = Agg;
4246 }
4247
4248 // Use the regular scalar folding to simplify this column.
4249 Constant *Folded =
4250 ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
4251 if (!Folded)
4252 return nullptr;
4253 Result[I] = Folded;
4254 }
4255
4256 return ConstantVector::get(Result);
4257}
4258
4259static Constant *ConstantFoldScalableVectorCall(
4260 StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
4261 ArrayRef<Constant *> Operands, const DataLayout &DL,
4262 const TargetLibraryInfo *TLI, const CallBase *Call) {
4263 switch (IntrinsicID) {
4264 case Intrinsic::aarch64_sve_convert_from_svbool: {
4265 auto *Src = dyn_cast<Constant>(Operands[0]);
4266 if (!Src || !Src->isNullValue())
4267 break;
4268
4269 return ConstantInt::getFalse(SVTy);
4270 }
4271 case Intrinsic::get_active_lane_mask: {
4272 auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
4273 auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
4274 if (Op0 && Op1 && Op0->getValue().uge(Op1->getValue()))
4275 return ConstantVector::getNullValue(SVTy);
4276 break;
4277 }
4278 default:
4279 break;
4280 }
4281
4282 // If trivially vectorizable, try folding it via the scalar call if all
4283 // operands are splats.
4284
4285 // TODO: ConstantFoldFixedVectorCall should probably check this too?
4286 if (!isTriviallyVectorizable(IntrinsicID))
4287 return nullptr;
4288
4289 SmallVector<Constant *, 4> SplatOps;
4290 for (auto [I, Op] : enumerate(Operands)) {
4291 if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, I, /*TTI=*/nullptr)) {
4292 SplatOps.push_back(Op);
4293 continue;
4294 }
4295 Constant *Splat = Op->getSplatValue();
4296 if (!Splat)
4297 return nullptr;
4298 SplatOps.push_back(Splat);
4299 }
4300 Constant *Folded = ConstantFoldScalarCall(
4301 Name, IntrinsicID, SVTy->getElementType(), SplatOps, TLI, Call);
4302 if (!Folded)
4303 return nullptr;
4304 return ConstantVector::getSplat(SVTy->getElementCount(), Folded);
4305}
4306
4307static std::pair<Constant *, Constant *>
4308ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) {
4309 if (isa<PoisonValue>(Op))
4310 return {Op, PoisonValue::get(IntTy)};
4311
4312 auto *ConstFP = dyn_cast<ConstantFP>(Op);
4313 if (!ConstFP)
4314 return {};
4315
4316 const APFloat &U = ConstFP->getValueAPF();
4317 int FrexpExp;
4318 APFloat FrexpMant = frexp(U, FrexpExp, APFloat::rmNearestTiesToEven);
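 // frexp splits Op into Mant * 2^Exp with |Mant| in [0.5, 1.0); e.g. 8.0
 // folds to the pair {0.5, 4}.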
4319 Constant *Result0 = ConstantFP::get(ConstFP->getType(), FrexpMant);
4320
4321 // The exponent is an "unspecified value" for inf/nan. We use zero to avoid
4322 // using undef.
4323 Constant *Result1 = FrexpMant.isFinite()
4324 ? ConstantInt::getSigned(IntTy, FrexpExp)
4325 : ConstantInt::getNullValue(IntTy);
4326 return {Result0, Result1};
4327}
4328
4329/// Handle intrinsics that return tuples, which may be tuples of vectors.
4330static Constant *
4331ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
4332 StructType *StTy, ArrayRef<Constant *> Operands,
4333 const DataLayout &DL, const TargetLibraryInfo *TLI,
4334 const CallBase *Call) {
4335
4336 switch (IntrinsicID) {
4337 case Intrinsic::frexp: {
4338 Type *Ty0 = StTy->getContainedType(0);
4339 Type *Ty1 = StTy->getContainedType(1)->getScalarType();
4340
4341 if (auto *FVTy0 = dyn_cast<FixedVectorType>(Ty0)) {
4342 SmallVector<Constant *, 4> Results0(FVTy0->getNumElements());
4343 SmallVector<Constant *, 4> Results1(FVTy0->getNumElements());
4344
4345 for (unsigned I = 0, E = FVTy0->getNumElements(); I != E; ++I) {
4346 Constant *Lane = Operands[0]->getAggregateElement(I);
4347 std::tie(Results0[I], Results1[I]) =
4348 ConstantFoldScalarFrexpCall(Lane, Ty1);
4349 if (!Results0[I])
4350 return nullptr;
4351 }
4352
4353 return ConstantStruct::get(StTy, ConstantVector::get(Results0),
4354 ConstantVector::get(Results1));
4355 }
4356
4357 auto [Result0, Result1] = ConstantFoldScalarFrexpCall(Operands[0], Ty1);
4358 if (!Result0)
4359 return nullptr;
4360 return ConstantStruct::get(StTy, Result0, Result1);
4361 }
4362 case Intrinsic::sincos: {
4363 Type *Ty = StTy->getContainedType(0);
4364 Type *TyScalar = Ty->getScalarType();
4365
4366 auto ConstantFoldScalarSincosCall =
4367 [&](Constant *Op) -> std::pair<Constant *, Constant *> {
4368 Constant *SinResult =
4369 ConstantFoldScalarCall(Name, Intrinsic::sin, TyScalar, Op, TLI, Call);
4370 Constant *CosResult =
4371 ConstantFoldScalarCall(Name, Intrinsic::cos, TyScalar, Op, TLI, Call);
4372 return std::make_pair(SinResult, CosResult);
4373 };
4374
4375 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
4376 SmallVector<Constant *> SinResults(FVTy->getNumElements());
4377 SmallVector<Constant *> CosResults(FVTy->getNumElements());
4378
4379 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4380 Constant *Lane = Operands[0]->getAggregateElement(I);
4381 std::tie(SinResults[I], CosResults[I]) =
4382 ConstantFoldScalarSincosCall(Lane);
4383 if (!SinResults[I] || !CosResults[I])
4384 return nullptr;
4385 }
4386
4387 return ConstantStruct::get(StTy, ConstantVector::get(SinResults),
4388 ConstantVector::get(CosResults));
4389 }
4390
4391 auto [SinResult, CosResult] = ConstantFoldScalarSincosCall(Operands[0]);
4392 if (!SinResult || !CosResult)
4393 return nullptr;
4394 return ConstantStruct::get(StTy, SinResult, CosResult);
4395 }
4396 case Intrinsic::vector_deinterleave2: {
4397 auto *Vec = Operands[0];
4398 auto *VecTy = cast<VectorType>(Vec->getType());
4399
4400 if (auto *EltC = Vec->getSplatValue()) {
4401 ElementCount HalfEC = VecTy->getElementCount().divideCoefficientBy(2);
4402 auto *HalfVec = ConstantVector::getSplat(HalfEC, EltC);
4403 return ConstantStruct::get(StTy, HalfVec, HalfVec);
4404 }
4405
4406 if (!isa<FixedVectorType>(Vec->getType()))
4407 return nullptr;
4408
4409 unsigned NumElements = VecTy->getElementCount().getFixedValue() / 2;
4410 SmallVector<Constant *, 4> Res0(NumElements), Res1(NumElements);
4411 for (unsigned I = 0; I < NumElements; ++I) {
4412 Constant *Elt0 = Vec->getAggregateElement(2 * I);
4413 Constant *Elt1 = Vec->getAggregateElement(2 * I + 1);
4414 if (!Elt0 || !Elt1)
4415 return nullptr;
4416 Res0[I] = Elt0;
4417 Res1[I] = Elt1;
4418 }
4419 return ConstantStruct::get(StTy, ConstantVector::get(Res0),
4420 ConstantVector::get(Res1));
4421 }
4422 default:
4423 // TODO: Constant folding of vector intrinsics that fall through here does
4424 // not work (e.g. overflow intrinsics)
4425 return ConstantFoldScalarCall(Name, IntrinsicID, StTy, Operands, TLI, Call);
4426 }
4427
4428 return nullptr;
4429}
4430
4431} // end anonymous namespace
4432
4433Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
4434 Constant *RHS, Type *Ty,
4437 // Ensure we check flags like StrictFP that might prevent this from getting
4438 // folded before generating a result.
4439 if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction()))
4440 return nullptr;
4441 return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call);
4442}
4443
4444Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
4445 ArrayRef<Constant *> Operands,
4446 const TargetLibraryInfo *TLI,
4447 bool AllowNonDeterministic) {
4448 if (Call->isNoBuiltin())
4449 return nullptr;
4450 if (!F->hasName())
4451 return nullptr;
4452
4453 // If this is not an intrinsic and not recognized as a library call, bail out.
4454 Intrinsic::ID IID = F->getIntrinsicID();
4455 if (IID == Intrinsic::not_intrinsic) {
4456 if (!TLI)
4457 return nullptr;
4458 LibFunc LibF;
4459 if (!TLI->getLibFunc(*F, LibF))
4460 return nullptr;
4461 }
4462
4463 // Conservatively assume that floating-point libcalls may be
4464 // non-deterministic.
4465 Type *Ty = F->getReturnType();
4466 if (!AllowNonDeterministic && Ty->isFPOrFPVectorTy())
4467 return nullptr;
4468
4469 StringRef Name = F->getName();
4470 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
4471 return ConstantFoldFixedVectorCall(
4472 Name, IID, FVTy, Operands, F->getDataLayout(), TLI, Call);
4473
4474 if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
4475 return ConstantFoldScalableVectorCall(
4476 Name, IID, SVTy, Operands, F->getDataLayout(), TLI, Call);
4477
4478 if (auto *StTy = dyn_cast<StructType>(Ty))
4479 return ConstantFoldStructCall(Name, IID, StTy, Operands,
4480 F->getDataLayout(), TLI, Call);
4481
4482 // TODO: If this is a library function, we already discovered that above,
4483 // so we should pass the LibFunc, not the name (and it might be better
4484 // still to separate intrinsic handling from libcalls).
4485 return ConstantFoldScalarCall(Name, IID, Ty, Operands, TLI, Call);
4486}
4487
4488bool llvm::isMathLibCallNoop(const CallBase *Call,
4489 const TargetLibraryInfo *TLI) {
4490 // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
4491 // (and to some extent ConstantFoldScalarCall).
4492 if (Call->isNoBuiltin() || Call->isStrictFP())
4493 return false;
4494 Function *F = Call->getCalledFunction();
4495 if (!F)
4496 return false;
4497
4498 LibFunc Func;
4499 if (!TLI || !TLI->getLibFunc(*F, Func))
4500 return false;
4501
4502 if (Call->arg_size() == 1) {
4503 if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
4504 const APFloat &Op = OpC->getValueAPF();
4505 switch (Func) {
4506 case LibFunc_logl:
4507 case LibFunc_log:
4508 case LibFunc_logf:
4509 case LibFunc_log2l:
4510 case LibFunc_log2:
4511 case LibFunc_log2f:
4512 case LibFunc_log10l:
4513 case LibFunc_log10:
4514 case LibFunc_log10f:
4515 return Op.isNaN() || (!Op.isZero() && !Op.isNegative());
4516
4517 case LibFunc_ilogb:
4518 return !Op.isNaN() && !Op.isZero() && !Op.isInfinity();
4519
4520 case LibFunc_expl:
4521 case LibFunc_exp:
4522 case LibFunc_expf:
4523 // FIXME: These boundaries are slightly conservative.
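 // 709 is just below ln(DBL_MAX) ~= 709.78, past which exp overflows, and
 // exp rounds to zero below roughly -745.1, so arguments inside [-745, 709]
 // produce a finite, nonzero result.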
4524 if (OpC->getType()->isDoubleTy())
4525 return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
4526 if (OpC->getType()->isFloatTy())
4527 return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
4528 break;
4529
4530 case LibFunc_exp2l:
4531 case LibFunc_exp2:
4532 case LibFunc_exp2f:
4533 // FIXME: These boundaries are slightly conservative.
4534 if (OpC->getType()->isDoubleTy())
4535 return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
4536 if (OpC->getType()->isFloatTy())
4537 return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
4538 break;
4539
4540 case LibFunc_sinl:
4541 case LibFunc_sin:
4542 case LibFunc_sinf:
4543 case LibFunc_cosl:
4544 case LibFunc_cos:
4545 case LibFunc_cosf:
4546 return !Op.isInfinity();
4547
4548 case LibFunc_tanl:
4549 case LibFunc_tan:
4550 case LibFunc_tanf: {
4551 // FIXME: Stop using the host math library.
4552 // FIXME: The computation isn't done in the right precision.
4553 Type *Ty = OpC->getType();
4554 if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
4555 return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
4556 break;
4557 }
4558
4559 case LibFunc_atan:
4560 case LibFunc_atanf:
4561 case LibFunc_atanl:
4562 // Per POSIX, this MAY fail if Op is denormal. We choose not to fail.
4563 return true;
4564
4565 case LibFunc_asinl:
4566 case LibFunc_asin:
4567 case LibFunc_asinf:
4568 case LibFunc_acosl:
4569 case LibFunc_acos:
4570 case LibFunc_acosf:
4571 return !(Op < APFloat::getOne(Op.getSemantics(), true) ||
4572 Op > APFloat::getOne(Op.getSemantics()));
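 // asin/acos only raise a domain error when the argument lies outside
 // [-1.0, 1.0], so any value in that closed interval is safe to fold.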
4573
4574 case LibFunc_sinh:
4575 case LibFunc_cosh:
4576 case LibFunc_sinhf:
4577 case LibFunc_coshf:
4578 case LibFunc_sinhl:
4579 case LibFunc_coshl:
4580 // FIXME: These boundaries are slightly conservative.
4581 if (OpC->getType()->isDoubleTy())
4582 return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
4583 if (OpC->getType()->isFloatTy())
4584 return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
4585 break;
4586
4587 case LibFunc_sqrtl:
4588 case LibFunc_sqrt:
4589 case LibFunc_sqrtf:
4590 return Op.isNaN() || Op.isZero() || !Op.isNegative();
4591
4592 // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
4593 // maybe others?
4594 default:
4595 break;
4596 }
4597 }
4598 }
4599
4600 if (Call->arg_size() == 2) {
4601 ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
4602 ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
4603 if (Op0C && Op1C) {
4604 const APFloat &Op0 = Op0C->getValueAPF();
4605 const APFloat &Op1 = Op1C->getValueAPF();
4606
4607 switch (Func) {
4608 case LibFunc_powl:
4609 case LibFunc_pow:
4610 case LibFunc_powf: {
4611 // FIXME: Stop using the host math library.
4612 // FIXME: The computation isn't done in the right precision.
4613 Type *Ty = Op0C->getType();
4614 if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
4615 if (Ty == Op1C->getType())
4616 return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
4617 }
4618 break;
4619 }
4620
4621 case LibFunc_fmodl:
4622 case LibFunc_fmod:
4623 case LibFunc_fmodf:
4624 case LibFunc_remainderl:
4625 case LibFunc_remainder:
4626 case LibFunc_remainderf:
4627 return Op0.isNaN() || Op1.isNaN() ||
4628 (!Op0.isInfinity() && !Op1.isZero());
4629
4630 case LibFunc_atan2:
4631 case LibFunc_atan2f:
4632 case LibFunc_atan2l:
4633 // Although IEEE-754 says atan2(+/-0.0, +/-0.0) are well-defined, and
4634 // GLIBC and MSVC do not appear to raise an error on those, we
4635 // cannot rely on that behavior. POSIX and C11 say that a domain error
4636 // may occur, so allow for that possibility.
4637 return !Op0.isZero() || !Op1.isZero();
4638
4639 default:
4640 break;
4641 }
4642 }
4643 }
4644
4645 return false;
4646}
4647
4649 unsigned CastOp, const DataLayout &DL,
4650 PreservedCastFlags *Flags) {
4651 switch (CastOp) {
4652 case Instruction::BitCast:
4653 // Bitcast is always lossless.
4654 return ConstantFoldCastOperand(Instruction::BitCast, C, InvCastTo, DL);
4655 case Instruction::Trunc: {
4656 auto *ZExtC = ConstantFoldCastOperand(Instruction::ZExt, C, InvCastTo, DL);
4657 if (Flags) {
4658 // Truncation back on ZExt value is always NUW.
4659 Flags->NUW = true;
4660 // Test positivity of C.
4661 auto *SExtC =
4662 ConstantFoldCastOperand(Instruction::SExt, C, InvCastTo, DL);
4663 Flags->NSW = ZExtC == SExtC;
4664 }
4665 return ZExtC;
4666 }
4667 case Instruction::SExt:
4668 case Instruction::ZExt: {
4669 auto *InvC = ConstantExpr::getTrunc(C, InvCastTo);
4670 auto *CastInvC = ConstantFoldCastOperand(CastOp, InvC, C->getType(), DL);
4671 // Must satisfy CastOp(InvC) == C.
4672 if (!CastInvC || CastInvC != C)
4673 return nullptr;
4674 if (Flags && CastOp == Instruction::ZExt) {
4675 auto *SExtInvC =
4676 ConstantFoldCastOperand(Instruction::SExt, InvC, C->getType(), DL);
4677 // Test positivity of InvC.
4678 Flags->NNeg = CastInvC == SExtInvC;
4679 }
4680 return InvC;
4681 }
4682 default:
4683 return nullptr;
4684 }
4685}
4686
4688 const DataLayout &DL,
4689 PreservedCastFlags *Flags) {
4690 return getLosslessInvCast(C, DestTy, Instruction::ZExt, DL, Flags);
4691}
4692
4694 const DataLayout &DL,
4695 PreservedCastFlags *Flags) {
4696 return getLosslessInvCast(C, DestTy, Instruction::SExt, DL, Flags);
4697}
4698
4699void TargetFolder::anchor() {}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static Constant * FoldBitCast(Constant *V, Type *DestTy)
static ConstantFP * flushDenormalConstant(Type *Ty, const APFloat &APF, DenormalMode::DenormalModeKind Mode)
Constant * getConstantAtOffset(Constant *Base, APInt Offset, const DataLayout &DL)
If this Offset points exactly to the start of an aggregate element, return that element,...
static cl::opt< bool > DisableFPCallFolding("disable-fp-call-folding", cl::desc("Disable constant-folding of FP intrinsics and libcalls."), cl::init(false), cl::Hidden)
static ConstantFP * flushDenormalConstantFP(ConstantFP *CFP, const Instruction *Inst, bool IsOutput)
static DenormalMode getInstrDenormalMode(const Instruction *CtxI, Type *Ty)
Return the denormal mode that can be assumed when executing a floating point operation at CtxI.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
Hexagon Common GEP
amode Optimize addressing mode
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
mir Rename Register Operands
static bool InRange(int64_t Value, unsigned short Shift, int LBound, int HBound)
This file contains the definitions of the enumerations and flags associated with NVVM Intrinsics,...
if(PassOpts->AAPipeline)
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
The Input class is used to parse a yaml document into in-memory structs and vectors.
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1120
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1208
void copySign(const APFloat &RHS)
Definition APFloat.h:1302
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
opStatus subtract(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1190
bool isNegative() const
Definition APFloat.h:1449
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:6115
bool isPosInfinity() const
Definition APFloat.h:1462
bool isNormal() const
Definition APFloat.h:1453
bool isDenormal() const
Definition APFloat.h:1450
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1181
const fltSemantics & getSemantics() const
Definition APFloat.h:1457
bool isNonZero() const
Definition APFloat.h:1458
bool isFinite() const
Definition APFloat.h:1454
bool isNaN() const
Definition APFloat.h:1447
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1088
opStatus multiply(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1199
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6143
bool isSignaling() const
Definition APFloat.h:1451
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1235
bool isZero() const
Definition APFloat.h:1445
APInt bitcastToAPInt() const
Definition APFloat.h:1353
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1226
bool isNegInfinity() const
Definition APFloat.h:1463
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1248
void changeSign()
Definition APFloat.h:1297
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1079
bool isInfinity() const
Definition APFloat.h:1446
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1971
LLVM_ABI APInt usub_sat(const APInt &RHS) const
Definition APInt.cpp:2055
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition APInt.cpp:520
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
APInt abs() const
Get the absolute value.
Definition APInt.h:1795
LLVM_ABI APInt sadd_sat(const APInt &RHS) const
Definition APInt.cpp:2026
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1948
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1928
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1935
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1041
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2036
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:827
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
LLVM_ABI APInt ssub_sat(const APInt &RHS) const
Definition APInt.cpp:2045
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, const DataLayout *DL)
Determine how a pair of casts can be eliminated, if they can be at all.
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
bool isSigned() const
Definition InstrTypes.h:930
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static bool isFPPredicate(Predicate P)
Definition InstrTypes.h:770
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
static LLVM_ABI bool isDesirableCastOp(unsigned Opcode)
Whether creating a constant expression for this cast is desirable.
static LLVM_ABI Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getInsertElement(Constant *Vec, Constant *Elt, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getShuffleVector(Constant *V1, Constant *V2, ArrayRef< int > Mask, Type *OnlyIfReducedTy=nullptr)
static bool isSupportedGetElementPtr(const Type *SrcElemTy)
Whether creating a constant expression for this getelementptr type is supported.
Definition Constants.h:1387
static LLVM_ABI Constant * get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags=0, Type *OnlyIfReducedTy=nullptr)
get - Return a binary or shift operator constant expression, folding if possible.
static LLVM_ABI bool isDesirableBinOp(unsigned Opcode)
Whether creating a constant expression for this binary operator is desirable.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1274
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
const APFloat & getValueAPF() const
Definition Constants.h:320
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
Constrained floating point compare intrinsics.
This is the common base class for constrained floating point intrinsics.
LLVM_ABI std::optional< fp::ExceptionBehavior > getExceptionBehavior() const
LLVM_ABI std::optional< RoundingMode > getRoundingMode() const
Wrapper for a function, representing a value that is functionally equivalent to the original function.
Definition Constants.h:952
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:63
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222
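The DenseMap operations listed above (find, end, insert) form the usual memoization pattern in analyses like this one. A minimal, self-contained sketch with hypothetical int keys and values:

#include "llvm/ADT/DenseMap.h"
#include <utility>
using namespace llvm;

int lookupOrCompute(DenseMap<int, int> &Cache, int Key) {
  auto It = Cache.find(Key);
  if (It != Cache.end())
    return It->second;              // Hit: reuse the cached result.
  int Computed = Key * 2;           // Stand-in for an expensive fold.
  Cache.insert(std::make_pair(Key, Computed));
  return Computed;
}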
static LLVM_ABI bool compare(const APFloat &LHS, const APFloat &RHS, FCmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
This provides a helper for copying FMF from an instruction or setting specified flags.
Definition IRBuilder.h:93
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
Definition Function.cpp:803
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags inBounds()
GEPNoWrapFlags withoutNoUnsignedSignedWrap() const
static GEPNoWrapFlags noUnsignedWrap()
bool hasNoUnsignedSignedWrap() const
bool isInBounds() const
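A short sketch of the GEPNoWrapFlags queries above; note that dropping nusw also drops inbounds, because inbounds implies nusw:

#include "llvm/IR/GEPNoWrapFlags.h"
using namespace llvm;

// Sketch: relax the flags on a folded GEP when a non-negative offset can no
// longer be proven (illustrative policy, not the folder's exact rule).
GEPNoWrapFlags relaxForUnknownOffset(GEPNoWrapFlags NW) {
  if (NW.isInBounds() || NW.hasNoUnsignedSignedWrap())
    NW = NW.withoutNoUnsignedSignedWrap();
  return NW;
}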
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
PointerType * getType() const
Global values are always pointers.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:132
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
bool hasDefinitiveInitializer() const
hasDefinitiveInitializer - Whether the global variable has an initializer, and any other instances of...
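The GlobalVariable predicates above gate every load-folding path. A minimal sketch of the check, assuming a hypothetical global GV:

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
using namespace llvm;

// Sketch: a load from GV may only be replaced by its initializer when the
// variable is constant and its initializer is the definitive one.
const Constant *initializerIfLoadFoldable(const GlobalVariable *GV) {
  if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
    return nullptr;
  return GV->getInitializer();
}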
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
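A small sketch of the ICmpInst helpers above: compare evaluates a predicate on two APInts directly, which is the scalar core of folding an icmp whose operands are both constant:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

bool constantICmp(const APInt &L, const APInt &R, ICmpInst::Predicate Pred) {
  // Equality predicates need no sign information; otherwise the signed or
  // unsigned flavor of Pred decides how the bits are interpreted.
  return ICmpInst::compare(L, R, Pred);
}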
bool isCast() const
bool isBinaryOp() const
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
bool isUnaryOp() const
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
static APInt getSaturationPoint(Intrinsic::ID ID, unsigned numBits)
Min/max intrinsics are monotonic: they operate on fixed-bitwidth values, so there is a certain thre...
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Class to represent scalable SIMD vectors.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition DataLayout.h:712
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:743
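A sketch of the StructLayout queries above, assuming a hypothetical struct type STy and byte offset Off; this is how a constant load at a non-zero offset locates the field it reads from:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

unsigned fieldContainingOffset(const DataLayout &DL, StructType *STy,
                               uint64_t Off) {
  const StructLayout *SL = DL.getStructLayout(STy);
  unsigned Idx = SL->getElementContainingOffset(Off);
  // The offset that remains inside the selected field:
  uint64_t Within = Off - SL->getElementOffset(Idx).getFixedValue();
  (void)Within;
  return Idx;
}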
Class to represent struct types.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
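A minimal sketch of the TargetLibraryInfo pair above, assuming a hypothetical call-site name: a libcall is only a folding candidate when the target actually provides it.

#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;

bool isRecognizedAndAvailable(const TargetLibraryInfo &TLI, StringRef Name) {
  LibFunc F;
  return TLI.getLibFunc(Name, F) && TLI.has(F);
}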
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ HalfTyID
16-bit floating point type
Definition Type.h:56
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:295
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:296
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:270
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition Type.h:200
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition Type.h:381
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
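A short sketch combining the Type queries above: pick an integer type whose width matches a primitive type, e.g. to reinterpret the bits of a folded floating-point value (hypothetical input Ty, assumed to be primitive and sized):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;

IntegerType *intTypeOfSameWidth(Type *Ty) {
  // getPrimitiveSizeInBits is zero for non-primitive types; a caller would
  // need to reject those first.
  unsigned Bits = Ty->getPrimitiveSizeInBits().getFixedValue();
  return IntegerType::get(Ty->getContext(), Bits);
}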
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:253
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:238
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition APInt.h:2248
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition APInt.h:2253
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition APInt.h:2258
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition APInt.h:2263
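A one-line sketch of the APIntOps helpers above: clamping a signed value between two constant bounds, the scalar core of folding the smax/smin intrinsics.

#include "llvm/ADT/APInt.h"
using namespace llvm;

// All three APInts are assumed to have the same bit width.
APInt clampSigned(const APInt &X, const APInt &Lo, const APInt &Hi) {
  return APIntOps::smin(APIntOps::smax(X, Lo), Hi);
}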
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
@ ebStrict
This corresponds to "fpexcept.strict".
Definition FPEnv.h:42
@ ebIgnore
This corresponds to "fpexcept.ignore".
Definition FPEnv.h:40
constexpr double pi
Definition MathExtras.h:53
APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID)
DenormalMode GetNVVMDenormMode(bool ShouldFTZ)
bool FPToIntegerIntrinsicNaNZero(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFDivRoundingMode(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID)
bool RCPShouldFTZ(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FDivShouldFTZ(Intrinsic::ID IntrinsicID)
bool FAddShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFMulRoundingMode(Intrinsic::ID IntrinsicID)
bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFAddRoundingMode(Intrinsic::ID IntrinsicID)
bool FMAShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMulShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID)
bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
@ Offset
Definition DWP.cpp:477
LLVM_ABI Constant * ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS, Constant *RHS, Type *Ty, Instruction *FMFSource)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
LLVM_ABI Constant * ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, const DataLayout &DL)
ConstantFoldLoadThroughBitcast - try to cast constant to destination type returning null if unsuccess...
static double log2(double V)
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
LLVM_ABI Constant * ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL, const Instruction *I, bool AllowNonDeterministic=true)
Attempt to constant fold a floating point binary operation with the specified operands,...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2474
LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if it is even possible to fold a call to the specified function.
unsigned getPointerAddressSpace(const Type *T)
Definition SPIRVUtils.h:345
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition APFloat.h:1563
LLVM_ABI Constant * ConstantFoldCompareInstruction(CmpInst::Predicate Predicate, Constant *C1, Constant *C2)
LLVM_ABI Constant * ConstantFoldUnaryInstruction(unsigned Opcode, Constant *V)
LLVM_ABI bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, const DataLayout &DL, DSOLocalEquivalent **DSOEquiv=nullptr)
If this constant is a constant offset from a global, return the global and the constant.
LLVM_ABI bool isMathLibCallNoop(const CallBase *Call, const TargetLibraryInfo *TLI)
Check whether the given call has no side-effects.
LLVM_ABI Constant * ReadByteArrayFromGlobal(const GlobalVariable *GV, uint64_t Offset)
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:733
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1643
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1534
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:677
LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1555
LLVM_ABI Constant * ConstantFoldExtractValueInstruction(Constant *Agg, ArrayRef< unsigned > Idxs)
Attempt to constant fold an extractvalue instruction with the specified operands and indices.
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:754
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1598
LLVM_ABI Constant * ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty, const DataLayout &DL)
If C is a uniform value where all bits are the same (either all zero, all ones, all undef or all pois...
LLVM_ABI Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
LLVM_ABI Constant * FlushFPConstant(Constant *Operand, const Instruction *I, bool IsOutput)
Attempt to flush a floating-point constant according to the denormal mode set in the instruction's parent func...
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1629
FPClassTest
Floating-point class tests, supported by the 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1543
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool intrinsicPropagatesPoison(Intrinsic::ID IID)
Return whether this intrinsic propagates poison for all operands.
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
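A hedged sketch of the entry point above, assuming only an LLVMContext Ctx and a DataLayout DL: folding add i32 20, 22 down to i32 42.

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

Constant *foldConstantAdd(LLVMContext &Ctx, const DataLayout &DL) {
  Constant *LHS = ConstantInt::get(Type::getInt32Ty(Ctx), 20);
  Constant *RHS = ConstantInt::get(Type::getInt32Ty(Ctx), 22);
  // Returns i32 42, or nullptr if the operation could not be folded.
  return ConstantFoldBinaryOpOperands(Instruction::Add, LHS, RHS, DL);
}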
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1579
@ Sub
Subtraction of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ NearestTiesToEven
roundTiesToEven.
@ Dynamic
Denotes mode unknown at compile time.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI Constant * ConstantFoldCastInstruction(unsigned opcode, Constant *V, Type *DestTy)
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
LLVM_ABI Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1616
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1656
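A short sketch contrasting two of the APFloat min/max flavors above; the distinction matters because the folder must match each intrinsic's NaN semantics exactly:

#include "llvm/ADT/APFloat.h"
using namespace llvm;

void nanHandlingDiffers() {
  APFloat One(1.0);
  APFloat NaN = APFloat::getQNaN(APFloat::IEEEdouble());
  APFloat A = maxnum(One, NaN);  // 1.0: 2008 maxNum ignores a quiet NaN.
  APFloat B = maximum(One, NaN); // NaN: 2019 maximum propagates it.
  (void)A; (void)B;
}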
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:831
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:309
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:307
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:302
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:306
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
DenormalModeKind
Represent handled modes for denormal (aka subnormal) modes in the floating point environment.
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ Dynamic
Denormals have unknown treatment.
@ IEEE
IEEE-754 denormal numbers preserved.
DenormalModeKind Output
Denormal flushing mode for floating point instruction results in the default floating point environme...
static constexpr DenormalMode getDynamic()
static constexpr DenormalMode getIEEE()
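A hedged sketch of how the denormal-mode fields above drive input flushing, in the spirit of FlushFPConstant (simplified: the Dynamic mode, which cannot be resolved at compile time, is not handled here):

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/FloatingPointMode.h"
using namespace llvm;

APFloat flushDenormalInput(APFloat V, DenormalMode Mode) {
  if (!V.isDenormal() || Mode.Input == DenormalMode::IEEE)
    return V; // Normal values and IEEE-mode inputs are left alone.
  // PreserveSign keeps the sign of the flushed zero; PositiveZero does not.
  bool Negative = Mode.Input == DenormalMode::PreserveSign && V.isNegative();
  return APFloat::getZero(V.getSemantics(), Negative);
}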
Incoming for lane mask phi as machine instruction; incoming register Reg and incoming block Block are...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
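Finally, a sketch tying the KnownBits accessors above to computeKnownBits: when every bit of a value is known, the value itself can be replaced by a constant (hypothetical value V and layout DL):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

Constant *constantFromKnownBits(const Value *V, const DataLayout &DL) {
  KnownBits Known(V->getType()->getScalarSizeInBits());
  computeKnownBits(V, Known, DL);
  if (!Known.isConstant())
    return nullptr; // At least one bit is still unknown.
  return ConstantInt::get(V->getType(), Known.getConstant());
}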