AMDGPULibCalls.cpp
1//===- AMDGPULibCalls.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file does AMD library function optimizations.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPULibFunc.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/MDBuilder.h"
24#include <cmath>
25
26#define DEBUG_TYPE "amdgpu-simplifylib"
27
28using namespace llvm;
29using namespace llvm::PatternMatch;
30
31static cl::opt<bool> EnablePreLink("amdgpu-prelink",
32 cl::desc("Enable pre-link mode optimizations"),
33 cl::init(false),
34 cl::Hidden);
35
36static cl::list<std::string> UseNative("amdgpu-use-native",
37 cl::desc("Comma separated list of functions to replace with native, or all"),
38 cl::CommaSeparated, cl::ValueOptional,
39 cl::Hidden);
40
41#define MATH_PI numbers::pi
42#define MATH_E numbers::e
43#define MATH_SQRT2 numbers::sqrt2
44#define MATH_SQRT1_2 numbers::inv_sqrt2
45
46namespace llvm {
47
48class AMDGPULibCalls {
49private:
50 const TargetLibraryInfo *TLInfo = nullptr;
51 AssumptionCache *AC = nullptr;
52 DominatorTree *DT = nullptr;
53
54 using FuncInfo = llvm::AMDGPULibFunc;
55
56 // -fuse-native.
57 bool AllNative = false;
58
59 bool useNativeFunc(const StringRef F) const;
60
61 // Return a pointer (pointer expr) to the function if a definition of
62 // "FuncName" exists; it may create a new function prototype in pre-link mode.
63 FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
64
65 bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
66
67 bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
68
69 /* Specialized optimizations */
70
71 // pow/powr/pown
72 bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
73
74 // rootn
75 bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
76
77 // -fuse-native for sincos
78 bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
79
80 // Evaluate calls whose arguments are constants.
81 bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1,
82 Constant *copr0, Constant *copr1);
83 bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
84
85 /// Insert a call to the sincos function \p Fsincos. Returns (value of sin,
86 /// value of cos, sincos call).
87 std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
88 FastMathFlags FMF,
89 IRBuilder<> &B,
90 FunctionCallee Fsincos);
91
92 // sin/cos
93 bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
94
95 // __read_pipe/__write_pipe
96 bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
97 const FuncInfo &FInfo);
98
99 // Get a scalar native builtin single argument FP function
100 FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
101
102 /// Substitute a call to a known libcall with an intrinsic call. If \p
103 /// AllowMinSizeF32 is true, allow the replacement in a minsize function.
104 bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
105 bool AllowMinSizeF32 = false,
106 bool AllowF64 = false,
107 bool AllowStrictFP = false);
108 void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
109 Intrinsic::ID IntrID);
110
111 bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
112 Intrinsic::ID IntrID,
113 bool AllowMinSizeF32 = false,
114 bool AllowF64 = false,
115 bool AllowStrictFP = false);
116
117protected:
118 bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const;
119
121
122 static void replaceCall(Instruction *I, Value *With) {
123 I->replaceAllUsesWith(With);
124 I->eraseFromParent();
125 }
126
127 static void replaceCall(FPMathOperator *I, Value *With) {
128 replaceCall(cast<Instruction>(I), With);
129 }
130
131public:
132 AMDGPULibCalls() = default;
133
134 bool fold(CallInst *CI);
135
137 void initNativeFuncs();
138
139 // Replace a normal math function call with the native version.
140 bool useNative(CallInst *CI);
141};
142
143} // end namespace llvm
144
145template <typename IRB>
146static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
147 const Twine &Name = "") {
148 CallInst *R = B.CreateCall(Callee, Arg, Name);
149 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
150 R->setCallingConv(F->getCallingConv());
151 return R;
152}
153
154template <typename IRB>
155static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
156 Value *Arg2, const Twine &Name = "") {
157 CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
158 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
159 R->setCallingConv(F->getCallingConv());
160 return R;
161}
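// getPownType (below) builds the pown() signature that matches a pow() call:
// the return and base types are kept, while the exponent parameter becomes i32
// (or a vector of i32 with the same element count as the return type).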
162
163static FunctionType *getPownType(FunctionType *FT) {
164 Type *PowNExpTy = Type::getInt32Ty(FT->getContext());
165 if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
166 PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());
167
168 return FunctionType::get(FT->getReturnType(),
169 {FT->getParamType(0), PowNExpTy}, false);
170}
171
172// Data structures for table-driven optimizations.
173// FuncTbl works for both f32 and f64 functions with 1 input argument
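// For example, tbl_cos below records that cos(0.0) == 1.0 and cos(-0.0) == 1.0,
// so TDOFold() can replace a call such as cos(0.0f), or a vector call whose
// elements all appear in the table, with the corresponding constant.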
174
175struct TableEntry {
176 double result;
177 double input;
178};
179
180/* a list of {result, input} */
181static const TableEntry tbl_acos[] = {
182 {MATH_PI / 2.0, 0.0},
183 {MATH_PI / 2.0, -0.0},
184 {0.0, 1.0},
185 {MATH_PI, -1.0}
186};
187static const TableEntry tbl_acosh[] = {
188 {0.0, 1.0}
189};
190static const TableEntry tbl_acospi[] = {
191 {0.5, 0.0},
192 {0.5, -0.0},
193 {0.0, 1.0},
194 {1.0, -1.0}
195};
196static const TableEntry tbl_asin[] = {
197 {0.0, 0.0},
198 {-0.0, -0.0},
199 {MATH_PI / 2.0, 1.0},
200 {-MATH_PI / 2.0, -1.0}
201};
202static const TableEntry tbl_asinh[] = {
203 {0.0, 0.0},
204 {-0.0, -0.0}
205};
206static const TableEntry tbl_asinpi[] = {
207 {0.0, 0.0},
208 {-0.0, -0.0},
209 {0.5, 1.0},
210 {-0.5, -1.0}
211};
212static const TableEntry tbl_atan[] = {
213 {0.0, 0.0},
214 {-0.0, -0.0},
215 {MATH_PI / 4.0, 1.0},
216 {-MATH_PI / 4.0, -1.0}
217};
218static const TableEntry tbl_atanh[] = {
219 {0.0, 0.0},
220 {-0.0, -0.0}
221};
222static const TableEntry tbl_atanpi[] = {
223 {0.0, 0.0},
224 {-0.0, -0.0},
225 {0.25, 1.0},
226 {-0.25, -1.0}
227};
228static const TableEntry tbl_cbrt[] = {
229 {0.0, 0.0},
230 {-0.0, -0.0},
231 {1.0, 1.0},
232 {-1.0, -1.0},
233};
234static const TableEntry tbl_cos[] = {
235 {1.0, 0.0},
236 {1.0, -0.0}
237};
238static const TableEntry tbl_cosh[] = {
239 {1.0, 0.0},
240 {1.0, -0.0}
241};
242static const TableEntry tbl_cospi[] = {
243 {1.0, 0.0},
244 {1.0, -0.0}
245};
246static const TableEntry tbl_erfc[] = {
247 {1.0, 0.0},
248 {1.0, -0.0}
249};
250static const TableEntry tbl_erf[] = {
251 {0.0, 0.0},
252 {-0.0, -0.0}
253};
254static const TableEntry tbl_exp[] = {
255 {1.0, 0.0},
256 {1.0, -0.0},
257 {MATH_E, 1.0}
258};
259static const TableEntry tbl_exp2[] = {
260 {1.0, 0.0},
261 {1.0, -0.0},
262 {2.0, 1.0}
263};
264static const TableEntry tbl_exp10[] = {
265 {1.0, 0.0},
266 {1.0, -0.0},
267 {10.0, 1.0}
268};
269static const TableEntry tbl_expm1[] = {
270 {0.0, 0.0},
271 {-0.0, -0.0}
272};
273static const TableEntry tbl_log[] = {
274 {0.0, 1.0},
275 {1.0, MATH_E}
276};
277static const TableEntry tbl_log2[] = {
278 {0.0, 1.0},
279 {1.0, 2.0}
280};
281static const TableEntry tbl_log10[] = {
282 {0.0, 1.0},
283 {1.0, 10.0}
284};
285static const TableEntry tbl_rsqrt[] = {
286 {1.0, 1.0},
287 {MATH_SQRT1_2, 2.0}
288};
289static const TableEntry tbl_sin[] = {
290 {0.0, 0.0},
291 {-0.0, -0.0}
292};
293static const TableEntry tbl_sinh[] = {
294 {0.0, 0.0},
295 {-0.0, -0.0}
296};
297static const TableEntry tbl_sinpi[] = {
298 {0.0, 0.0},
299 {-0.0, -0.0}
300};
301static const TableEntry tbl_sqrt[] = {
302 {0.0, 0.0},
303 {1.0, 1.0},
304 {MATH_SQRT2, 2.0}
305};
306static const TableEntry tbl_tan[] = {
307 {0.0, 0.0},
308 {-0.0, -0.0}
309};
310static const TableEntry tbl_tanh[] = {
311 {0.0, 0.0},
312 {-0.0, -0.0}
313};
314static const TableEntry tbl_tanpi[] = {
315 {0.0, 0.0},
316 {-0.0, -0.0}
317};
318static const TableEntry tbl_tgamma[] = {
319 {1.0, 1.0},
320 {1.0, 2.0},
321 {2.0, 3.0},
322 {6.0, 4.0}
323};
324
325static bool HasNative(AMDGPULibFunc::EFuncId id) {
326 switch(id) {
342 return true;
343 default:;
344 }
345 return false;
346}
347
348using TableRef = ArrayRef<TableEntry>;
349
350static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
351 switch(id) {
389 default:;
390 }
391 return TableRef();
392}
393
394static inline int getVecSize(const AMDGPULibFunc& FInfo) {
395 return FInfo.getLeads()[0].VectorSize;
396}
397
398static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
399 return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
400}
401
402FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
403 // If we are doing PreLinkOpt, the function is external. So it is safe to
404 // use getOrInsertFunction() at this stage.
405
406 return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
407 : AMDGPULibFunc::getFunction(M, fInfo);
408}
409
410bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
411 FuncInfo &FInfo) {
412 return AMDGPULibFunc::parse(FMangledName, FInfo);
413}
414
415bool AMDGPULibCalls::isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const {
416 return FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs();
417}
418
419bool AMDGPULibCalls::canIncreasePrecisionOfConstantFold(
420 const FPMathOperator *FPOp) const {
421 // TODO: Refine to approxFunc or contract
422 return FPOp->isFast();
423}
424
425void AMDGPULibCalls::initFunction(Function &F, FunctionAnalysisManager &FAM) {
426 AC = &FAM.getResult<AssumptionAnalysis>(F);
427 TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
428 DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
429}
430
431bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
432 return AllNative || llvm::is_contained(UseNative, F);
433}
434
435void AMDGPULibCalls::initNativeFuncs() {
436 AllNative = useNativeFunc("all") ||
437 (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
438 UseNative.begin()->empty());
439}
440
441bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
442 bool native_sin = useNativeFunc("sin");
443 bool native_cos = useNativeFunc("cos");
444
445 if (native_sin && native_cos) {
446 Module *M = aCI->getModule();
447 Value *opr0 = aCI->getArgOperand(0);
448
449 AMDGPULibFunc nf;
450 nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
451 nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
452
453 nf.setPrefix(AMDGPULibFunc::NATIVE);
454 nf.setId(AMDGPULibFunc::EI_SIN);
455 FunctionCallee sinExpr = getFunction(M, nf);
456
457 nf.setPrefix(AMDGPULibFunc::NATIVE);
458 nf.setId(AMDGPULibFunc::EI_COS);
459 FunctionCallee cosExpr = getFunction(M, nf);
460 if (sinExpr && cosExpr) {
461 Value *sinval =
462 CallInst::Create(sinExpr, opr0, "splitsin", aCI->getIterator());
463 Value *cosval =
464 CallInst::Create(cosExpr, opr0, "splitcos", aCI->getIterator());
465 new StoreInst(cosval, aCI->getArgOperand(1), aCI->getIterator());
466
467 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
468 << " with native version of sin/cos");
469
470 replaceCall(aCI, sinval);
471 return true;
472 }
473 }
474 return false;
475}
476
477bool AMDGPULibCalls::useNative(CallInst *aCI) {
478 Function *Callee = aCI->getCalledFunction();
479 if (!Callee || aCI->isNoBuiltin())
480 return false;
481
482 FuncInfo FInfo;
483 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
484 FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
485 getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
486 !(AllNative || useNativeFunc(FInfo.getName()))) {
487 return false;
488 }
489
490 if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
491 return sincosUseNative(aCI, FInfo);
492
493 FInfo.setPrefix(AMDGPULibFunc::NATIVE);
494 FunctionCallee F = getFunction(aCI->getModule(), FInfo);
495 if (!F)
496 return false;
497
498 aCI->setCalledFunction(F);
499 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
500 << " with native version");
501 return true;
502}
503
504// Clang emits calls to __read_pipe_2 or __read_pipe_4 for the OpenCL read_pipe
505// builtin, with appended type size and alignment arguments, where 2 or 4
506// indicates the original number of arguments. The library has an optimized
507// version of __read_pipe_2/__read_pipe_4 when the type size and alignment are
508// the same power-of-2 value. This function transforms __read_pipe_2 into
509// __read_pipe_2_N for such cases, where N is the size in bytes of the type
510// (N = 1, 2, 4, 8, ..., 128). The same applies to __read_pipe_4, __write_pipe_2, and __write_pipe_4.
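// For example (illustrative): a __read_pipe_2 call whose packet size and
// alignment are both 4 becomes a call to __read_pipe_2_4 with the trailing size
// and alignment arguments dropped; if the size and alignment differ, the call
// is left untouched.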
511bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
512 const FuncInfo &FInfo) {
513 auto *Callee = CI->getCalledFunction();
514 if (!Callee->isDeclaration())
515 return false;
516
517 assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
518 auto *M = Callee->getParent();
519 std::string Name = std::string(Callee->getName());
520 auto NumArg = CI->arg_size();
521 if (NumArg != 4 && NumArg != 6)
522 return false;
523 ConstantInt *PacketSize =
524 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
525 ConstantInt *PacketAlign =
526 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
527 if (!PacketSize || !PacketAlign)
528 return false;
529
530 unsigned Size = PacketSize->getZExtValue();
531 Align Alignment = PacketAlign->getAlignValue();
532 if (Alignment != Size)
533 return false;
534
535 unsigned PtrArgLoc = CI->arg_size() - 3;
536 Value *PtrArg = CI->getArgOperand(PtrArgLoc);
537 Type *PtrTy = PtrArg->getType();
538
539 SmallVector<llvm::Type *, 6> ArgTys;
540 for (unsigned I = 0; I != PtrArgLoc; ++I)
541 ArgTys.push_back(CI->getArgOperand(I)->getType());
542 ArgTys.push_back(PtrTy);
543
544 Name = Name + "_" + std::to_string(Size);
545 auto *FTy = FunctionType::get(Callee->getReturnType(),
546 ArrayRef<Type *>(ArgTys), false);
547 AMDGPULibFunc NewLibFunc(Name, FTy);
548 FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
549 if (!F)
550 return false;
551
552 SmallVector<llvm::Value *, 6> Args;
553 for (unsigned I = 0; I != PtrArgLoc; ++I)
554 Args.push_back(CI->getArgOperand(I));
555 Args.push_back(PtrArg);
556
557 auto *NCI = B.CreateCall(F, Args);
558 NCI->setAttributes(CI->getAttributes());
559 CI->replaceAllUsesWith(NCI);
560 CI->dropAllReferences();
561 CI->eraseFromParent();
562
563 return true;
564}
565
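// Returns true when V is known to hold an integral floating-point value: for
// example a constant such as 4.0, the result of sitofp/uitofp when infinities
// are excluded, or the result of a rounding intrinsic such as llvm.trunc or
// llvm.floor.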
566static bool isKnownIntegral(const Value *V, const DataLayout &DL,
567 FastMathFlags FMF) {
568 if (isa<PoisonValue>(V))
569 return true;
570 if (isa<UndefValue>(V))
571 return false;
572
573 if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
574 return CF->getValueAPF().isInteger();
575
576 auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
577 const Constant *CV = dyn_cast<Constant>(V);
578 if (VFVTy && CV) {
579 unsigned NumElts = VFVTy->getNumElements();
580 for (unsigned i = 0; i != NumElts; ++i) {
581 Constant *Elt = CV->getAggregateElement(i);
582 if (!Elt)
583 return false;
584 if (isa<PoisonValue>(Elt))
585 continue;
586
587 const ConstantFP *CFP = dyn_cast<ConstantFP>(Elt);
588 if (!CFP || !CFP->getValue().isInteger())
589 return false;
590 }
591
592 return true;
593 }
594
595 const Instruction *I = dyn_cast<Instruction>(V);
596 if (!I)
597 return false;
598
599 switch (I->getOpcode()) {
600 case Instruction::SIToFP:
601 case Instruction::UIToFP:
602 // TODO: Could check nofpclass(inf) on incoming argument
603 if (FMF.noInfs())
604 return true;
605
606 // Need to check int size cannot produce infinity, which computeKnownFPClass
607 // knows how to do already.
608 return false;
609 case Instruction::Call: {
610 const CallInst *CI = cast<CallInst>(I);
611 switch (CI->getIntrinsicID()) {
612 case Intrinsic::trunc:
613 case Intrinsic::floor:
614 case Intrinsic::ceil:
615 case Intrinsic::rint:
616 case Intrinsic::nearbyint:
617 case Intrinsic::round:
618 case Intrinsic::roundeven:
619 return (FMF.noInfs() && FMF.noNaNs()) ||
621 default:
622 break;
623 }
624
625 break;
626 }
627 default:
628 break;
629 }
630
631 return false;
632}
633
634// This function returns false if there is no change; otherwise it returns true.
635bool AMDGPULibCalls::fold(CallInst *CI) {
636 Function *Callee = CI->getCalledFunction();
637 // Ignore indirect calls.
638 if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
639 return false;
640
641 FuncInfo FInfo;
642 if (!parseFunctionName(Callee->getName(), FInfo))
643 return false;
644
645 // Further check the number of arguments to see if they match.
646 // TODO: Check calling convention matches too
647 if (!FInfo.isCompatibleSignature(*Callee->getParent(), CI->getFunctionType()))
648 return false;
649
650 LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n');
651
652 if (TDOFold(CI, FInfo))
653 return true;
654
655 IRBuilder<> B(CI);
656 if (CI->isStrictFP())
657 B.setIsFPConstrained(true);
658
659 if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) {
660 // Under unsafe-math, evaluate calls if possible.
661 // According to Brian Sumner, we can do this for all f32 function calls
662 // using host's double function calls.
663 if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo))
664 return true;
665
666 // Copy fast flags from the original call.
667 FastMathFlags FMF = FPOp->getFastMathFlags();
668 B.setFastMathFlags(FMF);
669
670 // Specialized optimizations for each function call.
671 //
672 // TODO: Handle native functions
673 switch (FInfo.getId()) {
675 if (FMF.none())
676 return false;
677 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
678 FMF.approxFunc());
680 if (FMF.none())
681 return false;
682 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
683 FMF.approxFunc());
685 if (FMF.none())
686 return false;
687 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
688 FMF.approxFunc());
690 if (FMF.none())
691 return false;
692 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
693 FMF.approxFunc());
695 if (FMF.none())
696 return false;
697 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
698 FMF.approxFunc());
700 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
701 true, true);
703 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
704 true, true);
706 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
707 true);
709 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
710 true, true);
712 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
713 true, true);
715 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
716 true, true, true);
718 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
719 true);
721 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
722 true);
724 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
725 true);
727 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
728 true);
730 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
731 true);
733 if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
734 return false;
735
736 Value *Arg1 = CI->getArgOperand(1);
737 if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType());
738 VecTy && !isa<VectorType>(Arg1->getType())) {
739 Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
740 CI->setArgOperand(1, SplatArg1);
741 }
742
743 CI->setCalledFunction(Intrinsic::getOrInsertDeclaration(
744 CI->getModule(), Intrinsic::ldexp,
745 {CI->getType(), CI->getArgOperand(1)->getType()}));
746 return true;
747 }
749 Module *M = Callee->getParent();
750 AMDGPULibFunc PowrInfo(AMDGPULibFunc::EI_POWR, FInfo);
751 FunctionCallee PowrFunc = getFunction(M, PowrInfo);
752 CallInst *Call = cast<CallInst>(FPOp);
753
754 // pow(x, y) -> powr(x, y) for x >= -0.0
755 // TODO: Account for flags on current call
756 if (PowrFunc &&
757 cannotBeOrderedLessThanZero(
758 FPOp->getOperand(0),
759 SimplifyQuery(M->getDataLayout(), TLInfo, DT, AC, Call))) {
760 Call->setCalledFunction(PowrFunc);
761 return fold_pow(FPOp, B, PowrInfo) || true;
762 }
763
764 // pow(x, y) -> pown(x, y) for known integral y
765 if (isKnownIntegral(FPOp->getOperand(1), M->getDataLayout(),
766 FPOp->getFastMathFlags())) {
767 FunctionType *PownType = getPownType(CI->getFunctionType());
768 AMDGPULibFunc PownInfo(AMDGPULibFunc::EI_POWN, PownType, true);
769 FunctionCallee PownFunc = getFunction(M, PownInfo);
770 if (PownFunc) {
771 // TODO: If the incoming integral value is an sitofp/uitofp, it won't
772 // fold out without a known range. We can probably take the source
773 // value directly.
774 Value *CastedArg =
775 B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
776 // Have to drop any nofpclass attributes on the original call site.
777 Call->removeParamAttrs(
778 1, AttributeFuncs::typeIncompatible(CastedArg->getType(),
779 Call->getParamAttributes(1)));
780 Call->setCalledFunction(PownFunc);
781 Call->setArgOperand(1, CastedArg);
782 return fold_pow(FPOp, B, PownInfo) || true;
783 }
784 }
785
786 return fold_pow(FPOp, B, FInfo);
787 }
790 return fold_pow(FPOp, B, FInfo);
792 return fold_rootn(FPOp, B, FInfo);
794 // TODO: Allow with strictfp + constrained intrinsic
795 return tryReplaceLibcallWithSimpleIntrinsic(
796 B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
799 return fold_sincos(FPOp, B, FInfo);
800 default:
801 break;
802 }
803 } else {
804 // Specialized optimizations for each function call
805 switch (FInfo.getId()) {
810 return fold_read_write_pipe(CI, B, FInfo);
811 default:
812 break;
813 }
814 }
815
816 return false;
817}
818
819bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
820 // Table-Driven optimization
821 const TableRef tr = getOptTable(FInfo.getId());
822 if (tr.empty())
823 return false;
824
825 int const sz = (int)tr.size();
826 Value *opr0 = CI->getArgOperand(0);
827
828 if (getVecSize(FInfo) > 1) {
829 if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
830 SmallVector<double, 0> DVal;
831 for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
832 ConstantFP *eltval = dyn_cast<ConstantFP>(
833 CV->getElementAsConstant((unsigned)eltNo));
834 assert(eltval && "Non-FP arguments in math function!");
835 bool found = false;
836 for (int i=0; i < sz; ++i) {
837 if (eltval->isExactlyValue(tr[i].input)) {
838 DVal.push_back(tr[i].result);
839 found = true;
840 break;
841 }
842 }
843 if (!found) {
844 // This vector constant is not handled yet.
845 return false;
846 }
847 }
848 LLVMContext &context = CI->getParent()->getParent()->getContext();
849 Constant *nval;
850 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
851 SmallVector<float, 0> FVal;
852 for (double D : DVal)
853 FVal.push_back((float)D);
854 ArrayRef<float> tmp(FVal);
855 nval = ConstantDataVector::get(context, tmp);
856 } else { // F64
857 ArrayRef<double> tmp(DVal);
858 nval = ConstantDataVector::get(context, tmp);
859 }
860 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
861 replaceCall(CI, nval);
862 return true;
863 }
864 } else {
865 // Scalar version
866 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
867 for (int i = 0; i < sz; ++i) {
868 if (CF->isExactlyValue(tr[i].input)) {
869 Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
870 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
871 replaceCall(CI, nval);
872 return true;
873 }
874 }
875 }
876 }
877
878 return false;
879}
880
881namespace llvm {
882static double log2(double V) {
883#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
884 return ::log2(V);
885#else
886 return log(V) / numbers::ln2;
887#endif
888}
889} // namespace llvm
890
891bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
892 const FuncInfo &FInfo) {
893 assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
894 FInfo.getId() == AMDGPULibFunc::EI_POWR ||
895 FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
896 "fold_pow: encounter a wrong function call");
897
898 Module *M = B.GetInsertBlock()->getModule();
899 Type *eltType = FPOp->getType()->getScalarType();
900 Value *opr0 = FPOp->getOperand(0);
901 Value *opr1 = FPOp->getOperand(1);
902
903 const APFloat *CF = nullptr;
904 const APInt *CINT = nullptr;
905 if (!match(opr1, m_APFloatAllowPoison(CF)))
906 match(opr1, m_APIntAllowPoison(CINT));
907
908 // 0x1111111 means that we don't do anything for this call.
909 int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
910
911 if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
912 // pow/powr/pown(x, 0) == 1
913 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
914 Constant *cnval = ConstantFP::get(eltType, 1.0);
915 if (getVecSize(FInfo) > 1) {
916 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
917 }
918 replaceCall(FPOp, cnval);
919 return true;
920 }
921 if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
922 // pow/powr/pown(x, 1.0) = x
923 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
924 replaceCall(FPOp, opr0);
925 return true;
926 }
927 if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
928 // pow/powr/pown(x, 2.0) = x*x
929 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
930 << *opr0 << "\n");
931 Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
932 replaceCall(FPOp, nval);
933 return true;
934 }
935 if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
936 // pow/powr/pown(x, -1.0) = 1.0/x
937 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
938 Constant *cnval = ConstantFP::get(eltType, 1.0);
939 if (getVecSize(FInfo) > 1) {
940 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
941 }
942 Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
943 replaceCall(FPOp, nval);
944 return true;
945 }
946
947 if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
948 // pow[r](x, [-]0.5) = sqrt(x)
949 bool issqrt = CF->isExactlyValue(0.5);
950 if (FunctionCallee FPExpr =
951 getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
952 : AMDGPULibFunc::EI_RSQRT,
953 FInfo))) {
954 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
955 << '(' << *opr0 << ")\n");
956 Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
957 : "__pow2rsqrt");
958 replaceCall(FPOp, nval);
959 return true;
960 }
961 }
962
963 if (!isUnsafeFiniteOnlyMath(FPOp))
964 return false;
965
966 // Unsafe Math optimization
967
968 // Remember that ci_opr1 is set if opr1 is integral
969 if (CF) {
970 double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
971 ? (double)CF->convertToFloat()
972 : CF->convertToDouble();
973 int ival = (int)dval;
974 if ((double)ival == dval) {
975 ci_opr1 = ival;
976 } else
977 ci_opr1 = 0x11111111;
978 }
979
980 // pow/powr/pown(x, c) = [1/](x*x*..x); where
981 // trunc(c) == c && the number of x == c && |c| <= 12
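// Illustration: for c == 11 (binary 1011) the loop below forms x, x^2, x^4, x^8
// by repeated squaring and multiplies together the powers whose bit is set,
// giving x * x^2 * x^8 == x^11; a negative c then takes the reciprocal of the
// product.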
982 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
983 if (abs_opr1 <= 12) {
984 Constant *cnval;
985 Value *nval;
986 if (abs_opr1 == 0) {
987 cnval = ConstantFP::get(eltType, 1.0);
988 if (getVecSize(FInfo) > 1) {
989 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
990 }
991 nval = cnval;
992 } else {
993 Value *valx2 = nullptr;
994 nval = nullptr;
995 while (abs_opr1 > 0) {
996 valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
997 if (abs_opr1 & 1) {
998 nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
999 }
1000 abs_opr1 >>= 1;
1001 }
1002 }
1003
1004 if (ci_opr1 < 0) {
1005 cnval = ConstantFP::get(eltType, 1.0);
1006 if (getVecSize(FInfo) > 1) {
1007 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
1008 }
1009 nval = B.CreateFDiv(cnval, nval, "__1powprod");
1010 }
1011 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
1012 << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
1013 << ")\n");
1014 replaceCall(FPOp, nval);
1015 return true;
1016 }
1017
1018 // If we should use the generic intrinsic instead of emitting a libcall
1019 const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();
1020
1021 // powr ---> exp2(y * log2(x))
1022 // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
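// Illustration of the sign handling further down: ((int)y << 31) is nonzero
// only when y is odd, so AND-ing it with the bit pattern of x extracts x's sign
// bit exactly when a negative base is raised to an odd power; OR-ing that bit
// into exp2(y * log2(|x|)) restores the expected sign of the result.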
1023 FunctionCallee ExpExpr;
1024 if (ShouldUseIntrinsic)
1025 ExpExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::exp2,
1026 {FPOp->getType()});
1027 else {
1028 ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
1029 if (!ExpExpr)
1030 return false;
1031 }
1032
1033 bool needlog = false;
1034 bool needabs = false;
1035 bool needcopysign = false;
1036 Constant *cnval = nullptr;
1037 if (getVecSize(FInfo) == 1) {
1038 CF = nullptr;
1039 match(opr0, m_APFloatAllowPoison(CF));
1040
1041 if (CF) {
1042 double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1043 ? (double)CF->convertToFloat()
1044 : CF->convertToDouble();
1045
1046 V = log2(std::abs(V));
1047 cnval = ConstantFP::get(eltType, V);
1048 needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
1049 CF->isNegative();
1050 } else {
1051 needlog = true;
1052 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
1053 }
1054 } else {
1055 ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
1056
1057 if (!CDV) {
1058 needlog = true;
1059 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
1060 } else {
1061 assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
1062 "Wrong vector size detected");
1063
1064 SmallVector<double, 0> DVal;
1065 for (int i=0; i < getVecSize(FInfo); ++i) {
1066 double V = CDV->getElementAsAPFloat(i).convertToDouble();
1067 if (V < 0.0) needcopysign = true;
1068 V = log2(std::abs(V));
1069 DVal.push_back(V);
1070 }
1071 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1072 SmallVector<float, 0> FVal;
1073 for (double D : DVal)
1074 FVal.push_back((float)D);
1075 ArrayRef<float> tmp(FVal);
1076 cnval = ConstantDataVector::get(M->getContext(), tmp);
1077 } else {
1078 ArrayRef<double> tmp(DVal);
1079 cnval = ConstantDataVector::get(M->getContext(), tmp);
1080 }
1081 }
1082 }
1083
1084 if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
1085 // We cannot handle corner cases for a general pow() function, give up
1086 // unless y is a constant integral value. Then proceed as if it were pown.
1087 if (!isKnownIntegral(opr1, M->getDataLayout(), FPOp->getFastMathFlags()))
1088 return false;
1089 }
1090
1091 Value *nval;
1092 if (needabs) {
1093 nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs");
1094 } else {
1095 nval = cnval ? cnval : opr0;
1096 }
1097 if (needlog) {
1098 FunctionCallee LogExpr;
1099 if (ShouldUseIntrinsic) {
1100 LogExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::log2,
1101 {FPOp->getType()});
1102 } else {
1103 LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1104 if (!LogExpr)
1105 return false;
1106 }
1107
1108 nval = CreateCallEx(B,LogExpr, nval, "__log2");
1109 }
1110
1111 if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
1112 // convert int(32) to fp(f32 or f64)
1113 opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1114 }
1115 nval = B.CreateFMul(opr1, nval, "__ylogx");
1116 nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
1117
1118 if (needcopysign) {
1119 Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
1120 Type *nTy = FPOp->getType()->getWithNewType(nTyS);
1121 unsigned size = nTy->getScalarSizeInBits();
1122 Value *opr_n = FPOp->getOperand(1);
1123 if (opr_n->getType()->getScalarType()->isIntegerTy())
1124 opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
1125 else
1126 opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1127
1128 Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1129 sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1130 nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1131 nval = B.CreateBitCast(nval, opr0->getType());
1132 }
1133
1134 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
1135 << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1136 replaceCall(FPOp, nval);
1137
1138 return true;
1139}
1140
1141bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
1142 const FuncInfo &FInfo) {
1143 Value *opr0 = FPOp->getOperand(0);
1144 Value *opr1 = FPOp->getOperand(1);
1145
1146 const APInt *CINT = nullptr;
1147 if (!match(opr1, m_APIntAllowPoison(CINT)))
1148 return false;
1149
1150 Function *Parent = B.GetInsertBlock()->getParent();
1151
1152 int ci_opr1 = (int)CINT->getSExtValue();
1153 if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
1154 // rootn(x, 1) = x
1155 //
1156 // TODO: Insert constrained canonicalize for strictfp case.
1157 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
1158 replaceCall(FPOp, opr0);
1159 return true;
1160 }
1161
1162 Module *M = B.GetInsertBlock()->getModule();
1163
1164 CallInst *CI = cast<CallInst>(FPOp);
1165 if (ci_opr1 == 2 &&
1166 shouldReplaceLibcallWithIntrinsic(CI,
1167 /*AllowMinSizeF32=*/true,
1168 /*AllowF64=*/true)) {
1169 // rootn(x, 2) = sqrt(x)
1170 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << ")\n");
1171
1172 CallInst *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1173 NewCall->takeName(CI);
1174
1175 // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some
1176 // metadata.
1177 MDBuilder MDHelper(M->getContext());
1178 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1179 NewCall->setMetadata(LLVMContext::MD_fpmath, FPMD);
1180
1181 replaceCall(CI, NewCall);
1182 return true;
1183 }
1184
1185 if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1186 if (FunctionCallee FPExpr =
1187 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1188 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
1189 << ")\n");
1190 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1191 replaceCall(FPOp, nval);
1192 return true;
1193 }
1194 } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1195 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
1196 Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1197 opr0,
1198 "__rootn2div");
1199 replaceCall(FPOp, nval);
1200 return true;
1201 }
1202
1203 if (ci_opr1 == -2 &&
1204 shouldReplaceLibcallWithIntrinsic(CI,
1205 /*AllowMinSizeF32=*/true,
1206 /*AllowF64=*/true)) {
1207 // rootn(x, -2) = rsqrt(x)
1208
1209 // The original rootn had looser ulp requirements than the resultant sqrt
1210 // and fdiv.
1211 MDBuilder MDHelper(M->getContext());
1212 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1213
1214 // TODO: Could handle strictfp but need to fix strict sqrt emission
1215 FastMathFlags FMF = FPOp->getFastMathFlags();
1216 FMF.setAllowContract(true);
1217
1218 CallInst *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1219 Instruction *RSqrt = cast<Instruction>(
1220 B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), Sqrt));
1221 Sqrt->setFastMathFlags(FMF);
1222 RSqrt->setFastMathFlags(FMF);
1223 RSqrt->setMetadata(LLVMContext::MD_fpmath, FPMD);
1224
1225 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
1226 << ")\n");
1227 replaceCall(CI, RSqrt);
1228 return true;
1229 }
1230
1231 return false;
1232}
1233
1234// Get a scalar native builtin single argument FP function
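// For example, given the f32 cos description this would presumably return the
// native_* variant of cos from the device library if it exists, and nullptr for
// f64 or for functions without a native counterpart.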
1235FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1236 const FuncInfo &FInfo) {
1237 if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1238 return nullptr;
1239 FuncInfo nf = FInfo;
1240 nf.setPrefix(AMDGPULibFunc::NATIVE);
1241 return getFunction(M, nf);
1242}
1243
1244// Some library calls are just wrappers around llvm intrinsics, but compiled
1245// conservatively. Preserve the flags from the original call site by
1246// substituting them with direct calls with all the flags.
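// For example, fold() above uses this check before turning a library min()/max()
// style call into a direct llvm.minnum/llvm.maxnum intrinsic call, which keeps
// the fast-math flags already present on the call site.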
1247bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
1248 bool AllowMinSizeF32,
1249 bool AllowF64,
1250 bool AllowStrictFP) {
1251 Type *FltTy = CI->getType()->getScalarType();
1252 const bool IsF32 = FltTy->isFloatTy();
1253
1254 // f64 intrinsics aren't implemented for most operations.
1255 if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy()))
1256 return false;
1257
1258 // We're implicitly inlining by replacing the libcall with the intrinsic, so
1259 // don't do it for noinline call sites.
1260 if (CI->isNoInline())
1261 return false;
1262
1263 const Function *ParentF = CI->getFunction();
1264 // TODO: Handle strictfp
1265 if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
1266 return false;
1267
1268 if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
1269 return false;
1270 return true;
1271}
1272
1273void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
1274 CallInst *CI,
1275 Intrinsic::ID IntrID) {
1276 if (CI->arg_size() == 2) {
1277 Value *Arg0 = CI->getArgOperand(0);
1278 Value *Arg1 = CI->getArgOperand(1);
1279 VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
1280 VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
1281 if (Arg0VecTy && !Arg1VecTy) {
1282 Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
1283 CI->setArgOperand(1, SplatRHS);
1284 } else if (!Arg0VecTy && Arg1VecTy) {
1285 Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
1286 CI->setArgOperand(0, SplatLHS);
1287 }
1288 }
1289
1290 CI->setCalledFunction(Intrinsic::getOrInsertDeclaration(
1291 CI->getModule(), IntrID, {CI->getType()}));
1292}
1293
1294bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
1295 IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
1296 bool AllowF64, bool AllowStrictFP) {
1297 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
1298 AllowStrictFP))
1299 return false;
1300 replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
1301 return true;
1302}
1303
1304std::tuple<Value *, Value *, Value *>
1305AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
1306 FunctionCallee Fsincos) {
1307 DebugLoc DL = B.getCurrentDebugLocation();
1308 Function *F = B.GetInsertBlock()->getParent();
1309 B.SetInsertPointPastAllocas(F);
1310
1311 AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_");
1312
1313 if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
1314 // If the argument is an instruction, it must dominate all uses so put our
1315 // sincos call there. Otherwise, right after the allocas works well enough
1316 // if it's an argument or constant.
1317
1318 B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
1319
1320 // SetInsertPoint unwelcomely always tries to set the debug loc.
1321 B.SetCurrentDebugLocation(DL);
1322 }
1323
1324 Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1);
1325
1326 // The alloca allocates the memory in the private address space, which needs
1327 // to be addrspacecast to match the address space of the cos pointer type.
1328 // In OpenCL 2.0 this is generic, while in 1.2 it is private.
1329 Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);
1330
1331 CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc);
1332
1333 // TODO: Is it worth trying to preserve the location for the cos calls for the
1334 // load?
1335
1336 LoadInst *LoadCos = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1337 return {SinCos, LoadCos, SinCos};
1338}
1339
1340// fold sin, cos -> sincos.
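// For example, if the same function computes both sin(x) and cos(x) for the same
// argument x, the two calls are rewritten to use a single sincos(x, &c) call:
// the sin value is the call's return value and the cos value is loaded from the
// pointer argument.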
1341bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
1342 const FuncInfo &fInfo) {
1343 assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1344 fInfo.getId() == AMDGPULibFunc::EI_COS);
1345
1346 if ((getArgType(fInfo) != AMDGPULibFunc::F32 &&
1347 getArgType(fInfo) != AMDGPULibFunc::F64) ||
1348 fInfo.getPrefix() != AMDGPULibFunc::NOPFX)
1349 return false;
1350
1351 bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1352
1353 Value *CArgVal = FPOp->getOperand(0);
1354
1355 // TODO: Constant fold the call
1356 if (isa<ConstantData>(CArgVal))
1357 return false;
1358
1359 CallInst *CI = cast<CallInst>(FPOp);
1360
1361 Function *F = B.GetInsertBlock()->getParent();
1362 Module *M = F->getParent();
1363
1364 // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer
1365 // implementation. Prefer the private form if available.
1366 AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo);
1367 SinCosLibFuncPrivate.getLeads()[0].PtrKind =
1368 AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::PRIVATE_ADDRESS);
1369
1370 AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo);
1371 SinCosLibFuncGeneric.getLeads()[0].PtrKind =
1372 AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
1373
1374 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
1375 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
1376 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
1377 if (!FSinCos)
1378 return false;
1379
1380 SmallVector<CallInst *> SinCalls;
1381 SmallVector<CallInst *> CosCalls;
1382 SmallVector<CallInst *> SinCosCalls;
1383 FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
1384 fInfo);
1385 const std::string PairName = PartnerInfo.mangle();
1386
1387 StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName;
1388 StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName();
1389 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
1390 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
1391
1392 // Intersect the two sets of flags.
1393 FastMathFlags FMF = FPOp->getFastMathFlags();
1394 MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath);
1395
1396 SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()};
1397
1398 for (User* U : CArgVal->users()) {
1399 CallInst *XI = dyn_cast<CallInst>(U);
1400 if (!XI || XI->getFunction() != F || XI->isNoBuiltin())
1401 continue;
1402
1403 Function *UCallee = XI->getCalledFunction();
1404 if (!UCallee)
1405 continue;
1406
1407 bool Handled = true;
1408
1409 if (UCallee->getName() == SinName)
1410 SinCalls.push_back(XI);
1411 else if (UCallee->getName() == CosName)
1412 CosCalls.push_back(XI);
1413 else if (UCallee->getName() == SinCosPrivateName ||
1414 UCallee->getName() == SinCosGenericName)
1415 SinCosCalls.push_back(XI);
1416 else
1417 Handled = false;
1418
1419 if (Handled) {
1420 MergeDbgLocs.push_back(XI->getDebugLoc());
1421 auto *OtherOp = cast<FPMathOperator>(XI);
1422 FMF &= OtherOp->getFastMathFlags();
1423 FPMath = MDNode::getMostGenericFPMath(
1424 FPMath, XI->getMetadata(LLVMContext::MD_fpmath));
1425 }
1426 }
1427
1428 if (SinCalls.empty() || CosCalls.empty())
1429 return false;
1430
1431 B.setFastMathFlags(FMF);
1432 B.setDefaultFPMathTag(FPMath);
1433 DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs);
1434 B.SetCurrentDebugLocation(DbgLoc);
1435
1436 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);
1437
1438 auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) {
1439 for (CallInst *C : Calls)
1440 C->replaceAllUsesWith(Res);
1441
1442 // Leave the other dead instructions to avoid clobbering iterators.
1443 };
1444
1445 replaceTrigInsts(SinCalls, Sin);
1446 replaceTrigInsts(CosCalls, Cos);
1447 replaceTrigInsts(SinCosCalls, SinCos);
1448
1449 // It's safe to delete the original now.
1450 CI->eraseFromParent();
1451 return true;
1452}
1453
1454bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
1455 double &Res1, Constant *copr0,
1456 Constant *copr1) {
1457 // By default, opr0/opr1 hold values of float/double type.
1458 // If they are not float/double, each function has to handle its
1459 // operand separately.
1460 double opr0 = 0.0, opr1 = 0.0;
1461 const ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
1462 const ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
1463 if (fpopr0) {
1464 opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1465 ? fpopr0->getValueAPF().convertToDouble()
1466 : (double)fpopr0->getValueAPF().convertToFloat();
1467 }
1468
1469 if (fpopr1) {
1470 opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1471 ? fpopr1->getValueAPF().convertToDouble()
1472 : (double)fpopr1->getValueAPF().convertToFloat();
1473 }
1474
1475 switch (FInfo.getId()) {
1476 default : return false;
1477
1479 Res0 = acos(opr0);
1480 return true;
1481
1483 // acosh(x) == log(x + sqrt(x*x - 1))
1484 Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1485 return true;
1486
1488 Res0 = acos(opr0) / MATH_PI;
1489 return true;
1490
1492 Res0 = asin(opr0);
1493 return true;
1494
1496 // asinh(x) == log(x + sqrt(x*x + 1))
1497 Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1498 return true;
1499
1501 Res0 = asin(opr0) / MATH_PI;
1502 return true;
1503
1505 Res0 = atan(opr0);
1506 return true;
1507
1509 // atanh(x) == (log(x+1) - log(x-1))/2;
1510 Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
1511 return true;
1512
1514 Res0 = atan(opr0) / MATH_PI;
1515 return true;
1516
1518 Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1519 return true;
1520
1522 Res0 = cos(opr0);
1523 return true;
1524
1526 Res0 = cosh(opr0);
1527 return true;
1528
1530 Res0 = cos(MATH_PI * opr0);
1531 return true;
1532
1534 Res0 = exp(opr0);
1535 return true;
1536
1538 Res0 = pow(2.0, opr0);
1539 return true;
1540
1542 Res0 = pow(10.0, opr0);
1543 return true;
1544
1546 Res0 = log(opr0);
1547 return true;
1548
1550 Res0 = log(opr0) / log(2.0);
1551 return true;
1552
1554 Res0 = log(opr0) / log(10.0);
1555 return true;
1556
1558 Res0 = 1.0 / sqrt(opr0);
1559 return true;
1560
1562 Res0 = sin(opr0);
1563 return true;
1564
1566 Res0 = sinh(opr0);
1567 return true;
1568
1570 Res0 = sin(MATH_PI * opr0);
1571 return true;
1572
1574 Res0 = tan(opr0);
1575 return true;
1576
1578 Res0 = tanh(opr0);
1579 return true;
1580
1582 Res0 = tan(MATH_PI * opr0);
1583 return true;
1584
1585 // two-arg functions
1588 Res0 = pow(opr0, opr1);
1589 return true;
1590
1592 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1593 double val = (double)iopr1->getSExtValue();
1594 Res0 = pow(opr0, val);
1595 return true;
1596 }
1597 return false;
1598 }
1599
1601 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1602 double val = (double)iopr1->getSExtValue();
1603 Res0 = pow(opr0, 1.0 / val);
1604 return true;
1605 }
1606 return false;
1607 }
1608
1609 // with ptr arg
1611 Res0 = sin(opr0);
1612 Res1 = cos(opr0);
1613 return true;
1614 }
1615
1616 return false;
1617}
1618
1619bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1620 int numArgs = (int)aCI->arg_size();
1621 if (numArgs > 3)
1622 return false;
1623
1624 Constant *copr0 = nullptr;
1625 Constant *copr1 = nullptr;
1626 if (numArgs > 0) {
1627 if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1628 return false;
1629 }
1630
1631 if (numArgs > 1) {
1632 if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1633 if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1634 return false;
1635 }
1636 }
1637
1638 // At this point, all arguments to aCI are constants.
1639
1640 // max vector size is 16, and sincos will generate two results.
1641 double DVal0[16], DVal1[16];
1642 int FuncVecSize = getVecSize(FInfo);
1643 bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1644 if (FuncVecSize == 1) {
1645 if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) {
1646 return false;
1647 }
1648 } else {
1649 ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1650 ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1651 for (int i = 0; i < FuncVecSize; ++i) {
1652 Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1653 Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1654 if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) {
1655 return false;
1656 }
1657 }
1658 }
1659
1660 LLVMContext &context = aCI->getContext();
1661 Constant *nval0, *nval1;
1662 if (FuncVecSize == 1) {
1663 nval0 = ConstantFP::get(aCI->getType(), DVal0[0]);
1664 if (hasTwoResults)
1665 nval1 = ConstantFP::get(aCI->getType(), DVal1[0]);
1666 } else {
1667 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1668 SmallVector <float, 0> FVal0, FVal1;
1669 for (int i = 0; i < FuncVecSize; ++i)
1670 FVal0.push_back((float)DVal0[i]);
1671 ArrayRef<float> tmp0(FVal0);
1672 nval0 = ConstantDataVector::get(context, tmp0);
1673 if (hasTwoResults) {
1674 for (int i = 0; i < FuncVecSize; ++i)
1675 FVal1.push_back((float)DVal1[i]);
1676 ArrayRef<float> tmp1(FVal1);
1677 nval1 = ConstantDataVector::get(context, tmp1);
1678 }
1679 } else {
1680 ArrayRef<double> tmp0(DVal0);
1681 nval0 = ConstantDataVector::get(context, tmp0);
1682 if (hasTwoResults) {
1683 ArrayRef<double> tmp1(DVal1);
1684 nval1 = ConstantDataVector::get(context, tmp1);
1685 }
1686 }
1687 }
1688
1689 if (hasTwoResults) {
1690 // sincos
1691 assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
1692 "math function with ptr arg not supported yet");
1693 new StoreInst(nval1, aCI->getArgOperand(1), aCI->getIterator());
1694 }
1695
1696 replaceCall(aCI, nval0);
1697 return true;
1698}
1699
1700PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
1701 FunctionAnalysisManager &AM) {
1702 AMDGPULibCalls Simplifier;
1703 Simplifier.initNativeFuncs();
1704 Simplifier.initFunction(F, AM);
1705
1706 bool Changed = false;
1707
1708 LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1709 F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1710
1711 for (auto &BB : F) {
1712 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1713 // Ignore non-calls.
1714 CallInst *CI = dyn_cast<CallInst>(I);
1715 ++I;
1716
1717 if (CI) {
1718 if (Simplifier.fold(CI))
1719 Changed = true;
1720 }
1721 }
1722 }
1723 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1724}
1725
1726PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
1727 FunctionAnalysisManager &AM) {
1728 if (UseNative.empty())
1729 return PreservedAnalyses::all();
1730
1731 AMDGPULibCalls Simplifier;
1732 Simplifier.initNativeFuncs();
1733 Simplifier.initFunction(F, AM);
1734
1735 bool Changed = false;
1736 for (auto &BB : F) {
1737 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1738 // Ignore non-calls.
1739 CallInst *CI = dyn_cast<CallInst>(I);
1740 ++I;
1741 if (CI && Simplifier.useNative(CI))
1742 Changed = true;
1743 }
1744 }
1745 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1746}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isKnownIntegral(const Value *V, const DataLayout &DL, FastMathFlags FMF)
static const TableEntry tbl_log[]
static const TableEntry tbl_tgamma[]
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_expm1[]
static const TableEntry tbl_asinpi[]
static const TableEntry tbl_cos[]
#define MATH_SQRT2
static const TableEntry tbl_exp10[]
static CallInst * CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name="")
static CallInst * CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, Value *Arg2, const Twine &Name="")
static const TableEntry tbl_rsqrt[]
static const TableEntry tbl_atanh[]
static const TableEntry tbl_cosh[]
static const TableEntry tbl_asin[]
static const TableEntry tbl_sinh[]
static const TableEntry tbl_acos[]
static const TableEntry tbl_tan[]
static const TableEntry tbl_cospi[]
static const TableEntry tbl_tanpi[]
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static bool HasNative(AMDGPULibFunc::EFuncId id)
ArrayRef< TableEntry > TableRef
static int getVecSize(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_sin[]
static const TableEntry tbl_atan[]
static const TableEntry tbl_log2[]
static const TableEntry tbl_acospi[]
static const TableEntry tbl_sqrt[]
static const TableEntry tbl_asinh[]
#define MATH_E
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
static const TableEntry tbl_acosh[]
static const TableEntry tbl_exp[]
static const TableEntry tbl_cbrt[]
static const TableEntry tbl_sinpi[]
static const TableEntry tbl_atanpi[]
#define MATH_PI
static FunctionType * getPownType(FunctionType *FT)
static const TableEntry tbl_erf[]
static const TableEntry tbl_log10[]
#define MATH_SQRT1_2
static const TableEntry tbl_erfc[]
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static const TableEntry tbl_tanh[]
static const TableEntry tbl_exp2[]
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
loop term fold
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
FunctionAnalysisManager FAM
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:77
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static void replaceCall(FPMathOperator *I, Value *With)
bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const
bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const
bool fold(CallInst *CI)
static void replaceCall(Instruction *I, Value *With)
bool useNative(CallInst *CI)
void initFunction(Function &F, FunctionAnalysisManager &FAM)
AMDGPULibCalls()=default
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Wrapper class for AMDGPULIbFuncImpl.
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
void setPrefix(ENamePrefix PFX)
bool isCompatibleSignature(const Module &M, const FunctionType *FuncTy) const
EFuncId getId() const
bool isMangled() const
Param * getLeads()
Get leading parameters for mangled lib functions.
void setId(EFuncId Id)
ENamePrefix getPrefix() const
bool isNegative() const
Definition APFloat.h:1449
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:6115
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1432
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6143
bool isZero() const
Definition APFloat.h:1445
bool isInteger() const
Definition APFloat.h:1466
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
bool isNoInline() const
Return true if the call should not be inlined.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI APFloat getElementAsAPFloat(uint64_t i) const
If this is a sequential container of floating point type, return the specified element as an APFloat.
LLVM_ABI Constant * getElementAsConstant(uint64_t i) const
Return a Constant for a specified index's element.
LLVM_ABI uint64_t getNumElements() const
Return the number of elements in the array or vector.
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:776
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
const APFloat & getValue() const
Definition Constants.h:321
const APFloat & getValueAPF() const
Definition Constants.h:320
LLVM_ABI bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
Align getAlignValue() const
Return the constant as an llvm::Align, interpreting 0 as Align(1).
Definition Constants.h:181
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
static LLVM_ABI DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
bool isFast() const
Test if this operation allows all non-strict floating-point transforms.
Definition Operator.h:286
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
Definition Operator.h:302
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:333
bool hasNoInfs() const
Test if this operation's arguments and results are assumed not-infinite.
Definition Operator.h:307
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
Definition Operator.h:328
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
void setAllowContract(bool B=true)
Definition FMF.h:90
bool noInfs() const
Definition FMF.h:66
bool none() const
Definition FMF.h:57
bool approxFunc() const
Definition FMF.h:70
bool noNaNs() const
Definition FMF.h:65
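A hedged sketch of how the FPMathOperator and FastMathFlags queries above are typically combined; the helper name and the exact flag policy are illustrative, not a statement of this pass's rules.

#include "llvm/IR/Operator.h"

// Is this FP instruction relaxed enough for an approximate rewrite?
static bool allowsApproxRewrite(const llvm::Instruction *I) {
  using namespace llvm;
  const auto *FPOp = dyn_cast<FPMathOperator>(I);
  if (!FPOp)
    return false;
  FastMathFlags FMF = FPOp->getFastMathFlags();
  // 'fast' implies all relaxations; otherwise require afn plus nnan/ninf.
  return FPOp->isFast() ||
         (FMF.approxFunc() && FMF.noNaNs() && FMF.noInfs());
}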
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionType * getFunctionType()
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
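FunctionType and FunctionCallee usually appear together: build the type, then ask the module for a callee handle. A minimal sketch, assuming a unary float -> float signature (the function name is supplied by the caller).

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"

// Declare (or find) a unary f32 function and return a callable handle.
static llvm::FunctionCallee getUnaryF32Callee(llvm::Module &M,
                                              llvm::StringRef Name) {
  using namespace llvm;
  Type *FloatTy = Type::getFloatTy(M.getContext());
  FunctionType *FTy =
      FunctionType::get(FloatTy, {FloatTy}, /*isVarArg=*/false);
  return M.getOrInsertFunction(Name, FTy);
}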
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...

LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
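The metadata and fast-math-flag accessors above are what a folder uses to keep IR flags when one FP instruction replaces another. A sketch, assuming both instructions are FP operations; the helper name is made up.

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"

// Copy fast-math flags and merge !fpmath accuracy metadata onto a replacement.
static void copyFPIRFlags(llvm::Instruction *New, llvm::Instruction *Old) {
  using namespace llvm;
  if (auto *OldFPOp = dyn_cast<FPMathOperator>(Old))
    if (isa<FPMathOperator>(New))
      New->setFastMathFlags(OldFPOp->getFastMathFlags());
  MDNode *FPMath = MDNode::getMostGenericFPMath(
      New->getMetadata(LLVMContext::MD_fpmath),
      Old->getMetadata(LLVMContext::MD_fpmath));
  New->setMetadata(LLVMContext::MD_fpmath, FPMath);
}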
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
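PreservedAnalyses::all() and PreservedAnalyses::none() are the two endpoints of the usual new-pass-manager contract. The skeleton below is a generic illustration with an invented pass name, not the signature of the pass in this file.

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

struct ExampleFoldPass : llvm::PassInfoMixin<ExampleFoldPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &AM) {
    bool Changed = false;
    // ... visit the calls in F and fold what is provably safe ...
    return Changed ? llvm::PreservedAnalyses::none()
                   : llvm::PreservedAnalyses::all();
  }
};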
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given a vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
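A small sketch combining the Type predicates above to classify the scalar FP width of a possibly-vector type; the helper name is illustrative.

#include "llvm/IR/Type.h"

// Return 16/32/64 for half/float/double (or vectors thereof), 0 otherwise.
static unsigned scalarFPBits(llvm::Type *Ty) {
  using namespace llvm;
  Type *EltTy = Ty->getScalarType(); // element type for vectors, Ty otherwise
  if (EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy())
    return EltTy->getScalarSizeInBits();
  return 0;
}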
void dropAllReferences()
Drop all references to operands.
Definition User.h:349
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
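replaceAllUsesWith, takeName and eraseFromParent form the standard replace-and-erase idiom; a minimal sketch, assuming the replacement should inherit the old name when it is itself an instruction.

#include "llvm/IR/Instructions.h"

// Forward all uses of a dead call to its replacement, then delete the call.
static void replaceAndErase(llvm::CallInst *Old, llvm::Value *New) {
  using namespace llvm;
  if (auto *NewI = dyn_cast<Instruction>(New))
    NewI->takeName(Old); // keep the readable SSA name
  Old->replaceAllUsesWith(New);
  Old->eraseFromParent();
}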
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:134
CallInst * Call
Changed
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_ABI APInt pow(const APInt &X, int64_t N)
Compute X^N for N>=0.
Definition APInt.cpp:3155
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
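Intrinsic::getOrInsertDeclaration is the hook for swapping a recognized libcall for an LLVM intrinsic. The sketch below is an illustration, not this file's replacement routine: it assumes a unary FP call and an intrinsic overloaded on the argument type (llvm.sqrt is one such example).

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"

// Rebuild a unary FP libcall as a call to the matching intrinsic.
static void rewriteToIntrinsic(llvm::CallInst *CI, llvm::Intrinsic::ID IID) {
  using namespace llvm;
  Module *M = CI->getModule();
  Value *Arg = CI->getArgOperand(0);
  Function *Decl = Intrinsic::getOrInsertDeclaration(M, IID, {Arg->getType()});
  IRBuilder<> B(CI);
  CallInst *NewCI = B.CreateCall(Decl, {Arg});
  NewCI->copyFastMathFlags(CI);   // keep the original relaxation flags
  CI->replaceAllUsesWith(NewCI);
  CI->eraseFromParent();
}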
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
apfloat_match m_APFloatAllowPoison(const APFloat *&Res)
Match APFloat while allowing poison in splat vector constants.
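The pattern-match helpers above are the idiomatic way to pick out constant operands. A sketch for a constant FP exponent; the names are illustrative.

#include "llvm/ADT/APFloat.h"
#include "llvm/IR/PatternMatch.h"

// True if Expo is a constant FP (scalar or splat, poison lanes allowed).
static bool matchConstantExponent(llvm::Value *Expo, llvm::APFloat &Out) {
  using namespace llvm;
  using namespace llvm::PatternMatch;
  const APFloat *C;
  if (!match(Expo, m_APFloatAllowPoison(C)))
    return false;
  Out = *C;
  return true;
}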
initializer< Ty > init(const Ty &Val)
constexpr double ln2
Definition MathExtras.h:49
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static double log2(double V)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1685
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
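isa<>, cast<> and dyn_cast<> above are the three casting idioms used throughout LLVM passes; a brief illustration (StoreInst is an arbitrary example type).

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

static llvm::StoreInst *asStoreOrNull(llvm::Instruction *I) {
  using namespace llvm;
  if (!isa<StoreInst>(I))    // type test only
    return nullptr;
  return cast<StoreInst>(I); // checked cast, asserts if the type is wrong
  // The two steps above collapse to: return dyn_cast<StoreInst>(I);
}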
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1899
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
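The value-tracking queries above take a SimplifyQuery bundling the data layout with the optional TLI/AC/DT analyses. A hedged sketch of how a pow-style fold might consult them before a log/exp expansion; the helper name and policy are illustrative.

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/SimplifyQuery.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

// Is the first operand provably finite, non-NaN and never less than -0.0?
static bool baseIsSafeForLogExpansion(llvm::CallInst *CI,
                                      const llvm::TargetLibraryInfo *TLI,
                                      llvm::AssumptionCache *AC,
                                      llvm::DominatorTree *DT) {
  using namespace llvm;
  Value *Base = CI->getArgOperand(0);
  SimplifyQuery SQ(CI->getModule()->getDataLayout(), TLI, DT, AC, CI);
  return isKnownNeverInfOrNaN(Base, SQ) &&
         cannotBeOrderedLessThanZero(Base, SQ);
}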
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39