LLVM 22.0.0git
ExpandFp.cpp
Go to the documentation of this file.
1//===--- ExpandFp.cpp - Expand fp instructions ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass expands certain floating point instructions at the IR level.
9//
10// It expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp
11// .. to’ instructions with a bitwidth above a threshold. This is
12// useful for targets like x86_64 that cannot lower fp conversions
13// with more than 128 bits.
14//
15//===----------------------------------------------------------------------===//
16
24#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/Module.h"
31#include "llvm/IR/PassManager.h"
34#include "llvm/Pass.h"
39#include <optional>
40
41#define DEBUG_TYPE "expand-fp"
42
43using namespace llvm;
44
46 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
48 cl::desc("fp convert instructions on integers with "
49 "more than <N> bits are expanded."));
50
51namespace {
52/// This class implements a precise expansion of the frem instruction.
53/// The generated code is based on the fmod implementation in the AMD device
54/// libs.
// NOTE(review): this listing skips a few original lines (the numbering jumps
// 56->58, 101->103 and 272->274). Presumably these are the declaration of the
// builder member `B`, the initializer of `Precision`, and the second operand
// of the FCmpULT in handleInputCornerCases -- confirm against the full file.
55class FRemExpander {
56 /// The IRBuilder to use for the expansion.
58
59 /// Floating point type of the return value and the arguments of the FRem
60 /// instructions that should be expanded.
61 Type *FremTy;
62
63 /// Floating point type to use for the computation. This may be
64 /// wider than the \p FremTy.
65 Type *ComputeFpTy;
66
67 /// Integer type used to hold the exponents returned by frexp.
68 Type *ExTy;
69
70 /// How many bits of the quotient to compute per iteration of the
71 /// algorithm, stored as a value of type \p ExTy.
72 Value *Bits;
73
74 /// Constant 1 of type \p ExTy.
75 Value *One;
76
77public:
 /// Return true if \p Ty is a scalar type this expander accepts: an
 /// IEEE-like floating point type other than bfloat and fp128.
78 static bool canExpandType(Type *Ty) {
79 // TODO The expansion should work for other floating point types
80 // as well, but this would require additional testing.
81 return Ty->isIEEELikeFPTy() && !Ty->isBFloatTy() && !Ty->isFP128Ty();
82 }
83
 /// Create an expander for frem instructions of scalar type \p Ty that
 /// emits its code through \p B. \p Ty must satisfy canExpandType().
84 static FRemExpander create(IRBuilder<> &B, Type *Ty) {
85 assert(canExpandType(Ty));
86
87 // The type to use for the computation of the remainder. This may be
88 // wider than the input/result type which affects the ...
89 Type *ComputeTy = Ty;
90 // ... maximum number of iterations of the remainder computation loop
91 // to use. This value is for the case in which the computation
92 // uses the same input/result type.
93 unsigned MaxIter = 2;
94
95 if (Ty->isHalfTy()) {
96 // Use the wider type and less iterations.
97 ComputeTy = B.getFloatTy();
98 MaxIter = 1;
99 }
100
 // The number of quotient bits per iteration is the precision divided by
 // the maximum iteration count.
101 unsigned Precision =
103 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
104 }
105
106 /// Build the FRem expansion for the numerator \p X and the
107 /// denominator \p Y. The type of X and Y must match \p FremTy. The
108 /// code will be generated at the insertion point of \p B and the
109 /// insertion point will be reset at exit.
110 Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const;
111
112 /// Build an approximate FRem expansion for the numerator \p X and
113 /// the denominator \p Y at the insertion point of builder \p B.
114 /// The type of X and Y must match \p FremTy.
115 Value *buildApproxFRem(Value *X, Value *Y) const;
116
117private:
 // The exponent type is always the 32-bit integer type; \p Bits and the
 // constant one are materialized as constants of that type.
118 FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)
119 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
120 Bits(ConstantInt::get(ExTy, Bits)), One(ConstantInt::get(ExTy, 1)) {};
121
 /// Build the reciprocal 1.0 / \p V in the computation type.
122 Value *createRcp(Value *V, const Twine &Name) const {
123 // Leave it to later optimizations to turn this into an rcp
124 // instruction if available.
125 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
126 }
127
128 // Helper function to build the UPDATE_AX code which is common to the
129 // loop body and the "final iteration".
130 Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {
131 // Build:
132 // float q = rint(ax * ayinv);
133 // ax = fma(-q, ay, ax);
134 // int clt = ax < 0.0f;
135 // float axp = ax + ay;
136 // ax = clt ? axp : ax;
137 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
138 {}, "q");
139 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {}, "ax");
140 Value *Clt = B.CreateFCmp(CmpInst::FCMP_OLT, AxUpdate,
141 ConstantFP::getZero(ComputeFpTy), "clt");
142 Value *Axp = B.CreateFAdd(AxUpdate, Ay, "axp");
143 return B.CreateSelect(Clt, Axp, AxUpdate, "ax");
144 }
145
146 /// Build code to extract the exponent and mantissa of \p Src.
147 /// Return a pair whose first element is the mantissa rescaled with
148 /// ldexp by \p NewExp and whose second element is the exponent minus
 /// one, for use as a loop bound. Note the order: {power, exponent}.
149 std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,
150 const Twine &ExName,
151 const Twine &PowName) const {
152 // Build:
153 // ExName = frexp_exp(Src) - 1;
154 // PowName = fldexp(frexp_mant(Src), NewExp);
155 Type *Ty = Src->getType();
 // This local shadows the member ExTy; both are the 32-bit integer type.
156 Type *ExTy = B.getInt32Ty();
157 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
158 Value *Mant = B.CreateExtractValue(Frexp, {0});
159 Value *Exp = B.CreateExtractValue(Frexp, {1});
160
161 Exp = B.CreateSub(Exp, One, ExName);
162 Value *Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
163
164 return {Pow, Exp};
165 }
166
167 /// Build the main computation of the remainder for the case in which
168 /// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the
169 /// denominator. Add the incoming edge from the computation result
170 /// to \p RetPhi. \p FMF is installed on the builder for the duration of
 /// this function and restored on return.
171 void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,
172 PHINode *RetPhi, FastMathFlags FMF) const {
173 IRBuilder<>::FastMathFlagGuard Guard(B);
174 B.setFastMathFlags(FMF);
175
176 // Build:
177 // ex = frexp_exp(ax) - 1;
178 // ax = fldexp(frexp_mant(ax), bits);
179 // ey = frexp_exp(ay) - 1;
180 // ay = fldexp(frexp_mant(ay), 1);
181 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits, "ex", "ax");
182 auto [Ay, Ey] = buildExpAndPower(AyInitial, One, "ey", "ay");
183
184 // Build:
185 // int nb = ex - ey;
186 // float ayinv = 1.0/ay;
187 Value *Nb = B.CreateSub(Ex, Ey, "nb");
188 Value *Ayinv = createRcp(Ay, "ayinv");
189
190 // Build: while (nb > bits)
191 BasicBlock *PreheaderBB = B.GetInsertBlock();
192 Function *Fun = PreheaderBB->getParent();
193 auto *LoopBB = BasicBlock::Create(B.getContext(), "frem.loop_body", Fun);
194 auto *ExitBB = BasicBlock::Create(B.getContext(), "frem.loop_exit", Fun);
195
196 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, Nb, Bits), LoopBB, ExitBB);
197
198 // Build loop body:
199 // UPDATE_AX
200 // ax = fldexp(ax, bits);
201 // nb -= bits;
202 // One iteration of the loop is factored out. The code shared by
203 // the loop and this "iteration" is denoted by UPDATE_AX.
204 B.SetInsertPoint(LoopBB);
205 PHINode *NbIv = B.CreatePHI(Nb->getType(), 2, "nb_iv");
206 NbIv->addIncoming(Nb, PreheaderBB);
207
208 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2, "ax_loop_phi");
209 AxPhi->addIncoming(Ax, PreheaderBB);
210
211 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
212 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {}, "ax_update");
213 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
214 NbIv->addIncoming(B.CreateSub(NbIv, Bits, "nb_update"), LoopBB);
215
 // The back-edge condition tests the phi value NbIv, i.e. the value from
 // before this trip's update. The exit phis below likewise take AxPhi and
 // NbIv, so the update computed in the exiting trip is not used.
216 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, NbIv, Bits), LoopBB, ExitBB);
217
218 // Build final iteration
219 // ax = fldexp(ax, nb - bits + 1);
220 // UPDATE_AX
221 B.SetInsertPoint(ExitBB);
222
223 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2, "ax_exit_phi");
224 AxPhiExit->addIncoming(Ax, PreheaderBB);
225 AxPhiExit->addIncoming(AxPhi, LoopBB);
226 auto *NbExitPhi = B.CreatePHI(Nb->getType(), 2, "nb_exit_phi");
227 NbExitPhi->addIncoming(NbIv, LoopBB);
228 NbExitPhi->addIncoming(Nb, PreheaderBB);
229
230 Value *AxFinal = B.CreateLdexp(
231 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {}, "ax");
232 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
233
234 // Build:
235 // ax = fldexp(ax, ey);
236 // ret = copysign(ax,x);
237 AxFinal = B.CreateLdexp(AxFinal, Ey, {}, "ax");
 // Narrow back to the frem type if the computation used a wider type.
238 if (ComputeFpTy != FremTy)
239 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
240 Value *Ret = B.CreateCopySign(AxFinal, X);
241
242 RetPhi->addIncoming(Ret, ExitBB);
243 }
244
245 /// Build the else-branch of the conditional in the FRem
246 /// expansion, i.e. the case in which Ax <= Ay, where Ax = |X|, Ay
247 /// = |Y|, and X is the numerator and Y the denominator. Add the
248 /// incoming edge from the result to \p RetPhi.
249 void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {
250 // Build:
251 // ret = ax == ay ? copysign(0.0f, x) : x;
252 Value *ZeroWithXSign = B.CreateCopySign(ConstantFP::getZero(FremTy), X);
253 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign, X);
254
255 RetPhi->addIncoming(Ret, B.GetInsertBlock());
256 }
257
258 /// Return a value that is NaN if one of the corner cases concerning
259 /// the inputs \p X and \p Y is detected, and \p Ret otherwise.
 /// The finiteness check on \p X is replaced by a constant true if
 /// \p NoInfs is set or if \p SQ is present and proves X is never infinite.
260 Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y,
261 std::optional<SimplifyQuery> &SQ,
262 bool NoInfs) const {
263 // Build:
264 // ret = (y == 0.0f || isnan(y)) ? QNAN : ret;
265 // ret = isfinite(x) ? ret : QNAN;
266 Value *Nan = ConstantFP::getQNaN(FremTy);
 // The unordered-or-equal compare covers both y == 0 and y being NaN.
267 Ret = B.CreateSelect(B.CreateFCmpUEQ(Y, ConstantFP::getZero(FremTy)), Nan,
268 Ret);
269 Value *XFinite =
270 NoInfs || (SQ && isKnownNeverInfinity(X, *SQ))
271 ? B.getTrue()
272 : B.CreateFCmpULT(B.CreateUnaryIntrinsic(Intrinsic::fabs, X),
274 Ret = B.CreateSelect(XFinite, Ret, Nan);
275
276 return Ret;
277 }
278};
279
280Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const {
281 IRBuilder<>::FastMathFlagGuard Guard(B);
282 // Propagating the approximate functions flag to the
283 // division leads to an unacceptable drop in precision
284 // on AMDGPU.
285 // TODO Find out if any flags might be worth propagating.
286 B.clearFastMathFlags();
287
288 Value *Quot = B.CreateFDiv(X, Y);
289 Value *Trunc = B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
290 Value *Neg = B.CreateFNeg(Trunc);
291
292 return B.CreateFMA(Neg, Y, X);
293}
294
295Value *FRemExpander::buildFRem(Value *X, Value *Y,
296 std::optional<SimplifyQuery> &SQ) const {
297 assert(X->getType() == FremTy && Y->getType() == FremTy);
298
299 FastMathFlags FMF = B.getFastMathFlags();
300
301 // This function generates the following code structure:
302 // if (abs(x) > abs(y))
303 // { ret = compute remainder }
304 // else
305 // { ret = x or 0 with sign of x }
306 // Adjust ret to NaN/inf in input
307 // return ret
308 Value *Ax = B.CreateUnaryIntrinsic(Intrinsic::fabs, X, {}, "ax");
309 Value *Ay = B.CreateUnaryIntrinsic(Intrinsic::fabs, Y, {}, "ay");
310 if (ComputeFpTy != X->getType()) {
311 Ax = B.CreateFPExt(Ax, ComputeFpTy, "ax");
312 Ay = B.CreateFPExt(Ay, ComputeFpTy, "ay");
313 }
314 Value *AxAyCmp = B.CreateFCmpOGT(Ax, Ay);
315
316 PHINode *RetPhi = B.CreatePHI(FremTy, 2, "ret");
317 Value *Ret = RetPhi;
318
319 // We would return NaN in all corner cases handled here.
320 // Hence, if NaNs are excluded, keep the result as it is.
321 if (!FMF.noNaNs())
322 Ret = handleInputCornerCases(Ret, X, Y, SQ, FMF.noInfs());
323
324 Function *Fun = B.GetInsertBlock()->getParent();
325 auto *ThenBB = BasicBlock::Create(B.getContext(), "frem.compute", Fun);
326 auto *ElseBB = BasicBlock::Create(B.getContext(), "frem.else", Fun);
327 SplitBlockAndInsertIfThenElse(AxAyCmp, RetPhi, &ThenBB, &ElseBB);
328
329 auto SavedInsertPt = B.GetInsertPoint();
330
331 // Build remainder computation for "then" branch
332 //
333 // The ordered comparison ensures that ax and ay are not NaNs
334 // in the then-branch. Furthermore, y cannot be an infinity and the
335 // check at the end of the function ensures that the result will not
336 // be used if x is an infinity.
337 FastMathFlags ComputeFMF = FMF;
338 ComputeFMF.setNoInfs();
339 ComputeFMF.setNoNaNs();
340
341 B.SetInsertPoint(ThenBB);
342 buildRemainderComputation(Ax, Ay, X, RetPhi, FMF);
343 B.CreateBr(RetPhi->getParent());
344
345 // Build "else"-branch
346 B.SetInsertPoint(ElseBB);
347 buildElseBranch(Ax, Ay, X, RetPhi);
348 B.CreateBr(RetPhi->getParent());
349
350 B.SetInsertPoint(SavedInsertPt);
351
352 return Ret;
353}
354} // namespace
355
356static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
357 LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
358
359 Type *ReturnTy = I.getType();
360 assert(FRemExpander::canExpandType(ReturnTy->getScalarType()));
361
362 FastMathFlags FMF = I.getFastMathFlags();
363 // TODO Make use of those flags for optimization?
364 FMF.setAllowReciprocal(false);
365 FMF.setAllowContract(false);
366
367 IRBuilder<> B(&I);
368 B.setFastMathFlags(FMF);
369 B.SetCurrentDebugLocation(I.getDebugLoc());
370
371 Type *ElemTy = ReturnTy->getScalarType();
372 const FRemExpander Expander = FRemExpander::create(B, ElemTy);
373
374 Value *Ret;
375 if (ReturnTy->isFloatingPointTy())
376 Ret = FMF.approxFunc()
377 ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))
378 : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);
379 else {
380 auto *VecTy = cast<FixedVectorType>(ReturnTy);
381
382 // This could use SplitBlockAndInsertForEachLane but the interface
383 // is a bit awkward for a constant number of elements and it will
384 // boil down to the same code.
385 // TODO Expand the FRem instruction only once and reuse the code.
386 Value *Nums = I.getOperand(0);
387 Value *Denums = I.getOperand(1);
388 Ret = PoisonValue::get(I.getType());
389 for (int I = 0, E = VecTy->getNumElements(); I != E; ++I) {
390 Value *Num = B.CreateExtractElement(Nums, I);
391 Value *Denum = B.CreateExtractElement(Denums, I);
392 Value *Rem = FMF.approxFunc() ? Expander.buildApproxFRem(Num, Denum)
393 : Expander.buildFRem(Num, Denum, SQ);
394 Ret = B.CreateInsertElement(Ret, Rem, I);
395 }
396 }
397
398 I.replaceAllUsesWith(Ret);
399 Ret->takeName(&I);
400 I.eraseFromParent();
401
402 return true;
403}
404// clang-format off: preserve formatting of the following example
405
406/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
407/// the generated code. This currently generates code similarly to compiler-rt's
408/// implementations.
409///
410/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
411/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
412/// entry:
413/// %0 = bitcast float %a to i32
414/// %conv.i = zext i32 %0 to i64
415/// %tobool.not = icmp sgt i32 %0, -1
416/// %conv = select i1 %tobool.not, i64 1, i64 -1
417/// %and = lshr i64 %conv.i, 23
418/// %shr = and i64 %and, 255
419/// %and2 = and i64 %conv.i, 8388607
420/// %or = or i64 %and2, 8388608
421/// %cmp = icmp ult i64 %shr, 127
422/// br i1 %cmp, label %cleanup, label %if.end
423///
424/// if.end: ; preds = %entry
425/// %sub = add nuw nsw i64 %shr, 4294967169
426/// %conv5 = and i64 %sub, 4294967232
427/// %cmp6.not = icmp eq i64 %conv5, 0
428/// br i1 %cmp6.not, label %if.end12, label %if.then8
429///
430/// if.then8: ; preds = %if.end
431/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64
432/// -9223372036854775808 br label %cleanup
433///
434/// if.end12: ; preds = %if.end
435/// %cmp13 = icmp ult i64 %shr, 150
436/// br i1 %cmp13, label %if.then15, label %if.else
437///
438/// if.then15: ; preds = %if.end12
439/// %sub16 = sub nuw nsw i64 150, %shr
440/// %shr17 = lshr i64 %or, %sub16
441/// %mul = mul nsw i64 %shr17, %conv
442/// br label %cleanup
443///
444/// if.else: ; preds = %if.end12
445/// %sub18 = add nsw i64 %shr, -150
446/// %shl = shl i64 %or, %sub18
447/// %mul19 = mul nsw i64 %shl, %conv
448/// br label %cleanup
449///
450/// cleanup: ; preds = %entry,
451/// %if.else, %if.then15, %if.then8
452/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [
453/// %mul19, %if.else ], [ 0, %entry ] ret i64 %retval.0
454/// }
455///
456/// Replace fp to integer with generated code.
457static void expandFPToI(Instruction *FPToI) {
458 // clang-format on
459 IRBuilder<> Builder(FPToI);
460 auto *FloatVal = FPToI->getOperand(0);
461 IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
462
 // BitWidth is the width of the integer result. FPMantissaWidth is
 // getFPMantissaWidth() minus one -- presumably the number of explicitly
 // stored mantissa bits (23 for float in the example above); confirm.
463 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
464 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
465
466 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
467 // to i32 first following a sext/zext to target integer type.
 // For half, the conversion goes through i32 followed by a zero or sign
 // extension; the instruction is replaced and erased and no further code
 // is generated.
468 Value *A1 = nullptr;
469 if (FloatVal->getType()->isHalfTy()) {
470 if (FPToI->getOpcode() == Instruction::FPToUI) {
471 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
472 A1 = Builder.CreateZExt(A0, IntTy);
473 } else { // FPToSI
474 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
475 A1 = Builder.CreateSExt(A0, IntTy);
476 }
477 FPToI->replaceAllUsesWith(A1);
478 FPToI->dropAllReferences();
479 FPToI->eraseFromParent();
480 return;
481 }
482
483 // fp80 conversion is implemented by fpext to fp128 first then do the
484 // conversion.
485 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
486 unsigned FloatWidth =
487 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
488 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
489 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
 // ImplicitBit is 1 << FPMantissaWidth and SignificandMask is all bits
 // below it.
490 Value *ImplicitBit = Builder.CreateShl(
491 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
492 Value *SignificandMask =
493 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
 // NegOne is the all-ones value and NegInf has only the top bit set; they
 // are combined below to form the saturated results.
494 Value *NegOne = Builder.CreateSExt(
495 ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
496 Value *NegInf =
497 Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
498 ConstantInt::getSigned(IntTy, BitWidth - 1));
499
 // Split the block at the instruction: everything from the conversion on
 // moves to the "cleanup" block, and the new blocks are inserted before it.
500 BasicBlock *Entry = Builder.GetInsertBlock();
501 Function *F = Entry->getParent();
502 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
503 BasicBlock *End =
504 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
505 BasicBlock *IfEnd =
506 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
507 BasicBlock *IfThen5 =
508 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
509 BasicBlock *IfEnd9 =
510 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
511 BasicBlock *IfThen12 =
512 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
513 BasicBlock *IfElse =
514 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
515
 // Remove the terminator of the entry block (left there by the split); the
 // conditional branch created below takes its place.
516 Entry->getTerminator()->eraseFromParent();
517
518 // entry:
519 Builder.SetInsertPoint(Entry);
520 Value *FloatVal0 = FloatVal;
521 // fp80 conversion is implemented by fpext to fp128 first then do the
522 // conversion.
523 if (FloatVal->getType()->isX86_FP80Ty())
524 FloatVal0 =
525 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
 // Reinterpret the float as an integer of the same width and zero-extend
 // it to the result type.
526 Value *ARep0 =
527 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
528 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
 // PosOrNeg is true if the sign bit of the representation is clear; Sign is
 // the matching factor +1 or -1.
529 Value *PosOrNeg = Builder.CreateICmpSGT(
530 ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
531 Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
532 ConstantInt::getSigned(IntTy, -1));
 // And2 is the biased exponent field; Or is the significand with the
 // implicit bit set.
533 Value *And =
534 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
535 Value *And2 = Builder.CreateAnd(
536 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
537 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
538 Value *Or = Builder.CreateOr(Abs, ImplicitBit);
 // An exponent below the bias branches straight to the cleanup block, where
 // the phi yields 0 for this edge.
539 Value *Cmp =
540 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
541 Builder.CreateCondBr(Cmp, End, IfEnd);
542
543 // if.end:
544 Builder.SetInsertPoint(IfEnd);
 // NOTE(review): the first operand line of this CreateAdd is missing from
 // this listing (numbering jumps 545->547). In the example above the
 // corresponding check decides whether the saturated result is used.
545 Value *Add1 = Builder.CreateAdd(
547 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
548 Value *Cmp3 = Builder.CreateICmpULT(
549 Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
550 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
551
552 // if.then5:
553 Builder.SetInsertPoint(IfThen5);
 // Saturate: all bits except the top one for non-negative inputs, only the
 // top bit for negative inputs.
554 Value *PosInf = Builder.CreateXor(NegOne, NegInf);
555 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
556 Builder.CreateBr(End);
557
558 // if.end9:
559 Builder.SetInsertPoint(IfEnd9);
 // Choose the shift direction: exponents below bias + mantissa width shift
 // the significand right, all others shift it left.
560 Value *Cmp10 = Builder.CreateICmpULT(
561 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
562 Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
563
564 // if.then12:
565 Builder.SetInsertPoint(IfThen12);
566 Value *Sub13 = Builder.CreateSub(
567 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
568 Value *Shr14 = Builder.CreateLShr(Or, Sub13);
569 Value *Mul = Builder.CreateMul(Shr14, Sign);
570 Builder.CreateBr(End);
571
572 // if.else:
573 Builder.SetInsertPoint(IfElse);
 // NOTE(review): the first operand line of this CreateAdd is missing from
 // this listing (numbering jumps 574->576).
574 Value *Sub15 = Builder.CreateAdd(
576 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
577 Value *Shl = Builder.CreateShl(Or, Sub15);
578 Value *Mul16 = Builder.CreateMul(Shl, Sign);
579 Builder.CreateBr(End);
580
581 // cleanup:
 // Merge the four possible results with a phi at the start of the cleanup
 // block and use it in place of the original instruction.
582 Builder.SetInsertPoint(End, End->begin());
583 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
584
585 Retval0->addIncoming(Cond8, IfThen5);
586 Retval0->addIncoming(Mul, IfThen12);
587 Retval0->addIncoming(Mul16, IfElse);
588 Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
589
590 FPToI->replaceAllUsesWith(Retval0);
591 FPToI->dropAllReferences();
592 FPToI->eraseFromParent();
593}
594
595// clang-format off: preserve formatting of the following example
596
597/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
598/// the generated code. This currently generates code similarly to compiler-rt's
599/// implementations. This implementation has an implicit assumption that integer
600/// width is larger than fp.
601///
602/// An example IR generated from compiler-rt/floatdisf.c looks like below:
603/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
604/// entry:
605/// %cmp = icmp eq i64 %a, 0
606/// br i1 %cmp, label %return, label %if.end
607///
608/// if.end: ; preds = %entry
609/// %shr = ashr i64 %a, 63
610/// %xor = xor i64 %shr, %a
611/// %sub = sub nsw i64 %xor, %shr
612/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
613/// %cast = trunc i64 %0 to i32
614/// %sub1 = sub nuw nsw i32 64, %cast
615/// %sub2 = xor i32 %cast, 63
616/// %cmp3 = icmp ult i32 %cast, 40
617/// br i1 %cmp3, label %if.then4, label %if.else
618///
619/// if.then4: ; preds = %if.end
620/// switch i32 %sub1, label %sw.default [
621/// i32 25, label %sw.bb
622/// i32 26, label %sw.epilog
623/// ]
624///
625/// sw.bb: ; preds = %if.then4
626/// %shl = shl i64 %sub, 1
627/// br label %sw.epilog
628///
629/// sw.default: ; preds = %if.then4
630/// %sub5 = sub nsw i64 38, %0
631/// %sh_prom = and i64 %sub5, 4294967295
632/// %shr6 = lshr i64 %sub, %sh_prom
633/// %shr9 = lshr i64 274877906943, %0
634/// %and = and i64 %shr9, %sub
635/// %cmp10 = icmp ne i64 %and, 0
636/// %conv11 = zext i1 %cmp10 to i64
637/// %or = or i64 %shr6, %conv11
638/// br label %sw.epilog
639///
640/// sw.epilog: ; preds = %sw.default,
641/// %if.then4, %sw.bb
642/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl,
643/// %sw.bb ] %1 = lshr i64 %a.addr.0, 2 %2 = and i64 %1, 1 %or16 = or i64 %2,
644/// %a.addr.0 %inc = add nsw i64 %or16, 1 %3 = and i64 %inc, 67108864
645/// %tobool.not = icmp eq i64 %3, 0
646/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
647/// %spec.select = ashr i64 %inc, %spec.select.v
648/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
649/// br label %if.end26
650///
651/// if.else: ; preds = %if.end
652/// %sub23 = add nuw nsw i64 %0, 4294967256
653/// %sh_prom24 = and i64 %sub23, 4294967295
654/// %shl25 = shl i64 %sub, %sh_prom24
655/// br label %if.end26
656///
657/// if.end26: ; preds = %sw.epilog,
658/// %if.else
659/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
660/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
661/// %conv27 = trunc i64 %shr to i32
662/// %and28 = and i32 %conv27, -2147483648
663/// %add = shl nuw nsw i32 %e.0, 23
664/// %shl29 = add nuw nsw i32 %add, 1065353216
665/// %conv31 = trunc i64 %a.addr.1 to i32
666/// %and32 = and i32 %conv31, 8388607
667/// %or30 = or i32 %and32, %and28
668/// %or33 = or i32 %or30, %shl29
669/// %4 = bitcast i32 %or33 to float
670/// br label %return
671///
672/// return: ; preds = %entry,
673/// %if.end26
674/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
675/// ret float %retval.0
676/// }
677///
678/// Replace integer to fp with generated code.
679static void expandIToFP(Instruction *IToFP) {
680 // clang-format on
681 IRBuilder<> Builder(IToFP);
682 auto *IntVal = IToFP->getOperand(0);
683 IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
684
685 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
686 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
687 // fp80 conversion is implemented by converting to fp128 first, followed
688 // by a fptrunc to fp80.
689 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
690 // FIXME: As there are no related builtins in compiler-rt, this currently
691 // uses the fp32 <-> fp16 lib calls to implement the conversion.
692 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
693 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
694 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
695 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
696
697 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
698 "assumes integer width is larger than fp.");
699
700 Value *Temp1 =
701 Builder.CreateShl(Builder.getIntN(BitWidth, 1),
702 Builder.getIntN(BitWidth, FPMantissaWidth + 3));
703
704 BasicBlock *Entry = Builder.GetInsertBlock();
705 Function *F = Entry->getParent();
706 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
707 BasicBlock *End =
708 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
709 BasicBlock *IfEnd =
710 BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
711 BasicBlock *IfThen4 =
712 BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
713 BasicBlock *SwBB =
714 BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
715 BasicBlock *SwDefault =
716 BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
717 BasicBlock *SwEpilog =
718 BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
719 BasicBlock *IfThen20 =
720 BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
721 BasicBlock *IfElse =
722 BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
723 BasicBlock *IfEnd26 =
724 BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
725
726 Entry->getTerminator()->eraseFromParent();
727
728 Function *CTLZ =
729 Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
730 ConstantInt *True = Builder.getTrue();
731
732 // entry:
733 Builder.SetInsertPoint(Entry);
734 Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
735 Builder.CreateCondBr(Cmp, End, IfEnd);
736
737 // if.end:
738 Builder.SetInsertPoint(IfEnd);
739 Value *Shr =
740 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
741 Value *Xor = Builder.CreateXor(Shr, IntVal);
742 Value *Sub = Builder.CreateSub(Xor, Shr);
743 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
744 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
745 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
746 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
747 FloatWidth == 128 ? Call : Cast);
748 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
749 FloatWidth == 128 ? Call : Cast);
750 Value *Cmp3 = Builder.CreateICmpSGT(
751 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
752 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
753
754 // if.then4:
755 Builder.SetInsertPoint(IfThen4);
756 llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
757 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
758 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
759
760 // sw.bb:
761 Builder.SetInsertPoint(SwBB);
762 Value *Shl =
763 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
764 Builder.CreateBr(SwEpilog);
765
766 // sw.default:
767 Builder.SetInsertPoint(SwDefault);
768 Value *Sub5 = Builder.CreateSub(
769 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
770 FloatWidth == 128 ? Call : Cast);
771 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
772 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
773 FloatWidth == 128 ? Sub5 : ShProm);
774 Value *Sub8 =
775 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
776 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
777 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
778 Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
779 FloatWidth == 128 ? Sub8 : ShProm9);
780 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
781 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
782 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
783 Value *Or = Builder.CreateOr(Shr6, Conv11);
784 Builder.CreateBr(SwEpilog);
785
786 // sw.epilog:
787 Builder.SetInsertPoint(SwEpilog);
788 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
789 AAddr0->addIncoming(Or, SwDefault);
790 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
791 AAddr0->addIncoming(Shl, SwBB);
792 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
793 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
794 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
795 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
796 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
797 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
798 Value *Shr18 = nullptr;
799 if (IsSigned)
800 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
801 else
802 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
803 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
804 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
805 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
806 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
807 Value *ExtractT64 = nullptr;
808 if (FloatWidth > 80)
809 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
810 else
811 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
812 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
813
814 // if.then20
815 Builder.SetInsertPoint(IfThen20);
816 Value *Shr21 = nullptr;
817 if (IsSigned)
818 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
819 else
820 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
821 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
822 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
823 Value *ExtractT62 = nullptr;
824 if (FloatWidth > 80)
825 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
826 else
827 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
828 Builder.CreateBr(IfEnd26);
829
830 // if.else:
831 Builder.SetInsertPoint(IfElse);
832 Value *Sub24 = Builder.CreateAdd(
833 FloatWidth == 128 ? Call : Cast,
834 ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
835 -(BitWidth - FPMantissaWidth - 1)));
836 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
837 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
838 FloatWidth == 128 ? Sub24 : ShProm25);
839 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
840 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
841 Value *ExtractT66 = nullptr;
842 if (FloatWidth > 80)
843 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
844 else
845 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
846 Builder.CreateBr(IfEnd26);
847
848 // if.end26:
849 Builder.SetInsertPoint(IfEnd26);
850 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
851 AAddr1Off0->addIncoming(ExtractT, IfThen20);
852 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
853 AAddr1Off0->addIncoming(ExtractT61, IfElse);
854 PHINode *AAddr1Off32 = nullptr;
855 if (FloatWidth > 32) {
856 AAddr1Off32 =
857 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
858 AAddr1Off32->addIncoming(ExtractT62, IfThen20);
859 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
860 AAddr1Off32->addIncoming(ExtractT66, IfElse);
861 }
862 PHINode *E0 = nullptr;
863 if (FloatWidth <= 80) {
864 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
865 E0->addIncoming(Sub1, IfThen20);
866 E0->addIncoming(Sub2, SwEpilog);
867 E0->addIncoming(Sub2, IfElse);
868 }
869 Value *And29 = nullptr;
870 if (FloatWidth > 80) {
871 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
872 Builder.getIntN(BitWidth, 63));
873 And29 = Builder.CreateAnd(Shr, Temp2, "and29");
874 } else {
875 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
876 And29 = Builder.CreateAnd(
877 Conv28, ConstantInt::getSigned(Builder.getInt32Ty(), 0x80000000));
878 }
879 unsigned TempMod = FPMantissaWidth % 32;
880 Value *And34 = nullptr;
881 Value *Shl30 = nullptr;
882 if (FloatWidth > 80) {
883 TempMod += 32;
884 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
885 Shl30 = Builder.CreateAdd(
886 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
887 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
888 } else {
889 Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
890 Shl30 = Builder.CreateAdd(
891 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
892 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
893 Builder.getInt32((1 << TempMod) - 1));
894 }
895 Value *Or35 = nullptr;
896 if (FloatWidth > 80) {
897 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
898 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
899 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
900 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
901 Builder.getIntN(128, FPMantissaWidth));
902 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
903 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
904 Or35 = Builder.CreateOr(Or34, A6);
905 } else {
906 Value *Or31 = Builder.CreateOr(And34, And29);
907 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
908 }
909 Value *A4 = nullptr;
910 if (IToFP->getType()->isDoubleTy()) {
911 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
912 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
913 Value *And1 =
914 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
915 Value *Or1 = Builder.CreateOr(Shl1, And1);
916 A4 = Builder.CreateBitCast(Or1, IToFP->getType());
917 } else if (IToFP->getType()->isX86_FP80Ty()) {
918 Value *A40 =
919 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
920 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
921 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
922 // Deal with "half" situation. This is a workaround since we don't have
923 // floattihf.c currently as referring.
924 Value *A40 =
925 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
926 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
927 } else // float type
928 A4 = Builder.CreateBitCast(Or35, IToFP->getType());
929 Builder.CreateBr(End);
930
931 // return:
932 Builder.SetInsertPoint(End, End->begin());
933 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
934 Retval0->addIncoming(A4, IfEnd26);
935 Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
936
937 IToFP->replaceAllUsesWith(Retval0);
938 IToFP->dropAllReferences();
939 IToFP->eraseFromParent();
940}
941
943 VectorType *VTy = cast<FixedVectorType>(I->getType());
944
945 IRBuilder<> Builder(I);
946
947 unsigned NumElements = VTy->getElementCount().getFixedValue();
948 Value *Result = PoisonValue::get(VTy);
949 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
950 Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
951 Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,
952 I->getType()->getScalarType());
953 Result = Builder.CreateInsertElement(Result, Cast, Idx);
954 if (isa<Instruction>(Cast))
955 Replace.push_back(cast<Instruction>(Cast));
956 }
957 I->replaceAllUsesWith(Result);
958 I->dropAllReferences();
959 I->eraseFromParent();
960}
961
962// This covers all floating point types; more than we need here.
963// TODO Move somewhere else for general use?
964/// Return the Libcall for a frem instruction of
965/// type \p Ty.
966static RTLIB::Libcall fremToLibcall(Type *Ty) {
967 assert(Ty->isFloatingPointTy());
968 if (Ty->isFloatTy() || Ty->is16bitFPTy())
969 return RTLIB::REM_F32;
970 if (Ty->isDoubleTy())
971 return RTLIB::REM_F64;
972 if (Ty->isFP128Ty())
973 return RTLIB::REM_F128;
974 if (Ty->isX86_FP80Ty())
975 return RTLIB::REM_F80;
976 if (Ty->isPPC_FP128Ty())
977 return RTLIB::REM_PPCF128;
978
979 llvm_unreachable("Unknown floating point type");
980}
981
982/* Return true if, according to \p LibInfo, the target either directly
983 supports the frem instruction for the \p Ty, has a custom lowering,
984 or uses a libcall. */
985static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) {
987 return true;
988
989 return TLI.getLibcallName(fremToLibcall(Ty->getScalarType()));
990}
991
992static bool runImpl(Function &F, const TargetLowering &TLI,
993 AssumptionCache *AC) {
995 SmallVector<Instruction *, 4> ReplaceVector;
996 bool Modified = false;
997
998 unsigned MaxLegalFpConvertBitWidth =
1001 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
1002
1003 if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
1004 return false;
1005
1006 for (auto &I : instructions(F)) {
1007 switch (I.getOpcode()) {
1008 case Instruction::FRem: {
1009 Type *Ty = I.getType();
1010 // TODO: This pass doesn't handle scalable vectors.
1011 if (Ty->isScalableTy())
1012 continue;
1013
1014 if (targetSupportsFrem(TLI, Ty) ||
1015 !FRemExpander::canExpandType(Ty->getScalarType()))
1016 continue;
1017
1018 Replace.push_back(&I);
1019 Modified = true;
1020
1021 break;
1022 }
1023 case Instruction::FPToUI:
1024 case Instruction::FPToSI: {
1025 // TODO: This pass doesn't handle scalable vectors.
1026 if (I.getOperand(0)->getType()->isScalableTy())
1027 continue;
1028
1029 auto *IntTy = cast<IntegerType>(I.getType()->getScalarType());
1030 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
1031 continue;
1032
1033 if (I.getOperand(0)->getType()->isVectorTy())
1034 ReplaceVector.push_back(&I);
1035 else
1036 Replace.push_back(&I);
1037 Modified = true;
1038 break;
1039 }
1040 case Instruction::UIToFP:
1041 case Instruction::SIToFP: {
1042 // TODO: This pass doesn't handle scalable vectors.
1043 if (I.getOperand(0)->getType()->isScalableTy())
1044 continue;
1045
1046 auto *IntTy =
1047 cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
1048 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
1049 continue;
1050
1051 if (I.getOperand(0)->getType()->isVectorTy())
1052 ReplaceVector.push_back(&I);
1053 else
1054 Replace.push_back(&I);
1055 Modified = true;
1056 break;
1057 }
1058 default:
1059 break;
1060 }
1061 }
1062
1063 while (!ReplaceVector.empty()) {
1064 Instruction *I = ReplaceVector.pop_back_val();
1065 scalarize(I, Replace);
1066 }
1067
1068 if (Replace.empty())
1069 return false;
1070
1071 while (!Replace.empty()) {
1072 Instruction *I = Replace.pop_back_val();
1073 if (I->getOpcode() == Instruction::FRem) {
1074 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1075 if (AC) {
1076 auto Res = std::make_optional<SimplifyQuery>(
1077 I->getModule()->getDataLayout(), I);
1078 Res->AC = AC;
1079 return Res;
1080 }
1081 return {};
1082 }();
1083
1085 } else if (I->getOpcode() == Instruction::FPToUI ||
1086 I->getOpcode() == Instruction::FPToSI) {
1087 expandFPToI(I);
1088 } else {
1089 expandIToFP(I);
1090 }
1091 }
1092
1093 return Modified;
1094}
1095
1096namespace {
1097class ExpandFpLegacyPass : public FunctionPass {
1098 CodeGenOptLevel OptLevel;
1099
1100public:
1101 static char ID;
1102
1103 ExpandFpLegacyPass(CodeGenOptLevel OptLevel)
1104 : FunctionPass(ID), OptLevel(OptLevel) {
1106 }
1107
1108 ExpandFpLegacyPass() : ExpandFpLegacyPass(CodeGenOptLevel::None) {};
1109
1110 bool runOnFunction(Function &F) override {
1111 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1112 auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
1113 AssumptionCache *AC = nullptr;
1114
1115 if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
1116 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1117 return runImpl(F, *TLI, AC);
1118 }
1119
1120 void getAnalysisUsage(AnalysisUsage &AU) const override {
1121 AU.addRequired<TargetPassConfig>();
1122 if (OptLevel != CodeGenOptLevel::None)
1123 AU.addRequired<AssumptionCacheTracker>();
1124 AU.addPreserved<AAResultsWrapperPass>();
1125 AU.addPreserved<GlobalsAAWrapperPass>();
1126 }
1127};
1128} // namespace
1129
1131 : TM(TM), OptLevel(OptLevel) {}
1132
1134 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1135 static_cast<PassInfoMixin<ExpandFpPass> *>(this)->printPipeline(
1136 OS, MapClassName2PassName);
1137 OS << '<';
1138 OS << "O" << (int)OptLevel;
1139 OS << '>';
1140}
1141
1143 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
1144 auto &TLI = *STI->getTargetLowering();
1145 AssumptionCache *AC = nullptr;
1146 if (OptLevel != CodeGenOptLevel::None)
1147 AC = &FAM.getResult<AssumptionAnalysis>(F);
1148 return runImpl(F, TLI, AC) ? PreservedAnalyses::none()
1150}
1151
1152char ExpandFpLegacyPass::ID = 0;
1153INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp",
1154 "Expand certain fp instructions", false, false)
1155INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false)
1156
1158 return new ExpandFpLegacyPass(OptLevel);
1159}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
Definition ExpandFp.cpp:356
static void expandIToFP(Instruction *IToFP)
Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.
Definition ExpandFp.cpp:679
static void expandFPToI(Instruction *FPToI)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
Definition ExpandFp.cpp:457
static RTLIB::Libcall fremToLibcall(Type *Ty)
Return the Libcall for a frem instruction of type Ty.
Definition ExpandFp.cpp:966
static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC)
Definition ExpandFp.cpp:992
static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty)
Definition ExpandFp.cpp:985
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Replace)
Definition ExpandFp.cpp:942
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
FunctionAnalysisManager FAM
Function * Fun
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:684
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
ExpandFpPass(const TargetMachine *TM, CodeGenOptLevel OptLevel)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
void setAllowContract(bool B=true)
Definition FMF.h:90
bool noInfs() const
Definition FMF.h:66
void setAllowReciprocal(bool B=true)
Definition FMF.h:87
bool approxFunc() const
Definition FMF.h:70
void setNoNaNs(bool B=true)
Definition FMF.h:78
bool noNaNs() const
Definition FMF.h:65
void setNoInfs(bool B=true)
Definition FMF.h:81
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Multiway switch.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition Type.h:159
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:145
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:290
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:285
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:236
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
void dropAllReferences()
Drop all references to operands.
Definition User.h:349
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:390
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI void initializeExpandFpLegacyPassPass(PassRegistry &)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI FunctionPass * createExpandFpPass()
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Matching combinators.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70