LLVM 22.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/Error.h"
28#include <cstring>
29#include <limits.h>
30
31#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
32 do { \
33 if (usesLayout<IEEEFloat>(getSemantics())) \
34 return U.IEEE.METHOD_CALL; \
35 if (usesLayout<DoubleAPFloat>(getSemantics())) \
36 return U.Double.METHOD_CALL; \
37 llvm_unreachable("Unexpected semantics"); \
38 } while (false)
39
40using namespace llvm;
41
42/// A macro used to combine two fcCategory enums into one key which can be used
43/// in a switch statement to classify how the interaction of two APFloat's
44/// categories affects an operation.
45///
46/// TODO: If clang source code is ever allowed to use constexpr in its own
47/// codebase, change this into a static inline function.
48#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49
50/* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53
54namespace llvm {
55
56// How the nonfinite values Inf and NaN are represented.
58 // Represents standard IEEE 754 behavior. A value is nonfinite if the
59 // exponent field is all 1s. In such cases, a value is Inf if the
60 // significand bits are all zero, and NaN otherwise
61 IEEE754,
62
63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
65 // representation for Inf, and operations that would ordinarily produce Inf
66 // produce NaN instead.
67 // The details of the NaN representation(s) in this form are determined by the
68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
69 // encodings do not distinguish between signalling and quiet NaN.
70 NanOnly,
71
72 // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
73 // Float4E2M1FN types, which do not support Inf or NaN values.
75};
76
// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
81enum class fltNanEncoding {
82 // Represents the standard IEEE behavior where a value is NaN if its
83 // exponent is all 1s and the significand is non-zero.
84 IEEE,
85
86 // Represents the behavior in the Float8E4M3FN floating point type where NaN
87 // is represented by having the exponent and mantissa set to all 1s.
88 // This behavior matches the FP8 E4M3 type described in
89 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
90 // as non-signalling, although the paper does not state whether the NaN
91 // values are signalling or not.
92 AllOnes,
93
  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
100};
101
102/* Represents floating point arithmetic semantics. */
104 /* The largest E such that 2^E is representable; this matches the
105 definition of IEEE 754. */
107
108 /* The smallest E such that 2^E is a normalized number; this
109 matches the definition of IEEE 754. */
111
112 /* Number of bits in the significand. This includes the integer
113 bit. */
114 unsigned int precision;
115
116 /* Number of bits actually used in the semantics. */
117 unsigned int sizeInBits;
118
120
122
123 /* Whether this semantics has an encoding for Zero */
124 bool hasZero = true;
125
126 /* Whether this semantics can represent signed values */
127 bool hasSignedRepr = true;
128
129 /* Whether the sign bit of this semantics is the most significant bit */
130 bool hasSignBitInMSB = true;
131};
132
133static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
134static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
135static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
136static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
137static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
138static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
139static constexpr fltSemantics semFloat8E5M2FNUZ = {
141static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
142static constexpr fltSemantics semFloat8E4M3FN = {
144static constexpr fltSemantics semFloat8E4M3FNUZ = {
148static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
149static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
150static constexpr fltSemantics semFloat8E8M0FNU = {127,
151 -127,
152 1,
153 8,
156 false,
157 false,
158 false};
159
160static constexpr fltSemantics semFloat6E3M2FN = {
162static constexpr fltSemantics semFloat6E2M3FN = {
164static constexpr fltSemantics semFloat4E2M1FN = {
166static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
167static constexpr fltSemantics semBogus = {0, 0, 0, 0};
168static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
169static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
170 53 + 53, 128};
171
173 switch (S) {
174 case S_IEEEhalf:
175 return IEEEhalf();
176 case S_BFloat:
177 return BFloat();
178 case S_IEEEsingle:
179 return IEEEsingle();
180 case S_IEEEdouble:
181 return IEEEdouble();
182 case S_IEEEquad:
183 return IEEEquad();
185 return PPCDoubleDouble();
187 return PPCDoubleDoubleLegacy();
188 case S_Float8E5M2:
189 return Float8E5M2();
190 case S_Float8E5M2FNUZ:
191 return Float8E5M2FNUZ();
192 case S_Float8E4M3:
193 return Float8E4M3();
194 case S_Float8E4M3FN:
195 return Float8E4M3FN();
196 case S_Float8E4M3FNUZ:
197 return Float8E4M3FNUZ();
199 return Float8E4M3B11FNUZ();
200 case S_Float8E3M4:
201 return Float8E3M4();
202 case S_FloatTF32:
203 return FloatTF32();
204 case S_Float8E8M0FNU:
205 return Float8E8M0FNU();
206 case S_Float6E3M2FN:
207 return Float6E3M2FN();
208 case S_Float6E2M3FN:
209 return Float6E2M3FN();
210 case S_Float4E2M1FN:
211 return Float4E2M1FN();
213 return x87DoubleExtended();
214 }
215 llvm_unreachable("Unrecognised floating semantics");
216}
217
220 if (&Sem == &llvm::APFloat::IEEEhalf())
221 return S_IEEEhalf;
222 else if (&Sem == &llvm::APFloat::BFloat())
223 return S_BFloat;
224 else if (&Sem == &llvm::APFloat::IEEEsingle())
225 return S_IEEEsingle;
226 else if (&Sem == &llvm::APFloat::IEEEdouble())
227 return S_IEEEdouble;
228 else if (&Sem == &llvm::APFloat::IEEEquad())
229 return S_IEEEquad;
230 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
231 return S_PPCDoubleDouble;
232 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
234 else if (&Sem == &llvm::APFloat::Float8E5M2())
235 return S_Float8E5M2;
236 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
237 return S_Float8E5M2FNUZ;
238 else if (&Sem == &llvm::APFloat::Float8E4M3())
239 return S_Float8E4M3;
240 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
241 return S_Float8E4M3FN;
242 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
243 return S_Float8E4M3FNUZ;
244 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
245 return S_Float8E4M3B11FNUZ;
246 else if (&Sem == &llvm::APFloat::Float8E3M4())
247 return S_Float8E3M4;
248 else if (&Sem == &llvm::APFloat::FloatTF32())
249 return S_FloatTF32;
250 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
251 return S_Float8E8M0FNU;
252 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
253 return S_Float6E3M2FN;
254 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
255 return S_Float6E2M3FN;
256 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
257 return S_Float4E2M1FN;
258 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
259 return S_x87DoubleExtended;
260 else
261 llvm_unreachable("Unknown floating semantics");
262}
263
270 return semPPCDoubleDouble;
271}
274}
282}
291}
293
295 const fltSemantics &B) {
296 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
297 A.precision <= B.precision;
298}
299
305
306/* A tight upper bound on number of parts required to hold the value
307 pow(5, power) is
308
309 power * 815 / (351 * integerPartWidth) + 1
310
311 However, whilst the result may require only this many parts,
312 because we are multiplying two values to get it, the
313 multiplication may require an extra part with the excess part
314 being zero (consider the trivial case of 1 * 1, tcFullMultiply
315 requires two parts to hold the single-part result). So we add an
316 extra one to guarantee enough space whilst multiplying. */
317const unsigned int maxExponent = 16383;
318const unsigned int maxPrecision = 113;
320const unsigned int maxPowerOfFiveParts =
321 2 +
323
324unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
325 return semantics.precision;
326}
329 return semantics.maxExponent;
330}
333 return semantics.minExponent;
334}
335unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
336 return semantics.sizeInBits;
337}
339 bool isSigned) {
340 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
341 // at least one more bit than the MaxExponent to hold the max FP value.
342 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
343 // Extra sign bit needed.
344 if (isSigned)
345 ++MinBitWidth;
346 return MinBitWidth;
347}
348
350 return semantics.hasZero;
351}
352
354 return semantics.hasSignedRepr;
355}
356
359}
360
363}
364
366 // Keep in sync with Type::isIEEELikeFPTy
367 return SemanticsToEnum(semantics) <= S_IEEEquad;
368}
369
371 return semantics.hasSignBitInMSB;
372}
373
375 const fltSemantics &Dst) {
376 // Exponent range must be larger.
377 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
378 return false;
379
380 // If the mantissa is long enough, the result value could still be denormal
381 // with a larger exponent range.
382 //
383 // FIXME: This condition is probably not accurate but also shouldn't be a
384 // practical concern with existing types.
385 return Dst.precision >= Src.precision;
386}
387
389 return Sem.sizeInBits;
390}
391
392static constexpr APFloatBase::ExponentType
393exponentZero(const fltSemantics &semantics) {
394 return semantics.minExponent - 1;
395}
396
397static constexpr APFloatBase::ExponentType
398exponentInf(const fltSemantics &semantics) {
399 return semantics.maxExponent + 1;
400}
401
402static constexpr APFloatBase::ExponentType
403exponentNaN(const fltSemantics &semantics) {
406 return exponentZero(semantics);
407 if (semantics.hasSignedRepr)
408 return semantics.maxExponent;
409 }
410 return semantics.maxExponent + 1;
411}
412
413/* A bunch of private, handy routines. */
414
415static inline Error createError(const Twine &Err) {
416 return make_error<StringError>(Err, inconvertibleErrorCode());
417}
418
419static constexpr inline unsigned int partCountForBits(unsigned int bits) {
420 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
422}
423
/* Map the character code C onto its decimal digit value, 0U-9U.  The
   subtraction is unsigned, so any character that is not a decimal digit
   yields a result >= 10U. */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zeroChar = '0';
  return c - zeroChar;
}
430
431/* Return the value of a decimal exponent of the form
432 [+-]ddddddd.
433
434 If the exponent overflows, returns a large exponent with the
435 appropriate sign. */
438 bool isNegative;
439 unsigned int absExponent;
440 const unsigned int overlargeExponent = 24000; /* FIXME. */
441 StringRef::iterator p = begin;
442
443 // Treat no exponent as 0 to match binutils
444 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
445 return 0;
446 }
447
448 isNegative = (*p == '-');
449 if (*p == '-' || *p == '+') {
450 p++;
451 if (p == end)
452 return createError("Exponent has no digits");
453 }
454
455 absExponent = decDigitValue(*p++);
456 if (absExponent >= 10U)
457 return createError("Invalid character in exponent");
458
459 for (; p != end; ++p) {
460 unsigned int value;
461
462 value = decDigitValue(*p);
463 if (value >= 10U)
464 return createError("Invalid character in exponent");
465
466 absExponent = absExponent * 10U + value;
467 if (absExponent >= overlargeExponent) {
468 absExponent = overlargeExponent;
469 break;
470 }
471 }
472
473 if (isNegative)
474 return -(int) absExponent;
475 else
476 return (int) absExponent;
477}
478
479/* This is ugly and needs cleaning up, but I don't immediately see
480 how whilst remaining safe. */
483 int exponentAdjustment) {
484 int unsignedExponent;
485 bool negative, overflow;
486 int exponent = 0;
487
488 if (p == end)
489 return createError("Exponent has no digits");
490
491 negative = *p == '-';
492 if (*p == '-' || *p == '+') {
493 p++;
494 if (p == end)
495 return createError("Exponent has no digits");
496 }
497
498 unsignedExponent = 0;
499 overflow = false;
500 for (; p != end; ++p) {
501 unsigned int value;
502
503 value = decDigitValue(*p);
504 if (value >= 10U)
505 return createError("Invalid character in exponent");
506
507 unsignedExponent = unsignedExponent * 10 + value;
508 if (unsignedExponent > 32767) {
509 overflow = true;
510 break;
511 }
512 }
513
514 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
515 overflow = true;
516
517 if (!overflow) {
518 exponent = unsignedExponent;
519 if (negative)
520 exponent = -exponent;
521 exponent += exponentAdjustment;
522 if (exponent > 32767 || exponent < -32768)
523 overflow = true;
524 }
525
526 if (overflow)
527 exponent = negative ? -32768: 32767;
528
529 return exponent;
530}
531
534 StringRef::iterator *dot) {
535 StringRef::iterator p = begin;
536 *dot = end;
537 while (p != end && *p == '0')
538 p++;
539
540 if (p != end && *p == '.') {
541 *dot = p++;
542
543 if (end - begin == 1)
544 return createError("Significand has no digits");
545
546 while (p != end && *p == '0')
547 p++;
548 }
549
550 return p;
551}
552
553/* Given a normal decimal floating point number of the form
554
555 dddd.dddd[eE][+-]ddd
556
557 where the decimal point and exponent are optional, fill out the
558 structure D. Exponent is appropriate if the significand is
559 treated as an integer, and normalizedExponent if the significand
560 is taken to have the decimal point after a single leading
561 non-zero digit.
562
563 If the value is zero, V->firstSigDigit points to a non-digit, and
564 the return exponent is zero.
565*/
567 const char *firstSigDigit;
568 const char *lastSigDigit;
571};
572
575 StringRef::iterator dot = end;
576
577 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
578 if (!PtrOrErr)
579 return PtrOrErr.takeError();
580 StringRef::iterator p = *PtrOrErr;
581
582 D->firstSigDigit = p;
583 D->exponent = 0;
584 D->normalizedExponent = 0;
585
586 for (; p != end; ++p) {
587 if (*p == '.') {
588 if (dot != end)
589 return createError("String contains multiple dots");
590 dot = p++;
591 if (p == end)
592 break;
593 }
594 if (decDigitValue(*p) >= 10U)
595 break;
596 }
597
598 if (p != end) {
599 if (*p != 'e' && *p != 'E')
600 return createError("Invalid character in significand");
601 if (p == begin)
602 return createError("Significand has no digits");
603 if (dot != end && p - begin == 1)
604 return createError("Significand has no digits");
605
606 /* p points to the first non-digit in the string */
607 auto ExpOrErr = readExponent(p + 1, end);
608 if (!ExpOrErr)
609 return ExpOrErr.takeError();
610 D->exponent = *ExpOrErr;
611
612 /* Implied decimal point? */
613 if (dot == end)
614 dot = p;
615 }
616
617 /* If number is all zeroes accept any exponent. */
618 if (p != D->firstSigDigit) {
619 /* Drop insignificant trailing zeroes. */
620 if (p != begin) {
621 do
622 do
623 p--;
624 while (p != begin && *p == '0');
625 while (p != begin && *p == '.');
626 }
627
628 /* Adjust the exponents for any decimal point. */
629 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
630 D->normalizedExponent = (D->exponent +
631 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
632 - (dot > D->firstSigDigit && dot < p)));
633 }
634
635 D->lastSigDigit = p;
636 return Error::success();
637}
638
639/* Return the trailing fraction of a hexadecimal number.
640 DIGITVALUE is the first hex digit of the fraction, P points to
641 the next digit. */
644 unsigned int digitValue) {
645 unsigned int hexDigit;
646
647 /* If the first trailing digit isn't 0 or 8 we can work out the
648 fraction immediately. */
649 if (digitValue > 8)
650 return lfMoreThanHalf;
651 else if (digitValue < 8 && digitValue > 0)
652 return lfLessThanHalf;
653
654 // Otherwise we need to find the first non-zero digit.
655 while (p != end && (*p == '0' || *p == '.'))
656 p++;
657
658 if (p == end)
659 return createError("Invalid trailing hexadecimal fraction!");
660
661 hexDigit = hexDigitValue(*p);
662
663 /* If we ran off the end it is exactly zero or one-half, otherwise
664 a little more. */
665 if (hexDigit == UINT_MAX)
666 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
667 else
668 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
669}
670
671/* Return the fraction lost were a bignum truncated losing the least
672 significant BITS bits. */
673static lostFraction
675 unsigned int partCount,
676 unsigned int bits)
677{
678 unsigned int lsb;
679
680 lsb = APInt::tcLSB(parts, partCount);
681
682 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
683 if (bits <= lsb)
684 return lfExactlyZero;
685 if (bits == lsb + 1)
686 return lfExactlyHalf;
687 if (bits <= partCount * APFloatBase::integerPartWidth &&
688 APInt::tcExtractBit(parts, bits - 1))
689 return lfMoreThanHalf;
690
691 return lfLessThanHalf;
692}
693
694/* Shift DST right BITS bits noting lost fraction. */
695static lostFraction
696shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
697{
698 lostFraction lost_fraction;
699
700 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
701
702 APInt::tcShiftRight(dst, parts, bits);
703
704 return lost_fraction;
705}
706
707/* Combine the effect of two lost fractions. */
708static lostFraction
710 lostFraction lessSignificant)
711{
712 if (lessSignificant != lfExactlyZero) {
713 if (moreSignificant == lfExactlyZero)
714 moreSignificant = lfLessThanHalf;
715 else if (moreSignificant == lfExactlyHalf)
716 moreSignificant = lfMoreThanHalf;
717 }
718
719 return moreSignificant;
720}
721
/* Bound, in half-ulps, on the error of a product of two floating point
   numbers whose own errors are at most HUERR1 and HUERR2 half-ulps.
   The true error is strictly less than the value returned.
   INEXACTMULTIPLY says whether the multiplication itself was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;

  /* Inputs carry some error: combine it with the multiply error. */
  if (inputError != 0)
    return multiplyError + 2 * inputError;

  /* Exact inputs: the error is <= inexactMultiply half-ulps. */
  return multiplyError * 2;
}
739
740/* The number of ulps from the boundary (zero, or half if ISNEAREST)
741 when the least significant BITS are truncated. BITS cannot be
742 zero. */
744ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
745 bool isNearest) {
746 unsigned int count, partBits;
747 APFloatBase::integerPart part, boundary;
748
749 assert(bits != 0);
750
751 bits--;
753 partBits = bits % APFloatBase::integerPartWidth + 1;
754
755 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
756
757 if (isNearest)
758 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
759 else
760 boundary = 0;
761
762 if (count == 0) {
763 if (part - boundary <= boundary - part)
764 return part - boundary;
765 else
766 return boundary - part;
767 }
768
769 if (part == boundary) {
770 while (--count)
771 if (parts[count])
772 return ~(APFloatBase::integerPart) 0; /* A lot. */
773
774 return parts[0];
775 } else if (part == boundary - 1) {
776 while (--count)
777 if (~parts[count])
778 return ~(APFloatBase::integerPart) 0; /* A lot. */
779
780 return -parts[0];
781 }
782
783 return ~(APFloatBase::integerPart) 0; /* A lot. */
784}
785
786/* Place pow(5, power) in DST, and return the number of parts used.
787 DST must be at least one part larger than size of the answer. */
788static unsigned int
789powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
790 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
792 pow5s[0] = 78125 * 5;
793
794 unsigned int partsCount = 1;
795 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
796 unsigned int result;
797 assert(power <= maxExponent);
798
799 p1 = dst;
800 p2 = scratch;
801
802 *p1 = firstEightPowers[power & 7];
803 power >>= 3;
804
805 result = 1;
806 pow5 = pow5s;
807
808 for (unsigned int n = 0; power; power >>= 1, n++) {
809 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
810 if (n != 0) {
811 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
812 partsCount, partsCount);
813 partsCount *= 2;
814 if (pow5[partsCount - 1] == 0)
815 partsCount--;
816 }
817
818 if (power & 1) {
820
821 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
822 result += partsCount;
823 if (p2[result - 1] == 0)
824 result--;
825
826 /* Now result is in p1 with partsCount parts and p2 is scratch
827 space. */
828 tmp = p1;
829 p1 = p2;
830 p2 = tmp;
831 }
832
833 pow5 += partsCount;
834 }
835
836 if (p1 != dst)
837 APInt::tcAssign(dst, p1, result);
838
839 return result;
840}
841
842/* Zero at the end to avoid modular arithmetic when adding one; used
843 when rounding up during hexadecimal output. */
844static const char hexDigitsLower[] = "0123456789abcdef0";
845static const char hexDigitsUpper[] = "0123456789ABCDEF0";
846static const char infinityL[] = "infinity";
847static const char infinityU[] = "INFINITY";
848static const char NaNL[] = "nan";
849static const char NaNU[] = "NAN";
850
851/* Write out an integerPart in hexadecimal, starting with the most
852 significant nibble. Write out exactly COUNT hexdigits, return
853 COUNT. */
854static unsigned int
855partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
856 const char *hexDigitChars)
857{
858 unsigned int result = count;
859
861
862 part >>= (APFloatBase::integerPartWidth - 4 * count);
863 while (count--) {
864 dst[count] = hexDigitChars[part & 0xf];
865 part >>= 4;
866 }
867
868 return result;
869}
870
/* Write N to DST as an unsigned decimal integer, with no terminator.
   Returns a pointer just past the last character written. */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  unsigned int length = 0;

  /* Produce the digits least significant first; zero still yields one
     digit. */
  do {
    digits[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Emit them most significant first. */
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
888
889/* Write out a signed decimal integer. */
890static char *
891writeSignedDecimal (char *dst, int value)
892{
893 if (value < 0) {
894 *dst++ = '-';
895 dst = writeUnsignedDecimal(dst, -(unsigned) value);
896 } else {
897 dst = writeUnsignedDecimal(dst, value);
898 }
899
900 return dst;
901}
902
903// Compute the ULP of the input using a definition from:
904// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
905// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
906static APFloat harrisonUlp(const APFloat &X) {
907 const fltSemantics &Sem = X.getSemantics();
908 switch (X.getCategory()) {
909 case APFloat::fcNaN:
910 return APFloat::getQNaN(Sem);
912 return APFloat::getInf(Sem);
913 case APFloat::fcZero:
914 return APFloat::getSmallest(Sem);
916 break;
917 }
918 if (X.isDenormal() || X.isSmallestNormalized())
919 return APFloat::getSmallest(Sem);
920 int Exp = ilogb(X);
921 if (X.getExactLog2() != INT_MIN)
922 Exp -= 1;
923 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
925}
926
927namespace detail {
928/* Constructors. */
929void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
930 unsigned int count;
931
932 semantics = ourSemantics;
933 count = partCount();
934 if (count > 1)
935 significand.parts = new integerPart[count];
936}
937
938void IEEEFloat::freeSignificand() {
939 if (needsCleanup())
940 delete [] significand.parts;
941}
942
943void IEEEFloat::assign(const IEEEFloat &rhs) {
944 assert(semantics == rhs.semantics);
945
946 sign = rhs.sign;
947 category = rhs.category;
948 exponent = rhs.exponent;
949 if (isFiniteNonZero() || category == fcNaN)
950 copySignificand(rhs);
951}
952
953void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
954 assert(isFiniteNonZero() || category == fcNaN);
955 assert(rhs.partCount() >= partCount());
956
957 APInt::tcAssign(significandParts(), rhs.significandParts(),
958 partCount());
959}
960
961/* Make this number a NaN, with an arbitrary but deterministic value
962 for the significand. If double or longer, this is a signalling NaN,
963 which may not be ideal. If float, this is QNaN(0). */
964void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
966 llvm_unreachable("This floating point format does not support NaN");
967
968 if (Negative && !semantics->hasSignedRepr)
970 "This floating point format does not support signed values");
971
972 category = fcNaN;
973 sign = Negative;
974 exponent = exponentNaN();
975
976 integerPart *significand = significandParts();
977 unsigned numParts = partCount();
978
979 APInt fill_storage;
    // Finite-only types do not distinguish signalling and quiet NaN, so
    // make them all quiet.
983 SNaN = false;
984 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
985 sign = true;
986 fill_storage = APInt::getZero(semantics->precision - 1);
987 } else {
988 fill_storage = APInt::getAllOnes(semantics->precision - 1);
989 }
990 fill = &fill_storage;
991 }
992
993 // Set the significand bits to the fill.
994 if (!fill || fill->getNumWords() < numParts)
995 APInt::tcSet(significand, 0, numParts);
996 if (fill) {
997 APInt::tcAssign(significand, fill->getRawData(),
998 std::min(fill->getNumWords(), numParts));
999
1000 // Zero out the excess bits of the significand.
1001 unsigned bitsToPreserve = semantics->precision - 1;
1002 unsigned part = bitsToPreserve / 64;
1003 bitsToPreserve %= 64;
1004 significand[part] &= ((1ULL << bitsToPreserve) - 1);
1005 for (part++; part != numParts; ++part)
1006 significand[part] = 0;
1007 }
1008
1009 unsigned QNaNBit =
1010 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
1011
1012 if (SNaN) {
1013 // We always have to clear the QNaN bit to make it an SNaN.
1014 APInt::tcClearBit(significand, QNaNBit);
1015
1016 // If there are no bits set in the payload, we have to set
1017 // *something* to make it a NaN instead of an infinity;
1018 // conventionally, this is the next bit down from the QNaN bit.
1019 if (APInt::tcIsZero(significand, numParts))
1020 APInt::tcSetBit(significand, QNaNBit - 1);
1021 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
1022 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
1023 // Do nothing.
1024 } else {
1025 // We always have to set the QNaN bit to make it a QNaN.
1026 APInt::tcSetBit(significand, QNaNBit);
1027 }
1028
1029 // For x87 extended precision, we want to make a NaN, not a
1030 // pseudo-NaN. Maybe we should expose the ability to make
1031 // pseudo-NaNs?
1032 if (semantics == &semX87DoubleExtended)
1033 APInt::tcSetBit(significand, QNaNBit + 1);
1034}
1035
1037 if (this != &rhs) {
1038 if (semantics != rhs.semantics) {
1039 freeSignificand();
1040 initialize(rhs.semantics);
1041 }
1042 assign(rhs);
1043 }
1044
1045 return *this;
1046}
1047
1049 freeSignificand();
1050
1051 semantics = rhs.semantics;
1052 significand = rhs.significand;
1053 exponent = rhs.exponent;
1054 category = rhs.category;
1055 sign = rhs.sign;
1056
1057 rhs.semantics = &semBogus;
1058 return *this;
1059}
1060
1062 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1063 (APInt::tcExtractBit(significandParts(),
1064 semantics->precision - 1) == 0);
1065}
1066
1068 // The smallest number by magnitude in our format will be the smallest
1069 // denormal, i.e. the floating point number with exponent being minimum
1070 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1071 return isFiniteNonZero() && exponent == semantics->minExponent &&
1072 significandMSB() == 0;
1073}
1074
1076 return getCategory() == fcNormal && exponent == semantics->minExponent &&
1077 isSignificandAllZerosExceptMSB();
1078}
1079
1080unsigned int IEEEFloat::getNumHighBits() const {
1081 const unsigned int PartCount = partCountForBits(semantics->precision);
1082 const unsigned int Bits = PartCount * integerPartWidth;
1083
1084 // Compute how many bits are used in the final word.
1085 // When precision is just 1, it represents the 'Pth'
1086 // Precision bit and not the actual significand bit.
1087 const unsigned int NumHighBits = (semantics->precision > 1)
1088 ? (Bits - semantics->precision + 1)
1089 : (Bits - semantics->precision);
1090 return NumHighBits;
1091}
1092
1093bool IEEEFloat::isSignificandAllOnes() const {
1094 // Test if the significand excluding the integral bit is all ones. This allows
1095 // us to test for binade boundaries.
1096 const integerPart *Parts = significandParts();
1097 const unsigned PartCount = partCountForBits(semantics->precision);
1098 for (unsigned i = 0; i < PartCount - 1; i++)
1099 if (~Parts[i])
1100 return false;
1101
1102 // Set the unused high bits to all ones when we compare.
1103 const unsigned NumHighBits = getNumHighBits();
1104 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1105 "Can not have more high bits to fill than integerPartWidth");
1106 const integerPart HighBitFill =
1107 ~integerPart(0) << (integerPartWidth - NumHighBits);
1108 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
1109 return false;
1110
1111 return true;
1112}
1113
1114bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1115 // Test if the significand excluding the integral bit is all ones except for
1116 // the least significant bit.
1117 const integerPart *Parts = significandParts();
1118
1119 if (Parts[0] & 1)
1120 return false;
1121
1122 const unsigned PartCount = partCountForBits(semantics->precision);
1123 for (unsigned i = 0; i < PartCount - 1; i++) {
1124 if (~Parts[i] & ~unsigned{!i})
1125 return false;
1126 }
1127
1128 // Set the unused high bits to all ones when we compare.
1129 const unsigned NumHighBits = getNumHighBits();
1130 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1131 "Can not have more high bits to fill than integerPartWidth");
1132 const integerPart HighBitFill = ~integerPart(0)
1133 << (integerPartWidth - NumHighBits);
1134 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1135 return false;
1136
1137 return true;
1138}
1139
1140bool IEEEFloat::isSignificandAllZeros() const {
1141 // Test if the significand excluding the integral bit is all zeros. This
1142 // allows us to test for binade boundaries.
1143 const integerPart *Parts = significandParts();
1144 const unsigned PartCount = partCountForBits(semantics->precision);
1145
1146 for (unsigned i = 0; i < PartCount - 1; i++)
1147 if (Parts[i])
1148 return false;
1149
1150 // Compute how many bits are used in the final word.
1151 const unsigned NumHighBits = getNumHighBits();
1152 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1153 "clear than integerPartWidth");
1154 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1155
1156 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1157 return false;
1158
1159 return true;
1160}
1161
1162bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1163 const integerPart *Parts = significandParts();
1164 const unsigned PartCount = partCountForBits(semantics->precision);
1165
1166 for (unsigned i = 0; i < PartCount - 1; i++) {
1167 if (Parts[i])
1168 return false;
1169 }
1170
1171 const unsigned NumHighBits = getNumHighBits();
1172 const integerPart MSBMask = integerPart(1)
1173 << (integerPartWidth - NumHighBits);
1174 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1175}
1176
1178 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1180 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1181 // The largest number by magnitude in our format will be the floating point
1182 // number with maximum exponent and with significand that is all ones except
1183 // the LSB.
1184 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1185 ? isSignificandAllOnesExceptLSB()
1186 : IsMaxExp;
1187 } else {
1188 // The largest number by magnitude in our format will be the floating point
1189 // number with maximum exponent and with significand that is all ones.
1190 return IsMaxExp && isSignificandAllOnes();
1191 }
1192}
1193
1195 // This could be made more efficient; I'm going for obviously correct.
1196 if (!isFinite()) return false;
1197 IEEEFloat truncated = *this;
1198 truncated.roundToIntegral(rmTowardZero);
1199 return compare(truncated) == cmpEqual;
1200}
1201
1202bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1203 if (this == &rhs)
1204 return true;
1205 if (semantics != rhs.semantics ||
1206 category != rhs.category ||
1207 sign != rhs.sign)
1208 return false;
1209 if (category==fcZero || category==fcInfinity)
1210 return true;
1211
1212 if (isFiniteNonZero() && exponent != rhs.exponent)
1213 return false;
1214
1215 return std::equal(significandParts(), significandParts() + partCount(),
1216 rhs.significandParts());
1217}
1218
1220 initialize(&ourSemantics);
1221 sign = 0;
1222 category = fcNormal;
1223 zeroSignificand();
1224 exponent = ourSemantics.precision - 1;
1225 significandParts()[0] = value;
1227}
1228
1230 initialize(&ourSemantics);
1231 // The Float8E8MOFNU format does not have a representation
1232 // for zero. So, use the closest representation instead.
1233 // Moreover, the all-zero encoding represents a valid
1234 // normal value (which is the smallestNormalized here).
1235 // Hence, we call makeSmallestNormalized (where category is
1236 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1237 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1238}
1239
1240// Delegate to the previous constructor, because later copy constructor may
1241// actually inspects category, which can't be garbage.
1243 : IEEEFloat(ourSemantics) {}
1244
1246 initialize(rhs.semantics);
1247 assign(rhs);
1248}
1249
1251 *this = std::move(rhs);
1252}
1253
1254IEEEFloat::~IEEEFloat() { freeSignificand(); }
1255
1256unsigned int IEEEFloat::partCount() const {
1257 return partCountForBits(semantics->precision + 1);
1258}
1259
1260const APFloat::integerPart *IEEEFloat::significandParts() const {
1261 return const_cast<IEEEFloat *>(this)->significandParts();
1262}
1263
1264APFloat::integerPart *IEEEFloat::significandParts() {
1265 if (partCount() > 1)
1266 return significand.parts;
1267 else
1268 return &significand.part;
1269}
1270
1271void IEEEFloat::zeroSignificand() {
1272 APInt::tcSet(significandParts(), 0, partCount());
1273}
1274
1275/* Increment an fcNormal floating point number's significand. */
1276void IEEEFloat::incrementSignificand() {
1277 integerPart carry;
1278
1279 carry = APInt::tcIncrement(significandParts(), partCount());
1280
1281 /* Our callers should never cause us to overflow. */
1282 assert(carry == 0);
1283 (void)carry;
1284}
1285
1286/* Add the significand of the RHS. Returns the carry flag. */
1287APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1288 integerPart *parts;
1289
1290 parts = significandParts();
1291
1292 assert(semantics == rhs.semantics);
1293 assert(exponent == rhs.exponent);
1294
1295 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1296}
1297
1298/* Subtract the significand of the RHS with a borrow flag. Returns
1299 the borrow flag. */
1300APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1301 integerPart borrow) {
1302 integerPart *parts;
1303
1304 parts = significandParts();
1305
1306 assert(semantics == rhs.semantics);
1307 assert(exponent == rhs.exponent);
1308
1309 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1310 partCount());
1311}
1312
1313/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1314 on to the full-precision result of the multiplication. Returns the
1315 lost fraction. */
1316lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1317 IEEEFloat addend,
1318 bool ignoreAddend) {
1319 unsigned int omsb; // One, not zero, based MSB.
1320 unsigned int partsCount, newPartsCount, precision;
1321 integerPart *lhsSignificand;
1322 integerPart scratch[4];
1323 integerPart *fullSignificand;
1324 lostFraction lost_fraction;
1325 bool ignored;
1326
1327 assert(semantics == rhs.semantics);
1328
1329 precision = semantics->precision;
1330
1331 // Allocate space for twice as many bits as the original significand, plus one
1332 // extra bit for the addition to overflow into.
1333 newPartsCount = partCountForBits(precision * 2 + 1);
1334
1335 if (newPartsCount > 4)
1336 fullSignificand = new integerPart[newPartsCount];
1337 else
1338 fullSignificand = scratch;
1339
1340 lhsSignificand = significandParts();
1341 partsCount = partCount();
1342
1343 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1344 rhs.significandParts(), partsCount, partsCount);
1345
1346 lost_fraction = lfExactlyZero;
1347 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1348 exponent += rhs.exponent;
1349
1350 // Assume the operands involved in the multiplication are single-precision
1351 // FP, and the two multiplicants are:
1352 // *this = a23 . a22 ... a0 * 2^e1
1353 // rhs = b23 . b22 ... b0 * 2^e2
1354 // the result of multiplication is:
1355 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1356 // Note that there are three significant bits at the left-hand side of the
1357 // radix point: two for the multiplication, and an overflow bit for the
1358 // addition (that will always be zero at this point). Move the radix point
1359 // toward left by two bits, and adjust exponent accordingly.
1360 exponent += 2;
1361
1362 if (!ignoreAddend && addend.isNonZero()) {
1363 // The intermediate result of the multiplication has "2 * precision"
1364 // signicant bit; adjust the addend to be consistent with mul result.
1365 //
1366 Significand savedSignificand = significand;
1367 const fltSemantics *savedSemantics = semantics;
1368 fltSemantics extendedSemantics;
1370 unsigned int extendedPrecision;
1371
1372 // Normalize our MSB to one below the top bit to allow for overflow.
1373 extendedPrecision = 2 * precision + 1;
1374 if (omsb != extendedPrecision - 1) {
1375 assert(extendedPrecision > omsb);
1376 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1377 (extendedPrecision - 1) - omsb);
1378 exponent -= (extendedPrecision - 1) - omsb;
1379 }
1380
1381 /* Create new semantics. */
1382 extendedSemantics = *semantics;
1383 extendedSemantics.precision = extendedPrecision;
1384
1385 if (newPartsCount == 1)
1386 significand.part = fullSignificand[0];
1387 else
1388 significand.parts = fullSignificand;
1389 semantics = &extendedSemantics;
1390
1391 // Make a copy so we can convert it to the extended semantics.
1392 // Note that we cannot convert the addend directly, as the extendedSemantics
1393 // is a local variable (which we take a reference to).
1394 IEEEFloat extendedAddend(addend);
1395 status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
1396 &ignored);
1397 assert(status == APFloat::opOK);
1398 (void)status;
1399
1400 // Shift the significand of the addend right by one bit. This guarantees
1401 // that the high bit of the significand is zero (same as fullSignificand),
1402 // so the addition will overflow (if it does overflow at all) into the top bit.
1403 lost_fraction = extendedAddend.shiftSignificandRight(1);
1404 assert(lost_fraction == lfExactlyZero &&
1405 "Lost precision while shifting addend for fused-multiply-add.");
1406
1407 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1408
1409 /* Restore our state. */
1410 if (newPartsCount == 1)
1411 fullSignificand[0] = significand.part;
1412 significand = savedSignificand;
1413 semantics = savedSemantics;
1414
1415 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1416 }
1417
1418 // Convert the result having "2 * precision" significant-bits back to the one
1419 // having "precision" significant-bits. First, move the radix point from
1420 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1421 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1422 exponent -= precision + 1;
1423
1424 // In case MSB resides at the left-hand side of radix point, shift the
1425 // mantissa right by some amount to make sure the MSB reside right before
1426 // the radix point (i.e. "MSB . rest-significant-bits").
1427 //
1428 // Note that the result is not normalized when "omsb < precision". So, the
1429 // caller needs to call IEEEFloat::normalize() if normalized value is
1430 // expected.
1431 if (omsb > precision) {
1432 unsigned int bits, significantParts;
1433 lostFraction lf;
1434
1435 bits = omsb - precision;
1436 significantParts = partCountForBits(omsb);
1437 lf = shiftRight(fullSignificand, significantParts, bits);
1438 lost_fraction = combineLostFractions(lf, lost_fraction);
1439 exponent += bits;
1440 }
1441
1442 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1443
1444 if (newPartsCount > 4)
1445 delete [] fullSignificand;
1446
1447 return lost_fraction;
1448}
1449
1450lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1451 // When the given semantics has zero, the addend here is a zero.
1452 // i.e . it belongs to the 'fcZero' category.
1453 // But when the semantics does not support zero, we need to
1454 // explicitly convey that this addend should be ignored
1455 // for multiplication.
1456 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1457}
1458
1459/* Multiply the significands of LHS and RHS to DST. */
1460lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1461 unsigned int bit, i, partsCount;
1462 const integerPart *rhsSignificand;
1463 integerPart *lhsSignificand, *dividend, *divisor;
1464 integerPart scratch[4];
1465 lostFraction lost_fraction;
1466
1467 assert(semantics == rhs.semantics);
1468
1469 lhsSignificand = significandParts();
1470 rhsSignificand = rhs.significandParts();
1471 partsCount = partCount();
1472
1473 if (partsCount > 2)
1474 dividend = new integerPart[partsCount * 2];
1475 else
1476 dividend = scratch;
1477
1478 divisor = dividend + partsCount;
1479
1480 /* Copy the dividend and divisor as they will be modified in-place. */
1481 for (i = 0; i < partsCount; i++) {
1482 dividend[i] = lhsSignificand[i];
1483 divisor[i] = rhsSignificand[i];
1484 lhsSignificand[i] = 0;
1485 }
1486
1487 exponent -= rhs.exponent;
1488
1489 unsigned int precision = semantics->precision;
1490
1491 /* Normalize the divisor. */
1492 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1493 if (bit) {
1494 exponent += bit;
1495 APInt::tcShiftLeft(divisor, partsCount, bit);
1496 }
1497
1498 /* Normalize the dividend. */
1499 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1500 if (bit) {
1501 exponent -= bit;
1502 APInt::tcShiftLeft(dividend, partsCount, bit);
1503 }
1504
1505 /* Ensure the dividend >= divisor initially for the loop below.
1506 Incidentally, this means that the division loop below is
1507 guaranteed to set the integer bit to one. */
1508 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1509 exponent--;
1510 APInt::tcShiftLeft(dividend, partsCount, 1);
1511 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1512 }
1513
1514 /* Long division. */
1515 for (bit = precision; bit; bit -= 1) {
1516 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1517 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1518 APInt::tcSetBit(lhsSignificand, bit - 1);
1519 }
1520
1521 APInt::tcShiftLeft(dividend, partsCount, 1);
1522 }
1523
1524 /* Figure out the lost fraction. */
1525 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1526
1527 if (cmp > 0)
1528 lost_fraction = lfMoreThanHalf;
1529 else if (cmp == 0)
1530 lost_fraction = lfExactlyHalf;
1531 else if (APInt::tcIsZero(dividend, partsCount))
1532 lost_fraction = lfExactlyZero;
1533 else
1534 lost_fraction = lfLessThanHalf;
1535
1536 if (partsCount > 2)
1537 delete [] dividend;
1538
1539 return lost_fraction;
1540}
1541
1542unsigned int IEEEFloat::significandMSB() const {
1543 return APInt::tcMSB(significandParts(), partCount());
1544}
1545
1546unsigned int IEEEFloat::significandLSB() const {
1547 return APInt::tcLSB(significandParts(), partCount());
1548}
1549
1550/* Note that a zero result is NOT normalized to fcZero. */
1551lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1552 /* Our exponent should not overflow. */
1553 assert((ExponentType) (exponent + bits) >= exponent);
1554
1555 exponent += bits;
1556
1557 return shiftRight(significandParts(), partCount(), bits);
1558}
1559
1560/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1561void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1562 assert(bits < semantics->precision ||
1563 (semantics->precision == 1 && bits <= 1));
1564
1565 if (bits) {
1566 unsigned int partsCount = partCount();
1567
1568 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1569 exponent -= bits;
1570
1571 assert(!APInt::tcIsZero(significandParts(), partsCount));
1572 }
1573}
1574
1576 int compare;
1577
1578 assert(semantics == rhs.semantics);
1580 assert(rhs.isFiniteNonZero());
1581
1582 compare = exponent - rhs.exponent;
1583
1584 /* If exponents are equal, do an unsigned bignum comparison of the
1585 significands. */
1586 if (compare == 0)
1587 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1588 partCount());
1589
1590 if (compare > 0)
1591 return cmpGreaterThan;
1592 else if (compare < 0)
1593 return cmpLessThan;
1594 else
1595 return cmpEqual;
1596}
1597
1598/* Set the least significant BITS bits of a bignum, clear the
1599 rest. */
1600static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1601 unsigned bits) {
1602 unsigned i = 0;
1603 while (bits > APInt::APINT_BITS_PER_WORD) {
1604 dst[i++] = ~(APInt::WordType)0;
1606 }
1607
1608 if (bits)
1609 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1610
1611 while (i < parts)
1612 dst[i++] = 0;
1613}
1614
1615/* Handle overflow. Sign is preserved. We either become infinity or
1616 the largest finite number. */
1617APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1619 /* Infinity? */
1620 if (rounding_mode == rmNearestTiesToEven ||
1621 rounding_mode == rmNearestTiesToAway ||
1622 (rounding_mode == rmTowardPositive && !sign) ||
1623 (rounding_mode == rmTowardNegative && sign)) {
1625 makeNaN(false, sign);
1626 else
1627 category = fcInfinity;
1628 return static_cast<opStatus>(opOverflow | opInexact);
1629 }
1630 }
1631
1632 /* Otherwise we become the largest finite number. */
1633 category = fcNormal;
1634 exponent = semantics->maxExponent;
1635 tcSetLeastSignificantBits(significandParts(), partCount(),
1636 semantics->precision);
1639 APInt::tcClearBit(significandParts(), 0);
1640
1641 return opInexact;
1642}
1643
1644/* Returns TRUE if, when truncating the current number, with BIT the
1645 new LSB, with the given lost fraction and rounding mode, the result
1646 would need to be rounded away from zero (i.e., by increasing the
1647 signficand). This routine must work for fcZero of both signs, and
1648 fcNormal numbers. */
1649bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1650 lostFraction lost_fraction,
1651 unsigned int bit) const {
1652 /* NaNs and infinities should not have lost fractions. */
1653 assert(isFiniteNonZero() || category == fcZero);
1654
1655 /* Current callers never pass this so we don't handle it. */
1656 assert(lost_fraction != lfExactlyZero);
1657
1658 switch (rounding_mode) {
1660 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1661
1663 if (lost_fraction == lfMoreThanHalf)
1664 return true;
1665
1666 /* Our zeroes don't have a significand to test. */
1667 if (lost_fraction == lfExactlyHalf && category != fcZero)
1668 return APInt::tcExtractBit(significandParts(), bit);
1669
1670 return false;
1671
1672 case rmTowardZero:
1673 return false;
1674
1675 case rmTowardPositive:
1676 return !sign;
1677
1678 case rmTowardNegative:
1679 return sign;
1680
1681 default:
1682 break;
1683 }
1684 llvm_unreachable("Invalid rounding mode found");
1685}
1686
1687APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1688 lostFraction lost_fraction) {
1689 unsigned int omsb; /* One, not zero, based MSB. */
1690 int exponentChange;
1691
1692 if (!isFiniteNonZero())
1693 return opOK;
1694
1695 /* Before rounding normalize the exponent of fcNormal numbers. */
1696 omsb = significandMSB() + 1;
1697
1698 // Only skip this `if` if the value is exactly zero.
1699 if (omsb || lost_fraction != lfExactlyZero) {
1700 /* OMSB is numbered from 1. We want to place it in the integer
1701 bit numbered PRECISION if possible, with a compensating change in
1702 the exponent. */
1703 exponentChange = omsb - semantics->precision;
1704
1705 /* If the resulting exponent is too high, overflow according to
1706 the rounding mode. */
1707 if (exponent + exponentChange > semantics->maxExponent)
1708 return handleOverflow(rounding_mode);
1709
1710 /* Subnormal numbers have exponent minExponent, and their MSB
1711 is forced based on that. */
1712 if (exponent + exponentChange < semantics->minExponent)
1713 exponentChange = semantics->minExponent - exponent;
1714
1715 /* Shifting left is easy as we don't lose precision. */
1716 if (exponentChange < 0) {
1717 assert(lost_fraction == lfExactlyZero);
1718
1719 shiftSignificandLeft(-exponentChange);
1720
1721 return opOK;
1722 }
1723
1724 if (exponentChange > 0) {
1725 lostFraction lf;
1726
1727 /* Shift right and capture any new lost fraction. */
1728 lf = shiftSignificandRight(exponentChange);
1729
1730 lost_fraction = combineLostFractions(lf, lost_fraction);
1731
1732 /* Keep OMSB up-to-date. */
1733 if (omsb > (unsigned) exponentChange)
1734 omsb -= exponentChange;
1735 else
1736 omsb = 0;
1737 }
1738 }
1739
1740 // The all-ones values is an overflow if NaN is all ones. If NaN is
1741 // represented by negative zero, then it is a valid finite value.
1743 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1744 exponent == semantics->maxExponent && isSignificandAllOnes())
1745 return handleOverflow(rounding_mode);
1746
1747 /* Now round the number according to rounding_mode given the lost
1748 fraction. */
1749
1750 /* As specified in IEEE 754, since we do not trap we do not report
1751 underflow for exact results. */
1752 if (lost_fraction == lfExactlyZero) {
1753 /* Canonicalize zeroes. */
1754 if (omsb == 0) {
1755 category = fcZero;
1756 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1757 sign = false;
1758 if (!semantics->hasZero)
1760 }
1761
1762 return opOK;
1763 }
1764
1765 /* Increment the significand if we're rounding away from zero. */
1766 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1767 if (omsb == 0)
1768 exponent = semantics->minExponent;
1769
1770 incrementSignificand();
1771 omsb = significandMSB() + 1;
1772
1773 /* Did the significand increment overflow? */
1774 if (omsb == (unsigned) semantics->precision + 1) {
1775 /* Renormalize by incrementing the exponent and shifting our
1776 significand right one. However if we already have the
1777 maximum exponent we overflow to infinity. */
1778 if (exponent == semantics->maxExponent)
1779 // Invoke overflow handling with a rounding mode that will guarantee
1780 // that the result gets turned into the correct infinity representation.
1781 // This is needed instead of just setting the category to infinity to
1782 // account for 8-bit floating point types that have no inf, only NaN.
1783 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1784
1785 shiftSignificandRight(1);
1786
1787 return opInexact;
1788 }
1789
1790 // The all-ones values is an overflow if NaN is all ones. If NaN is
1791 // represented by negative zero, then it is a valid finite value.
1793 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1794 exponent == semantics->maxExponent && isSignificandAllOnes())
1795 return handleOverflow(rounding_mode);
1796 }
1797
1798 /* The normal case - we were and are not denormal, and any
1799 significand increment above didn't overflow. */
1800 if (omsb == semantics->precision)
1801 return opInexact;
1802
1803 /* We have a non-zero denormal. */
1804 assert(omsb < semantics->precision);
1805
1806 /* Canonicalize zeroes. */
1807 if (omsb == 0) {
1808 category = fcZero;
1809 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1810 sign = false;
1811 // This condition handles the case where the semantics
1812 // does not have zero but uses the all-zero encoding
1813 // to represent the smallest normal value.
1814 if (!semantics->hasZero)
1816 }
1817
1818 /* The fcZero case is a denormal that underflowed to zero. */
1819 return (opStatus) (opUnderflow | opInexact);
1820}
1821
1822APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1823 bool subtract) {
1824 switch (PackCategoriesIntoKey(category, rhs.category)) {
1825 default:
1826 llvm_unreachable(nullptr);
1827
1831 assign(rhs);
1832 [[fallthrough]];
1837 if (isSignaling()) {
1838 makeQuiet();
1839 return opInvalidOp;
1840 }
1841 return rhs.isSignaling() ? opInvalidOp : opOK;
1842
1846 return opOK;
1847
1850 category = fcInfinity;
1851 sign = rhs.sign ^ subtract;
1852 return opOK;
1853
1855 assign(rhs);
1856 sign = rhs.sign ^ subtract;
1857 return opOK;
1858
1860 /* Sign depends on rounding mode; handled by caller. */
1861 return opOK;
1862
1864 /* Differently signed infinities can only be validly
1865 subtracted. */
1866 if (((sign ^ rhs.sign)!=0) != subtract) {
1867 makeNaN();
1868 return opInvalidOp;
1869 }
1870
1871 return opOK;
1872
1874 return opDivByZero;
1875 }
1876}
1877
1878/* Add or subtract two normal numbers. */
1879lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1880 bool subtract) {
1881 integerPart carry = 0;
1882 lostFraction lost_fraction;
1883 int bits;
1884
1885 /* Determine if the operation on the absolute values is effectively
1886 an addition or subtraction. */
1887 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1888
1889 /* Are we bigger exponent-wise than the RHS? */
1890 bits = exponent - rhs.exponent;
1891
1892 /* Subtraction is more subtle than one might naively expect. */
1893 if (subtract) {
1894 if ((bits < 0) && !semantics->hasSignedRepr)
1896 "This floating point format does not support signed values");
1897
1898 IEEEFloat temp_rhs(rhs);
1899 bool lost_fraction_is_from_rhs = false;
1900
1901 if (bits == 0)
1902 lost_fraction = lfExactlyZero;
1903 else if (bits > 0) {
1904 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1905 lost_fraction_is_from_rhs = true;
1906 shiftSignificandLeft(1);
1907 } else {
1908 lost_fraction = shiftSignificandRight(-bits - 1);
1909 temp_rhs.shiftSignificandLeft(1);
1910 }
1911
1912 // Should we reverse the subtraction.
1913 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1914 if (cmp_result == cmpLessThan) {
1915 bool borrow =
1916 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1917 if (borrow) {
1918 // The lost fraction is being subtracted, borrow from the significand
1919 // and invert `lost_fraction`.
1920 if (lost_fraction == lfLessThanHalf)
1921 lost_fraction = lfMoreThanHalf;
1922 else if (lost_fraction == lfMoreThanHalf)
1923 lost_fraction = lfLessThanHalf;
1924 }
1925 carry = temp_rhs.subtractSignificand(*this, borrow);
1926 copySignificand(temp_rhs);
1927 sign = !sign;
1928 } else if (cmp_result == cmpGreaterThan) {
1929 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1930 if (borrow) {
1931 // The lost fraction is being subtracted, borrow from the significand
1932 // and invert `lost_fraction`.
1933 if (lost_fraction == lfLessThanHalf)
1934 lost_fraction = lfMoreThanHalf;
1935 else if (lost_fraction == lfMoreThanHalf)
1936 lost_fraction = lfLessThanHalf;
1937 }
1938 carry = subtractSignificand(temp_rhs, borrow);
1939 } else { // cmpEqual
1940 zeroSignificand();
1941 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1942 // rhs is slightly larger due to the lost fraction, flip the sign.
1943 sign = !sign;
1944 }
1945 }
1946
1947 /* The code above is intended to ensure that no borrow is
1948 necessary. */
1949 assert(!carry);
1950 (void)carry;
1951 } else {
1952 if (bits > 0) {
1953 IEEEFloat temp_rhs(rhs);
1954
1955 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1956 carry = addSignificand(temp_rhs);
1957 } else {
1958 lost_fraction = shiftSignificandRight(-bits);
1959 carry = addSignificand(rhs);
1960 }
1961
1962 /* We have a guard bit; generating a carry cannot happen. */
1963 assert(!carry);
1964 (void)carry;
1965 }
1966
1967 return lost_fraction;
1968}
1969
1970APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1971 switch (PackCategoriesIntoKey(category, rhs.category)) {
1972 default:
1973 llvm_unreachable(nullptr);
1974
1978 assign(rhs);
1979 sign = false;
1980 [[fallthrough]];
1985 sign ^= rhs.sign; // restore the original sign
1986 if (isSignaling()) {
1987 makeQuiet();
1988 return opInvalidOp;
1989 }
1990 return rhs.isSignaling() ? opInvalidOp : opOK;
1991
1995 category = fcInfinity;
1996 return opOK;
1997
2001 category = fcZero;
2002 return opOK;
2003
2006 makeNaN();
2007 return opInvalidOp;
2008
2010 return opOK;
2011 }
2012}
2013
2014APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
2015 switch (PackCategoriesIntoKey(category, rhs.category)) {
2016 default:
2017 llvm_unreachable(nullptr);
2018
2022 assign(rhs);
2023 sign = false;
2024 [[fallthrough]];
2029 sign ^= rhs.sign; // restore the original sign
2030 if (isSignaling()) {
2031 makeQuiet();
2032 return opInvalidOp;
2033 }
2034 return rhs.isSignaling() ? opInvalidOp : opOK;
2035
2040 return opOK;
2041
2043 category = fcZero;
2044 return opOK;
2045
2047 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
2048 makeNaN(false, sign);
2049 else
2050 category = fcInfinity;
2051 return opDivByZero;
2052
2055 makeNaN();
2056 return opInvalidOp;
2057
2059 return opOK;
2060 }
2061}
2062
2063APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
2064 switch (PackCategoriesIntoKey(category, rhs.category)) {
2065 default:
2066 llvm_unreachable(nullptr);
2067
2071 assign(rhs);
2072 [[fallthrough]];
2077 if (isSignaling()) {
2078 makeQuiet();
2079 return opInvalidOp;
2080 }
2081 return rhs.isSignaling() ? opInvalidOp : opOK;
2082
2086 return opOK;
2087
2093 makeNaN();
2094 return opInvalidOp;
2095
2097 return opOK;
2098 }
2099}
2100
2101APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
2102 switch (PackCategoriesIntoKey(category, rhs.category)) {
2103 default:
2104 llvm_unreachable(nullptr);
2105
2109 assign(rhs);
2110 [[fallthrough]];
2115 if (isSignaling()) {
2116 makeQuiet();
2117 return opInvalidOp;
2118 }
2119 return rhs.isSignaling() ? opInvalidOp : opOK;
2120
2124 return opOK;
2125
2131 makeNaN();
2132 return opInvalidOp;
2133
2135 return opDivByZero; // fake status, indicating this is not a special case
2136 }
2137}
2138
2139/* Change sign. */
2141 // With NaN-as-negative-zero, neither NaN or negative zero can change
2142 // their signs.
2143 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2144 (isZero() || isNaN()))
2145 return;
2146 /* Look mummy, this one's easy. */
2147 sign = !sign;
2148}
2149
2150/* Normalized addition or subtraction. */
2151APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2152 roundingMode rounding_mode,
2153 bool subtract) {
2154 opStatus fs;
2155
2156 fs = addOrSubtractSpecials(rhs, subtract);
2157
2158 /* This return code means it was not a simple case. */
2159 if (fs == opDivByZero) {
2160 lostFraction lost_fraction;
2161
2162 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2163 fs = normalize(rounding_mode, lost_fraction);
2164
2165 /* Can only be zero if we lost no fraction. */
2166 assert(category != fcZero || lost_fraction == lfExactlyZero);
2167 }
2168
2169 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2170 positive zero unless rounding to minus infinity, except that
2171 adding two like-signed zeroes gives that zero. */
2172 if (category == fcZero) {
2173 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2174 sign = (rounding_mode == rmTowardNegative);
2175 // NaN-in-negative-zero means zeros need to be normalized to +0.
2176 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2177 sign = false;
2178 }
2179
2180 return fs;
2181}
2182
2183/* Normalized addition. */
2185 roundingMode rounding_mode) {
2186 return addOrSubtract(rhs, rounding_mode, false);
2187}
2188
2189/* Normalized subtraction. */
2191 roundingMode rounding_mode) {
2192 return addOrSubtract(rhs, rounding_mode, true);
2193}
2194
2195/* Normalized multiply. */
2197 roundingMode rounding_mode) {
2198 opStatus fs;
2199
2200 sign ^= rhs.sign;
2201 fs = multiplySpecials(rhs);
2202
2203 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2204 sign = false;
2205 if (isFiniteNonZero()) {
2206 lostFraction lost_fraction = multiplySignificand(rhs);
2207 fs = normalize(rounding_mode, lost_fraction);
2208 if (lost_fraction != lfExactlyZero)
2209 fs = (opStatus) (fs | opInexact);
2210 }
2211
2212 return fs;
2213}
2214
2215/* Normalized divide. */
2217 roundingMode rounding_mode) {
2218 opStatus fs;
2219
2220 sign ^= rhs.sign;
2221 fs = divideSpecials(rhs);
2222
2223 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2224 sign = false;
2225 if (isFiniteNonZero()) {
2226 lostFraction lost_fraction = divideSignificand(rhs);
2227 fs = normalize(rounding_mode, lost_fraction);
2228 if (lost_fraction != lfExactlyZero)
2229 fs = (opStatus) (fs | opInexact);
2230 }
2231
2232 return fs;
2233}
2234
2235/* Normalized remainder. */
/* Computes the remainder of this value with respect to rhs in place, using
   round-to-nearest-even for the implied quotient (see the scheme described
   below).  Special operands are delegated to remainderSpecials.  The value
   is first reduced with mod against twice rhs, and the final comparison
   against half of rhs is carried out on copies converted to temporarily
   widened semantics.  The original sign is saved up front and re-applied
   at the end. */
2237 opStatus fs;
2238 unsigned int origSign = sign;
2239
2240 // First handle the special cases.
// Only an opDivByZero status from remainderSpecials continues to the
// computation below (where it is reset to opOK); any other status is
// returned as-is.
2241 fs = remainderSpecials(rhs);
2242 if (fs != opDivByZero)
2243 return fs;
2244
2245 fs = opOK;
2246
2247 // Make sure the current value is less than twice the denom. If the addition
2248 // did not succeed (an overflow has happened), this means that the finite
2249 // value we currently possess must be less than twice the denom (as we are
2250 // using the same semantics).
2251 IEEEFloat P2 = rhs;
2252 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2253 fs = mod(P2);
2254 assert(fs == opOK);
2255 }
2256
2257 // Let's work with absolute numbers.
2258 IEEEFloat P = rhs;
2259 P.sign = false;
2260 sign = false;
2261
2262 //
2263 // To calculate the remainder we use the following scheme.
2264 //
2265 // The remainder is defined as follows:
2266 //
2267 // remainder = numer - rquot * denom = x - r * p
2268 //
2269 // Where r is the result of: x/p, rounded toward the nearest integral value
2270 // (with halfway cases rounded toward the even number).
2271 //
2272 // Currently, (after x mod 2p):
2273 // r is the number of 2p's present inside x, which is inherently, an even
2274 // number of p's.
2275 //
2276 // We may split the remaining calculation into 4 options:
2277 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2278 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2279 // are done as well.
2280 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2281 // to subtract 1p at least once.
2282 // - if x >= p then we must subtract p at least once, as x must be a
2283 // remainder.
2284 //
2285 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2286 //
2287 // We can now split the remaining calculation to the following 3 options:
2288 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2289 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2290 // must round up to the next even number. so we must subtract p once more.
2291 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2292 // integral, and subtract p once more.
2293 //
2294
2295 // Extend the semantics to prevent an overflow/underflow or inexact result.
// The widened copy has one more exponent step at each end and two more bits
// of precision than the current semantics.
2296 bool losesInfo;
2297 fltSemantics extendedSemantics = *semantics;
2298 extendedSemantics.maxExponent++;
2299 extendedSemantics.minExponent--;
2300 extendedSemantics.precision += 2;
2301
2302 IEEEFloat VEx = *this;
2303 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2304 assert(fs == opOK && !losesInfo);
2305 IEEEFloat PEx = P;
2306 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2307 assert(fs == opOK && !losesInfo);
2308
2309 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2310 // any fraction.
2311 fs = VEx.add(VEx, rmNearestTiesToEven);
2312 assert(fs == opOK);
2313
// From here on VEx holds twice the current value, so comparing it with PEx
// is the "x versus 0.5p" test from the scheme above.
2314 if (VEx.compare(PEx) == cmpGreaterThan) {
2316 assert(fs == opOK);
2317
2318 // Make VEx = this.add(this), but because we have different semantics, we do
2319 // not want to `convert` again, so we just subtract PEx twice (which equals
2320 // to the desired value).
2321 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2322 assert(fs == opOK);
2323 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2324 assert(fs == opOK);
2325
2326 cmpResult result = VEx.compare(PEx);
2327 if (result == cmpGreaterThan || result == cmpEqual) {
2329 assert(fs == opOK);
2330 }
2331 }
2332
2333 if (isZero()) {
2334 sign = origSign; // IEEE754 requires this
2335 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2336 // But some 8-bit floats only have positive 0.
2337 sign = false;
2338 }
2339
// Non-zero result: fold the sign saved at entry back into the sign produced
// by the absolute-value computation.
2340 else
2341 sign ^= origSign;
2342 return fs;
2343}
2344
2345/* Normalized llvm frem (C fmod). */
/* Reduces this value with respect to rhs in place.  modSpecials handles the
   special categories; the loop then runs while both operands are finite and
   non-zero, each round building V as rhs scaled by a power of two (scalbn)
   so that it does not exceed this value in magnitude.  A zero result
   receives the sign the value had on entry to the loop. */
2347 opStatus fs;
2348 fs = modSpecials(rhs);
2349 unsigned int origSign = sign;
2350
2351 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
// Scale rhs by the difference of the two binary exponents.
2353 int Exp = ilogb(*this) - ilogb(rhs);
2354 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2355 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2356 // check for it.
// If the scaled value is unusable or larger in magnitude than this value,
// fall back to one power of two less.
2357 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2358 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2359 V.sign = sign;
2360
2362
2363 // When the semantics supports zero, this loop's
2364 // exit-condition is handled by the 'isFiniteNonZero'
2365 // category check above. However, when the semantics
2366 // does not have 'fcZero' and we have reached the
2367 // minimum possible value, (and any further subtract
2368 // will underflow to the same value) explicitly
2369 // provide an exit-path here.
2370 if (!semantics->hasZero && this->isSmallest())
2371 break;
2372
2373 assert(fs==opOK);
2374 }
2375 if (isZero()) {
2376 sign = origSign; // fmod requires this
// Formats using the NegativeZero NaN encoding only have a positive zero.
2377 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2378 sign = false;
2379 }
2380 return fs;
2381}
2382
2383/* Normalized fused-multiply-add. */
/* Computes this * multiplicand + addend in place.  When this value and
   multiplicand are finite non-zero and addend is finite, the product and sum
   are formed together by multiplySignificand and normalized once.
   Otherwise multiplySpecials runs first and, if it reports opOK, addend is
   added through addOrSubtract.  Returns the resulting status. */
2385 const IEEEFloat &addend,
2386 roundingMode rounding_mode) {
2387 opStatus fs;
2388
2389 /* Post-multiplication sign, before addition. */
2390 sign ^= multiplicand.sign;
2391
2392 /* If and only if all arguments are normal do we need to do an
2393 extended-precision calculation. */
2394 if (isFiniteNonZero() &&
2395 multiplicand.isFiniteNonZero() &&
2396 addend.isFinite()) {
2397 lostFraction lost_fraction;
2398
2399 lost_fraction = multiplySignificand(multiplicand, addend);
2400 fs = normalize(rounding_mode, lost_fraction);
/* Any bits dropped by the combined multiply-add make the result
   inexact. */
2401 if (lost_fraction != lfExactlyZero)
2402 fs = (opStatus) (fs | opInexact);
2403
2404 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2405 positive zero unless rounding to minus infinity, except that
2406 adding two like-signed zeroes gives that zero. */
/* A zero produced by underflow is excluded here and keeps its sign. */
2407 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2408 sign = (rounding_mode == rmTowardNegative);
2409 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2410 sign = false;
2411 }
2412 } else {
2413 fs = multiplySpecials(multiplicand);
2414
2415 /* FS can only be opOK or opInvalidOp. There is no more work
2416 to do in the latter case. The IEEE-754R standard says it is
2417 implementation-defined in this case whether, if ADDEND is a
2418 quiet NaN, we raise invalid op; this implementation does so.
2419
2420 If we need to do the addition we can do so with normal
2421 precision. */
2422 if (fs == opOK)
2423 fs = addOrSubtract(addend, rounding_mode, false);
2424 }
2425
2426 return fs;
2427}
2428
2429/* Rounding-mode correct round to integral value. */
/* Rounds this value in place to an integral value.  Infinities, quiet NaNs,
   zeros and values whose exponent already implies an integer are left
   unchanged and yield opOK; a signaling NaN is quieted and yields
   opInvalidOp.  All other values are rounded by adding and then subtracting
   2^(p-1), where p is the precision of the format; the status returned is
   the one produced by the addition. */
2431 opStatus fs;
2432
2433 if (isInfinity())
2434 // [IEEE Std 754-2008 6.1]:
2435 // The behavior of infinity in floating-point arithmetic is derived from the
2436 // limiting cases of real arithmetic with operands of arbitrarily
2437 // large magnitude, when such a limit exists.
2438 // ...
2439 // Operations on infinite operands are usually exact and therefore signal no
2440 // exceptions ...
2441 return opOK;
2442
2443 if (isNaN()) {
2444 if (isSignaling()) {
2445 // [IEEE Std 754-2008 6.2]:
2446 // Under default exception handling, any operation signaling an invalid
2447 // operation exception and for which a floating-point result is to be
2448 // delivered shall deliver a quiet NaN.
2449 makeQuiet();
2450 // [IEEE Std 754-2008 6.2]:
2451 // Signaling NaNs shall be reserved operands that, under default exception
2452 // handling, signal the invalid operation exception(see 7.2) for every
2453 // general-computational and signaling-computational operation except for
2454 // the conversions described in 5.12.
2455 return opInvalidOp;
2456 } else {
2457 // [IEEE Std 754-2008 6.2]:
2458 // For an operation with quiet NaN inputs, other than maximum and minimum
2459 // operations, if a floating-point result is to be delivered the result
2460 // shall be a quiet NaN which should be one of the input NaNs.
2461 // ...
2462 // Every general-computational and quiet-computational operation involving
2463 // one or more input NaNs, none of them signaling, shall signal no
2464 // exception, except fusedMultiplyAdd might signal the invalid operation
2465 // exception(see 7.2).
2466 return opOK;
2467 }
2468 }
2469
2470 if (isZero()) {
2471 // [IEEE Std 754-2008 6.3]:
2472 // ... the sign of the result of conversions, the quantize operation, the
2473 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2474 // the sign of the first or only operand.
2475 return opOK;
2476 }
2477
2478 // If the exponent is large enough, we know that this value is already
2479 // integral, and the arithmetic below would potentially cause it to saturate
2480 // to +/-Inf. Bail out early instead.
2481 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2482 return opOK;
2483
2484 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2485 // precision of our format, and then subtract it back off again. The choice
2486 // of rounding modes for the addition/subtraction determines the rounding mode
2487 // for our integral rounding as well.
2488 // NOTE: When the input value is negative, we do subtraction followed by
2489 // addition instead.
// The APInt is made NextPowerOf2(p) bits wide so that the shift by p-1 fits.
2490 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2491 1);
2492 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2493 IEEEFloat MagicConstant(*semantics);
2494 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2496 assert(fs == opOK);
// Giving the constant the sign of the input is what turns add/subtract into
// subtract/add for negative inputs.
2497 MagicConstant.sign = sign;
2498
2499 // Preserve the input sign so that we can handle the case of zero result
2500 // correctly.
2501 bool inputSign = isNegative();
2502
2503 fs = add(MagicConstant, rounding_mode);
2504
2505 // Current value and 'MagicConstant' are both integers, so the result of the
2506 // subtraction is always exact according to Sterbenz' lemma.
// Its status is therefore ignored; fs keeps the status of the addition.
2507 subtract(MagicConstant, rounding_mode);
2508
2509 // Restore the input sign.
2510 if (inputSign != isNegative())
2511 changeSign();
2512
2513 return fs;
2514}
2515
2516/* Comparison requires normalized numbers. */
/* Compares this value with rhs, which must use the same semantics (asserted).
   The pair of categories is classified through PackCategoriesIntoKey; the
   combinations settled inside the switch return cmpUnordered, cmpLessThan,
   cmpGreaterThan or cmpEqual directly, and the remaining combination breaks
   out to the sign-and-magnitude comparison after the switch. */
2518 cmpResult result;
2519
2520 assert(semantics == rhs.semantics);
2521
2522 switch (PackCategoriesIntoKey(category, rhs.category)) {
2523 default:
2524 llvm_unreachable(nullptr);
2525
2533 return cmpUnordered;
2534
/* Decided by the sign of this value alone. */
2538 if (sign)
2539 return cmpLessThan;
2540 else
2541 return cmpGreaterThan;
2542
/* Decided by the sign of rhs alone. */
2546 if (rhs.sign)
2547 return cmpGreaterThan;
2548 else
2549 return cmpLessThan;
2550
/* Equal when the signs agree; otherwise the negative operand is smaller. */
2552 if (sign == rhs.sign)
2553 return cmpEqual;
2554 else if (sign)
2555 return cmpLessThan;
2556 else
2557 return cmpGreaterThan;
2558
2560 return cmpEqual;
2561
2563 break;
2564 }
2565
2566 /* Two normal numbers. Do they have the same sign? */
2567 if (sign != rhs.sign) {
2568 if (sign)
2569 result = cmpLessThan;
2570 else
2571 result = cmpGreaterThan;
2572 } else {
2573 /* Compare absolute values; invert result if negative. */
2574 result = compareAbsoluteValue(rhs);
2575
2576 if (sign) {
2577 if (result == cmpLessThan)
2578 result = cmpGreaterThan;
2579 else if (result == cmpGreaterThan)
2580 result = cmpLessThan;
2581 }
2582 }
2583
2584 return result;
2585}
2586
2587/// IEEEFloat::convert - convert a value of one floating point type to another.
2588/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2589/// records whether the transformation lost information, i.e. whether
2590/// converting the result back to the original type will produce the
2591/// original value (this is almost the same as return value==opOK, but there
2592/// are edge cases where this is not so).
///
/// The conversion happens in place: the significand is shifted by the
/// difference in precision, the significand storage is resized for the new
/// part count, the semantics pointer is switched, and the value is then
/// normalized or, for NaN / infinity / zero, adjusted per category.
2593
2595 roundingMode rounding_mode,
2596 bool *losesInfo) {
2598 unsigned int newPartCount, oldPartCount;
2599 opStatus fs;
2600 int shift;
2601 const fltSemantics &fromSemantics = *semantics;
// Sampled before anything is modified; used later to choose the NaN status.
2602 bool is_signaling = isSignaling();
2603
2605 newPartCount = partCountForBits(toSemantics.precision + 1);
2606 oldPartCount = partCount();
// Positive when the target is more precise (left shift), negative when it is
// less precise (right shift).
2607 shift = toSemantics.precision - fromSemantics.precision;
2608
2609 bool X86SpecialNan = false;
2610 if (&fromSemantics == &semX87DoubleExtended &&
2611 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2612 (!(*significandParts() & 0x8000000000000000ULL) ||
2613 !(*significandParts() & 0x4000000000000000ULL))) {
2614 // x86 has some unusual NaNs which cannot be represented in any other
2615 // format; note them here.
2616 X86SpecialNan = true;
2617 }
2618
2619 // If this is a truncation of a denormal number, and the target semantics
2620 // has larger exponent range than the source semantics (this can happen
2621 // when truncating from PowerPC double-double to double format), the
2622 // right shift could lose result mantissa bits. Adjust exponent instead
2623 // of performing excessive shift.
2624 // Also do a similar trick in case shifting denormal would produce zero
2625 // significand as this case isn't handled correctly by normalize.
2626 if (shift < 0 && isFiniteNonZero()) {
2627 int omsb = significandMSB() + 1;
2628 int exponentChange = omsb - fromSemantics.precision;
2629 if (exponent + exponentChange < toSemantics.minExponent)
2630 exponentChange = toSemantics.minExponent - exponent;
2631 if (exponentChange < shift)
2632 exponentChange = shift;
2633 if (exponentChange < 0) {
2634 shift -= exponentChange;
2635 exponent += exponentChange;
2636 } else if (omsb <= -shift) {
2637 exponentChange = omsb + shift - 1; // leave at least one bit set
2638 shift -= exponentChange;
2639 exponent += exponentChange;
2640 }
2641 }
2642
2643 // If this is a truncation, perform the shift before we narrow the storage.
2644 if (shift < 0 && (isFiniteNonZero() ||
2645 (category == fcNaN && semantics->nonFiniteBehavior !=
2647 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2648
2649 // Fix the storage so it can hold to new value.
2650 if (newPartCount > oldPartCount) {
2651 // The new type requires more storage; make it available.
2652 integerPart *newParts;
2653 newParts = new integerPart[newPartCount];
2654 APInt::tcSet(newParts, 0, newPartCount);
// Only finite non-zero values and NaNs have significand bits to carry over.
2655 if (isFiniteNonZero() || category==fcNaN)
2656 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2657 freeSignificand();
2658 significand.parts = newParts;
2659 } else if (newPartCount == 1 && oldPartCount != 1) {
2660 // Switch to built-in storage for a single part.
2661 integerPart newPart = 0;
2662 if (isFiniteNonZero() || category==fcNaN)
2663 newPart = significandParts()[0];
2664 freeSignificand();
2665 significand.part = newPart;
2666 }
2667
2668 // Now that we have the right storage, switch the semantics.
2669 semantics = &toSemantics;
2670
2671 // If this is an extension, perform the shift now that the storage is
2672 // available.
2673 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2674 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2675
2676 if (isFiniteNonZero()) {
// Finite values: rounding into the new format decides both the status and
// whether information was lost.
2677 fs = normalize(rounding_mode, lostFraction);
2678 *losesInfo = (fs != opOK);
2679 } else if (category == fcNaN) {
2681 *losesInfo =
2683 makeNaN(false, sign);
2684 return is_signaling ? opInvalidOp : opOK;
2685 }
2686
2687 // If NaN is negative zero, we need to create a new NaN to avoid converting
2688 // NaN to -Inf.
2689 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2691 makeNaN(false, false);
2692
2693 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2694
2695 // For x87 extended precision, we want to make a NaN, not a special NaN if
2696 // the input wasn't special either.
2697 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2698 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2699
2700 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2701 // This also guarantees that a sNaN does not become Inf on a truncation
2702 // that loses all payload bits.
2703 if (is_signaling) {
2704 makeQuiet();
2705 fs = opInvalidOp;
2706 } else {
2707 fs = opOK;
2708 }
2709 } else if (category == fcInfinity &&
// The infinity is replaced by a NaN of the same sign; this is reported as
// lossy and inexact.
2711 makeNaN(false, sign);
2712 *losesInfo = true;
2713 fs = opInexact;
2714 } else if (category == fcZero &&
2716 // Negative zero loses info, but positive zero doesn't.
2717 *losesInfo =
2718 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2719 fs = *losesInfo ? opInexact : opOK;
2720 // NaN is negative zero means -0 -> +0, which can lose information
2721 sign = false;
2722 } else {
2723 *losesInfo = false;
2724 fs = opOK;
2725 }
2726
2727 if (category == fcZero && !semantics->hasZero)
2729 return fs;
2730}
2731
2732/* Convert a floating point number to an integer according to the
2733 rounding mode. If the rounded integer value is out of range this
2734 returns an invalid operation exception and the contents of the
2735 destination parts are unspecified. If the rounded value is in
2736 range but the floating point number is not the exact integer, the C
2737 standard doesn't require an inexact exception to be raised. IEEE
2738 854 does require it so we do that.
2739
2740 Note that for conversions to integer type the C standard requires
2741 round-to-zero to always be used. */
2742APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2743 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2744 roundingMode rounding_mode, bool *isExact) const {
2745 lostFraction lost_fraction;
2746 const integerPart *src;
2747 unsigned int dstPartsCount, truncatedBits;
2748
2749 *isExact = false;
2750
2751 /* Handle the three special cases first. */
2752 if (category == fcInfinity || category == fcNaN)
2753 return opInvalidOp;
2754
2755 dstPartsCount = partCountForBits(width);
2756 assert(dstPartsCount <= parts.size() && "Integer too big");
2757
2758 if (category == fcZero) {
2759 APInt::tcSet(parts.data(), 0, dstPartsCount);
2760 // Negative zero can't be represented as an int.
2761 *isExact = !sign;
2762 return opOK;
2763 }
2764
2765 src = significandParts();
2766
2767 /* Step 1: place our absolute value, with any fraction truncated, in
2768 the destination. */
2769 if (exponent < 0) {
2770 /* Our absolute value is less than one; truncate everything. */
2771 APInt::tcSet(parts.data(), 0, dstPartsCount);
2772 /* For exponent -1 the integer bit represents .5, look at that.
2773 For smaller exponents leftmost truncated bit is 0. */
2774 truncatedBits = semantics->precision -1U - exponent;
2775 } else {
2776 /* We want the most significant (exponent + 1) bits; the rest are
2777 truncated. */
2778 unsigned int bits = exponent + 1U;
2779
2780 /* Hopelessly large in magnitude? */
2781 if (bits > width)
2782 return opInvalidOp;
2783
2784 if (bits < semantics->precision) {
2785 /* We truncate (semantics->precision - bits) bits. */
2786 truncatedBits = semantics->precision - bits;
2787 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2788 } else {
2789 /* We want at least as many bits as are available. */
2790 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2791 0);
2792 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2793 bits - semantics->precision);
2794 truncatedBits = 0;
2795 }
2796 }
2797
2798 /* Step 2: work out any lost fraction, and increment the absolute
2799 value if we would round away from zero. */
2800 if (truncatedBits) {
2801 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2802 truncatedBits);
2803 if (lost_fraction != lfExactlyZero &&
2804 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2805 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2806 return opInvalidOp; /* Overflow. */
2807 }
2808 } else {
2809 lost_fraction = lfExactlyZero;
2810 }
2811
2812 /* Step 3: check if we fit in the destination. */
2813 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2814
2815 if (sign) {
2816 if (!isSigned) {
2817 /* Negative numbers cannot be represented as unsigned. */
2818 if (omsb != 0)
2819 return opInvalidOp;
2820 } else {
2821 /* It takes omsb bits to represent the unsigned integer value.
2822 We lose a bit for the sign, but care is needed as the
2823 maximally negative integer is a special case. */
2824 if (omsb == width &&
2825 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2826 return opInvalidOp;
2827
2828 /* This case can happen because of rounding. */
2829 if (omsb > width)
2830 return opInvalidOp;
2831 }
2832
2833 APInt::tcNegate (parts.data(), dstPartsCount);
2834 } else {
2835 if (omsb >= width + !isSigned)
2836 return opInvalidOp;
2837 }
2838
2839 if (lost_fraction == lfExactlyZero) {
2840 *isExact = true;
2841 return opOK;
2842 }
2843 return opInexact;
2844}
2845
2846/* Same as convertToSignExtendedInteger, except we provide
2847 deterministic values in case of an invalid operation exception,
2848 namely zero for NaNs and the minimal or maximal value respectively
2849 for underflow or overflow.
2850 The *isExact output tells whether the result is exact, in the sense
2851 that converting it back to the original floating point type produces
2852 the original value. This is almost equivalent to result==opOK,
2853 except for negative zeroes.
2854*/
2857 unsigned int width, bool isSigned,
2858 roundingMode rounding_mode, bool *isExact) const {
2859 opStatus fs;
2860
2861 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2862 isExact);
2863
/* On an invalid operation the destination is overwritten with a fixed
   value; every other status is passed through with the parts untouched. */
2864 if (fs == opInvalidOp) {
2865 unsigned int bits, dstPartsCount;
2866
2867 dstPartsCount = partCountForBits(width);
2868 assert(dstPartsCount <= parts.size() && "Integer too big");
2869
/* Number of low bits to set: none for NaN; for a negative value a single
   bit when signed (moved to the top bit below) and none when unsigned;
   for a positive value every bit except the sign bit. */
2870 if (category == fcNaN)
2871 bits = 0;
2872 else if (sign)
2873 bits = isSigned;
2874 else
2875 bits = width - isSigned;
2876
2877 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
/* Shift the low bit(s) set above up so the top bit of the width is set. */
2878 if (sign && isSigned)
2879 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2880 }
2881
2882 return fs;
2883}
2884
2885/* Convert an unsigned integer SRC to a floating point number,
2886 rounding according to ROUNDING_MODE. The sign of the floating
2887 point number is not modified. */
2888APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2889 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2890 unsigned int omsb, precision, dstCount;
2891 integerPart *dst;
2892 lostFraction lost_fraction;
2893
2894 category = fcNormal;
2895 omsb = APInt::tcMSB(src, srcCount) + 1;
2896 dst = significandParts();
2897 dstCount = partCount();
2898 precision = semantics->precision;
2899
2900 /* We want the most significant PRECISION bits of SRC. There may not
2901 be that many; extract what we can. */
2902 if (precision <= omsb) {
2903 exponent = omsb - 1;
2904 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2905 omsb - precision);
2906 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2907 } else {
2908 exponent = precision - 1;
2909 lost_fraction = lfExactlyZero;
2910 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2911 }
2912
2913 return normalize(rounding_mode, lost_fraction);
2914}
2915
/* Sets this value from the integer Val, rounding with rounding_mode.  When
   isSigned is set and Val is negative, the sign is recorded and the negated
   copy is converted; otherwise the sign is cleared.  Returns the status of
   convertFromUnsignedParts. */
2917 roundingMode rounding_mode) {
2918 unsigned int partCount = Val.getNumWords();
/* Work on a copy so it can be negated without touching Val. */
2919 APInt api = Val;
2920
2921 sign = false;
2922 if (isSigned && api.isNegative()) {
2923 sign = true;
2924 api = -api;
2925 }
2926
2927 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2928}
2929
/* Parses the hexadecimal significand and exponent in S into this value and
   normalizes it with ROUNDING_MODE.  Hex digits are packed into the
   significand from the most significant nibble down; digits that no longer
   fit are summarized as a lost fraction.  Malformed input (multiple dots,
   missing 'p' exponent, no digits, invalid characters) produces an error
   instead of a status. */
2931IEEEFloat::convertFromHexadecimalString(StringRef s,
2932 roundingMode rounding_mode) {
2933 lostFraction lost_fraction = lfExactlyZero;
2934
2935 category = fcNormal;
2936 zeroSignificand();
2937 exponent = 0;
2938
2939 integerPart *significand = significandParts();
2940 unsigned partsCount = partCount();
/* Next free bit position, counting down from the top of the significand. */
2941 unsigned bitPos = partsCount * integerPartWidth;
2942 bool computedTrailingFraction = false;
2943
2944 // Skip leading zeroes and any (hexa)decimal point.
2945 StringRef::iterator begin = s.begin();
2946 StringRef::iterator end = s.end();
2948 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2949 if (!PtrOrErr)
2950 return PtrOrErr.takeError();
2951 StringRef::iterator p = *PtrOrErr;
2952 StringRef::iterator firstSignificantDigit = p;
2953
2954 while (p != end) {
2955 integerPart hex_value;
2956
2957 if (*p == '.') {
2958 if (dot != end)
2959 return createError("String contains multiple dots");
2960 dot = p++;
2961 continue;
2962 }
2963
/* Stop at the first character that is not a hex digit. */
2964 hex_value = hexDigitValue(*p);
2965 if (hex_value == UINT_MAX)
2966 break;
2967
2968 p++;
2969
2970 // Store the number while we have space.
2971 if (bitPos) {
2972 bitPos -= 4;
2973 hex_value <<= bitPos % integerPartWidth;
2974 significand[bitPos / integerPartWidth] |= hex_value;
2975 } else if (!computedTrailingFraction) {
/* The lost fraction is computed only once, at the first digit that no
   longer fits; later digits are just skipped over. */
2976 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2977 if (!FractOrErr)
2978 return FractOrErr.takeError();
2979 lost_fraction = *FractOrErr;
2980 computedTrailingFraction = true;
2981 }
2982 }
2983
2984 /* Hex floats require an exponent but not a hexadecimal point. */
2985 if (p == end)
2986 return createError("Hex strings require an exponent");
2987 if (*p != 'p' && *p != 'P')
2988 return createError("Invalid character in significand");
2989 if (p == begin)
2990 return createError("Significand has no digits");
2991 if (dot != end && p - begin == 1)
2992 return createError("Significand has no digits");
2993
2994 /* Ignore the exponent if we are zero. */
2995 if (p != firstSignificantDigit) {
2996 int expAdjustment;
2997
2998 /* Implicit hexadecimal point? */
2999 if (dot == end)
3000 dot = p;
3001
3002 /* Calculate the exponent adjustment implicit in the number of
3003 significant digits. */
3004 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
3005 if (expAdjustment < 0)
3006 expAdjustment++;
/* Each hex digit contributes four bits. */
3007 expAdjustment = expAdjustment * 4 - 1;
3008
3009 /* Adjust for writing the significand starting at the most
3010 significant nibble. */
3011 expAdjustment += semantics->precision;
3012 expAdjustment -= partsCount * integerPartWidth;
3013
3014 /* Adjust for the given exponent. */
3015 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
3016 if (!ExpOrErr)
3017 return ExpOrErr.takeError();
3018 exponent = *ExpOrErr;
3019 }
3020
3021 return normalize(rounding_mode, lost_fraction);
3022}
3023
/* Sets this value to decSigParts * 10^exp, rounded with rounding_mode.
   decSigParts (sigPartCount parts) is the decimal significand as a binary
   integer.  The scaling by 5^|exp| is done in a temporary format whose
   precision is doubled on each pass of the loop until the accumulated
   error bound shows that truncating to the target precision rounds
   correctly. */
3025IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
3026 unsigned sigPartCount, int exp,
3027 roundingMode rounding_mode) {
3028 unsigned int parts, pow5PartCount;
/* Scratch semantics; the precision field is filled in on each pass. */
3029 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
3031 bool isNearest;
3032
3033 isNearest = (rounding_mode == rmNearestTiesToEven ||
3034 rounding_mode == rmNearestTiesToAway);
3035
/* Initial working width: the target precision plus 11 extra bits. */
3036 parts = partCountForBits(semantics->precision + 11);
3037
3038 /* Calculate pow(5, abs(exp)). */
3039 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
3040
3041 for (;; parts *= 2) {
3042 opStatus sigStatus, powStatus;
3043 unsigned int excessPrecision, truncatedBits;
3044
3045 calcSemantics.precision = parts * integerPartWidth - 1;
3046 excessPrecision = calcSemantics.precision - semantics->precision;
3047 truncatedBits = excessPrecision;
3048
3049 IEEEFloat decSig(calcSemantics, uninitialized);
3050 decSig.makeZero(sign);
3051 IEEEFloat pow5(calcSemantics);
3052
3053 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
3055 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
3057 /* Add exp, as 10^n = 5^n * 2^n. */
3058 decSig.exponent += exp;
3059
3060 lostFraction calcLostFraction;
3061 integerPart HUerr, HUdistance;
3062 unsigned int powHUerr;
3063
3064 if (exp >= 0) {
3065 /* multiplySignificand leaves the precision-th bit set to 1. */
3066 calcLostFraction = decSig.multiplySignificand(pow5);
3067 powHUerr = powStatus != opOK;
3068 } else {
3069 calcLostFraction = decSig.divideSignificand(pow5);
3070 /* Denormal numbers have less precision. */
3071 if (decSig.exponent < semantics->minExponent) {
3072 excessPrecision += (semantics->minExponent - decSig.exponent);
3073 truncatedBits = excessPrecision;
/* Never discard more bits than the working precision holds. */
3074 if (excessPrecision > calcSemantics.precision)
3075 excessPrecision = calcSemantics.precision;
3076 }
3077 /* Extra half-ulp lost in reciprocal of exponent. */
3078 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
3079 }
3080
3081 /* Both multiplySignificand and divideSignificand return the
3082 result with the integer bit set. */
3084 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
3085
/* Compare the accumulated error bound with twice the distance to the
   nearest rounding boundary (ulpsFromBoundary). */
3086 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
3087 powHUerr);
3088 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
3089 excessPrecision, isNearest);
3090
3091 /* Are we guaranteed to round correctly if we truncate? */
3092 if (HUdistance >= HUerr) {
3093 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
3094 calcSemantics.precision - excessPrecision,
3095 excessPrecision);
3096 /* Take the exponent of decSig. If we tcExtract-ed less bits
3097 above we must adjust our exponent to compensate for the
3098 implicit right shift. */
3099 exponent = (decSig.exponent + semantics->precision
3100 - (calcSemantics.precision - excessPrecision));
3101 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
3102 decSig.partCount(),
3103 truncatedBits);
3104 return normalize(rounding_mode, calcLostFraction);
3105 }
3106 }
3107}
3108
/* Parses the decimal number in str into this value, rounding with
   rounding_mode.  interpretDecimal scans the text first; zero, certain
   overflow and certain underflow are then settled from the scanned exponent
   alone.  Otherwise the significant digits are accumulated into a
   multi-part integer and handed to roundSignificandWithExponent.  Returns
   an error for text interpretDecimal rejects or for an invalid character
   in the significand. */
3110IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3111 decimalInfo D;
3112 opStatus fs;
3113
3114 /* Scan the text. */
3115 StringRef::iterator p = str.begin();
3116 if (Error Err = interpretDecimal(p, str.end(), &D))
3117 return std::move(Err);
3118
3119 /* Handle the quick cases. First the case of no significant digits,
3120 i.e. zero, and then exponents that are obviously too large or too
3121 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3122 definitely overflows if
3123
3124 (exp - 1) * L >= maxExponent
3125
3126 and definitely underflows to zero where
3127
3128 (exp + 1) * L <= minExponent - precision
3129
3130 With integer arithmetic the tightest bounds for L are
3131
3132 93/28 < L < 196/59 [ numerator <= 256 ]
3133 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3134 */
3135
3136 // Test if we have a zero number allowing for strings with no null terminators
3137 // and zero decimals with non-zero exponents.
3138 //
3139 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3140 // D.firstSigDigit equals str.end(), every digit must be a zero and there can
3141 // be at most one dot. On the other hand, if we have a zero with a non-zero
3142 // exponent, then we know that D.firstSigDigit will be non-numeric.
3143 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3144 category = fcZero;
3145 fs = opOK;
3146 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3147 sign = false;
3148 if (!semantics->hasZero)
3150
3151 /* Check whether the normalized exponent is high enough to overflow
3152 max during the log-rebasing in the max-exponent check below. */
3153 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3154 fs = handleOverflow(rounding_mode);
3155
3156 /* If it wasn't, then it also wasn't high enough to overflow max
3157 during the log-rebasing in the min-exponent check. Check that it
3158 won't overflow min in either check, then perform the min-exponent
3159 check. */
3160 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3161 (D.normalizedExponent + 1) * 28738 <=
3162 8651 * (semantics->minExponent - (int) semantics->precision)) {
3163 /* Underflow to zero and round. */
/* An all-zero significand with lfLessThanHalf lets normalize apply
   rounding_mode to the tiny non-zero amount that was dropped. */
3164 category = fcNormal;
3165 zeroSignificand();
3166 fs = normalize(rounding_mode, lfLessThanHalf);
3167
3168 /* We can finally safely perform the max-exponent check. */
3169 } else if ((D.normalizedExponent - 1) * 42039
3170 >= 12655 * semantics->maxExponent) {
3171 /* Overflow and round. */
3172 fs = handleOverflow(rounding_mode);
3173 } else {
3174 integerPart *decSignificand;
3175 unsigned int partCount;
3176
3177 /* A tight upper bound on number of bits required to hold an
3178 N-digit decimal integer is N * 196 / 59. Allocate enough space
3179 to hold the full significand, and an extra part required by
3180 tcMultiplyPart. */
3181 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3182 partCount = partCountForBits(1 + 196 * partCount / 59);
3183 decSignificand = new integerPart[partCount + 1];
/* From here on partCount counts the parts of decSignificand in use. */
3184 partCount = 0;
3185
3186 /* Convert to binary efficiently - we do almost all multiplication
3187 in an integerPart. When this would overflow do we do a single
3188 bignum multiplication, and then revert again to multiplication
3189 in an integerPart. */
3190 do {
3191 integerPart decValue, val, multiplier;
3192
3193 val = 0;
3194 multiplier = 1;
3195
3196 do {
/* Step over the decimal point; stop if it was the last character. */
3197 if (*p == '.') {
3198 p++;
3199 if (p == str.end()) {
3200 break;
3201 }
3202 }
3203 decValue = decDigitValue(*p++);
3204 if (decValue >= 10U) {
/* Release the buffer before bailing out with the error. */
3205 delete[] decSignificand;
3206 return createError("Invalid character in significand");
3207 }
3208 multiplier *= 10;
3209 val = val * 10 + decValue;
3210 /* The maximum number that can be multiplied by ten with any
3211 digit added without overflowing an integerPart. */
3212 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3213
3214 /* Multiply out the current part. */
3215 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3216 partCount, partCount + 1, false);
3217
3218 /* If we used another part (likely but not guaranteed), increase
3219 the count. */
3220 if (decSignificand[partCount])
3221 partCount++;
3222 } while (p <= D.lastSigDigit);
3223
3224 category = fcNormal;
3225 fs = roundSignificandWithExponent(decSignificand, partCount,
3226 D.exponent, rounding_mode);
3227
3228 delete [] decSignificand;
3229 }
3230
3231 return fs;
3232}
3233
3234bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3235 const size_t MIN_NAME_SIZE = 3;
3236
3237 if (str.size() < MIN_NAME_SIZE)
3238 return false;
3239
3240 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3241 makeInf(false);
3242 return true;
3243 }
3244
3245 bool IsNegative = str.consume_front("-");
3246 if (IsNegative) {
3247 if (str.size() < MIN_NAME_SIZE)
3248 return false;
3249
3250 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3251 makeInf(true);
3252 return true;
3253 }
3254 }
3255
3256 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3257 bool IsSignaling = str.consume_front_insensitive("s");
3258 if (IsSignaling) {
3259 if (str.size() < MIN_NAME_SIZE)
3260 return false;
3261 }
3262
3263 if (str.consume_front("nan") || str.consume_front("NaN")) {
3264 // A NaN without payload.
3265 if (str.empty()) {
3266 makeNaN(IsSignaling, IsNegative);
3267 return true;
3268 }
3269
3270 // Allow the payload to be inside parentheses.
3271 if (str.front() == '(') {
3272 // Parentheses should be balanced (and not empty).
3273 if (str.size() <= 2 || str.back() != ')')
3274 return false;
3275
3276 str = str.slice(1, str.size() - 1);
3277 }
3278
3279 // Determine the payload number's radix.
3280 unsigned Radix = 10;
3281 if (str[0] == '0') {
3282 if (str.size() > 1 && tolower(str[1]) == 'x') {
3283 str = str.drop_front(2);
3284 Radix = 16;
3285 } else {
3286 Radix = 8;
3287 }
3288 }
3289
3290 // Parse the payload and make the NaN.
3291 APInt Payload;
3292 if (!str.getAsInteger(Radix, Payload)) {
3293 makeNaN(IsSignaling, IsNegative, &Payload);
3294 return true;
3295 }
3296 }
3297
3298 return false;
3299}
3300
3303 if (str.empty())
3304 return createError("Invalid string length");
3305
3306 // Handle special cases.
3307 if (convertFromStringSpecials(str))
3308 return opOK;
3309
3310 /* Handle a leading minus sign. */
3311 StringRef::iterator p = str.begin();
3312 size_t slen = str.size();
3313 sign = *p == '-' ? 1 : 0;
3314 if (sign && !semantics->hasSignedRepr)
3316 "This floating point format does not support signed values");
3317
3318 if (*p == '-' || *p == '+') {
3319 p++;
3320 slen--;
3321 if (!slen)
3322 return createError("String has no digits");
3323 }
3324
3325 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3326 if (slen == 2)
3327 return createError("Invalid string");
3328 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3329 rounding_mode);
3330 }
3331
3332 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3333}
3334
3335/* Write out a hexadecimal representation of the floating point value
3336 to DST, which must be of sufficient size, in the C99 form
3337 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3338 excluding the terminating NUL.
3339
3340 If UPPERCASE, the output is in upper case, otherwise in lower case.
3341
3342 HEXDIGITS digits appear altogether, rounding the value if
3343 necessary. If HEXDIGITS is 0, the minimal precision to display the
3344 number precisely is used instead. If nothing would appear after
3345 the decimal point it is suppressed.
3346
3347 The decimal exponent is always printed and has at least one digit.
3348 Zero values display an exponent of zero. Infinities and NaNs
3349 appear as "infinity" or "nan" respectively.
3350
3351 The above rules are as specified by C99. There is ambiguity about
3352 what the leading hexadecimal digit should be. This implementation
3353 uses whatever is necessary so that the exponent is displayed as
3354 stored. This implies the exponent will fall within the IEEE format
3355 range, and the leading hexadecimal digit will be 0 (for denormals),
3356 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3357 any other digits zero).
3358*/
3359unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3360 bool upperCase,
3361 roundingMode rounding_mode) const {
3362 char *p;
3363
3364 p = dst;
3365 if (sign)
3366 *dst++ = '-';
3367
3368 switch (category) {
3369 case fcInfinity:
3370 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3371 dst += sizeof infinityL - 1;
3372 break;
3373
3374 case fcNaN:
3375 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3376 dst += sizeof NaNU - 1;
3377 break;
3378
3379 case fcZero:
3380 *dst++ = '0';
3381 *dst++ = upperCase ? 'X': 'x';
3382 *dst++ = '0';
3383 if (hexDigits > 1) {
3384 *dst++ = '.';
3385 memset (dst, '0', hexDigits - 1);
3386 dst += hexDigits - 1;
3387 }
3388 *dst++ = upperCase ? 'P': 'p';
3389 *dst++ = '0';
3390 break;
3391
3392 case fcNormal:
3393 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3394 break;
3395 }
3396
3397 *dst = 0;
3398
3399 return static_cast<unsigned int>(dst - p);
3400}
3401
3402/* Does the hard work of outputting the correctly rounded hexadecimal
3403 form of a normal floating point number with the specified number of
3404 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3405 digits necessary to print the value precisely is output. */
3406char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3407 bool upperCase,
3408 roundingMode rounding_mode) const {
3409 unsigned int count, valueBits, shift, partsCount, outputDigits;
3410 const char *hexDigitChars;
3411 const integerPart *significand;
3412 char *p;
3413 bool roundUp;
3414
3415 *dst++ = '0';
3416 *dst++ = upperCase ? 'X': 'x';
3417
3418 roundUp = false;
3419 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3420
3421 significand = significandParts();
3422 partsCount = partCount();
3423
3424 /* +3 because the first digit only uses the single integer bit, so
3425 we have 3 virtual zero most-significant-bits. */
3426 valueBits = semantics->precision + 3;
3427 shift = integerPartWidth - valueBits % integerPartWidth;
3428
3429 /* The natural number of digits required ignoring trailing
3430 insignificant zeroes. */
3431 outputDigits = (valueBits - significandLSB () + 3) / 4;
3432
3433 /* hexDigits of zero means use the required number for the
3434 precision. Otherwise, see if we are truncating. If we are,
3435 find out if we need to round away from zero. */
3436 if (hexDigits) {
3437 if (hexDigits < outputDigits) {
3438 /* We are dropping non-zero bits, so need to check how to round.
3439 "bits" is the number of dropped bits. */
3440 unsigned int bits;
3441 lostFraction fraction;
3442
3443 bits = valueBits - hexDigits * 4;
3444 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3445 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3446 }
3447 outputDigits = hexDigits;
3448 }
3449
3450 /* Write the digits consecutively, and start writing in the location
3451 of the hexadecimal point. We move the most significant digit
3452 left and add the hexadecimal point later. */
3453 p = ++dst;
3454
3455 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3456
3457 while (outputDigits && count) {
3458 integerPart part;
3459
3460 /* Put the most significant integerPartWidth bits in "part". */
3461 if (--count == partsCount)
3462 part = 0; /* An imaginary higher zero part. */
3463 else
3464 part = significand[count] << shift;
3465
3466 if (count && shift)
3467 part |= significand[count - 1] >> (integerPartWidth - shift);
3468
3469 /* Convert as much of "part" to hexdigits as we can. */
3470 unsigned int curDigits = integerPartWidth / 4;
3471
3472 if (curDigits > outputDigits)
3473 curDigits = outputDigits;
3474 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3475 outputDigits -= curDigits;
3476 }
3477
3478 if (roundUp) {
3479 char *q = dst;
3480
3481 /* Note that hexDigitChars has a trailing '0'. */
3482 do {
3483 q--;
3484 *q = hexDigitChars[hexDigitValue (*q) + 1];
3485 } while (*q == '0');
3486 assert(q >= p);
3487 } else {
3488 /* Add trailing zeroes. */
3489 memset (dst, '0', outputDigits);
3490 dst += outputDigits;
3491 }
3492
3493 /* Move the most significant digit to before the point, and if there
3494 is something after the decimal point add it. This must come
3495 after rounding above. */
3496 p[-1] = p[0];
3497 if (dst -1 == p)
3498 dst--;
3499 else
3500 p[0] = '.';
3501
3502 /* Finally output the exponent. */
3503 *dst++ = upperCase ? 'P': 'p';
3504
3505 return writeSignedDecimal (dst, exponent);
3506}
3507
3509 if (!Arg.isFiniteNonZero())
3510 return hash_combine((uint8_t)Arg.category,
3511 // NaN has no sign, fix it at zero.
3512 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3513 Arg.semantics->precision);
3514
3515 // Normal floats need their exponent and significand hashed.
3516 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3517 Arg.semantics->precision, Arg.exponent,
3519 Arg.significandParts(),
3520 Arg.significandParts() + Arg.partCount()));
3521}
3522
3523// Conversion from APFloat to/from host float/double. It may eventually be
3524// possible to eliminate these and have everybody deal with APFloats, but that
3525// will take a while. This approach will not easily extend to long double.
3526// Current implementation requires integerPartWidth==64, which is correct at
3527// the moment but could be made more general.
3528
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3531
3532APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3533 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3534 assert(partCount()==2);
3535
3536 uint64_t myexponent, mysignificand;
3537
3538 if (isFiniteNonZero()) {
3539 myexponent = exponent+16383; //bias
3540 mysignificand = significandParts()[0];
3541 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3542 myexponent = 0; // denormal
3543 } else if (category==fcZero) {
3544 myexponent = 0;
3545 mysignificand = 0;
3546 } else if (category==fcInfinity) {
3547 myexponent = 0x7fff;
3548 mysignificand = 0x8000000000000000ULL;
3549 } else {
3550 assert(category == fcNaN && "Unknown category");
3551 myexponent = 0x7fff;
3552 mysignificand = significandParts()[0];
3553 }
3554
3555 uint64_t words[2];
3556 words[0] = mysignificand;
3557 words[1] = ((uint64_t)(sign & 1) << 15) |
3558 (myexponent & 0x7fffLL);
3559 return APInt(80, words);
3560}
3561
3562APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3563 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3564 assert(partCount()==2);
3565
3566 uint64_t words[2];
3567 opStatus fs;
3568 bool losesInfo;
3569
3570 // Convert number to double. To avoid spurious underflows, we re-
3571 // normalize against the "double" minExponent first, and only *then*
3572 // truncate the mantissa. The result of that second conversion
3573 // may be inexact, but should never underflow.
3574 // Declare fltSemantics before APFloat that uses it (and
3575 // saves pointer to it) to ensure correct destruction order.
3576 fltSemantics extendedSemantics = *semantics;
3577 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3578 IEEEFloat extended(*this);
3579 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3580 assert(fs == opOK && !losesInfo);
3581 (void)fs;
3582
3583 IEEEFloat u(extended);
3584 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3585 assert(fs == opOK || fs == opInexact);
3586 (void)fs;
3587 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3588
3589 // If conversion was exact or resulted in a special case, we're done;
3590 // just set the second double to zero. Otherwise, re-convert back to
3591 // the extended format and compute the difference. This now should
3592 // convert exactly to double.
3593 if (u.isFiniteNonZero() && losesInfo) {
3594 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3595 assert(fs == opOK && !losesInfo);
3596 (void)fs;
3597
3598 IEEEFloat v(extended);
3599 v.subtract(u, rmNearestTiesToEven);
3600 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3601 assert(fs == opOK && !losesInfo);
3602 (void)fs;
3603 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3604 } else {
3605 words[1] = 0;
3606 }
3607
3608 return APInt(128, words);
3609}
3610
3611template <const fltSemantics &S>
3612APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3613 assert(semantics == &S);
3614 const int bias =
3615 (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1);
3616 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3617 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3618 constexpr integerPart integer_bit =
3619 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3620 constexpr uint64_t significand_mask = integer_bit - 1;
3621 constexpr unsigned int exponent_bits =
3622 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3623 : S.sizeInBits;
3624 static_assert(exponent_bits < 64);
3625 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3626
3627 uint64_t myexponent;
3628 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3629 mysignificand;
3630
3631 if (isFiniteNonZero()) {
3632 myexponent = exponent + bias;
3633 std::copy_n(significandParts(), mysignificand.size(),
3634 mysignificand.begin());
3635 if (myexponent == 1 &&
3636 !(significandParts()[integer_bit_part] & integer_bit))
3637 myexponent = 0; // denormal
3638 } else if (category == fcZero) {
3639 if (!S.hasZero)
3640 llvm_unreachable("semantics does not support zero!");
3641 myexponent = ::exponentZero(S) + bias;
3642 mysignificand.fill(0);
3643 } else if (category == fcInfinity) {
3644 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3645 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3646 llvm_unreachable("semantics don't support inf!");
3647 myexponent = ::exponentInf(S) + bias;
3648 mysignificand.fill(0);
3649 } else {
3650 assert(category == fcNaN && "Unknown category!");
3651 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3652 llvm_unreachable("semantics don't support NaN!");
3653 myexponent = ::exponentNaN(S) + bias;
3654 std::copy_n(significandParts(), mysignificand.size(),
3655 mysignificand.begin());
3656 }
3657 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3658 auto words_iter =
3659 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3660 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3661 // Clear the integer bit.
3662 words[mysignificand.size() - 1] &= significand_mask;
3663 }
3664 std::fill(words_iter, words.end(), uint64_t{0});
3665 constexpr size_t last_word = words.size() - 1;
3666 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3667 << ((S.sizeInBits - 1) % 64);
3668 words[last_word] |= shifted_sign;
3669 uint64_t shifted_exponent = (myexponent & exponent_mask)
3670 << (trailing_significand_bits % 64);
3671 words[last_word] |= shifted_exponent;
3672 if constexpr (last_word == 0) {
3673 return APInt(S.sizeInBits, words[0]);
3674 }
3675 return APInt(S.sizeInBits, words);
3676}
3677
3678APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3679 assert(partCount() == 2);
3680 return convertIEEEFloatToAPInt<semIEEEquad>();
3681}
3682
3683APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3684 assert(partCount()==1);
3685 return convertIEEEFloatToAPInt<semIEEEdouble>();
3686}
3687
3688APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3689 assert(partCount()==1);
3690 return convertIEEEFloatToAPInt<semIEEEsingle>();
3691}
3692
3693APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3694 assert(partCount() == 1);
3695 return convertIEEEFloatToAPInt<semBFloat>();
3696}
3697
3698APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3699 assert(partCount()==1);
3700 return convertIEEEFloatToAPInt<semIEEEhalf>();
3701}
3702
3703APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3704 assert(partCount() == 1);
3705 return convertIEEEFloatToAPInt<semFloat8E5M2>();
3706}
3707
3708APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3709 assert(partCount() == 1);
3710 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3711}
3712
3713APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3714 assert(partCount() == 1);
3715 return convertIEEEFloatToAPInt<semFloat8E4M3>();
3716}
3717
3718APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3719 assert(partCount() == 1);
3720 return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3721}
3722
3723APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3724 assert(partCount() == 1);
3725 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3726}
3727
3728APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3729 assert(partCount() == 1);
3730 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3731}
3732
3733APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3734 assert(partCount() == 1);
3735 return convertIEEEFloatToAPInt<semFloat8E3M4>();
3736}
3737
3738APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3739 assert(partCount() == 1);
3740 return convertIEEEFloatToAPInt<semFloatTF32>();
3741}
3742
3743APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3744 assert(partCount() == 1);
3745 return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
3746}
3747
3748APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3749 assert(partCount() == 1);
3750 return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
3751}
3752
3753APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3754 assert(partCount() == 1);
3755 return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
3756}
3757
3758APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3759 assert(partCount() == 1);
3760 return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
3761}
3762
3763// This function creates an APInt that is just a bit map of the floating
3764// point constant as it would appear in memory. It is not a conversion,
3765// and treating the result as a normal integer is unlikely to be useful.
3766
3768 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3769 return convertHalfAPFloatToAPInt();
3770
3771 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3772 return convertBFloatAPFloatToAPInt();
3773
3774 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3775 return convertFloatAPFloatToAPInt();
3776
3777 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3778 return convertDoubleAPFloatToAPInt();
3779
3780 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3781 return convertQuadrupleAPFloatToAPInt();
3782
3783 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3784 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3785
3786 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3787 return convertFloat8E5M2APFloatToAPInt();
3788
3789 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3790 return convertFloat8E5M2FNUZAPFloatToAPInt();
3791
3792 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
3793 return convertFloat8E4M3APFloatToAPInt();
3794
3795 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3796 return convertFloat8E4M3FNAPFloatToAPInt();
3797
3798 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3799 return convertFloat8E4M3FNUZAPFloatToAPInt();
3800
3801 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3802 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3803
3804 if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4)
3805 return convertFloat8E3M4APFloatToAPInt();
3806
3807 if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3808 return convertFloatTF32APFloatToAPInt();
3809
3810 if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU)
3811 return convertFloat8E8M0FNUAPFloatToAPInt();
3812
3813 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3814 return convertFloat6E3M2FNAPFloatToAPInt();
3815
3816 if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3817 return convertFloat6E2M3FNAPFloatToAPInt();
3818
3819 if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
3820 return convertFloat4E2M1FNAPFloatToAPInt();
3821
3822 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3823 "unknown format!");
3824 return convertF80LongDoubleAPFloatToAPInt();
3825}
3826
3828 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3829 "Float semantics are not IEEEsingle");
3830 APInt api = bitcastToAPInt();
3831 return api.bitsToFloat();
3832}
3833
3835 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3836 "Float semantics are not IEEEdouble");
3837 APInt api = bitcastToAPInt();
3838 return api.bitsToDouble();
3839}
3840
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128 by reinterpreting its bit pattern.
/// Asserts that the semantics are semIEEEquad.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
         "Float semantics are not IEEEquads");
  return bitcastToAPInt().bitsToQuad();
}
#endif
3849
3850/// Integer bit is explicit in this format. Intel hardware (387 and later)
3851/// does not support these bit patterns:
3852/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3853/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3854/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3855/// exponent = 0, integer bit 1 ("pseudodenormal")
3856/// At the moment, the first three are treated as NaNs, the last one as Normal.
3857void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3858 uint64_t i1 = api.getRawData()[0];
3859 uint64_t i2 = api.getRawData()[1];
3860 uint64_t myexponent = (i2 & 0x7fff);
3861 uint64_t mysignificand = i1;
3862 uint8_t myintegerbit = mysignificand >> 63;
3863
3864 initialize(&semX87DoubleExtended);
3865 assert(partCount()==2);
3866
3867 sign = static_cast<unsigned int>(i2>>15);
3868 if (myexponent == 0 && mysignificand == 0) {
3869 makeZero(sign);
3870 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3871 makeInf(sign);
3872 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3873 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3874 category = fcNaN;
3875 exponent = exponentNaN();
3876 significandParts()[0] = mysignificand;
3877 significandParts()[1] = 0;
3878 } else {
3879 category = fcNormal;
3880 exponent = myexponent - 16383;
3881 significandParts()[0] = mysignificand;
3882 significandParts()[1] = 0;
3883 if (myexponent==0) // denormal
3884 exponent = -16382;
3885 }
3886}
3887
3888void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3889 uint64_t i1 = api.getRawData()[0];
3890 uint64_t i2 = api.getRawData()[1];
3891 opStatus fs;
3892 bool losesInfo;
3893
3894 // Get the first double and convert to our format.
3895 initFromDoubleAPInt(APInt(64, i1));
3897 assert(fs == opOK && !losesInfo);
3898 (void)fs;
3899
3900 // Unless we have a special case, add in second double.
3901 if (isFiniteNonZero()) {
3902 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3903 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3904 assert(fs == opOK && !losesInfo);
3905 (void)fs;
3906
3908 }
3909}
3910
3911// The E8M0 format has the following characteristics:
3912// It is an 8-bit unsigned format with only exponents (no actual significand).
3913// No encodings for {zero, infinities or denorms}.
3914// NaN is represented by all 1's.
3915// Bias is 127.
3916void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3917 const uint64_t exponent_mask = 0xff;
3918 uint64_t val = api.getRawData()[0];
3919 uint64_t myexponent = (val & exponent_mask);
3920
3921 initialize(&semFloat8E8M0FNU);
3922 assert(partCount() == 1);
3923
3924 // This format has unsigned representation only
3925 sign = 0;
3926
3927 // Set the significand
3928 // This format does not have any significand but the 'Pth' precision bit is
3929 // always set to 1 for consistency in APFloat's internal representation.
3930 uint64_t mysignificand = 1;
3931 significandParts()[0] = mysignificand;
3932
3933 // This format can either have a NaN or fcNormal
3934 // All 1's i.e. 255 is a NaN
3935 if (val == exponent_mask) {
3936 category = fcNaN;
3937 exponent = exponentNaN();
3938 return;
3939 }
3940 // Handle fcNormal...
3941 category = fcNormal;
3942 exponent = myexponent - 127; // 127 is bias
3943}
3944template <const fltSemantics &S>
3945void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3946 assert(api.getBitWidth() == S.sizeInBits);
3947 constexpr integerPart integer_bit = integerPart{1}
3948 << ((S.precision - 1) % integerPartWidth);
3949 constexpr uint64_t significand_mask = integer_bit - 1;
3950 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3951 constexpr unsigned int stored_significand_parts =
3952 partCountForBits(trailing_significand_bits);
3953 constexpr unsigned int exponent_bits =
3954 S.sizeInBits - 1 - trailing_significand_bits;
3955 static_assert(exponent_bits < 64);
3956 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3957 constexpr int bias = -(S.minExponent - 1);
3958
3959 // Copy the bits of the significand. We need to clear out the exponent and
3960 // sign bit in the last word.
3961 std::array<integerPart, stored_significand_parts> mysignificand;
3962 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3963 if constexpr (significand_mask != 0) {
3964 mysignificand[mysignificand.size() - 1] &= significand_mask;
3965 }
3966
3967 // We assume the last word holds the sign bit, the exponent, and potentially
3968 // some of the trailing significand field.
3969 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3970 uint64_t myexponent =
3971 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3972
3973 initialize(&S);
3974 assert(partCount() == mysignificand.size());
3975
3976 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3977
3978 bool all_zero_significand =
3979 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3980
3981 bool is_zero = myexponent == 0 && all_zero_significand;
3982
3983 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3984 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3985 makeInf(sign);
3986 return;
3987 }
3988 }
3989
3990 bool is_nan = false;
3991
3992 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3993 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3994 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3995 bool all_ones_significand =
3996 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3997 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3998 (!significand_mask ||
3999 mysignificand[mysignificand.size() - 1] == significand_mask);
4000 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
4001 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
4002 is_nan = is_zero && sign;
4003 }
4004
4005 if (is_nan) {
4006 category = fcNaN;
4007 exponent = ::exponentNaN(S);
4008 std::copy_n(mysignificand.begin(), mysignificand.size(),
4009 significandParts());
4010 return;
4011 }
4012
4013 if (is_zero) {
4014 makeZero(sign);
4015 return;
4016 }
4017
4018 category = fcNormal;
4019 exponent = myexponent - bias;
4020 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
4021 if (myexponent == 0) // denormal
4022 exponent = S.minExponent;
4023 else
4024 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
4025}
4026
4027void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
4028 initFromIEEEAPInt<semIEEEquad>(api);
4029}
4030
4031void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
4032 initFromIEEEAPInt<semIEEEdouble>(api);
4033}
4034
4035void IEEEFloat::initFromFloatAPInt(const APInt &api) {
4036 initFromIEEEAPInt<semIEEEsingle>(api);
4037}
4038
4039void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
4040 initFromIEEEAPInt<semBFloat>(api);
4041}
4042
4043void IEEEFloat::initFromHalfAPInt(const APInt &api) {
4044 initFromIEEEAPInt<semIEEEhalf>(api);
4045}
4046
4047void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
4048 initFromIEEEAPInt<semFloat8E5M2>(api);
4049}
4050
4051void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
4052 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
4053}
4054
4055void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
4056 initFromIEEEAPInt<semFloat8E4M3>(api);
4057}
4058
4059void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
4060 initFromIEEEAPInt<semFloat8E4M3FN>(api);
4061}
4062
4063void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
4064 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
4065}
4066
4067void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
4068 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
4069}
4070
4071void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
4072 initFromIEEEAPInt<semFloat8E3M4>(api);
4073}
4074
4075void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
4076 initFromIEEEAPInt<semFloatTF32>(api);
4077}
4078
4079void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
4080 initFromIEEEAPInt<semFloat6E3M2FN>(api);
4081}
4082
4083void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
4084 initFromIEEEAPInt<semFloat6E2M3FN>(api);
4085}
4086
4087void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
4088 initFromIEEEAPInt<semFloat4E2M1FN>(api);
4089}
4090
4091/// Treat api as containing the bits of a floating point number.
4092void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
4093 assert(api.getBitWidth() == Sem->sizeInBits);
4094 if (Sem == &semIEEEhalf)
4095 return initFromHalfAPInt(api);
4096 if (Sem == &semBFloat)
4097 return initFromBFloatAPInt(api);
4098 if (Sem == &semIEEEsingle)
4099 return initFromFloatAPInt(api);
4100 if (Sem == &semIEEEdouble)
4101 return initFromDoubleAPInt(api);
4102 if (Sem == &semX87DoubleExtended)
4103 return initFromF80LongDoubleAPInt(api);
4104 if (Sem == &semIEEEquad)
4105 return initFromQuadrupleAPInt(api);
4106 if (Sem == &semPPCDoubleDoubleLegacy)
4107 return initFromPPCDoubleDoubleLegacyAPInt(api);
4108 if (Sem == &semFloat8E5M2)
4109 return initFromFloat8E5M2APInt(api);
4110 if (Sem == &semFloat8E5M2FNUZ)
4111 return initFromFloat8E5M2FNUZAPInt(api);
4112 if (Sem == &semFloat8E4M3)
4113 return initFromFloat8E4M3APInt(api);
4114 if (Sem == &semFloat8E4M3FN)
4115 return initFromFloat8E4M3FNAPInt(api);
4116 if (Sem == &semFloat8E4M3FNUZ)
4117 return initFromFloat8E4M3FNUZAPInt(api);
4118 if (Sem == &semFloat8E4M3B11FNUZ)
4119 return initFromFloat8E4M3B11FNUZAPInt(api);
4120 if (Sem == &semFloat8E3M4)
4121 return initFromFloat8E3M4APInt(api);
4122 if (Sem == &semFloatTF32)
4123 return initFromFloatTF32APInt(api);
4124 if (Sem == &semFloat8E8M0FNU)
4125 return initFromFloat8E8M0FNUAPInt(api);
4126 if (Sem == &semFloat6E3M2FN)
4127 return initFromFloat6E3M2FNAPInt(api);
4128 if (Sem == &semFloat6E2M3FN)
4129 return initFromFloat6E2M3FNAPInt(api);
4130 if (Sem == &semFloat4E2M1FN)
4131 return initFromFloat4E2M1FNAPInt(api);
4132
4133 llvm_unreachable("unsupported semantics");
4134}
4135
4136/// Make this number the largest magnitude normal number in the given
4137/// semantics.
4138void IEEEFloat::makeLargest(bool Negative) {
4139 if (Negative && !semantics->hasSignedRepr)
4141 "This floating point format does not support signed values");
4142 // We want (in interchange format):
4143 // sign = {Negative}
4144 // exponent = 1..10
4145 // significand = 1..1
4146 category = fcNormal;
4147 sign = Negative;
4148 exponent = semantics->maxExponent;
4149
4150 // Use memset to set all but the highest integerPart to all ones.
4151 integerPart *significand = significandParts();
4152 unsigned PartCount = partCount();
4153 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4154
4155 // Set the high integerPart especially setting all unused top bits for
4156 // internal consistency.
4157 const unsigned NumUnusedHighBits =
4158 PartCount*integerPartWidth - semantics->precision;
4159 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4160 ? (~integerPart(0) >> NumUnusedHighBits)
4161 : 0;
4162 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4163 semantics->nanEncoding == fltNanEncoding::AllOnes &&
4164 (semantics->precision > 1))
4165 significand[0] &= ~integerPart(1);
4166}
4167
4168/// Make this number the smallest magnitude denormal number in the given
4169/// semantics.
4170void IEEEFloat::makeSmallest(bool Negative) {
4171 if (Negative && !semantics->hasSignedRepr)
4173 "This floating point format does not support signed values");
4174 // We want (in interchange format):
4175 // sign = {Negative}
4176 // exponent = 0..0
4177 // significand = 0..01
4178 category = fcNormal;
4179 sign = Negative;
4180 exponent = semantics->minExponent;
4181 APInt::tcSet(significandParts(), 1, partCount());
4182}
4183
4184void IEEEFloat::makeSmallestNormalized(bool Negative) {
4185 if (Negative && !semantics->hasSignedRepr)
4187 "This floating point format does not support signed values");
4188 // We want (in interchange format):
4189 // sign = {Negative}
4190 // exponent = 0..0
4191 // significand = 10..0
4192
4193 category = fcNormal;
4194 zeroSignificand();
4195 sign = Negative;
4196 exponent = semantics->minExponent;
4197 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4198}
4199
4200IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4201 initFromAPInt(&Sem, API);
4202}
4203
4204IEEEFloat::IEEEFloat(float f) {
4205 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
4206}
4207
4208IEEEFloat::IEEEFloat(double d) {
4209 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
4210}
4211
4212namespace {
4213 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4214 Buffer.append(Str.begin(), Str.end());
4215 }
4216
4217 /// Removes data from the given significand until it is no more
4218 /// precise than is required for the desired precision.
4219 void AdjustToPrecision(APInt &significand,
4220 int &exp, unsigned FormatPrecision) {
4221 unsigned bits = significand.getActiveBits();
4222
4223 // 196/59 is a very slight overestimate of lg_2(10).
4224 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4225
4226 if (bits <= bitsRequired) return;
4227
4228 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4229 if (!tensRemovable) return;
4230
4231 exp += tensRemovable;
4232
4233 APInt divisor(significand.getBitWidth(), 1);
4234 APInt powten(significand.getBitWidth(), 10);
4235 while (true) {
4236 if (tensRemovable & 1)
4237 divisor *= powten;
4238 tensRemovable >>= 1;
4239 if (!tensRemovable) break;
4240 powten *= powten;
4241 }
4242
4243 significand = significand.udiv(divisor);
4244
4245 // Truncate the significand down to its active bit count.
4246 significand = significand.trunc(significand.getActiveBits());
4247 }
4248
4249
4250 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4251 int &exp, unsigned FormatPrecision) {
4252 unsigned N = buffer.size();
4253 if (N <= FormatPrecision) return;
4254
4255 // The most significant figures are the last ones in the buffer.
4256 unsigned FirstSignificant = N - FormatPrecision;
4257
4258 // Round.
4259 // FIXME: this probably shouldn't use 'round half up'.
4260
4261 // Rounding down is just a truncation, except we also want to drop
4262 // trailing zeros from the new result.
4263 if (buffer[FirstSignificant - 1] < '5') {
4264 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4265 FirstSignificant++;
4266
4267 exp += FirstSignificant;
4268 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4269 return;
4270 }
4271
4272 // Rounding up requires a decimal add-with-carry. If we continue
4273 // the carry, the newly-introduced zeros will just be truncated.
4274 for (unsigned I = FirstSignificant; I != N; ++I) {
4275 if (buffer[I] == '9') {
4276 FirstSignificant++;
4277 } else {
4278 buffer[I]++;
4279 break;
4280 }
4281 }
4282
4283 // If we carried through, we have exactly one digit of precision.
4284 if (FirstSignificant == N) {
4285 exp += FirstSignificant;
4286 buffer.clear();
4287 buffer.push_back('1');
4288 return;
4289 }
4290
4291 exp += FirstSignificant;
4292 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4293 }
4294
4295 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4296 APInt significand, unsigned FormatPrecision,
4297 unsigned FormatMaxPadding, bool TruncateZero) {
4298 const int semanticsPrecision = significand.getBitWidth();
4299
4300 if (isNeg)
4301 Str.push_back('-');
4302
4303 // Set FormatPrecision if zero. We want to do this before we
4304 // truncate trailing zeros, as those are part of the precision.
4305 if (!FormatPrecision) {
4306 // We use enough digits so the number can be round-tripped back to an
4307 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4308 // Accurately" by Steele and White.
4309 // FIXME: Using a formula based purely on the precision is conservative;
4310 // we can print fewer digits depending on the actual value being printed.
4311
4312 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4313 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4314 }
4315
4316 // Ignore trailing binary zeros.
4317 int trailingZeros = significand.countr_zero();
4318 exp += trailingZeros;
4319 significand.lshrInPlace(trailingZeros);
4320
4321 // Change the exponent from 2^e to 10^e.
4322 if (exp == 0) {
4323 // Nothing to do.
4324 } else if (exp > 0) {
4325 // Just shift left.
4326 significand = significand.zext(semanticsPrecision + exp);
4327 significand <<= exp;
4328 exp = 0;
4329 } else { /* exp < 0 */
4330 int texp = -exp;
4331
4332 // We transform this using the identity:
4333 // (N)(2^-e) == (N)(5^e)(10^-e)
4334 // This means we have to multiply N (the significand) by 5^e.
4335 // To avoid overflow, we have to operate on numbers large
4336 // enough to store N * 5^e:
4337 // log2(N * 5^e) == log2(N) + e * log2(5)
4338 // <= semantics->precision + e * 137 / 59
4339 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4340
4341 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4342
4343 // Multiply significand by 5^e.
4344 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4345 significand = significand.zext(precision);
4346 APInt five_to_the_i(precision, 5);
4347 while (true) {
4348 if (texp & 1)
4349 significand *= five_to_the_i;
4350
4351 texp >>= 1;
4352 if (!texp)
4353 break;
4354 five_to_the_i *= five_to_the_i;
4355 }
4356 }
4357
4358 AdjustToPrecision(significand, exp, FormatPrecision);
4359
4361
4362 // Fill the buffer.
4363 unsigned precision = significand.getBitWidth();
4364 if (precision < 4) {
4365 // We need enough precision to store the value 10.
4366 precision = 4;
4367 significand = significand.zext(precision);
4368 }
4369 APInt ten(precision, 10);
4370 APInt digit(precision, 0);
4371
4372 bool inTrail = true;
4373 while (significand != 0) {
4374 // digit <- significand % 10
4375 // significand <- significand / 10
4376 APInt::udivrem(significand, ten, significand, digit);
4377
4378 unsigned d = digit.getZExtValue();
4379
4380 // Drop trailing zeros.
4381 if (inTrail && !d)
4382 exp++;
4383 else {
4384 buffer.push_back((char) ('0' + d));
4385 inTrail = false;
4386 }
4387 }
4388
4389 assert(!buffer.empty() && "no characters in buffer!");
4390
4391 // Drop down to FormatPrecision.
4392 // TODO: don't do more precise calculations above than are required.
4393 AdjustToPrecision(buffer, exp, FormatPrecision);
4394
4395 unsigned NDigits = buffer.size();
4396
4397 // Check whether we should use scientific notation.
4398 bool FormatScientific;
4399 if (!FormatMaxPadding)
4400 FormatScientific = true;
4401 else {
4402 if (exp >= 0) {
4403 // 765e3 --> 765000
4404 // ^^^
4405 // But we shouldn't make the number look more precise than it is.
4406 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4407 NDigits + (unsigned) exp > FormatPrecision);
4408 } else {
4409 // Power of the most significant digit.
4410 int MSD = exp + (int) (NDigits - 1);
4411 if (MSD >= 0) {
4412 // 765e-2 == 7.65
4413 FormatScientific = false;
4414 } else {
4415 // 765e-5 == 0.00765
4416 // ^ ^^
4417 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4418 }
4419 }
4420 }
4421
4422 // Scientific formatting is pretty straightforward.
4423 if (FormatScientific) {
4424 exp += (NDigits - 1);
4425
4426 Str.push_back(buffer[NDigits-1]);
4427 Str.push_back('.');
4428 if (NDigits == 1 && TruncateZero)
4429 Str.push_back('0');
4430 else
4431 for (unsigned I = 1; I != NDigits; ++I)
4432 Str.push_back(buffer[NDigits-1-I]);
4433 // Fill with zeros up to FormatPrecision.
4434 if (!TruncateZero && FormatPrecision > NDigits - 1)
4435 Str.append(FormatPrecision - NDigits + 1, '0');
4436 // For !TruncateZero we use lower 'e'.
4437 Str.push_back(TruncateZero ? 'E' : 'e');
4438
4439 Str.push_back(exp >= 0 ? '+' : '-');
4440 if (exp < 0)
4441 exp = -exp;
4442 SmallVector<char, 6> expbuf;
4443 do {
4444 expbuf.push_back((char) ('0' + (exp % 10)));
4445 exp /= 10;
4446 } while (exp);
4447 // Exponent always at least two digits if we do not truncate zeros.
4448 if (!TruncateZero && expbuf.size() < 2)
4449 expbuf.push_back('0');
4450 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4451 Str.push_back(expbuf[E-1-I]);
4452 return;
4453 }
4454
4455 // Non-scientific, positive exponents.
4456 if (exp >= 0) {
4457 for (unsigned I = 0; I != NDigits; ++I)
4458 Str.push_back(buffer[NDigits-1-I]);
4459 for (unsigned I = 0; I != (unsigned) exp; ++I)
4460 Str.push_back('0');
4461 return;
4462 }
4463
4464 // Non-scientific, negative exponents.
4465
4466 // The number of digits to the left of the decimal point.
4467 int NWholeDigits = exp + (int) NDigits;
4468
4469 unsigned I = 0;
4470 if (NWholeDigits > 0) {
4471 for (; I != (unsigned) NWholeDigits; ++I)
4472 Str.push_back(buffer[NDigits-I-1]);
4473 Str.push_back('.');
4474 } else {
4475 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4476
4477 Str.push_back('0');
4478 Str.push_back('.');
4479 for (unsigned Z = 1; Z != NZeros; ++Z)
4480 Str.push_back('0');
4481 }
4482
4483 for (; I != NDigits; ++I)
4484 Str.push_back(buffer[NDigits-I-1]);
4485
4486 }
4487} // namespace
4488
4489void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4490 unsigned FormatMaxPadding, bool TruncateZero) const {
4491 switch (category) {
4492 case fcInfinity:
4493 if (isNegative())
4494 return append(Str, "-Inf");
4495 else
4496 return append(Str, "+Inf");
4497
4498 case fcNaN: return append(Str, "NaN");
4499
4500 case fcZero:
4501 if (isNegative())
4502 Str.push_back('-');
4503
4504 if (!FormatMaxPadding) {
4505 if (TruncateZero)
4506 append(Str, "0.0E+0");
4507 else {
4508 append(Str, "0.0");
4509 if (FormatPrecision > 1)
4510 Str.append(FormatPrecision - 1, '0');
4511 append(Str, "e+00");
4512 }
4513 } else {
4514 Str.push_back('0');
4515 }
4516 return;
4517
4518 case fcNormal:
4519 break;
4520 }
4521
4522 // Decompose the number into an APInt and an exponent.
4523 int exp = exponent - ((int) semantics->precision - 1);
4524 APInt significand(
4525 semantics->precision,
4526 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4527
4528 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4529 FormatMaxPadding, TruncateZero);
4530
4531}
4532
4533int IEEEFloat::getExactLog2Abs() const {
4534 if (!isFinite() || isZero())
4535 return INT_MIN;
4536
4537 const integerPart *Parts = significandParts();
4538 const int PartCount = partCountForBits(semantics->precision);
4539
4540 int PopCount = 0;
4541 for (int i = 0; i < PartCount; ++i) {
4542 PopCount += llvm::popcount(Parts[i]);
4543 if (PopCount > 1)
4544 return INT_MIN;
4545 }
4546
4547 if (exponent != semantics->minExponent)
4548 return exponent;
4549
4550 int CountrParts = 0;
4551 for (int i = 0; i < PartCount;
4552 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4553 if (Parts[i] != 0) {
4554 return exponent - semantics->precision + CountrParts +
4555 llvm::countr_zero(Parts[i]) + 1;
4556 }
4557 }
4558
4559 llvm_unreachable("didn't find the set bit");
4560}
4561
4562bool IEEEFloat::isSignaling() const {
4563 if (!isNaN())
4564 return false;
4565 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4566 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4567 return false;
4568
4569 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4570 // first bit of the trailing significand being 0.
4571 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4572}
4573
4574/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4575///
4576/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4577/// appropriate sign switching before/after the computation.
4578APFloat::opStatus IEEEFloat::next(bool nextDown) {
4579 // If we are performing nextDown, swap sign so we have -x.
4580 if (nextDown)
4581 changeSign();
4582
4583 // Compute nextUp(x)
4584 opStatus result = opOK;
4585
4586 // Handle each float category separately.
4587 switch (category) {
4588 case fcInfinity:
4589 // nextUp(+inf) = +inf
4590 if (!isNegative())
4591 break;
4592 // nextUp(-inf) = -getLargest()
4593 makeLargest(true);
4594 break;
4595 case fcNaN:
4596 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4597 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4598 // change the payload.
4599 if (isSignaling()) {
4600 result = opInvalidOp;
4601 // For consistency, propagate the sign of the sNaN to the qNaN.
4602 makeNaN(false, isNegative(), nullptr);
4603 }
4604 break;
4605 case fcZero:
4606 // nextUp(pm 0) = +getSmallest()
4607 makeSmallest(false);
4608 break;
4609 case fcNormal:
4610 // nextUp(-getSmallest()) = -0
4611 if (isSmallest() && isNegative()) {
4612 APInt::tcSet(significandParts(), 0, partCount());
4613 category = fcZero;
4614 exponent = 0;
4615 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4616 sign = false;
4617 if (!semantics->hasZero)
4618 makeSmallestNormalized(false);
4619 break;
4620 }
4621
4622 if (isLargest() && !isNegative()) {
4623 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4624 // nextUp(getLargest()) == NAN
4625 makeNaN();
4626 break;
4627 } else if (semantics->nonFiniteBehavior ==
4629 // nextUp(getLargest()) == getLargest()
4630 break;
4631 } else {
4632 // nextUp(getLargest()) == INFINITY
4633 APInt::tcSet(significandParts(), 0, partCount());
4634 category = fcInfinity;
4635 exponent = semantics->maxExponent + 1;
4636 break;
4637 }
4638 }
4639
4640 // nextUp(normal) == normal + inc.
4641 if (isNegative()) {
4642 // If we are negative, we need to decrement the significand.
4643
4644 // We only cross a binade boundary that requires adjusting the exponent
4645 // if:
4646 // 1. exponent != semantics->minExponent. This implies we are not in the
4647 // smallest binade or are dealing with denormals.
4648 // 2. Our significand excluding the integral bit is all zeros.
4649 bool WillCrossBinadeBoundary =
4650 exponent != semantics->minExponent && isSignificandAllZeros();
4651
4652 // Decrement the significand.
4653 //
4654 // We always do this since:
4655 // 1. If we are dealing with a non-binade decrement, by definition we
4656 // just decrement the significand.
4657 // 2. If we are dealing with a normal -> normal binade decrement, since
4658 // we have an explicit integral bit the fact that all bits but the
4659 // integral bit are zero implies that subtracting one will yield a
4660 // significand with 0 integral bit and 1 in all other spots. Thus we
4661 // must just adjust the exponent and set the integral bit to 1.
4662 // 3. If we are dealing with a normal -> denormal binade decrement,
4663 // since we set the integral bit to 0 when we represent denormals, we
4664 // just decrement the significand.
4665 integerPart *Parts = significandParts();
4666 APInt::tcDecrement(Parts, partCount());
4667
4668 if (WillCrossBinadeBoundary) {
4669 // Our result is a normal number. Do the following:
4670 // 1. Set the integral bit to 1.
4671 // 2. Decrement the exponent.
4672 APInt::tcSetBit(Parts, semantics->precision - 1);
4673 exponent--;
4674 }
4675 } else {
4676 // If we are positive, we need to increment the significand.
4677
4678 // We only cross a binade boundary that requires adjusting the exponent if
4679 // the input is not a denormal and all of said input's significand bits
4680 // are set. If all of said conditions are true: clear the significand, set
4681 // the integral bit to 1, and increment the exponent. If we have a
4682 // denormal always increment since moving denormals and the numbers in the
4683 // smallest normal binade have the same exponent in our representation.
4684 // If there are only exponents, any increment always crosses the
4685 // BinadeBoundary.
4686 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4687 (!isDenormal() && isSignificandAllOnes());
4688
4689 if (WillCrossBinadeBoundary) {
4690 integerPart *Parts = significandParts();
4691 APInt::tcSet(Parts, 0, partCount());
4692 APInt::tcSetBit(Parts, semantics->precision - 1);
4693 assert(exponent != semantics->maxExponent &&
4694 "We can not increment an exponent beyond the maxExponent allowed"
4695 " by the given floating point semantics.");
4696 exponent++;
4697 } else {
4698 incrementSignificand();
4699 }
4700 }
4701 break;
4702 }
4703
4704 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4705 if (nextDown)
4706 changeSign();
4707
4708 return result;
4709}
4710
4711APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4712 return ::exponentNaN(*semantics);
4713}
4714
4715APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4716 return ::exponentInf(*semantics);
4717}
4718
4719APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4720 return ::exponentZero(*semantics);
4721}
4722
4723void IEEEFloat::makeInf(bool Negative) {
4724 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4725 llvm_unreachable("This floating point format does not support Inf");
4726
4727 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4728 // There is no Inf, so make NaN instead.
4729 makeNaN(false, Negative);
4730 return;
4731 }
4732 category = fcInfinity;
4733 sign = Negative;
4734 exponent = exponentInf();
4735 APInt::tcSet(significandParts(), 0, partCount());
4736}
4737
4738void IEEEFloat::makeZero(bool Negative) {
4739 if (!semantics->hasZero)
4740 llvm_unreachable("This floating point format does not support Zero");
4741
4742 category = fcZero;
4743 sign = Negative;
4744 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4745 // Merge negative zero to positive because 0b10000...000 is used for NaN
4746 sign = false;
4747 }
4748 exponent = exponentZero();
4749 APInt::tcSet(significandParts(), 0, partCount());
4750}
4751
4752void IEEEFloat::makeQuiet() {
4753 assert(isNaN());
4754 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4755 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4756}
4757
4758int ilogb(const IEEEFloat &Arg) {
4759 if (Arg.isNaN())
4760 return APFloat::IEK_NaN;
4761 if (Arg.isZero())
4762 return APFloat::IEK_Zero;
4763 if (Arg.isInfinity())
4764 return APFloat::IEK_Inf;
4765 if (!Arg.isDenormal())
4766 return Arg.exponent;
4767
4768 IEEEFloat Normalized(Arg);
4769 int SignificandBits = Arg.getSemantics().precision - 1;
4770
4771 Normalized.exponent += SignificandBits;
4772 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4773 return Normalized.exponent - SignificandBits;
4774}
4775
// Adds the clamped Exp to X.exponent, renormalizes X under RoundingMode,
// quiets the result if it is a NaN, and returns X.
// NOTE(review): the line carrying this function's signature is not present in
// this listing (the embedded numbering skips 4776); restore it from the
// upstream file -- TODO confirm.
4777 auto MaxExp = X.getSemantics().maxExponent;
4778 auto MinExp = X.getSemantics().minExponent;
4779
4780 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4781 // overflow; clamp it to a safe range before adding, but ensure that the range
4782 // is large enough that the clamp does not change the result. The range we
4783 // need to support is the difference between the largest possible exponent and
4784 // the normalized exponent of half the smallest denormal.
4785
4786 int SignificandBits = X.getSemantics().precision - 1;
4787 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4788
4789 // Clamp to one past the range ends to let normalize handle overflow.
4790 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4791 X.normalize(RoundingMode, lfExactlyZero);
4792 if (X.isNaN())
4793 X.makeQuiet();
4794 return X;
4795}
4796
4797IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4798 Exp = ilogb(Val);
4799
4800 // Quiet signalling nans.
4801 if (Exp == APFloat::IEK_NaN) {
4802 IEEEFloat Quiet(Val);
4803 Quiet.makeQuiet();
4804 return Quiet;
4805 }
4806
4807 if (Exp == APFloat::IEK_Inf)
4808 return Val;
4809
4810 // 1 is added because frexp is defined to return a normalized fraction in
4811 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4812 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4813 return scalbn(Val, -Exp, RM);
4814}
4815
// Constructs a DoubleAPFloat bound to semantics S; the body asserts that S is
// semPPCDoubleDouble.
// NOTE(review): the member initializer that should follow `Semantics(&S),` is
// not present in this listing (embedded line 4818 is missing) -- restore it
// from the upstream file.
4816DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4817 : Semantics(&S),
4819 assert(Semantics == &semPPCDoubleDouble);
4820}
4821
// Constructor fragment: records &S as the semantics and asserts that it is
// semPPCDoubleDouble.
// NOTE(review): the signature line and the remaining member initializers are
// not present in this listing (embedded lines 4822, 4824 and 4825 are
// missing) -- restore them from the upstream file.
4823 : Semantics(&S),
4826 assert(Semantics == &semPPCDoubleDouble);
4827}
4828
// Constructor fragment: records &S as the semantics and starts a two-element
// Floats array whose first element is an IEEE double built from I; the body
// asserts that the semantics are semPPCDoubleDouble.
// NOTE(review): the signature line and the second array element are not
// present in this listing (embedded lines 4829 and 4831 are missing) --
// restore them from the upstream file.
4830 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4832 assert(Semantics == &semPPCDoubleDouble);
4833}
4834
// Constructor fragment: records &S as the semantics and builds the two IEEE
// double halves from the first and second raw 64-bit words of I; the body
// asserts that the semantics are semPPCDoubleDouble.
// NOTE(review): the signature line is not present in this listing (embedded
// line 4835 is missing) -- restore it from the upstream file.
4836 : Semantics(&S),
4837 Floats(new APFloat[2]{
4838 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4839 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4840 assert(Semantics == &semPPCDoubleDouble);
4841}
4842
// Constructor fragment: records &S as the semantics and takes ownership of
// First and Second by moving them into the two-element Floats array. The body
// asserts that the semantics are semPPCDoubleDouble and that both halves are
// IEEE doubles.
// NOTE(review): the first line of the signature is not present in this
// listing (embedded line 4843 is missing) -- restore it from the upstream
// file.
4844 APFloat &&Second)
4845 : Semantics(&S),
4846 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4847 assert(Semantics == &semPPCDoubleDouble);
4848 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4849 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4850}
4851
// Constructor fragment: copies RHS.Semantics and, when RHS.Floats is non-null,
// allocates a new two-element array holding copies of both halves; otherwise
// Floats is left null. The body asserts that the semantics are
// semPPCDoubleDouble.
// NOTE(review): the signature line is not present in this listing (embedded
// line 4852 is missing) -- restore it from the upstream file.
4853 : Semantics(RHS.Semantics),
4854 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4855 APFloat(RHS.Floats[1])}
4856 : nullptr) {
4857 assert(Semantics == &semPPCDoubleDouble);
4858}
4859
// Constructor fragment: takes over RHS's Semantics and Floats, then leaves RHS
// with semBogus semantics and a null Floats. The body asserts that the
// semantics taken over are semPPCDoubleDouble.
// NOTE(review): the signature line is not present in this listing (embedded
// line 4860 is missing) -- restore it from the upstream file.
4861 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4862 RHS.Semantics = &semBogus;
4863 RHS.Floats = nullptr;
4864 assert(Semantics == &semPPCDoubleDouble);
4865}
4866
// Assignment body: when both sides share semantics and RHS has storage, the
// two halves are assigned element-wise in place. Otherwise, unless RHS is
// this same object, the current object is destroyed and re-created in place
// as a copy of RHS. Returns *this.
// NOTE(review): the signature line is not present in this listing (embedded
// line 4867 is missing) -- restore it from the upstream file.
4868 if (Semantics == RHS.Semantics && RHS.Floats) {
4869 Floats[0] = RHS.Floats[0];
4870 Floats[1] = RHS.Floats[1];
4871 } else if (this != &RHS) {
4872 this->~DoubleAPFloat();
4873 new (this) DoubleAPFloat(RHS);
4874 }
4875 return *this;
4876}
4877
4878// Returns a result such that:
4879// 1. abs(Lo) <= ulp(Hi)/2
4880// 2. Hi == RTNE(Hi + Lo)
4881// 3. Hi + Lo == X + Y
4882//
4883// Requires that log2(X) >= log2(Y).
4884static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4885 if (!X.isFinite())
4886 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4887 APFloat Hi = X + Y;
4888 APFloat Delta = Hi - X;
4889 APFloat Lo = Y - Delta;
4890 return {Hi, Lo};
4891}
4892
4893// Implement addition, subtraction, multiplication and division based on:
4894// "Software for Doubled-Precision Floating-Point Computations",
4895// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4896APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4897 const APFloat &c, const APFloat &cc,
4898 roundingMode RM) {
4899 int Status = opOK;
4900 APFloat z = a;
4901 Status |= z.add(c, RM);
4902 if (!z.isFinite()) {
4903 if (!z.isInfinity()) {
4904 Floats[0] = std::move(z);
4905 Floats[1].makeZero(/* Neg = */ false);
4906 return (opStatus)Status;
4907 }
4908 Status = opOK;
4909 auto AComparedToC = a.compareAbsoluteValue(c);
4910 z = cc;
4911 Status |= z.add(aa, RM);
4912 if (AComparedToC == APFloat::cmpGreaterThan) {
4913 // z = cc + aa + c + a;
4914 Status |= z.add(c, RM);
4915 Status |= z.add(a, RM);
4916 } else {
4917 // z = cc + aa + a + c;
4918 Status |= z.add(a, RM);
4919 Status |= z.add(c, RM);
4920 }
4921 if (!z.isFinite()) {
4922 Floats[0] = std::move(z);
4923 Floats[1].makeZero(/* Neg = */ false);
4924 return (opStatus)Status;
4925 }
4926 Floats[0] = z;
4927 APFloat zz = aa;
4928 Status |= zz.add(cc, RM);
4929 if (AComparedToC == APFloat::cmpGreaterThan) {
4930 // Floats[1] = a - z + c + zz;
4931 Floats[1] = a;
4932 Status |= Floats[1].subtract(z, RM);
4933 Status |= Floats[1].add(c, RM);
4934 Status |= Floats[1].add(zz, RM);
4935 } else {
4936 // Floats[1] = c - z + a + zz;
4937 Floats[1] = c;
4938 Status |= Floats[1].subtract(z, RM);
4939 Status |= Floats[1].add(a, RM);
4940 Status |= Floats[1].add(zz, RM);
4941 }
4942 } else {
4943 // q = a - z;
4944 APFloat q = a;
4945 Status |= q.subtract(z, RM);
4946
4947 // zz = q + c + (a - (q + z)) + aa + cc;
4948 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4949 auto zz = q;
4950 Status |= zz.add(c, RM);
4951 Status |= q.add(z, RM);
4952 Status |= q.subtract(a, RM);
4953 q.changeSign();
4954 Status |= zz.add(q, RM);
4955 Status |= zz.add(aa, RM);
4956 Status |= zz.add(cc, RM);
4957 if (zz.isZero() && !zz.isNegative()) {
4958 Floats[0] = std::move(z);
4959 Floats[1].makeZero(/* Neg = */ false);
4960 return opOK;
4961 }
4962 Floats[0] = z;
4963 Status |= Floats[0].add(zz, RM);
4964 if (!Floats[0].isFinite()) {
4965 Floats[1].makeZero(/* Neg = */ false);
4966 return (opStatus)Status;
4967 }
4968 Floats[1] = std::move(z);
4969 Status |= Floats[1].subtract(Floats[0], RM);
4970 Status |= Floats[1].add(zz, RM);
4971 }
4972 return (opStatus)Status;
4973}
4974
4975APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4976 const DoubleAPFloat &RHS,
4977 DoubleAPFloat &Out,
4978 roundingMode RM) {
4979 if (LHS.getCategory() == fcNaN) {
4980 Out = LHS;
4981 return opOK;
4982 }
4983 if (RHS.getCategory() == fcNaN) {
4984 Out = RHS;
4985 return opOK;
4986 }
4987 if (LHS.getCategory() == fcZero) {
4988 Out = RHS;
4989 return opOK;
4990 }
4991 if (RHS.getCategory() == fcZero) {
4992 Out = LHS;
4993 return opOK;
4994 }
4995 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4996 LHS.isNegative() != RHS.isNegative()) {
4997 Out.makeNaN(false, Out.isNegative(), nullptr);
4998 return opInvalidOp;
4999 }
5000 if (LHS.getCategory() == fcInfinity) {
5001 Out = LHS;
5002 return opOK;
5003 }
5004 if (RHS.getCategory() == fcInfinity) {
5005 Out = RHS;
5006 return opOK;
5007 }
5008 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
5009
5010 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
5011 CC(RHS.Floats[1]);
5012 assert(&A.getSemantics() == &semIEEEdouble);
5013 assert(&AA.getSemantics() == &semIEEEdouble);
5014 assert(&C.getSemantics() == &semIEEEdouble);
5015 assert(&CC.getSemantics() == &semIEEEdouble);
5016 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
5017 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
5018 return Out.addImpl(A, AA, C, CC, RM);
5019}
5020
// Adds RHS to this value in place by calling addWithSpecial with *this as both
// the left operand and the output, and returns its status.
// NOTE(review): the first line of the signature is not present in this
// listing (embedded line 5021 is missing) -- restore it from the upstream
// file.
5022 roundingMode RM) {
5023 return addWithSpecial(*this, RHS, *this, RM);
5024}
5025
// Flips the sign of this value, adds RHS, flips the sign back, and returns the
// status of the add.
// NOTE(review): the first line of the signature is not present in this
// listing (embedded line 5026 is missing) -- restore it from the upstream
// file.
5027 roundingMode RM) {
5028 changeSign();
5029 auto Ret = add(RHS, RM);
5030 changeSign();
5031 return Ret;
5032}
5033
// Multiplies this value by RHS in place. Special categories are resolved
// first; for two normal operands the product is built from the IEEE double
// halves and stored in Floats[0] and Floats[1]. Returns the union of the
// statuses of the individual operations.
// NOTE(review): the signature lines are not present in this listing (embedded
// lines 5034-5035 are missing) -- restore them from the upstream file.
5036 const auto &LHS = *this;
5037 auto &Out = *this;
5038 /* Interesting observation: For special categories, finding the lowest
5039 common ancestor of the following layered graph gives the correct
5040 return category:
5041
5042 NaN
5043 / \
5044 Zero Inf
5045 \ /
5046 Normal
5047
5048 e.g. NaN * NaN = NaN
5049 Zero * Inf = NaN
5050 Normal * Zero = Zero
5051 Normal * Inf = Inf
5052 */
5053 if (LHS.getCategory() == fcNaN) {
5054 Out = LHS;
5055 return opOK;
5056 }
5057 if (RHS.getCategory() == fcNaN) {
5058 Out = RHS;
5059 return opOK;
5060 }
5061 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
5062 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
5063 Out.makeNaN(false, false, nullptr);
5064 return opOK;
5065 }
5066 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
5067 Out = LHS;
5068 return opOK;
5069 }
5070 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
5071 Out = RHS;
5072 return opOK;
5073 }
5074 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
5075 "Special cases not handled exhaustively");
5076
5077 int Status = opOK;
5078 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
5079 // t = a * c
5080 APFloat T = A;
5081 Status |= T.multiply(C, RM);
// A zero or non-finite leading product is the whole result.
5082 if (!T.isFiniteNonZero()) {
5083 Floats[0] = T;
5084 Floats[1].makeZero(/* Neg = */ false);
5085 return (opStatus)Status;
5086 }
5087
5088 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
5089 APFloat Tau = A;
5090 T.changeSign();
5091 Status |= Tau.fusedMultiplyAdd(C, T, RM);
5092 T.changeSign();
5093 {
5094 // v = a * d
5095 APFloat V = A;
5096 Status |= V.multiply(D, RM);
5097 // w = b * c
5098 APFloat W = B;
5099 Status |= W.multiply(C, RM);
5100 Status |= V.add(W, RM);
5101 // tau += v + w
5102 Status |= Tau.add(V, RM);
5103 }
5104 // u = t + tau
5105 APFloat U = T;
5106 Status |= U.add(Tau, RM);
5107
5108 Floats[0] = U;
5109 if (!U.isFinite()) {
5110 Floats[1].makeZero(/* Neg = */ false);
5111 } else {
5112 // Floats[1] = (t - u) + tau
5113 Status |= T.subtract(U, RM);
5114 Status |= T.add(Tau, RM);
5115 Floats[1] = T;
5116 }
5117 return (opStatus)Status;
5118}
5119
// Visible part of the body: asserts semPPCDoubleDouble semantics, divides Tmp
// by RHS re-read under semPPCDoubleDoubleLegacy from its bit pattern, and
// returns the status of that division.
// NOTE(review): the signature, the definition of Tmp and the statement between
// the division and the return are not present in this listing (embedded lines
// 5120-5121, 5123 and 5126 are missing) -- restore them from the upstream
// file.
5122 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5124 auto Ret =
5125 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
5127 return Ret;
5128}
5129
// Visible part of the body: asserts semPPCDoubleDouble semantics, calls
// Tmp.remainder with RHS re-read under semPPCDoubleDoubleLegacy from its bit
// pattern, and returns that call's status.
// NOTE(review): the signature, the definition of Tmp and the statement between
// the remainder call and the return are not present in this listing (embedded
// lines 5130, 5132 and 5135 are missing) -- restore them from the upstream
// file.
5131 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5133 auto Ret =
5134 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5136 return Ret;
5137}
5138
// Visible part of the body: asserts semPPCDoubleDouble semantics, calls
// Tmp.mod with RHS re-read under semPPCDoubleDoubleLegacy from its bit
// pattern, and returns that call's status.
// NOTE(review): the signature, the definition of Tmp and the statement between
// the mod call and the return are not present in this listing (embedded lines
// 5139, 5141 and 5143 are missing) -- restore them from the upstream file.
5140 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5142 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5144 return Ret;
5145}
5146
// Visible part of the definition: takes an Addend operand, asserts
// semPPCDoubleDouble semantics, and returns the status of a fusedMultiplyAdd
// call made on Tmp.
// NOTE(review): most of this definition is not present in this listing
// (embedded lines 5147-5148, 5150, 5152 and 5154-5156 are missing, including
// the start of the signature, the definition of Tmp and the call's arguments)
// -- restore them from the upstream file.
5149 const DoubleAPFloat &Addend,
5151 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5153 auto Ret = Tmp.fusedMultiplyAdd(
5157 return Ret;
5158}
5159
5161 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5162 const APFloat &Hi = getFirst();
5163 const APFloat &Lo = getSecond();
5164
5165 APFloat RoundedHi = Hi;
5166 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
5167
5168 // We can reduce the problem to just the high part if the input:
5169 // 1. Represents a non-finite value.
5170 // 2. Has a component which is zero.
5171 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
5172 Floats[0] = std::move(RoundedHi);
5173 Floats[1].makeZero(/*Neg=*/false);
5174 return HiStatus;
5175 }
5176
5177 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5178 // halfway point.
5179 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5180 APFloat TieBreaker) {
5181 // RoundingError tells us which direction we rounded:
5182 // - RoundingError > 0: we rounded up.
5183 // - RoundingError < 0: we rounded down.
5184 // Sterbenz' lemma ensures that RoundingError is exact.
5185 const APFloat RoundingError = Rounded - ToRound;
5186 if (TieBreaker.isNonZero() &&
5187 TieBreaker.isNegative() != RoundingError.isNegative() &&
5188 abs(RoundingError).isExactlyValue(0.5))
5189 Rounded.add(
5190 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5192 return Rounded;
5193 };
5194
5195 // Case 1: Hi is not an integer.
5196 // Special cases are for rounding modes that are sensitive to ties.
5197 if (RoundedHi != Hi) {
5198 // We need to consider the case where Hi was between two integers and the
5199 // rounding mode broke the tie when, in fact, Lo may have had a different
5200 // sign than Hi.
5201 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5202 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5203
5204 Floats[0] = std::move(RoundedHi);
5205 Floats[1].makeZero(/*Neg=*/false);
5206 return HiStatus;
5207 }
5208
5209 // Case 2: Hi is an integer.
5210 // Special cases are for rounding modes which are rounding towards or away from zero.
5211 RoundingMode LoRoundingMode;
5212 if (RM == rmTowardZero)
5213 // When our input is positive, we want the Lo component rounded toward
5214 // negative infinity to get the smallest result magnitude. Likewise,
5215 // negative inputs want the Lo component rounded toward positive infinity.
5216 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5217 else
5218 LoRoundingMode = RM;
5219
5220 APFloat RoundedLo = Lo;
5221 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5222 if (LoRoundingMode == rmNearestTiesToAway)
5223 // We need to consider the case where Lo was between two integers and the
5224 // rounding mode broke the tie when, in fact, Hi may have had a different
5225 // sign than Lo.
5226 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5227
5228 // We must ensure that the final result has no overlap between the two APFloat values.
5229 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5230
5231 Floats[0] = std::move(RoundedHi);
5232 Floats[1] = std::move(RoundedLo);
5233 return LoStatus;
5234}
5235
5237 Floats[0].changeSign();
5238 Floats[1].changeSign();
5239}
5240
5243 // Compare absolute values of the high parts.
5244 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5245 if (HiPartCmp != cmpEqual)
5246 return HiPartCmp;
5247
5248 // Zero, regardless of sign, is equal.
5249 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5250 return cmpEqual;
5251
5252 // At this point, |this->Hi| == |RHS.Hi|.
5253 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5254 // same, and Hi-|Lo| if signs are different.
5255 const bool ThisIsSubtractive =
5256 Floats[0].isNegative() != Floats[1].isNegative();
5257 const bool RHSIsSubtractive =
5258 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5259
5260 // Case 1: The low part of 'this' is zero.
5261 if (Floats[1].isZero())
5262 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5263 // If RHS is subtractive, its magnitude is smaller.
5264 // If RHS is additive, its magnitude is larger.
5265 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5266
5267 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5268 if (RHS.Floats[1].isZero())
5269 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5270 // If 'this' is subtractive, its magnitude is smaller.
5271 // If 'this' is additive, its magnitude is larger.
5272 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5273
5274 // If their natures differ, the additive one is larger.
5275 if (ThisIsSubtractive != RHSIsSubtractive)
5276 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5277
5278 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5279 // The comparison now depends on the magnitude of the low parts.
5280 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5281
5282 if (ThisIsSubtractive) {
5283 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5284 if (LoPartCmp == cmpLessThan)
5285 return cmpGreaterThan;
5286 if (LoPartCmp == cmpGreaterThan)
5287 return cmpLessThan;
5288 }
5289
5290 // If additive, the comparison of |Lo| is direct.
5291 // If equal, they are equal.
5292 return LoPartCmp;
5293}
5294
5296 return Floats[0].getCategory();
5297}
5298
5299bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5300
5302 Floats[0].makeInf(Neg);
5303 Floats[1].makeZero(/* Neg = */ false);
5304}
5305
5307 Floats[0].makeZero(Neg);
5308 Floats[1].makeZero(/* Neg = */ false);
5309}
5310
5312 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5313 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5314 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5315 if (Neg)
5316 changeSign();
5317}
5318
5320 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5321 Floats[0].makeSmallest(Neg);
5322 Floats[1].makeZero(/* Neg = */ false);
5323}
5324
5326 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5327 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
5328 if (Neg)
5329 Floats[0].changeSign();
5330 Floats[1].makeZero(/* Neg = */ false);
5331}
5332
5333void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5334 Floats[0].makeNaN(SNaN, Neg, fill);
5335 Floats[1].makeZero(/* Neg = */ false);
5336}
5337
5339 auto Result = Floats[0].compare(RHS.Floats[0]);
5340 // |Float[0]| > |Float[1]|
5341 if (Result == APFloat::cmpEqual)
5342 return Floats[1].compare(RHS.Floats[1]);
5343 return Result;
5344}
5345
5347 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5348 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5349}
5350
5352 if (Arg.Floats)
5353 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5354 return hash_combine(Arg.Semantics);
5355}
5356
5358 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5359 uint64_t Data[] = {
5360 Floats[0].bitcastToAPInt().getRawData()[0],
5361 Floats[1].bitcastToAPInt().getRawData()[0],
5362 };
5363 return APInt(128, 2, Data);
5364}
5365
5367 roundingMode RM) {
5368 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5370 auto Ret = Tmp.convertFromString(S, RM);
5372 return Ret;
5373}
5374
5375// The double-double lattice of values corresponds to numbers which obey:
5376// - abs(lo) <= 1/2 * ulp(hi)
5377// - roundTiesToEven(hi + lo) == hi
5378//
5379// nextUp must choose the smallest output > input that follows these rules.
5380// nextDown must choose the largest output < input that follows these rules.
5382 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5383 // nextDown(x) = -nextUp(-x)
5384 if (nextDown) {
5385 changeSign();
5386 APFloat::opStatus Result = next(/*nextDown=*/false);
5387 changeSign();
5388 return Result;
5389 }
5390 switch (getCategory()) {
5391 case fcInfinity:
5392 // nextUp(+inf) = +inf
5393 // nextUp(-inf) = -getLargest()
5394 if (isNegative())
5395 makeLargest(true);
5396 return opOK;
5397
5398 case fcNaN:
5399 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5400 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5401 // change the payload.
5402 if (getFirst().isSignaling()) {
5403 // For consistency, propagate the sign of the sNaN to the qNaN.
5404 makeNaN(false, isNegative(), nullptr);
5405 return opInvalidOp;
5406 }
5407 return opOK;
5408
5409 case fcZero:
5410 // nextUp(pm 0) = +getSmallest()
5411 makeSmallest(false);
5412 return opOK;
5413
5414 case fcNormal:
5415 break;
5416 }
5417
5418 const APFloat &HiOld = getFirst();
5419 const APFloat &LoOld = getSecond();
5420
5421 APFloat NextLo = LoOld;
5422 NextLo.next(/*nextDown=*/false);
5423
5424 // We want to admit values where:
5425 // 1. abs(Lo) <= ulp(Hi)/2
5426 // 2. Hi == RTNE(Hi + lo)
5427 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5428 return Hi + Lo == Hi;
5429 };
5430
5431 // Check if (HiOld, nextUp(LoOld)) is in the lattice.
5432 if (InLattice(HiOld, NextLo)) {
5433 // Yes, the result is (HiOld, nextUp(LoOld)).
5434 Floats[1] = std::move(NextLo);
5435
5436 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5437 // value is defined to have exactly 106 bits of precision. This limitation
5438 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5439 // value.
5440 DoubleAPFloat Largest{*Semantics, uninitialized};
5441 Largest.makeLargest(/*Neg=*/false);
5442 if (compare(Largest) == cmpGreaterThan)
5443 makeInf(/*Neg=*/false);
5444
5445 return opOK;
5446 }
5447
5448 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5449 // correct result. We know the new hi component will be nextUp(HiOld) but our
5450 // lattice rules make it a little ambiguous what the correct NextLo must be.
5451 APFloat NextHi = HiOld;
5452 NextHi.next(/*nextDown=*/false);
5453
5454 // nextUp(getLargest()) == INFINITY
5455 if (NextHi.isInfinity()) {
5456 makeInf(/*Neg=*/false);
5457 return opOK;
5458 }
5459
5460 // IEEE 754-2019 5.3.1:
5461 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5462 // -0."
5463 if (NextHi.isZero()) {
5464 makeZero(/*Neg=*/true);
5465 return opOK;
5466 }
5467
5468 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5469 // negative infinity as possible.
5470 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5471 if (!InLattice(NextHi, NextLo))
5472 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5473 NextLo.next(/*nextDown=*/false);
5474
5475 Floats[0] = std::move(NextHi);
5476 Floats[1] = std::move(NextLo);
5477
5478 return opOK;
5479}
5480
5481APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5482 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5483 roundingMode RM, bool *IsExact) const {
5484 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5485
5486 // If Hi is not finite, or Lo is zero, the value is entirely represented
5487 // by Hi. Delegate to the simpler single-APFloat conversion.
5488 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5489 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5490
5491 // First, round the full double-double value to an integral value. This
5492 // simplifies the rest of the function, as we no longer need to consider
5493 // fractional parts.
5494 *IsExact = false;
5495 DoubleAPFloat Integral = *this;
5496 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5497 if (RoundStatus == opInvalidOp)
5498 return opInvalidOp;
5499 const APFloat &IntegralHi = Integral.getFirst();
5500 const APFloat &IntegralLo = Integral.getSecond();
5501
5502 // If rounding results in either component being zero, the sum is trivial.
5503 // Delegate to the simpler single-APFloat conversion.
5504 bool HiIsExact;
5505 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5506 const opStatus HiStatus =
5507 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5508 // The conversion from an integer-valued float to an APInt may fail if the
5509 // result would be out of range. Regardless, taking this path is only
5510 // possible if rounding occurred during the initial `roundToIntegral`.
5511 return HiStatus == opOK ? opInexact : HiStatus;
5512 }
5513
5514 // A negative number cannot be represented by an unsigned integer.
5515 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5516 if (!IsSigned && IntegralHi.isNegative())
5517 return opInvalidOp;
5518
5519 // Handle the special boundary case where |Hi| is exactly the power of two
5520 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5521 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5522 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5523 // signed, N for unsigned).
5524 bool LoIsExact;
5525 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5526 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5527 if (HiExactLog2 >= 0 &&
5528 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5529 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5530 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5531 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5532 return opInvalidOp;
5533
5534 // If the signs differ, the sum will fit. We can compute the result using
5535 // properties of two's complement arithmetic without a wide intermediate
5536 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5537 const opStatus LoStatus = IntegralLo.convertToInteger(
5538 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5539 if (LoStatus == opInvalidOp)
5540 return opInvalidOp;
5541
5542 // Adjust the bit pattern of Lo to account for Hi's value:
5543 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5544 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5545 // already produced the correct final bit pattern.
5546 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5547 // can be computed by taking the two's complement pattern for `Lo` and
5548 // clearing the sign bit.
5549 if (IsSigned && !IntegralHi.isNegative())
5550 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5551 *IsExact = RoundStatus == opOK;
5552 return RoundStatus;
5553 }
5554
5555 // Convert Hi into an integer. This may not fit but that is OK: we know that
5556 // Hi + Lo would not fit either in this situation.
5557 const opStatus HiStatus = IntegralHi.convertToInteger(
5558 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5559 if (HiStatus == opInvalidOp)
5560 return HiStatus;
5561
5562 // Convert Lo into a temporary integer of the same width.
5563 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5564 const opStatus LoStatus =
5565 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5566 if (LoStatus == opInvalidOp)
5567 return LoStatus;
5568
5569 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5570 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5571 // where the sum could cross the integer type's boundary is when Hi is a
5572 // power of two, which is handled by the special case block above.
5573 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5574
5575 *IsExact = RoundStatus == opOK;
5576 return RoundStatus;
5577}
5578
5581 unsigned int Width, bool IsSigned,
5582 roundingMode RM, bool *IsExact) const {
5583 opStatus FS =
5584 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5585
5586 if (FS == opInvalidOp) {
5587 const unsigned DstPartsCount = partCountForBits(Width);
5588 assert(DstPartsCount <= Input.size() && "Integer too big");
5589
5590 unsigned Bits;
5591 if (getCategory() == fcNaN)
5592 Bits = 0;
5593 else if (isNegative())
5594 Bits = IsSigned;
5595 else
5596 Bits = Width - IsSigned;
5597
5598 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
5599 if (isNegative() && IsSigned)
5600 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5601 }
5602
5603 return FS;
5604}
5605
5606APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5607 switch (RM) {
5609 makeLargest(/*Neg=*/isNegative());
5610 break;
5612 if (isNegative())
5613 makeInf(/*Neg=*/true);
5614 else
5615 makeLargest(/*Neg=*/false);
5616 break;
5618 if (isNegative())
5619 makeLargest(/*Neg=*/true);
5620 else
5621 makeInf(/*Neg=*/false);
5622 break;
5625 makeInf(/*Neg=*/isNegative());
5626 break;
5627 default:
5628 llvm_unreachable("Invalid rounding mode found");
5629 }
5630 opStatus S = opInexact;
5631 if (!getFirst().isFinite())
5632 S = static_cast<opStatus>(S | opOverflow);
5633 return S;
5634}
5635
5636APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5637 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5638 // Find the most significant bit of the source integer. APInt::tcMSB returns
5639 // UINT_MAX for a zero value.
5640 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5641 if (SrcMSB == UINT_MAX) {
5642 // The source integer is 0.
5643 makeZero(/*Neg=*/false);
5644 return opOK;
5645 }
5646
5647 // Create a minimally-sized APInt to represent the source value.
5648 const unsigned SrcBitWidth = SrcMSB + 1;
5649 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth,
5650 /*numWords=*/SrcCount, Src},
5651 /*isUnsigned=*/true};
5652
5653 // Stage 1: Initial Approximation.
5654 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5655 // We use round-to-nearest because it minimizes the initial error, which is
5656 // crucial for the subsequent steps.
5658 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5659
5660 // If the first approximation already overflows, the number is too large.
5661 // NOTE: The underlying semantics are *more* conservative when choosing to
5662 // overflow because their notion of ULP is much larger. As such, it is always
5663 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5664 if (!Hi.isFinite())
5665 return handleOverflow(RM);
5666
5667 // Stage 2: Exact Error Calculation.
5668 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5669 // This is done by converting Hi back to an integer and subtracting it from
5670 // the original source.
5671 bool HiAsIntIsExact;
5672 // Create an integer representation of Hi. Its width is determined by the
5673 // exponent of Hi, ensuring it's just large enough. This width can exceed
5674 // SrcBitWidth if the conversion to Hi rounded up to a power of two; the
5675 // extra bit lets Hi be represented accurately when converted back to an integer.
5676 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5677 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5678 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5679
5680 // Stage 3: Error Approximation and Rounding.
5681 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5682 // captures the remainder of the original number. The rounding mode for this
5683 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5684 // ensure the final sum (Hi + Lo) rounds correctly.
5685 roundingMode LoRM = RM;
5686 // Adjustments are only necessary when the initial approximation Hi was an
5687 // overestimate, making the Error negative.
5688 if (Error.isNegative()) {
5689 if (RM == rmNearestTiesToAway) {
5690 // For rmNearestTiesToAway, a tie should round away from zero. Since
5691 // SrcInt is positive, this means rounding toward +infinity.
5692 // A standard conversion of a negative Error would round ties toward
5693 // -infinity, causing the final sum Hi + Lo to be smaller. To
5694 // counteract this, we detect the tie case and override the rounding
5695 // mode for Lo to rmTowardPositive.
5696 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5697 const unsigned LoPrecision = getSecond().getSemantics().precision;
5698 if (ErrorActiveBits > LoPrecision) {
5699 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5700 // A tie occurs when the bits to be truncated are of the form 100...0.
5701 // This is detected by checking if the number of trailing zeros is
5702 // exactly one less than the number of bits being truncated.
5703 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5704 LoRM = rmTowardPositive;
5705 }
5706 } else if (RM == rmTowardZero) {
5707 // For rmTowardZero, the final positive result must be truncated (rounded
5708 // down). When Hi is an overestimate, Error is negative. A standard
5709 // rmTowardZero conversion of Error would make it *less* negative,
5710 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5711 // rounds down correctly, we force Lo to round toward -infinity.
5712 LoRM = rmTowardNegative;
5713 }
5714 }
5715
5717 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5718
5719 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5720 // components do not overlap. fastTwoSum performs this operation.
5721 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5722 Floats[0] = std::move(Hi);
5723 Floats[1] = std::move(Lo);
5724
5725 // A final check for overflow is needed because fastTwoSum can cause a
5726 // carry-out from Lo that pushes Hi to infinity.
5727 if (!getFirst().isFinite())
5728 return handleOverflow(RM);
5729
5730 // The largest DoubleAPFloat must be canonical. Values which are larger are
5731 // not canonical and are equivalent to overflow.
5732 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5733 DoubleAPFloat Largest{*Semantics};
5734 Largest.makeLargest(/*Neg=*/false);
5735 if (compare(Largest) == APFloat::cmpGreaterThan)
5736 return handleOverflow(RM);
5737 }
5738
5739 // The final status of the operation is determined by the conversion of the
5740 // error term. If Lo could represent Error exactly, the entire conversion
5741 // is exact. Otherwise, it's inexact.
5742 return Status;
5743}
5744
5746 bool IsSigned,
5747 roundingMode RM) {
5748 const bool NegateInput = IsSigned && Input.isNegative();
5749 APInt API = Input;
5750 if (NegateInput)
5751 API.negate();
5752
5754 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5755 if (NegateInput)
5756 changeSign();
5757 return Status;
5758}
5759
5761 unsigned int HexDigits,
5762 bool UpperCase,
5763 roundingMode RM) const {
5764 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5766 .convertToHexString(DST, HexDigits, UpperCase, RM);
5767}
5768
5770 return getCategory() == fcNormal &&
5771 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5772 // (double)(Hi + Lo) == Hi defines a normal number.
5773 Floats[0] != Floats[0] + Floats[1]);
5774}
5775
5777 if (getCategory() != fcNormal)
5778 return false;
5779 DoubleAPFloat Tmp(*this);
5780 Tmp.makeSmallest(this->isNegative());
5781 return Tmp.compare(*this) == cmpEqual;
5782}
5783
5785 if (getCategory() != fcNormal)
5786 return false;
5787
5788 DoubleAPFloat Tmp(*this);
5790 return Tmp.compare(*this) == cmpEqual;
5791}
5792
5794 if (getCategory() != fcNormal)
5795 return false;
5796 DoubleAPFloat Tmp(*this);
5797 Tmp.makeLargest(this->isNegative());
5798 return Tmp.compare(*this) == cmpEqual;
5799}
5800
5802 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5803 return Floats[0].isInteger() && Floats[1].isInteger();
5804}
5805
5807 unsigned FormatPrecision,
5808 unsigned FormatMaxPadding,
5809 bool TruncateZero) const {
5810 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5812 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5813}
5814
5816 // In order for Hi + Lo to be a power of two, the following must be true:
5817 // 1. Hi must be a power of two.
5818 // 2. Lo must be zero.
5819 if (getSecond().isNonZero())
5820 return INT_MIN;
5821 return getFirst().getExactLog2Abs();
5822}
5823
5824int ilogb(const DoubleAPFloat &Arg) {
5825 const APFloat &Hi = Arg.getFirst();
5826 const APFloat &Lo = Arg.getSecond();
5827 int IlogbResult = ilogb(Hi);
5828 // Zero and non-finite values can delegate to ilogb(Hi).
5829 if (Arg.getCategory() != fcNormal)
5830 return IlogbResult;
5831 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5832 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5833 return IlogbResult;
5834 if (Hi.getExactLog2Abs() == INT_MIN)
5835 return IlogbResult;
5836 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5837 // get nudged out of the binade by the low component.
5838 return IlogbResult - 1;
5839}
5840
5843 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5844 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5845 scalbn(Arg.Floats[1], Exp, RM));
5846}
5847
5848DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5850 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5851
5852 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5853 // [1.0, 2.0).
5854 Exp = ilogb(Arg);
5855
5856 // For NaNs, quiet any signaling NaN and return the result, as per standard
5857 // practice.
5858 if (Exp == APFloat::IEK_NaN) {
5859 DoubleAPFloat Quiet{Arg};
5860 Quiet.getFirst().makeQuiet();
5861 return Quiet;
5862 }
5863
5864 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5865 if (Exp == APFloat::IEK_Inf)
5866 return Arg;
5867
5868 // For zero, the fraction is zero and the standard requires the exponent be 0.
5869 if (Exp == APFloat::IEK_Zero) {
5870 Exp = 0;
5871 return Arg;
5872 }
5873
5874 const APFloat &Hi = Arg.getFirst();
5875 const APFloat &Lo = Arg.getSecond();
5876
5877 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5878 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5879 // Increment the exponent to ensure the fraction is in the correct range.
5880 ++Exp;
5881
5882 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5883 APFloat Second = Lo;
5884 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5885 roundingMode LoRoundingMode;
5886 // The interpretation of rmTowardZero depends on the sign of the combined
5887 // Arg rather than the sign of the component.
5888 if (RM == rmTowardZero)
5889 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5890 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5891 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5892 // "away from zero" based on its own sign would move the value in the
5893 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5894 // direction-agnostic. We only need to bother with this if Lo is scaled
5895 // down.
5896 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5897 LoRoundingMode = rmNearestTiesToEven;
5898 else
5899 LoRoundingMode = RM;
5900 Second = scalbn(Lo, -Exp, LoRoundingMode);
5901 // The rmNearestTiesToEven proxy is correct most of the time, but it
5902 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5903 // exact midpoint.
5904 // NOTE: This is morally equivalent to roundTiesTowardZero.
5905 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5906 // Re-scale the result back to check if rounding occurred.
5907 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5908 if (RecomposedLo != Lo) {
5909 // RoundingError tells us which direction we rounded:
5910 // - RoundingError > 0: we rounded up.
5911 // - RoundingError < 0: we rounded down.
5912 const APFloat RoundingError = RecomposedLo - Lo;
5913 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5914 // We do this by checking if the absolute rounding error is exactly
5915 // half a ULP of the result.
5916 const APFloat UlpOfSecond = harrisonUlp(Second);
5917 const APFloat ScaledUlpOfSecond =
5918 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5919 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5920 const bool RoundedLoAway =
5921 Second.isNegative() == RoundingError.isNegative();
5922 // The sign of Hi and Lo disagree and we rounded Lo away: we must
5923 // decrease the magnitude of Second to increase the magnitude of
5924 // First+Second.
5925 if (IsMidpoint && RoundedLoAway)
5926 Second.next(/*nextDown=*/!Second.isNegative());
5927 }
5928 }
5929 // Handle a tricky edge case where Arg is slightly less than a power of two
5930 // (e.g., Arg = 2^k - epsilon). In this situation:
5931 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5932 // 2. ilogb(Arg) correctly returns k-1.
5933 // 3. Our initial Exp becomes (k-1) + 1 = k.
5934 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5935 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5936 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5937 // We detect this specific case by checking if Hi is a power of two and if
5938 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5939 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5940 // valid fraction.
5941 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5942 ++Exp;
5943 }
5944
5945 APFloat First = scalbn(Hi, -Exp, RM);
5946 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5947}
5948
5949} // namespace detail
5950
5951APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5952 if (usesLayout<IEEEFloat>(Semantics)) {
5953 new (&IEEE) IEEEFloat(std::move(F));
5954 return;
5955 }
5956 if (usesLayout<DoubleAPFloat>(Semantics)) {
5957 const fltSemantics& S = F.getSemantics();
5958 new (&Double)
5959 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5961 return;
5962 }
5963 llvm_unreachable("Unexpected semantics");
5964}
5965
5967 roundingMode RM) {
5969}
5970
5972 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5973 return hash_value(Arg.U.IEEE);
5974 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5975 return hash_value(Arg.U.Double);
5976 llvm_unreachable("Unexpected semantics");
5977}
5978
5979APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5980 : APFloat(Semantics) {
5981 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5982 assert(StatusOrErr && "Invalid floating point representation");
5983 consumeError(StatusOrErr.takeError());
5984}
5985
5987 if (isZero())
5988 return isNegative() ? fcNegZero : fcPosZero;
5989 if (isNormal())
5990 return isNegative() ? fcNegNormal : fcPosNormal;
5991 if (isDenormal())
5993 if (isInfinity())
5994 return isNegative() ? fcNegInf : fcPosInf;
5995 assert(isNaN() && "Other class of FP constant");
5996 return isSignaling() ? fcSNan : fcQNan;
5997}
5998
6000 // Only finite, non-zero numbers can have a useful, representable inverse.
6001 // This check filters out +/- zero, +/- infinity, and NaN.
6002 if (!isFiniteNonZero())
6003 return false;
6004
6005 // Historically, this function rejects subnormal inputs. One reason why this
6006 // might be important is that subnormals may behave differently under FTZ/DAZ
6007 // runtime behavior.
6008 if (isDenormal())
6009 return false;
6010
6011 // A number has an exact, representable inverse if and only if it is a power
6012 // of two.
6013 //
6014 // Mathematical Rationale:
6015 // 1. A binary floating-point number x is a dyadic rational, meaning it can
6016 // be written as x = M / 2^k for integers M (the significand) and k.
6017 // 2. The inverse is 1/x = 2^k / M.
6018 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
6019 // in binary), its denominator M must also be a power of two.
6020 // Let's say M = 2^m.
6021 // 4. Substituting this back into the formula for x, we get
6022 // x = (2^m) / (2^k) = 2^(m-k).
6023 //
6024 // This proves that x must be a power of two.
6025
6026 // getExactLog2Abs() returns the integer exponent if the number is a power of
6027 // two or INT_MIN if it is not.
6028 const int Exp = getExactLog2Abs();
6029 if (Exp == INT_MIN)
6030 return false;
6031
6032 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
6033 // scaling 1.0 by the negated exponent.
6034 APFloat Reciprocal =
6035 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
6036 rmTowardZero);
6037
6038 // scalbn might round if the resulting exponent -Exp is outside the
6039 // representable range, causing overflow (to infinity) or underflow. We
6040 // must verify that the result is still the exact power of two we expect.
6041 if (Reciprocal.getExactLog2Abs() != -Exp)
6042 return false;
6043
6044 // Avoid multiplication with a subnormal, it is not safe on all platforms and
6045 // may be slower than a normal division.
6046 if (Reciprocal.isDenormal())
6047 return false;
6048
6049 assert(Reciprocal.isFiniteNonZero());
6050
6051 if (Inv)
6052 *Inv = std::move(Reciprocal);
6053
6054 return true;
6055}
6056
6058 roundingMode RM, bool *losesInfo) {
6059 if (&getSemantics() == &ToSemantics) {
6060 *losesInfo = false;
6061 return opOK;
6062 }
6063 if (usesLayout<IEEEFloat>(getSemantics()) &&
6064 usesLayout<IEEEFloat>(ToSemantics))
6065 return U.IEEE.convert(ToSemantics, RM, losesInfo);
6066 if (usesLayout<IEEEFloat>(getSemantics()) &&
6067 usesLayout<DoubleAPFloat>(ToSemantics)) {
6068 assert(&ToSemantics == &semPPCDoubleDouble);
6069 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
6070 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
6071 return Ret;
6072 }
6073 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
6074 usesLayout<IEEEFloat>(ToSemantics)) {
6075 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
6076 *this = APFloat(std::move(getIEEE()), ToSemantics);
6077 return Ret;
6078 }
6079 llvm_unreachable("Unexpected semantics");
6080}
6081
6083 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
6084}
6085
6087 SmallVector<char, 16> Buffer;
6088 toString(Buffer);
6089 OS << Buffer;
6090}
6091
6092#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6094 print(dbgs());
6095 dbgs() << '\n';
6096}
6097#endif
6098
6100 NID.Add(bitcastToAPInt());
6101}
6102
6104 roundingMode rounding_mode,
6105 bool *isExact) const {
6106 unsigned bitWidth = result.getBitWidth();
6107 SmallVector<uint64_t, 4> parts(result.getNumWords());
6108 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
6109 rounding_mode, isExact);
6110 // Keeps the original signed-ness.
6111 result = APInt(bitWidth, parts);
6112 return status;
6113}
6114
6116 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
6117 return getIEEE().convertToDouble();
6119 "Float semantics is not representable by IEEEdouble");
6120 APFloat Temp = *this;
6121 bool LosesInfo;
6122 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
6123 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6124 (void)St;
6125 return Temp.getIEEE().convertToDouble();
6126}
6127
6128#ifdef HAS_IEE754_FLOAT128
6129float128 APFloat::convertToQuad() const {
6130 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad)
6131 return getIEEE().convertToQuad();
6133 "Float semantics is not representable by IEEEquad");
6134 APFloat Temp = *this;
6135 bool LosesInfo;
6136 opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
6137 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6138 (void)St;
6139 return Temp.getIEEE().convertToQuad();
6140}
6141#endif
6142
6144 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
6145 return getIEEE().convertToFloat();
6147 "Float semantics is not representable by IEEEsingle");
6148 APFloat Temp = *this;
6149 bool LosesInfo;
6150 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
6151 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6152 (void)St;
6153 return Temp.getIEEE().convertToFloat();
6154}
6155
6156} // namespace llvm
6157
6158#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to classify how the interaction of two APFloat's categories affects an operation.
Definition: APFloat.cpp:48
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition: APFloat.h:26
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:638
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
Looks at all the uses of the given value. Returns the Liveness deduced from the uses of this value. Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If the result is Live, MaybeLiveUses might be modified but its content should be ignored (since it might not be complete). DeadArgumentEliminationPass
Given that RA is a live value
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
Value * RHS
Value * LHS
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:1120
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition: APFloat.cpp:6099
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1208
bool isFiniteNonZero() const
Definition: APFloat.h:1459
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:6057
LLVM_READONLY int getExactLog2Abs() const
Definition: APFloat.h:1497
opStatus subtract(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1190
bool bitwiseIsEqual(const APFloat &RHS) const
Definition: APFloat.h:1414
bool isNegative() const
Definition: APFloat.h:1449
bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition: APFloat.cpp:5999
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition: APFloat.cpp:6115
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition: APFloat.h:1478
bool isNormal() const
Definition: APFloat.h:1453
bool isDenormal() const
Definition: APFloat.h:1450
opStatus add(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1181
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition: APFloat.cpp:6082
const fltSemantics & getSemantics() const
Definition: APFloat.h:1457
bool isFinite() const
Definition: APFloat.h:1454
bool isNaN() const
Definition: APFloat.h:1447
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition: APFloat.h:1088
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition: APFloat.h:1439
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:6143
bool isSignaling() const
Definition: APFloat.h:1451
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1235
opStatus remainder(const APFloat &RHS)
Definition: APFloat.h:1217
bool isZero() const
Definition: APFloat.h:1445
APInt bitcastToAPInt() const
Definition: APFloat.h:1353
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1332
opStatus next(bool nextDown)
Definition: APFloat.h:1254
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1098
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition: APFloat.h:1148
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition: APFloat.cpp:5986
opStatus mod(const APFloat &RHS)
Definition: APFloat.h:1226
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition: APFloat.cpp:5966
fltCategory getCategory() const
Definition: APFloat.h:1456
bool isInteger() const
Definition: APFloat.h:1466
LLVM_DUMP_METHOD void dump() const
Definition: APFloat.cpp:6093
LLVM_ABI void print(raw_ostream &) const
Definition: APFloat.cpp:6086
opStatus roundToIntegral(roundingMode RM)
Definition: APFloat.h:1248
void changeSign()
Definition: APFloat.h:1297
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition: APFloat.h:1173
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:1079
cmpResult compare(const APFloat &RHS) const
Definition: APFloat.h:1404
bool isInfinity() const
Definition: APFloat.h:1446
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1573
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition: APInt.cpp:2368
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition: APInt.cpp:2340
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1758
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition: APInt.cpp:2363
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition: APInt.cpp:2442
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition: APInt.cpp:2412
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:936
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition: APInt.cpp:2752
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition: APInt.h:1752
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition: APInt.cpp:2348
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
uint64_t WordType
Definition: APInt.h:80
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition: APInt.cpp:2726
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition: APInt.cpp:2632
unsigned getNumWords() const
Get the number of words.
Definition: APInt.h:1495
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition: APInt.cpp:2373
void negate()
Negate this APInt in place.
Definition: APInt.h:1468
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition: APInt.h:1918
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition: APInt.cpp:2379
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition: APInt.cpp:2699
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition: APInt.cpp:2354
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition: APInt.cpp:2392
float bitsToFloat() const
Converts APInt bits to a float.
Definition: APInt.h:1736
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true; DST = SRC * MULTIPLIER + PART if add is false.
Definition: APInt.cpp:2530
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition: APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition: APInt.cpp:2477
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition: APInt.cpp:2516
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition: APInt.h:1744
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition: APInt.h:1913
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1722
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition: APInt.h:569
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:24
bool isSigned() const
Definition: APSInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
static ErrorSuccess success()
Create a success value.
Definition: Error.h:336
Tagged union holding either a T or a Error.
Definition: Error.h:485
FoldingSetNodeID - This class is used to gather all the unique data bits of a node.
Definition: FoldingSet.h:330
void Add(const T &x)
Definition: FoldingSet.h:374
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
T * data() const
Definition: ArrayRef.h:345
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:480
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:151
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:619
iterator begin() const
Definition: StringRef.h:120
char back() const
back - Get the last character in the string.
Definition: StringRef.h:163
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:694
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
char front() const
front - Get the first character in the string.
Definition: StringRef.h:157
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition: StringRef.h:645
iterator end() const
Definition: StringRef.h:122
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition: StringRef.h:655
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition: APFloat.cpp:5325
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition: APFloat.cpp:4867
LLVM_ABI void changeSign()
Definition: APFloat.cpp:5236
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI bool isLargest() const
Definition: APFloat.cpp:5793
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition: APFloat.cpp:5130
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition: APFloat.cpp:5034
LLVM_ABI fltCategory getCategory() const
Definition: APFloat.cpp:5295
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition: APFloat.cpp:5346
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition: APFloat.cpp:5815
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.cpp:5745
LLVM_ABI APInt bitcastToAPInt() const
Definition: APFloat.cpp:5357
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition: APFloat.cpp:5366
LLVM_ABI bool isSmallest() const
Definition: APFloat.cpp:5776
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition: APFloat.cpp:5026
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition: APFloat.cpp:5242
LLVM_ABI bool isDenormal() const
Definition: APFloat.cpp:5769
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.cpp:5580
LLVM_ABI void makeSmallest(bool Neg)
Definition: APFloat.cpp:5319
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition: APFloat.cpp:5824
LLVM_ABI opStatus next(bool nextDown)
Definition: APFloat.cpp:5381
LLVM_ABI void makeInf(bool Neg)
Definition: APFloat.cpp:5301
LLVM_ABI bool isInteger() const
Definition: APFloat.cpp:5801
LLVM_ABI void makeZero(bool Neg)
Definition: APFloat.cpp:5306
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition: APFloat.cpp:5120
LLVM_ABI bool isSmallestNormalized() const
Definition: APFloat.cpp:5784
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition: APFloat.cpp:5139
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition: APFloat.cpp:4816
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition: APFloat.cpp:5806
LLVM_ABI void makeLargest(bool Neg)
Definition: APFloat.cpp:5311
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition: APFloat.cpp:5338
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition: APFloat.cpp:5160
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition: APFloat.cpp:5148
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition: APFloat.cpp:5760
LLVM_ABI bool isNegative() const
Definition: APFloat.cpp:5299
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition: APFloat.cpp:5021
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition: APFloat.cpp:5333
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition: APFloat.cpp:3359
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition: APFloat.cpp:4776
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition: APFloat.cpp:1575
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition: APFloat.cpp:2346
fltCategory getCategory() const
Definition: APFloat.h:533
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition: APFloat.cpp:2916
bool isFiniteNonZero() const
Definition: APFloat.h:536
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition: APFloat.h:423
LLVM_ABI APInt bitcastToAPInt() const
Definition: APFloat.cpp:3767
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition: APFloat.cpp:2517
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition: APFloat.h:498
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition: APFloat.cpp:2216
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition: APFloat.h:523
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition: APFloat.cpp:2236
LLVM_ABI double convertToDouble() const
Definition: APFloat.cpp:3834
LLVM_ABI float convertToFloat() const
Definition: APFloat.cpp:3827
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition: APFloat.cpp:2190
LLVM_ABI void makeInf(bool Neg=false)
Definition: APFloat.cpp:4723
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.cpp:1075
LLVM_ABI void makeQuiet()
Definition: APFloat.cpp:4752
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current seman...
Definition: APFloat.cpp:1177
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition: APFloat.cpp:2184
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition: APFloat.h:510
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition: APFloat.cpp:3302
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition: APFloat.cpp:964
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition: APFloat.cpp:2196
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition: APFloat.cpp:2430
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition: APFloat.cpp:1036
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition: APFloat.cpp:1202
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.cpp:4184
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition: APFloat.cpp:1194
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition: APFloat.cpp:1229
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition: APFloat.cpp:2384
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4758
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition: APFloat.h:520
const fltSemantics & getSemantics() const
Definition: APFloat.h:534
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition: APFloat.h:513
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition: APFloat.cpp:4562
LLVM_ABI void makeZero(bool Neg=false)
Definition: APFloat.cpp:4738
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition: APFloat.cpp:2594
LLVM_ABI void changeSign()
Definition: APFloat.cpp:2140
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition: APFloat.cpp:1061
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition: APFloat.cpp:2856
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current se...
Definition: APFloat.cpp:1067
An opaque object representing a hash code.
Definition: Hashing.h:76
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
static constexpr opStatus opInexact
Definition: APFloat.h:399
APFloatBase::roundingMode roundingMode
Definition: APFloat.h:376
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of friend declarations above to make it discoverable by lookups.
static constexpr fltCategory fcNaN
Definition: APFloat.h:401
static constexpr opStatus opDivByZero
Definition: APFloat.h:396
static constexpr opStatus opOverflow
Definition: APFloat.h:397
static constexpr cmpResult cmpLessThan
Definition: APFloat.h:391
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition: APFloat.cpp:1600
APFloatBase::opStatus opStatus
Definition: APFloat.h:377
static constexpr roundingMode rmTowardPositive
Definition: APFloat.h:387
static constexpr uninitializedTag uninitialized
Definition: APFloat.h:381
static constexpr fltCategory fcZero
Definition: APFloat.h:403
static constexpr opStatus opOK
Definition: APFloat.h:394
static constexpr cmpResult cmpGreaterThan
Definition: APFloat.h:392
static constexpr unsigned integerPartWidth
Definition: APFloat.h:389
APFloatBase::ExponentType ExponentType
Definition: APFloat.h:380
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition: APFloat.cpp:3508
static constexpr fltCategory fcNormal
Definition: APFloat.h:402
static constexpr opStatus opInvalidOp
Definition: APFloat.h:395
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition: APFloat.cpp:4776
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition: APFloat.cpp:4797
APFloatBase::integerPart integerPart
Definition: APFloat.h:374
static constexpr cmpResult cmpUnordered
Definition: APFloat.h:393
static constexpr roundingMode rmTowardNegative
Definition: APFloat.h:386
static constexpr fltCategory fcInfinity
Definition: APFloat.h:400
APFloatBase::cmpResult cmpResult
Definition: APFloat.h:378
static constexpr roundingMode rmNearestTiesToAway
Definition: APFloat.h:384
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:388
static constexpr opStatus opUnderflow
Definition: APFloat.h:398
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:382
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4758
static constexpr cmpResult cmpEqual
Definition: APFloat.h:390
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition: APFloat.cpp:4884
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition: APFloat.cpp:855
static constexpr fltSemantics semBogus
Definition: APFloat.cpp:167
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1764
static const char infinityL[]
Definition: APFloat.cpp:846
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
hash_code hash_value(const FixedPointSemantics &Val)
Definition: APFixedPoint.h:137
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:307
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition: APFloat.cpp:419
static const char NaNU[]
Definition: APFloat.cpp:849
static constexpr fltSemantics semFloat8E8M0FNU
Definition: APFloat.cpp:150
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition: APFloat.cpp:730
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition: APFloat.cpp:789
static constexpr fltSemantics semFloat6E2M3FN
Definition: APFloat.cpp:162
static APFloat harrisonUlp(const APFloat &X)
Definition: APFloat.cpp:906
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition: APFloat.cpp:393
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition: APFloat.cpp:481
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition: Error.cpp:98
static constexpr fltSemantics semIEEEquad
Definition: APFloat.cpp:137
const unsigned int maxPowerOfFiveExponent
Definition: APFloat.cpp:319
static constexpr fltSemantics semFloat6E3M2FN
Definition: APFloat.cpp:160
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition: APFloat.h:1534
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition: APFloat.cpp:873
static constexpr fltSemantics semFloat8E4M3FNUZ
Definition: APFloat.cpp:144
const unsigned int maxPrecision
Definition: APFloat.cpp:318
static constexpr fltSemantics semIEEEdouble
Definition: APFloat.cpp:136
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1555
static const char NaNL[]
Definition: APFloat.cpp:848
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:157
static constexpr fltSemantics semFloat8E4M3FN
Definition: APFloat.cpp:142
static const char infinityU[]
Definition: APFloat.cpp:847
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition: APFloat.h:50
@ lfMoreThanHalf
Definition: APFloat.h:54
@ lfLessThanHalf
Definition: APFloat.h:52
@ lfExactlyHalf
Definition: APFloat.h:53
@ lfExactlyZero
Definition: APFloat.h:51
static constexpr fltSemantics semPPCDoubleDouble
Definition: APFloat.cpp:168
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition: APFloat.cpp:573
static constexpr fltSemantics semFloat8E5M2FNUZ
Definition: APFloat.cpp:139
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
Definition: LoopInfo.cpp:1164
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition: APFloat.cpp:320
static constexpr fltSemantics semIEEEsingle
Definition: APFloat.cpp:135
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition: APFloat.h:1543
static constexpr fltSemantics semFloat4E2M1FN
Definition: APFloat.cpp:164
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition: APFloat.cpp:403
static Error createError(const Twine &Err)
Definition: APFloat.cpp:415
static constexpr fltSemantics semIEEEhalf
Definition: APFloat.cpp:133
static constexpr fltSemantics semPPCDoubleDoubleLegacy
Definition: APFloat.cpp:169
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition: APFloat.cpp:696
static constexpr fltSemantics semFloat8E5M2
Definition: APFloat.cpp:138
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
static const char hexDigitsUpper[]
Definition: APFloat.cpp:845
const unsigned int maxExponent
Definition: APFloat.cpp:317
static unsigned int decDigitValue(unsigned int c)
Definition: APFloat.cpp:426
static constexpr fltSemantics semFloat8E4M3B11FNUZ
Definition: APFloat.cpp:146
fltNonfiniteBehavior
Definition: APFloat.cpp:57
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition: APFloat.cpp:709
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition: APFloat.cpp:533
RoundingMode
Rounding mode.
static constexpr fltSemantics semX87DoubleExtended
Definition: APFloat.cpp:166
static constexpr fltSemantics semFloatTF32
Definition: APFloat.cpp:149
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition: APFloat.cpp:398
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition: APFloat.cpp:674
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1569
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition: APFloat.cpp:744
static char * writeSignedDecimal(char *dst, int value)
Definition: APFloat.cpp:891
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:595
static constexpr fltSemantics semBFloat
Definition: APFloat.cpp:134
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition: APFloat.cpp:643
void consumeError(Error Err)
Consume an Error without doing anything.
Definition: Error.h:1083
static constexpr fltSemantics semFloat8E3M4
Definition: APFloat.cpp:148
fltNanEncoding
Definition: APFloat.cpp:81
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition: APFloat.cpp:436
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:469
static constexpr fltSemantics semFloat8E4M3
Definition: APFloat.cpp:141
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:378
static const char hexDigitsLower[]
Definition: APFloat.cpp:844
#define N
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition: APFloat.cpp:172
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:266
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition: APFloat.cpp:357
static LLVM_ABI const fltSemantics & Float6E3M2FN() LLVM_READNONE
Definition: APFloat.cpp:286
static constexpr roundingMode rmNearestTiesToAway
Definition: APFloat.h:309
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:294
static LLVM_ABI const fltSemantics & PPCDoubleDoubleLegacy() LLVM_READNONE
Definition: APFloat.cpp:272
static constexpr roundingMode rmTowardNegative
Definition: APFloat.h:307
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:332
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition: APFloat.cpp:335
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition: APFloat.cpp:353
static LLVM_ABI const fltSemantics & Float8E4M3() LLVM_READNONE
Definition: APFloat.cpp:277
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition: APFloat.cpp:388
static LLVM_ABI const fltSemantics & Float8E4M3FN() LLVM_READNONE
Definition: APFloat.cpp:278
static LLVM_ABI const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:269
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:308
static LLVM_ABI const fltSemantics & x87DoubleExtended() LLVM_READNONE
Definition: APFloat.cpp:289
uninitializedTag
Convenience enum used to construct an uninitialized APFloat.
Definition: APFloat.h:338
static LLVM_ABI const fltSemantics & IEEEquad() LLVM_READNONE
Definition: APFloat.cpp:268
static LLVM_ABI const fltSemantics & Float4E2M1FN() LLVM_READNONE
Definition: APFloat.cpp:288
static LLVM_ABI const fltSemantics & Float8E8M0FNU() LLVM_READNONE
Definition: APFloat.cpp:285
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition: APFloat.cpp:370
static LLVM_ABI const fltSemantics & Float8E4M3B11FNUZ() LLVM_READNONE
Definition: APFloat.cpp:280
static LLVM_ABI const fltSemantics & Bogus() LLVM_READNONE
A pseudo fltSemantics used to construct APFloats that cannot conflict with anything real.
Definition: APFloat.cpp:292
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:328
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:324
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:267
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition: APFloat.cpp:361
static LLVM_ABI const fltSemantics & Float8E5M2() LLVM_READNONE
Definition: APFloat.cpp:275
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition: APFloat.cpp:219
static constexpr unsigned integerPartWidth
Definition: APFloat.h:146
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:264
APInt::WordType integerPart
Definition: APFloat.h:145
static constexpr roundingMode rmTowardPositive
Definition: APFloat.h:306
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition: APFloat.cpp:349
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition: APFloat.cpp:374
static LLVM_ABI const fltSemantics & Float8E4M3FNUZ() LLVM_READNONE
Definition: APFloat.cpp:279
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
Definition: APFloat.cpp:265
static LLVM_ABI const fltSemantics & FloatTF32() LLVM_READNONE
Definition: APFloat.cpp:284
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition: APFloat.cpp:294
static LLVM_ABI const fltSemantics & Float8E5M2FNUZ() LLVM_READNONE
Definition: APFloat.cpp:276
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition: APFloat.cpp:365
static LLVM_ABI const fltSemantics & Float6E2M3FN() LLVM_READNONE
Definition: APFloat.cpp:287
fltCategory
Category of internally-represented number.
Definition: APFloat.h:330
@ S_PPCDoubleDoubleLegacy
Definition: APFloat.h:193
static LLVM_ABI const fltSemantics & Float8E3M4() LLVM_READNONE
Definition: APFloat.cpp:283
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:320
int32_t ExponentType
A signed type to represent a floating-point number's unbiased exponent.
Definition: APFloat.h:149
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:338
const char * lastSigDigit
Definition: APFloat.cpp:568
const char * firstSigDigit
Definition: APFloat.cpp:567
APFloatBase::ExponentType maxExponent
Definition: APFloat.cpp:106
fltNonfiniteBehavior nonFiniteBehavior
Definition: APFloat.cpp:119
APFloatBase::ExponentType minExponent
Definition: APFloat.cpp:110
unsigned int sizeInBits
Definition: APFloat.cpp:117
unsigned int precision
Definition: APFloat.cpp:114
fltNanEncoding nanEncoding
Definition: APFloat.cpp:121