[libc] Capture floating point encoding and arrange it sequentially in memory

Redefined FPBits.h and LongDoubleBitsX86 so that their implementations work on both the Windows
and Linux platforms while keeping the encoding of the floating point numbers packed in memory,
so that its size in memory is the same as that of the corresponding floating point data type.
This change was necessary because the previous __attribute__((packed)) specification in the struct was not working
on Windows like it was on Linux, and consequently static_asserts in the FPBits.h file were failing.

Reviewed By: aeubanks, sivachandra

Differential Revision: https://reviews.llvm.org/D105561
This commit is contained in:
Hedin García 2021-07-13 17:19:58 +00:00 committed by Hedin García
parent a16071e409
commit a5a337e55e
17 changed files with 250 additions and 225 deletions

View File

@ -121,7 +121,7 @@ public:
FPBits resultBits(result);
ASSERT_FALSE(resultBits.isZero());
// Verify that the result is indeed subnormal.
ASSERT_EQ(resultBits.encoding.exponent, uint16_t(0));
ASSERT_EQ(resultBits.getUnbiasedExponent(), uint16_t(0));
// But if the exp is so small that normalization leads to zero, then
// the result should be zero.
result = func(x, -FPBits::maxExponent - int(mantissaWidth) - 5);

View File

@ -163,31 +163,29 @@ public:
result = func(x, 0);
FPBits xBits = FPBits(x);
FPBits resultBits = FPBits(result);
ASSERT_EQ(resultBits.encoding.exponent,
uint16_t(xBits.encoding.exponent - 1));
ASSERT_EQ(resultBits.encoding.mantissa,
ASSERT_EQ(resultBits.getUnbiasedExponent(),
uint16_t(xBits.getUnbiasedExponent() - 1));
ASSERT_EQ(resultBits.getMantissa(),
(UIntType(1) << MantissaWidth::value) - 1);
result = func(x, T(33.0));
resultBits = FPBits(result);
ASSERT_EQ(resultBits.encoding.exponent, xBits.encoding.exponent);
ASSERT_EQ(resultBits.encoding.mantissa,
xBits.encoding.mantissa + UIntType(1));
ASSERT_EQ(resultBits.getUnbiasedExponent(), xBits.getUnbiasedExponent());
ASSERT_EQ(resultBits.getMantissa(), xBits.getMantissa() + UIntType(1));
x = -x;
result = func(x, 0);
resultBits = FPBits(result);
ASSERT_EQ(resultBits.encoding.exponent,
uint16_t(xBits.encoding.exponent - 1));
ASSERT_EQ(resultBits.encoding.mantissa,
ASSERT_EQ(resultBits.getUnbiasedExponent(),
uint16_t(xBits.getUnbiasedExponent() - 1));
ASSERT_EQ(resultBits.getMantissa(),
(UIntType(1) << MantissaWidth::value) - 1);
result = func(x, T(-33.0));
resultBits = FPBits(result);
ASSERT_EQ(resultBits.encoding.exponent, xBits.encoding.exponent);
ASSERT_EQ(resultBits.encoding.mantissa,
xBits.encoding.mantissa + UIntType(1));
ASSERT_EQ(resultBits.getUnbiasedExponent(), xBits.getUnbiasedExponent());
ASSERT_EQ(resultBits.getMantissa(), xBits.getMantissa() + UIntType(1));
}
};

View File

@ -135,9 +135,9 @@ public:
// We start with 1.0 so that the implicit bit for x86 long doubles
// is set.
FPBits bits(F(1.0));
bits.encoding.exponent = exponentLimit + FPBits::exponentBias;
bits.encoding.sign = 1;
bits.encoding.mantissa = 0;
bits.setUnbiasedExponent(exponentLimit + FPBits::exponentBias);
bits.setSign(1);
bits.setMantissa(0);
F x = F(bits);
long mpfrResult;
@ -199,10 +199,9 @@ public:
// We start with 1.0 so that the implicit bit for x86 long doubles
// is set.
FPBits bits(F(1.0));
bits.encoding.exponent = exponentLimit + FPBits::exponentBias;
bits.encoding.sign = 1;
bits.encoding.mantissa =
UIntType(0x1) << (__llvm_libc::fputil::MantissaWidth<F>::value - 1);
bits.setUnbiasedExponent(exponentLimit + FPBits::exponentBias);
bits.setSign(1);
bits.setMantissa(UIntType(0x1) << (__llvm_libc::fputil::MantissaWidth<F>::value - 1));
F x = F(bits);
if (TestModes) {

View File

@ -39,7 +39,7 @@ public:
void testDenormalValues(SqrtFunc func) {
for (UIntType mant = 1; mant < HiddenBit; mant <<= 1) {
FPBits denormal(T(0.0));
denormal.encoding.mantissa = mant;
denormal.setMantissa(mant);
ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, T(denormal), func(T(denormal)),
T(0.5));

View File

@ -20,7 +20,7 @@ template <typename T,
cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, int> = 0>
static inline T abs(T x) {
FPBits<T> bits(x);
bits.encoding.sign = 0;
bits.setSign(0);
return T(bits);
}
@ -33,11 +33,11 @@ static inline T fmin(T x, T y) {
return y;
} else if (bity.isNaN()) {
return x;
} else if (bitx.encoding.sign != bity.encoding.sign) {
} else if (bitx.getSign() != bity.getSign()) {
// To make sure that fmin(+0, -0) == -0 == fmin(-0, +0), whenever x and
// y have different signs and both are not NaNs, we return the number
// with negative sign.
return (bitx.encoding.sign ? x : y);
return (bitx.getSign() ? x : y);
} else {
return (x < y ? x : y);
}
@ -52,11 +52,11 @@ static inline T fmax(T x, T y) {
return y;
} else if (bity.isNaN()) {
return x;
} else if (bitx.encoding.sign != bity.encoding.sign) {
} else if (bitx.getSign() != bity.getSign()) {
// To make sure that fmax(+0, -0) == +0 == fmax(-0, +0), whenever x and
// y have different signs and both are not NaNs, we return the number
// with positive sign.
return (bitx.encoding.sign ? y : x);
return (bitx.getSign() ? y : x);
} else {
return (x > y ? x : y);
}

View File

@ -43,12 +43,13 @@ static inline T remquo(T x, T y, int &q) {
return x;
}
bool resultSign = (xbits.encoding.sign == ybits.encoding.sign ? false : true);
bool resultSign = (xbits.getSign() == ybits.getSign() ? false : true);
// Once we know the sign of the result, we can just operate on the absolute
// values. The correct sign can be applied to the result after the result
// is evaluated.
xbits.encoding.sign = ybits.encoding.sign = 0;
xbits.setSign(0);
ybits.setSign(0);
NormalFloat<T> normalx(xbits), normaly(ybits);
int exp = normalx.exponent - normaly.exponent;

View File

@ -13,47 +13,19 @@
#include "utils/CPP/TypeTraits.h"
#include "FloatProperties.h"
#include <stdint.h>
namespace __llvm_libc {
namespace fputil {
template <typename T> struct MantissaWidth {};
template <> struct MantissaWidth<float> {
static constexpr unsigned value = 23;
};
template <> struct MantissaWidth<double> {
static constexpr unsigned value = 52;
// Trait exposing, as `value`, the mantissa width that FloatProperties<T>
// declares for the floating point type T.
template <typename T> struct MantissaWidth {
static constexpr unsigned value = FloatProperties<T>::mantissaWidth;
};
template <typename T> struct ExponentWidth {};
template <> struct ExponentWidth<float> {
static constexpr unsigned value = 8;
// Trait exposing, as `value`, the exponent width that FloatProperties<T>
// declares for the floating point type T.
template <typename T> struct ExponentWidth {
static constexpr unsigned value = FloatProperties<T>::exponentWidth;
};
template <> struct ExponentWidth<double> {
static constexpr unsigned value = 11;
};
template <> struct ExponentWidth<long double> {
static constexpr unsigned value = 15;
};
template <typename T> struct FPUIntType {};
template <> struct FPUIntType<float> { using Type = uint32_t; };
template <> struct FPUIntType<double> { using Type = uint64_t; };
#ifdef LONG_DOUBLE_IS_DOUBLE
template <> struct MantissaWidth<long double> {
static constexpr unsigned value = MantissaWidth<double>::value;
};
template <> struct FPUIntType<long double> {
using Type = FPUIntType<double>::Type;
};
#elif !defined(SPECIAL_X86_LONG_DOUBLE)
template <> struct MantissaWidth<long double> {
static constexpr unsigned value = 112;
};
template <> struct FPUIntType<long double> { using Type = __uint128_t; };
#endif
// A generic class to represent single precision, double precision, and quad
// precision IEEE 754 floating point formats.
@ -70,20 +42,44 @@ template <typename T> union FPBits {
// Reinterpreting bits as an integer value and interpreting the bits of an
// integer value as a floating point value is used in tests. So, a convenient
// type is provided for such reinterpretations.
using UIntType = typename FPUIntType<T>::Type;
using FloatProp = FloatProperties<T>;
// TODO: Change UintType name to BitsType for consistency.
using UIntType = typename FloatProp::BitsType;
struct __attribute__((packed)) {
UIntType mantissa : MantissaWidth<T>::value;
uint16_t exponent : ExponentWidth<T>::value;
uint8_t sign : 1;
} encoding;
UIntType integer;
UIntType bits;
// Overwrites the mantissa field of |bits| with |mantVal|. Any bits of
// |mantVal| outside FloatProp::mantissaMask are dropped, and the bits of
// |bits| outside that mask are preserved.
void setMantissa(UIntType mantVal) {
  const UIntType preserved = bits & ~(FloatProp::mantissaMask);
  const UIntType mantissaField = mantVal & (FloatProp::mantissaMask);
  bits = preserved | mantissaField;
}
// Returns the bits of |bits| selected by FloatProp::mantissaMask.
UIntType getMantissa() const {
  const UIntType mantissaField = bits & FloatProp::mantissaMask;
  return mantissaField;
}
// Overwrites the exponent field of |bits| with |expVal|. The value is shifted
// up by FloatProp::mantissaWidth and masked with FloatProp::exponentMask, so
// bits that do not fit in the field are dropped. |expVal| is stored as given;
// no bias is added or removed here.
void setUnbiasedExponent(UIntType expVal) {
  const UIntType exponentField =
      (expVal << (FloatProp::mantissaWidth)) & FloatProp::exponentMask;
  const UIntType preserved = bits & ~(FloatProp::exponentMask);
  bits = preserved | exponentField;
}
uint16_t getUnbiasedExponent() const {
return uint16_t((bits & FloatProp::exponentMask) >>
(FloatProp::mantissaWidth));
}
void setSign(bool signVal) {
bits &= ~(FloatProp::signMask);
UIntType sign = UIntType(signVal) << (FloatProp::bitWidth - 1);
bits |= sign;
}
bool getSign() const {
return ((bits & FloatProp::signMask) >> (FloatProp::bitWidth - 1));
}
T val;
static_assert(sizeof(encoding) == sizeof(UIntType),
"Encoding and integral representation have different sizes.");
static_assert(sizeof(integer) == sizeof(UIntType),
"Integral representation and value type have different sizes.");
static_assert(sizeof(T) == sizeof(UIntType),
"Data type and integral representation have different sizes.");
static constexpr int exponentBias = (1 << (ExponentWidth<T>::value - 1)) - 1;
static constexpr int maxExponent = (1 << ExponentWidth<T>::value) - 1;
@ -104,29 +100,29 @@ template <typename T> union FPBits {
template <typename XType,
cpp::EnableIfType<cpp::IsSame<XType, UIntType>::Value, int> = 0>
explicit FPBits(XType x) : integer(x) {}
explicit FPBits(XType x) : bits(x) {}
FPBits() : integer(0) {}
FPBits() : bits(0) {}
explicit operator T() { return val; }
UIntType uintval() const { return integer; }
UIntType uintval() const { return bits; }
int getExponent() const { return int(encoding.exponent) - exponentBias; }
int getExponent() const { return int(getUnbiasedExponent()) - exponentBias; }
bool isZero() const {
return encoding.mantissa == 0 && encoding.exponent == 0;
return getMantissa() == 0 && getUnbiasedExponent() == 0;
}
bool isInf() const {
return encoding.mantissa == 0 && encoding.exponent == maxExponent;
return getMantissa() == 0 && getUnbiasedExponent() == maxExponent;
}
bool isNaN() const {
return encoding.exponent == maxExponent && encoding.mantissa != 0;
return getUnbiasedExponent() == maxExponent && getMantissa() != 0;
}
bool isInfOrNaN() const { return encoding.exponent == maxExponent; }
bool isInfOrNaN() const { return getUnbiasedExponent() == maxExponent; }
static FPBits<T> zero() { return FPBits(); }
@ -136,19 +132,19 @@ template <typename T> union FPBits {
static FPBits<T> inf() {
FPBits<T> bits;
bits.encoding.exponent = maxExponent;
bits.setUnbiasedExponent(maxExponent);
return bits;
}
static FPBits<T> negInf() {
FPBits<T> bits = inf();
bits.encoding.sign = 1;
bits.setSign(1);
return bits;
}
static T buildNaN(UIntType v) {
FPBits<T> bits = inf();
bits.encoding.mantissa = v;
bits.setMantissa(v);
return T(bits);
}
};

View File

@ -139,27 +139,27 @@ static inline T hypot(T x, T y) {
DUIntType a_mant_sq, b_mant_sq;
bool sticky_bits;
if ((x_bits.encoding.exponent >=
y_bits.encoding.exponent + MantissaWidth<T>::value + 2) ||
if ((x_bits.getUnbiasedExponent() >=
y_bits.getUnbiasedExponent() + MantissaWidth<T>::value + 2) ||
(y == 0)) {
return abs(x);
} else if ((y_bits.encoding.exponent >=
x_bits.encoding.exponent + MantissaWidth<T>::value + 2) ||
} else if ((y_bits.getUnbiasedExponent() >=
x_bits.getUnbiasedExponent() + MantissaWidth<T>::value + 2) ||
(x == 0)) {
y_bits.encoding.sign = 0;
y_bits.setSign(0);
return abs(y);
}
if (x >= y) {
a_exp = x_bits.encoding.exponent;
a_mant = x_bits.encoding.mantissa;
b_exp = y_bits.encoding.exponent;
b_mant = y_bits.encoding.mantissa;
a_exp = x_bits.getUnbiasedExponent();
a_mant = x_bits.getMantissa();
b_exp = y_bits.getUnbiasedExponent();
b_mant = y_bits.getMantissa();
} else {
a_exp = y_bits.encoding.exponent;
a_mant = y_bits.encoding.mantissa;
b_exp = x_bits.encoding.exponent;
b_mant = x_bits.encoding.mantissa;
a_exp = y_bits.getUnbiasedExponent();
a_mant = y_bits.getMantissa();
b_exp = x_bits.getUnbiasedExponent();
b_mant = x_bits.getMantissa();
}
out_exp = a_exp;

View File

@ -16,10 +16,6 @@
namespace __llvm_libc {
namespace fputil {
template <> struct MantissaWidth<long double> {
static constexpr unsigned value = 63;
};
template <unsigned Width> struct Padding;
// i386 padding.
@ -43,25 +39,61 @@ template <> union FPBits<long double> {
((UIntType(maxExponent) - 1) << (MantissaWidth<long double>::value + 1)) |
(UIntType(1) << MantissaWidth<long double>::value) | maxSubnormal;
struct __attribute__((packed)) {
UIntType mantissa : MantissaWidth<long double>::value;
uint8_t implicitBit : 1;
uint16_t exponent : ExponentWidth<long double>::value;
uint8_t sign : 1;
uint64_t padding : Padding<sizeof(uintptr_t)>::value;
} encoding;
UIntType integer;
using FloatProp = FloatProperties<long double>;
UIntType bits;
// Overwrites the mantissa field of |bits| with |mantVal|. Any bits of
// |mantVal| outside FloatProp::mantissaMask are dropped, and the bits of
// |bits| outside that mask are preserved.
void setMantissa(UIntType mantVal) {
  const UIntType preserved = bits & ~(FloatProp::mantissaMask);
  const UIntType mantissaField = mantVal & (FloatProp::mantissaMask);
  bits = preserved | mantissaField;
}
// Returns the bits of |bits| selected by FloatProp::mantissaMask. The bit at
// position FloatProp::mantissaWidth is read separately by getImplicitBit().
UIntType getMantissa() const {
  const UIntType mantissaField = bits & FloatProp::mantissaMask;
  return mantissaField;
}
// Overwrites the exponent field of |bits| with |expVal|. The value is shifted
// up by (FloatProp::bitWidth - 1 - FloatProp::exponentWidth) and masked with
// FloatProp::exponentMask, so bits that do not fit in the field are dropped.
// |expVal| is stored as given; no bias is added or removed here.
void setUnbiasedExponent(UIntType expVal) {
  const UIntType exponentField =
      (expVal << (FloatProp::bitWidth - 1 - FloatProp::exponentWidth)) &
      FloatProp::exponentMask;
  const UIntType preserved = bits & ~(FloatProp::exponentMask);
  bits = preserved | exponentField;
}
uint16_t getUnbiasedExponent() const {
return uint16_t((bits & FloatProp::exponentMask) >>
(FloatProp::bitWidth - 1 - FloatProp::exponentWidth));
}
void setImplicitBit(bool implicitVal) {
bits &= ~(UIntType(1) << FloatProp::mantissaWidth);
bits |= (UIntType(implicitVal) << FloatProp::mantissaWidth);
}
bool getImplicitBit() const {
return ((bits & (UIntType(1) << FloatProp::mantissaWidth)) >>
FloatProp::mantissaWidth);
}
void setSign(bool signVal) {
bits &= ~(FloatProp::signMask);
UIntType sign1 = UIntType(signVal) << (FloatProp::bitWidth - 1);
bits |= sign1;
}
bool getSign() const {
return ((bits & FloatProp::signMask) >> (FloatProp::bitWidth - 1));
}
long double val;
FPBits() : integer(0) {}
FPBits() : bits(0) {}
template <typename XType,
cpp::EnableIfType<cpp::IsSame<long double, XType>::Value, int> = 0>
explicit FPBits<long double>(XType x) : val(x) {}
explicit FPBits(XType x) : val(x) {}
template <typename XType,
cpp::EnableIfType<cpp::IsSame<XType, UIntType>::Value, int> = 0>
explicit FPBits(XType x) : integer(x) {}
explicit FPBits(XType x) : bits(x) {}
operator long double() { return val; }
@ -71,37 +103,37 @@ template <> union FPBits<long double> {
(UIntType(1) << (sizeof(long double) * 8 -
Padding<sizeof(uintptr_t)>::value)) -
1;
return integer & mask;
return bits & mask;
}
int getExponent() const {
if (encoding.exponent == 0)
if (getUnbiasedExponent() == 0)
return int(1) - exponentBias;
return int(encoding.exponent) - exponentBias;
return int(getUnbiasedExponent()) - exponentBias;
}
bool isZero() const {
return encoding.exponent == 0 && encoding.mantissa == 0 &&
encoding.implicitBit == 0;
return getUnbiasedExponent() == 0 && getMantissa() == 0 &&
getImplicitBit() == 0;
}
bool isInf() const {
return encoding.exponent == maxExponent && encoding.mantissa == 0 &&
encoding.implicitBit == 1;
return getUnbiasedExponent() == maxExponent && getMantissa() == 0 &&
getImplicitBit() == 1;
}
bool isNaN() const {
if (encoding.exponent == maxExponent) {
return (encoding.implicitBit == 0) || encoding.mantissa != 0;
} else if (encoding.exponent != 0) {
return encoding.implicitBit == 0;
if (getUnbiasedExponent() == maxExponent) {
return (getImplicitBit() == 0) || getMantissa() != 0;
} else if (getUnbiasedExponent() != 0) {
return getImplicitBit() == 0;
}
return false;
}
bool isInfOrNaN() const {
return (encoding.exponent == maxExponent) ||
(encoding.exponent != 0 && encoding.implicitBit == 0);
return (getUnbiasedExponent() == maxExponent) ||
(getUnbiasedExponent() != 0 && getImplicitBit() == 0);
}
// Methods below this are used by tests.
@ -110,30 +142,30 @@ template <> union FPBits<long double> {
static FPBits<long double> negZero() {
FPBits<long double> bits(0.0l);
bits.encoding.sign = 1;
bits.setSign(1);
return bits;
}
static FPBits<long double> inf() {
FPBits<long double> bits(0.0l);
bits.encoding.exponent = maxExponent;
bits.encoding.implicitBit = 1;
bits.setUnbiasedExponent(maxExponent);
bits.setImplicitBit(1);
return bits;
}
static FPBits<long double> negInf() {
FPBits<long double> bits(0.0l);
bits.encoding.exponent = maxExponent;
bits.encoding.implicitBit = 1;
bits.encoding.sign = 1;
bits.setUnbiasedExponent(maxExponent);
bits.setImplicitBit(1);
bits.setSign(1);
return bits;
}
static long double buildNaN(UIntType v) {
FPBits<long double> bits(0.0l);
bits.encoding.exponent = maxExponent;
bits.encoding.implicitBit = 1;
bits.encoding.mantissa = v;
bits.setUnbiasedExponent(maxExponent);
bits.setImplicitBit(1);
bits.setMantissa(v);
return bits;
}
};

View File

@ -48,14 +48,13 @@ static inline T modf(T x, T &iptr) {
return x;
} else if (bits.isInf()) {
iptr = x;
return bits.encoding.sign ? T(FPBits<T>::negZero()) : T(FPBits<T>::zero());
return bits.getSign() ? T(FPBits<T>::negZero()) : T(FPBits<T>::zero());
} else {
iptr = trunc(x);
if (x == iptr) {
// If x is already an integer value, then return zero with the right
// sign.
return bits.encoding.sign ? T(FPBits<T>::negZero())
: T(FPBits<T>::zero());
return bits.getSign() ? T(FPBits<T>::negZero()) : T(FPBits<T>::zero());
} else {
return x - iptr;
}
@ -66,7 +65,7 @@ template <typename T,
cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, int> = 0>
static inline T copysign(T x, T y) {
FPBits<T> xbits(x);
xbits.encoding.sign = FPBits<T>(y).encoding.sign;
xbits.setSign(FPBits<T>(y).getSign());
return T(xbits);
}
@ -133,11 +132,11 @@ static inline T ldexp(T x, int exp) {
// calculating the limit.
int expLimit = FPBits<T>::maxExponent + MantissaWidth<T>::value + 1;
if (exp > expLimit)
return bits.encoding.sign ? T(FPBits<T>::negInf()) : T(FPBits<T>::inf());
return bits.getSign() ? T(FPBits<T>::negInf()) : T(FPBits<T>::inf());
// Similarly on the negative side we return zero early if |exp| is too small.
if (exp < -expLimit)
return bits.encoding.sign ? T(FPBits<T>::negZero()) : T(FPBits<T>::zero());
return bits.getSign() ? T(FPBits<T>::negZero()) : T(FPBits<T>::zero());
// For all other values, NormalFloat to T conversion handles it the right way.
NormalFloat<T> normal(bits);

View File

@ -43,14 +43,14 @@ static inline T trunc(T x) {
// If the exponent is such that abs(x) is less than 1, then return 0.
if (exponent <= -1) {
if (bits.encoding.sign)
if (bits.getSign())
return T(-0.0);
else
return T(0.0);
}
int trimSize = MantissaWidth<T>::value - exponent;
bits.encoding.mantissa = (bits.encoding.mantissa >> trimSize) << trimSize;
bits.setMantissa((bits.getMantissa() >> trimSize) << trimSize);
return T(bits);
}
@ -63,7 +63,7 @@ static inline T ceil(T x) {
if (bits.isInfOrNaN() || bits.isZero())
return x;
bool isNeg = bits.encoding.sign;
bool isNeg = bits.getSign();
int exponent = bits.getExponent();
// If the exponent is greater than the most negative mantissa
@ -79,7 +79,7 @@ static inline T ceil(T x) {
}
uint32_t trimSize = MantissaWidth<T>::value - exponent;
bits.encoding.mantissa = (bits.encoding.mantissa >> trimSize) << trimSize;
bits.setMantissa((bits.getMantissa() >> trimSize) << trimSize);
T truncValue = T(bits);
// If x is already an integer, return it.
@ -97,7 +97,7 @@ template <typename T,
cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, int> = 0>
static inline T floor(T x) {
FPBits<T> bits(x);
if (bits.encoding.sign) {
if (bits.getSign()) {
return -ceil(-x);
} else {
return trunc(x);
@ -114,7 +114,7 @@ static inline T round(T x) {
if (bits.isInfOrNaN() || bits.isZero())
return x;
bool isNeg = bits.encoding.sign;
bool isNeg = bits.getSign();
int exponent = bits.getExponent();
// If the exponent is greater than the most negative mantissa
@ -139,8 +139,8 @@ static inline T round(T x) {
}
uint32_t trimSize = MantissaWidth<T>::value - exponent;
bool halfBitSet = bits.encoding.mantissa & (UIntType(1) << (trimSize - 1));
bits.encoding.mantissa = (bits.encoding.mantissa >> trimSize) << trimSize;
bool halfBitSet = bits.getMantissa() & (UIntType(1) << (trimSize - 1));
bits.setMantissa((bits.getMantissa() >> trimSize) << trimSize);
T truncValue = T(bits);
// If x is already an integer, return it.
@ -166,7 +166,7 @@ static inline T roundUsingCurrentRoundingMode(T x) {
if (bits.isInfOrNaN() || bits.isZero())
return x;
bool isNeg = bits.encoding.sign;
bool isNeg = bits.getSign();
int exponent = bits.getExponent();
int roundingMode = getRound();
@ -184,7 +184,7 @@ static inline T roundUsingCurrentRoundingMode(T x) {
case FE_TOWARDZERO:
return isNeg ? T(-0.0) : T(0.0);
case FE_TONEAREST:
if (exponent <= -2 || bits.encoding.mantissa == 0)
if (exponent <= -2 || bits.getMantissa() == 0)
return isNeg ? T(-0.0) : T(0.0); // abs(x) <= 0.5
else
return isNeg ? T(-1.0) : T(1.0); // abs(x) > 0.5
@ -195,19 +195,19 @@ static inline T roundUsingCurrentRoundingMode(T x) {
uint32_t trimSize = MantissaWidth<T>::value - exponent;
FPBits<T> newBits = bits;
newBits.encoding.mantissa = (bits.encoding.mantissa >> trimSize) << trimSize;
newBits.setMantissa((bits.getMantissa() >> trimSize) << trimSize);
T truncValue = T(newBits);
// If x is already an integer, return it.
if (truncValue == x)
return x;
UIntType trimValue = bits.encoding.mantissa & ((UIntType(1) << trimSize) - 1);
UIntType trimValue = bits.getMantissa() & ((UIntType(1) << trimSize) - 1);
UIntType halfValue = (UIntType(1) << (trimSize - 1));
// If exponent is 0, trimSize will be equal to the mantissa width, and
// `truncIsOdd` will not be correct. So, we handle it as a special case
// below.
UIntType truncIsOdd = newBits.encoding.mantissa & (UIntType(1) << trimSize);
UIntType truncIsOdd = newBits.getMantissa() & (UIntType(1) << trimSize);
switch (roundingMode) {
case FE_DOWNWARD:
@ -255,18 +255,18 @@ static inline I roundedFloatToSignedInteger(F x) {
if (bits.isInfOrNaN()) {
setDomainErrorAndRaiseInvalid();
return bits.encoding.sign ? IntegerMin : IntegerMax;
return bits.getSign() ? IntegerMin : IntegerMax;
}
int exponent = bits.getExponent();
constexpr int exponentLimit = sizeof(I) * 8 - 1;
if (exponent > exponentLimit) {
setDomainErrorAndRaiseInvalid();
return bits.encoding.sign ? IntegerMin : IntegerMax;
return bits.getSign() ? IntegerMin : IntegerMax;
} else if (exponent == exponentLimit) {
if (bits.encoding.sign == 0 || bits.encoding.mantissa != 0) {
if (bits.getSign() == 0 || bits.getMantissa() != 0) {
setDomainErrorAndRaiseInvalid();
return bits.encoding.sign ? IntegerMin : IntegerMax;
return bits.getSign() ? IntegerMin : IntegerMax;
}
// If the control reaches here, then it means that the rounded
// value is the most negative number for the signed integer type I.

View File

@ -30,8 +30,8 @@ static inline long double nextafter(long double from, long double to) {
return to;
// Convert pseudo subnormal number to normal number.
if (fromBits.encoding.implicitBit == 1 && fromBits.encoding.exponent == 0) {
fromBits.encoding.exponent = 1;
if (fromBits.getImplicitBit() == 1 && fromBits.getUnbiasedExponent() == 0) {
fromBits.setUnbiasedExponent(1);
}
using UIntType = FPBits::UIntType;
@ -46,11 +46,11 @@ static inline long double nextafter(long double from, long double to) {
// dealing with the implicit bit.
intVal = signVal + FPBits::minNormal;
} else if ((intVal & mantissaMask) == mantissaMask) {
fromBits.encoding.mantissa = 0;
fromBits.setMantissa(0);
// Incrementing exponent might overflow the value to infinity,
// which is what is expected. Since NaNs are handled separately,
// it will never overflow "beyond" infinity.
++fromBits.encoding.exponent;
fromBits.setUnbiasedExponent(fromBits.getUnbiasedExponent() + 1);
return fromBits;
} else {
++intVal;
@ -61,10 +61,10 @@ static inline long double nextafter(long double from, long double to) {
// dealing with the implicit bit.
intVal = signVal + FPBits::maxSubnormal;
} else if ((intVal & mantissaMask) == 0) {
fromBits.encoding.mantissa = mantissaMask;
fromBits.setMantissa(mantissaMask);
// from == 0 is handled separately so decrementing the exponent will not
// lead to underflow.
--fromBits.encoding.exponent;
fromBits.setUnbiasedExponent(fromBits.getUnbiasedExponent() - 1);
return fromBits;
} else {
--intVal;
@ -80,10 +80,10 @@ static inline long double nextafter(long double from, long double to) {
if (intVal == FPBits::minNormal) {
intVal = FPBits::maxSubnormal;
} else if ((intVal & mantissaMask) == 0) {
fromBits.encoding.mantissa = mantissaMask;
fromBits.setMantissa(mantissaMask);
// from == 0 is handled separately so decrementing the exponent will not
// lead to underflow.
--fromBits.encoding.exponent;
fromBits.setUnbiasedExponent(fromBits.getUnbiasedExponent() - 1);
return fromBits;
} else {
--intVal;
@ -92,11 +92,11 @@ static inline long double nextafter(long double from, long double to) {
if (intVal == FPBits::maxSubnormal) {
intVal = FPBits::minNormal;
} else if ((intVal & mantissaMask) == mantissaMask) {
fromBits.encoding.mantissa = 0;
fromBits.setMantissa(0);
// Incrementing exponent might overflow the value to infinity,
// which is what is expected. Since NaNs are handled separately,
// it will never overflow "beyond" infinity.
++fromBits.encoding.exponent;
fromBits.setUnbiasedExponent(fromBits.getUnbiasedExponent() + 1);
return fromBits;
} else {
++intVal;

View File

@ -97,7 +97,7 @@ template <typename T> struct NormalFloat {
}
FPBits<T> result(T(0.0));
result.encoding.sign = sign;
result.setSign(sign);
constexpr int subnormalExponent = -FPBits<T>::exponentBias + 1;
if (exponent < subnormalExponent) {
@ -110,36 +110,36 @@ template <typename T> struct NormalFloat {
const UIntType shiftOutMask = (UIntType(1) << shift) - 1;
const UIntType shiftOutValue = mantissa & shiftOutMask;
const UIntType halfwayValue = UIntType(1) << (shift - 1);
result.encoding.exponent = 0;
result.encoding.mantissa = mantissa >> shift;
UIntType newMantissa = result.encoding.mantissa;
result.setUnbiasedExponent(0);
result.setMantissa(mantissa >> shift);
UIntType newMantissa = result.getMantissa();
if (shiftOutValue > halfwayValue) {
newMantissa += 1;
} else if (shiftOutValue == halfwayValue) {
// Round to even.
if (result.encoding.mantissa & 0x1)
if (result.getMantissa() & 0x1)
newMantissa += 1;
}
result.encoding.mantissa = newMantissa;
result.setMantissa(newMantissa);
// Adding 1 to mantissa can lead to overflow. This can only happen if
// mantissa was all ones (0b111..11). For such a case, we will carry
// the overflow into the exponent.
if (newMantissa == one)
result.encoding.exponent = 1;
result.setUnbiasedExponent(1);
return T(result);
} else {
return T(result);
}
}
result.encoding.exponent = exponent + FPBits<T>::exponentBias;
result.encoding.mantissa = mantissa;
result.setUnbiasedExponent(exponent + FPBits<T>::exponentBias);
result.setMantissa(mantissa);
return T(result);
}
private:
void initFromBits(FPBits<T> bits) {
sign = bits.encoding.sign;
sign = bits.getSign();
if (bits.isInfOrNaN() || bits.isZero()) {
// Ignore special bit patterns. Implementations deal with them separately
@ -150,13 +150,13 @@ private:
}
// Normalize subnormal numbers.
if (bits.encoding.exponent == 0) {
unsigned shift = evaluateNormalizationShift(bits.encoding.mantissa);
mantissa = UIntType(bits.encoding.mantissa) << shift;
if (bits.getUnbiasedExponent() == 0) {
unsigned shift = evaluateNormalizationShift(bits.getMantissa());
mantissa = UIntType(bits.getMantissa()) << shift;
exponent = 1 - FPBits<T>::exponentBias - shift;
} else {
exponent = bits.encoding.exponent - FPBits<T>::exponentBias;
mantissa = one | bits.encoding.mantissa;
exponent = bits.getUnbiasedExponent() - FPBits<T>::exponentBias;
mantissa = one | bits.getMantissa();
}
}
@ -172,7 +172,7 @@ private:
#ifdef SPECIAL_X86_LONG_DOUBLE
template <>
inline void NormalFloat<long double>::initFromBits(FPBits<long double> bits) {
sign = bits.encoding.sign;
sign = bits.getSign();
if (bits.isInfOrNaN() || bits.isZero()) {
// Ignore special bit patterns. Implementations deal with them separately
@ -182,25 +182,24 @@ inline void NormalFloat<long double>::initFromBits(FPBits<long double> bits) {
return;
}
if (bits.encoding.exponent == 0) {
if (bits.encoding.implicitBit == 0) {
if (bits.getUnbiasedExponent() == 0) {
if (bits.getImplicitBit() == 0) {
// Since we ignore zero value, the mantissa in this case is non-zero.
int normalizationShift =
evaluateNormalizationShift(bits.encoding.mantissa);
int normalizationShift = evaluateNormalizationShift(bits.getMantissa());
exponent = -16382 - normalizationShift;
mantissa = (bits.encoding.mantissa << normalizationShift);
mantissa = (bits.getMantissa() << normalizationShift);
} else {
exponent = -16382;
mantissa = one | bits.encoding.mantissa;
mantissa = one | bits.getMantissa();
}
} else {
if (bits.encoding.implicitBit == 0) {
if (bits.getImplicitBit() == 0) {
// Invalid number so just store 0 similar to a NaN.
exponent = 0;
mantissa = 0;
} else {
exponent = bits.encoding.exponent - 16383;
mantissa = one | bits.encoding.mantissa;
exponent = bits.getUnbiasedExponent() - 16383;
mantissa = one | bits.getMantissa();
}
}
}
@ -214,7 +213,7 @@ template <> inline NormalFloat<long double>::operator long double() const {
}
FPBits<long double> result(0.0l);
result.encoding.sign = sign;
result.setSign(sign);
constexpr int subnormalExponent = -FPBits<long double>::exponentBias + 1;
if (exponent < subnormalExponent) {
@ -225,25 +224,25 @@ template <> inline NormalFloat<long double>::operator long double() const {
const UIntType shiftOutMask = (UIntType(1) << shift) - 1;
const UIntType shiftOutValue = mantissa & shiftOutMask;
const UIntType halfwayValue = UIntType(1) << (shift - 1);
result.encoding.exponent = 0;
result.encoding.mantissa = mantissa >> shift;
UIntType newMantissa = result.encoding.mantissa;
result.setUnbiasedExponent(0);
result.setMantissa(mantissa >> shift);
UIntType newMantissa = result.getMantissa();
if (shiftOutValue > halfwayValue) {
newMantissa += 1;
} else if (shiftOutValue == halfwayValue) {
// Round to even.
if (result.encoding.mantissa & 0x1)
if (result.getMantissa() & 0x1)
newMantissa += 1;
}
result.encoding.mantissa = newMantissa;
result.setMantissa(newMantissa);
// Adding 1 to mantissa can lead to overflow. This can only happen if
// mantissa was all ones (0b111..11). For such a case, we will carry
// the overflow into the exponent and set the implicit bit to 1.
if (newMantissa == one) {
result.encoding.exponent = 1;
result.encoding.implicitBit = 1;
result.setUnbiasedExponent(1);
result.setImplicitBit(1);
} else {
result.encoding.implicitBit = 0;
result.setImplicitBit(0);
}
return static_cast<long double>(result);
} else {
@ -251,9 +250,9 @@ template <> inline NormalFloat<long double>::operator long double() const {
}
}
result.encoding.exponent = biasedExponent;
result.encoding.mantissa = mantissa;
result.encoding.implicitBit = 1;
result.setUnbiasedExponent(biasedExponent);
result.setMantissa(mantissa);
result.setImplicitBit(1);
return static_cast<long double>(result);
}
#endif // SPECIAL_X86_LONG_DOUBLE

View File

@ -102,7 +102,7 @@ static inline T sqrt(T x) {
FPBits<T> bits(x);
if (bits.isInfOrNaN()) {
if (bits.encoding.sign && (bits.encoding.mantissa == 0)) {
if (bits.getSign() && (bits.getMantissa() == 0)) {
// sqrt(-Inf) = NaN
return FPBits<T>::buildNaN(One >> 1);
} else {
@ -114,15 +114,15 @@ static inline T sqrt(T x) {
// sqrt(+0) = +0
// sqrt(-0) = -0
return x;
} else if (bits.encoding.sign) {
} else if (bits.getSign()) {
// sqrt( negative numbers ) = NaN
return FPBits<T>::buildNaN(One >> 1);
} else {
int xExp = bits.getExponent();
UIntType xMant = bits.encoding.mantissa;
UIntType xMant = bits.getMantissa();
// Step 1a: Normalize denormal input and append hidden bit to the mantissa
if (bits.encoding.exponent == 0) {
if (bits.getUnbiasedExponent() == 0) {
++xExp; // let xExp be the correct exponent of One bit.
internal::normalize<T>(xExp, xMant);
} else {

View File

@ -50,7 +50,7 @@ template <> inline long double sqrt<long double, 0>(long double x) {
FPBits<long double> bits(x);
if (bits.isInfOrNaN()) {
if (bits.encoding.sign && (bits.encoding.mantissa == 0)) {
if (bits.getSign() && (bits.getMantissa() == 0)) {
// sqrt(-Inf) = NaN
return FPBits<long double>::buildNaN(One >> 1);
} else {
@ -62,17 +62,17 @@ template <> inline long double sqrt<long double, 0>(long double x) {
// sqrt(+0) = +0
// sqrt(-0) = -0
return x;
} else if (bits.encoding.sign) {
} else if (bits.getSign()) {
// sqrt( negative numbers ) = NaN
return FPBits<long double>::buildNaN(One >> 1);
} else {
int xExp = bits.getExponent();
UIntType xMant = bits.encoding.mantissa;
UIntType xMant = bits.getMantissa();
// Step 1a: Normalize denormal input
if (bits.encoding.implicitBit) {
if (bits.getImplicitBit()) {
xMant |= One;
} else if (bits.encoding.exponent == 0) {
} else if (bits.getUnbiasedExponent() == 0) {
internal::normalize<long double>(xExp, xMant);
}
@ -128,9 +128,9 @@ template <> inline long double sqrt<long double, 0>(long double x) {
// Extract output
FPBits<long double> out(0.0L);
out.encoding.exponent = xExp;
out.encoding.implicitBit = 1;
out.encoding.mantissa = (y & (One - 1));
out.setUnbiasedExponent(xExp);
out.setImplicitBit(1);
out.setMantissa((y & (One - 1)));
return out;
}

View File

@ -40,7 +40,7 @@ describeValue(const char *label, ValType value,
if (bits.isNaN()) {
stream << "(NaN)";
} else if (bits.isInf()) {
if (bits.encoding.sign)
if (bits.getSign())
stream << "(-Infinity)";
else
stream << "(+Infinity)";
@ -50,13 +50,14 @@ describeValue(const char *label, ValType value,
constexpr int mantissaWidthInHex =
(fputil::MantissaWidth<ValType>::value - 1) / 4 + 1;
stream << "Sign: " << (bits.encoding.sign ? '1' : '0') << ", "
stream << "Sign: " << (bits.getSign() ? '1' : '0') << ", "
<< "Exponent: 0x"
<< uintToHex<uint16_t>(bits.encoding.exponent, exponentWidthInHex)
<< uintToHex<uint16_t>(bits.getUnbiasedExponent(),
exponentWidthInHex)
<< ", "
<< "Mantissa: 0x"
<< uintToHex<typename fputil::FPBits<ValType>::UIntType>(
bits.encoding.mantissa, mantissaWidthInHex);
bits.getMantissa(), mantissaWidthInHex);
}
stream << '\n';

View File

@ -47,17 +47,17 @@ static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
// bit of sum, so that the sticky bits used when rounding sum to float are
// correct (when it matters).
fputil::FPBits<double> t(
(bit_prod.encoding.exponent >= bitz.encoding.exponent)
(bit_prod.getUnbiasedExponent() >= bitz.getUnbiasedExponent())
? ((double(bit_sum) - double(bit_prod)) - double(bitz))
: ((double(bit_sum) - double(bitz)) - double(bit_prod)));
// Update sticky bits if t != 0.0 and the least (52 - 23 - 1 = 28) bits are
// zero.
if (!t.isZero() && ((bit_sum.encoding.mantissa & 0xfff'ffffULL) == 0)) {
if (bit_sum.encoding.sign != t.encoding.sign) {
++bit_sum.encoding.mantissa;
} else if (bit_sum.encoding.mantissa) {
--bit_sum.encoding.mantissa;
if (!t.isZero() && ((bit_sum.getMantissa() & 0xfff'ffffULL) == 0)) {
if (bit_sum.getSign() != t.getSign()) {
bit_sum.setMantissa(bit_sum.getMantissa() + 1);
} else if (bit_sum.getMantissa()) {
bit_sum.setMantissa(bit_sum.getMantissa() - 1);
}
}
}