FloatingPointValue.ofDouble constructor
- double inDouble, {
- required int exponentWidth,
- required int mantissaWidth,
- FloatingPointRoundingMode roundingMode = FloatingPointRoundingMode.roundNearestEven,
Converts a double to a FloatingPointValue of the requested exponent and mantissa widths, starting from the double's native 64-bit binary representation.
Implementation
/// Converts [inDouble] to a [FloatingPointValue] with an [exponentWidth]-bit
/// exponent and a [mantissaWidth]-bit mantissa.
///
/// The 8/23 and 11/52 width pairs are delegated to
/// [FloatingPoint32Value.ofDouble] and [FloatingPoint64Value.ofDouble]
/// respectively ([roundingMode] is not consulted for those). Every other
/// width pair is derived from the 64-bit encoding of [inDouble]: the exponent
/// is re-biased for the target width and the mantissa is narrowed according
/// to [roundingMode].
///
/// Special inputs are encoded explicitly rather than being pushed through
/// the narrowing arithmetic:
///  * NaN becomes an all-ones exponent with a nonzero mantissa.
///  * Infinities become an all-ones exponent with a zero mantissa.
///  * Positive and negative zero become a zero exponent and zero mantissa.
///  * Finite values too large for the target format become infinity under
///    [FloatingPointRoundingMode.roundNearestEven] and the largest finite
///    value under [FloatingPointRoundingMode.truncate].
///
/// Throws an [UnimplementedError] if [roundingMode] is anything other than
/// [FloatingPointRoundingMode.roundNearestEven] or
/// [FloatingPointRoundingMode.truncate] for a non-delegated width pair.
factory FloatingPointValue.ofDouble(double inDouble,
    {required int exponentWidth,
    required int mantissaWidth,
    FloatingPointRoundingMode roundingMode =
        FloatingPointRoundingMode.roundNearestEven}) {
  if ((exponentWidth == 8) && (mantissaWidth == 23)) {
    // TODO(desmonddak): handle rounding mode for 32 bit?
    return FloatingPoint32Value.ofDouble(inDouble);
  } else if ((exponentWidth == 11) && (mantissaWidth == 52)) {
    return FloatingPoint64Value.ofDouble(inDouble);
  }

  if (roundingMode != FloatingPointRoundingMode.roundNearestEven &&
      roundingMode != FloatingPointRoundingMode.truncate) {
    throw UnimplementedError(
        'Only roundNearestEven or truncate is supported for this width');
  }

  final fp64 = FloatingPoint64Value.ofDouble(inDouble);

  // Exponent field value with every bit set (reserved for infinity / NaN).
  final allOnesExponent = (BigInt.one << exponentWidth) - BigInt.one;

  // Builds a result carrying the sign of the input and the given raw fields.
  FloatingPointValue fromFields(BigInt exponentBits, BigInt mantissaBits) =>
      FloatingPointValue(
          sign: fp64.sign,
          exponent: LogicValue.ofBigInt(exponentBits, exponentWidth),
          mantissa: LogicValue.ofBigInt(mantissaBits, mantissaWidth));

  // Non-finite and zero inputs have fixed encodings; handling them here keeps
  // them away from the re-bias / shift logic below, which assumes a normal
  // 64-bit input with an implicit leading one.
  if (inDouble.isNaN) {
    return fromFields(allOnesExponent, BigInt.one);
  }
  if (inDouble.isInfinite) {
    return fromFields(allOnesExponent, BigInt.zero);
  }
  if (inDouble == 0.0) {
    // Also true for -0.0; the sign is taken from the 64-bit encoding.
    return fromFields(BigInt.zero, BigInt.zero);
  }

  // Re-bias the exponent: remove the 64-bit bias, apply the target bias.
  // NOTE(review): a subnormal 64-bit input (exponent field 0) is still
  // treated as if it had an implicit leading one — confirm whether target
  // formats with 11 or more exponent bits need dedicated handling.
  final exponent64 = fp64.exponent;
  var expVal = (exponent64.toInt() - fp64.bias) +
      FloatingPointValue.computeBias(exponentWidth);

  // A non-positive target exponent means the result is subnormal in the
  // target format: make the implicit leading one explicit and shift the
  // significand right by (1 - expVal) in total (the slice drops one bit).
  final LogicValue mantissa64;
  if (expVal <= 0) {
    final withLeadingOne = [LogicValue.one, fp64.mantissa].swizzle();
    mantissa64 = (withLeadingOne >>> -expVal).slice(52, 1);
  } else {
    mantissa64 = fp64.mantissa;
  }

  // Keep the top mantissaWidth bits; this alone is the truncate result.
  var mantissa = mantissa64.slice(51, 51 - mantissaWidth + 1);

  if (roundingMode == FloatingPointRoundingMode.roundNearestEven) {
    // guard  = first discarded bit, round = the bit below it,
    // sticky = OR of every remaining lower bit.
    // NOTE(review): for mantissaWidth >= 50 these indices become negative;
    // confirm the intended behavior for such widths.
    final sticky = mantissa64.slice(51 - (mantissaWidth + 2), 0).or();
    final roundPos = 51 - (mantissaWidth + 2) + 1;
    final round = mantissa64[roundPos];
    final guard = mantissa64[roundPos + 1];

    // Round up when above the halfway point, or exactly halfway with an odd
    // kept LSB (ties-to-even).
    final roundUp = guard == LogicValue.one &&
        (round == LogicValue.one ||
            sticky == LogicValue.one ||
            mantissa[0] == LogicValue.one);
    if (roundUp) {
      mantissa += 1;
      // The mantissa wrapped to zero: carry into the exponent.
      if (mantissa == LogicValue.zero.zeroExtend(mantissa.width)) {
        expVal += 1;
      }
    }
  }

  // The value does not fit the target exponent range: saturate instead of
  // letting the exponent be cut down to exponentWidth bits.
  if (BigInt.from(expVal) >= allOnesExponent) {
    if (roundingMode == FloatingPointRoundingMode.truncate) {
      // Truncation never increases magnitude: clamp to the largest finite
      // value (largest non-reserved exponent, all-ones mantissa).
      return fromFields(allOnesExponent - BigInt.one,
          (BigInt.one << mantissaWidth) - BigInt.one);
    }
    return fromFields(allOnesExponent, BigInt.zero);
  }

  final exponent =
      LogicValue.ofBigInt(BigInt.from(max(expVal, 0)), exponentWidth);

  return FloatingPointValue(
      sign: fp64.sign, exponent: exponent, mantissa: mantissa);
}