FloatingPointValue.ofDoubleUnrounded constructor

@internal
FloatingPointValue.ofDoubleUnrounded(
  double inDouble, {
  required int exponentWidth,
  required int mantissaWidth,
})

Converts a floating-point number (a Dart double) into a FloatingPointValue representation with the given exponent and mantissa widths. This form performs NO ROUNDING.

Implementation

/// Converts [inDouble] into a [FloatingPointValue] with the requested
/// [exponentWidth] and [mantissaWidth]. This form performs NO ROUNDING.
///
/// The 8/23 and 11/52 width combinations are delegated to
/// [FloatingPoint32Value.ofDouble] and [FloatingPoint64Value.ofDouble].
///
/// For every other format:
/// - NaN is encoded as an all-ones exponent with a non-zero mantissa.
/// - Positive or negative infinity is encoded as an all-ones exponent with
///   an all-zero mantissa and the matching sign bit.
/// - Finite values are scaled by a power of two into an integer, from which
///   the exponent and mantissa fields are extracted.
@internal
factory FloatingPointValue.ofDoubleUnrounded(double inDouble,
    {required int exponentWidth, required int mantissaWidth}) {
  if ((exponentWidth == 8) && (mantissaWidth == 23)) {
    return FloatingPoint32Value.ofDouble(inDouble);
  } else if ((exponentWidth == 11) && (mantissaWidth == 52)) {
    return FloatingPoint64Value.ofDouble(inDouble);
  }

  // Non-finite inputs share the all-ones exponent. They must be handled
  // before the log/ceil scaling below, which throws on non-finite values.
  // NaN needs a non-zero mantissa; a zero mantissa denotes infinity. Both
  // mantissas are built at the full mantissaWidth.
  if (inDouble.isNaN || inDouble.isInfinite) {
    return FloatingPointValue(
      exponent:
          LogicValue.ofInt(pow(2, exponentWidth).toInt() - 1, exponentWidth),
      mantissa: LogicValue.ofInt(inDouble.isNaN ? 1 : 0, mantissaWidth),
      sign: (inDouble.isInfinite && inDouble.isNegative)
          ? LogicValue.one
          : LogicValue.zero,
    );
  }

  // Work on the magnitude and carry the sign separately. Note that -0.0 is
  // not `< 0.0`, so it takes the positive branch.
  var doubleVal = inDouble;
  LogicValue sign;
  if (inDouble < 0.0) {
    doubleVal = -doubleVal;
    sign = LogicValue.one;
  } else {
    sign = LogicValue.zero;
  }

  // If we are dealing with a really small number we need to scale it up.
  // ceil(-log2(doubleVal)) is the power of two that brings the magnitude up
  // to at least 1 (it is negative for magnitudes of 2 or more).
  var scaleToWhole = (doubleVal != 0) ? (-log(doubleVal) / log(2)).ceil() : 0;

  if (doubleVal < 1.0) {
    // Count the doublings needed until the value has no fractional part and
    // use that count if it is smaller than the log-based estimate.
    var myCnt = 0;
    var myVal = doubleVal;
    while (myVal % 1 != 0.0) {
      myVal = myVal * 2.0;
      myCnt++;
    }
    if (myCnt < scaleToWhole) {
      scaleToWhole = myCnt;
    }
  }

  // Scale it up to go beyond the mantissa and include the GRS bits
  final scale = mantissaWidth + scaleToWhole;
  var s = scale;

  // Multiply by 2^scale: repeated doubling for positive scales, a single
  // pow() multiplication otherwise.
  var sVal = doubleVal;
  if (s > 0) {
    while (s > 0) {
      sVal *= 2.0;
      s = s - 1;
    }
  } else {
    sVal = doubleVal * pow(2.0, scale);
  }

  // Integer form of the scaled magnitude; its bit length locates the
  // leading one and therefore the exponent.
  final scaledValue = BigInt.from(sVal);
  final fullLength = scaledValue.bitLength;

  var fullValue = LogicValue.ofBigInt(scaledValue, fullLength);
  // A zero magnitude has no leading one, so fall back to the minimum
  // exponent of the format.
  var e = (fullLength > 0)
      ? fullLength - mantissaWidth - scaleToWhole
      : FloatingPointValue.computeMinExponent(exponentWidth);

  if (e <= -FloatingPointValue.computeBias(exponentWidth)) {
    // At or below the smallest exponent: shift the value right and pin the
    // exponent at -bias (stored exponent field of zero).
    fullValue = fullValue >>>
        (scaleToWhole - FloatingPointValue.computeBias(exponentWidth));
    e = -FloatingPointValue.computeBias(exponentWidth);
  } else {
    // Could be just one away from subnormal
    e -= 1;
    if (e > -FloatingPointValue.computeBias(exponentWidth)) {
      fullValue = fullValue << 1; // Chop the first '1'
    }
  }
  // We reverse so that we fit into a shorter BigInt, we keep the MSB.
  // The conversion fills leftward.
  // We reverse again after conversion.
  final exponent = LogicValue.ofInt(
      e + FloatingPointValue.computeBias(exponentWidth), exponentWidth);
  final mantissa =
      LogicValue.ofBigInt(fullValue.reversed.toBigInt(), mantissaWidth)
          .reversed;

  return FloatingPointValue(
    exponent: exponent,
    mantissa: mantissa,
    sign: sign,
  );
}