FloatingPointValue.ofDouble constructor

FloatingPointValue.ofDouble(
  double inDouble, {
  required int exponentWidth,
  required int mantissaWidth,
  FloatingPointRoundingMode roundingMode = FloatingPointRoundingMode.roundNearestEven,
})

Converts from a double using its native binary representation.

Implementation

/// Converts [inDouble] into a [FloatingPointValue] with the requested
/// [exponentWidth] and [mantissaWidth].
///
/// The `8`/`23` and `11`/`52` width pairs are delegated to
/// [FloatingPoint32Value.ofDouble] and [FloatingPoint64Value.ofDouble]
/// without forwarding [roundingMode]. Every other format is derived from the
/// 64-bit encoding of [inDouble]:
///
/// - NaN becomes an all-ones exponent with the top mantissa bit set.
/// - Infinities become an all-ones exponent with a zero mantissa.
/// - Zero keeps its sign and has all-zero exponent and mantissa.
/// - Finite values are re-biased, shifted into the target mantissa (going
///   subnormal when the exponent underflows) and rounded once according to
///   [roundingMode].
/// - Finite values too large for the target become infinity under
///   [FloatingPointRoundingMode.roundNearestEven] and the largest finite
///   value under [FloatingPointRoundingMode.truncate].
///
/// Throws an [UnimplementedError] if a width pair other than the two
/// delegated ones is combined with a [roundingMode] other than
/// [FloatingPointRoundingMode.roundNearestEven] or
/// [FloatingPointRoundingMode.truncate].
factory FloatingPointValue.ofDouble(double inDouble,
    {required int exponentWidth,
    required int mantissaWidth,
    FloatingPointRoundingMode roundingMode =
        FloatingPointRoundingMode.roundNearestEven}) {
  if ((exponentWidth == 8) && (mantissaWidth == 23)) {
    // TODO(desmonddak): handle rounding mode for 32 bit?
    return FloatingPoint32Value.ofDouble(inDouble);
  } else if ((exponentWidth == 11) && (mantissaWidth == 52)) {
    return FloatingPoint64Value.ofDouble(inDouble);
  }

  if (roundingMode != FloatingPointRoundingMode.roundNearestEven &&
      roundingMode != FloatingPointRoundingMode.truncate) {
    throw UnimplementedError(
        'Only roundNearestEven or truncate is supported for this width');
  }

  final fp64 = FloatingPoint64Value.ofDouble(inDouble);
  final allOnesExponent = (BigInt.one << exponentWidth) - BigInt.one;

  // Assembles the result from raw field values, reusing the source sign bit.
  FloatingPointValue pack(BigInt exponentBits, BigInt mantissaBits) =>
      FloatingPointValue(
          sign: fp64.sign,
          exponent: LogicValue.ofBigInt(exponentBits, exponentWidth),
          mantissa: LogicValue.ofBigInt(mantissaBits, mantissaWidth));

  // Special values never go through the re-biasing arithmetic below; doing
  // so would push the exponent past the width of the target field.
  if (inDouble.isNaN) {
    final quietBit =
        mantissaWidth > 0 ? BigInt.one << (mantissaWidth - 1) : BigInt.zero;
    return pack(allOnesExponent, quietBit);
  }
  if (inDouble.isInfinite) {
    return pack(allOnesExponent, BigInt.zero);
  }
  if (inDouble == 0) {
    // Also true for -0.0; the sign is carried over by `pack`.
    return pack(BigInt.zero, BigInt.zero);
  }

  // 53-bit significand of the double. The hidden bit is only present for
  // normal doubles; subnormal doubles use an effective exponent field of 1.
  final hiddenBit = BigInt.one << 52;
  final exponent64 = fp64.exponent.toInt();
  final sourceIsSubnormal = exponent64 == 0;
  var significand = fp64.mantissa.toBigInt();
  if (!sourceIsSubnormal) {
    significand |= hiddenBit;
  }

  // Target exponent field that corresponds to the hidden-bit position.
  var expVal = (sourceIsSubnormal ? 1 : exponent64) -
      fp64.bias +
      FloatingPointValue.computeBias(exponentWidth);

  // A subnormal double may be representable as a normal value when the
  // target exponent range is wider: shift the leading one up as far as the
  // exponent allows.
  if (expVal > 1 && significand < hiddenBit) {
    final headroom = 53 - significand.bitLength;
    final lift = headroom < expVal - 1 ? headroom : expVal - 1;
    significand <<= lift;
    expVal -= lift;
  }

  // Normal results store the fraction without the hidden bit. Subnormal
  // results keep it and are shifted right by the exponent shortfall.
  final isNormal = expVal >= 1 && significand >= hiddenBit;
  final fraction = isNormal ? significand - hiddenBit : significand;
  final subnormalShift = isNormal ? 0 : 1 - expVal;
  var expField = isNormal ? expVal : 0;

  // Number of low-order bits that do not fit in the target mantissa. Using a
  // single combined shift keeps every discarded bit visible to the rounding
  // decision.
  final dropBits = (52 - mantissaWidth) + subnormalShift;

  BigInt mantissaBits;
  if (dropBits <= 0) {
    // The target mantissa is at least as wide as what we have: exact.
    mantissaBits = fraction << -dropBits;
  } else {
    mantissaBits = fraction >> dropBits;

    if (roundingMode == FloatingPointRoundingMode.roundNearestEven) {
      final remainder = fraction & ((BigInt.one << dropBits) - BigInt.one);
      final half = BigInt.one << (dropBits - 1);

      // Round up when above the halfway point, or exactly halfway with an
      // odd kept value (ties to even).
      if (remainder > half || (remainder == half && mantissaBits.isOdd)) {
        mantissaBits += BigInt.one;
        if (mantissaBits == (BigInt.one << mantissaWidth)) {
          // Mantissa carried out: the value moves to the next binade.
          mantissaBits = BigInt.zero;
          expField += 1;
        }
      }
    }
  }

  final exponentBits = BigInt.from(expField);
  if (exponentBits >= allOnesExponent) {
    // Finite input that is too large for the target format.
    if (roundingMode == FloatingPointRoundingMode.truncate) {
      return pack(allOnesExponent - BigInt.one,
          (BigInt.one << mantissaWidth) - BigInt.one);
    }
    return pack(allOnesExponent, BigInt.zero);
  }

  return pack(exponentBits, mantissaBits);
}