FloatToFixed constructor

FloatToFixed(
  1. FloatingPoint float, {
  2. String name = 'FloatToFixed',
  3. int? integerWidth,
  4. int? fractionWidth,
  5. bool checkOverflow = false,
  6. bool reserveName = false,
  7. bool reserveDefinitionName = false,
  8. String? definitionName,
})

Build a FloatingPoint to FixedPoint converter.

  • If integerWidth and fractionWidth are supplied, an m.n fixed-point output of that size is produced. Any width that is omitted defaults to a lossless width computed by the converter, so that the floating-point value can be output as a fixed-point value without loss.
  • Setting checkOverflow to true enables overflow detection when the chosen widths are smaller than the lossless widths (that is, when loss can occur). In that case an optional overflow output is produced, which is true when overflow occurs.

Implementation

/// Builds a [FloatingPoint] to fixed-point converter for [float].
///
/// The `fixed` output is `integerWidth + fractionWidth + 1` bits wide. When
/// the sign of [float] is set, the output is the two's-complement negation
/// of the shifted magnitude.
///
/// - [integerWidth] and [fractionWidth] select the m.n output format. A
///   width left `null` falls back to the lossless width derived from the
///   exponent bias and mantissa width of [float].
/// - [checkOverflow] requests an `overflow` output. It is only created when
///   at least one of the chosen widths is smaller than its lossless width.
/// - [definitionName] defaults to
///   `FloatE<exponent width>M<mantissa width>ToFixed`.
FloatToFixed(FloatingPoint float,
    {super.name = 'FloatToFixed',
    int? integerWidth,
    int? fractionWidth,
    this.checkOverflow = false,
    super.reserveName,
    super.reserveDefinitionName,
    String? definitionName})
    : super(
          definitionName: definitionName ??
              'FloatE${float.exponent.width}'
                  'M${float.mantissa.width}ToFixed') {
  // Re-bind `float` to a clone driven by this module's 'float' input port so
  // that everything below reads the port rather than the caller's signal.
  float = float.clone()..gets(addInput('float', float, width: float.width));

  final bias = float.floatingPointValue.bias;
  // Lossless integer width (m). E4M3 expands the max exponent by 1.
  final noLossM = ((float.exponent.width == 4) && (float.mantissa.width == 3))
      ? bias + 2
      : bias + 1; // accommodate the jbit
  // Lossless fraction width (n).
  final noLossN = bias + float.mantissa.width - 1;

  // TODO(desmonddak): Check what happens with an explicitJBit FP

  this.integerWidth = integerWidth ?? noLossM;
  this.fractionWidth = fractionWidth ?? noLossN;
  // One extra bit on top of the m.n field (presumably the sign bit -- the
  // result is negated in two's complement below).
  final outputWidth = this.integerWidth + this.fractionWidth + 1;

  // The j-bit is the leading mantissa bit: set for normal inputs only.
  final jBit = Logic(name: 'jBit')..gets(float.isNormal);
  final fullMantissa = [jBit, float.mantissa].swizzle().named('fullMantissa');

  // Width of the shift amount: the larger of the exponent width and the bits
  // needed to index the m.n field, plus two extra bits. The top bit of
  // `shift` is used as a sign when selecting the shift direction below.
  final eWidth = max(log2Ceil(this.fractionWidth + this.integerWidth),
          float.exponent.width) +
      2;
  final shift = Logic(name: 'shift', width: eWidth);
  final exp = (float.exponent - 1).zeroExtend(eWidth).named('expMinus1');

  // Base shift is (exponent - 1) for normal inputs and 0 otherwise. It is
  // then offset by the difference between the requested and the lossless
  // fraction widths.
  if (this.fractionWidth > noLossN) {
    shift <=
        mux(jBit, exp, Const(0, width: eWidth)) +
            Const(this.fractionWidth - noLossN, width: eWidth)
                .named('deltaN');
  } else if (this.fractionWidth == noLossN) {
    shift <= mux(jBit, exp, Const(0, width: eWidth));
  } else {
    shift <=
        mux(jBit, exp, Const(0, width: eWidth)) -
            Const(noLossN - this.fractionWidth, width: eWidth)
                .named('deltaN');
  }
  // Right-shift amount is the two's-complement negation of `shift`. When the
  // mantissa is truncated to fit the output (see `preNumber`), the bits
  // already dropped are subtracted from it.
  // TODO(desmonddak): Could use signed shifter if we unified shift math
  final shiftRight = ((fullMantissa.width > outputWidth)
          ? (~shift + 1) - (fullMantissa.width - outputWidth)
          : (~shift + 1))
      .named('shiftRight');

  // Overflow can only happen when the output is narrower than the lossless
  // format, so the detector is built only in that case.
  if (checkOverflow &&
      ((this.integerWidth < noLossM) || (this.fractionWidth < noLossN))) {
    final overflow = Logic(name: 'overflow');
    // Priority-encode the bit-reversed mantissa to locate its leading one.
    final leadDetect = RecursiveModulePriorityEncoder(fullMantissa.reversed,
        name: 'leadone_detector');

    final sWidth = max(eWidth, leadDetect.out.width);
    final fShift = shift.zeroExtend(sWidth).named('wideShift');
    final leadOne = leadDetect.out.zeroExtend(sWidth).named('leadOne');

    // NOTE(review): `shift` is treated as signed when choosing the shift
    // direction below, but is compared here with `gte`; confirm that a
    // negative `shift` cannot raise a false overflow.
    Combinational([
      If(jBit, then: [
        // Normal input: overflow once the shift reaches the headroom left
        // above the mantissa in the output.
        overflow < shift.gte(outputWidth - float.mantissa.width - 1),
      ], orElse: [
        // Input without a j-bit: discount the leading-one position first.
        If(fShift.gt(leadOne), then: [
          overflow <
              (fShift - leadOne).gte(outputWidth - float.mantissa.width - 1),
        ], orElse: [
          overflow < Const(0),
        ]),
      ]),
    ]);
    addOutput('overflow') <= overflow;
  }
  // Fit the mantissa to the output width: zero-extend when the output is at
  // least as wide, otherwise keep only the most significant `outputWidth`
  // bits.
  final preNumber = ((outputWidth >= fullMantissa.width)
          ? fullMantissa.zeroExtend(outputWidth)
          : fullMantissa.slice(-1, fullMantissa.width - outputWidth))
      .named('newMantissaPreShift');
  // TODO(desmonddak): Rounder is needed when shifting right

  // The top bit of `shift` selects the direction: set means shift right by
  // `shiftRight`, clear means shift left by `shift`.
  final number = mux(shift[-1], preNumber >>> shiftRight, preNumber << shift)
      .named('number');

  // Negate in two's complement when the input sign bit is set.
  _fixed <= mux(float.sign, ~number + 1, number).named('signedNumber');
  addOutput('fixed', width: outputWidth) <= _fixed;
}