FloatToFixed constructor
Build a FloatingPoint to FixedPoint converter.
- If `integerWidth` and `fractionWidth` are supplied, an m.n fixed-point output will be produced. Otherwise, the converter will compute a lossless size for `integerWidth` and `fractionWidth` for outputting the floating-point value into a fixed-point value. `checkOverflow` set to `true` will cause overflow detection to happen in case that loss can occur, and an optional output `overflow` will be produced that returns `true` when overflow occurs.
Implementation
/// Builds a converter from the floating-point input [float] to a signed
/// fixed-point output named `fixed`.
///
/// When [integerWidth] and/or [fractionWidth] are omitted, each defaults to a
/// width derived from the exponent bias and mantissa width of [float]
/// (`noLossM` / `noLossN` below). The `fixed` output is
/// `integerWidth + fractionWidth + 1` bits wide.
///
/// When [checkOverflow] is `true` and at least one requested width is
/// narrower than its derived default, an additional `overflow` output is
/// created. Otherwise no `overflow` port is added.
///
/// [definitionName] defaults to `FloatE<exponent width>M<mantissa width>ToFixed`.
FloatToFixed(FloatingPoint float,
{super.name = 'FloatToFixed',
int? integerWidth,
int? fractionWidth,
this.checkOverflow = false,
super.reserveName,
super.reserveDefinitionName,
String? definitionName})
: super(
definitionName: definitionName ??
'FloatE${float.exponent.width}'
'M${float.mantissa.width}ToFixed') {
// Register `float` as a module input and rebind the local to a clone that is
// driven by that input port, so all logic below hangs off the port.
float = float.clone()..gets(addInput('float', float, width: float.width));
final bias = float.floatingPointValue.bias;
// Default ("no loss") integer width.
// E4M3 expands the max exponent by 1.
final noLossM = ((float.exponent.width == 4) & (float.mantissa.width == 3))
? bias + 2
: bias + 1; // accommodate the j-bit
// Default ("no loss") fraction width.
final noLossN = bias + float.mantissa.width - 1;
// TODO(desmonddak): Check what happens with an explicitJBit FP
// Caller-supplied widths win; otherwise fall back to the derived defaults.
this.integerWidth = integerWidth ?? noLossM;
this.fractionWidth = fractionWidth ?? noLossN;
// NOTE(review): the extra bit is presumably the sign bit of the fixed-point
// result -- confirm against the class documentation.
final outputWidth = this.integerWidth + this.fractionWidth + 1;
// The j-bit (leading mantissa bit) is driven by `float.isNormal`.
final jBit = Logic(name: 'jBit')..gets(float.isNormal);
// Mantissa with the j-bit concatenated in front of the stored mantissa bits
// (jBit is listed first in the swizzle).
final fullMantissa = [jBit, float.mantissa].swizzle().named('fullMantissa');
// Width used for all shift-amount arithmetic: the larger of the exponent
// width and log2Ceil(fractionWidth + integerWidth), plus two extra bits.
final eWidth = max(log2Ceil(this.fractionWidth + this.integerWidth),
float.exponent.width) +
2;
final shift = Logic(name: 'shift', width: eWidth);
// (exponent - 1), zero-extended to the shift-arithmetic width.
final exp = (float.exponent - 1).zeroExtend(eWidth).named('expMinus1');
// Base shift is (exponent - 1) when jBit is set and 0 otherwise; it is then
// offset by the difference between the requested fraction width and noLossN
// (added when wider, subtracted when narrower, untouched when equal).
if (this.fractionWidth > noLossN) {
shift <=
mux(jBit, exp, Const(0, width: eWidth)) +
Const(this.fractionWidth - noLossN, width: eWidth)
.named('deltaN');
} else if (this.fractionWidth == noLossN) {
shift <= mux(jBit, exp, Const(0, width: eWidth));
} else {
shift <=
mux(jBit, exp, Const(0, width: eWidth)) -
Const(noLossN - this.fractionWidth, width: eWidth)
.named('deltaN');
}
// TODO(desmonddak): Could use signed shifter if we unified shift math
// Right-shift amount: the two's-complement negation of `shift` (~shift + 1),
// reduced by (fullMantissa.width - outputWidth) when the mantissa is wider
// than the output (that many low bits are already dropped by the slice that
// forms `preNumber` below).
final shiftRight = ((fullMantissa.width > outputWidth)
? (~shift + 1) - (fullMantissa.width - outputWidth)
: (~shift + 1))
.named('shiftRight');
// Overflow logic is only built when requested AND at least one of the widths
// is narrower than its derived default.
if (checkOverflow &
((this.integerWidth < noLossM) | (this.fractionWidth < noLossN))) {
final overflow = Logic(name: 'overflow');
// Priority encoder over the bit-reversed full mantissa.
// NOTE(review): presumably yields the position of the leading one counted
// from the MSB -- confirm against RecursiveModulePriorityEncoder.
final leadDetect = RecursiveModulePriorityEncoder(fullMantissa.reversed,
name: 'leadone_detector');
// Bring the shift and the encoder output to a common width for comparison.
final sWidth = max(eWidth, leadDetect.out.width);
final fShift = shift.zeroExtend(sWidth).named('wideShift');
final leadOne = leadDetect.out.zeroExtend(sWidth).named('leadOne');
// jBit set: overflow when shift >= outputWidth - mantissa.width - 1.
// jBit clear: apply the same threshold to (shift - leadOne), but only when
// shift > leadOne; otherwise overflow is 0.
Combinational([
If(jBit, then: [
overflow < shift.gte(outputWidth - float.mantissa.width - 1),
], orElse: [
If(fShift.gt(leadOne), then: [
overflow <
(fShift - leadOne).gte(outputWidth - float.mantissa.width - 1),
], orElse: [
overflow < Const(0),
]),
]),
]);
addOutput('overflow') <= overflow;
}
// Fit the full mantissa to the output width before shifting: zero-extend
// when the output is at least as wide, otherwise keep only the bits from
// index -1 down to index (fullMantissa.width - outputWidth).
final preNumber = ((outputWidth >= fullMantissa.width)
? fullMantissa.zeroExtend(outputWidth)
: fullMantissa.slice(-1, fullMantissa.width - outputWidth))
.named('newMantissaPreShift');
// TODO(desmonddak): Rounder is needed when shifting right
// `shift[-1]` selects the direction: when set, shift right by `shiftRight`;
// otherwise shift left by `shift`.
// NOTE(review): index -1 is presumably the MSB, acting as a sign bit for
// `shift` -- confirm.
final number = mux(shift[-1], preNumber >>> shiftRight, preNumber << shift)
.named('number');
// Negate (~number + 1, two's complement) when the float's sign bit is set.
_fixed <= mux(float.sign, ~number + 1, number).named('signedNumber');
addOutput('fixed', width: outputWidth) <= _fixed;
}