FloatToFixed constructor
Build a FloatingPoint to FixedPoint converter.
- If `integerWidth` and `fractionWidth` are supplied, an m.n fixed-point output will be produced. Otherwise, the converter will compute a lossless size for `integerWidth` and `fractionWidth` for outputting the floating-point value into a fixed-point value.
- `checkOverflow` set to `true` will cause overflow detection to happen in case that loss can occur, and an optional output `overflow` will be produced that returns `true` when overflow occurs.
Implementation
/// Builds a converter from the floating-point input [float] to a fixed-point
/// value driven onto the `fixed` output.
///
/// The output is `integerWidth + fractionWidth + 1` bits wide and is
/// two's-complement negated when the sign of [float] is set.
///
/// - [integerWidth] / [fractionWidth]: requested m.n format. Each width that
///   is omitted falls back to the lossless size computed here from the
///   exponent bias and the mantissa width.
/// - [checkOverflow]: when `true` and at least one width is smaller than its
///   lossless size, an extra `overflow` output is created.
/// - [definitionName]: defaults to a name built from the exponent and
///   mantissa widths of [float].
FloatToFixed(FloatingPoint float,
    {super.name = 'FloatToFixed',
    int? integerWidth,
    int? fractionWidth,
    this.checkOverflow = false,
    super.reserveName,
    super.reserveDefinitionName,
    String? definitionName})
    : super(
          definitionName: definitionName ??
              'FloatE${float.exponent.width}'
                  'M${float.mantissa.width}ToFixed') {
  // Internal copy of the input, driven by this module's 'float' port.
  final fpIn = float.clone()
    ..gets(addInput('float', float, width: float.width));

  final expWidth = fpIn.exponent.width;
  final mantWidth = fpIn.mantissa.width;
  final bias = fpIn.floatingPointValue.bias;

  // Lossless integer width: bias + 1 accommodates the jbit; E4M3 expands the
  // max exponent by 1, so it needs one more bit.
  final isE4M3 = expWidth == 4 && mantWidth == 3;
  final noLossM = bias + (isE4M3 ? 2 : 1);
  // Lossless fraction width.
  final noLossN = bias + mantWidth - 1;

  // TODO(desmonddak): Check what happens with an explicitJBit FP
  this.integerWidth = integerWidth ?? noLossM;
  this.fractionWidth = fractionWidth ?? noLossN;
  final outputWidth = this.integerWidth + this.fractionWidth + 1;

  // Mantissa with the j-bit (driven by isNormal) prepended as the MSB.
  final jBit = Logic(name: 'jBit')..gets(fpIn.isNormal);
  final fullMantissa = [jBit, fpIn.mantissa].swizzle().named('fullMantissa');

  // Width of the shift-amount arithmetic.
  final eWidth =
      max(log2Ceil(this.integerWidth + this.fractionWidth), expWidth) + 2;

  final shift = Logic(name: 'shift', width: eWidth);
  final exp = (fpIn.exponent - 1).zeroExtend(eWidth).named('expMinus1');

  // Base shift is (exponent - 1) when the j-bit is set and zero otherwise;
  // it is then offset by how far the fraction width is from lossless.
  final baseShift = mux(jBit, exp, Const(0, width: eWidth));
  final fractionDelta = this.fractionWidth - noLossN;
  if (fractionDelta > 0) {
    shift <= baseShift + Const(fractionDelta, width: eWidth).named('deltaN');
  } else if (fractionDelta < 0) {
    shift <= baseShift - Const(-fractionDelta, width: eWidth).named('deltaN');
  } else {
    shift <= baseShift;
  }

  // Number of mantissa bits that do not fit in the output (<= 0 if all fit).
  final excessBits = fullMantissa.width - outputWidth;

  // TODO(desmonddak): Could use signed shifter if we unified shift math
  // Right-shift amount is the negated shift, reduced by the bits that the
  // pre-shift slice below already drops.
  final negatedShift = ~shift + 1;
  final shiftRight =
      (excessBits > 0 ? negatedShift - excessBits : negatedShift)
          .named('shiftRight');

  final narrowerThanLossless =
      this.integerWidth < noLossM || this.fractionWidth < noLossN;
  if (checkOverflow && narrowerThanLossless) {
    final overflow = Logic(name: 'overflow');
    final leadDetect = RecursiveModulePriorityEncoder(fullMantissa.reversed,
        name: 'leadone_detector');
    final sWidth = max(eWidth, leadDetect.out.width);
    final fShift = shift.zeroExtend(sWidth).named('wideShift');
    final leadOne = leadDetect.out.zeroExtend(sWidth).named('leadOne');

    // Shift amounts at or above this threshold are flagged as overflow.
    final shiftLimit = outputWidth - mantWidth - 1;
    final overflowWithJBit = shift.gte(shiftLimit);
    final shiftExceedsLead = fShift.gt(leadOne);
    final overflowWithoutJBit = (fShift - leadOne).gte(shiftLimit);

    Combinational([
      If(jBit, then: [
        overflow < overflowWithJBit,
      ], orElse: [
        If(shiftExceedsLead, then: [
          overflow < overflowWithoutJBit,
        ], orElse: [
          overflow < Const(0),
        ]),
      ]),
    ]);
    addOutput('overflow') <= overflow;
  }

  // Fit the mantissa to the output width before shifting: zero-extend when
  // it fits, otherwise keep only the upper outputWidth bits.
  final preNumber = (excessBits <= 0
          ? fullMantissa.zeroExtend(outputWidth)
          : fullMantissa.slice(-1, excessBits))
      .named('newMantissaPreShift');

  // TODO(desmonddak): Rounder is needed when shifting right
  // The MSB of shift selects between the right-shift and left-shift paths.
  final useRightShift = shift[-1];
  final rightShifted = preNumber >>> shiftRight;
  final leftShifted = preNumber << shift;
  final number = mux(useRightShift, rightShifted, leftShifted).named('number');

  // Two's-complement negate when the sign is set.
  _fixed <= mux(fpIn.sign, ~number + 1, number).named('signedNumber');
  addOutput('fixed', width: outputWidth) <= _fixed;
}