* [PATCH 01/18] target/riscv: rvv: Fix NOP_UU_B vs2 width
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 02/18] fpu/softfloat: Add OCP(Open Compute Project) OFP8 data type Max Chou
` (17 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/vector_helper.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 5aea553814..ec0ea4c143 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4972,7 +4972,7 @@ GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
-#define NOP_UU_B uint8_t, uint16_t, uint32_t
+#define NOP_UU_B uint8_t, uint16_t, uint16_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [PATCH 02/18] fpu/softfloat: Add OCP(Open Compute Project) OFP8 data type
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
2026-01-08 15:16 ` [PATCH 01/18] target/riscv: rvv: Fix NOP_UU_B vs2 width Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-09 7:29 ` Chao Liu
2026-01-10 2:57 ` Richard Henderson
2026-01-08 15:16 ` [PATCH 03/18] fpu/softfloat: Add convert operations(bf16, fp32) for OFP8 data types Max Chou
` (16 subsequent siblings)
18 siblings, 2 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou
This commit provides the implementation defined behavior flags and the basic
operation support for the OCP float8 data types(E4M3 & E5M2).
Signed-off-by: Max Chou <max.chou@sifive.com>
---
fpu/softfloat-specialize.c.inc | 57 ++++++++++++++++++++++++++-
include/fpu/softfloat-helpers.h | 20 ++++++++++
include/fpu/softfloat-types.h | 23 +++++++++++
include/fpu/softfloat.h | 70 +++++++++++++++++++++++++++++++++
4 files changed, 169 insertions(+), 1 deletion(-)
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index ba4fa08b7b..3a3bcd22ae 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -226,6 +226,30 @@ floatx80 floatx80_default_inf(bool zSign, float_status *status)
return packFloatx80(zSign, 0x7fff, z ? 0 : (1ULL << 63));
}
+/*----------------------------------------------------------------------------
+| Returns 1 if the OCP(Open Compute Platform) FP8 value `a' is a quiet NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+bool float8_e4m3_is_quiet_nan(float8_e4m3 a_, float_status *status)
+{
+ return float8_e4m3_is_any_nan(a_);
+}
+
+bool float8_e5m2_is_quiet_nan(float8_e5m2 a_, float_status *status)
+{
+ if (no_signaling_nans(status) || status->ocp_fp8e5m2_no_signal_nan) {
+ return float8_e5m2_is_any_nan(a_);
+ } else {
+ uint8_t a = float8_e5m2_val(a_);
+ if (snan_bit_is_one(status)) {
+ return (((a >> 1) & 0x3F) == 0x3E) && (a & 0x1);
+ } else {
+ return ((a >> 1) & 0x3F) == 0x3F;
+ }
+ }
+}
+
/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is a quiet
| NaN; otherwise returns 0.
@@ -240,7 +264,6 @@ bool float16_is_quiet_nan(float16 a_, float_status *status)
if (snan_bit_is_one(status)) {
return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF);
} else {
-
return ((a >> 9) & 0x3F) == 0x3F;
}
}
@@ -265,6 +288,38 @@ bool bfloat16_is_quiet_nan(bfloat16 a_, float_status *status)
}
}
+/*----------------------------------------------------------------------------
+| Returns 1 if the OCP(Open Compute Platform) FP8 value `a' is a signaling NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+bool float8_e4m3_is_signaling_nan(float8_e4m3 a_, float_status *status)
+{
+ if (no_signaling_nans(status)) {
+ return false;
+ } else {
+ if (snan_bit_is_one(status)) {
+ return float8_e4m3_is_any_nan(a_);
+ } else {
+ return false;
+ }
+ }
+}
+
+bool float8_e5m2_is_signaling_nan(float8_e5m2 a_, float_status *status)
+{
+ if (no_signaling_nans(status)) {
+ return false;
+ } else {
+ uint8_t a = float8_e5m2_val(a_);
+ if (snan_bit_is_one(status)) {
+ return ((a >> 1) & 0x3F) == 0x3F;
+ } else {
+ return (((a >> 1) & 0x3F) == 0x3E && (a & 0x1));
+ }
+ }
+}
+
/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is a signaling
| NaN; otherwise returns 0.
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
index 90862f5cd2..4e278a3ee3 100644
--- a/include/fpu/softfloat-helpers.h
+++ b/include/fpu/softfloat-helpers.h
@@ -136,6 +136,26 @@ static inline void set_no_signaling_nans(bool val, float_status *status)
status->no_signaling_nans = val;
}
+static inline void set_ocp_fp8e5m2_no_signal_nan(bool val, float_status *status)
+{
+ status->ocp_fp8e5m2_no_signal_nan = val;
+}
+
+static inline bool get_ocp_fp8e5m2_no_signal_nan(const float_status *status)
+{
+ return status->ocp_fp8e5m2_no_signal_nan;
+}
+
+static inline void set_ocp_fp8_same_canonical_nan(bool val, float_status *status)
+{
+ status->ocp_fp8_same_canonical_nan = val;
+}
+
+static inline bool get_ocp_fp8_same_canonical_nan(const float_status *status)
+{
+ return status->ocp_fp8_same_canonical_nan;
+}
+
static inline bool get_float_detect_tininess(const float_status *status)
{
return status->tininess_before_rounding;
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index 8f82fdfc97..835dd33bf1 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -119,6 +119,18 @@ typedef struct {
*/
typedef uint16_t bfloat16;
+/*
+ * Software OCP(Open Compute Project) 8-bit floating point types
+ */
+typedef uint8_t float8_e4m3;
+typedef uint8_t float8_e5m2;
+#define float8_e4m3_val(x) (x)
+#define float8_e5m2_val(x) (x)
+#define make_float8_e4m3(x) (x)
+#define make_float8_e5m2(x) (x)
+#define const_float8_e4m3(x) (x)
+#define const_float8_e5m2(x) (x)
+
/*
* Software IEC/IEEE floating-point underflow tininess-detection mode.
*/
@@ -410,6 +422,17 @@ typedef struct float_status {
*/
bool snan_bit_is_one;
bool no_signaling_nans;
+ /*
+ * When true, OCP FP8 E5M2 format does not generate signaling NaNs.
+ * RISC-V uses only quiet NaNs in its OCP FP8 implementation.
+ */
+ bool ocp_fp8e5m2_no_signal_nan;
+ /*
+ * When true, OCP FP8 formats use the same canonical NaN representation
+ * (0x7F) for all NaN outputs. RISC-V specifies a single canonical NaN
+ * for both E4M3 and E5M2.
+ */
+ bool ocp_fp8_same_canonical_nan;
/* should overflowed results subtract re_bias to its exponent? */
bool rebias_overflow;
/* should underflowed results add re_bias to its exponent? */
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index c18ab2cb60..6f7259f9dd 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -189,6 +189,76 @@ float128 int128_to_float128(Int128, float_status *status);
float128 uint64_to_float128(uint64_t, float_status *status);
float128 uint128_to_float128(Int128, float_status *status);
+/*----------------------------------------------------------------------------
+| Software OCP FP8 operations.
+*----------------------------------------------------------------------------*/
+
+bool float8_e4m3_is_quiet_nan(float8_e4m3, float_status *status);
+bool float8_e4m3_is_signaling_nan(float8_e4m3, float_status *status);
+bool float8_e5m2_is_quiet_nan(float8_e5m2, float_status *status);
+bool float8_e5m2_is_signaling_nan(float8_e5m2, float_status *status);
+
+static inline bool float8_e4m3_is_any_nan(float8_e4m3 a)
+{
+ return ((float8_e4m3_val(a) & ~0x80) == 0x7f);
+}
+
+static inline bool float8_e5m2_is_any_nan(float8_e5m2 a)
+{
+ return ((float8_e5m2_val(a) & ~0x80) > 0x7c);
+}
+
+static inline bool float8_e4m3_is_neg(float8_e4m3 a)
+{
+ return float8_e4m3_val(a) >> 7;
+}
+
+static inline bool float8_e5m2_is_neg(float8_e5m2 a)
+{
+ return float8_e5m2_val(a) >> 7;
+}
+
+static inline bool float8_e4m3_is_infinity(float8_e4m3 a)
+{
+ return false;
+}
+
+static inline bool float8_e5m2_is_infinity(float8_e5m2 a)
+{
+ return (float8_e5m2_val(a) & 0x7f) == 0x7c;
+}
+
+static inline bool float8_e4m3_is_zero(float8_e4m3 a)
+{
+ return (float8_e4m3_val(a) & 0x7f) == 0;
+}
+
+static inline bool float8_e5m2_is_zero(float8_e5m2 a)
+{
+ return (float8_e5m2_val(a) & 0x7f) == 0;
+}
+
+static inline bool float8_e4m3_is_zero_or_denormal(float8_e4m3 a)
+{
+ return (float8_e4m3_val(a) & 0x78) == 0;
+}
+
+static inline bool float8_e5m2_is_zero_or_denormal(float8_e5m2 a)
+{
+ return (float8_e5m2_val(a) & 0x7c) == 0;
+}
+
+static inline bool float8_e4m3_is_normal(float8_e4m3 a)
+{
+ uint8_t em = float8_e4m3_val(a) & 0x7f;
+ return em >= 0x8 && em <= 0x7e;
+}
+
+static inline bool float8_e5m2_is_normal(float8_e5m2 a)
+{
+ return (((float8_e5m2_val(a) >> 2) + 1) & 0x1f) >= 2;
+}
+
/*----------------------------------------------------------------------------
| Software half-precision conversion routines.
*----------------------------------------------------------------------------*/
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: [PATCH 02/18] fpu/softfloat: Add OCP(Open Compute Project) OFP8 data type
2026-01-08 15:16 ` [PATCH 02/18] fpu/softfloat: Add OCP(Open Compute Project) OFP8 data type Max Chou
@ 2026-01-09 7:29 ` Chao Liu
2026-01-15 7:46 ` Max Chou
2026-01-10 2:57 ` Richard Henderson
1 sibling, 1 reply; 33+ messages in thread
From: Chao Liu @ 2026-01-09 7:29 UTC (permalink / raw)
To: Max Chou, qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei
Hi, Max:
On 1/8/2026 11:16 PM, Max Chou wrote:
> This commit provides the implementation defined behavior flags and the basic
> operation support for the OCP float8 data types(E4M3 & E5M2).
>
> Signed-off-by: Max Chou <max.chou@sifive.com>
> ---
> fpu/softfloat-specialize.c.inc | 57 ++++++++++++++++++++++++++-
> include/fpu/softfloat-helpers.h | 20 ++++++++++
> include/fpu/softfloat-types.h | 23 +++++++++++
> include/fpu/softfloat.h | 70 +++++++++++++++++++++++++++++++++
> 4 files changed, 169 insertions(+), 1 deletion(-)
>
> diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
> index ba4fa08b7b..3a3bcd22ae 100644
> --- a/fpu/softfloat-specialize.c.inc
> +++ b/fpu/softfloat-specialize.c.inc
> @@ -226,6 +226,30 @@ floatx80 floatx80_default_inf(bool zSign, float_status *status)
> return packFloatx80(zSign, 0x7fff, z ? 0 : (1ULL << 63));
> }
>
> +/*----------------------------------------------------------------------------
> +| Returns 1 if the OCP(Open Compute Platform) FP8 value `a' is a quiet NaN;
Open Compute Platform -> Open Compute Project
> +| otherwise returns 0.
> +*----------------------------------------------------------------------------*/
> +
> +bool float8_e4m3_is_quiet_nan(float8_e4m3 a_, float_status *status)
> +{
> + return float8_e4m3_is_any_nan(a_);
> +}
> +
...
> +/*----------------------------------------------------------------------------
> +| Returns 1 if the OCP(Open Compute Platform) FP8 value `a' is a signaling NaN;
Open Compute Platform -> Open Compute Project
Thanks,
Chao
> +| otherwise returns 0.
> +*----------------------------------------------------------------------------*/
> +
> +bool float8_e4m3_is_signaling_nan(float8_e4m3 a_, float_status *status)
> +{
> + if (no_signaling_nans(status)) {
> + return false;
> + } else {
> + if (snan_bit_is_one(status)) {
> + return float8_e4m3_is_any_nan(a_);
> + } else {
> + return false;
> + }
> + }
> +}
> +
> +bool float8_e5m2_is_signaling_nan(float8_e5m2 a_, float_status *status)
> +{
> + if (no_signaling_nans(status)) {
> + return false;
> + } else {
> + uint8_t a = float8_e5m2_val(a_);
> + if (snan_bit_is_one(status)) {
> + return ((a >> 1) & 0x3F) == 0x3F;
> + } else {
> + return (((a >> 1) & 0x3F) == 0x3E && (a & 0x1));
> + }
> + }
> +}
> +
> /*----------------------------------------------------------------------------
> | Returns 1 if the half-precision floating-point value `a' is a signaling
> | NaN; otherwise returns 0.
> diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
> index 90862f5cd2..4e278a3ee3 100644
> --- a/include/fpu/softfloat-helpers.h
> +++ b/include/fpu/softfloat-helpers.h
> @@ -136,6 +136,26 @@ static inline void set_no_signaling_nans(bool val, float_status *status)
> status->no_signaling_nans = val;
> }
>
> +static inline void set_ocp_fp8e5m2_no_signal_nan(bool val, float_status *status)
> +{
> + status->ocp_fp8e5m2_no_signal_nan = val;
> +}
> +
> +static inline bool get_ocp_fp8e5m2_no_signal_nan(const float_status *status)
> +{
> + return status->ocp_fp8e5m2_no_signal_nan;
> +}
> +
> +static inline void set_ocp_fp8_same_canonical_nan(bool val, float_status *status)
> +{
> + status->ocp_fp8_same_canonical_nan = val;
> +}
> +
> +static inline bool get_ocp_fp8_same_canonical_nan(const float_status *status)
> +{
> + return status->ocp_fp8_same_canonical_nan;
> +}
> +
> static inline bool get_float_detect_tininess(const float_status *status)
> {
> return status->tininess_before_rounding;
> diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
> index 8f82fdfc97..835dd33bf1 100644
> --- a/include/fpu/softfloat-types.h
> +++ b/include/fpu/softfloat-types.h
> @@ -119,6 +119,18 @@ typedef struct {
> */
> typedef uint16_t bfloat16;
>
> +/*
> + * Software OCP(Open Compute Project) 8-bit floating point types
> + */
> +typedef uint8_t float8_e4m3;
> +typedef uint8_t float8_e5m2;
> +#define float8_e4m3_val(x) (x)
> +#define float8_e5m2_val(x) (x)
> +#define make_float8_e4m3(x) (x)
> +#define make_float8_e5m2(x) (x)
> +#define const_float8_e4m3(x) (x)
> +#define const_float8_e5m2(x) (x)
> +
> /*
> * Software IEC/IEEE floating-point underflow tininess-detection mode.
> */
> @@ -410,6 +422,17 @@ typedef struct float_status {
> */
> bool snan_bit_is_one;
> bool no_signaling_nans;
> + /*
> + * When true, OCP FP8 E5M2 format does not generate signaling NaNs.
> + * RISC-V uses only quiet NaNs in its OCP FP8 implementation.
> + */
> + bool ocp_fp8e5m2_no_signal_nan;
> + /*
> + * When true, OCP FP8 formats use the same canonical NaN representation
> + * (0x7F) for all NaN outputs. RISC-V specifies a single canonical NaN
> + * for both E4M3 and E5M2.
> + */
> + bool ocp_fp8_same_canonical_nan;
> /* should overflowed results subtract re_bias to its exponent? */
> bool rebias_overflow;
> /* should underflowed results add re_bias to its exponent? */
> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
> index c18ab2cb60..6f7259f9dd 100644
> --- a/include/fpu/softfloat.h
> +++ b/include/fpu/softfloat.h
> @@ -189,6 +189,76 @@ float128 int128_to_float128(Int128, float_status *status);
> float128 uint64_to_float128(uint64_t, float_status *status);
> float128 uint128_to_float128(Int128, float_status *status);
>
> +/*----------------------------------------------------------------------------
> +| Software OCP FP8 operations.
> +*----------------------------------------------------------------------------*/
> +
> +bool float8_e4m3_is_quiet_nan(float8_e4m3, float_status *status);
> +bool float8_e4m3_is_signaling_nan(float8_e4m3, float_status *status);
> +bool float8_e5m2_is_quiet_nan(float8_e5m2, float_status *status);
> +bool float8_e5m2_is_signaling_nan(float8_e5m2, float_status *status);
> +
> +static inline bool float8_e4m3_is_any_nan(float8_e4m3 a)
> +{
> + return ((float8_e4m3_val(a) & ~0x80) == 0x7f);
> +}
> +
> +static inline bool float8_e5m2_is_any_nan(float8_e5m2 a)
> +{
> + return ((float8_e5m2_val(a) & ~0x80) > 0x7c);
> +}
> +
> +static inline bool float8_e4m3_is_neg(float8_e4m3 a)
> +{
> + return float8_e4m3_val(a) >> 7;
> +}
> +
> +static inline bool float8_e5m2_is_neg(float8_e5m2 a)
> +{
> + return float8_e5m2_val(a) >> 7;
> +}
> +
> +static inline bool float8_e4m3_is_infinity(float8_e4m3 a)
> +{
> + return false;
> +}
> +
> +static inline bool float8_e5m2_is_infinity(float8_e5m2 a)
> +{
> + return (float8_e5m2_val(a) & 0x7f) == 0x7c;
> +}
> +
> +static inline bool float8_e4m3_is_zero(float8_e4m3 a)
> +{
> + return (float8_e4m3_val(a) & 0x7f) == 0;
> +}
> +
> +static inline bool float8_e5m2_is_zero(float8_e5m2 a)
> +{
> + return (float8_e5m2_val(a) & 0x7f) == 0;
> +}
> +
> +static inline bool float8_e4m3_is_zero_or_denormal(float8_e4m3 a)
> +{
> + return (float8_e4m3_val(a) & 0x78) == 0;
> +}
> +
> +static inline bool float8_e5m2_is_zero_or_denormal(float8_e5m2 a)
> +{
> + return (float8_e5m2_val(a) & 0x7c) == 0;
> +}
> +
> +static inline bool float8_e4m3_is_normal(float8_e4m3 a)
> +{
> + uint8_t em = float8_e4m3_val(a) & 0x7f;
> + return em >= 0x8 && em <= 0x7e;
> +}
> +
> +static inline bool float8_e5m2_is_normal(float8_e5m2 a)
> +{
> + return (((float8_e5m2_val(a) >> 2) + 1) & 0x1f) >= 2;
> +}
> +
> /*----------------------------------------------------------------------------
> | Software half-precision conversion routines.
> *----------------------------------------------------------------------------*/
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 02/18] fpu/softfloat: Add OCP(Open Compute Project) OFP8 data type
2026-01-09 7:29 ` Chao Liu
@ 2026-01-15 7:46 ` Max Chou
0 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-15 7:46 UTC (permalink / raw)
To: Chao Liu
Cc: qemu-devel, qemu-riscv, Palmer Dabbelt, Alistair Francis,
Aurelien Jarno, Peter Maydell, Alex Bennée, Weiwei Li,
Daniel Henrique Barboza, Liu Zhiwei
On 2026-01-09 15:29, Chao Liu wrote:
> Open Compute Platform -> Open Compute Project
>
Thanks. Will fix it at v2.
rnax
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 02/18] fpu/softfloat: Add OCP(Open Compute Project) OFP8 data type
2026-01-08 15:16 ` [PATCH 02/18] fpu/softfloat: Add OCP(Open Compute Project) OFP8 data type Max Chou
2026-01-09 7:29 ` Chao Liu
@ 2026-01-10 2:57 ` Richard Henderson
2026-01-14 11:54 ` Max Chou
1 sibling, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2026-01-10 2:57 UTC (permalink / raw)
To: Max Chou, qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei
On 1/9/26 02:16, Max Chou wrote:
> +bool float8_e4m3_is_quiet_nan(float8_e4m3 a_, float_status *status)
> +{
> + return float8_e4m3_is_any_nan(a_);
> +}
> +
> +bool float8_e5m2_is_quiet_nan(float8_e5m2 a_, float_status *status)
> +{
> + if (no_signaling_nans(status) || status->ocp_fp8e5m2_no_signal_nan) {
What is this new thing?
> + return float8_e5m2_is_any_nan(a_);
> + } else {
> + uint8_t a = float8_e5m2_val(a_);
> + if (snan_bit_is_one(status)) {
> + return (((a >> 1) & 0x3F) == 0x3E) && (a & 0x1);
> + } else {
> + return ((a >> 1) & 0x3F) == 0x3F;
> + }
> + }
> +}
...
> +bool float8_e4m3_is_signaling_nan(float8_e4m3 a_, float_status *status)
> +{
> + if (no_signaling_nans(status)) {
> + return false;
> + } else {
> + if (snan_bit_is_one(status)) {
> + return float8_e4m3_is_any_nan(a_);
> + } else {
> + return false;
> + }
> + }
> +}
> +
> +bool float8_e5m2_is_signaling_nan(float8_e5m2 a_, float_status *status)
> +{
> + if (no_signaling_nans(status)) {
... which is not also reflected here?
> + return false;
> + } else {
> + uint8_t a = float8_e5m2_val(a_);
> + if (snan_bit_is_one(status)) {
> + return ((a >> 1) & 0x3F) == 0x3F;
> + } else {
> + return (((a >> 1) & 0x3F) == 0x3E && (a & 0x1));
> + }
> + }
> +}
(0) We really should clean up this code so that there's not so much duplication.
FOO_is_quiet_nan and FOO_is_signaling_nan really should share code.
That would have caught the above.
(1) RISC-V always uses default nan mode, the OCP spec declines to define SNaN vs QNaN,
leaving the 8 unique NaN encodings unspecified, and RISC-V does not do so either. You
assert later:
+ * RISC-V uses only quiet NaNs in its OCP FP8 implementation.
Is this out-of-band discussion with engineers?
Because it's missing from the (remarkably short) document.
(2) Arm does specify (see FP8Unpack in the ARM pseudocode), doing the usual thing in
taking the msb of the mantissa for SNaN. Which means that E4M3 is *always* SNaN.
Both architectures then immediately convert to FP16 default nan, however Arm *does* raise
invalid operand exception for the SNaN, so we can't just ignore it.
Given that there's exactly one RISC-V instruction for which this matters,
vfwcvtbf16.f.f.v, it seems like it might be better to simply adjust
float_status.no_signaling_nans within the helper rather than introduce
ocp_fp8e5m2_no_signal_nan.
> + /*
> + * When true, OCP FP8 formats use the same canonical NaN representation
> + * (0x7F) for all NaN outputs. RISC-V specifies a single canonical NaN
> + * for both E4M3 and E5M2.
> + */
> + bool ocp_fp8_same_canonical_nan;
Similarly you could adjust the canonical nan around the 4 FP16->FP8 conversion insn helpers:
/* Default NaN value: sign bit clear, all frac bits set */
set_float_default_nan_pattern(0b01111111, &env->fp_status);
In either case, "bool" doesn't seem appropriate.
FWIW, Arm retains the msb set pattern as for all other fp formats (FP8DefaultNaN).
r~
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 02/18] fpu/softfloat: Add OCP(Open Compute Project) OFP8 data type
2026-01-10 2:57 ` Richard Henderson
@ 2026-01-14 11:54 ` Max Chou
0 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-14 11:54 UTC (permalink / raw)
To: Richard Henderson
Cc: qemu-devel, qemu-riscv, Palmer Dabbelt, Alistair Francis,
Aurelien Jarno, Peter Maydell, Alex Bennée, Weiwei Li,
Daniel Henrique Barboza, Liu Zhiwei
On 2026-01-10 13:57, Richard Henderson wrote:
> (0) We really should clean up this code so that there's not so much duplication.
> FOO_is_quiet_nan and FOO_is_signaling_nan really should share code.
> That would have caught the above.
>
Thanks for the suggestion and I think that maybe we can remove the
FOO_is_[quiet|signaling]_nan functions here.
These OCP FP8 NaN checks should be different implementation-defined
behaviors.
> (1) RISC-V always uses default nan mode, the OCP spec declines to define
> SNaN vs QNaN, leaving the 8 unique NaN encodings unspecified, and RISC-V
> does not do so either. You assert later:
>
> + * RISC-V uses only quiet NaNs in its OCP FP8 implementation.
>
> Is this out-of-band discussion with engineers?
> Because it's missing from the (remarkably short) document.
>
The RISC-V Zvfofp8min extension specification (v0.2.1) explicitly states the
NaN handling behavior for OFP8 conversions:
1. Canonical NaN Definition (Section: Zvfofp8min):
"The canonical NaN for both E4M3 and E5M2 is 0x7f."
2. Widening Conversion Behavior (vfwcvtbf16.f.f.v instruction):
"No rounding occurs, and no floating-point exception flags are set."
The specification's explicit statement that "no floating-point
exception flags are set" for vfwcvtbf16.f.f.v provides clear
justification for treating all OFP8 NaNs as quiet in this specific
context.
3. Narrowing Conversion Behavior (vfncvtbf16.f.f.w instruction):
"Since E4M3 cannot represent infinity, infinite results are converted
to the canonical NaN, 0x7f."
This demonstrates that RISC-V uses quiet NaN propagation semantics
throughout the OFP8 conversion pipeline.
> (2) Arm does specify (see FP8Unpack in the ARM pseudocode), doing the usual
> thing in taking the msb of the mantissa for SNaN. Which means that E4M3 is
> *always* SNaN.
>
> Both architectures then immediately convert to FP16 default nan, however Arm
> *does* raise invalid operand exception for the SNaN, so we can't just ignore
> it.
>
> Given that there's exactly one RISC-V instruction for which this matters,
> vfwcvtbf16.f.f.v, it seems like it might be better to simply adjust
> float_status.no_signaling_nans within the helper rather than introduce
> ocp_fp8e5m2_no_signal_nan.
>
> > + /*
> > + * When true, OCP FP8 formats use the same canonical NaN representation
> > + * (0x7F) for all NaN outputs. RISC-V specifies a single canonical NaN
> > + * for both E4M3 and E5M2.
> > + */
> > + bool ocp_fp8_same_canonical_nan;
>
> Similarly you could adjust the canonical nan around the 4 FP16->FP8 conversion insn helpers:
>
> /* Default NaN value: sign bit clear, all frac bits set */
> set_float_default_nan_pattern(0b01111111, &env->fp_status);
>
> In either case, "bool" doesn't seem appropriate.
>
> FWIW, Arm retains the msb set pattern as for all other fp formats (FP8DefaultNaN).
>
>
> r~
Thank you for the review feedback and suggestions.
The suggestion to handle the canonical nan and quiet nan within the
helper function rather than adding global state to float_status is the
cleaner solution.
I will incorporate this change in v2 of the patchset.
Thanks a lot,
rnax
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 03/18] fpu/softfloat: Add convert operations(bf16, fp32) for OFP8 data types
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
2026-01-08 15:16 ` [PATCH 01/18] target/riscv: rvv: Fix NOP_UU_B vs2 width Max Chou
2026-01-08 15:16 ` [PATCH 02/18] fpu/softfloat: Add OCP(Open Compute Project) OFP8 data type Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-10 3:20 ` Richard Henderson
2026-01-08 15:16 ` [PATCH 04/18] fpu/softfloat: Add OCP(Open Compute Project) OFP4 data type Max Chou
` (15 subsequent siblings)
18 siblings, 1 reply; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou
This commit provides the convert operations for ofp8(e4m3, e5m2) with the
following implementation-defined behaviors required by the RISC-V Zvfofp8min
extension:
- The canonical NaN of OFP8 e4m3 and e5m2 is 0x7f (ocpfp8_same_cnan)
- All of the NaNs of OFP8 are quiet NaNs (ocpfp8_only_qnan)
According to the definition in OFP8 spec, the conversion from a wider
format infinity depends on the saturation mode defined in the spec.
Signed-off-by: Max Chou <max.chou@sifive.com>
---
fpu/softfloat-parts.c.inc | 77 +++++++++++-
fpu/softfloat.c | 241 ++++++++++++++++++++++++++++++++++++++
include/fpu/softfloat.h | 11 ++
3 files changed, 323 insertions(+), 6 deletions(-)
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 5e0438fc0b..d9ec3ca8ae 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -227,7 +227,8 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
p->exp = fmt->frac_shift - fmt->exp_bias
- shift + !has_pseudo_denormals;
}
- } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
+ } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp ||
+ ocpfp_is_normal(p, fmt, false)) {
p->cls = float_class_normal;
p->exp -= fmt->exp_bias;
frac_shl(p, fmt->frac_shift);
@@ -236,8 +237,12 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
p->cls = float_class_inf;
} else {
frac_shl(p, fmt->frac_shift);
- p->cls = (parts_is_snan_frac(p->frac_hi, status)
- ? float_class_snan : float_class_qnan);
+ if (parts_is_snan_frac(p->frac_hi, status) == false ||
+ (fmt->ocpfp && status->ocp_fp8e5m2_no_signal_nan)) {
+ p->cls = float_class_qnan;
+ } else {
+ p->cls = float_class_snan;
+ }
}
}
@@ -313,8 +318,40 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
}
p->frac_lo &= ~round_mask;
}
+ p->exp = exp;
- if (fmt->arm_althp) {
+ if (fmt->ocpfp) {
+ if (ocpfp_is_normal(p, fmt, true) == false) {
+ flags |= float_flag_overflow;
+ flags |= float_flag_inexact;
+ if (fmt->exp_size == 4) {
+ if (fmt->ocpfp_sat || overflow_norm) {
+ /* S.1111.110 */
+ exp = exp_max;
+ frac_clear(p);
+ frac_addi(p, p, 0b110);
+ frac_shl(p, frac_shift);
+ } else {
+ /* S.1111.111 NaN */
+ p->cls = float_class_qnan;
+ p->sign = (s->ocp_fp8_same_canonical_nan ? 0 : p->sign);
+ exp = exp_max;
+ frac_allones(p);
+ }
+ } else if (fmt->exp_size == 5) {
+ if (fmt->ocpfp_sat || overflow_norm) {
+ /* S.11110.11 */
+ exp = exp_max - 1;
+ frac_allones(p);
+ } else {
+ /* S.11111.00 Inf */
+ p->cls = float_class_inf;
+ exp = exp_max;
+ frac_clear(p);
+ }
+ }
+ }
+ } else if (fmt->arm_althp) {
/* ARM Alt HP eschews Inf and NaN for a wider exponent. */
if (unlikely(exp > exp_max)) {
/* Overflow. Return the maximum normal. */
@@ -435,8 +472,36 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s,
return;
case float_class_inf:
g_assert(!fmt->arm_althp);
- p->exp = fmt->exp_max;
- frac_clear(p);
+ if (fmt->ocpfp) {
+ if (fmt->ocpfp_sat) {
+ if (fmt->exp_size == 4) {
+ /* S.1111.110 */
+ p->exp = fmt->exp_max;
+ frac_clear(p);
+ frac_addi(p, p, 0x6);
+ } else {
+ /* S.11110.11 */
+ p->exp = fmt->exp_max - 1;
+ frac_allones(p);
+ }
+ } else {
+ if (fmt->exp_size == 4) {
+ /* S.1111.111 NaN */
+ p->cls = float_class_qnan;
+ p->sign = (s->ocp_fp8_same_canonical_nan ? 0 : p->sign);
+ p->exp = fmt->exp_max;
+ frac_allones(p);
+ } else {
+ /* S.11111.00 Inf */
+ p->cls = float_class_inf;
+ p->exp = fmt->exp_max;
+ frac_clear(p);
+ }
+ }
+ } else {
+ p->exp = fmt->exp_max;
+ frac_clear(p);
+ }
return;
case float_class_qnan:
case float_class_snan:
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 8094358c2e..0c7f052ec0 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -544,6 +544,8 @@ typedef struct {
int frac_shift;
bool arm_althp;
bool has_explicit_bit;
+ bool ocpfp;
+ bool ocpfp_sat;
uint64_t round_mask;
} FloatFmt;
@@ -560,6 +562,28 @@ typedef struct {
.frac_shift = (-F - 1) & 63, \
.round_mask = (1ull << ((-F - 1) & 63)) - 1
+static const FloatFmt float8_e4m3_params = {
+ FLOAT_PARAMS(4, 3),
+ .ocpfp = true
+};
+
+static const FloatFmt float8_e4m3_params_sat = {
+ FLOAT_PARAMS(4, 3),
+ .ocpfp = true,
+ .ocpfp_sat = true
+};
+
+static const FloatFmt float8_e5m2_params = {
+ FLOAT_PARAMS(5, 2),
+ .ocpfp = true
+};
+
+static const FloatFmt float8_e5m2_params_sat = {
+ FLOAT_PARAMS(5, 2),
+ .ocpfp = true,
+ .ocpfp_sat = true
+};
+
static const FloatFmt float16_params = {
FLOAT_PARAMS(5, 10)
};
@@ -614,6 +638,16 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
};
}
+static void QEMU_FLATTEN float8_e4m3_unpack_raw(FloatParts64 *p, float8_e4m3 f)
+{
+ unpack_raw64(p, &float8_e4m3_params, f);
+}
+
+static void QEMU_FLATTEN float8_e5m2_unpack_raw(FloatParts64 *p, float8_e5m2 f)
+{
+ unpack_raw64(p, &float8_e5m2_params, f);
+}
+
static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
{
unpack_raw64(p, &float16_params, f);
@@ -671,6 +705,16 @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
return ret;
}
+static float8_e4m3 QEMU_FLATTEN float8_e4m3_pack_raw(const FloatParts64 *p)
+{
+ return make_float8_e4m3(pack_raw64(p, &float8_e4m3_params));
+}
+
+static float8_e5m2 QEMU_FLATTEN float8_e5m2_pack_raw(const FloatParts64 *p)
+{
+ return make_float8_e5m2(pack_raw64(p, &float8_e5m2_params));
+}
+
static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
{
return make_float16(pack_raw64(p, &float16_params));
@@ -1604,6 +1648,91 @@ static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
+#define OCPFP_GENERIC_64_128(NAME, P) \
+ _Generic((P), FloatParts64 *: ocpfp64_##NAME, \
+ FloatParts128 *: ocpfp128_##NAME)
+
+static bool ocpfp64_is_normal(const FloatParts64 *a, const FloatFmt *fmt,
+ bool is_normalized)
+{
+ FloatParts64 input;
+ input.exp = a->exp;
+ input.frac = a->frac;
+ if (!is_normalized) {
+ frac64_shl(&input, fmt->frac_shift);
+ input.frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+ }
+
+ if (fmt->ocpfp) {
+ if (fmt->exp_size == 4 && fmt->frac_size == 3) {
+ /*
+ * The OCP E4M3 format uses only two bit patterns for NaN (a
+ * single mantissa-exponent bit pattern with the sign bit) in
+ * order to increase emax to 8 and thus to increase the dynamic
+ * range by one binade.
+ */
+ FloatParts64 tmp;
+ frac64_clear(&tmp);
+ tmp.frac_lo = 0b110;
+ frac64_shl(&tmp, fmt->frac_shift);
+ tmp.frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+ if (!(input.exp > fmt->exp_max ||
+ (input.exp == fmt->exp_max &&
+ frac64_cmp(&input, &tmp) == float_relation_greater))) {
+ return true;
+ }
+ } else if (fmt->exp_size == 5 && fmt->frac_size == 2) {
+ if (input.exp < fmt->exp_max) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static bool ocpfp128_is_normal(const FloatParts128 *a, const FloatFmt *fmt,
+ bool is_normalized)
+{
+ FloatParts128 input;
+ input.exp = a->exp;
+ input.frac_hi = a->frac_hi;
+ input.frac_lo = a->frac_lo;
+ if (!is_normalized) {
+ frac128_shl(&input, fmt->frac_shift);
+ input.frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+ }
+
+ if (fmt->ocpfp) {
+ if (fmt->exp_size == 4 && fmt->frac_size == 3) {
+ /*
+ * The OCP E4M3 format uses only two bit patterns for NaN (a
+ * single mantissa-exponent bit pattern with the sign bit) in
+ * order to increase emax to 8 and thus to increase the dynamic
+ * range by one binade.
+ */
+ FloatParts128 tmp;
+ frac128_clear(&tmp);
+ tmp.frac_lo = 0b110;
+ frac128_shl(&tmp, fmt->frac_shift);
+ tmp.frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+ if (!(input.exp > fmt->exp_max ||
+ (input.exp == fmt->exp_max &&
+ frac128_cmp(&input, &tmp) == float_relation_greater))) {
+ return true;
+ }
+ } else if (fmt->exp_size == 5 && fmt->frac_size == 2) {
+ if (input.exp < fmt->exp_max) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+#define ocpfp_is_normal(A, F, N) OCPFP_GENERIC_64_128(is_normal, A)(A, F, N)
+
/*
* Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa.
* From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
@@ -1662,6 +1791,20 @@ static const uint16_t rsqrt_tab[128] = {
* Pack/unpack routines with a specific FloatFmt.
*/
+static void float8_e4m3_unpack_canonical(FloatParts64 *p, float8_e4m3 f,
+ float_status *s)
+{
+ float8_e4m3_unpack_raw(p, f);
+ parts_canonicalize(p, s, &float8_e4m3_params);
+}
+
+static void float8_e5m2_unpack_canonical(FloatParts64 *p, float8_e5m2 f,
+ float_status *s)
+{
+ float8_e5m2_unpack_raw(p, f);
+ parts_canonicalize(p, s, &float8_e5m2_params);
+}
+
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
float_status *s, const FloatFmt *params)
{
@@ -1682,6 +1825,22 @@ static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
parts_canonicalize(p, s, &bfloat16_params);
}
+static float8_e4m3 float8_e4m3_round_pack_canonical(FloatParts64 *p,
+ float_status *status,
+ const FloatFmt *params)
+{
+ parts_uncanon(p, status, params);
+ return float8_e4m3_pack_raw(p);
+}
+
+static float8_e5m2 float8_e5m2_round_pack_canonical(FloatParts64 *p,
+ float_status *status,
+ const FloatFmt *params)
+{
+ parts_uncanon(p, status, params);
+ return float8_e5m2_pack_raw(p);
+}
+
static float16 float16a_round_pack_canonical(FloatParts64 *p,
float_status *s,
const FloatFmt *params)
@@ -2759,6 +2918,23 @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
}
}
+static void parts_float_to_ofp8(FloatParts64 *a, float_status *s,
+ const FloatFmt *fmt)
+{
+ if (is_nan(a->cls)) {
+ if (s->ocp_fp8_same_canonical_nan) {
+ if (a->cls == float_class_snan) {
+ float_raise(float_flag_invalid | float_flag_invalid_snan, s);
+ }
+ a->sign = 0;
+ a->exp = fmt->exp_max;
+ frac_allones(a);
+ } else {
+ parts_return_nan(a, s);
+ }
+ }
+}
+
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
if (is_nan(a->cls)) {
@@ -2823,6 +2999,71 @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
}
}
+
+bfloat16 float8_e4m3_to_bfloat16(float8_e4m3 a, float_status *s)
+{
+ FloatParts64 p;
+
+ float8_e4m3_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+
+ return bfloat16_round_pack_canonical(&p, s);
+}
+
+bfloat16 float8_e5m2_to_bfloat16(float8_e5m2 a, float_status *s)
+{
+ FloatParts64 p;
+
+ float8_e5m2_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+
+ return bfloat16_round_pack_canonical(&p, s);
+}
+
+float8_e4m3 bfloat16_to_float8_e4m3(bfloat16 a, bool saturate, float_status *s)
+{
+ const FloatFmt *fmt = saturate ? &float8_e4m3_params_sat
+ : &float8_e4m3_params;
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ parts_float_to_ofp8(&p, s, fmt);
+ return float8_e4m3_round_pack_canonical(&p, s, fmt);
+}
+
+float8_e5m2 bfloat16_to_float8_e5m2(bfloat16 a, bool saturate, float_status *s)
+{
+ const FloatFmt *fmt = saturate ? &float8_e5m2_params_sat
+ : &float8_e5m2_params;
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ parts_float_to_ofp8(&p, s, fmt);
+ return float8_e5m2_round_pack_canonical(&p, s, fmt);
+}
+
+float8_e4m3 float32_to_float8_e4m3(float32 a, bool saturate, float_status *s)
+{
+ const FloatFmt *fmt = saturate ? &float8_e4m3_params_sat
+ : &float8_e4m3_params;
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ parts_float_to_ofp8(&p, s, fmt);
+ return float8_e4m3_round_pack_canonical(&p, s, fmt);
+}
+
+float8_e5m2 float32_to_float8_e5m2(float32 a, bool saturate, float_status *s)
+{
+ const FloatFmt *fmt = saturate ? &float8_e5m2_params_sat
+ : &float8_e5m2_params;
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ parts_float_to_ofp8(&p, s, fmt);
+ return float8_e5m2_round_pack_canonical(&p, s, fmt);
+}
+
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 6f7259f9dd..7ab585bfc8 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -189,6 +189,17 @@ float128 int128_to_float128(Int128, float_status *status);
float128 uint64_to_float128(uint64_t, float_status *status);
float128 uint128_to_float128(Int128, float_status *status);
+/*----------------------------------------------------------------------------
+| Software OCP FP8 conversion routines.
+*----------------------------------------------------------------------------*/
+
+bfloat16 float8_e4m3_to_bfloat16(float8_e4m3, float_status *status);
+bfloat16 float8_e5m2_to_bfloat16(float8_e5m2, float_status *status);
+float8_e4m3 bfloat16_to_float8_e4m3(bfloat16, bool saturate, float_status *status);
+float8_e5m2 bfloat16_to_float8_e5m2(bfloat16, bool saturate, float_status *status);
+float8_e4m3 float32_to_float8_e4m3(float32, bool saturate, float_status *status);
+float8_e5m2 float32_to_float8_e5m2(float32, bool saturate, float_status *status);
+
/*----------------------------------------------------------------------------
| Software OCP FP8 operations.
*----------------------------------------------------------------------------*/
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 03/18] fpu/softfloat: Add convert operations(bf16, fp32) for OFP8 data types
2026-01-08 15:16 ` [PATCH 03/18] fpu/softfloat: Add convert operations(bf16, fp32) for OFP8 data types Max Chou
@ 2026-01-10 3:20 ` Richard Henderson
2026-01-15 9:13 ` Max Chou
0 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2026-01-10 3:20 UTC (permalink / raw)
To: Max Chou, qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei
On 1/9/26 02:16, Max Chou wrote:
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index 8094358c2e..0c7f052ec0 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -544,6 +544,8 @@ typedef struct {
> int frac_shift;
> bool arm_althp;
> bool has_explicit_bit;
> + bool ocpfp;
> + bool ocpfp_sat;
> uint64_t round_mask;
> } FloatFmt;
>
> @@ -560,6 +562,28 @@ typedef struct {
> .frac_shift = (-F - 1) & 63, \
> .round_mask = (1ull << ((-F - 1) & 63)) - 1
>
> +static const FloatFmt float8_e4m3_params = {
> + FLOAT_PARAMS(4, 3),
> + .ocpfp = true
> +};
> +
> +static const FloatFmt float8_e4m3_params_sat = {
> + FLOAT_PARAMS(4, 3),
> + .ocpfp = true,
> + .ocpfp_sat = true
> +};
> +
> +static const FloatFmt float8_e5m2_params = {
> + FLOAT_PARAMS(5, 2),
> + .ocpfp = true
> +};
> +
> +static const FloatFmt float8_e5m2_params_sat = {
> + FLOAT_PARAMS(5, 2),
> + .ocpfp = true,
> + .ocpfp_sat = true
> +};
Saturation is not part of the format, it's part of the conversion operation.
I suggest you pass that as a bool parameter to bfloat16_to_float8_e4m3 etc.
This would then be handled as part of round-and-pack, maybe a separate step, maybe via
float_round_nearest_even_max.
I'm not sure what to do with arm_althp vs ocpfp. It seems like they have a couple of
things in common. Perhaps we should decompose these to separate behavior flags.
r~
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 03/18] fpu/softfloat: Add convert operations(bf16, fp32) for OFP8 data types
2026-01-10 3:20 ` Richard Henderson
@ 2026-01-15 9:13 ` Max Chou
0 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-15 9:13 UTC (permalink / raw)
To: Richard Henderson
Cc: qemu-devel, qemu-riscv, Palmer Dabbelt, Alistair Francis,
Aurelien Jarno, Peter Maydell, Alex Bennée, Weiwei Li,
Daniel Henrique Barboza, Liu Zhiwei
On 2026-01-10 14:20, Richard Henderson wrote:
> I suggest you pass that as a bool parameter to bfloat16_to_float8_e4m3 etc.
> This would then be handled as part of round-and-pack, maybe a separate step,
> maybe via float_round_nearest_even_max.
>
Thanks for the suggestion!
I will replace the format's saturate flag with a bool parameter on the
convert functions in v2.
> I'm not sure what to do with arm_althp vs ocpfp. It seems like they have a
> couple of things in common. Perhaps we should decompose these to separate
> behavior flags.
>
>
> r~
I agree that we should decompose these into separate behavior flags. I
will try to introduce some behavior flags (like no infinity, maximum
normal pattern, etc.) in v2.
rnax
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 04/18] fpu/softfloat: Add OCP(Open Compute Project) OFP4 data type
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (2 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 03/18] fpu/softfloat: Add convert operations(bf16, fp32) for OFP8 data types Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-09 6:08 ` Chao Liu
2026-01-08 15:16 ` [PATCH 05/18] fpu/softfloat: Add OCP FP4 E2M1 to OCP FP8 E4M3 convert operation Max Chou
` (14 subsequent siblings)
18 siblings, 1 reply; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou
This commit provides basic operation support for the OCP float4 data
type (e2m1).
Signed-off-by: Max Chou <max.chou@sifive.com>
---
include/fpu/softfloat-types.h | 7 +++++-
include/fpu/softfloat.h | 45 +++++++++++++++++++++++++++++++++--
2 files changed, 49 insertions(+), 3 deletions(-)
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index 835dd33bf1..82a54e9e6d 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -120,7 +120,7 @@ typedef struct {
typedef uint16_t bfloat16;
/*
- * Software OCP(Open Compute Project) 8-bit floating point types
+ * Software OCP(Open Compute Project) floating point types
*/
typedef uint8_t float8_e4m3;
typedef uint8_t float8_e5m2;
@@ -131,6 +131,11 @@ typedef uint8_t float8_e5m2;
#define const_float8_e4m3(x) (x)
#define const_float8_e5m2(x) (x)
+typedef uint8_t float4_e2m1;
+#define float4_e2m1_val(x) (x & 0xf)
+#define make_float4_e2m1(x) (x & 0xf)
+#define const_float4_e2m1(x) (x & 0xf)
+
/*
* Software IEC/IEEE floating-point underflow tininess-detection mode.
*/
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 7ab585bfc8..13b882bc67 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -190,7 +190,7 @@ float128 uint64_to_float128(uint64_t, float_status *status);
float128 uint128_to_float128(Int128, float_status *status);
/*----------------------------------------------------------------------------
-| Software OCP FP8 conversion routines.
+| Software OCP FP conversion routines.
*----------------------------------------------------------------------------*/
bfloat16 float8_e4m3_to_bfloat16(float8_e4m3, float_status *status);
@@ -201,7 +201,7 @@ float8_e4m3 float32_to_float8_e4m3(float32, bool saturate, float_status *status)
float8_e5m2 float32_to_float8_e5m2(float32, bool saturate, float_status *status);
/*----------------------------------------------------------------------------
-| Software OCP FP8 operations.
+| Software OCP FP operations.
*----------------------------------------------------------------------------*/
bool float8_e4m3_is_quiet_nan(float8_e4m3, float_status *status);
@@ -270,6 +270,47 @@ static inline bool float8_e5m2_is_normal(float8_e5m2 a)
return (((float8_e5m2_val(a) >> 2) + 1) & 0x1f) >= 2;
}
+static inline bool float4_e2m1_is_quiet_nan(float4_e2m1 a, float_status *status)
+{
+ return false;
+}
+
+static inline bool float4_e2m1_is_signaling_nan(float4_e2m1 a, float_status *status)
+{
+ return false;
+}
+
+static inline bool float4_e2m1_is_any_nan(float4_e2m1 a)
+{
+ return false;
+}
+
+static inline bool float4_e2m1_is_neg(float4_e2m1 a)
+{
+ return float4_e2m1_val(a) >> 3;
+}
+
+static inline bool float4_e2m1_is_infinity(float4_e2m1 a)
+{
+ return false;
+}
+
+static inline bool float4_e2m1_is_zero(float4_e2m1 a)
+{
+ return (float4_e2m1_val(a) & 0x7) == 0;
+}
+
+static inline bool float4_e2m1_is_zero_or_denormal(float4_e2m1 a)
+{
+ return (float4_e2m1_val(a) & 0x6) == 0;
+}
+
+static inline bool float4_e2m1_is_normal(float4_e2m1 a)
+{
+ uint8_t em = float4_e2m1_val(a) & 0x7;
+ return em >= 0x2 && em <= 0x7;
+}
+
/*----------------------------------------------------------------------------
| Software half-precision conversion routines.
*----------------------------------------------------------------------------*/
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 04/18] fpu/softfloat: Add OCP(Open Compute Project) OFP4 data type
2026-01-08 15:16 ` [PATCH 04/18] fpu/softfloat: Add OCP(Open Compute Project) OFP4 data type Max Chou
@ 2026-01-09 6:08 ` Chao Liu
0 siblings, 0 replies; 33+ messages in thread
From: Chao Liu @ 2026-01-09 6:08 UTC (permalink / raw)
To: Max Chou, qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei
Hi, Max:
On 1/8/2026 11:16 PM, Max Chou wrote:
> This commit provides the basic operation support for the OCP float4 data
> type(e2m1).
>
> Signed-off-by: Max Chou <max.chou@sifive.com>
> ---
> include/fpu/softfloat-types.h | 7 +++++-
> include/fpu/softfloat.h | 45 +++++++++++++++++++++++++++++++++--
> 2 files changed, 49 insertions(+), 3 deletions(-)
>
> diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
> index 835dd33bf1..82a54e9e6d 100644
> --- a/include/fpu/softfloat-types.h
> +++ b/include/fpu/softfloat-types.h
> @@ -120,7 +120,7 @@ typedef struct {
> typedef uint16_t bfloat16;
>
> /*
> - * Software OCP(Open Compute Project) 8-bit floating point types
> + * Software OCP(Open Compute Project) floating point types
> */
> typedef uint8_t float8_e4m3;
> typedef uint8_t float8_e5m2;
> @@ -131,6 +131,11 @@ typedef uint8_t float8_e5m2;
> #define const_float8_e4m3(x) (x)
> #define const_float8_e5m2(x) (x)
>
> +typedef uint8_t float4_e2m1;
> +#define float4_e2m1_val(x) (x & 0xf)
> +#define make_float4_e2m1(x) (x & 0xf)
> +#define const_float4_e2m1(x) (x & 0xf)
> +
I recommend adding parentheses around the parameter `x`:
#define float4_e2m1_val(x) ((x) & 0xf)
#define make_float4_e2m1(x) ((x) & 0xf)
#define const_float4_e2m1(x) ((x) & 0xf)
Thanks,
Chao
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 05/18] fpu/softfloat: Add OCP FP4 E2M1 to OCP FP8 E4M3 convert operation
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (3 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 04/18] fpu/softfloat: Add OCP(Open Compute Project) OFP4 data type Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 06/18] target/riscv: Add cfg properity for Zvfofp8min extension Max Chou
` (13 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou
This commit provides the OCP FP4 E2M1 to OCP FP8 E4M3 convert operation
for the Zvfofp4min extension.
Signed-off-by: Max Chou <max.chou@sifive.com>
---
fpu/softfloat.c | 37 +++++++++++++++++++++++++++++++++++--
include/fpu/softfloat.h | 2 ++
2 files changed, 37 insertions(+), 2 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 0c7f052ec0..30ba6d6c09 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -562,6 +562,11 @@ typedef struct {
.frac_shift = (-F - 1) & 63, \
.round_mask = (1ull << ((-F - 1) & 63)) - 1
+static const FloatFmt float4_e2m1_params = {
+ FLOAT_PARAMS(2, 1),
+ .ocpfp = true
+};
+
static const FloatFmt float8_e4m3_params = {
FLOAT_PARAMS(4, 3),
.ocpfp = true
@@ -638,6 +643,11 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
};
}
+static void QEMU_FLATTEN float4_e2m1_unpack_raw(FloatParts64 *p, float4_e2m1 f)
+{
+ unpack_raw64(p, &float4_e2m1_params, f);
+}
+
static void QEMU_FLATTEN float8_e4m3_unpack_raw(FloatParts64 *p, float8_e4m3 f)
{
unpack_raw64(p, &float8_e4m3_params, f);
@@ -1664,7 +1674,11 @@ static bool ocpfp64_is_normal(const FloatParts64 *a, const FloatFmt *fmt,
}
if (fmt->ocpfp) {
- if (fmt->exp_size == 4 && fmt->frac_size == 3) {
+ if (fmt->exp_size == 2 && fmt->frac_size == 1) {
+ if (input.exp <= fmt->exp_max) {
+ return true;
+ }
+ } else if (fmt->exp_size == 4 && fmt->frac_size == 3) {
/*
* The OCP E4M3 format uses only two bit patterns for NaN (a
* single mantissa-exponent bit pattern with the sign bit) in
@@ -1704,7 +1718,11 @@ static bool ocpfp128_is_normal(const FloatParts128 *a, const FloatFmt *fmt,
}
if (fmt->ocpfp) {
- if (fmt->exp_size == 4 && fmt->frac_size == 3) {
+ if (fmt->exp_size == 2 && fmt->frac_size == 1) {
+ if (input.exp <= fmt->exp_max) {
+ return true;
+ }
+ } else if (fmt->exp_size == 4 && fmt->frac_size == 3) {
/*
* The OCP E4M3 format uses only two bit patterns for NaN (a
* single mantissa-exponent bit pattern with the sign bit) in
@@ -1791,6 +1809,13 @@ static const uint16_t rsqrt_tab[128] = {
* Pack/unpack routines with a specific FloatFmt.
*/
+static void float4_e2m1_unpack_canonical(FloatParts64 *p, float4_e2m1 f,
+ float_status *s)
+{
+ float4_e2m1_unpack_raw(p, f);
+ parts_canonicalize(p, s, &float4_e2m1_params);
+}
+
static void float8_e4m3_unpack_canonical(FloatParts64 *p, float8_e4m3 f,
float_status *s)
{
@@ -2999,6 +3024,14 @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
}
}
+float8_e4m3 float4_e2m1_to_float8_e4m3(float4_e2m1 a, float_status *s)
+{
+ FloatParts64 p;
+
+ float4_e2m1_unpack_canonical(&p, a, s);
+ parts_float_to_ofp8(&p, s, &float8_e4m3_params);
+ return float8_e4m3_round_pack_canonical(&p, s, &float8_e4m3_params);
+}
bfloat16 float8_e4m3_to_bfloat16(float8_e4m3 a, float_status *s)
{
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 13b882bc67..b199a029b0 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -200,6 +200,8 @@ float8_e5m2 bfloat16_to_float8_e5m2(bfloat16, bool saturate, float_status *statu
float8_e4m3 float32_to_float8_e4m3(float32, bool saturate, float_status *status);
float8_e5m2 float32_to_float8_e5m2(float32, bool saturate, float_status *status);
+float8_e4m3 float4_e2m1_to_float8_e4m3(float4_e2m1, float_status *status);
+
/*----------------------------------------------------------------------------
| Software OCP FP operations.
*----------------------------------------------------------------------------*/
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 06/18] target/riscv: Add cfg properity for Zvfofp8min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (4 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 05/18] fpu/softfloat: Add OCP FP4 E2M1 to OCP FP8 E4M3 convert operation Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-09 7:05 ` Chao Liu
2026-01-08 15:16 ` [PATCH 07/18] target/riscv: Add implied rules " Max Chou
` (12 subsequent siblings)
18 siblings, 1 reply; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
According to the ISA spec of the Zvfofp8min extension,
"The Zvfofp8min extension requires on the Zve32f extension."
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/cpu.c | 8 ++++++++
target/riscv/cpu_cfg_fields.h.inc | 1 +
target/riscv/tcg/tcg-cpu.c | 5 +++++
target/riscv/vector_helper.c | 3 ++-
4 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 8eab992154..b4b10b52d8 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -193,6 +193,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
+ ISA_EXT_DATA_ENTRY(zvfofp8min, PRIV_VERSION_1_12_0, ext_zvfofp8min),
ISA_EXT_DATA_ENTRY(zvkb, PRIV_VERSION_1_12_0, ext_zvkb),
ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg),
ISA_EXT_DATA_ENTRY(zvkn, PRIV_VERSION_1_12_0, ext_zvkn),
@@ -778,6 +779,13 @@ static void riscv_cpu_reset_hold(Object *obj, ResetType type)
set_default_nan_mode(1, &env->fp_status);
/* Default NaN value: sign bit clear, frac msb set */
set_float_default_nan_pattern(0b01000000, &env->fp_status);
+ /*
+ * RISC-V Zvfofp8min extension requires:
+ * - Same canonical NaN (0x7F) for both E4M3 and E5M2 formats
+ * - E5M2 format does not generate signaling NaNs (all NaNs are quiet)
+ */
+ set_ocp_fp8_same_canonical_nan(true, &env->fp_status);
+ set_ocp_fp8e5m2_no_signal_nan(true, &env->fp_status);
env->vill = true;
#ifndef CONFIG_USER_ONLY
diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc
index 3696f02ee0..59302894af 100644
--- a/target/riscv/cpu_cfg_fields.h.inc
+++ b/target/riscv/cpu_cfg_fields.h.inc
@@ -104,6 +104,7 @@ BOOL_FIELD(ext_zvfbfmin)
BOOL_FIELD(ext_zvfbfwma)
BOOL_FIELD(ext_zvfh)
BOOL_FIELD(ext_zvfhmin)
+BOOL_FIELD(ext_zvfofp8min)
BOOL_FIELD(ext_smaia)
BOOL_FIELD(ext_ssaia)
BOOL_FIELD(ext_smctr)
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index 378b298886..ba89436f13 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -710,6 +710,11 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
return;
}
+ if (cpu->cfg.ext_zvfofp8min && !cpu->cfg.ext_zve32f) {
+ error_setg(errp, "Zvfofp8min extension depends on Zve32f extension");
+ return;
+ }
+
if (cpu->cfg.ext_zvfh && !cpu->cfg.ext_zfhmin) {
error_setg(errp, "Zvfh extensions requires Zfhmin extension");
return;
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index ec0ea4c143..ee5a1e595b 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -38,7 +38,8 @@ static target_ulong vtype_reserved(CPURISCVState *env, target_ulong vtype)
int xlen = riscv_cpu_xlen(env);
target_ulong reserved = 0;
- if (riscv_cpu_cfg(env)->ext_zvfbfa) {
+ if (riscv_cpu_cfg(env)->ext_zvfbfa ||
+ riscv_cpu_cfg(env)->ext_zvfofp8min) {
reserved = vtype & MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
xlen - 1 - R_VTYPE_RESERVED_SHIFT);
} else {
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 06/18] target/riscv: Add cfg properity for Zvfofp8min extension
2026-01-08 15:16 ` [PATCH 06/18] target/riscv: Add cfg properity for Zvfofp8min extension Max Chou
@ 2026-01-09 7:05 ` Chao Liu
2026-01-15 9:18 ` Max Chou
0 siblings, 1 reply; 33+ messages in thread
From: Chao Liu @ 2026-01-09 7:05 UTC (permalink / raw)
To: Max Chou, qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei
Hi, Max:
On 1/8/2026 11:16 PM, Max Chou wrote:
> According to the ISA spec of Zvfofp8min extension,
>
> "The Zvfofp8min extension requires on the Zve32f extension."
>
> Signed-off-by: Max Chou <max.chou@sifive.com>
> ---
> target/riscv/cpu.c | 8 ++++++++
> target/riscv/cpu_cfg_fields.h.inc | 1 +
> target/riscv/tcg/tcg-cpu.c | 5 +++++
> target/riscv/vector_helper.c | 3 ++-
> 4 files changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 8eab992154..b4b10b52d8 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -193,6 +193,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
> ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
> ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
> ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
> + ISA_EXT_DATA_ENTRY(zvfofp8min, PRIV_VERSION_1_12_0, ext_zvfofp8min),
> ISA_EXT_DATA_ENTRY(zvkb, PRIV_VERSION_1_12_0, ext_zvkb),
> ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg),
> ISA_EXT_DATA_ENTRY(zvkn, PRIV_VERSION_1_12_0, ext_zvkn),
> @@ -778,6 +779,13 @@ static void riscv_cpu_reset_hold(Object *obj, ResetType type)
> set_default_nan_mode(1, &env->fp_status);
> /* Default NaN value: sign bit clear, frac msb set */
> set_float_default_nan_pattern(0b01000000, &env->fp_status);
> + /*
> + * RISC-V Zvfofp8min extension requires:
> + * - Same canonical NaN (0x7F) for both E4M3 and E5M2 formats
> + * - E5M2 format does not generate signaling NaNs (all NaNs are quiet)
> + */
> + set_ocp_fp8_same_canonical_nan(true, &env->fp_status);
> + set_ocp_fp8e5m2_no_signal_nan(true, &env->fp_status);
Should ocp_fp8_* flags be conditional on Zvfofp8min instead of always enabled at
reset? Better for maintainability.
Thanks,
Chao
> env->vill = true;
>
> #ifndef CONFIG_USER_ONLY
> diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc
> index 3696f02ee0..59302894af 100644
> --- a/target/riscv/cpu_cfg_fields.h.inc
> +++ b/target/riscv/cpu_cfg_fields.h.inc
> @@ -104,6 +104,7 @@ BOOL_FIELD(ext_zvfbfmin)
> BOOL_FIELD(ext_zvfbfwma)
> BOOL_FIELD(ext_zvfh)
> BOOL_FIELD(ext_zvfhmin)
> +BOOL_FIELD(ext_zvfofp8min)
> BOOL_FIELD(ext_smaia)
> BOOL_FIELD(ext_ssaia)
> BOOL_FIELD(ext_smctr)
> diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
> index 378b298886..ba89436f13 100644
> --- a/target/riscv/tcg/tcg-cpu.c
> +++ b/target/riscv/tcg/tcg-cpu.c
> @@ -710,6 +710,11 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
> return;
> }
>
> + if (cpu->cfg.ext_zvfofp8min && !cpu->cfg.ext_zve32f) {
> + error_setg(errp, "Zvfofp8min extension depends on Zve32f extension");
> + return;
> + }
> +
> if (cpu->cfg.ext_zvfh && !cpu->cfg.ext_zfhmin) {
> error_setg(errp, "Zvfh extensions requires Zfhmin extension");
> return;
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index ec0ea4c143..ee5a1e595b 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -38,7 +38,8 @@ static target_ulong vtype_reserved(CPURISCVState *env, target_ulong vtype)
> int xlen = riscv_cpu_xlen(env);
> target_ulong reserved = 0;
>
> - if (riscv_cpu_cfg(env)->ext_zvfbfa) {
> + if (riscv_cpu_cfg(env)->ext_zvfbfa ||
> + riscv_cpu_cfg(env)->ext_zvfofp8min) {
> reserved = vtype & MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
> xlen - 1 - R_VTYPE_RESERVED_SHIFT);
> } else {
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 06/18] target/riscv: Add cfg properity for Zvfofp8min extension
2026-01-09 7:05 ` Chao Liu
@ 2026-01-15 9:18 ` Max Chou
0 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-15 9:18 UTC (permalink / raw)
To: Chao Liu
Cc: qemu-devel, qemu-riscv, Palmer Dabbelt, Alistair Francis,
Aurelien Jarno, Peter Maydell, Alex Bennée, Weiwei Li,
Daniel Henrique Barboza, Liu Zhiwei
On 2026-01-09 15:05, Chao Liu wrote:
> > + set_ocp_fp8_same_canonical_nan(true, &env->fp_status);
> > + set_ocp_fp8e5m2_no_signal_nan(true, &env->fp_status);
> Should ocp_fp8_* flags be conditional on Zvfofp8min instead of always enabled at
> reset? Better for maintainability.
>
Thanks for the suggestions. I will consider it at v2.
rnax
> Thanks,
> Chao
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 07/18] target/riscv: Add implied rules for Zvfofp8min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (5 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 06/18] target/riscv: Add cfg properity for Zvfofp8min extension Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 08/18] target/riscv: rvv: Make vfwcvtbf16.f.f.v support OFP8 to BF16 conversion " Max Chou
` (11 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
Add implied rules to recursively enable the extensions implied by the
Zvfofp8min extension.
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/cpu.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index b4b10b52d8..53b5aeb9b2 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -2513,6 +2513,15 @@ static RISCVCPUImpliedExtsRule ZVFHMIN_IMPLIED = {
},
};
+static RISCVCPUImpliedExtsRule ZVFOFP8MIN_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvfofp8min),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zve32f),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
static RISCVCPUImpliedExtsRule ZVKN_IMPLIED = {
.ext = CPU_CFG_OFFSET(ext_zvkn),
.implied_multi_exts = {
@@ -2650,7 +2659,8 @@ RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[] = {
&ZKS_IMPLIED, &ZVBB_IMPLIED, &ZVE32F_IMPLIED,
&ZVE32X_IMPLIED, &ZVE64D_IMPLIED, &ZVE64F_IMPLIED, &ZVE64X_IMPLIED,
&ZVFBFA_IMPLIED, &ZVFBFMIN_IMPLIED, &ZVFBFWMA_IMPLIED,
- &ZVFH_IMPLIED, &ZVFHMIN_IMPLIED, &ZVKN_IMPLIED,
+ &ZVFH_IMPLIED, &ZVFHMIN_IMPLIED, &ZVFOFP8MIN_IMPLIED,
+ &ZVKN_IMPLIED,
&ZVKNC_IMPLIED, &ZVKNG_IMPLIED, &ZVKNHB_IMPLIED,
&ZVKS_IMPLIED, &ZVKSC_IMPLIED, &ZVKSG_IMPLIED, &SSCFG_IMPLIED,
&SUPM_IMPLIED, &SSPM_IMPLIED, &SMCTR_IMPLIED, &SSCTR_IMPLIED,
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 08/18] target/riscv: rvv: Make vfwcvtbf16.f.f.v support OFP8 to BF16 conversion for Zvfofp8min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (6 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 07/18] target/riscv: Add implied rules " Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 09/18] target/riscv: rvv: Make vfncvtbf16.f.f.w support BF16 to OFP8 " Max Chou
` (10 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
According to the Zvfofp8min extension, the vfwcvtbf16.f.f.v instruction
supports OFP8 to BF16 conversion when SEW is 8.
And the VTYPE.altfmt field is used to select the OFP8 format.
* altfmt = 0: OFP8.e4m3 to BF16
* altfmt = 1: OFP8.e5m2 to BF16
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/helper.h | 12 +++
target/riscv/insn_trans/trans_rvbf16.c.inc | 16 +++-
target/riscv/vector_helper.c | 93 ++++++++++++++++++++++
3 files changed, 117 insertions(+), 4 deletions(-)
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index eb0a488ba8..356c24d9fb 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1247,6 +1247,18 @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32)
+/* OFP8 functions */
+DEF_HELPER_5(vfwcvtbf16_f_f_v_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfwcvtbf16_f_f_v_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvtbf16_f_f_w_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvtbf16_f_f_w_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvtbf16_sat_f_f_w_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvtbf16_sat_f_f_w_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+
/* Vector crypto functions */
DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn_trans/trans_rvbf16.c.inc b/target/riscv/insn_trans/trans_rvbf16.c.inc
index 6cfda03d2e..9aafd4d2ef 100644
--- a/target/riscv/insn_trans/trans_rvbf16.c.inc
+++ b/target/riscv/insn_trans/trans_rvbf16.c.inc
@@ -92,11 +92,20 @@ static bool trans_vfncvtbf16_f_f_w(DisasContext *ctx, arg_vfncvtbf16_f_f_w *a)
static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v *a)
{
REQUIRE_FPU;
- REQUIRE_ZVFBFMIN(ctx);
- if (opfv_widen_check(ctx, a) && (ctx->sew == MO_16)) {
+ if (opfv_widen_check(ctx, a) &&
+ ((ctx->sew == MO_16 && ctx->cfg_ptr->ext_zvfbfmin) ||
+ (ctx->sew == MO_8 && ctx->cfg_ptr->ext_zvfofp8min))) {
+ gen_helper_gvec_3_ptr *fn;
uint32_t data = 0;
+ if (ctx->sew == MO_16) {
+ fn = gen_helper_vfwcvtbf16_f_f_v;
+ } else {
+ fn = ctx->altfmt ? gen_helper_vfwcvtbf16_f_f_v_ofp8e5m2 :
+ gen_helper_vfwcvtbf16_f_f_v_ofp8e4m3;
+ }
+
gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -106,8 +115,7 @@ static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v *a)
tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
vreg_ofs(ctx, a->rs2), tcg_env,
ctx->cfg_ptr->vlenb,
- ctx->cfg_ptr->vlenb, data,
- gen_helper_vfwcvtbf16_f_f_v);
+ ctx->cfg_ptr->vlenb, data, fn);
finalize_rvv_inst(ctx);
return true;
}
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index ee5a1e595b..759ebb3251 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -5024,6 +5024,99 @@ GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
+/*
+ * Vector OFP8 conversion operations for Zvfofp8min
+ *
+ * Note: The OCP FP8 conversion functions use flags in float_status to control
+ * the same_canonical_nan and only_quiet_nan behavior. RISC-V should set
+ * ocp_fp8_same_canonical_nan and ocp_fp8e5m2_no_signal_nan flags during CPU
+ * initialization to get the correct Zvfofp8min behavior.
+ */
+
+/* Wrapper functions for RVVCALL macro compatibility */
+static uint8_t vfncvt_bf16_to_e4m3(uint16_t a, float_status *s)
+{
+ return bfloat16_to_float8_e4m3(a, false, s);
+}
+
+static uint8_t vfncvt_bf16_to_e5m2(uint16_t a, float_status *s)
+{
+ return bfloat16_to_float8_e5m2(a, false, s);
+}
+
+static uint8_t vfncvt_bf16_to_e4m3_sat(uint16_t a, float_status *s)
+{
+ return bfloat16_to_float8_e4m3(a, true, s);
+}
+
+static uint8_t vfncvt_bf16_to_e5m2_sat(uint16_t a, float_status *s)
+{
+ return bfloat16_to_float8_e5m2(a, true, s);
+}
+
+static uint8_t vfncvt_f32_to_e4m3(uint32_t a, float_status *s)
+{
+ return float32_to_float8_e4m3(a, false, s);
+}
+
+static uint8_t vfncvt_f32_to_e5m2(uint32_t a, float_status *s)
+{
+ return float32_to_float8_e5m2(a, false, s);
+}
+
+static uint8_t vfncvt_f32_to_e4m3_sat(uint32_t a, float_status *s)
+{
+ return float32_to_float8_e4m3(a, true, s);
+}
+
+static uint8_t vfncvt_f32_to_e5m2_sat(uint32_t a, float_status *s)
+{
+ return float32_to_float8_e5m2(a, true, s);
+}
+
+/* vfwcvtbf16.f.f.v vd, vs2, vm # Convert OFP8 to BF16. */
+RVVCALL(OPFVV1, vfwcvtbf16_f_f_v_ofp8e4m3, WOP_UU_B, H2, H1,
+ float8_e4m3_to_bfloat16)
+RVVCALL(OPFVV1, vfwcvtbf16_f_f_v_ofp8e5m2, WOP_UU_B, H2, H1,
+ float8_e5m2_to_bfloat16)
+GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v_ofp8e4m3, 2)
+GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v_ofp8e5m2, 2)
+
+/* vfncvtbf16.f.f.w vd, vs2, vm # Convert BF16 to OFP8 without saturation. */
+RVVCALL(OPFVV1, vfncvtbf16_f_f_w_ofp8e4m3, NOP_UU_B, H1, H2,
+ vfncvt_bf16_to_e4m3)
+RVVCALL(OPFVV1, vfncvtbf16_f_f_w_ofp8e5m2, NOP_UU_B, H1, H2,
+ vfncvt_bf16_to_e5m2)
+GEN_VEXT_V_ENV(vfncvtbf16_f_f_w_ofp8e4m3, 1)
+GEN_VEXT_V_ENV(vfncvtbf16_f_f_w_ofp8e5m2, 1)
+
+/* vfncvtbf16.sat.f.f.w vd, vs2, vm # Convert BF16 to OFP8 with saturation. */
+RVVCALL(OPFVV1, vfncvtbf16_sat_f_f_w_ofp8e4m3, NOP_UU_B, H1, H2,
+ vfncvt_bf16_to_e4m3_sat)
+RVVCALL(OPFVV1, vfncvtbf16_sat_f_f_w_ofp8e5m2, NOP_UU_B, H1, H2,
+ vfncvt_bf16_to_e5m2_sat)
+GEN_VEXT_V_ENV(vfncvtbf16_sat_f_f_w_ofp8e4m3, 1)
+GEN_VEXT_V_ENV(vfncvtbf16_sat_f_f_w_ofp8e5m2, 1)
+
+/* Quad-width narrowing type for FP32 to OFP8 */
+#define QOP_UU_B uint8_t, uint32_t, uint32_t
+
+/* vfncvt.f.f.q vd, vs2, vm # Convert FP32 to OFP8. */
+RVVCALL(OPFVV1, vfncvt_f_f_q_ofp8e4m3, QOP_UU_B, H1, H4,
+ vfncvt_f32_to_e4m3)
+RVVCALL(OPFVV1, vfncvt_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
+ vfncvt_f32_to_e5m2)
+GEN_VEXT_V_ENV(vfncvt_f_f_q_ofp8e4m3, 1)
+GEN_VEXT_V_ENV(vfncvt_f_f_q_ofp8e5m2, 1)
+
+/* vfncvt.sat.f.f.q vd, vs2, vm # Convert FP32 to OFP8 with saturation. */
+RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e4m3, QOP_UU_B, H1, H4,
+ vfncvt_f32_to_e4m3_sat)
+RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
+ vfncvt_f32_to_e5m2_sat)
+GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e4m3, 1)
+GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e5m2, 1)
+
/*
* Vector Reduction Operations
*/
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 09/18] target/riscv: rvv: Make vfncvtbf16.f.f.w support BF16 to OFP8 conversion for Zvfofp8min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (7 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 08/18] target/riscv: rvv: Make vfwcvtbf16.f.f.v support OFP8 to BF16 conversion " Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 10/18] target/riscv: rvv: Add vfncvtbf16.sat.f.f.w instruction " Max Chou
` (9 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
According to the Zvfofp8min extension, the vfncvtbf16.f.f.w instruction
supports BF16 to OFP8 conversion without saturation when SEW is 8.
And the VTYPE.altfmt field is used to select the OFP8 format.
* altfmt = 0: BF16 to OFP8.e4m3
* altfmt = 1: BF16 to OFP8.e5m2
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/insn_trans/trans_rvbf16.c.inc | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/target/riscv/insn_trans/trans_rvbf16.c.inc b/target/riscv/insn_trans/trans_rvbf16.c.inc
index 9aafd4d2ef..16f4403909 100644
--- a/target/riscv/insn_trans/trans_rvbf16.c.inc
+++ b/target/riscv/insn_trans/trans_rvbf16.c.inc
@@ -67,11 +67,20 @@ static bool trans_fcvt_s_bf16(DisasContext *ctx, arg_fcvt_s_bf16 *a)
static bool trans_vfncvtbf16_f_f_w(DisasContext *ctx, arg_vfncvtbf16_f_f_w *a)
{
REQUIRE_FPU;
- REQUIRE_ZVFBFMIN(ctx);
- if (opfv_narrow_check(ctx, a) && (ctx->sew == MO_16)) {
+ if (opfv_narrow_check(ctx, a) &&
+ ((ctx->sew == MO_16 && ctx->cfg_ptr->ext_zvfbfmin) ||
+ (ctx->sew == MO_8 && ctx->cfg_ptr->ext_zvfofp8min))) {
+ gen_helper_gvec_3_ptr *fn;
uint32_t data = 0;
+ if (ctx->sew == MO_16) {
+ fn = gen_helper_vfncvtbf16_f_f_w;
+ } else {
+ fn = ctx->altfmt ? gen_helper_vfncvtbf16_f_f_w_ofp8e5m2 :
+ gen_helper_vfncvtbf16_f_f_w_ofp8e4m3;
+ }
+
gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -81,8 +90,7 @@ static bool trans_vfncvtbf16_f_f_w(DisasContext *ctx, arg_vfncvtbf16_f_f_w *a)
tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
vreg_ofs(ctx, a->rs2), tcg_env,
ctx->cfg_ptr->vlenb,
- ctx->cfg_ptr->vlenb, data,
- gen_helper_vfncvtbf16_f_f_w);
+ ctx->cfg_ptr->vlenb, data, fn);
finalize_rvv_inst(ctx);
return true;
}
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 10/18] target/riscv: rvv: Add vfncvtbf16.sat.f.f.w instruction for Zvfofp8min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (8 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 09/18] target/riscv: rvv: Make vfncvtbf16.f.f.w support BF16 to OFP8 " Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 11/18] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions " Max Chou
` (8 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
The vfncvtbf16.sat.f.f.w instruction converts a vector of 16-bit
floating-point numbers to a vector of 8-bit floating-point numbers with
saturation.
The VTYPE.altfmt field is used to select the format of the 8-bit floating-point
numbers.
* altfmt = 0: BF16 to OFP8.e4m3
* altfmt = 1: BF16 to OFP8.e5m2
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/insn32.decode | 3 ++
target/riscv/insn_trans/trans_rvofp8.c.inc | 53 ++++++++++++++++++++++
target/riscv/translate.c | 1 +
3 files changed, 57 insertions(+)
create mode 100644 target/riscv/insn_trans/trans_rvofp8.c.inc
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 6e35c4b1e6..49201c0c20 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -973,6 +973,9 @@ vfwcvtbf16_f_f_v 010010 . ..... 01101 001 ..... 1010111 @r2_vm
vfwmaccbf16_vv 111011 . ..... ..... 001 ..... 1010111 @r_vm
vfwmaccbf16_vf 111011 . ..... ..... 101 ..... 1010111 @r_vm
+# *** Zvfofp8min Extension ***
+vfncvtbf16_sat_f_f_w 010010 . ..... 11111 001 ..... 1010111 @r2_vm
+
# *** Zvbc vector crypto extension ***
vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm
vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm
diff --git a/target/riscv/insn_trans/trans_rvofp8.c.inc b/target/riscv/insn_trans/trans_rvofp8.c.inc
new file mode 100644
index 0000000000..70a22616dc
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvofp8.c.inc
@@ -0,0 +1,53 @@
+/*
+ * RISC-V translation routines for the OFP8 Standard Extensions.
+ *
+ * Copyright (C) 2025 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define REQUIRE_ZVFOFP8MIN(ctx) do { \
+ if (!ctx->cfg_ptr->ext_zvfofp8min) { \
+ return false; \
+ } \
+} while (0)
+
+
+static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZVFOFP8MIN(ctx);
+
+ if (opfv_narrow_check(ctx, a) && ctx->sew == MO_8) {
+ gen_helper_gvec_3_ptr *fn;
+ uint32_t data = 0;
+
+ fn = ctx->altfmt ? gen_helper_vfncvtbf16_sat_f_f_w_ofp8e5m2 :
+ gen_helper_vfncvtbf16_sat_f_f_w_ofp8e4m3;
+
+ gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
+ data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+ data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+ tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+ vreg_ofs(ctx, a->rs2), tcg_env,
+ ctx->cfg_ptr->vlenb,
+ ctx->cfg_ptr->vlenb, data, fn);
+ finalize_rvv_inst(ctx);
+ return true;
+ }
+ return false;
+}
+
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index a1c4b325e5..137022d7fb 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1219,6 +1219,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
#include "insn_trans/trans_privileged.c.inc"
#include "insn_trans/trans_svinval.c.inc"
#include "insn_trans/trans_rvbf16.c.inc"
+#include "insn_trans/trans_rvofp8.c.inc"
#include "decode-xthead.c.inc"
#include "decode-xmips.c.inc"
#include "insn_trans/trans_xthead.c.inc"
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 11/18] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions for Zvfofp8min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (9 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 10/18] target/riscv: rvv: Add vfncvtbf16.sat.f.f.w instruction " Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-09 6:37 ` Chao Liu
2026-01-08 15:16 ` [PATCH 12/18] target/riscv: Expose Zvfofp8min properity Max Chou
` (7 subsequent siblings)
18 siblings, 1 reply; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
The vfncvt.f.f.q and vfncvt.sat.f.f.q instructions convert a vector of
FP32 elements to a vector of OFP8 elements. The vfncvt.sat.f.f.q instruction
converts a vector of FP32 elements to a vector of OFP8 elements with saturation.
The VTYPE.altfmt field is used to select the OFP8 format.
* altfmt = 0: FP32 to OFP8.e4m3
* altfmt = 1: FP32 to OFP8.e5m2
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/insn32.decode | 2 +
target/riscv/insn_trans/trans_rvofp8.c.inc | 62 ++++++++++++++++++++++
target/riscv/insn_trans/trans_rvv.c.inc | 39 ++++++++++++++
3 files changed, 103 insertions(+)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 49201c0c20..f2b413c7d4 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -974,6 +974,8 @@ vfwmaccbf16_vv 111011 . ..... ..... 001 ..... 1010111 @r_vm
vfwmaccbf16_vf 111011 . ..... ..... 101 ..... 1010111 @r_vm
# *** Zvfofp8min Extension ***
+vfncvt_f_f_q 010010 . ..... 11001 001 ..... 1010111 @r2_vm
+vfncvt_sat_f_f_q 010010 . ..... 11011 001 ..... 1010111 @r2_vm
vfncvtbf16_sat_f_f_w 010010 . ..... 11111 001 ..... 1010111 @r2_vm
# *** Zvbc vector crypto extension ***
diff --git a/target/riscv/insn_trans/trans_rvofp8.c.inc b/target/riscv/insn_trans/trans_rvofp8.c.inc
index 70a22616dc..4b44417c47 100644
--- a/target/riscv/insn_trans/trans_rvofp8.c.inc
+++ b/target/riscv/insn_trans/trans_rvofp8.c.inc
@@ -22,6 +22,13 @@
} \
} while (0)
+static bool zvfofp8min_narrow_quad_check(DisasContext *s, arg_rmr *a)
+{
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_sq(s, a->rd, a->rs2, a->vm) &&
+ (s->sew == MO_8);
+}
static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
{
@@ -51,3 +58,58 @@ static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
return false;
}
+static bool trans_vfncvt_f_f_q(DisasContext *ctx, arg_rmr *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZVFOFP8MIN(ctx);
+
+ if (zvfofp8min_narrow_quad_check(ctx, a)) {
+ gen_helper_gvec_3_ptr *fn;
+ uint32_t data = 0;
+
+ fn = ctx->altfmt ? gen_helper_vfncvt_f_f_q_ofp8e5m2 :
+ gen_helper_vfncvt_f_f_q_ofp8e4m3;
+
+ gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
+ data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+ data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+ tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+ vreg_ofs(ctx, a->rs2), tcg_env,
+ ctx->cfg_ptr->vlenb,
+ ctx->cfg_ptr->vlenb, data, fn);
+ finalize_rvv_inst(ctx);
+ return true;
+ }
+ return false;
+}
+
+static bool trans_vfncvt_sat_f_f_q(DisasContext *ctx, arg_rmr *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZVFOFP8MIN(ctx);
+
+ if (zvfofp8min_narrow_quad_check(ctx, a)) {
+ gen_helper_gvec_3_ptr *fn;
+ uint32_t data = 0;
+
+ fn = ctx->altfmt ? gen_helper_vfncvt_sat_f_f_q_ofp8e5m2 :
+ gen_helper_vfncvt_sat_f_f_q_ofp8e4m3;
+
+ gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
+ data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+ data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+ tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+ vreg_ofs(ctx, a->rs2), tcg_env,
+ ctx->cfg_ptr->vlenb,
+ ctx->cfg_ptr->vlenb, data, fn);
+ finalize_rvv_inst(ctx);
+ return true;
+ }
+ return false;
+}
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index bcd45b0aa3..93c0761171 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -621,6 +621,45 @@ static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
require_align(vs1, s->lmul);
}
+/*
+ * Common check function for vector narrowing instructions
+ * of single-width result (SEW) and quad-width source (4*SEW).
+ *
+ * Rules to be checked here:
+ * 1. The largest vector register group used by an instruction
+ * can not be greater than 8 vector registers
+ * (Section 31.5.2)
+ * 2. Quad-width SEW cannot be greater than ELEN.
+ * (Section 31.2)
+ * 3. Source vector register number is multiples of 4 * LMUL.
+ * (Section 31.3.4.2)
+ * 4. Destination vector register number is multiples of LMUL.
+ * (Section 31.3.4.2)
+ * 5. Destination vector register group for a masked vector
+ * instruction cannot overlap the source mask register (v0).
+ * (Section 31.5.3)
+ * risc-v unprivileged spec
+ */
+static bool vext_quad_narrow_check_common(DisasContext *s, int vd, int vs2,
+ int vm)
+{
+ return (s->lmul <= 1) &&
+ (s->sew < MO_32) &&
+ ((s->sew + 2) <= (s->cfg_ptr->elen >> 4)) &&
+ require_align(vs2, s->lmul + 2) &&
+ require_align(vd, s->lmul) &&
+ require_vm(vm, vd);
+}
+
+static bool vext_check_sq(DisasContext *s, int vd, int vs, int vm)
+{
+ bool ret = vext_quad_narrow_check_common(s, vd, vs, vm);
+ if (vd != vs) {
+ ret &= require_noover(vd, s->lmul, vs, s->lmul + 2);
+ }
+ return ret;
+}
+
/*
* Check function for vector reduction instructions.
*
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 11/18] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions for Zvfofp8min extension
2026-01-08 15:16 ` [PATCH 11/18] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions " Max Chou
@ 2026-01-09 6:37 ` Chao Liu
2026-01-15 9:24 ` Max Chou
0 siblings, 1 reply; 33+ messages in thread
From: Chao Liu @ 2026-01-09 6:37 UTC (permalink / raw)
To: Max Chou, qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei
Hi, Max:
On 1/8/2026 11:16 PM, Max Chou wrote:
> The vfncvt.f.f.q and vfncvt.sat.f.f.q instructions convert a vector of
> FP32 elements to a vector of OFP8 elements. The vfncvt.sat.f.fq instruction
> converts a vector of FP32 elements to a vector of OFP8 elements with saturation.
> The VTYPE.altfmt field is used to select the OFP8 format.
> * altfmt = 0: FP32 to OFP8.e4m3
> * altfmt = 1: FP32 to OFP8.e5m2
>
> Signed-off-by: Max Chou <max.chou@sifive.com>
> ---
> target/riscv/insn32.decode | 2 +
> target/riscv/insn_trans/trans_rvofp8.c.inc | 62 ++++++++++++++++++++++
> target/riscv/insn_trans/trans_rvv.c.inc | 39 ++++++++++++++
> 3 files changed, 103 insertions(+)
>
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index 49201c0c20..f2b413c7d4 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -974,6 +974,8 @@ vfwmaccbf16_vv 111011 . ..... ..... 001 ..... 1010111 @r_vm
> vfwmaccbf16_vf 111011 . ..... ..... 101 ..... 1010111 @r_vm
>
> # *** Zvfofp8min Extension ***
> +vfncvt_f_f_q 010010 . ..... 11001 001 ..... 1010111 @r2_vm
> +vfncvt_sat_f_f_q 010010 . ..... 11011 001 ..... 1010111 @r2_vm
> vfncvtbf16_sat_f_f_w 010010 . ..... 11111 001 ..... 1010111 @r2_vm
>
> # *** Zvbc vector crypto extension ***
> diff --git a/target/riscv/insn_trans/trans_rvofp8.c.inc b/target/riscv/insn_trans/trans_rvofp8.c.inc
> index 70a22616dc..4b44417c47 100644
> --- a/target/riscv/insn_trans/trans_rvofp8.c.inc
> +++ b/target/riscv/insn_trans/trans_rvofp8.c.inc
> @@ -22,6 +22,13 @@
> } \
> } while (0)
>
> +static bool zvfofp8min_narrow_quad_check(DisasContext *s, arg_rmr *a)
> +{
> + return require_rvv(s) &&
> + vext_check_isa_ill(s) &&
> + vext_check_sq(s, a->rd, a->rs2, a->vm) &&
> + (s->sew == MO_8);
> +}
>
> static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
> {
> @@ -51,3 +58,58 @@ static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
> return false;
> }
>
> +static bool trans_vfncvt_f_f_q(DisasContext *ctx, arg_rmr *a)
> +{
> + REQUIRE_FPU;
> + REQUIRE_ZVFOFP8MIN(ctx);
> +
> + if (zvfofp8min_narrow_quad_check(ctx, a)) {
> + gen_helper_gvec_3_ptr *fn;
> + uint32_t data = 0;
> +
> + fn = ctx->altfmt ? gen_helper_vfncvt_f_f_q_ofp8e5m2 :
> + gen_helper_vfncvt_f_f_q_ofp8e4m3;
> +
> + gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
> +
> + data = FIELD_DP32(data, VDATA, VM, a->vm);
> + data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
> + data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
> + data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
> + tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
> + vreg_ofs(ctx, a->rs2), tcg_env,
> + ctx->cfg_ptr->vlenb,
> + ctx->cfg_ptr->vlenb, data, fn);
> + finalize_rvv_inst(ctx);
> + return true;
> + }
> + return false;
> +}
> +
> +static bool trans_vfncvt_sat_f_f_q(DisasContext *ctx, arg_rmr *a)
> +{
> + REQUIRE_FPU;
> + REQUIRE_ZVFOFP8MIN(ctx);
> +
> + if (zvfofp8min_narrow_quad_check(ctx, a)) {
> + gen_helper_gvec_3_ptr *fn;
> + uint32_t data = 0;
> +
> + fn = ctx->altfmt ? gen_helper_vfncvt_sat_f_f_q_ofp8e5m2 :
> + gen_helper_vfncvt_sat_f_f_q_ofp8e4m3;
> +
> + gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
> +
> + data = FIELD_DP32(data, VDATA, VM, a->vm);
> + data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
> + data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
> + data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
> + tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
> + vreg_ofs(ctx, a->rs2), tcg_env,
> + ctx->cfg_ptr->vlenb,
> + ctx->cfg_ptr->vlenb, data, fn);
> + finalize_rvv_inst(ctx);
> + return true;
> + }
> + return false;
> +}
Missing the trailing blank line.
Thanks,
Chao
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index bcd45b0aa3..93c0761171 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -621,6 +621,45 @@ static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
> require_align(vs1, s->lmul);
> }
>
> +/*
> + * Common check function for vector narrowing instructions
> + * of single-width result (SEW) and quad-width source (4*SEW).
> + *
> + * Rules to be checked here:
> + * 1. The largest vector register group used by an instruction
> + * can not be greater than 8 vector registers
> + * (Section 31.5.2)
> + * 2. Quad-width SEW cannot greater than ELEN.
> + * (Section 31.2)
> + * 3. Source vector register number is multiples of 4 * LMUL.
> + * (Section 31.3.4.2)
> + * 4. Destination vector register number is multiples of LMUL.
> + * (Section 31.3.4.2)
> + * 5. Destination vector register group for a masked vector
> + * instruction cannot overlap the source mask register (v0).
> + * (Section 31.5.3)
> + * risc-v unprivileged spec
> + */
> +static bool vext_quad_narrow_check_common(DisasContext *s, int vd, int vs2,
> + int vm)
> +{
> + return (s->lmul <= 1) &&
> + (s->sew < MO_32) &&
> + ((s->sew + 2) <= (s->cfg_ptr->elen >> 4)) &&
> + require_align(vs2, s->lmul + 2) &&
> + require_align(vd, s->lmul) &&
> + require_vm(vm, vd);
> +}
> +
> +static bool vext_check_sq(DisasContext *s, int vd, int vs, int vm)
> +{
> + bool ret = vext_quad_narrow_check_common(s, vd, vs, vm);
> + if (vd != vs) {
> + ret &= require_noover(vd, s->lmul, vs, s->lmul + 2);
> + }
> + return ret;
> +}
> +
> /*
> * Check function for vector reduction instructions.
> *
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 11/18] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions for Zvfofp8min extension
2026-01-09 6:37 ` Chao Liu
@ 2026-01-15 9:24 ` Max Chou
0 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-15 9:24 UTC (permalink / raw)
To: Chao Liu
Cc: qemu-devel, qemu-riscv, Palmer Dabbelt, Alistair Francis,
Aurelien Jarno, Peter Maydell, Alex Bennée, Weiwei Li,
Daniel Henrique Barboza, Liu Zhiwei
On 2026-01-09 14:37, Chao Liu wrote:
> > + return false;
> > +}
> Missing the trailing blank line.
>
Will fix it at v2.
Thanks,
rnax
> Thanks,
> Chao
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 12/18] target/riscv: Expose Zvfofp8min properity
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (10 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 11/18] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions " Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 13/18] disas/riscv: Add support of Zvfofp8min extension Max Chou
` (6 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/cpu.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 53b5aeb9b2..081891c97a 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1387,6 +1387,9 @@ const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = {
MULTI_EXT_CFG_BOOL("x-svukte", ext_svukte, false),
MULTI_EXT_CFG_BOOL("x-zvfbfa", ext_zvfbfa, false),
+ /* Zvfofp8min extension for OFP8 conversion */
+ MULTI_EXT_CFG_BOOL("x-zvfofp8min", ext_zvfofp8min, false),
+
{ },
};
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 13/18] disas/riscv: Add support of Zvfofp8min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (11 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 12/18] target/riscv: Expose Zvfofp8min properity Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 14/18] target/riscv: Add cfg properity for Zvfofp4min extension Max Chou
` (5 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou
This patch adds support to disassemble Zvfofp8min instructions.
Signed-off-by: Max Chou <max.chou@sifive.com>
---
disas/riscv.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/disas/riscv.c b/disas/riscv.c
index 85cd2a9c2a..daffe9917f 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -984,6 +984,9 @@ typedef enum {
rv_op_ssamoswap_d = 953,
rv_op_c_sspush = 954,
rv_op_c_sspopchk = 955,
+ rv_op_vfncvtbf16_sat_f_f_w = 956,
+ rv_op_vfncvt_f_f_q = 957,
+ rv_op_vfncvt_sat_f_f_q = 958,
} rv_op;
/* register names */
@@ -2254,6 +2257,9 @@ const rv_opcode_data rvi_opcode_data[] = {
rv_op_sspush, 0 },
{ "c.sspopchk", rv_codec_cmop_ss, rv_fmt_rs1, NULL, rv_op_sspopchk,
rv_op_sspopchk, 0 },
+ { "vfncvtbf16.sat.f.f.w", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+ { "vfncvt.f.f.q", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+ { "vfncvt.sat.f.f.q", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
};
/* CSR names */
@@ -3630,7 +3636,10 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
case 21: op = rv_op_vfncvt_rod_f_f_w; break;
case 22: op = rv_op_vfncvt_rtz_xu_f_w; break;
case 23: op = rv_op_vfncvt_rtz_x_f_w; break;
+ case 25: op = rv_op_vfncvt_f_f_q; break;
+ case 27: op = rv_op_vfncvt_sat_f_f_q; break;
case 29: op = rv_op_vfncvtbf16_f_f_w; break;
+ case 31: op = rv_op_vfncvtbf16_sat_f_f_w; break;
}
break;
case 19:
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 14/18] target/riscv: Add cfg properity for Zvfofp4min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (12 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 13/18] disas/riscv: Add support of Zvfofp8min extension Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-09 6:59 ` Chao Liu
2026-01-08 15:16 ` [PATCH 15/18] target/riscv: Add implied rules " Max Chou
` (4 subsequent siblings)
18 siblings, 1 reply; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
According to the ISA spec of Zvfofp4min extension,
"The Zvfofp4min extension requires on the Zve32f extension."
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/cpu.c | 1 +
target/riscv/cpu_cfg_fields.h.inc | 1 +
target/riscv/tcg/tcg-cpu.c | 10 ++++++++++
3 files changed, 12 insertions(+)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 081891c97a..9d4fc3ab6b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -193,6 +193,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
+ ISA_EXT_DATA_ENTRY(zvfofp4min, PRIV_VERSION_1_12_0, ext_zvfofp4min),
ISA_EXT_DATA_ENTRY(zvfofp8min, PRIV_VERSION_1_12_0, ext_zvfofp8min),
ISA_EXT_DATA_ENTRY(zvkb, PRIV_VERSION_1_12_0, ext_zvkb),
ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg),
diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc
index 59302894af..353a932c36 100644
--- a/target/riscv/cpu_cfg_fields.h.inc
+++ b/target/riscv/cpu_cfg_fields.h.inc
@@ -104,6 +104,7 @@ BOOL_FIELD(ext_zvfbfmin)
BOOL_FIELD(ext_zvfbfwma)
BOOL_FIELD(ext_zvfh)
BOOL_FIELD(ext_zvfhmin)
+BOOL_FIELD(ext_zvfofp4min)
BOOL_FIELD(ext_zvfofp8min)
BOOL_FIELD(ext_smaia)
BOOL_FIELD(ext_ssaia)
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index ba89436f13..c095bc9efd 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -715,6 +715,11 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
return;
}
+ if (cpu->cfg.ext_zvfofp4min && !cpu->cfg.ext_zve32f) {
+ error_setg(errp, "Zvfofp4min extension depends on Zve32f extension");
+ return;
+ }
+
if (cpu->cfg.ext_zvfh && !cpu->cfg.ext_zfhmin) {
error_setg(errp, "Zvfh extensions requires Zfhmin extension");
return;
@@ -738,6 +743,11 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
}
}
+ if (cpu->cfg.ext_zvfofp4min && !cpu->cfg.ext_zve32f) {
+ error_setg(errp, "Zvfofp4min extension depends on Zve32f extension");
+ return;
+ }
+
if ((cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinxmin) && !cpu->cfg.ext_zfinx) {
error_setg(errp, "Zdinx/Zhinx/Zhinxmin extensions require Zfinx");
return;
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 14/18] target/riscv: Add cfg properity for Zvfofp4min extension
2026-01-08 15:16 ` [PATCH 14/18] target/riscv: Add cfg properity for Zvfofp4min extension Max Chou
@ 2026-01-09 6:59 ` Chao Liu
2026-01-15 9:22 ` Max Chou
0 siblings, 1 reply; 33+ messages in thread
From: Chao Liu @ 2026-01-09 6:59 UTC (permalink / raw)
To: Max Chou, qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei
Hi, Max:
On 1/8/2026 11:16 PM, Max Chou wrote:
> According to the ISA spec of Zvfofp4min extension,
> "The Zvfofp4min extension requires on the Zve32f extension."
>
> Signed-off-by: Max Chou <max.chou@sifive.com>
> ---
> target/riscv/cpu.c | 1 +
> target/riscv/cpu_cfg_fields.h.inc | 1 +
> target/riscv/tcg/tcg-cpu.c | 10 ++++++++++
> 3 files changed, 12 insertions(+)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 081891c97a..9d4fc3ab6b 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -193,6 +193,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
> ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
> ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
> ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
> + ISA_EXT_DATA_ENTRY(zvfofp4min, PRIV_VERSION_1_12_0, ext_zvfofp4min),
> ISA_EXT_DATA_ENTRY(zvfofp8min, PRIV_VERSION_1_12_0, ext_zvfofp8min),
> ISA_EXT_DATA_ENTRY(zvkb, PRIV_VERSION_1_12_0, ext_zvkb),
> ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg),
> diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc
> index 59302894af..353a932c36 100644
> --- a/target/riscv/cpu_cfg_fields.h.inc
> +++ b/target/riscv/cpu_cfg_fields.h.inc
> @@ -104,6 +104,7 @@ BOOL_FIELD(ext_zvfbfmin)
> BOOL_FIELD(ext_zvfbfwma)
> BOOL_FIELD(ext_zvfh)
> BOOL_FIELD(ext_zvfhmin)
> +BOOL_FIELD(ext_zvfofp4min)
> BOOL_FIELD(ext_zvfofp8min)
> BOOL_FIELD(ext_smaia)
> BOOL_FIELD(ext_ssaia)
> diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
> index ba89436f13..c095bc9efd 100644
> --- a/target/riscv/tcg/tcg-cpu.c
> +++ b/target/riscv/tcg/tcg-cpu.c
> @@ -715,6 +715,11 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
> return;
> }
>
> + if (cpu->cfg.ext_zvfofp4min && !cpu->cfg.ext_zve32f) {
> + error_setg(errp, "Zvfofp4min extension depends on Zve32f extension");
> + return;
> + }
> +
First check.
> if (cpu->cfg.ext_zvfh && !cpu->cfg.ext_zfhmin) {
> error_setg(errp, "Zvfh extensions requires Zfhmin extension");
> return;
> @@ -738,6 +743,11 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
> }
> }
>
> + if (cpu->cfg.ext_zvfofp4min && !cpu->cfg.ext_zve32f) {
> + error_setg(errp, "Zvfofp4min extension depends on Zve32f extension");
> + return;
> + }
> +
Re-verified Zvfofp4min -> Zve32f.
Thanks,
Chao
> if ((cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinxmin) && !cpu->cfg.ext_zfinx) {
> error_setg(errp, "Zdinx/Zhinx/Zhinxmin extensions require Zfinx");
> return;
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 14/18] target/riscv: Add cfg properity for Zvfofp4min extension
2026-01-09 6:59 ` Chao Liu
@ 2026-01-15 9:22 ` Max Chou
0 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-15 9:22 UTC (permalink / raw)
To: Chao Liu
Cc: qemu-devel, qemu-riscv, Palmer Dabbelt, Alistair Francis,
Aurelien Jarno, Peter Maydell, Alex Bennée, Weiwei Li,
Daniel Henrique Barboza, Liu Zhiwei
On 2026-01-09 14:59, Chao Liu wrote:
> Re-verified Zvfofp4min -> Zve32f.
>
Oops, I missed that duplicated check when rebasing.
Thanks, I will fix it in v2.
rnax
> Thanks,
> Chao
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 15/18] target/riscv: Add implied rules for Zvfofp4min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (13 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 14/18] target/riscv: Add cfg properity for Zvfofp4min extension Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 16/18] target/riscv: rvv: Add vfext.vf2 instruction " Max Chou
` (3 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
Add implied rules so that enabling the Zvfofp4min extension recursively
enables the extensions it implies (Zve32f).
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/cpu.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 9d4fc3ab6b..ec1bf8034f 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -2526,6 +2526,15 @@ static RISCVCPUImpliedExtsRule ZVFOFP8MIN_IMPLIED = {
},
};
+static RISCVCPUImpliedExtsRule ZVFOFP4MIN_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvfofp4min),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zve32f),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
static RISCVCPUImpliedExtsRule ZVKN_IMPLIED = {
.ext = CPU_CFG_OFFSET(ext_zvkn),
.implied_multi_exts = {
@@ -2663,7 +2672,7 @@ RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[] = {
&ZKS_IMPLIED, &ZVBB_IMPLIED, &ZVE32F_IMPLIED,
&ZVE32X_IMPLIED, &ZVE64D_IMPLIED, &ZVE64F_IMPLIED, &ZVE64X_IMPLIED,
&ZVFBFA_IMPLIED, &ZVFBFMIN_IMPLIED, &ZVFBFWMA_IMPLIED,
- &ZVFH_IMPLIED, &ZVFHMIN_IMPLIED, &ZVFOFP8MIN_IMPLIED,
+ &ZVFH_IMPLIED, &ZVFHMIN_IMPLIED, &ZVFOFP4MIN_IMPLIED, &ZVFOFP8MIN_IMPLIED,
&ZVKN_IMPLIED,
&ZVKNC_IMPLIED, &ZVKNG_IMPLIED, &ZVKNHB_IMPLIED,
&ZVKS_IMPLIED, &ZVKSC_IMPLIED, &ZVKSG_IMPLIED, &SSCFG_IMPLIED,
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 16/18] target/riscv: rvv: Add vfext.vf2 instruction for Zvfofp4min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (14 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 15/18] target/riscv: Add implied rules " Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 17/18] target/riscv: Expose Zvfofp4min properity Max Chou
` (2 subsequent siblings)
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
The vfext.vf2 instruction converts a vector of OCP FP4 E2M1
floating-point numbers to a vector of OCP FP8 E4M3 floating-point
numbers.
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/helper.h | 3 ++
target/riscv/insn32.decode | 3 ++
target/riscv/insn_trans/trans_rvofp4.c.inc | 54 ++++++++++++++++++++++
target/riscv/translate.c | 1 +
target/riscv/vector_helper.c | 33 +++++++++++++
5 files changed, 94 insertions(+)
create mode 100644 target/riscv/insn_trans/trans_rvofp4.c.inc
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 356c24d9fb..162303fb6c 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1259,6 +1259,9 @@ DEF_HELPER_5(vfncvt_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+/* OFP4 function */
+DEF_HELPER_5(vfext_vf2, void, ptr, ptr, ptr, env, i32)
+
/* Vector crypto functions */
DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index f2b413c7d4..c58223ebd8 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -754,6 +754,9 @@ vsext_vf2 010010 . ..... 00111 010 ..... 1010111 @r2_vm
vsext_vf4 010010 . ..... 00101 010 ..... 1010111 @r2_vm
vsext_vf8 010010 . ..... 00011 010 ..... 1010111 @r2_vm
+# Zvfofp4min Extension
+vfext_vf2 010010 . ..... 10110 010 ..... 1010111 @r2_vm
+
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm11
vsetivli 11 .......... ..... 111 ..... 1010111 @r2_zimm10
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
diff --git a/target/riscv/insn_trans/trans_rvofp4.c.inc b/target/riscv/insn_trans/trans_rvofp4.c.inc
new file mode 100644
index 0000000000..4040a54ab6
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvofp4.c.inc
@@ -0,0 +1,54 @@
+/*
+ * RISC-V translation routines for the OFP4 Standard Extensions.
+ *
+ * Copyright (C) 2025 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+static bool vext_zvfofp4min_check(DisasContext *s, arg_rmr *a)
+{
+ return s->cfg_ptr->ext_zvfofp4min &&
+ (s->sew == MO_8) &&
+ vext_check_altfmt(s, -1) &&
+ (s->lmul >= -2) &&
+ require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ (a->rd != a->rs2) &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs2, s->lmul - 1) &&
+ require_vm(a->vm, a->rd) &&
+ require_noover(a->rd, s->lmul, a->rs2, s->lmul - 1);
+}
+
+static bool trans_vfext_vf2(DisasContext *s, arg_rmr *a)
+{
+ if (vext_zvfofp4min_check(s, a)) {
+ uint32_t data = 0;
+
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
+ data = FIELD_DP32(data, VDATA, VMA, s->vma);
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
+ vreg_ofs(s, a->rs2), tcg_env,
+ s->cfg_ptr->vlenb, s->cfg_ptr->vlenb, data,
+ gen_helper_vfext_vf2);
+ tcg_gen_movi_tl(cpu_vstart, 0);
+ finalize_rvv_inst(s);
+
+ return true;
+ }
+ return false;
+}
+
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 137022d7fb..bf403785b5 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1220,6 +1220,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
#include "insn_trans/trans_svinval.c.inc"
#include "insn_trans/trans_rvbf16.c.inc"
#include "insn_trans/trans_rvofp8.c.inc"
+#include "insn_trans/trans_rvofp4.c.inc"
#include "decode-xthead.c.inc"
#include "decode-xmips.c.inc"
#include "insn_trans/trans_xthead.c.inc"
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 759ebb3251..86057f2bf4 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -5117,6 +5117,7 @@ RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e4m3, 1)
GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e5m2, 1)
+/* Zvfofp4min: vfext.vf2 - OFP4 E2M1 to OFP8 E4M3 conversion */
/*
* Vector Reduction Operations
*/
@@ -5916,3 +5917,35 @@ GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
+
+
+void HELPER(vfext_vf2)(void *vd, void *v0, void *vs2, CPURISCVState *env,
+ uint32_t desc)
+{
+ float_status fp_status = env->fp_status;
+ uint32_t vl = env->vl;
+ uint32_t vm = vext_vm(desc);
+ uint32_t esz = sizeof(uint8_t);
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
+ uint32_t vma = vext_vma(desc);
+ uint32_t i;
+
+ VSTART_CHECK_EARLY_EXIT(env, vl);
+
+ for (i = env->vstart; i < vl; ++i) {
+ if (!vm && !vext_elem_mask(v0, i)) {
+ /* set masked-off elements to 1s */
+ vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
+ continue;
+ }
+
+ uint8_t input = *((uint8_t *)vs2 + H1((i % 2 ? i - 1 : i) / 2));
+ input = (i % 2) ? ((input >> 4) & 0xf) : (input & 0xf);
+ *((uint8_t *)vd + H1(i)) = float4_e2m1_to_float8_e4m3(input,
+ &fp_status);
+ }
+ env->vstart = 0;
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
+}
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 17/18] target/riscv: Expose Zvfofp4min properity
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (15 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 16/18] target/riscv: rvv: Add vfext.vf2 instruction " Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-08 15:16 ` [PATCH 18/18] disas/riscv: Add support of Zvfofp4min extension Max Chou
2026-01-09 7:12 ` [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Chao Liu
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou, Alistair Francis
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/cpu.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index ec1bf8034f..0345a4f037 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1390,6 +1390,8 @@ const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = {
/* Zvfofp8min extension for OFP8 conversion */
MULTI_EXT_CFG_BOOL("x-zvfofp8min", ext_zvfofp8min, false),
+ /* Zvfofp4min extension for OFP4 conversion */
+ MULTI_EXT_CFG_BOOL("x-zvfofp4min", ext_zvfofp4min, false),
{ },
};
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 18/18] disas/riscv: Add support of Zvfofp4min extension
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (16 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 17/18] target/riscv: Expose Zvfofp4min properity Max Chou
@ 2026-01-08 15:16 ` Max Chou
2026-01-09 7:12 ` [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Chao Liu
18 siblings, 0 replies; 33+ messages in thread
From: Max Chou @ 2026-01-08 15:16 UTC (permalink / raw)
To: qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei,
Max Chou
This patch adds support to disassemble Zvfofp4min instructions.
Signed-off-by: Max Chou <max.chou@sifive.com>
---
disas/riscv.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/disas/riscv.c b/disas/riscv.c
index daffe9917f..9abf86f2d7 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -987,6 +987,7 @@ typedef enum {
rv_op_vfncvtbf16_sat_f_f_w = 956,
rv_op_vfncvt_f_f_q = 957,
rv_op_vfncvt_sat_f_f_q = 958,
+ rv_op_vfext_vf2 = 959,
} rv_op;
/* register names */
@@ -2260,6 +2261,7 @@ const rv_opcode_data rvi_opcode_data[] = {
{ "vfncvtbf16.sat.f.f.w", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
{ "vfncvt.f.f.q", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
{ "vfncvt.sat.f.f.q", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+ { "vfext.vf2", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
};
/* CSR names */
@@ -3715,6 +3717,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
case 12: op = rv_op_vclz_v; break;
case 13: op = rv_op_vctz_v; break;
case 14: op = rv_op_vcpop_v; break;
+ case 22: op = rv_op_vfext_vf2; break;
}
break;
case 20:
--
2.43.7
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support
2026-01-08 15:16 [PATCH 00/18] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support Max Chou
` (17 preceding siblings ...)
2026-01-08 15:16 ` [PATCH 18/18] disas/riscv: Add support of Zvfofp4min extension Max Chou
@ 2026-01-09 7:12 ` Chao Liu
18 siblings, 0 replies; 33+ messages in thread
From: Chao Liu @ 2026-01-09 7:12 UTC (permalink / raw)
To: Max Chou, qemu-devel, qemu-riscv
Cc: Palmer Dabbelt, Alistair Francis, Aurelien Jarno, Peter Maydell,
Alex Bennée, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei
Hi, Max:
On 1/8/2026 11:16 PM, Max Chou wrote:
> This patchset adds support for the OCP (Open Compute Project) 8-bit and
> 4-bit floating-point formats, along with the RISC-V Zvfofp8min and
> Zvfofp4min vector extensions that provide conversion operations for
> these formats.
>
> OCP Floating-Point Formats
> * The OCP FP8 specification defines two 8-bit floating-point formats:
> - E4M3: 4-bit exponent, 3-bit mantissa
> * No infinity representation; only 0x7f and 0xff are NaN
> - E5M2: 5-bit exponent, 2-bit mantissa
> * IEEE-like format with infinity representation
> * Multiple NaN encodings supported
> * The OCP FP4 specification defines the E2M1 format:
> - E2M1: 2-bit exponent, 1-bit mantissa
> * No NaN representation
>
> RISC-V ISA Extensions
> * Zvfofp8min (Version 0.2.1):
> The Zvfofp8min extension provides minimal vector conversion support
> for OFP8 formats. It requires the Zve32f extension and leverages the
> altfmt field in the VTYPE CSR (introduced by Zvfbfa) to select between
> E4M3 (altfmt=0) and E5M2 (altfmt=1) formats.
> - Canonical NaN for both E4M3 and E5M2 is 0x7f
> - All NaNs are treated as quiet NaNs
> Instructions added/extended:
> - vfwcvtbf16.f.f.v: OFP8 to BF16 widening conversion
> - vfncvtbf16.f.f.w: BF16 to OFP8 narrowing conversion
> - vfncvtbf16.sat.f.f.w: BF16 to OFP8 with saturation (new)
> - vfncvt.f.f.q: FP32 to OFP8 quad-narrowing conversion (new)
> - vfncvt.sat.f.f.q: FP32 to OFP8 with saturation (new)
>
> * Zvfofp4min (Version 0.1):
> The Zvfofp4min extension provides minimal vector conversion support
> for the OFP4 E2M1 format. It requires the Zve32f extension.
> Instructions added:
> - vfext.vf2: OFP4 E2M1 to OFP8 E4M3 widening conversion
>
> Modifications
> * Softfloat library:
> - New float8_e4m3 and float8_e5m2 types with NaN checking functions
> - New float4_e2m1 type for OFP4 support
> - Conversion functions: bfloat16/float32 <-> float8_e4m3/float8_e5m2
> - Conversion function: float4_e2m1 -> float8_e4m3
> - Implementation-defined behavior flags in float_status:
> * ocp_fp8e5m2_no_signal_nan: Treat all E5M2 NaNs as quiet
> * ocp_fp8_same_canonical_nan: Use 0x7f as canonical NaN for all OFP8
> * RISC-V target:
> - CPU configuration properties for Zvfofp8min and Zvfofp4min
> - Extension implied rules (Zvfofp8min requires Zve32f and Zvfbfa)
> - Vector helper functions for OFP8/OFP4 conversion instructions
> - Disassembler support for new instructions
>
Nice work! The code quality looks good. I've been working on similar stuff
recently, so happy to review.
A quick suggestion: run checkpatch.pl to catch any style issues.
Also, are we planning to add softfloat unit tests for OCP floating-point
formats? Would be great for code quality coverage.
Thanks,
Chao
> References
> * OCP FP8 specification:
> https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-12-01-pdf-1
> * Zvfofp8min specification (v0.2.1 commit e1e20a7):
> https://github.com/aswaterman/riscv-misc/blob/main/isa/zvfofp8min.adoc
> * Zvfofp4min specification (v0.1 commit e1e20a7):
> https://github.com/aswaterman/riscv-misc/blob/main/isa/zvfofp4min.adoc
>
> PS: This series depends on the Zvfbfa extension patchset which introduces:
> - The altfmt field in VTYPE CSR
> - BF16 vector operations infrastructure
> - vfwcvtbf16.f.f.v and vfncvtbf16.f.f.w base instructions
>
> Based-on: 20260108132631.9429-1-max.chou@sifive.com
>
> Max Chou (18):
> target/riscv: rvv: Fix NOP_UU_B vs2 width
> fpu/softfloat: Add OCP(Open Compute Project) OFP8 data type
> fpu/softfloat: Add convert operations(bf16, fp32) for OFP8 data types
> fpu/softfloat: Add OCP(Open Compute Project) OFP4 data type
> fpu/softfloat: Add OCP FP4 E2M1 to OCP FP8 E4M3 convert operation
> target/riscv: Add cfg properity for Zvfofp8min extension
> target/riscv: Add implied rules for Zvfofp8min extension
> target/riscv: rvv: Make vfwcvtbf16.f.f.v support OFP8 to BF16
> conversion for Zvfofp8min extension
> target/riscv: rvv: Make vfncvtbf16.f.f.w support BF16 to OFP8
> conversion for Zvfofp8min extension
> target/riscv: rvv: Add vfncvtbf16.sat.f.f.w instruction for Zvfofp8min
> extension
> target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions
> for Zvfofp8min extension
> target/riscv: Expose Zvfofp8min properity
> disas/riscv: Add support of Zvfofp8min extension
> target/riscv: Add cfg properity for Zvfofp4min extension
> target/riscv: Add implied rules for Zvfofp4min extension
> target/riscv: rvv: Add vfext.vf2 instruction for Zvfofp4min extension
> target/riscv: Expose Zvfofp4min properity
> disas/riscv: Add support of Zvfofp4min extension
>
> disas/riscv.c | 12 +
> fpu/softfloat-parts.c.inc | 77 +++++-
> fpu/softfloat-specialize.c.inc | 57 ++++-
> fpu/softfloat.c | 274 +++++++++++++++++++++
> include/fpu/softfloat-helpers.h | 20 ++
> include/fpu/softfloat-types.h | 28 +++
> include/fpu/softfloat.h | 124 ++++++++++
> target/riscv/cpu.c | 35 ++-
> target/riscv/cpu_cfg_fields.h.inc | 2 +
> target/riscv/helper.h | 15 ++
> target/riscv/insn32.decode | 8 +
> target/riscv/insn_trans/trans_rvbf16.c.inc | 32 ++-
> target/riscv/insn_trans/trans_rvofp4.c.inc | 54 ++++
> target/riscv/insn_trans/trans_rvofp8.c.inc | 115 +++++++++
> target/riscv/insn_trans/trans_rvv.c.inc | 39 +++
> target/riscv/tcg/tcg-cpu.c | 15 ++
> target/riscv/translate.c | 2 +
> target/riscv/vector_helper.c | 131 +++++++++-
> 18 files changed, 1022 insertions(+), 18 deletions(-)
> create mode 100644 target/riscv/insn_trans/trans_rvofp4.c.inc
> create mode 100644 target/riscv/insn_trans/trans_rvofp8.c.inc
>
^ permalink raw reply [flat|nested] 33+ messages in thread