[PATCH 09/13] target/hexagon: add v73 HVX IEEE bfloat16 insns

public inbox for qemu-devel@nongnu.org
 help / color / mirror / Atom feed

From: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
To: qemu-devel@nongnu.org
Cc: brian.cain@oss.qualcomm.com, ale@rev.ng, anjo@rev.ng,
	ltaylorsimpson@gmail.com, marco.liebel@oss.qualcomm.com,
	philmd@linaro.org, quic_mburton@quicinc.com,
	sid.manning@oss.qualcomm.com
Subject: [PATCH 09/13] target/hexagon: add v73 HVX IEEE bfloat16 insns
Date: Mon, 23 Mar 2026 06:15:45 -0700	[thread overview]
Message-ID: <003328f47c0b5e286ef06ba55cc9734e7bba4af8.1774271525.git.matheus.bernardino@oss.qualcomm.com> (raw)
In-Reply-To: <cover.1774271525.git.matheus.bernardino@oss.qualcomm.com>

Add HVX IEEE bfloat16 (bf16) instructions:

Arithmetic operations:
- V6_vadd_sf_bf, V6_vsub_sf_bf: add/sub bf16 widening to sf output
- V6_vmpy_sf_bf: multiply bf16 widening to sf output
- V6_vmpy_sf_bf_acc: multiply-accumulate bf16 widening to sf output

Min/Max operations:
- V6_vmin_bf, V6_vmax_bf: bf16 min/max

Comparison operations:
- V6_vgtbf: greater-than compare
- V6_vgtbf_and, V6_vgtbf_or, V6_vgtbf_xor: predicate variants

Conversion operations:
- V6_vcvt_bf_sf: convert sf to bf16

Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
---
 target/hexagon/mmvec/kvx_ieee.h              | 36 +++++++++++
 target/hexagon/mmvec/macros.h                |  5 ++
 target/hexagon/mmvec/mmvec.h                 |  1 +
 target/hexagon/mmvec/kvx_ieee.c              |  3 +
 target/hexagon/imported/mmvec/encode_ext.def | 15 +++++
 target/hexagon/imported/mmvec/ext.idef       | 64 ++++++++++++++++++++
 6 files changed, 124 insertions(+)

diff --git a/target/hexagon/mmvec/kvx_ieee.h b/target/hexagon/mmvec/kvx_ieee.h
index 8a6816f6b3..eb670d4ec3 100644
--- a/target/hexagon/mmvec/kvx_ieee.h
+++ b/target/hexagon/mmvec/kvx_ieee.h
@@ -80,4 +80,40 @@ int16_t conv_hf_h(int16_t a, float_status *fp_status);
 int32_t conv_w_sf(uint32_t a, float_status *fp_status);
 int16_t conv_h_hf(uint16_t a, float_status *fp_status);
 
+/* IEEE BFloat instructions */
+
+#define fp_mult_sf_bf(A, B) /* bf16 x bf16, computed on widened sf bits */ \
+    fp_mult_sf_sf(bf_to_sf(A), bf_to_sf(B), &env->fp_status)
+#define fp_add_sf_bf(A, B) /* bf16 + bf16, computed on widened sf bits */ \
+    fp_add_sf_sf(bf_to_sf(A), bf_to_sf(B), &env->fp_status)
+#define fp_sub_sf_bf(A, B) /* bf16 - bf16, computed on widened sf bits */ \
+    fp_sub_sf_sf(bf_to_sf(A), bf_to_sf(B), &env->fp_status)
+
+uint32_t fp_mult_sf_bf_acc(uint16_t op1, uint16_t op2, uint32_t acc,
+                           float_status *fp_status);
+
+#define bf_to_sf(A) (((uint32_t)(A)) << 16) /* 16-bit value into bits 31:16 */
+
+#define fp_min_bf(A, B) ({ /* widen to sf, fp_min_sf, keep the top half */ \
+    uint32_t _bf_min = fp_min_sf(bf_to_sf(A), bf_to_sf(B), &env->fp_status); \
+    (uint16_t)(_bf_min >> 16); \
+})
+
+#define fp_max_bf(A, B) ({ /* widen to sf, fp_max_sf, keep the top half */ \
+    uint32_t _bf_max = fp_max_sf(bf_to_sf(A), bf_to_sf(B), &env->fp_status); \
+    (uint16_t)(_bf_max >> 16); \
+})
+
+static inline uint16_t sf_to_bf(int32_t A)
+{
+    /* Narrow sf bits to 16 bits: NaN -> FP32_DEF_NAN, else nearest-even. */
+    uint32_t bits = A;
+    /* Bit 15 is the half-way bit; an exact tie with bit 16 clear stays. */
+    uint32_t round_up = (bits & 0x8000) && (bits & 0x1FFFF) != 0x08000;
+    if (float32_is_any_nan(A)) {
+        return (uint32_t)FP32_DEF_NAN >> 16;
+    }
+    return (bits + (round_up ? 0x8000 : 0)) >> 16;
+}
+
 #endif
diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h
index c342507d1a..b70996578e 100644
--- a/target/hexagon/mmvec/macros.h
+++ b/target/hexagon/mmvec/macros.h
@@ -25,6 +25,9 @@
 #include "accel/tcg/probe.h"
 #include "mmvec/kvx_ieee.h"
 
+#define fBFLOAT()
+#define fCVI_VX_NO_TMP_LD()
+
 #ifndef QEMU_GENERATE
 #define VdV      (*(MMVector *restrict)(VdV_void))
 #define VsV      (*(MMVector *restrict)(VsV_void))
@@ -366,4 +369,6 @@
     (int16_t)(A) > (int16_t)(B) : \
     float16_compare((A), (B), &env->fp_status) == float_relation_greater)
 
+#define fCMPGT_BF(A, B) fCMPGT_SF((int32_t)bf_to_sf(A), (int32_t)bf_to_sf(B))
+
 #endif
diff --git a/target/hexagon/mmvec/mmvec.h b/target/hexagon/mmvec/mmvec.h
index eaedfe0d6d..9d8d57c7c6 100644
--- a/target/hexagon/mmvec/mmvec.h
+++ b/target/hexagon/mmvec/mmvec.h
@@ -40,6 +40,7 @@ typedef union {
     int8_t    b[MAX_VEC_SIZE_BYTES / 1];
     int32_t  sf[MAX_VEC_SIZE_BYTES / 4];   /* single float (32-bit) */
     int16_t  hf[MAX_VEC_SIZE_BYTES / 2];   /* half float (16-bit) */
+    uint16_t bf[MAX_VEC_SIZE_BYTES / 2];   /* bfloat16 */
 } MMVector;
 
 typedef union {
diff --git a/target/hexagon/mmvec/kvx_ieee.c b/target/hexagon/mmvec/kvx_ieee.c
index bbeec09707..b5c434ad6d 100644
--- a/target/hexagon/mmvec/kvx_ieee.c
+++ b/target/hexagon/mmvec/kvx_ieee.c
@@ -229,3 +229,6 @@ int16_t conv_h_hf(uint16_t a, float_status *fp_status)
     }
     return float16_to_int16_round_to_zero(f1, fp_status);
 }
+
+DEF_FP_INSN_3(mult_sf_bf_acc, 32, 16, 16, 32,
+              float32_muladd(bf_to_sf(f1), bf_to_sf(f2), f3, 0, fp_status))
diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def
index 3f84a1691b..352a8ec14b 100644
--- a/target/hexagon/imported/mmvec/encode_ext.def
+++ b/target/hexagon/imported/mmvec/encode_ext.def
@@ -869,4 +869,19 @@ DEF_ENC(V6_vgthf_or,"00011100100vvvvvPP1uuuuu001101xx")
 DEF_ENC(V6_vgtsf_xor,"00011100100vvvvvPP1uuuuu111010xx")
 DEF_ENC(V6_vgthf_xor,"00011100100vvvvvPP1uuuuu111011xx")
 
+/* BFLOAT instructions */
+DEF_ENC(V6_vmpy_sf_bf,"00011101010vvvvvPP1uuuuu100ddddd")
+DEF_ENC(V6_vmpy_sf_bf_acc,"00011101000vvvvvPP1uuuuu000xxxxx")
+DEF_ENC(V6_vadd_sf_bf,"00011101010vvvvvPP1uuuuu110ddddd")
+DEF_ENC(V6_vsub_sf_bf,"00011101010vvvvvPP1uuuuu101ddddd")
+DEF_ENC(V6_vmax_bf,"00011101010vvvvvPP1uuuuu111ddddd")
+DEF_ENC(V6_vmin_bf,"00011101010vvvvvPP1uuuuu000ddddd")
+DEF_ENC(V6_vcvt_bf_sf,"00011101010vvvvvPP1uuuuu011ddddd")
+
+/* BFLOAT compare instructions */
+DEF_ENC(V6_vgtbf,"00011100100vvvvvPP1uuuuu011110dd")
+DEF_ENC(V6_vgtbf_and,"00011100100vvvvvPP1uuuuu110100xx")
+DEF_ENC(V6_vgtbf_or,"00011100100vvvvvPP1uuuuu001110xx")
+DEF_ENC(V6_vgtbf_xor,"00011100100vvvvvPP1uuuuu111100xx")
+
 #endif /* NO MMVEC */
diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef
index 304c4966d8..afe9de3716 100644
--- a/target/hexagon/imported/mmvec/ext.idef
+++ b/target/hexagon/imported/mmvec/ext.idef
@@ -3149,6 +3149,15 @@ ITERATOR_INSN_SHIFT_SLOT_FLT(16, vconv_hf_h,"Vd32.hf=Vu32.h",
     } \
 }
 
+#define VCMPGT_BF(DEST, ASRC, ASRCOP, CMP, N, SRC, MASK, WIDTH) \
+{ \
+    fBFLOAT(); \
+    for (fHIDE(int) i = 0; i < fVBYTES(); i += WIDTH) { \
+        fHIDE(int) VAL = fCMPGT_BF(VuV.SRC[i/WIDTH],VvV.SRC[i/WIDTH]) ? MASK : 0; \
+        fSETQBITS(DEST,WIDTH,MASK,i,ASRC ASRCOP VAL); \
+    } \
+}
+
 /* Vector SF compare */
 #define MMVEC_CMPGT_SF(TYPE,TYPE2,DESCR,N,MASK,WIDTH,SRC) \
     EXTINSN(V6_vgt##TYPE##_and, "Qx4&=vcmp.gt(Vu32." TYPE2 ",Vv32." TYPE2 ")", \
@@ -3187,8 +3196,63 @@ ITERATOR_INSN_SHIFT_SLOT_FLT(16, vconv_hf_h,"Vd32.hf=Vu32.h",
         DESCR" greater than", \
         VCMPGT_HF(QdV, , , ">", N, SRC, MASK, WIDTH))
 
+/* Vector BF compare */
+#define MMVEC_CMPGT_BF(TYPE,TYPE2,DESCR,N,MASK,WIDTH,SRC) \
+    EXTINSN(V6_vgt##TYPE##_and, "Qx4&=vcmp.gt(Vu32." TYPE2 ",Vv32." TYPE2 ")",\
+        ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VA,A_CVI_VA_2SRC,A_HVX_FLT), \
+        DESCR" greater than with predicate-and", \
+        VCMPGT_BF(QxV, fGETQBITS(QxV,WIDTH,MASK,i), &, ">", N, SRC, MASK, WIDTH)) \
+    EXTINSN(V6_vgt##TYPE##_xor, "Qx4^=vcmp.gt(Vu32." TYPE2 ",Vv32." TYPE2 ")", \
+        ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VA,A_CVI_VA_2SRC,A_HVX_FLT), \
+        DESCR" greater than with predicate-xor", \
+        VCMPGT_BF(QxV, fGETQBITS(QxV,WIDTH,MASK,i), ^, ">", N, SRC, MASK, WIDTH)) \
+    EXTINSN(V6_vgt##TYPE##_or, "Qx4|=vcmp.gt(Vu32." TYPE2 ",Vv32." TYPE2 ")", \
+        ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VA,A_CVI_VA_2SRC,A_HVX_FLT), \
+        DESCR" greater than with predicate-or", \
+        VCMPGT_BF(QxV, fGETQBITS(QxV,WIDTH,MASK,i), |, ">", N, SRC, MASK, WIDTH)) \
+    EXTINSN(V6_vgt##TYPE, "Qd4=vcmp.gt(Vu32." TYPE2 ",Vv32." TYPE2 ")", \
+        ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VA,A_CVI_VA_2SRC,A_HVX_FLT), \
+        DESCR" greater than", \
+        VCMPGT_BF(QdV, , , ">", N, SRC, MASK, WIDTH))
+
 MMVEC_CMPGT_SF(sf,"sf","Vector sf Compare ", fVELEM(32), 0xF, 4, sf)
 MMVEC_CMPGT_HF(hf,"hf","Vector hf Compare ", fVELEM(16), 0x3, 2, hf)
+MMVEC_CMPGT_BF(bf,"bf","Vector bf Compare ", fVELEM(16), 0x3, 2, bf)
+
+/******************************************************************************
+ BFloat arithmetic, conversion and max/min instructions
+ ******************************************************************************/
+
+ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vadd_sf_bf,
+    "Vdd32.sf=vadd(Vu32.bf,Vv32.bf)",  "Vector IEEE add: bf widen to sf",
+    VddV.v[0].sf[i] = fp_add_sf_bf(VuV.bf[2*i], VvV.bf[2*i]);
+    VddV.v[1].sf[i] = fp_add_sf_bf(VuV.bf[2*i+1], VvV.bf[2*i+1]); fBFLOAT())
+ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vsub_sf_bf,
+    "Vdd32.sf=vsub(Vu32.bf,Vv32.bf)",  "Vector IEEE sub: bf widen to sf",
+    VddV.v[0].sf[i] = fp_sub_sf_bf(VuV.bf[2*i], VvV.bf[2*i]);
+    VddV.v[1].sf[i] = fp_sub_sf_bf(VuV.bf[2*i+1], VvV.bf[2*i+1]); fBFLOAT())
+ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vmpy_sf_bf,
+    "Vdd32.sf=vmpy(Vu32.bf,Vv32.bf)",  "Vector IEEE mul: bf widen to sf",
+    VddV.v[0].sf[i] = fp_mult_sf_bf(VuV.bf[2*i], VvV.bf[2*i]);
+    VddV.v[1].sf[i] = fp_mult_sf_bf(VuV.bf[2*i+1], VvV.bf[2*i+1]); fBFLOAT())
+ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vmpy_sf_bf_acc,
+    "Vxx32.sf+=vmpy(Vu32.bf,Vv32.bf)", "Vector IEEE fma: bf widen to sf",
+    VxxV.v[0].sf[i] = fp_mult_sf_bf_acc(VuV.bf[2*i], VvV.bf[2*i],
+                                        VxxV.v[0].sf[i], &env->fp_status);
+    VxxV.v[1].sf[i] = fp_mult_sf_bf_acc(VuV.bf[2*i+1], VvV.bf[2*i+1],
+                                        VxxV.v[1].sf[i], &env->fp_status);
+    fCVI_VX_NO_TMP_LD(); fBFLOAT())
+ITERATOR_INSN_IEEE_FP_16(32, vcvt_bf_sf,
+    "Vd32.bf=vcvt(Vu32.sf,Vv32.sf)",   "Vector IEEE cvt: sf to bf",
+    VdV.bf[2*i]   = sf_to_bf(VuV.sf[i]);
+    VdV.bf[2*i+1] = sf_to_bf(VvV.sf[i]); fBFLOAT())
+
+ITERATOR_INSN_IEEE_FP_16_32_LATE(16, vmax_bf, "Vd32.bf=vmax(Vu32.bf,Vv32.bf)",
+    "Vector IEEE max: bf", VdV.bf[i] = fp_max_bf(VuV.bf[i], VvV.bf[i]);
+    fBFLOAT())
+ITERATOR_INSN_IEEE_FP_16_32_LATE(16, vmin_bf, "Vd32.bf=vmin(Vu32.bf,Vv32.bf)",
+    "Vector IEEE min: bf", VdV.bf[i] = fp_min_bf(VuV.bf[i], VvV.bf[i]);
+    fBFLOAT())
 
 /******************************************************************************
  DEBUG Vector/Register Printing
-- 
2.37.2

next prev parent reply	other threads:[~2026-03-23 13:17 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-23 13:15 [PATCH 00/13] hexagon: add missing HVX float instructions Matheus Tavares Bernardino
2026-03-23 13:15 ` [PATCH 01/13] tests/docker: Update hexagon cross toolchain to 22.1.0 Matheus Tavares Bernardino
2026-03-23 13:15 ` [PATCH 02/13] target/hexagon: fix incorrect/too-permissive HVX encodings Matheus Tavares Bernardino
2026-03-23 19:21   ` Taylor Simpson
2026-03-23 13:15 ` [PATCH 03/13] target/hexagon/cpu: add HVX IEEE FP extension Matheus Tavares Bernardino
2026-03-23 19:32   ` Taylor Simpson
2026-03-24 16:52     ` Matheus Bernardino
2026-03-24 18:48       ` Taylor Simpson
2026-03-24 19:20         ` Brian Cain
2026-03-24 19:46           ` Taylor Simpson
2026-03-23 13:15 ` [PATCH 04/13] target/hexagon: add v68 HVX IEEE float arithmetic insns Matheus Tavares Bernardino
2026-03-23 20:28   ` Taylor Simpson
2026-03-24 19:30     ` Matheus Bernardino
2026-03-24 19:51       ` Taylor Simpson
2026-03-24 19:59         ` Matheus Bernardino
2026-03-25  1:18           ` Taylor Simpson
2026-03-23 13:15 ` [PATCH 05/13] target/hexagon: add v68 HVX IEEE float min/max insns Matheus Tavares Bernardino
2026-03-23 20:47   ` Taylor Simpson
2026-03-24 20:15     ` Matheus Bernardino
2026-03-23 13:15 ` [PATCH 06/13] target/hexagon: add v68 HVX IEEE float misc insns Matheus Tavares Bernardino
2026-03-23 21:08   ` Taylor Simpson
2026-03-24 20:25     ` Matheus Bernardino
2026-03-23 13:15 ` [PATCH 07/13] target/hexagon: add v68 HVX IEEE float conversion insns Matheus Tavares Bernardino
2026-03-23 21:25   ` Taylor Simpson
2026-03-24 21:04     ` Matheus Bernardino
2026-03-25  1:15       ` Taylor Simpson
2026-03-23 13:15 ` [PATCH 08/13] target/hexagon: add v68 HVX IEEE float compare insns Matheus Tavares Bernardino
2026-03-23 21:42   ` Taylor Simpson
2026-03-26 13:00     ` Matheus Bernardino
2026-03-23 13:15 ` Matheus Tavares Bernardino [this message]
2026-03-23 22:03   ` [PATCH 09/13] target/hexagon: add v73 HVX IEEE bfloat16 insns Taylor Simpson
2026-03-23 13:15 ` [PATCH 10/13] tests/hexagon: add tests for v68 HVX IEEE float arithmetics Matheus Tavares Bernardino
2026-03-24 19:05   ` Taylor Simpson
2026-03-23 13:15 ` [PATCH 11/13] tests/hexagon: add tests for v68 HVX IEEE float min/max Matheus Tavares Bernardino
2026-03-24 19:07   ` Taylor Simpson
2026-03-23 13:15 ` [PATCH 12/13] tests/hexagon: add tests for v68 HVX IEEE float conversions Matheus Tavares Bernardino
2026-03-24 19:30   ` Taylor Simpson
2026-03-23 13:15 ` [PATCH 13/13] tests/hexagon: add tests for v68 HVX IEEE float comparisons Matheus Tavares Bernardino
2026-03-24 19:37   ` Taylor Simpson

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:8a6816f6b dfblob:eb670d4ec dfblob:c342507d1 dfblob:b70996578
dfblob:eaedfe0d6 dfblob:9d8d57c7c dfblob:bbeec0970 dfblob:b5c434ad6
dfblob:3f84a1691 dfblob:352a8ec14 dfblob:304c4966d dfblob:afe9de371 )
 OR (
bs:"[PATCH 09/13] target/hexagon: add v73 HVX IEEE bfloat16 insns" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=003328f47c0b5e286ef06ba55cc9734e7bba4af8.1774271525.git.matheus.bernardino@oss.qualcomm.com \
    --to=matheus.bernardino@oss.qualcomm.com \
    --cc=ale@rev.ng \
    --cc=anjo@rev.ng \
    --cc=brian.cain@oss.qualcomm.com \
    --cc=ltaylorsimpson@gmail.com \
    --cc=marco.liebel@oss.qualcomm.com \
    --cc=philmd@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=quic_mburton@quicinc.com \
    --cc=sid.manning@oss.qualcomm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox