qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Max Chou <max.chou@sifive.com>
To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: Richard Henderson <richard.henderson@linaro.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Alistair Francis <alistair.francis@wdc.com>,
	Bin Meng <bmeng.cn@gmail.com>, Weiwei Li <liwei1518@gmail.com>,
	Daniel Henrique Barboza <dbarboza@ventanamicro.com>,
	Liu Zhiwei <zhiwei_liu@linux.alibaba.com>,
	Max Chou <max.chou@sifive.com>
Subject: [RFC PATCH v4 5/5] target/riscv: Inline unit-stride ld/st and corresponding functions for performance
Date: Fri, 14 Jun 2024 01:51:22 +0800	[thread overview]
Message-ID: <20240613175122.1299212-6-max.chou@sifive.com> (raw)
In-Reply-To: <20240613175122.1299212-1-max.chou@sifive.com>

In the vector unit-stride load/store helper functions. the vext_ldst_us
& vext_ldst_whole functions corresponding most of the execution time.
Inline the functions can avoid the function call overhead to improve the
helper function performance.

Signed-off-by: Max Chou <max.chou@sifive.com>
---
 target/riscv/vector_helper.c | 64 +++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index cba46ef16a5..29849a8b66f 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -415,20 +415,22 @@ typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
                                    uint32_t idx, void *vd, uintptr_t retaddr);
 typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);
 
-#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)                         \
-static void NAME##_tlb(CPURISCVState *env, abi_ptr addr,                \
-                       uint32_t byte_off, void *vd, uintptr_t retaddr)  \
-{                                                                       \
-    uint8_t *reg = ((uint8_t *)vd + byte_off);                          \
-    ETYPE *cur = ((ETYPE *)reg);                                        \
-    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);                   \
-}                                                                       \
-                                                                        \
-static void NAME##_host(void *vd, uint32_t byte_off, void *host)        \
-{                                                                       \
-    ETYPE val = LDSUF##_p(host);                                        \
-    uint8_t *reg = (uint8_t *)(vd + byte_off);                          \
-    *(ETYPE *)(reg) = val;                                              \
+#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)                 \
+static inline QEMU_ALWAYS_INLINE                                \
+void NAME##_tlb(CPURISCVState *env, abi_ptr addr,               \
+                uint32_t byte_off, void *vd, uintptr_t retaddr) \
+{                                                               \
+    uint8_t *reg = ((uint8_t *)vd + byte_off);                  \
+    ETYPE *cur = ((ETYPE *)reg);                                \
+    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);           \
+}                                                               \
+                                                                \
+static inline QEMU_ALWAYS_INLINE                                \
+void NAME##_host(void *vd, uint32_t byte_off, void *host)       \
+{                                                               \
+    ETYPE val = LDSUF##_p(host);                                \
+    uint8_t *reg = (uint8_t *)(vd + byte_off);                  \
+    *(ETYPE *)(reg) = val;                                      \
 }
 
 GEN_VEXT_LD_ELEM(lde_b, uint8_t,  H1, ldub)
@@ -436,20 +438,22 @@ GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
 GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
 GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)
 
-#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)                         \
-static void NAME##_tlb(CPURISCVState *env, abi_ptr addr,                \
-                       uint32_t byte_off, void *vd, uintptr_t retaddr)  \
-{                                                                       \
-    uint8_t *reg = ((uint8_t *)vd + byte_off);                          \
-    ETYPE data = *((ETYPE *)reg);                                       \
-    cpu_##STSUF##_data_ra(env, addr, data, retaddr);                    \
-}                                                                       \
-                                                                        \
-static void NAME##_host(void *vd, uint32_t byte_off, void *host)        \
-{                                                                       \
-    uint8_t *reg = ((uint8_t *)vd + byte_off);                          \
-    ETYPE val = *(ETYPE *)(reg);                                        \
-    STSUF##_p(host, val);                                               \
+#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)                 \
+static inline QEMU_ALWAYS_INLINE                                \
+void NAME##_tlb(CPURISCVState *env, abi_ptr addr,               \
+                uint32_t byte_off, void *vd, uintptr_t retaddr) \
+{                                                               \
+    uint8_t *reg = ((uint8_t *)vd + byte_off);                  \
+    ETYPE data = *((ETYPE *)reg);                               \
+    cpu_##STSUF##_data_ra(env, addr, data, retaddr);            \
+}                                                               \
+                                                                \
+static inline QEMU_ALWAYS_INLINE                                \
+void NAME##_host(void *vd, uint32_t byte_off, void *host)       \
+{                                                               \
+    uint8_t *reg = ((uint8_t *)vd + byte_off);                  \
+    ETYPE val = *(ETYPE *)(reg);                                \
+    STSUF##_p(host, val);                                       \
 }
 
 GEN_VEXT_ST_ELEM(ste_b, uint8_t,  H1, stb)
@@ -611,7 +615,7 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
  */
 
 /* unmasked unit-stride load and store operation */
-static void
+static inline QEMU_ALWAYS_INLINE void
 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
              vext_ldst_elem_fn_tlb *ldst_tlb,
              vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
@@ -1013,7 +1017,7 @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb)
 /*
  * load and store whole register instructions
  */
-static void
+static inline QEMU_ALWAYS_INLINE void
 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                 vext_ldst_elem_fn_tlb *ldst_tlb,
                 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
-- 
2.34.1



  parent reply	other threads:[~2024-06-13 17:52 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-13 17:51 [RFC PATCH v4 0/5] Improve the performance of RISC-V vector unit-stride/whole register ld/st instructions Max Chou
2024-06-13 17:51 ` [RFC PATCH v4 1/5] accel/tcg: Avoid unnecessary call overhead from qemu_plugin_vcpu_mem_cb Max Chou
2024-06-20  2:48   ` Richard Henderson
2024-06-20  7:50   ` Frank Chang
2024-06-20 14:27   ` Alex Bennée
2024-06-13 17:51 ` [RFC PATCH v4 2/5] target/riscv: rvv: Provide a fast path using direct access to host ram for unmasked unit-stride load/store Max Chou
2024-06-20  4:29   ` Richard Henderson
2024-06-25 15:14     ` Max Chou
2024-06-20  4:41   ` Richard Henderson
2024-06-13 17:51 ` [RFC PATCH v4 3/5] target/riscv: rvv: Provide a fast path using direct access to host ram for unit-stride whole register load/store Max Chou
2024-06-13 17:51 ` [RFC PATCH v4 4/5] target/riscv: rvv: Provide group continuous ld/st flow for unit-stride ld/st instructions Max Chou
2024-06-20  4:38   ` Richard Henderson
2024-06-24  6:50     ` Max Chou
2024-06-13 17:51 ` Max Chou [this message]
2024-06-20  4:44   ` [RFC PATCH v4 5/5] target/riscv: Inline unit-stride ld/st and corresponding functions for performance Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240613175122.1299212-6-max.chou@sifive.com \
    --to=max.chou@sifive.com \
    --cc=alistair.francis@wdc.com \
    --cc=bmeng.cn@gmail.com \
    --cc=dbarboza@ventanamicro.com \
    --cc=liwei1518@gmail.com \
    --cc=palmer@dabbelt.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=zhiwei_liu@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).