[Qemu-devel] [PULL 17/34] s390x/tcg: Fault-safe memmove

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Cornelia Huck <cohuck@redhat.com>
To: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-s390x@nongnu.org,
	Richard Henderson <richard.henderson@linaro.org>,
	qemu-devel@nongnu.org, David Hildenbrand <david@redhat.com>
Subject: [Qemu-devel] [PULL 17/34] s390x/tcg: Fault-safe memmove
Date: Thu, 19 Sep 2019 14:40:58 +0200	[thread overview]
Message-ID: <20190919124115.11510-18-cohuck@redhat.com> (raw)
In-Reply-To: <20190919124115.11510-1-cohuck@redhat.com>

From: David Hildenbrand <david@redhat.com>

Replace the fast_memmove() variants with access_memmove() variants, which
first try to probe access to all affected pages (at most two pages).

Introduce access_get_byte()/access_set_byte(). We might be able to speed
up memmove in special cases even further (do single-byte access, use
memmove() for remaining bytes in page); however, we'll skip that for now.

In MVCOS, simply always call access_memmove_as() and drop the TODO
about LAP. LAP is already handled in the MMU.

Get rid of adj_len_to_page(), which is now unused.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/mem_helper.c | 232 ++++++++++++++++++++++----------------
 1 file changed, 133 insertions(+), 99 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index dd5da7074610..e50cec9263c2 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -65,17 +65,6 @@ static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
     return dest > src && dest <= src + len - 1;
 }
 
-/* Reduce the length so that addr + len doesn't cross a page boundary.  */
-static inline uint32_t adj_len_to_page(uint32_t len, uint64_t addr)
-{
-#ifndef CONFIG_USER_ONLY
-    if ((addr & ~TARGET_PAGE_MASK) + len - 1 >= TARGET_PAGE_SIZE) {
-        return -(addr | TARGET_PAGE_MASK);
-    }
-#endif
-    return len;
-}
-
 /* Trigger a SPECIFICATION exception if an address or a length is not
    naturally aligned.  */
 static inline void check_alignment(CPUS390XState *env, uint64_t v,
@@ -208,39 +197,110 @@ static void access_memset(CPUS390XState *env, S390Access *desta,
                      desta->mmu_idx, ra);
 }
 
-#ifndef CONFIG_USER_ONLY
-static void fast_memmove_idx(CPUS390XState *env, uint64_t dest, uint64_t src,
-                             uint32_t len, int dest_idx, int src_idx,
-                             uintptr_t ra)
+static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
+                                  int offset, int mmu_idx, uintptr_t ra)
 {
-    TCGMemOpIdx oi_dest = make_memop_idx(MO_UB, dest_idx);
-    TCGMemOpIdx oi_src = make_memop_idx(MO_UB, src_idx);
-    uint32_t len_adj;
-    void *src_p;
-    void *dest_p;
-    uint8_t x;
-
-    while (len > 0) {
-        src = wrap_address(env, src);
-        dest = wrap_address(env, dest);
-        src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, src_idx);
-        dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, dest_idx);
-
-        if (src_p && dest_p) {
-            /* Access to both whole pages granted.  */
-            len_adj = adj_len_to_page(adj_len_to_page(len, src), dest);
-            memmove(dest_p, src_p, len_adj);
-        } else {
-            /* We failed to get access to one or both whole pages. The next
-               read or write access will likely fill the QEMU TLB for the
-               next iteration.  */
-            len_adj = 1;
-            x = helper_ret_ldub_mmu(env, src, oi_src, ra);
-            helper_ret_stb_mmu(env, dest, x, oi_dest, ra);
+#ifdef CONFIG_USER_ONLY
+    return ldub_p(*haddr + offset);
+#else
+    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
+    uint8_t byte;
+
+    if (likely(*haddr)) {
+        return ldub_p(*haddr + offset);
+    }
+    /*
+     * Do a single access and test if we can then get access to the
+     * page. This is especially relevant to speed up TLB_NOTDIRTY.
+     */
+    byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra);
+    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
+    return byte;
+#endif
+}
+
+static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
+                               int offset, uintptr_t ra)
+{
+    if (offset < access->size1) {
+        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
+                                  offset, access->mmu_idx, ra);
+    }
+    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
+                              offset - access->size1, access->mmu_idx, ra);
+}
+
+static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
+                               int offset, uint8_t byte, int mmu_idx,
+                               uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+    stb_p(*haddr + offset, byte);
+#else
+    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
+
+    if (likely(*haddr)) {
+        stb_p(*haddr + offset, byte);
+        return;
+    }
+    /*
+     * Do a single access and test if we can then get access to the
+     * page. This is especially relevant to speed up TLB_NOTDIRTY.
+     */
+    helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra);
+    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
+#endif
+}
+
+static void access_set_byte(CPUS390XState *env, S390Access *access,
+                            int offset, uint8_t byte, uintptr_t ra)
+{
+    if (offset < access->size1) {
+        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
+                           access->mmu_idx, ra);
+    } else {
+        do_access_set_byte(env, access->vaddr2, &access->haddr2,
+                           offset - access->size1, byte, access->mmu_idx, ra);
+    }
+}
+
+/*
+ * Move data with the same semantics as memmove() in case ranges don't overlap
+ * or src > dest. Undefined behavior on destructive overlaps.
+ */
+static void access_memmove(CPUS390XState *env, S390Access *desta,
+                           S390Access *srca, uintptr_t ra)
+{
+    int diff;
+
+    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
+
+    /* Fallback to slow access in case we don't have access to all host pages */
+    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
+                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
+        int i;
+
+        for (i = 0; i < desta->size1 + desta->size2; i++) {
+            uint8_t byte = access_get_byte(env, srca, i, ra);
+
+            access_set_byte(env, desta, i, byte, ra);
         }
-        src += len_adj;
-        dest += len_adj;
-        len -= len_adj;
+        return;
+    }
+
+    if (srca->size1 == desta->size1) {
+        memmove(desta->haddr1, srca->haddr1, srca->size1);
+        memmove(desta->haddr2, srca->haddr2, srca->size2);
+    } else if (srca->size1 < desta->size1) {
+        diff = desta->size1 - srca->size1;
+        memmove(desta->haddr1, srca->haddr1, srca->size1);
+        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
+        memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
+    } else {
+        diff = srca->size1 - desta->size1;
+        memmove(desta->haddr1, srca->haddr1, desta->size1);
+        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
+        memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
     }
 }
 
@@ -259,45 +319,6 @@ static int mmu_idx_from_as(uint8_t as)
     }
 }
 
-static void fast_memmove_as(CPUS390XState *env, uint64_t dest, uint64_t src,
-                            uint32_t len, uint8_t dest_as, uint8_t src_as,
-                            uintptr_t ra)
-{
-    int src_idx = mmu_idx_from_as(src_as);
-    int dest_idx = mmu_idx_from_as(dest_as);
-
-    fast_memmove_idx(env, dest, src, len, dest_idx, src_idx, ra);
-}
-#endif
-
-static void fast_memmove(CPUS390XState *env, uint64_t dest, uint64_t src,
-                         uint32_t l, uintptr_t ra)
-{
-    int mmu_idx = cpu_mmu_index(env, false);
-
-    while (l > 0) {
-        void *src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, mmu_idx);
-        void *dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
-        if (src_p && dest_p) {
-            /* Access to both whole pages granted.  */
-            uint32_t l_adj = adj_len_to_page(l, src);
-            l_adj = adj_len_to_page(l_adj, dest);
-            memmove(dest_p, src_p, l_adj);
-            src += l_adj;
-            dest += l_adj;
-            l -= l_adj;
-        } else {
-            /* We failed to get access to one or both whole pages. The next
-               read or write access will likely fill the QEMU TLB for the
-               next iteration.  */
-            cpu_stb_data_ra(env, dest, cpu_ldub_data_ra(env, src, ra), ra);
-            src++;
-            dest++;
-            l--;
-        }
-    }
-}
-
 /* and on array */
 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
@@ -388,7 +409,7 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                               uint64_t src, uintptr_t ra)
 {
     const int mmu_idx = cpu_mmu_index(env, false);
-    S390Access desta;
+    S390Access srca, desta;
     uint32_t i;
 
     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
@@ -397,6 +418,7 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
     /* MVC always copies one more byte than specified - maximum is 256 */
     l++;
 
+    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 
     /*
@@ -405,9 +427,9 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
      * behave like memmove().
      */
     if (dest == src + 1) {
-        access_memset(env, &desta, cpu_ldub_data_ra(env, src, ra), ra);
+        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
     } else if (!is_destructive_overlap(env, dest, src, l)) {
-        fast_memmove(env, dest, src, l, ra);
+        access_memmove(env, &desta, &srca, ra);
     } else {
         for (i = 0; i < l; i++) {
             uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
@@ -756,8 +778,11 @@ uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
 /* move page */
 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2)
 {
+    const int mmu_idx = cpu_mmu_index(env, false);
     const bool f = extract64(r0, 11, 1);
     const bool s = extract64(r0, 10, 1);
+    uintptr_t ra = GETPC();
+    S390Access srca, desta;
 
     if ((f && s) || extract64(r0, 12, 4)) {
         s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, GETPC());
@@ -772,7 +797,11 @@ uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2)
      * - CC-option with surpression of page-translation exceptions
      * - Store r1/r2 register identifiers at real location 162
      */
-    fast_memmove(env, r1, r2, TARGET_PAGE_SIZE, GETPC());
+    srca = access_prepare(env, r2, TARGET_PAGE_SIZE, MMU_DATA_LOAD, mmu_idx,
+                          ra);
+    desta = access_prepare(env, r1, TARGET_PAGE_SIZE, MMU_DATA_STORE, mmu_idx,
+                           ra);
+    access_memmove(env, &desta, &srca, ra);
     return 0; /* data moved */
 }
 
@@ -853,7 +882,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
 {
     const int mmu_idx = cpu_mmu_index(env, false);
     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
-    S390Access desta;
+    S390Access srca, desta;
     int i, cc;
 
     if (*destlen == *srclen) {
@@ -877,7 +906,9 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
         *destlen -= len;
         *srclen -= len;
-        fast_memmove(env, *dest, *src, len, ra);
+        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
+        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
+        access_memmove(env, &desta, &srca, ra);
         *src = wrap_address(env, *src + len);
         *dest = wrap_address(env, *dest + len);
     } else if (wordsize == 1) {
@@ -911,8 +942,8 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
     uint64_t src = get_address(env, r2);
     uint8_t pad = env->regs[r2 + 1] >> 24;
+    S390Access srca, desta;
     uint32_t cc, cur_len;
-    S390Access desta;
 
     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
         cc = 3;
@@ -946,7 +977,11 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
         } else {
             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
 
-            fast_memmove(env, dest, src, cur_len, ra);
+            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
+                                  ra);
+            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
+                                   ra);
+            access_memmove(env, &desta, &srca, ra);
             src = wrap_address(env, src + cur_len);
             srclen -= cur_len;
             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
@@ -2488,16 +2523,15 @@ uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
         s390_program_interrupt(env, PGM_ADDRESSING, 6, ra);
     }
 
-    /* FIXME: a) LAP
-     *        b) Access using correct keys
-     *        c) AR-mode
-     */
-#ifdef CONFIG_USER_ONLY
-    /* psw keys are never valid in user mode, we will never reach this */
-    g_assert_not_reached();
-#else
-    fast_memmove_as(env, dest, src, len, dest_as, src_as, ra);
-#endif
+    /* FIXME: Access using correct keys and AR-mode */
+    if (len) {
+        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
+                                         mmu_idx_from_as(src_as), ra);
+        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
+                                          mmu_idx_from_as(dest_as), ra);
+
+        access_memmove(env, &desta, &srca, ra);
+    }
 
     return cc;
 }
-- 
2.20.1

next prev parent reply	other threads:[~2019-09-19 13:09 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-19 12:40 [Qemu-devel] [PULL 00/34] s390x update Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 01/34] s390x/tcg: Reset exception_index to -1 instead of 0 Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 02/34] s390x/tcg: MVCL: Zero out unused bits of address Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 03/34] s390x/tcg: MVCL: Detect destructive overlaps Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 04/34] s390x/tcg: MVCL: Process max 4k bytes at a time Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 05/34] s390x/tcg: MVC: Increment the length once Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 06/34] s390x/tcg: MVC: Use is_destructive_overlap() Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 07/34] s390x/tcg: MVPG: Check for specification exceptions Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 08/34] s390x/tcg: MVPG: Properly wrap the addresses Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 09/34] s390x/tcg: MVCLU/MVCLE: Process max 4k bytes at a time Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 10/34] s390x/tcg: MVCS/MVCP: Check for special operation exceptions Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 11/34] s390x/tcg: MVCOS: Lengths are 32 bit in 24/31-bit mode Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 12/34] s390x/tcg: MVCS/MVCP: Properly wrap the length Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 13/34] s390x/tcg: MVST: Check for specification exceptions Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 14/34] s390x/tcg: MVST: Fix storing back the addresses to registers Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 15/34] s390x/tcg: Always use MMU_USER_IDX for CONFIG_USER_ONLY Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 16/34] s390x/tcg: Fault-safe memset Cornelia Huck
2019-09-19 12:40 ` Cornelia Huck [this message]
2019-09-19 12:40 ` [Qemu-devel] [PULL 18/34] s390x/tcg: MVCS/MVCP: Use access_memmove() Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 19/34] s390x/tcg: MVC: Fault-safe handling on destructive overlaps Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 20/34] s390x/tcg: MVCLU: Fault-safe handling Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 21/34] s390x/tcg: OC: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 22/34] s390x/tcg: XC: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 23/34] s390x/tcg: NC: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 24/34] s390x/tcg: MVCIN: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 25/34] s390x/tcg: MVN: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 26/34] s390x/tcg: MVZ: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 27/34] s390x/tcg: MVST: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 28/34] s390x/tcg: MVO: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 29/34] tests/tcg: target/s390x: Test MVO Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 30/34] pc-bios/s390-ccw: Do not pre-initialize empty array Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 31/34] pc-bios/s390-ccw/net: fix a possible memory leak in get_uuid() Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 32/34] pc-bios/s390-ccw: Rebuild the s390-netboot.img firmware image Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 33/34] s390x/kvm: Officially require at least kernel 3.15 Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 34/34] s390x/cpumodel: Add the z15 name to the description of gen15a Cornelia Huck
2019-09-20 10:45 ` [PULL 00/34] s390x update Peter Maydell
2019-09-20 11:00   ` Cornelia Huck
2019-09-20 11:51     ` David Hildenbrand
2019-09-20 11:59       ` David Hildenbrand
2019-09-20 13:32         ` Cornelia Huck
2019-09-20 13:41           ` Peter Maydell
2019-09-20 15:34             ` David Hildenbrand

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:dd5da707461 dfblob:e50cec9263c )
 OR (
bs:"[Qemu-devel] [PULL 17/34] s390x/tcg: Fault-safe memmove" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190919124115.11510-18-cohuck@redhat.com \
    --to=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.