From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org
Subject: [Qemu-devel] [PULL 14/18] tcg/ppc: Improve unaligned load/store handling on 64-bit backend
Date: Mon, 24 Aug 2015 12:37:02 -0700	[thread overview]
Message-ID: <1440445026-26522-15-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1440445026-26522-1-git-send-email-rth@twiddle.net>

From: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Currently, we take the slow path for any unaligned access in the
backend, because we effectively preserve the bottom address bits
below the alignment requirement when comparing with the TLB entry,
so any non-zero bit there will cause the compare to fail.

For the same number of instructions, we can instead add the access
size - 1 to the address and then clear all the bottom bits.

That means that normal unaligned accesses will not fall back (the
hardware will handle them fine). Only when crossing a page boundary
will we end up with a mismatch, because we'll then be pointing to
the next page, which cannot possibly be in the same TLB entry.
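
To illustrate (not part of the patch), here is a minimal standalone
sketch of the comparator trick, assuming 4 KiB pages
(TARGET_PAGE_BITS = 12) and a 4-byte access; the names and values
below are hypothetical:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_BITS 12
    #define PAGE_MASK  (~((uint64_t)(1ULL << PAGE_BITS) - 1))

    int main(void)
    {
        uint64_t size    = 4;       /* 1 << s_bits for a 4-byte access */
        uint64_t in_page = 0x1ffa;  /* unaligned, but fits in its page */
        uint64_t cross   = 0x1ffe;  /* unaligned, spills into 0x2000   */

        /* Adding size - 1 before masking keeps in-page accesses on
           their own page (compare succeeds, fast path), but pushes
           page-crossing accesses onto the next page (compare fails,
           slow path).  */
        printf("%#" PRIx64 " -> %#" PRIx64 "\n",
               in_page, (in_page + size - 1) & PAGE_MASK); /* 0x1000 */
        printf("%#" PRIx64 " -> %#" PRIx64 "\n",
               cross, (cross + size - 1) & PAGE_MASK);     /* 0x2000 */
        return 0;
    }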

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Message-Id: <1437455978.5809.2.camel@kernel.crashing.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ppc/tcg-target.c | 41 +++++++++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 31fa25c..1672220 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -1361,7 +1361,7 @@ static void * const qemu_st_helpers[16] = {
    in CR7, loads the addend of the TLB into R3, and returns the register
    containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
 
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits,
+static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
                                TCGReg addrlo, TCGReg addrhi,
                                int mem_index, bool is_read)
 {
@@ -1371,6 +1371,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits,
            : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
     int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
     TCGReg base = TCG_AREG0;
+    TCGMemOp s_bits = opc & MO_SIZE;
 
     /* Extract the page index, shifted into place for tlb index.  */
     if (TCG_TARGET_REG_BITS == 64) {
@@ -1422,17 +1423,37 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits,
        to minimize any load use delay.  */
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off);
 
-    /* Clear the non-page, non-alignment bits from the address.  */
+    /* Clear the non-page, non-alignment bits from the address */
     if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
+        /* We don't support unaligned accesses on 32-bit targets;
+         * preserve the bottom bits and thus trigger a comparison
+         * failure on unaligned accesses.
+         */
         tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
                     (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
-    } else if (!s_bits) {
-        tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo,
-                    0, 63 - TARGET_PAGE_BITS);
+    } else if (s_bits) {
+        /* > byte access, we need to handle alignment */
+        if ((opc & MO_AMASK) == MO_ALIGN) {
+            /* Alignment required by the front-end; same as the 32-bit case */
+            tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo,
+                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits);
+            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+        } else {
+            /* We support unaligned accesses; we need to make sure we
+             * fail if we cross a page boundary. The trick is to add
+             * access_size - 1 to the address before masking the low
+             * bits. That makes the address overflow into the next
+             * page if we cross a page boundary, which then forces a
+             * mismatch of the TLB compare, since the next page cannot
+             * possibly be in the same TLB entry.
+             */
+            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1));
+            tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0,
+                        0, 63 - TARGET_PAGE_BITS);
+        }
     } else {
-        tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo,
-                    64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits);
-        tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+        /* Byte access, just chop off the bits below the page index */
+        tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS);
     }
 
     if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
@@ -1592,7 +1613,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = get_mmuidx(oi);
-    addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, true);
+    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
 
     /* Load a pointer into the current opcode w/conditional branch-link. */
     label_ptr = s->code_ptr;
@@ -1667,7 +1688,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = get_mmuidx(oi);
-    addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, false);
+    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
 
     /* Load a pointer into the current opcode w/conditional branch-link. */
     label_ptr = s->code_ptr;
-- 
2.4.3

