xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Xen-devel <xen-devel@lists.xen.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	Jan Beulich <JBeulich@suse.com>
Subject: [PATCH 1/3] x86/emul: Optimise decode_register() somewhat
Date: Fri, 26 Jan 2018 13:16:38 +0000	[thread overview]
Message-ID: <1516972600-26262-2-git-send-email-andrew.cooper3@citrix.com> (raw)
In-Reply-To: <1516972600-26262-1-git-send-email-andrew.cooper3@citrix.com>

The positions of GPRs inside struct cpu_user_regs don't follow any
particular order, so as compiled, decode_register() becomes a jump table to 16
blocks which calculate the appropriate offset, at a total of 207 bytes.

Instead, pre-compute the offsets at build time and use pointer arithmetic to
calculate the result.  The resulting function is far more reasonable:

    test   %edx,%edx
    lea    0xbfb97(%rip),%rax        # <cpu_user_regs_high_gpr_offsets>
    lea    0xbfba0(%rip),%rdx        # <cpu_user_regs_gpr_offsets>
    cmove  %rdx,%rax
    and    $0xf,%edi
    movzbl (%rax,%rdi,1),%eax
    add    %rsi,%rax
    retq

and by observation, most callers in x86_emulate() inline and
constant-propagate the highbyte_regs value of 0 to drop the test, one lea and
the cmove.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
---
 xen/arch/x86/x86_emulate/x86_emulate.c | 82 ++++++++++++++++++++++++----------
 1 file changed, 58 insertions(+), 24 deletions(-)

diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index ff0a003..3f5636f 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -396,6 +396,51 @@ static const struct {
 /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
 static const uint8_t pmov_convert_delta[] = { 1, 2, 3, 1, 2, 1 };
 
+/*
+ * Map GPRs by ModRM encoding to their offset within struct cpu_user_regs.
+ * The AH,CH,DH,BH offsets are misaligned.
+ */
+static const uint8_t cpu_user_regs_gpr_offsets[] = {
+    offsetof(struct cpu_user_regs, r(ax)),
+    offsetof(struct cpu_user_regs, r(cx)),
+    offsetof(struct cpu_user_regs, r(dx)),
+    offsetof(struct cpu_user_regs, r(bx)),
+    offsetof(struct cpu_user_regs, r(sp)),
+    offsetof(struct cpu_user_regs, r(bp)),
+    offsetof(struct cpu_user_regs, r(si)),
+    offsetof(struct cpu_user_regs, r(di)),
+#if defined(__x86_64__)
+    offsetof(struct cpu_user_regs, r8),
+    offsetof(struct cpu_user_regs, r9),
+    offsetof(struct cpu_user_regs, r10),
+    offsetof(struct cpu_user_regs, r11),
+    offsetof(struct cpu_user_regs, r12),
+    offsetof(struct cpu_user_regs, r13),
+    offsetof(struct cpu_user_regs, r14),
+    offsetof(struct cpu_user_regs, r15),
+#endif
+};
+static const uint8_t cpu_user_regs_high_gpr_offsets[] = {
+    offsetof(struct cpu_user_regs, r(ax)),
+    offsetof(struct cpu_user_regs, r(cx)),
+    offsetof(struct cpu_user_regs, r(dx)),
+    offsetof(struct cpu_user_regs, r(bx)),
+    offsetof(struct cpu_user_regs, ah),
+    offsetof(struct cpu_user_regs, ch),
+    offsetof(struct cpu_user_regs, dh),
+    offsetof(struct cpu_user_regs, bh),
+#if defined(__x86_64__)
+    offsetof(struct cpu_user_regs, r8),
+    offsetof(struct cpu_user_regs, r9),
+    offsetof(struct cpu_user_regs, r10),
+    offsetof(struct cpu_user_regs, r11),
+    offsetof(struct cpu_user_regs, r12),
+    offsetof(struct cpu_user_regs, r13),
+    offsetof(struct cpu_user_regs, r14),
+    offsetof(struct cpu_user_regs, r15),
+#endif
+};
+
 static const struct {
     uint8_t simd_size:5;
     uint8_t to_mem:1;
@@ -1939,32 +1984,21 @@ void *
 decode_register(
     uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
 {
-    void *p;
+    const uint8_t *offsets = highbyte_regs ? cpu_user_regs_high_gpr_offsets
+                                           : cpu_user_regs_gpr_offsets;
 
-    switch ( modrm_reg )
-    {
-    case  0: p = &regs->r(ax); break;
-    case  1: p = &regs->r(cx); break;
-    case  2: p = &regs->r(dx); break;
-    case  3: p = &regs->r(bx); break;
-    case  4: p = (highbyte_regs ? &regs->ah : (void *)&regs->r(sp)); break;
-    case  5: p = (highbyte_regs ? &regs->ch : (void *)&regs->r(bp)); break;
-    case  6: p = (highbyte_regs ? &regs->dh : (void *)&regs->r(si)); break;
-    case  7: p = (highbyte_regs ? &regs->bh : (void *)&regs->r(di)); break;
-#if defined(__x86_64__)
-    case  8: p = &regs->r8;  break;
-    case  9: p = &regs->r9;  break;
-    case 10: p = &regs->r10; break;
-    case 11: p = &regs->r11; break;
-    case 12: p = &regs->r12; break;
-    case 13: p = &regs->r13; break;
-    case 14: p = &regs->r14; break;
-    case 15: p = &regs->r15; break;
-#endif
-    default: BUG(); p = NULL; break;
-    }
+    /* Check that the arrays are the same size, and a power of two. */
+    BUILD_BUG_ON(ARRAY_SIZE(cpu_user_regs_gpr_offsets) !=
+                 ARRAY_SIZE(cpu_user_regs_high_gpr_offsets));
+    BUILD_BUG_ON(ARRAY_SIZE(cpu_user_regs_gpr_offsets) &
+                 (ARRAY_SIZE(cpu_user_regs_gpr_offsets) - 1));
+
+    ASSERT(modrm_reg < ARRAY_SIZE(cpu_user_regs_gpr_offsets));
+
+    /* For safety in release builds.  Debug builds will hit the ASSERT() */
+    modrm_reg &= ARRAY_SIZE(cpu_user_regs_gpr_offsets) - 1;
 
-    return p;
+    return (void *)regs + offsets[modrm_reg];
 }
 
 static void *decode_vex_gpr(unsigned int vex_reg, struct cpu_user_regs *regs,
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  reply	other threads:[~2018-01-26 13:16 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-26 13:16 [PATCH 0/3] Improvements to decode_register() Andrew Cooper
2018-01-26 13:16 ` Andrew Cooper [this message]
2018-01-29 11:05   ` [PATCH 1/3] x86/emul: Optimise decode_register() somewhat Jan Beulich
2018-01-29 11:19     ` Andrew Cooper
2018-01-26 13:16 ` [PATCH 2/3] x86/hvm: Improvements to external users of decode_register() Andrew Cooper
2018-01-29  3:11   ` Tian, Kevin
2018-01-29 11:12   ` Jan Beulich
2018-01-26 13:16 ` [PATCH 3/3] x86/emul: Improvements to internal " Andrew Cooper
2018-01-29 11:20   ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1516972600-26262-2-git-send-email-andrew.cooper3@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).