qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Emilio G. Cota" <cota@braap.org>
To: Richard Henderson <rth@twiddle.net>
Cc: "MTTCG Devel" <mttcg@greensocs.com>,
	"Peter Maydell" <peter.maydell@linaro.org>,
	"Peter Crosthwaite" <crosthwaite.peter@gmail.com>,
	"QEMU Developers" <qemu-devel@nongnu.org>,
	"Sergey Fedorov" <serge.fdrv@gmail.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Alex Bennée" <alex.bennee@linaro.org>
Subject: Re: [Qemu-devel] [PATCH 07/10] tb hash: hash phys_pc, pc, and flags with xxhash
Date: Tue, 5 Apr 2016 20:52:39 -0400	[thread overview]
Message-ID: <20160406005239.GA25081@flamenco> (raw)
In-Reply-To: <5704293D.1070105@twiddle.net>

On Tue, Apr 05, 2016 at 14:08:13 -0700, Richard Henderson wrote:
> But the point is that we can do better than dropping data into memory.
> Particularly for those hosts that do not support unaligned data, such as you
> created with the packed structure.

If we made sure the fields in the struct were in the right order
(larger fields first), this shouldn't be an issue.

Anyway I took your proposal and implemented the patch below.
FWIW, I cannot measure a performance difference between this and the packed
struct for arm-softmmu (i.e. 16 bytes) on an x86_64 host.

How does the appended patch look?

Thanks,

		E.


commit af92a0690f49172621cd8b80759e3ca567d43567
Author: Emilio G. Cota <cota@braap.org>
Date:   Tue Apr 5 18:06:21 2016 -0400

    rth
    
    Signed-off-by: Emilio G. Cota <cota@braap.org>

diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index 6b97a7c..349a856 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -45,19 +45,124 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
            | (tmp & TB_JMP_ADDR_MASK));
 }
 
-static inline
-uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, int flags)
+static inline uint32_t h32_finish(uint32_t h32)
 {
-    struct {
-        tb_page_addr_t phys_pc;
-        target_ulong pc;
-        int flags;
-    } QEMU_PACKED k;
-
-    k.phys_pc = phys_pc;
-    k.pc = pc;
-    k.flags = flags;
-    return qemu_xxh32((uint32_t *)&k, sizeof(k) / sizeof(uint32_t), 1);
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+static inline uint32_t tb_hash_func3(uint32_t a, uint32_t b, uint32_t c, int seed)
+{
+    uint32_t h32 = seed + PRIME32_5;
+
+    h32 += 12;
+
+    h32 += a * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    h32 += b * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    h32 += c * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    return h32_finish(h32);
+}
+
+static inline uint32_t tb_hash_func4(uint64_t a0, uint32_t c, uint32_t d, int seed)
+{
+    uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+    uint32_t v2 = seed + PRIME32_2;
+    uint32_t v3 = seed + 0;
+    uint32_t v4 = seed - PRIME32_1;
+    uint32_t a = a0 >> 31 >> 1;
+    uint32_t b = a0;
+    uint32_t h32;
+
+    v1 += a * PRIME32_2;
+    v1 = XXH_rotl32(v1, 13);
+    v1 *= PRIME32_1;
+
+    v2 += b * PRIME32_2;
+    v2 = XXH_rotl32(v2, 13);
+    v2 *= PRIME32_1;
+
+    v3 += c * PRIME32_2;
+    v3 = XXH_rotl32(v3, 13);
+    v3 *= PRIME32_1;
+
+    v4 += d * PRIME32_2;
+    v4 = XXH_rotl32(v4, 13);
+    v4 *= PRIME32_1;
+
+    h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) +
+          XXH_rotl32(v4, 18);
+    h32 += 16;
+
+    return h32_finish(h32);
+}
+
+static inline uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e, int seed)
+{
+    uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+    uint32_t v2 = seed + PRIME32_2;
+    uint32_t v3 = seed + 0;
+    uint32_t v4 = seed - PRIME32_1;
+    uint32_t a = a0 >> 31 >> 1;
+    uint32_t b = a0;
+    uint32_t c = b0 >> 31 >> 1;
+    uint32_t d = b0;
+    uint32_t h32;
+
+    v1 += a * PRIME32_2;
+    v1 = XXH_rotl32(v1, 13);
+    v1 *= PRIME32_1;
+
+    v2 += b * PRIME32_2;
+    v2 = XXH_rotl32(v2, 13);
+    v2 *= PRIME32_1;
+
+    v3 += c * PRIME32_2;
+    v3 = XXH_rotl32(v3, 13);
+    v3 *= PRIME32_1;
+
+    v4 += d * PRIME32_2;
+    v4 = XXH_rotl32(v4, 13);
+    v4 *= PRIME32_1;
+
+    h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) +
+          XXH_rotl32(v4, 18);
+    h32 += 20;
+
+    h32 += e * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    return h32_finish(h32);
+}
+
+static __attribute__((noinline))
+unsigned tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, int flags)
+{
+#if TARGET_LONG_BITS == 64
+
+    if (sizeof(phys_pc) == sizeof(pc)) {
+        return tb_hash_func5(phys_pc, pc, flags, 1);
+    }
+    return tb_hash_func4(pc, phys_pc, flags, 1);
+
+#else /* 32-bit target */
+
+    if (sizeof(phys_pc) > sizeof(pc)) {
+        return tb_hash_func4(phys_pc, pc, flags, 1);
+    }
+    return tb_hash_func3(pc, phys_pc, flags, 1);
+
+#endif /* TARGET_LONG_BITS */
 }
 
 #endif

  reply	other threads:[~2016-04-06  0:52 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-05  5:30 [Qemu-devel] [PATCH 00/10] tb hash improvements Emilio G. Cota
2016-04-05  5:30 ` [Qemu-devel] [PATCH 01/10] translate-all: add missing fold of tb_ctx into tcg_ctx Emilio G. Cota
2016-04-05  8:49   ` Paolo Bonzini
2016-04-05  5:30 ` [Qemu-devel] [PATCH 02/10] compiler.h: add QEMU_CACHELINE + QEMU_ALIGN() + QEMU_CACHELINE_ALIGNED Emilio G. Cota
2016-04-05  7:57   ` Peter Maydell
2016-04-05 17:24     ` Emilio G. Cota
2016-04-05 18:01       ` Peter Maydell
2016-04-05 19:13         ` Emilio G. Cota
2016-04-05  8:49   ` Paolo Bonzini
2016-04-05 12:57   ` Lluís Vilanova
2016-04-05 12:58     ` Peter Maydell
2016-04-05 15:29       ` Paolo Bonzini
2016-04-05 16:23       ` Lluís Vilanova
2016-04-05 16:31         ` Richard Henderson
2016-04-05 16:56           ` Peter Maydell
2016-04-05 19:02             ` Lluís Vilanova
2016-04-05 19:15               ` Richard Henderson
2016-04-05 20:09                 ` Lluís Vilanova
2016-04-06 11:44                   ` Paolo Bonzini
2016-04-06 12:02                     ` Laurent Desnogues
2016-04-05  5:30 ` [Qemu-devel] [PATCH 03/10] seqlock: remove optional mutex Emilio G. Cota
2016-04-06  8:38   ` Alex Bennée
2016-04-05  5:30 ` [Qemu-devel] [PATCH 04/10] seqlock: rename write_lock/unlock to write_begin/end Emilio G. Cota
2016-04-06  8:42   ` Alex Bennée
2016-04-05  5:30 ` [Qemu-devel] [PATCH 05/10] include: add spinlock wrapper Emilio G. Cota
2016-04-05  8:51   ` Paolo Bonzini
2016-04-06 15:51     ` Alex Bennée
2016-04-05  5:30 ` [Qemu-devel] [PATCH 06/10] include: add xxhash.h Emilio G. Cota
2016-04-06 11:39   ` Alex Bennée
2016-04-06 22:59     ` Emilio G. Cota
2016-04-05  5:30 ` [Qemu-devel] [PATCH 07/10] tb hash: hash phys_pc, pc, and flags with xxhash Emilio G. Cota
2016-04-05 15:41   ` Richard Henderson
2016-04-05 15:48     ` Paolo Bonzini
2016-04-05 16:07       ` Richard Henderson
2016-04-05 19:40         ` Emilio G. Cota
2016-04-05 21:08           ` Richard Henderson
2016-04-06  0:52             ` Emilio G. Cota [this message]
2016-04-06 11:52               ` Paolo Bonzini
2016-04-06 17:44                 ` Emilio G. Cota
2016-04-06 18:23                   ` Paolo Bonzini
2016-04-06 18:27                     ` Richard Henderson
2016-04-07  0:37                     ` Emilio G. Cota
2016-04-07  8:46                       ` Paolo Bonzini
2016-04-05 16:33     ` Laurent Desnogues
2016-04-05 17:19       ` Richard Henderson
2016-04-06  6:06         ` Laurent Desnogues
2016-04-06 17:32           ` Emilio G. Cota
2016-04-06 17:42             ` Richard Henderson
2016-04-07  8:12               ` Laurent Desnogues
2016-04-05  5:30 ` [Qemu-devel] [PATCH 08/10] qht: QEMU's fast, resizable and scalable Hash Table Emilio G. Cota
2016-04-05  9:01   ` Paolo Bonzini
2016-04-05 15:50   ` Richard Henderson
2016-04-08 10:27   ` Alex Bennée
2016-04-19 23:03     ` Emilio G. Cota
2016-04-05  5:30 ` [Qemu-devel] [PATCH 09/10] qht: add test program Emilio G. Cota
2016-04-08 10:45   ` Alex Bennée
2016-04-19 23:06     ` Emilio G. Cota
2016-04-20  7:50       ` Alex Bennée
2016-04-05  5:30 ` [Qemu-devel] [PATCH 10/10] tb hash: track translated blocks with qht Emilio G. Cota
2016-04-08 12:39   ` Alex Bennée
2016-04-05  8:47 ` [Qemu-devel] [PATCH 00/10] tb hash improvements Alex Bennée
2016-04-05  9:01 ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160406005239.GA25081@flamenco \
    --to=cota@braap.org \
    --cc=alex.bennee@linaro.org \
    --cc=crosthwaite.peter@gmail.com \
    --cc=mttcg@greensocs.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=serge.fdrv@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).