All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Emilio G. Cota" <cota@braap.org>
To: Richard Henderson <rth@twiddle.net>
Cc: "MTTCG Devel" <mttcg@greensocs.com>,
	"Peter Maydell" <peter.maydell@linaro.org>,
	"Peter Crosthwaite" <crosthwaite.peter@gmail.com>,
	"QEMU Developers" <qemu-devel@nongnu.org>,
	"Sergey Fedorov" <serge.fdrv@gmail.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Alex Bennée" <alex.bennee@linaro.org>
Subject: Re: [Qemu-devel] [PATCH 07/10] tb hash: hash phys_pc, pc, and flags with xxhash
Date: Tue, 5 Apr 2016 20:52:39 -0400	[thread overview]
Message-ID: <20160406005239.GA25081@flamenco> (raw)
In-Reply-To: <5704293D.1070105@twiddle.net>

On Tue, Apr 05, 2016 at 14:08:13 -0700, Richard Henderson wrote:
> But the point is that we can do better than dropping data into memory.
> Particularly for those hosts that do not support unaligned data, such as you
> created with the packed structure.

If we made sure the fields in the struct were in the right order
(larger fields first), this shouldn't be an issue.

Anyway I took your proposal and implemented the patch below.
FWIW I cannot measure a perf. difference between this and the packed
struct for arm-softmmu (i.e. 16 bytes) on an x86_64 host.

How does the appended look?

Thanks,

		E.


commit af92a0690f49172621cd8b80759e3ca567d43567
Author: Emilio G. Cota <cota@braap.org>
Date:   Tue Apr 5 18:06:21 2016 -0400

    rth
    
    Signed-off-by: Emilio G. Cota <cota@braap.org>

diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index 6b97a7c..349a856 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -45,19 +45,124 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
            | (tmp & TB_JMP_ADDR_MASK));
 }
 
-static inline
-uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, int flags)
+static inline uint32_t h32_finish(uint32_t h32)
 {
-    struct {
-        tb_page_addr_t phys_pc;
-        target_ulong pc;
-        int flags;
-    } QEMU_PACKED k;
-
-    k.phys_pc = phys_pc;
-    k.pc = pc;
-    k.flags = flags;
-    return qemu_xxh32((uint32_t *)&k, sizeof(k) / sizeof(uint32_t), 1);
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+static inline uint32_t tb_hash_func3(uint32_t a, uint32_t b, uint32_t c, int seed)
+{
+    uint32_t h32 = seed + PRIME32_5;
+
+    h32 += 12;
+
+    h32 += a * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    h32 += b * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    h32 += c * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    return h32_finish(h32);
+}
+
+static inline uint32_t tb_hash_func4(uint64_t a0, uint32_t c, uint32_t d, int seed)
+{
+    uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+    uint32_t v2 = seed + PRIME32_2;
+    uint32_t v3 = seed + 0;
+    uint32_t v4 = seed - PRIME32_1;
+    uint32_t a = a0 >> 31 >> 1;
+    uint32_t b = a0;
+    uint32_t h32;
+
+    v1 += a * PRIME32_2;
+    v1 = XXH_rotl32(v1, 13);
+    v1 *= PRIME32_1;
+
+    v2 += b * PRIME32_2;
+    v2 = XXH_rotl32(v2, 13);
+    v2 *= PRIME32_1;
+
+    v3 += c * PRIME32_2;
+    v3 = XXH_rotl32(v3, 13);
+    v3 *= PRIME32_1;
+
+    v4 += d * PRIME32_2;
+    v4 = XXH_rotl32(v4, 13);
+    v4 *= PRIME32_1;
+
+    h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) +
+          XXH_rotl32(v4, 18);
+    h32 += 16;
+
+    return h32_finish(h32);
+}
+
+static inline uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e, int seed)
+{
+    uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+    uint32_t v2 = seed + PRIME32_2;
+    uint32_t v3 = seed + 0;
+    uint32_t v4 = seed - PRIME32_1;
+    uint32_t a = a0 >> 31 >> 1;
+    uint32_t b = a0;
+    uint32_t c = b0 >> 31 >> 1;
+    uint32_t d = b0;
+    uint32_t h32;
+
+    v1 += a * PRIME32_2;
+    v1 = XXH_rotl32(v1, 13);
+    v1 *= PRIME32_1;
+
+    v2 += b * PRIME32_2;
+    v2 = XXH_rotl32(v2, 13);
+    v2 *= PRIME32_1;
+
+    v3 += c * PRIME32_2;
+    v3 = XXH_rotl32(v3, 13);
+    v3 *= PRIME32_1;
+
+    v4 += d * PRIME32_2;
+    v4 = XXH_rotl32(v4, 13);
+    v4 *= PRIME32_1;
+
+    h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) +
+          XXH_rotl32(v4, 18);
+    h32 += 20;
+
+    h32 += e * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    return h32_finish(h32);
+}
+
+static __attribute__((noinline))
+unsigned tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, int flags)
+{
+#if TARGET_LONG_BITS == 64
+
+    if (sizeof(phys_pc) == sizeof(pc)) {
+        return tb_hash_func5(phys_pc, pc, flags, 1);
+    }
+    return tb_hash_func4(pc, phys_pc, flags, 1);
+
+#else /* 32-bit target */
+
+    if (sizeof(phys_pc) > sizeof(pc)) {
+        return tb_hash_func4(phys_pc, pc, flags, 1);
+    }
+    return tb_hash_func3(pc, phys_pc, flags, 1);
+
+#endif /* TARGET_LONG_BITS */
 }
 
 #endif

  reply	other threads:[~2016-04-06  0:52 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-05  5:30 [Qemu-devel] [PATCH 00/10] tb hash improvements Emilio G. Cota
2016-04-05  5:30 ` [Qemu-devel] [PATCH 01/10] translate-all: add missing fold of tb_ctx into tcg_ctx Emilio G. Cota
2016-04-05  8:49   ` Paolo Bonzini
2016-04-05  5:30 ` [Qemu-devel] [PATCH 02/10] compiler.h: add QEMU_CACHELINE + QEMU_ALIGN() + QEMU_CACHELINE_ALIGNED Emilio G. Cota
2016-04-05  7:57   ` Peter Maydell
2016-04-05 17:24     ` Emilio G. Cota
2016-04-05 18:01       ` Peter Maydell
2016-04-05 19:13         ` Emilio G. Cota
2016-04-05  8:49   ` Paolo Bonzini
2016-04-05 12:57   ` Lluís Vilanova
2016-04-05 12:58     ` Peter Maydell
2016-04-05 15:29       ` Paolo Bonzini
2016-04-05 16:23       ` Lluís Vilanova
2016-04-05 16:31         ` Richard Henderson
2016-04-05 16:56           ` Peter Maydell
2016-04-05 19:02             ` Lluís Vilanova
2016-04-05 19:15               ` Richard Henderson
2016-04-05 20:09                 ` Lluís Vilanova
2016-04-06 11:44                   ` Paolo Bonzini
2016-04-06 12:02                     ` Laurent Desnogues
2016-04-05  5:30 ` [Qemu-devel] [PATCH 03/10] seqlock: remove optional mutex Emilio G. Cota
2016-04-06  8:38   ` Alex Bennée
2016-04-05  5:30 ` [Qemu-devel] [PATCH 04/10] seqlock: rename write_lock/unlock to write_begin/end Emilio G. Cota
2016-04-06  8:42   ` Alex Bennée
2016-04-05  5:30 ` [Qemu-devel] [PATCH 05/10] include: add spinlock wrapper Emilio G. Cota
2016-04-05  8:51   ` Paolo Bonzini
2016-04-06 15:51     ` Alex Bennée
2016-04-05  5:30 ` [Qemu-devel] [PATCH 06/10] include: add xxhash.h Emilio G. Cota
2016-04-06 11:39   ` Alex Bennée
2016-04-06 22:59     ` Emilio G. Cota
2016-04-05  5:30 ` [Qemu-devel] [PATCH 07/10] tb hash: hash phys_pc, pc, and flags with xxhash Emilio G. Cota
2016-04-05 15:41   ` Richard Henderson
2016-04-05 15:48     ` Paolo Bonzini
2016-04-05 16:07       ` Richard Henderson
2016-04-05 19:40         ` Emilio G. Cota
2016-04-05 21:08           ` Richard Henderson
2016-04-06  0:52             ` Emilio G. Cota [this message]
2016-04-06 11:52               ` Paolo Bonzini
2016-04-06 17:44                 ` Emilio G. Cota
2016-04-06 18:23                   ` Paolo Bonzini
2016-04-06 18:27                     ` Richard Henderson
2016-04-07  0:37                     ` Emilio G. Cota
2016-04-07  8:46                       ` Paolo Bonzini
2016-04-05 16:33     ` Laurent Desnogues
2016-04-05 17:19       ` Richard Henderson
2016-04-06  6:06         ` Laurent Desnogues
2016-04-06 17:32           ` Emilio G. Cota
2016-04-06 17:42             ` Richard Henderson
2016-04-07  8:12               ` Laurent Desnogues
2016-04-05  5:30 ` [Qemu-devel] [PATCH 08/10] qht: QEMU's fast, resizable and scalable Hash Table Emilio G. Cota
2016-04-05  9:01   ` Paolo Bonzini
2016-04-05 15:50   ` Richard Henderson
2016-04-08 10:27   ` Alex Bennée
2016-04-19 23:03     ` Emilio G. Cota
2016-04-05  5:30 ` [Qemu-devel] [PATCH 09/10] qht: add test program Emilio G. Cota
2016-04-08 10:45   ` Alex Bennée
2016-04-19 23:06     ` Emilio G. Cota
2016-04-20  7:50       ` Alex Bennée
2016-04-05  5:30 ` [Qemu-devel] [PATCH 10/10] tb hash: track translated blocks with qht Emilio G. Cota
2016-04-08 12:39   ` Alex Bennée
2016-04-05  8:47 ` [Qemu-devel] [PATCH 00/10] tb hash improvements Alex Bennée
2016-04-05  9:01 ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160406005239.GA25081@flamenco \
    --to=cota@braap.org \
    --cc=alex.bennee@linaro.org \
    --cc=crosthwaite.peter@gmail.com \
    --cc=mttcg@greensocs.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=serge.fdrv@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.