qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/4] speedup memory dispatch
@ 2013-12-02 14:23 Paolo Bonzini
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size Paolo Bonzini
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: Paolo Bonzini @ 2013-12-02 14:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: marcel.a, mst

This avoids useless masking and shifting when a single call to the
MemoryRegion ops will do.  It cuts 30 cycles off the common case
of memory dispatch (out of ~150).

Paolo Bonzini (4):
  memory: cache min/max_access_size
  memory: streamline common case for memory dispatch
  memory: hoist coalesced MMIO flush
  memory: small tweaks

 include/exec/memory.h |   2 +
 memory.c              | 114 +++++++++++++++++++++++++++++++++++---------------
 2 files changed, 82 insertions(+), 34 deletions(-)

-- 
1.8.4.2

From 8324dd46284285cec1de394ce2fc3fb449e776d6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2013 11:29:41 +0100
Subject: [PATCH 1/4] memory: cache min/max_access_size

This will simplify the code in the next patch.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/exec/memory.h |  2 ++
 memory.c              | 27 +++++++++++----------------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 480dfbf..cf63adb 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -163,6 +163,8 @@ struct MemoryRegion {
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
     NotifierList iommu_notify;
+
+    uint8_t min_access_size, max_access_size;
 };
 
 typedef struct MemoryListener MemoryListener;
diff --git a/memory.c b/memory.c
index 28f6449..56e54aa 100644
--- a/memory.c
+++ b/memory.c
@@ -443,8 +443,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
 static void access_with_adjusted_size(hwaddr addr,
                                       uint64_t *value,
                                       unsigned size,
-                                      unsigned access_size_min,
-                                      unsigned access_size_max,
                                       void (*access)(MemoryRegion *mr,
                                                      hwaddr addr,
                                                      uint64_t *value,
@@ -457,15 +455,8 @@ static void access_with_adjusted_size(hwaddr addr,
     unsigned access_size;
     unsigned i;
 
-    if (!access_size_min) {
-        access_size_min = 1;
-    }
-    if (!access_size_max) {
-        access_size_max = 4;
-    }
-
     /* FIXME: support unaligned access? */
-    access_size = MAX(MIN(size, access_size_max), access_size_min);
+    access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
     access_mask = -1ULL >> (64 - access_size * 8);
     if (memory_region_big_endian(mr)) {
         for (i = 0; i < size; i += access_size) {
@@ -942,11 +933,9 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
 
     if (mr->ops->read) {
         access_with_adjusted_size(addr, &data, size,
-                                  mr->ops->impl.min_access_size,
-                                  mr->ops->impl.max_access_size,
                                   memory_region_read_accessor, mr);
     } else {
-        access_with_adjusted_size(addr, &data, size, 1, 4,
+        access_with_adjusted_size(addr, &data, size,
                                   memory_region_oldmmio_read_accessor, mr);
     }
 
@@ -982,11 +971,9 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
 
     if (mr->ops->write) {
         access_with_adjusted_size(addr, &data, size,
-                                  mr->ops->impl.min_access_size,
-                                  mr->ops->impl.max_access_size,
                                   memory_region_write_accessor, mr);
     } else {
-        access_with_adjusted_size(addr, &data, size, 1, 4,
+        access_with_adjusted_size(addr, &data, size,
                                   memory_region_oldmmio_write_accessor, mr);
     }
     return false;
@@ -1004,6 +991,14 @@ void memory_region_init_io(MemoryRegion *mr,
     mr->opaque = opaque;
     mr->terminates = true;
     mr->ram_addr = ~(ram_addr_t)0;
+
+    if (mr->ops->read) {
+        mr->min_access_size = mr->ops->impl.min_access_size ? : 1;
+        mr->max_access_size = mr->ops->impl.max_access_size ? : 4;
+    } else {
+        mr->min_access_size = 1;
+        mr->max_access_size = 4;
+    }
 }
 
 void memory_region_init_ram(MemoryRegion *mr,
-- 
1.8.4.2


From 2afa3d3451dbfc76b3c6a28af1e7cdd3ab50c7ec Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2013 11:39:18 +0100
Subject: [PATCH 2/4] memory: streamline common case for memory dispatch

In the common case where there is no combining or splitting,
access_with_adjusted_size is adding a lot of overhead.  Call
the MMIO ops directly in that case.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 56 insertions(+), 12 deletions(-)

diff --git a/memory.c b/memory.c
index 56e54aa..1ade19c 100644
--- a/memory.c
+++ b/memory.c
@@ -443,6 +443,7 @@ static void memory_region_write_accessor(MemoryRegion *mr,
 static void access_with_adjusted_size(hwaddr addr,
                                       uint64_t *value,
                                       unsigned size,
+                                      unsigned access_size,
                                       void (*access)(MemoryRegion *mr,
                                                      hwaddr addr,
                                                      uint64_t *value,
@@ -452,11 +453,9 @@ static void access_with_adjusted_size(hwaddr addr,
                                       MemoryRegion *mr)
 {
     uint64_t access_mask;
-    unsigned access_size;
     unsigned i;
 
     /* FIXME: support unaligned access? */
-    access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
     access_mask = -1ULL >> (64 - access_size * 8);
     if (memory_region_big_endian(mr)) {
         for (i = 0; i < size; i += access_size) {
@@ -929,13 +928,32 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
                                              hwaddr addr,
                                              unsigned size)
 {
+    unsigned access_size;
     uint64_t data = 0;
 
+    if (size < mr->min_access_size) {
+        access_size = mr->min_access_size;
+        goto adjusted;
+    }
+    if (size > mr->max_access_size) {
+        access_size = mr->max_access_size;
+        goto adjusted;
+    }
+
+    if (mr->ops->read) {
+        data = mr->ops->read(mr->opaque, addr, size);
+    } else {
+        data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+    }
+    trace_memory_region_ops_read(mr, addr, data, size);
+    return data;
+
+adjusted:
     if (mr->ops->read) {
-        access_with_adjusted_size(addr, &data, size,
+        access_with_adjusted_size(addr, &data, size, access_size,
                                   memory_region_read_accessor, mr);
     } else {
-        access_with_adjusted_size(addr, &data, size,
+        access_with_adjusted_size(addr, &data, size, access_size,
                                   memory_region_oldmmio_read_accessor, mr);
     }
 
@@ -957,6 +975,39 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
     return false;
 }
 
+static void memory_region_dispatch_write1(MemoryRegion *mr,
+                                          hwaddr addr,
+                                          uint64_t data,
+                                          unsigned size)
+{
+    unsigned access_size;
+    if (size < mr->min_access_size) {
+        access_size = mr->min_access_size;
+        goto adjusted;
+    }
+    if (size > mr->max_access_size) {
+        access_size = mr->max_access_size;
+        goto adjusted;
+    }
+
+    trace_memory_region_ops_write(mr, addr, data, size);
+    if (mr->ops->write) {
+        mr->ops->write(mr->opaque, addr, data, size);
+    } else {
+        mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
+    }
+    return;
+
+adjusted:
+    if (mr->ops->write) {
+        access_with_adjusted_size(addr, &data, size, access_size,
+                                  memory_region_write_accessor, mr);
+    } else {
+        access_with_adjusted_size(addr, &data, size, access_size,
+                                  memory_region_oldmmio_write_accessor, mr);
+    }
+}
+
 static bool memory_region_dispatch_write(MemoryRegion *mr,
                                          hwaddr addr,
                                          uint64_t data,
@@ -968,14 +1019,7 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
     }
 
     adjust_endianness(mr, &data, size);
-
-    if (mr->ops->write) {
-        access_with_adjusted_size(addr, &data, size,
-                                  memory_region_write_accessor, mr);
-    } else {
-        access_with_adjusted_size(addr, &data, size,
-                                  memory_region_oldmmio_write_accessor, mr);
-    }
+    memory_region_dispatch_write1(mr, addr, data, size);
     return false;
 }
 
-- 
1.8.4.2


From c65bf3cd93264eac93bbfe3a5974b8d15a1599b0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2013 11:49:52 +0100
Subject: [PATCH 3/4] memory: hoist coalesced MMIO flush

No need to flush the coalesced MMIO buffer multiple times when combining
multiple accesses into one.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/memory.c b/memory.c
index 1ade19c..495e693 100644
--- a/memory.c
+++ b/memory.c
@@ -401,9 +401,6 @@ static void memory_region_read_accessor(MemoryRegion *mr,
 {
     uint64_t tmp;
 
-    if (mr->flush_coalesced_mmio) {
-        qemu_flush_coalesced_mmio_buffer();
-    }
     tmp = mr->ops->read(mr->opaque, addr, size);
     trace_memory_region_ops_read(mr, addr, tmp, size);
     *value |= (tmp & mask) << shift;
@@ -432,9 +429,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
 {
     uint64_t tmp;
 
-    if (mr->flush_coalesced_mmio) {
-        qemu_flush_coalesced_mmio_buffer();
-    }
     tmp = (*value >> shift) & mask;
     trace_memory_region_ops_write(mr, addr, tmp, size);
     mr->ops->write(mr->opaque, addr, tmp, size);
@@ -965,6 +959,10 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
                                         uint64_t *pval,
                                         unsigned size)
 {
+    if (mr->flush_coalesced_mmio) {
+        qemu_flush_coalesced_mmio_buffer();
+    }
+
     if (!memory_region_access_valid(mr, addr, size, false)) {
         *pval = unassigned_mem_read(mr, addr, size);
         return true;
@@ -1013,6 +1011,10 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
                                          uint64_t data,
                                          unsigned size)
 {
+    if (mr->flush_coalesced_mmio) {
+        qemu_flush_coalesced_mmio_buffer();
+    }
+
     if (!memory_region_access_valid(mr, addr, size, true)) {
         unassigned_mem_write(mr, addr, data, size);
         return true;
-- 
1.8.4.2


From d13d7f894cb4670568a0bd45d592a9321d9d5dab Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2013 11:48:11 +0100
Subject: [PATCH 4/4] memory: small tweaks

Make adjust_endianness inline, and do not use a ctz instruction
when a shift will do.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/memory.c b/memory.c
index 495e693..d3b0dce 100644
--- a/memory.c
+++ b/memory.c
@@ -357,7 +357,7 @@ static bool memory_region_wrong_endianness(MemoryRegion *mr)
 #endif
 }
 
-static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
+static inline void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
 {
     if (memory_region_wrong_endianness(mr)) {
         switch (size) {
@@ -378,6 +378,11 @@ static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
     }
 }
 
+static inline int ctz3(unsigned size)
+{
+    return size >> 1;
+}
+
 static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
                                                 hwaddr addr,
                                                 uint64_t *value,
@@ -387,7 +392,7 @@ static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
 {
     uint64_t tmp;
 
-    tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+    tmp = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
     trace_memory_region_ops_read(mr, addr, tmp, size);
     *value |= (tmp & mask) << shift;
 }
@@ -417,7 +422,7 @@ static void memory_region_oldmmio_write_accessor(MemoryRegion *mr,
 
     tmp = (*value >> shift) & mask;
     trace_memory_region_ops_write(mr, addr, tmp, size);
-    mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
+    mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, tmp);
 }
 
 static void memory_region_write_accessor(MemoryRegion *mr,
@@ -937,7 +942,7 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
     if (mr->ops->read) {
         data = mr->ops->read(mr->opaque, addr, size);
     } else {
-        data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+        data = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
     }
     trace_memory_region_ops_read(mr, addr, data, size);
     return data;
@@ -992,7 +997,7 @@ static void memory_region_dispatch_write1(MemoryRegion *mr,
     if (mr->ops->write) {
         mr->ops->write(mr->opaque, addr, data, size);
     } else {
-        mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
+        mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, data);
     }
     return;
 
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size
  2013-12-02 14:23 [Qemu-devel] [PATCH 0/4] speedup memory dispatch Paolo Bonzini
@ 2013-12-02 14:40 ` Paolo Bonzini
  2013-12-05 12:19   ` Uri Lublin
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 2/4] memory: streamline common case for memory dispatch Paolo Bonzini
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 8+ messages in thread
From: Paolo Bonzini @ 2013-12-02 14:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: mst, marcel.a

This will simplify the code in the next patch.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/exec/memory.h |  2 ++
 memory.c              | 27 +++++++++++----------------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 480dfbf..cf63adb 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -163,6 +163,8 @@ struct MemoryRegion {
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
     NotifierList iommu_notify;
+
+    uint8_t min_access_size, max_access_size;
 };
 
 typedef struct MemoryListener MemoryListener;
diff --git a/memory.c b/memory.c
index 28f6449..56e54aa 100644
--- a/memory.c
+++ b/memory.c
@@ -443,8 +443,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
 static void access_with_adjusted_size(hwaddr addr,
                                       uint64_t *value,
                                       unsigned size,
-                                      unsigned access_size_min,
-                                      unsigned access_size_max,
                                       void (*access)(MemoryRegion *mr,
                                                      hwaddr addr,
                                                      uint64_t *value,
@@ -457,15 +455,8 @@ static void access_with_adjusted_size(hwaddr addr,
     unsigned access_size;
     unsigned i;
 
-    if (!access_size_min) {
-        access_size_min = 1;
-    }
-    if (!access_size_max) {
-        access_size_max = 4;
-    }
-
     /* FIXME: support unaligned access? */
-    access_size = MAX(MIN(size, access_size_max), access_size_min);
+    access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
     access_mask = -1ULL >> (64 - access_size * 8);
     if (memory_region_big_endian(mr)) {
         for (i = 0; i < size; i += access_size) {
@@ -942,11 +933,9 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
 
     if (mr->ops->read) {
         access_with_adjusted_size(addr, &data, size,
-                                  mr->ops->impl.min_access_size,
-                                  mr->ops->impl.max_access_size,
                                   memory_region_read_accessor, mr);
     } else {
-        access_with_adjusted_size(addr, &data, size, 1, 4,
+        access_with_adjusted_size(addr, &data, size,
                                   memory_region_oldmmio_read_accessor, mr);
     }
 
@@ -982,11 +971,9 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
 
     if (mr->ops->write) {
         access_with_adjusted_size(addr, &data, size,
-                                  mr->ops->impl.min_access_size,
-                                  mr->ops->impl.max_access_size,
                                   memory_region_write_accessor, mr);
     } else {
-        access_with_adjusted_size(addr, &data, size, 1, 4,
+        access_with_adjusted_size(addr, &data, size,
                                   memory_region_oldmmio_write_accessor, mr);
     }
     return false;
@@ -1004,6 +991,14 @@ void memory_region_init_io(MemoryRegion *mr,
     mr->opaque = opaque;
     mr->terminates = true;
     mr->ram_addr = ~(ram_addr_t)0;
+
+    if (mr->ops->read) {
+        mr->min_access_size = mr->ops->impl.min_access_size ? : 1;
+        mr->max_access_size = mr->ops->impl.max_access_size ? : 4;
+    } else {
+        mr->min_access_size = 1;
+        mr->max_access_size = 4;
+    }
 }
 
 void memory_region_init_ram(MemoryRegion *mr,
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [Qemu-devel] [PATCH 2/4] memory: streamline common case for memory dispatch
  2013-12-02 14:23 [Qemu-devel] [PATCH 0/4] speedup memory dispatch Paolo Bonzini
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size Paolo Bonzini
@ 2013-12-02 14:40 ` Paolo Bonzini
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 3/4] memory: hoist coalesced MMIO flush Paolo Bonzini
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Paolo Bonzini @ 2013-12-02 14:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: mst, marcel.a

In the common case where there is no combining or splitting,
access_with_adjusted_size is adding a lot of overhead.  Call
the MMIO ops directly in that case.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 56 insertions(+), 12 deletions(-)

diff --git a/memory.c b/memory.c
index 56e54aa..1ade19c 100644
--- a/memory.c
+++ b/memory.c
@@ -443,6 +443,7 @@ static void memory_region_write_accessor(MemoryRegion *mr,
 static void access_with_adjusted_size(hwaddr addr,
                                       uint64_t *value,
                                       unsigned size,
+                                      unsigned access_size,
                                       void (*access)(MemoryRegion *mr,
                                                      hwaddr addr,
                                                      uint64_t *value,
@@ -452,11 +453,9 @@ static void access_with_adjusted_size(hwaddr addr,
                                       MemoryRegion *mr)
 {
     uint64_t access_mask;
-    unsigned access_size;
     unsigned i;
 
     /* FIXME: support unaligned access? */
-    access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
     access_mask = -1ULL >> (64 - access_size * 8);
     if (memory_region_big_endian(mr)) {
         for (i = 0; i < size; i += access_size) {
@@ -929,13 +928,32 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
                                              hwaddr addr,
                                              unsigned size)
 {
+    unsigned access_size;
     uint64_t data = 0;
 
+    if (size < mr->min_access_size) {
+        access_size = mr->min_access_size;
+        goto adjusted;
+    }
+    if (size > mr->max_access_size) {
+        access_size = mr->max_access_size;
+        goto adjusted;
+    }
+
+    if (mr->ops->read) {
+        data = mr->ops->read(mr->opaque, addr, size);
+    } else {
+        data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+    }
+    trace_memory_region_ops_read(mr, addr, data, size);
+    return data;
+
+adjusted:
     if (mr->ops->read) {
-        access_with_adjusted_size(addr, &data, size,
+        access_with_adjusted_size(addr, &data, size, access_size,
                                   memory_region_read_accessor, mr);
     } else {
-        access_with_adjusted_size(addr, &data, size,
+        access_with_adjusted_size(addr, &data, size, access_size,
                                   memory_region_oldmmio_read_accessor, mr);
     }
 
@@ -957,6 +975,39 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
     return false;
 }
 
+static void memory_region_dispatch_write1(MemoryRegion *mr,
+                                          hwaddr addr,
+                                          uint64_t data,
+                                          unsigned size)
+{
+    unsigned access_size;
+    if (size < mr->min_access_size) {
+        access_size = mr->min_access_size;
+        goto adjusted;
+    }
+    if (size > mr->max_access_size) {
+        access_size = mr->max_access_size;
+        goto adjusted;
+    }
+
+    trace_memory_region_ops_write(mr, addr, data, size);
+    if (mr->ops->write) {
+        mr->ops->write(mr->opaque, addr, data, size);
+    } else {
+        mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
+    }
+    return;
+
+adjusted:
+    if (mr->ops->write) {
+        access_with_adjusted_size(addr, &data, size, access_size,
+                                  memory_region_write_accessor, mr);
+    } else {
+        access_with_adjusted_size(addr, &data, size, access_size,
+                                  memory_region_oldmmio_write_accessor, mr);
+    }
+}
+
 static bool memory_region_dispatch_write(MemoryRegion *mr,
                                          hwaddr addr,
                                          uint64_t data,
@@ -968,14 +1019,7 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
     }
 
     adjust_endianness(mr, &data, size);
-
-    if (mr->ops->write) {
-        access_with_adjusted_size(addr, &data, size,
-                                  memory_region_write_accessor, mr);
-    } else {
-        access_with_adjusted_size(addr, &data, size,
-                                  memory_region_oldmmio_write_accessor, mr);
-    }
+    memory_region_dispatch_write1(mr, addr, data, size);
     return false;
 }
 
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [Qemu-devel] [PATCH 3/4] memory: hoist coalesced MMIO flush
  2013-12-02 14:23 [Qemu-devel] [PATCH 0/4] speedup memory dispatch Paolo Bonzini
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size Paolo Bonzini
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 2/4] memory: streamline common case for memory dispatch Paolo Bonzini
@ 2013-12-02 14:40 ` Paolo Bonzini
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 4/4] memory: small tweaks Paolo Bonzini
  2013-12-10 11:55 ` [Qemu-devel] [PATCH 0/4] speedup memory dispatch Marcel Apfelbaum
  4 siblings, 0 replies; 8+ messages in thread
From: Paolo Bonzini @ 2013-12-02 14:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: mst, marcel.a

No need to flush the coalesced MMIO buffer multiple times when combining
multiple accesses into one.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/memory.c b/memory.c
index 1ade19c..495e693 100644
--- a/memory.c
+++ b/memory.c
@@ -401,9 +401,6 @@ static void memory_region_read_accessor(MemoryRegion *mr,
 {
     uint64_t tmp;
 
-    if (mr->flush_coalesced_mmio) {
-        qemu_flush_coalesced_mmio_buffer();
-    }
     tmp = mr->ops->read(mr->opaque, addr, size);
     trace_memory_region_ops_read(mr, addr, tmp, size);
     *value |= (tmp & mask) << shift;
@@ -432,9 +429,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
 {
     uint64_t tmp;
 
-    if (mr->flush_coalesced_mmio) {
-        qemu_flush_coalesced_mmio_buffer();
-    }
     tmp = (*value >> shift) & mask;
     trace_memory_region_ops_write(mr, addr, tmp, size);
     mr->ops->write(mr->opaque, addr, tmp, size);
@@ -965,6 +959,10 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
                                         uint64_t *pval,
                                         unsigned size)
 {
+    if (mr->flush_coalesced_mmio) {
+        qemu_flush_coalesced_mmio_buffer();
+    }
+
     if (!memory_region_access_valid(mr, addr, size, false)) {
         *pval = unassigned_mem_read(mr, addr, size);
         return true;
@@ -1013,6 +1011,10 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
                                          uint64_t data,
                                          unsigned size)
 {
+    if (mr->flush_coalesced_mmio) {
+        qemu_flush_coalesced_mmio_buffer();
+    }
+
     if (!memory_region_access_valid(mr, addr, size, true)) {
         unassigned_mem_write(mr, addr, data, size);
         return true;
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [Qemu-devel] [PATCH 4/4] memory: small tweaks
  2013-12-02 14:23 [Qemu-devel] [PATCH 0/4] speedup memory dispatch Paolo Bonzini
                   ` (2 preceding siblings ...)
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 3/4] memory: hoist coalesced MMIO flush Paolo Bonzini
@ 2013-12-02 14:40 ` Paolo Bonzini
  2013-12-10 11:55 ` [Qemu-devel] [PATCH 0/4] speedup memory dispatch Marcel Apfelbaum
  4 siblings, 0 replies; 8+ messages in thread
From: Paolo Bonzini @ 2013-12-02 14:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: mst, marcel.a

Make adjust_endianness inline, and do not use a ctz instruction
when a shift will do.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/memory.c b/memory.c
index 495e693..d3b0dce 100644
--- a/memory.c
+++ b/memory.c
@@ -357,7 +357,7 @@ static bool memory_region_wrong_endianness(MemoryRegion *mr)
 #endif
 }
 
-static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
+static inline void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
 {
     if (memory_region_wrong_endianness(mr)) {
         switch (size) {
@@ -378,6 +378,11 @@ static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
     }
 }
 
+static inline int ctz3(unsigned size)
+{
+    return size >> 1;
+}
+
 static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
                                                 hwaddr addr,
                                                 uint64_t *value,
@@ -387,7 +392,7 @@ static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
 {
     uint64_t tmp;
 
-    tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+    tmp = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
     trace_memory_region_ops_read(mr, addr, tmp, size);
     *value |= (tmp & mask) << shift;
 }
@@ -417,7 +422,7 @@ static void memory_region_oldmmio_write_accessor(MemoryRegion *mr,
 
     tmp = (*value >> shift) & mask;
     trace_memory_region_ops_write(mr, addr, tmp, size);
-    mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
+    mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, tmp);
 }
 
 static void memory_region_write_accessor(MemoryRegion *mr,
@@ -937,7 +942,7 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
     if (mr->ops->read) {
         data = mr->ops->read(mr->opaque, addr, size);
     } else {
-        data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+        data = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
     }
     trace_memory_region_ops_read(mr, addr, data, size);
     return data;
@@ -992,7 +997,7 @@ static void memory_region_dispatch_write1(MemoryRegion *mr,
     if (mr->ops->write) {
         mr->ops->write(mr->opaque, addr, data, size);
     } else {
-        mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
+        mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, data);
     }
     return;
 
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size Paolo Bonzini
@ 2013-12-05 12:19   ` Uri Lublin
  2013-12-05 12:20     ` Paolo Bonzini
  0 siblings, 1 reply; 8+ messages in thread
From: Uri Lublin @ 2013-12-05 12:19 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

On 12/02/2013 04:40 PM, Paolo Bonzini wrote:
> This will simplify the code in the next patch.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   include/exec/memory.h |  2 ++
>   memory.c              | 27 +++++++++++----------------
>   2 files changed, 13 insertions(+), 16 deletions(-)
>
>   
>   typedef struct MemoryListener MemoryListener;
> diff --git a/memory.c b/memory.c
> index 28f6449..56e54aa 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -443,8 +443,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
>   static void access_with_adjusted_size(hwaddr addr,
>                                         uint64_t *value,
>                                         unsigned size,
> -                                      unsigned access_size_min,
> -                                      unsigned access_size_max,
>                                         void (*access)(MemoryRegion *mr,
>                                                        hwaddr addr,
>                                                        uint64_t *value,
> @@ -457,15 +455,8 @@ static void access_with_adjusted_size(hwaddr addr,
>       unsigned access_size;
>       unsigned i;
>   
> -    if (!access_size_min) {
> -        access_size_min = 1;
> -    }
> -    if (!access_size_max) {
> -        access_size_max = 4;
> -    }
> -
>       /* FIXME: support unaligned access? */
> -    access_size = MAX(MIN(size, access_size_max), access_size_min);
> +    access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
Hi Paolo,

Here the second argument of MIN() should be mr->max_access_size

[ btw, MAX(MIN(a,b), b) = b ]

Regards,
     Uri.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size
  2013-12-05 12:19   ` Uri Lublin
@ 2013-12-05 12:20     ` Paolo Bonzini
  0 siblings, 0 replies; 8+ messages in thread
From: Paolo Bonzini @ 2013-12-05 12:20 UTC (permalink / raw)
  To: uril; +Cc: qemu-devel

Il 05/12/2013 13:19, Uri Lublin ha scritto:
>>
>>       /* FIXME: support unaligned access? */
>> -    access_size = MAX(MIN(size, access_size_max), access_size_min);
>> +    access_size = MAX(MIN(size, mr->min_access_size),
>> mr->min_access_size);
> Hi Paolo,
> 
> Here it should be                       mr->max_access_size
> 
> [ btw, MAX(MIN(a,b), b) = b ]

Thanks.

Paolo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 0/4] speedup memory dispatch
  2013-12-02 14:23 [Qemu-devel] [PATCH 0/4] speedup memory dispatch Paolo Bonzini
                   ` (3 preceding siblings ...)
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 4/4] memory: small tweaks Paolo Bonzini
@ 2013-12-10 11:55 ` Marcel Apfelbaum
  4 siblings, 0 replies; 8+ messages in thread
From: Marcel Apfelbaum @ 2013-12-10 11:55 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, mst

On Mon, 2013-12-02 at 15:23 +0100, Paolo Bonzini wrote:
> This avoids useless masking and shifting when a single call to the
> MemoryRegion ops will do.  It cuts 30 cycles off the common case
> of memory dispatch (out of ~150).
> 
> Paolo Bonzini (4):
>   memory: cache min/max_access_size
>   memory: streamline common case for memory dispatch
>   memory: hoist coalesced MMIO flush
>   memory: small tweaks
> 
>  include/exec/memory.h |   2 +
>  memory.c              | 114 +++++++++++++++++++++++++++++++++++---------------
>  2 files changed, 82 insertions(+), 34 deletions(-)
> 
> -- 
> 1.8.4.2
I saw a 20% performance boost on tcg (VMEXIT test)!
It looks OK to me,
Reviewed-by: Marcel Apfelbaum <marcel.a@redhat.com>

> 
> From 8324dd46284285cec1de394ce2fc3fb449e776d6 Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:29:41 +0100
> Subject: [PATCH 1/4] memory: cache min/max_access_size
> 
> This will simplify the code in the next patch.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  include/exec/memory.h |  2 ++
>  memory.c              | 27 +++++++++++----------------
>  2 files changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 480dfbf..cf63adb 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -163,6 +163,8 @@ struct MemoryRegion {
>      unsigned ioeventfd_nb;
>      MemoryRegionIoeventfd *ioeventfds;
>      NotifierList iommu_notify;
> +
> +    uint8_t min_access_size, max_access_size;
>  };
>  
>  typedef struct MemoryListener MemoryListener;
> diff --git a/memory.c b/memory.c
> index 28f6449..56e54aa 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -443,8 +443,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
>  static void access_with_adjusted_size(hwaddr addr,
>                                        uint64_t *value,
>                                        unsigned size,
> -                                      unsigned access_size_min,
> -                                      unsigned access_size_max,
>                                        void (*access)(MemoryRegion *mr,
>                                                       hwaddr addr,
>                                                       uint64_t *value,
> @@ -457,15 +455,8 @@ static void access_with_adjusted_size(hwaddr addr,
>      unsigned access_size;
>      unsigned i;
>  
> -    if (!access_size_min) {
> -        access_size_min = 1;
> -    }
> -    if (!access_size_max) {
> -        access_size_max = 4;
> -    }
> -
>      /* FIXME: support unaligned access? */
> -    access_size = MAX(MIN(size, access_size_max), access_size_min);
> +    access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
>      access_mask = -1ULL >> (64 - access_size * 8);
>      if (memory_region_big_endian(mr)) {
>          for (i = 0; i < size; i += access_size) {
> @@ -942,11 +933,9 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
>  
>      if (mr->ops->read) {
>          access_with_adjusted_size(addr, &data, size,
> -                                  mr->ops->impl.min_access_size,
> -                                  mr->ops->impl.max_access_size,
>                                    memory_region_read_accessor, mr);
>      } else {
> -        access_with_adjusted_size(addr, &data, size, 1, 4,
> +        access_with_adjusted_size(addr, &data, size,
>                                    memory_region_oldmmio_read_accessor, mr);
>      }
>  
> @@ -982,11 +971,9 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
>  
>      if (mr->ops->write) {
>          access_with_adjusted_size(addr, &data, size,
> -                                  mr->ops->impl.min_access_size,
> -                                  mr->ops->impl.max_access_size,
>                                    memory_region_write_accessor, mr);
>      } else {
> -        access_with_adjusted_size(addr, &data, size, 1, 4,
> +        access_with_adjusted_size(addr, &data, size,
>                                    memory_region_oldmmio_write_accessor, mr);
>      }
>      return false;
> @@ -1004,6 +991,14 @@ void memory_region_init_io(MemoryRegion *mr,
>      mr->opaque = opaque;
>      mr->terminates = true;
>      mr->ram_addr = ~(ram_addr_t)0;
> +
> +    if (mr->ops->read) {
> +        mr->min_access_size = mr->ops->impl.min_access_size ? : 1;
> +        mr->max_access_size = mr->ops->impl.max_access_size ? : 4;
> +    } else {
> +        mr->min_access_size = 1;
> +        mr->max_access_size = 4;
> +    }
>  }
>  
>  void memory_region_init_ram(MemoryRegion *mr,
> -- 
> 1.8.4.2
> 
> 
> From 2afa3d3451dbfc76b3c6a28af1e7cdd3ab50c7ec Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:39:18 +0100
> Subject: [PATCH 2/4] memory: streamline common case for memory dispatch
> 
> In the common case where there is no combining or splitting,
> access_with_adjusted_size is adding a lot of overhead.  Call
> the MMIO ops directly in that case.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 56 insertions(+), 12 deletions(-)
> 
> diff --git a/memory.c b/memory.c
> index 56e54aa..1ade19c 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -443,6 +443,7 @@ static void memory_region_write_accessor(MemoryRegion *mr,
>  static void access_with_adjusted_size(hwaddr addr,
>                                        uint64_t *value,
>                                        unsigned size,
> +                                      unsigned access_size,
>                                        void (*access)(MemoryRegion *mr,
>                                                       hwaddr addr,
>                                                       uint64_t *value,
> @@ -452,11 +453,9 @@ static void access_with_adjusted_size(hwaddr addr,
>                                        MemoryRegion *mr)
>  {
>      uint64_t access_mask;
> -    unsigned access_size;
>      unsigned i;
>  
>      /* FIXME: support unaligned access? */
> -    access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
>      access_mask = -1ULL >> (64 - access_size * 8);
>      if (memory_region_big_endian(mr)) {
>          for (i = 0; i < size; i += access_size) {
> @@ -929,13 +928,32 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
>                                               hwaddr addr,
>                                               unsigned size)
>  {
> +    unsigned access_size;
>      uint64_t data = 0;
>  
> +    if (size < mr->min_access_size) {
> +        access_size = mr->min_access_size;
> +        goto adjusted;
> +    }
> +    if (size > mr->max_access_size) {
> +        access_size = mr->max_access_size;
> +        goto adjusted;
> +    }
> +
> +    if (mr->ops->read) {
> +        data = mr->ops->read(mr->opaque, addr, size);
> +    } else {
> +        data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
> +    }
> +    trace_memory_region_ops_read(mr, addr, data, size);
> +    return data;
> +
> +adjusted:
>      if (mr->ops->read) {
> -        access_with_adjusted_size(addr, &data, size,
> +        access_with_adjusted_size(addr, &data, size, access_size,
>                                    memory_region_read_accessor, mr);
>      } else {
> -        access_with_adjusted_size(addr, &data, size,
> +        access_with_adjusted_size(addr, &data, size, access_size,
>                                    memory_region_oldmmio_read_accessor, mr);
>      }
>  
> @@ -957,6 +975,39 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
>      return false;
>  }
>  
> +static void memory_region_dispatch_write1(MemoryRegion *mr,
> +                                          hwaddr addr,
> +                                          uint64_t data,
> +                                          unsigned size)
> +{
> +    unsigned access_size;
> +    if (size < mr->min_access_size) {
> +        access_size = mr->min_access_size;
> +        goto adjusted;
> +    }
> +    if (size > mr->max_access_size) {
> +        access_size = mr->max_access_size;
> +        goto adjusted;
> +    }
> +
> +    trace_memory_region_ops_write(mr, addr, data, size);
> +    if (mr->ops->write) {
> +        mr->ops->write(mr->opaque, addr, data, size);
> +    } else {
> +        mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
> +    }
> +    return;
> +
> +adjusted:
> +    if (mr->ops->write) {
> +        access_with_adjusted_size(addr, &data, size, access_size,
> +                                  memory_region_write_accessor, mr);
> +    } else {
> +        access_with_adjusted_size(addr, &data, size, access_size,
> +                                  memory_region_oldmmio_write_accessor, mr);
> +    }
> +}
> +
>  static bool memory_region_dispatch_write(MemoryRegion *mr,
>                                           hwaddr addr,
>                                           uint64_t data,
> @@ -968,14 +1019,7 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
>      }
>  
>      adjust_endianness(mr, &data, size);
> -
> -    if (mr->ops->write) {
> -        access_with_adjusted_size(addr, &data, size,
> -                                  memory_region_write_accessor, mr);
> -    } else {
> -        access_with_adjusted_size(addr, &data, size,
> -                                  memory_region_oldmmio_write_accessor, mr);
> -    }
> +    memory_region_dispatch_write1(mr, addr, data, size);
>      return false;
>  }
>  
> -- 
> 1.8.4.2
> 
> 
> From c65bf3cd93264eac93bbfe3a5974b8d15a1599b0 Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:49:52 +0100
> Subject: [PATCH 3/4] memory: hoist coalesced MMIO flush
> 
> No need to flush the coalesced MMIO buffer multiple times when combining
> multiple accesses into one.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  memory.c | 14 ++++++++------
>  1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/memory.c b/memory.c
> index 1ade19c..495e693 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -401,9 +401,6 @@ static void memory_region_read_accessor(MemoryRegion *mr,
>  {
>      uint64_t tmp;
>  
> -    if (mr->flush_coalesced_mmio) {
> -        qemu_flush_coalesced_mmio_buffer();
> -    }
>      tmp = mr->ops->read(mr->opaque, addr, size);
>      trace_memory_region_ops_read(mr, addr, tmp, size);
>      *value |= (tmp & mask) << shift;
> @@ -432,9 +429,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
>  {
>      uint64_t tmp;
>  
> -    if (mr->flush_coalesced_mmio) {
> -        qemu_flush_coalesced_mmio_buffer();
> -    }
>      tmp = (*value >> shift) & mask;
>      trace_memory_region_ops_write(mr, addr, tmp, size);
>      mr->ops->write(mr->opaque, addr, tmp, size);
> @@ -965,6 +959,10 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
>                                          uint64_t *pval,
>                                          unsigned size)
>  {
> +    if (mr->flush_coalesced_mmio) {
> +        qemu_flush_coalesced_mmio_buffer();
> +    }
> +
>      if (!memory_region_access_valid(mr, addr, size, false)) {
>          *pval = unassigned_mem_read(mr, addr, size);
>          return true;
> @@ -1013,6 +1011,10 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
>                                           uint64_t data,
>                                           unsigned size)
>  {
> +    if (mr->flush_coalesced_mmio) {
> +        qemu_flush_coalesced_mmio_buffer();
> +    }
> +
>      if (!memory_region_access_valid(mr, addr, size, true)) {
>          unassigned_mem_write(mr, addr, data, size);
>          return true;
> -- 
> 1.8.4.2
> 
> 
> From d13d7f894cb4670568a0bd45d592a9321d9d5dab Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:48:11 +0100
> Subject: [PATCH 4/4] memory: small tweaks
> 
> Make adjust_endianness inline, and do not use a ctz instruction
> when a shift will do.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  memory.c | 15 ++++++++++-----
>  1 file changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/memory.c b/memory.c
> index 495e693..d3b0dce 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -357,7 +357,7 @@ static bool memory_region_wrong_endianness(MemoryRegion *mr)
>  #endif
>  }
>  
> -static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
> +static inline void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
>  {
>      if (memory_region_wrong_endianness(mr)) {
>          switch (size) {
> @@ -378,6 +378,11 @@ static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
>      }
>  }
>  
> +static inline int ctz3(unsigned size)
> +{
> +    return size >> 1;
> +}
> +
>  static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
>                                                  hwaddr addr,
>                                                  uint64_t *value,
> @@ -387,7 +392,7 @@ static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
>  {
>      uint64_t tmp;
>  
> -    tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
> +    tmp = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
>      trace_memory_region_ops_read(mr, addr, tmp, size);
>      *value |= (tmp & mask) << shift;
>  }
> @@ -417,7 +422,7 @@ static void memory_region_oldmmio_write_accessor(MemoryRegion *mr,
>  
>      tmp = (*value >> shift) & mask;
>      trace_memory_region_ops_write(mr, addr, tmp, size);
> -    mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
> +    mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, tmp);
>  }
>  
>  static void memory_region_write_accessor(MemoryRegion *mr,
> @@ -937,7 +942,7 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
>      if (mr->ops->read) {
>          data = mr->ops->read(mr->opaque, addr, size);
>      } else {
> -        data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
> +        data = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
>      }
>      trace_memory_region_ops_read(mr, addr, data, size);
>      return data;
> @@ -992,7 +997,7 @@ static void memory_region_dispatch_write1(MemoryRegion *mr,
>      if (mr->ops->write) {
>          mr->ops->write(mr->opaque, addr, data, size);
>      } else {
> -        mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
> +        mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, data);
>      }
>      return;
>  

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-12-10 11:55 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-02 14:23 [Qemu-devel] [PATCH 0/4] speedup memory dispatch Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size Paolo Bonzini
2013-12-05 12:19   ` Uri Lublin
2013-12-05 12:20     ` Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 2/4] memory: streamline common case for memory dispatch Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 3/4] memory: hoist coalesced MMIO flush Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 4/4] memory: small tweaks Paolo Bonzini
2013-12-10 11:55 ` [Qemu-devel] [PATCH 0/4] speedup memory dispatch Marcel Apfelbaum

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).