All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/4] speedup memory dispatch
@ 2013-12-02 14:23 Paolo Bonzini
  2013-12-02 14:40 ` [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size Paolo Bonzini
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: Paolo Bonzini @ 2013-12-02 14:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: marcel.a, mst

This avoids useless masking and shifting when a single call to the
MemoryRegion ops will do.  It cuts 30 cycles off the common case
of memory dispatch (out of ~150).

Paolo Bonzini (4):
  memory: cache min/max_access_size
  memory: streamline common case for memory dispatch
  memory: hoist coalesced MMIO flush
  memory: small tweaks

 include/exec/memory.h |   2 +
 memory.c              | 114 +++++++++++++++++++++++++++++++++++---------------
 2 files changed, 82 insertions(+), 34 deletions(-)

-- 
1.8.4.2

>From 8324dd46284285cec1de394ce2fc3fb449e776d6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2013 11:29:41 +0100
Subject: [PATCH 1/4] memory: cache min/max_access_size

This will simplify the code in the next patch.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/exec/memory.h |  2 ++
 memory.c              | 27 +++++++++++----------------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 480dfbf..cf63adb 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -163,6 +163,8 @@ struct MemoryRegion {
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
     NotifierList iommu_notify;
+
+    uint8_t min_access_size, max_access_size;
 };
 
 typedef struct MemoryListener MemoryListener;
diff --git a/memory.c b/memory.c
index 28f6449..56e54aa 100644
--- a/memory.c
+++ b/memory.c
@@ -443,8 +443,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
 static void access_with_adjusted_size(hwaddr addr,
                                       uint64_t *value,
                                       unsigned size,
-                                      unsigned access_size_min,
-                                      unsigned access_size_max,
                                       void (*access)(MemoryRegion *mr,
                                                      hwaddr addr,
                                                      uint64_t *value,
@@ -457,15 +455,8 @@ static void access_with_adjusted_size(hwaddr addr,
     unsigned access_size;
     unsigned i;
 
-    if (!access_size_min) {
-        access_size_min = 1;
-    }
-    if (!access_size_max) {
-        access_size_max = 4;
-    }
-
     /* FIXME: support unaligned access? */
-    access_size = MAX(MIN(size, access_size_max), access_size_min);
+    access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
     access_mask = -1ULL >> (64 - access_size * 8);
     if (memory_region_big_endian(mr)) {
         for (i = 0; i < size; i += access_size) {
@@ -942,11 +933,9 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
 
     if (mr->ops->read) {
         access_with_adjusted_size(addr, &data, size,
-                                  mr->ops->impl.min_access_size,
-                                  mr->ops->impl.max_access_size,
                                   memory_region_read_accessor, mr);
     } else {
-        access_with_adjusted_size(addr, &data, size, 1, 4,
+        access_with_adjusted_size(addr, &data, size,
                                   memory_region_oldmmio_read_accessor, mr);
     }
 
@@ -982,11 +971,9 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
 
     if (mr->ops->write) {
         access_with_adjusted_size(addr, &data, size,
-                                  mr->ops->impl.min_access_size,
-                                  mr->ops->impl.max_access_size,
                                   memory_region_write_accessor, mr);
     } else {
-        access_with_adjusted_size(addr, &data, size, 1, 4,
+        access_with_adjusted_size(addr, &data, size,
                                   memory_region_oldmmio_write_accessor, mr);
     }
     return false;
@@ -1004,6 +991,14 @@ void memory_region_init_io(MemoryRegion *mr,
     mr->opaque = opaque;
     mr->terminates = true;
     mr->ram_addr = ~(ram_addr_t)0;
+
+    if (mr->ops->read) {
+        mr->min_access_size = mr->ops->impl.min_access_size ? : 1;
+        mr->max_access_size = mr->ops->impl.max_access_size ? : 4;
+    } else {
+        mr->min_access_size = 1;
+        mr->max_access_size = 4;
+    }
 }
 
 void memory_region_init_ram(MemoryRegion *mr,
-- 
1.8.4.2


>From 2afa3d3451dbfc76b3c6a28af1e7cdd3ab50c7ec Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2013 11:39:18 +0100
Subject: [PATCH 2/4] memory: streamline common case for memory dispatch

In the common case where there is no combining or splitting,
access_with_adjusted_size is adding a lot of overhead.  Call
the MMIO ops directly in that case.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 56 insertions(+), 12 deletions(-)

diff --git a/memory.c b/memory.c
index 56e54aa..1ade19c 100644
--- a/memory.c
+++ b/memory.c
@@ -443,6 +443,7 @@ static void memory_region_write_accessor(MemoryRegion *mr,
 static void access_with_adjusted_size(hwaddr addr,
                                       uint64_t *value,
                                       unsigned size,
+                                      unsigned access_size,
                                       void (*access)(MemoryRegion *mr,
                                                      hwaddr addr,
                                                      uint64_t *value,
@@ -452,11 +453,9 @@ static void access_with_adjusted_size(hwaddr addr,
                                       MemoryRegion *mr)
 {
     uint64_t access_mask;
-    unsigned access_size;
     unsigned i;
 
     /* FIXME: support unaligned access? */
-    access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
     access_mask = -1ULL >> (64 - access_size * 8);
     if (memory_region_big_endian(mr)) {
         for (i = 0; i < size; i += access_size) {
@@ -929,13 +928,32 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
                                              hwaddr addr,
                                              unsigned size)
 {
+    unsigned access_size;
     uint64_t data = 0;
 
+    if (size < mr->min_access_size) {
+        access_size = mr->min_access_size;
+        goto adjusted;
+    }
+    if (size > mr->max_access_size) {
+        access_size = mr->max_access_size;
+        goto adjusted;
+    }
+
+    if (mr->ops->read) {
+        data = mr->ops->read(mr->opaque, addr, size);
+    } else {
+        data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+    }
+    trace_memory_region_ops_read(mr, addr, data, size);
+    return data;
+
+adjusted:
     if (mr->ops->read) {
-        access_with_adjusted_size(addr, &data, size,
+        access_with_adjusted_size(addr, &data, size, access_size,
                                   memory_region_read_accessor, mr);
     } else {
-        access_with_adjusted_size(addr, &data, size,
+        access_with_adjusted_size(addr, &data, size, access_size,
                                   memory_region_oldmmio_read_accessor, mr);
     }
 
@@ -957,6 +975,39 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
     return false;
 }
 
+static void memory_region_dispatch_write1(MemoryRegion *mr,
+                                          hwaddr addr,
+                                          uint64_t data,
+                                          unsigned size)
+{
+    unsigned access_size;
+    if (size < mr->min_access_size) {
+        access_size = mr->min_access_size;
+        goto adjusted;
+    }
+    if (size > mr->max_access_size) {
+        access_size = mr->max_access_size;
+        goto adjusted;
+    }
+
+    trace_memory_region_ops_write(mr, addr, data, size);
+    if (mr->ops->write) {
+        mr->ops->write(mr->opaque, addr, data, size);
+    } else {
+        mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
+    }
+    return;
+
+adjusted:
+    if (mr->ops->write) {
+        access_with_adjusted_size(addr, &data, size, access_size,
+                                  memory_region_write_accessor, mr);
+    } else {
+        access_with_adjusted_size(addr, &data, size, access_size,
+                                  memory_region_oldmmio_write_accessor, mr);
+    }
+}
+
 static bool memory_region_dispatch_write(MemoryRegion *mr,
                                          hwaddr addr,
                                          uint64_t data,
@@ -968,14 +1019,7 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
     }
 
     adjust_endianness(mr, &data, size);
-
-    if (mr->ops->write) {
-        access_with_adjusted_size(addr, &data, size,
-                                  memory_region_write_accessor, mr);
-    } else {
-        access_with_adjusted_size(addr, &data, size,
-                                  memory_region_oldmmio_write_accessor, mr);
-    }
+    memory_region_dispatch_write1(mr, addr, data, size);
     return false;
 }
 
-- 
1.8.4.2


>From c65bf3cd93264eac93bbfe3a5974b8d15a1599b0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2013 11:49:52 +0100
Subject: [PATCH 3/4] memory: hoist coalesced MMIO flush

No need to flush the coalesced MMIO buffer multiple times when combining
multiple accesses into one.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/memory.c b/memory.c
index 1ade19c..495e693 100644
--- a/memory.c
+++ b/memory.c
@@ -401,9 +401,6 @@ static void memory_region_read_accessor(MemoryRegion *mr,
 {
     uint64_t tmp;
 
-    if (mr->flush_coalesced_mmio) {
-        qemu_flush_coalesced_mmio_buffer();
-    }
     tmp = mr->ops->read(mr->opaque, addr, size);
     trace_memory_region_ops_read(mr, addr, tmp, size);
     *value |= (tmp & mask) << shift;
@@ -432,9 +429,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
 {
     uint64_t tmp;
 
-    if (mr->flush_coalesced_mmio) {
-        qemu_flush_coalesced_mmio_buffer();
-    }
     tmp = (*value >> shift) & mask;
     trace_memory_region_ops_write(mr, addr, tmp, size);
     mr->ops->write(mr->opaque, addr, tmp, size);
@@ -965,6 +959,10 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
                                         uint64_t *pval,
                                         unsigned size)
 {
+    if (mr->flush_coalesced_mmio) {
+        qemu_flush_coalesced_mmio_buffer();
+    }
+
     if (!memory_region_access_valid(mr, addr, size, false)) {
         *pval = unassigned_mem_read(mr, addr, size);
         return true;
@@ -1013,6 +1011,10 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
                                          uint64_t data,
                                          unsigned size)
 {
+    if (mr->flush_coalesced_mmio) {
+        qemu_flush_coalesced_mmio_buffer();
+    }
+
     if (!memory_region_access_valid(mr, addr, size, true)) {
         unassigned_mem_write(mr, addr, data, size);
         return true;
-- 
1.8.4.2


>From d13d7f894cb4670568a0bd45d592a9321d9d5dab Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Nov 2013 11:48:11 +0100
Subject: [PATCH 4/4] memory: small tweaks

Make adjust_endianness inline, and do not use a ctz instruction
when a shift will do.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/memory.c b/memory.c
index 495e693..d3b0dce 100644
--- a/memory.c
+++ b/memory.c
@@ -357,7 +357,7 @@ static bool memory_region_wrong_endianness(MemoryRegion *mr)
 #endif
 }
 
-static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
+static inline void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
 {
     if (memory_region_wrong_endianness(mr)) {
         switch (size) {
@@ -378,6 +378,11 @@ static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
     }
 }
 
+static inline int ctz3(unsigned size)
+{
+    return size >> 1;
+}
+
 static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
                                                 hwaddr addr,
                                                 uint64_t *value,
@@ -387,7 +392,7 @@ static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
 {
     uint64_t tmp;
 
-    tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+    tmp = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
     trace_memory_region_ops_read(mr, addr, tmp, size);
     *value |= (tmp & mask) << shift;
 }
@@ -417,7 +422,7 @@ static void memory_region_oldmmio_write_accessor(MemoryRegion *mr,
 
     tmp = (*value >> shift) & mask;
     trace_memory_region_ops_write(mr, addr, tmp, size);
-    mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
+    mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, tmp);
 }
 
 static void memory_region_write_accessor(MemoryRegion *mr,
@@ -937,7 +942,7 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
     if (mr->ops->read) {
         data = mr->ops->read(mr->opaque, addr, size);
     } else {
-        data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+        data = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
     }
     trace_memory_region_ops_read(mr, addr, data, size);
     return data;
@@ -992,7 +997,7 @@ static void memory_region_dispatch_write1(MemoryRegion *mr,
     if (mr->ops->write) {
         mr->ops->write(mr->opaque, addr, data, size);
     } else {
-        mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
+        mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, data);
     }
     return;
 
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-12-10 11:55 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-12-02 14:23 [Qemu-devel] [PATCH 0/4] speedup memory dispatch Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size Paolo Bonzini
2013-12-05 12:19   ` Uri Lublin
2013-12-05 12:20     ` Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 2/4] memory: streamline common case for memory dispatch Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 3/4] memory: hoist coalesced MMIO flush Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 4/4] memory: small tweaks Paolo Bonzini
2013-12-10 11:55 ` [Qemu-devel] [PATCH 0/4] speedup memory dispatch Marcel Apfelbaum

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.