From: Marcel Apfelbaum <marcel.a@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-devel@nongnu.org, mst@redhat.com
Subject: Re: [Qemu-devel] [PATCH 0/4] speedup memory dispatch
Date: Tue, 10 Dec 2013 13:55:32 +0200 [thread overview]
Message-ID: <1386676532.10879.30.camel@localhost.localdomain> (raw)
In-Reply-To: <1385994239-19016-1-git-send-email-pbonzini@redhat.com>
On Mon, 2013-12-02 at 15:23 +0100, Paolo Bonzini wrote:
> This avoids useless masking and shifting when a single call to the
> MemoryRegion ops will do. It cuts 30 cycles off the common case
> of memory dispatch (out of ~150).
>
> Paolo Bonzini (4):
> memory: cache min/max_access_size
> memory: streamline common case for memory dispatch
> memory: hoist coalesced MMIO flush
> memory: small tweaks
>
> include/exec/memory.h | 2 +
> memory.c | 114 +++++++++++++++++++++++++++++++++++---------------
> 2 files changed, 82 insertions(+), 34 deletions(-)
>
> --
> 1.8.4.2
I saw a 20% performance boost on tcg (VMEXIT test)!
It looks OK to me,
Reviewed-by: Marcel Apfelbaum <marcel.a@redhat.com>
>
> From 8324dd46284285cec1de394ce2fc3fb449e776d6 Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:29:41 +0100
> Subject: [PATCH 1/4] memory: cache min/max_access_size
>
> This will simplify the code in the next patch.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> include/exec/memory.h | 2 ++
> memory.c | 27 +++++++++++----------------
> 2 files changed, 13 insertions(+), 16 deletions(-)
>
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 480dfbf..cf63adb 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -163,6 +163,8 @@ struct MemoryRegion {
> unsigned ioeventfd_nb;
> MemoryRegionIoeventfd *ioeventfds;
> NotifierList iommu_notify;
> +
> + uint8_t min_access_size, max_access_size;
> };
>
> typedef struct MemoryListener MemoryListener;
> diff --git a/memory.c b/memory.c
> index 28f6449..56e54aa 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -443,8 +443,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
> static void access_with_adjusted_size(hwaddr addr,
> uint64_t *value,
> unsigned size,
> - unsigned access_size_min,
> - unsigned access_size_max,
> void (*access)(MemoryRegion *mr,
> hwaddr addr,
> uint64_t *value,
> @@ -457,15 +455,8 @@ static void access_with_adjusted_size(hwaddr addr,
> unsigned access_size;
> unsigned i;
>
> - if (!access_size_min) {
> - access_size_min = 1;
> - }
> - if (!access_size_max) {
> - access_size_max = 4;
> - }
> -
> /* FIXME: support unaligned access? */
> - access_size = MAX(MIN(size, access_size_max), access_size_min);
> + access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
> access_mask = -1ULL >> (64 - access_size * 8);
> if (memory_region_big_endian(mr)) {
> for (i = 0; i < size; i += access_size) {
> @@ -942,11 +933,9 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
>
> if (mr->ops->read) {
> access_with_adjusted_size(addr, &data, size,
> - mr->ops->impl.min_access_size,
> - mr->ops->impl.max_access_size,
> memory_region_read_accessor, mr);
> } else {
> - access_with_adjusted_size(addr, &data, size, 1, 4,
> + access_with_adjusted_size(addr, &data, size,
> memory_region_oldmmio_read_accessor, mr);
> }
>
> @@ -982,11 +971,9 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
>
> if (mr->ops->write) {
> access_with_adjusted_size(addr, &data, size,
> - mr->ops->impl.min_access_size,
> - mr->ops->impl.max_access_size,
> memory_region_write_accessor, mr);
> } else {
> - access_with_adjusted_size(addr, &data, size, 1, 4,
> + access_with_adjusted_size(addr, &data, size,
> memory_region_oldmmio_write_accessor, mr);
> }
> return false;
> @@ -1004,6 +991,14 @@ void memory_region_init_io(MemoryRegion *mr,
> mr->opaque = opaque;
> mr->terminates = true;
> mr->ram_addr = ~(ram_addr_t)0;
> +
> + if (mr->ops->read) {
> + mr->min_access_size = mr->ops->impl.min_access_size ? : 1;
> + mr->max_access_size = mr->ops->impl.max_access_size ? : 4;
> + } else {
> + mr->min_access_size = 1;
> + mr->max_access_size = 4;
> + }
> }
>
> void memory_region_init_ram(MemoryRegion *mr,
> --
> 1.8.4.2
>
>
> From 2afa3d3451dbfc76b3c6a28af1e7cdd3ab50c7ec Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:39:18 +0100
> Subject: [PATCH 2/4] memory: streamline common case for memory dispatch
>
> In the common case where there is no combining or splitting,
> access_with_adjusted_size is adding a lot of overhead. Call
> the MMIO ops directly in that case.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 56 insertions(+), 12 deletions(-)
>
> diff --git a/memory.c b/memory.c
> index 56e54aa..1ade19c 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -443,6 +443,7 @@ static void memory_region_write_accessor(MemoryRegion *mr,
> static void access_with_adjusted_size(hwaddr addr,
> uint64_t *value,
> unsigned size,
> + unsigned access_size,
> void (*access)(MemoryRegion *mr,
> hwaddr addr,
> uint64_t *value,
> @@ -452,11 +453,9 @@ static void access_with_adjusted_size(hwaddr addr,
> MemoryRegion *mr)
> {
> uint64_t access_mask;
> - unsigned access_size;
> unsigned i;
>
> /* FIXME: support unaligned access? */
> - access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
> access_mask = -1ULL >> (64 - access_size * 8);
> if (memory_region_big_endian(mr)) {
> for (i = 0; i < size; i += access_size) {
> @@ -929,13 +928,32 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
> hwaddr addr,
> unsigned size)
> {
> + unsigned access_size;
> uint64_t data = 0;
>
> + if (size < mr->min_access_size) {
> + access_size = mr->min_access_size;
> + goto adjusted;
> + }
> + if (size > mr->max_access_size) {
> + access_size = mr->max_access_size;
> + goto adjusted;
> + }
> +
> + if (mr->ops->read) {
> + data = mr->ops->read(mr->opaque, addr, size);
> + } else {
> + data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
> + }
> + trace_memory_region_ops_read(mr, addr, data, size);
> + return data;
> +
> +adjusted:
> if (mr->ops->read) {
> - access_with_adjusted_size(addr, &data, size,
> + access_with_adjusted_size(addr, &data, size, access_size,
> memory_region_read_accessor, mr);
> } else {
> - access_with_adjusted_size(addr, &data, size,
> + access_with_adjusted_size(addr, &data, size, access_size,
> memory_region_oldmmio_read_accessor, mr);
> }
>
> @@ -957,6 +975,39 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
> return false;
> }
>
> +static void memory_region_dispatch_write1(MemoryRegion *mr,
> + hwaddr addr,
> + uint64_t data,
> + unsigned size)
> +{
> + unsigned access_size;
> + if (size < mr->min_access_size) {
> + access_size = mr->min_access_size;
> + goto adjusted;
> + }
> + if (size > mr->max_access_size) {
> + access_size = mr->max_access_size;
> + goto adjusted;
> + }
> +
> + trace_memory_region_ops_write(mr, addr, data, size);
> + if (mr->ops->write) {
> + mr->ops->write(mr->opaque, addr, data, size);
> + } else {
> + mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
> + }
> + return;
> +
> +adjusted:
> + if (mr->ops->write) {
> + access_with_adjusted_size(addr, &data, size, access_size,
> + memory_region_write_accessor, mr);
> + } else {
> + access_with_adjusted_size(addr, &data, size, access_size,
> + memory_region_oldmmio_write_accessor, mr);
> + }
> +}
> +
> static bool memory_region_dispatch_write(MemoryRegion *mr,
> hwaddr addr,
> uint64_t data,
> @@ -968,14 +1019,7 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
> }
>
> adjust_endianness(mr, &data, size);
> -
> - if (mr->ops->write) {
> - access_with_adjusted_size(addr, &data, size,
> - memory_region_write_accessor, mr);
> - } else {
> - access_with_adjusted_size(addr, &data, size,
> - memory_region_oldmmio_write_accessor, mr);
> - }
> + memory_region_dispatch_write1(mr, addr, data, size);
> return false;
> }
>
> --
> 1.8.4.2
>
>
> From c65bf3cd93264eac93bbfe3a5974b8d15a1599b0 Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:49:52 +0100
> Subject: [PATCH 3/4] memory: hoist coalesced MMIO flush
>
> No need to flush the coalesced MMIO buffer multiple times when combining
> multiple accesses into one.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> memory.c | 14 ++++++++------
> 1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/memory.c b/memory.c
> index 1ade19c..495e693 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -401,9 +401,6 @@ static void memory_region_read_accessor(MemoryRegion *mr,
> {
> uint64_t tmp;
>
> - if (mr->flush_coalesced_mmio) {
> - qemu_flush_coalesced_mmio_buffer();
> - }
> tmp = mr->ops->read(mr->opaque, addr, size);
> trace_memory_region_ops_read(mr, addr, tmp, size);
> *value |= (tmp & mask) << shift;
> @@ -432,9 +429,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
> {
> uint64_t tmp;
>
> - if (mr->flush_coalesced_mmio) {
> - qemu_flush_coalesced_mmio_buffer();
> - }
> tmp = (*value >> shift) & mask;
> trace_memory_region_ops_write(mr, addr, tmp, size);
> mr->ops->write(mr->opaque, addr, tmp, size);
> @@ -965,6 +959,10 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
> uint64_t *pval,
> unsigned size)
> {
> + if (mr->flush_coalesced_mmio) {
> + qemu_flush_coalesced_mmio_buffer();
> + }
> +
> if (!memory_region_access_valid(mr, addr, size, false)) {
> *pval = unassigned_mem_read(mr, addr, size);
> return true;
> @@ -1013,6 +1011,10 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
> uint64_t data,
> unsigned size)
> {
> + if (mr->flush_coalesced_mmio) {
> + qemu_flush_coalesced_mmio_buffer();
> + }
> +
> if (!memory_region_access_valid(mr, addr, size, true)) {
> unassigned_mem_write(mr, addr, data, size);
> return true;
> --
> 1.8.4.2
>
>
> From d13d7f894cb4670568a0bd45d592a9321d9d5dab Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:48:11 +0100
> Subject: [PATCH 4/4] memory: small tweaks
>
> Make adjust_endianness inline, and do not use a ctz instruction
> when a shift will do.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> memory.c | 15 ++++++++++-----
> 1 file changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/memory.c b/memory.c
> index 495e693..d3b0dce 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -357,7 +357,7 @@ static bool memory_region_wrong_endianness(MemoryRegion *mr)
> #endif
> }
>
> -static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
> +static inline void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
> {
> if (memory_region_wrong_endianness(mr)) {
> switch (size) {
> @@ -378,6 +378,11 @@ static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
> }
> }
>
> +static inline int ctz3(unsigned size)
> +{
> + return size >> 1;
> +}
> +
> static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
> hwaddr addr,
> uint64_t *value,
> @@ -387,7 +392,7 @@ static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
> {
> uint64_t tmp;
>
> - tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
> + tmp = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
> trace_memory_region_ops_read(mr, addr, tmp, size);
> *value |= (tmp & mask) << shift;
> }
> @@ -417,7 +422,7 @@ static void memory_region_oldmmio_write_accessor(MemoryRegion *mr,
>
> tmp = (*value >> shift) & mask;
> trace_memory_region_ops_write(mr, addr, tmp, size);
> - mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
> + mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, tmp);
> }
>
> static void memory_region_write_accessor(MemoryRegion *mr,
> @@ -937,7 +942,7 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
> if (mr->ops->read) {
> data = mr->ops->read(mr->opaque, addr, size);
> } else {
> - data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
> + data = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
> }
> trace_memory_region_ops_read(mr, addr, data, size);
> return data;
> @@ -992,7 +997,7 @@ static void memory_region_dispatch_write1(MemoryRegion *mr,
> if (mr->ops->write) {
> mr->ops->write(mr->opaque, addr, data, size);
> } else {
> - mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
> + mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, data);
> }
> return;
>
prev parent reply other threads:[~2013-12-10 11:55 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-02 14:23 [Qemu-devel] [PATCH 0/4] speedup memory dispatch Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size Paolo Bonzini
2013-12-05 12:19 ` Uri Lublin
2013-12-05 12:20 ` Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 2/4] memory: streamline common case for memory dispatch Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 3/4] memory: hoist coalesced MMIO flush Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 4/4] memory: small tweaks Paolo Bonzini
2013-12-10 11:55 ` Marcel Apfelbaum [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1386676532.10879.30.camel@localhost.localdomain \
--to=marcel.a@redhat.com \
--cc=mst@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.