From: Marcel Apfelbaum <marcel.a@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-devel@nongnu.org, mst@redhat.com
Subject: Re: [Qemu-devel] [PATCH 0/4] speedup memory dispatch
Date: Tue, 10 Dec 2013 13:55:32 +0200 [thread overview]
Message-ID: <1386676532.10879.30.camel@localhost.localdomain> (raw)
In-Reply-To: <1385994239-19016-1-git-send-email-pbonzini@redhat.com>
On Mon, 2013-12-02 at 15:23 +0100, Paolo Bonzini wrote:
> This avoids useless masking and shifting when a single call to the
> MemoryRegion ops will do. It cuts 30 cycles off the common case
> of memory dispatch (out of ~150).
>
> Paolo Bonzini (4):
> memory: cache min/max_access_size
> memory: streamline common case for memory dispatch
> memory: hoist coalesced MMIO flush
> memory: small tweaks
>
> include/exec/memory.h | 2 +
> memory.c | 114 +++++++++++++++++++++++++++++++++++---------------
> 2 files changed, 82 insertions(+), 34 deletions(-)
>
> --
> 1.8.4.2
I saw a 20% performance boost on tcg (VMEXIT test)!
It looks OK to me,
Reviewed-by: Marcel Apfelbaum <marcel.a@redhat.com>
>
> From 8324dd46284285cec1de394ce2fc3fb449e776d6 Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:29:41 +0100
> Subject: [PATCH 1/4] memory: cache min/max_access_size
>
> This will simplify the code in the next patch.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> include/exec/memory.h | 2 ++
> memory.c | 27 +++++++++++----------------
> 2 files changed, 13 insertions(+), 16 deletions(-)
>
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 480dfbf..cf63adb 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -163,6 +163,8 @@ struct MemoryRegion {
> unsigned ioeventfd_nb;
> MemoryRegionIoeventfd *ioeventfds;
> NotifierList iommu_notify;
> +
> + uint8_t min_access_size, max_access_size;
> };
>
> typedef struct MemoryListener MemoryListener;
> diff --git a/memory.c b/memory.c
> index 28f6449..56e54aa 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -443,8 +443,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
> static void access_with_adjusted_size(hwaddr addr,
> uint64_t *value,
> unsigned size,
> - unsigned access_size_min,
> - unsigned access_size_max,
> void (*access)(MemoryRegion *mr,
> hwaddr addr,
> uint64_t *value,
> @@ -457,15 +455,8 @@ static void access_with_adjusted_size(hwaddr addr,
> unsigned access_size;
> unsigned i;
>
> - if (!access_size_min) {
> - access_size_min = 1;
> - }
> - if (!access_size_max) {
> - access_size_max = 4;
> - }
> -
> /* FIXME: support unaligned access? */
> - access_size = MAX(MIN(size, access_size_max), access_size_min);
> + access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
> access_mask = -1ULL >> (64 - access_size * 8);
> if (memory_region_big_endian(mr)) {
> for (i = 0; i < size; i += access_size) {
> @@ -942,11 +933,9 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
>
> if (mr->ops->read) {
> access_with_adjusted_size(addr, &data, size,
> - mr->ops->impl.min_access_size,
> - mr->ops->impl.max_access_size,
> memory_region_read_accessor, mr);
> } else {
> - access_with_adjusted_size(addr, &data, size, 1, 4,
> + access_with_adjusted_size(addr, &data, size,
> memory_region_oldmmio_read_accessor, mr);
> }
>
> @@ -982,11 +971,9 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
>
> if (mr->ops->write) {
> access_with_adjusted_size(addr, &data, size,
> - mr->ops->impl.min_access_size,
> - mr->ops->impl.max_access_size,
> memory_region_write_accessor, mr);
> } else {
> - access_with_adjusted_size(addr, &data, size, 1, 4,
> + access_with_adjusted_size(addr, &data, size,
> memory_region_oldmmio_write_accessor, mr);
> }
> return false;
> @@ -1004,6 +991,14 @@ void memory_region_init_io(MemoryRegion *mr,
> mr->opaque = opaque;
> mr->terminates = true;
> mr->ram_addr = ~(ram_addr_t)0;
> +
> + if (mr->ops->read) {
> + mr->min_access_size = mr->ops->impl.min_access_size ? : 1;
> + mr->max_access_size = mr->ops->impl.max_access_size ? : 4;
> + } else {
> + mr->min_access_size = 1;
> + mr->max_access_size = 4;
> + }
> }
>
> void memory_region_init_ram(MemoryRegion *mr,
> --
> 1.8.4.2
>
>
> From 2afa3d3451dbfc76b3c6a28af1e7cdd3ab50c7ec Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:39:18 +0100
> Subject: [PATCH 2/4] memory: streamline common case for memory dispatch
>
> In the common case where there is no combining or splitting,
> access_with_adjusted_size is adding a lot of overhead. Call
> the MMIO ops directly in that case.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 56 insertions(+), 12 deletions(-)
>
> diff --git a/memory.c b/memory.c
> index 56e54aa..1ade19c 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -443,6 +443,7 @@ static void memory_region_write_accessor(MemoryRegion *mr,
> static void access_with_adjusted_size(hwaddr addr,
> uint64_t *value,
> unsigned size,
> + unsigned access_size,
> void (*access)(MemoryRegion *mr,
> hwaddr addr,
> uint64_t *value,
> @@ -452,11 +453,9 @@ static void access_with_adjusted_size(hwaddr addr,
> MemoryRegion *mr)
> {
> uint64_t access_mask;
> - unsigned access_size;
> unsigned i;
>
> /* FIXME: support unaligned access? */
> - access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
> access_mask = -1ULL >> (64 - access_size * 8);
> if (memory_region_big_endian(mr)) {
> for (i = 0; i < size; i += access_size) {
> @@ -929,13 +928,32 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
> hwaddr addr,
> unsigned size)
> {
> + unsigned access_size;
> uint64_t data = 0;
>
> + if (size < mr->min_access_size) {
> + access_size = mr->min_access_size;
> + goto adjusted;
> + }
> + if (size > mr->max_access_size) {
> + access_size = mr->max_access_size;
> + goto adjusted;
> + }
> +
> + if (mr->ops->read) {
> + data = mr->ops->read(mr->opaque, addr, size);
> + } else {
> + data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
> + }
> + trace_memory_region_ops_read(mr, addr, data, size);
> + return data;
> +
> +adjusted:
> if (mr->ops->read) {
> - access_with_adjusted_size(addr, &data, size,
> + access_with_adjusted_size(addr, &data, size, access_size,
> memory_region_read_accessor, mr);
> } else {
> - access_with_adjusted_size(addr, &data, size,
> + access_with_adjusted_size(addr, &data, size, access_size,
> memory_region_oldmmio_read_accessor, mr);
> }
>
> @@ -957,6 +975,39 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
> return false;
> }
>
> +static void memory_region_dispatch_write1(MemoryRegion *mr,
> + hwaddr addr,
> + uint64_t data,
> + unsigned size)
> +{
> + unsigned access_size;
> + if (size < mr->min_access_size) {
> + access_size = mr->min_access_size;
> + goto adjusted;
> + }
> + if (size > mr->max_access_size) {
> + access_size = mr->max_access_size;
> + goto adjusted;
> + }
> +
> + trace_memory_region_ops_write(mr, addr, data, size);
> + if (mr->ops->write) {
> + mr->ops->write(mr->opaque, addr, data, size);
> + } else {
> + mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
> + }
> + return;
> +
> +adjusted:
> + if (mr->ops->write) {
> + access_with_adjusted_size(addr, &data, size, access_size,
> + memory_region_write_accessor, mr);
> + } else {
> + access_with_adjusted_size(addr, &data, size, access_size,
> + memory_region_oldmmio_write_accessor, mr);
> + }
> +}
> +
> static bool memory_region_dispatch_write(MemoryRegion *mr,
> hwaddr addr,
> uint64_t data,
> @@ -968,14 +1019,7 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
> }
>
> adjust_endianness(mr, &data, size);
> -
> - if (mr->ops->write) {
> - access_with_adjusted_size(addr, &data, size,
> - memory_region_write_accessor, mr);
> - } else {
> - access_with_adjusted_size(addr, &data, size,
> - memory_region_oldmmio_write_accessor, mr);
> - }
> + memory_region_dispatch_write1(mr, addr, data, size);
> return false;
> }
>
> --
> 1.8.4.2
>
>
> From c65bf3cd93264eac93bbfe3a5974b8d15a1599b0 Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:49:52 +0100
> Subject: [PATCH 3/4] memory: hoist coalesced MMIO flush
>
> No need to flush the coalesced MMIO buffer multiple times when combining
> multiple accesses into one.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> memory.c | 14 ++++++++------
> 1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/memory.c b/memory.c
> index 1ade19c..495e693 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -401,9 +401,6 @@ static void memory_region_read_accessor(MemoryRegion *mr,
> {
> uint64_t tmp;
>
> - if (mr->flush_coalesced_mmio) {
> - qemu_flush_coalesced_mmio_buffer();
> - }
> tmp = mr->ops->read(mr->opaque, addr, size);
> trace_memory_region_ops_read(mr, addr, tmp, size);
> *value |= (tmp & mask) << shift;
> @@ -432,9 +429,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
> {
> uint64_t tmp;
>
> - if (mr->flush_coalesced_mmio) {
> - qemu_flush_coalesced_mmio_buffer();
> - }
> tmp = (*value >> shift) & mask;
> trace_memory_region_ops_write(mr, addr, tmp, size);
> mr->ops->write(mr->opaque, addr, tmp, size);
> @@ -965,6 +959,10 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
> uint64_t *pval,
> unsigned size)
> {
> + if (mr->flush_coalesced_mmio) {
> + qemu_flush_coalesced_mmio_buffer();
> + }
> +
> if (!memory_region_access_valid(mr, addr, size, false)) {
> *pval = unassigned_mem_read(mr, addr, size);
> return true;
> @@ -1013,6 +1011,10 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
> uint64_t data,
> unsigned size)
> {
> + if (mr->flush_coalesced_mmio) {
> + qemu_flush_coalesced_mmio_buffer();
> + }
> +
> if (!memory_region_access_valid(mr, addr, size, true)) {
> unassigned_mem_write(mr, addr, data, size);
> return true;
> --
> 1.8.4.2
>
>
> From d13d7f894cb4670568a0bd45d592a9321d9d5dab Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Fri, 8 Nov 2013 11:48:11 +0100
> Subject: [PATCH 4/4] memory: small tweaks
>
> Make adjust_endianness inline, and do not use a ctz instruction
> when a shift will do.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> memory.c | 15 ++++++++++-----
> 1 file changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/memory.c b/memory.c
> index 495e693..d3b0dce 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -357,7 +357,7 @@ static bool memory_region_wrong_endianness(MemoryRegion *mr)
> #endif
> }
>
> -static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
> +static inline void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
> {
> if (memory_region_wrong_endianness(mr)) {
> switch (size) {
> @@ -378,6 +378,11 @@ static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
> }
> }
>
> +static inline int ctz3(unsigned size)
> +{
> + return size >> 1;
> +}
> +
> static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
> hwaddr addr,
> uint64_t *value,
> @@ -387,7 +392,7 @@ static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
> {
> uint64_t tmp;
>
> - tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
> + tmp = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
> trace_memory_region_ops_read(mr, addr, tmp, size);
> *value |= (tmp & mask) << shift;
> }
> @@ -417,7 +422,7 @@ static void memory_region_oldmmio_write_accessor(MemoryRegion *mr,
>
> tmp = (*value >> shift) & mask;
> trace_memory_region_ops_write(mr, addr, tmp, size);
> - mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
> + mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, tmp);
> }
>
> static void memory_region_write_accessor(MemoryRegion *mr,
> @@ -937,7 +942,7 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
> if (mr->ops->read) {
> data = mr->ops->read(mr->opaque, addr, size);
> } else {
> - data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
> + data = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
> }
> trace_memory_region_ops_read(mr, addr, data, size);
> return data;
> @@ -992,7 +997,7 @@ static void memory_region_dispatch_write1(MemoryRegion *mr,
> if (mr->ops->write) {
> mr->ops->write(mr->opaque, addr, data, size);
> } else {
> - mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
> + mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, data);
> }
> return;
>
prev parent reply other threads:[~2013-12-10 11:55 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-02 14:23 [Qemu-devel] [PATCH 0/4] speedup memory dispatch Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size Paolo Bonzini
2013-12-05 12:19 ` Uri Lublin
2013-12-05 12:20 ` Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 2/4] memory: streamline common case for memory dispatch Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 3/4] memory: hoist coalesced MMIO flush Paolo Bonzini
2013-12-02 14:40 ` [Qemu-devel] [PATCH 4/4] memory: small tweaks Paolo Bonzini
2013-12-10 11:55 ` Marcel Apfelbaum [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1386676532.10879.30.camel@localhost.localdomain \
--to=marcel.a@redhat.com \
--cc=mst@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).