qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
To: "Alex Bennée" <alex.bennee@linaro.org>, qemu-devel@nongnu.org
Cc: Richard Henderson <richard.henderson@linaro.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Riku Voipio <riku.voipio@iki.fi>
Subject: Re: [RFC PATCH] accel/tcg: add tracepoints for cpu_loop_exit_atomic
Date: Fri, 4 Oct 2024 17:09:13 -0700	[thread overview]
Message-ID: <c6df1bc2-f725-4f34-9beb-81a370173d00@linaro.org> (raw)
In-Reply-To: <20241004135226.903570-1-alex.bennee@linaro.org>

On 10/4/24 06:52, Alex Bennée wrote:
> We try to avoid using cpu_loop_exit_atomic as it brings in an all-core
> sync point. However, on some cpu/kernel/benchmark combinations it is
> starting to show up in the performance profile. To make it easier to
> see what's going on, add tracepoints for the slow path so we can see
> what is triggering the wait.
> 
> It seems that for a modern CPU the fallback can be hit quite often, for example:
> 
> ./qemu-system-aarch64 \
>             -machine type=virt,virtualization=on,pflash0=rom,pflash1=efivars,gic-version=max \
>             -smp 4 \
>             -accel tcg \
>             -device virtio-net-pci,netdev=unet \
>             -device virtio-scsi-pci \
>             -device scsi-hd,drive=hd \
>             -netdev user,id=unet,hostfwd=tcp::2222-:22 \
>             -blockdev driver=raw,node-name=hd,file.driver=host_device,file.filename=/dev/zen-ssd2/trixie-arm64,discard=unmap \
>             -serial mon:stdio \
>             -blockdev node-name=rom,driver=file,filename=(pwd)/pc-bios/edk2-aarch64-code.fd,read-only=true \
>             -blockdev node-name=efivars,driver=file,filename=$HOME/images/qemu-arm64-efivars \
>             -m 8192 \
>             -object memory-backend-memfd,id=mem,size=8G,share=on \
>             -kernel /home/alex/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image -append "root=/dev/sda2 console=ttyAMA0 systemd.unit=benchmark-stress-ng.service" \
>             -display none -d trace:load_atom\*_fallback,trace:store_atom\*_fallback
> 
> With:
> 
>    -cpu neoverse-v1,pauth-impdef=on => 2203343
> 
> With:
> 
>    -cpu cortex-a76 => 0
> 
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Cc: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
>   accel/tcg/user-exec.c          |  2 +-
>   accel/tcg/ldst_atomicity.c.inc |  9 +++++++++
>   accel/tcg/trace-events         | 12 ++++++++++++
>   3 files changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
> index 7ddc47b0ba..f3a440ca29 100644
> --- a/accel/tcg/user-exec.c
> +++ b/accel/tcg/user-exec.c
> @@ -29,7 +29,7 @@
>   #include "exec/page-protection.h"
>   #include "exec/helper-proto.h"
>   #include "qemu/atomic128.h"
> -#include "trace/trace-root.h"
> +#include "trace.h"
>   #include "tcg/tcg-ldst.h"
>   #include "internal-common.h"
>   #include "internal-target.h"
> diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
> index 134da3c1da..c735add261 100644
> --- a/accel/tcg/ldst_atomicity.c.inc
> +++ b/accel/tcg/ldst_atomicity.c.inc
> @@ -168,6 +168,7 @@ static uint64_t load_atomic8_or_exit(CPUState *cpu, uintptr_t ra, void *pv)
>   #endif
>   
>       /* Ultimate fallback: re-execute in serial context. */
> +    trace_load_atom8_or_exit_fallback(ra);
>       cpu_loop_exit_atomic(cpu, ra);
>   }
>   
> @@ -212,6 +213,7 @@ static Int128 load_atomic16_or_exit(CPUState *cpu, uintptr_t ra, void *pv)
>       }
>   
>       /* Ultimate fallback: re-execute in serial context. */
> +    trace_load_atom16_or_exit_fallback(ra);
>       cpu_loop_exit_atomic(cpu, ra);
>   }
>   
> @@ -519,6 +521,7 @@ static uint64_t load_atom_8(CPUState *cpu, uintptr_t ra,
>           if (HAVE_al8) {
>               return load_atom_extract_al8x2(pv);
>           }
> +        trace_load_atom8_fallback(memop, ra);
>           cpu_loop_exit_atomic(cpu, ra);
>       default:
>           g_assert_not_reached();
> @@ -563,6 +566,7 @@ static Int128 load_atom_16(CPUState *cpu, uintptr_t ra,
>           break;
>       case MO_64:
>           if (!HAVE_al8) {
> +            trace_load_atom16_fallback(memop, ra);
>               cpu_loop_exit_atomic(cpu, ra);
>           }
>           a = load_atomic8(pv);
> @@ -570,6 +574,7 @@ static Int128 load_atom_16(CPUState *cpu, uintptr_t ra,
>           break;
>       case -MO_64:
>           if (!HAVE_al8) {
> +            trace_load_atom16_fallback(memop, ra);
>               cpu_loop_exit_atomic(cpu, ra);
>           }
>           a = load_atom_extract_al8x2(pv);
> @@ -897,6 +902,7 @@ static void store_atom_2(CPUState *cpu, uintptr_t ra,
>           g_assert_not_reached();
>       }
>   
> +    trace_store_atom2_fallback(memop, ra);
>       cpu_loop_exit_atomic(cpu, ra);
>   }
>   
> @@ -961,6 +967,7 @@ static void store_atom_4(CPUState *cpu, uintptr_t ra,
>                   return;
>               }
>           }
> +        trace_store_atom4_fallback(memop, ra);
>           cpu_loop_exit_atomic(cpu, ra);
>       default:
>           g_assert_not_reached();
> @@ -1029,6 +1036,7 @@ static void store_atom_8(CPUState *cpu, uintptr_t ra,
>       default:
>           g_assert_not_reached();
>       }
> +    trace_store_atom8_fallback(memop, ra);
>       cpu_loop_exit_atomic(cpu, ra);
>   }
>   
> @@ -1107,5 +1115,6 @@ static void store_atom_16(CPUState *cpu, uintptr_t ra,
>       default:
>           g_assert_not_reached();
>       }
> +    trace_store_atom16_fallback(memop, ra);
>       cpu_loop_exit_atomic(cpu, ra);
>   }
> diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events
> index 4e9b450520..0ce69d744f 100644
> --- a/accel/tcg/trace-events
> +++ b/accel/tcg/trace-events
> @@ -12,3 +12,15 @@ memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64
>   
>   # translate-all.c
>   translate_block(void *tb, uintptr_t pc, const void *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
> +
> +# ldst_atomicity
> +load_atom2_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:%"PRIxPTR""
> +load_atom4_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:%"PRIxPTR""
> +load_atom8_or_exit_fallback(uintptr_t ra) "ra:%"PRIxPTR""
> +load_atom8_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:%"PRIxPTR""
> +load_atom16_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:%"PRIxPTR""
> +load_atom16_or_exit_fallback(uintptr_t ra) "ra:%"PRIxPTR""
> +store_atom2_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:%"PRIxPTR""
> +store_atom4_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:%"PRIxPTR""
> +store_atom8_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:%"PRIxPTR""
> +store_atom16_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:%"PRIxPTR""

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>

      parent reply	other threads:[~2024-10-05  0:10 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-04 13:52 [RFC PATCH] accel/tcg: add tracepoints for cpu_loop_exit_atomic Alex Bennée
2024-10-04 14:34 ` Richard Henderson
2024-10-05  0:09 ` Pierrick Bouvier [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c6df1bc2-f725-4f34-9beb-81a370173d00@linaro.org \
    --to=pierrick.bouvier@linaro.org \
    --cc=alex.bennee@linaro.org \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=riku.voipio@iki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).