* [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index
@ 2026-05-18 9:58 Abdelrahman Elbehery
2026-06-02 15:19 ` abdelrahman elbehery
2026-06-02 16:28 ` Alex Bennée
0 siblings, 2 replies; 8+ messages in thread
From: Abdelrahman Elbehery @ 2026-05-18 9:58 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell, qemu-arm, Abdelrahman Elbehery
Profiling showed g_hash_table_lookup as a hot spot when running TCG
plugins that log all CPU registers (e.g. execlog with reg=*).
Replace it with a direct array index to avoid lookup overhead.
Signed-off-by: Abdelrahman Elbehery <elbeheri95@gmail.com>
---
While profiling (via callgrind) a TCG plugin that logs all the CPU registers
(very identical to execlog with reg=*) i noticed that arm_gdb_get_sysreg is a hot spot,
mostly while calling g_hash_table_lookup. Probably this is due to some hashtable collision
that is handleded by glib internally.
Currently each DynamicGDBFeatureInfo item holds keys for cp_regs, and the keys are always
used to retrieve the ARMCPRegInfo pointer.
By replacing the keys array with a GPtrArray, we now just access reg info directly from
the given index.
The benchmarking was done on c2d-highcpu-16 (16 vCPUs, 32 GB memory) GCP
Setup was to run qemu-system-aarch64 against a buildroot kernel and rootfs
with and without the patch mostly using:
--plugin contrib/plugins/libstoptrigger.so,icount=2000000
--plugin contrib/plugins/libexeclog.so,reg=*
-M virt -cpu cortex-a710 -smp 1
Benchmark showed ~15% performance gain when hashtable is not used.
For benchmarking, i used hyperfine with 3 warmup rounds and 10 iterations each.
---
target/arm/cpu.h | 6 +++---
target/arm/gdbstub.c | 28 ++++++++++++++++------------
2 files changed, 19 insertions(+), 15 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 15a13b929276dad161032b61ba51ebbce7eeebc6..8816e94e3f8271ad403e77f6182fdb8cee31c527 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -121,15 +121,15 @@
* DynamicGDBFeatureInfo:
* @desc: Contains the feature descriptions.
* @data: A union with data specific to the set of registers
- * @cpregs_keys: Array that contains the corresponding Key of
- * a given cpreg with the same order of the cpreg
+ * @cpregs_regs: Array that contains the corresponding Register info pointer
+ * of a given cpreg with the same order of the cpreg
* in the XML description.
*/
typedef struct DynamicGDBFeatureInfo {
GDBFeature desc;
union {
struct {
- uint32_t *keys;
+ GPtrArray *regs;
} cpregs;
} data;
} DynamicGDBFeatureInfo;
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index d6e29c4cf46745dc7851bc5f8339c8d7c6857b8c..e66af8207b436fe8f1dd27441a587b1eb52ff928 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -238,19 +238,23 @@ static int arm_gdb_get_sysreg(CPUState *cs, GByteArray *buf, int reg)
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
const ARMCPRegInfo *ri;
- uint32_t key;
- key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg];
- ri = get_arm_cp_reginfo(cpu->cp_regs, key);
+ ri = g_ptr_array_index(cpu->dyn_sysreg_feature.data.cpregs.regs, reg);
if (ri) {
switch (cpreg_field_type(ri)) {
case MO_64:
if (ri->vhe_redir_to_el2 &&
(arm_hcr_el2_eff(env) & HCR_E2H) &&
arm_current_el(env) == 2) {
- ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el2);
+ ri = g_ptr_array_index(
+ cpu->dyn_sysreg_feature.data.cpregs.regs,
+ ri->vhe_redir_to_el2
+ );
} else if (ri->vhe_redir_to_el01) {
- ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el01);
+ ri = g_ptr_array_index(
+ cpu->dyn_sysreg_feature.data.cpregs.regs,
+ ri->vhe_redir_to_el01
+ );
}
return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri));
case MO_32:
@@ -269,19 +273,18 @@ static int arm_gdb_set_sysreg(CPUState *cs, uint8_t *buf, int reg)
static void arm_gen_one_feature_sysreg(GDBFeatureBuilder *builder,
DynamicGDBFeatureInfo *dyn_feature,
- ARMCPRegInfo *ri, uint32_t ri_key,
+ ARMCPRegInfo *ri,
int bitsize, int n)
{
gdb_feature_builder_append_reg(builder, ri->name, bitsize, n,
"int", "cp_regs");
- dyn_feature->data.cpregs.keys[n] = ri_key;
+ g_ptr_array_index(dyn_feature->data.cpregs.regs, n) = ri;
}
static void arm_register_sysreg_for_feature(gpointer key, gpointer value,
gpointer p)
{
- uint32_t ri_key = (uintptr_t)key;
ARMCPRegInfo *ri = value;
RegisterSysregFeatureParam *param = p;
ARMCPU *cpu = ARM_CPU(param->cs);
@@ -292,7 +295,7 @@ static void arm_register_sysreg_for_feature(gpointer key, gpointer value,
if (arm_feature(env, ARM_FEATURE_AARCH64)) {
if (ri->state == ARM_CP_STATE_AA64) {
arm_gen_one_feature_sysreg(¶m->builder, dyn_feature,
- ri, ri_key, 64, param->n++);
+ ri, 64, param->n++);
}
} else {
if (ri->state == ARM_CP_STATE_AA32) {
@@ -302,10 +305,10 @@ static void arm_register_sysreg_for_feature(gpointer key, gpointer value,
}
if (ri->type & ARM_CP_64BIT) {
arm_gen_one_feature_sysreg(¶m->builder, dyn_feature,
- ri, ri_key, 64, param->n++);
+ ri, 64, param->n++);
} else {
arm_gen_one_feature_sysreg(¶m->builder, dyn_feature,
- ri, ri_key, 32, param->n++);
+ ri, 32, param->n++);
}
}
}
@@ -323,7 +326,8 @@ static GDBFeature *arm_gen_dynamic_sysreg_feature(CPUState *cs, int base_reg)
"org.qemu.gdb.arm.sys.regs",
"system-registers.xml",
base_reg);
- cpu->dyn_sysreg_feature.data.cpregs.keys = g_new(uint32_t, num_regs);
+ cpu->dyn_sysreg_feature.data.cpregs.regs = g_ptr_array_new();
+ g_ptr_array_set_size(cpu->dyn_sysreg_feature.data.cpregs.regs, num_regs);
g_hash_table_foreach(cpu->cp_regs, arm_register_sysreg_for_feature, ¶m);
gdb_feature_builder_end(¶m.builder);
return &cpu->dyn_sysreg_feature.desc;
---
base-commit: ac6721b88df944ade0048822b2b74210f543d656
change-id: 20260518-enhance_arm_gdb_get_sysreg_performance-4288474401d6
Best regards,
--
Abdelrahman Elbehery <elbeheri95@gmail.com>
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index
2026-05-18 9:58 [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index Abdelrahman Elbehery
@ 2026-06-02 15:19 ` abdelrahman elbehery
2026-06-02 22:18 ` Richard Henderson
2026-06-02 16:28 ` Alex Bennée
1 sibling, 1 reply; 8+ messages in thread
From: abdelrahman elbehery @ 2026-06-02 15:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell, qemu-arm
[-- Attachment #1: Type: text/plain, Size: 7364 bytes --]
Ping
On Mon, 18 May 2026, 12:58 pm Abdelrahman Elbehery, <elbeheri95@gmail.com>
wrote:
> Profiling showed g_hash_table_lookup as a hot spot when running TCG
> plugins that log all CPU registers (e.g. execlog with reg=*).
> Replace it with a direct array index to avoid lookup overhead.
>
> Signed-off-by: Abdelrahman Elbehery <elbeheri95@gmail.com>
> ---
> While profiling (via callgrind) a TCG plugin that logs all the CPU
> registers
> (very identical to execlog with reg=*) i noticed that arm_gdb_get_sysreg
> is a hot spot,
> mostly while calling g_hash_table_lookup. Probably this is due to some
> hashtable collision
> that is handleded by glib internally.
>
> Currently each DynamicGDBFeatureInfo item holds keys for cp_regs, and the
> keys are always
> used to retrieve the ARMCPRegInfo pointer.
>
> By replacing the keys array with a GPtrArray, we now just access reg info
> directly from
> the given index.
>
> The benchmarking was done on c2d-highcpu-16 (16 vCPUs, 32 GB memory) GCP
>
> Setup was to run qemu-system-aarch64 against a buildroot kernel and rootfs
> with and without the patch mostly using:
> --plugin contrib/plugins/libstoptrigger.so,icount=2000000
> --plugin contrib/plugins/libexeclog.so,reg=*
> -M virt -cpu cortex-a710 -smp 1
>
> Benchmark showed ~15% performance gain when hashtable is not used.
> For benchmarking, i used hyperfine with 3 warmup rounds and 10 iterations
> each.
> ---
> target/arm/cpu.h | 6 +++---
> target/arm/gdbstub.c | 28 ++++++++++++++++------------
> 2 files changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index
> 15a13b929276dad161032b61ba51ebbce7eeebc6..8816e94e3f8271ad403e77f6182fdb8cee31c527
> 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -121,15 +121,15 @@
> * DynamicGDBFeatureInfo:
> * @desc: Contains the feature descriptions.
> * @data: A union with data specific to the set of registers
> - * @cpregs_keys: Array that contains the corresponding Key of
> - * a given cpreg with the same order of the cpreg
> + * @cpregs_regs: Array that contains the corresponding Register info
> pointer
> + * of a given cpreg with the same order of the cpreg
> * in the XML description.
> */
> typedef struct DynamicGDBFeatureInfo {
> GDBFeature desc;
> union {
> struct {
> - uint32_t *keys;
> + GPtrArray *regs;
> } cpregs;
> } data;
> } DynamicGDBFeatureInfo;
> diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
> index
> d6e29c4cf46745dc7851bc5f8339c8d7c6857b8c..e66af8207b436fe8f1dd27441a587b1eb52ff928
> 100644
> --- a/target/arm/gdbstub.c
> +++ b/target/arm/gdbstub.c
> @@ -238,19 +238,23 @@ static int arm_gdb_get_sysreg(CPUState *cs,
> GByteArray *buf, int reg)
> ARMCPU *cpu = ARM_CPU(cs);
> CPUARMState *env = &cpu->env;
> const ARMCPRegInfo *ri;
> - uint32_t key;
>
> - key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg];
> - ri = get_arm_cp_reginfo(cpu->cp_regs, key);
> + ri = g_ptr_array_index(cpu->dyn_sysreg_feature.data.cpregs.regs, reg);
> if (ri) {
> switch (cpreg_field_type(ri)) {
> case MO_64:
> if (ri->vhe_redir_to_el2 &&
> (arm_hcr_el2_eff(env) & HCR_E2H) &&
> arm_current_el(env) == 2) {
> - ri = get_arm_cp_reginfo(cpu->cp_regs,
> ri->vhe_redir_to_el2);
> + ri = g_ptr_array_index(
> + cpu->dyn_sysreg_feature.data.cpregs.regs,
> + ri->vhe_redir_to_el2
> + );
> } else if (ri->vhe_redir_to_el01) {
> - ri = get_arm_cp_reginfo(cpu->cp_regs,
> ri->vhe_redir_to_el01);
> + ri = g_ptr_array_index(
> + cpu->dyn_sysreg_feature.data.cpregs.regs,
> + ri->vhe_redir_to_el01
> + );
> }
> return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri));
> case MO_32:
> @@ -269,19 +273,18 @@ static int arm_gdb_set_sysreg(CPUState *cs, uint8_t
> *buf, int reg)
>
> static void arm_gen_one_feature_sysreg(GDBFeatureBuilder *builder,
> DynamicGDBFeatureInfo *dyn_feature,
> - ARMCPRegInfo *ri, uint32_t ri_key,
> + ARMCPRegInfo *ri,
> int bitsize, int n)
> {
> gdb_feature_builder_append_reg(builder, ri->name, bitsize, n,
> "int", "cp_regs");
>
> - dyn_feature->data.cpregs.keys[n] = ri_key;
> + g_ptr_array_index(dyn_feature->data.cpregs.regs, n) = ri;
> }
>
> static void arm_register_sysreg_for_feature(gpointer key, gpointer value,
> gpointer p)
> {
> - uint32_t ri_key = (uintptr_t)key;
> ARMCPRegInfo *ri = value;
> RegisterSysregFeatureParam *param = p;
> ARMCPU *cpu = ARM_CPU(param->cs);
> @@ -292,7 +295,7 @@ static void arm_register_sysreg_for_feature(gpointer
> key, gpointer value,
> if (arm_feature(env, ARM_FEATURE_AARCH64)) {
> if (ri->state == ARM_CP_STATE_AA64) {
> arm_gen_one_feature_sysreg(¶m->builder, dyn_feature,
> - ri, ri_key, 64, param->n++);
> + ri, 64, param->n++);
> }
> } else {
> if (ri->state == ARM_CP_STATE_AA32) {
> @@ -302,10 +305,10 @@ static void arm_register_sysreg_for_feature(gpointer
> key, gpointer value,
> }
> if (ri->type & ARM_CP_64BIT) {
> arm_gen_one_feature_sysreg(¶m->builder,
> dyn_feature,
> - ri, ri_key, 64,
> param->n++);
> + ri, 64, param->n++);
> } else {
> arm_gen_one_feature_sysreg(¶m->builder,
> dyn_feature,
> - ri, ri_key, 32,
> param->n++);
> + ri, 32, param->n++);
> }
> }
> }
> @@ -323,7 +326,8 @@ static GDBFeature
> *arm_gen_dynamic_sysreg_feature(CPUState *cs, int base_reg)
> "org.qemu.gdb.arm.sys.regs",
> "system-registers.xml",
> base_reg);
> - cpu->dyn_sysreg_feature.data.cpregs.keys = g_new(uint32_t, num_regs);
> + cpu->dyn_sysreg_feature.data.cpregs.regs = g_ptr_array_new();
> + g_ptr_array_set_size(cpu->dyn_sysreg_feature.data.cpregs.regs,
> num_regs);
> g_hash_table_foreach(cpu->cp_regs, arm_register_sysreg_for_feature,
> ¶m);
> gdb_feature_builder_end(¶m.builder);
> return &cpu->dyn_sysreg_feature.desc;
>
> ---
> base-commit: ac6721b88df944ade0048822b2b74210f543d656
> change-id: 20260518-enhance_arm_gdb_get_sysreg_performance-4288474401d6
>
> Best regards,
> --
> Abdelrahman Elbehery <elbeheri95@gmail.com>
>
>
[-- Attachment #2: Type: text/html, Size: 8949 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index
2026-06-02 15:19 ` abdelrahman elbehery
@ 2026-06-02 22:18 ` Richard Henderson
2026-06-03 8:00 ` Alex Bennée
0 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2026-06-02 22:18 UTC (permalink / raw)
To: qemu-devel
On 6/2/26 08:19, abdelrahman elbehery wrote:
> Ping
>
> On Mon, 18 May 2026, 12:58 pm Abdelrahman Elbehery, <elbeheri95@gmail.com
> <mailto:elbeheri95@gmail.com>> wrote:
>
> Profiling showed g_hash_table_lookup as a hot spot when running TCG
> plugins that log all CPU registers (e.g. execlog with reg=*).
> Replace it with a direct array index to avoid lookup overhead.
>
> Signed-off-by: Abdelrahman Elbehery <elbeheri95@gmail.com <mailto:elbeheri95@gmail.com>>
> ---
> While profiling (via callgrind) a TCG plugin that logs all the CPU registers
> (very identical to execlog with reg=*) i noticed that arm_gdb_get_sysreg is a hot spot,
> mostly while calling g_hash_table_lookup. Probably this is due to some hashtable collision
> that is handleded by glib internally.
>
> Currently each DynamicGDBFeatureInfo item holds keys for cp_regs, and the keys are always
> used to retrieve the ARMCPRegInfo pointer.
>
> By replacing the keys array with a GPtrArray, we now just access reg info directly from
> the given index.
>
> The benchmarking was done on c2d-highcpu-16 (16 vCPUs, 32 GB memory) GCP
>
> Setup was to run qemu-system-aarch64 against a buildroot kernel and rootfs
> with and without the patch mostly using:
> --plugin contrib/plugins/libstoptrigger.so,icount=2000000
> --plugin contrib/plugins/libexeclog.so,reg=*
> -M virt -cpu cortex-a710 -smp 1
>
> Benchmark showed ~15% performance gain when hashtable is not used.
> For benchmarking, i used hyperfine with 3 warmup rounds and 10 iterations each.
> ---
> target/arm/cpu.h | 6 +++---
> target/arm/gdbstub.c | 28 ++++++++++++++++------------
> 2 files changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index
> 15a13b929276dad161032b61ba51ebbce7eeebc6..8816e94e3f8271ad403e77f6182fdb8cee31c527 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -121,15 +121,15 @@
> * DynamicGDBFeatureInfo:
> * @desc: Contains the feature descriptions.
> * @data: A union with data specific to the set of registers
> - * @cpregs_keys: Array that contains the corresponding Key of
> - * a given cpreg with the same order of the cpreg
> + * @cpregs_regs: Array that contains the corresponding Register info pointer
> + * of a given cpreg with the same order of the cpreg
> * in the XML description.
This will not work, because each ARMCPU has separate cpreg info structures, and these
structures only have the lifetime of the ARMCPU struct. We create and destroy these at
runtime, so at some point you may end up with pointers to freed memory.
r~
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index
2026-06-02 22:18 ` Richard Henderson
@ 2026-06-03 8:00 ` Alex Bennée
2026-06-03 14:22 ` Richard Henderson
0 siblings, 1 reply; 8+ messages in thread
From: Alex Bennée @ 2026-06-03 8:00 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
Richard Henderson <richard.henderson@linaro.org> writes:
> On 6/2/26 08:19, abdelrahman elbehery wrote:
>> Ping
>> On Mon, 18 May 2026, 12:58 pm Abdelrahman Elbehery,
>> <elbeheri95@gmail.com <mailto:elbeheri95@gmail.com>> wrote:
>> Profiling showed g_hash_table_lookup as a hot spot when running
>> TCG
>> plugins that log all CPU registers (e.g. execlog with reg=*).
>> Replace it with a direct array index to avoid lookup overhead.
>> Signed-off-by: Abdelrahman Elbehery <elbeheri95@gmail.com
>> <mailto:elbeheri95@gmail.com>>
>> ---
>> While profiling (via callgrind) a TCG plugin that logs all the CPU registers
>> (very identical to execlog with reg=*) i noticed that arm_gdb_get_sysreg is a hot spot,
>> mostly while calling g_hash_table_lookup. Probably this is due to some hashtable collision
>> that is handleded by glib internally.
>> Currently each DynamicGDBFeatureInfo item holds keys for
>> cp_regs, and the keys are always
>> used to retrieve the ARMCPRegInfo pointer.
>> By replacing the keys array with a GPtrArray, we now just access
>> reg info directly from
>> the given index.
>> The benchmarking was done on c2d-highcpu-16 (16 vCPUs, 32 GB
>> memory) GCP
>> Setup was to run qemu-system-aarch64 against a buildroot kernel
>> and rootfs
>> with and without the patch mostly using:
>> --plugin contrib/plugins/libstoptrigger.so,icount=2000000
>> --plugin contrib/plugins/libexeclog.so,reg=*
>> -M virt -cpu cortex-a710 -smp 1
>> Benchmark showed ~15% performance gain when hashtable is not
>> used.
>> For benchmarking, i used hyperfine with 3 warmup rounds and 10 iterations each.
>> ---
>> target/arm/cpu.h | 6 +++---
>> target/arm/gdbstub.c | 28 ++++++++++++++++------------
>> 2 files changed, 19 insertions(+), 15 deletions(-)
>> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
>> index
>> 15a13b929276dad161032b61ba51ebbce7eeebc6..8816e94e3f8271ad403e77f6182fdb8cee31c527 100644
>> --- a/target/arm/cpu.h
>> +++ b/target/arm/cpu.h
>> @@ -121,15 +121,15 @@
>> * DynamicGDBFeatureInfo:
>> * @desc: Contains the feature descriptions.
>> * @data: A union with data specific to the set of registers
>> - * @cpregs_keys: Array that contains the corresponding Key of
>> - * a given cpreg with the same order of the cpreg
>> + * @cpregs_regs: Array that contains the corresponding Register info pointer
>> + * of a given cpreg with the same order of the cpreg
>> * in the XML description.
>
> This will not work, because each ARMCPU has separate cpreg info
> structures, and these structures only have the lifetime of the ARMCPU
> struct. We create and destroy these at runtime, so at some point you
> may end up with pointers to freed memory.
Shouldn't it be the same set for each CPU though?
>
>
> r~
--
Alex Bennée
Virtualisation Tech Lead @ Linaro
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index
2026-06-03 8:00 ` Alex Bennée
@ 2026-06-03 14:22 ` Richard Henderson
2026-06-03 15:35 ` Alex Bennée
0 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2026-06-03 14:22 UTC (permalink / raw)
To: Alex Bennée; +Cc: qemu-devel
On 6/3/26 01:00, Alex Bennée wrote:
>> This will not work, because each ARMCPU has separate cpreg info
>> structures, and these structures only have the lifetime of the ARMCPU
>> struct. We create and destroy these at runtime, so at some point you
>> may end up with pointers to freed memory.
>
> Shouldn't it be the same set for each CPU though?
I have thought for some time that the cpregs should be stored on the class, not the
instance, so that these structures are shared, but it's non-trivial.
r~
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index
2026-06-03 14:22 ` Richard Henderson
@ 2026-06-03 15:35 ` Alex Bennée
0 siblings, 0 replies; 8+ messages in thread
From: Alex Bennée @ 2026-06-03 15:35 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
Richard Henderson <richard.henderson@linaro.org> writes:
> On 6/3/26 01:00, Alex Bennée wrote:
>>> This will not work, because each ARMCPU has separate cpreg info
>>> structures, and these structures only have the lifetime of the ARMCPU
>>> struct. We create and destroy these at runtime, so at some point you
>>> may end up with pointers to freed memory.
>> Shouldn't it be the same set for each CPU though?
>
> I have thought for some time that the cpregs should be stored on the
> class, not the instance, so that these structures are shared, but it's
> non-trivial.
IIRC there are also some places where the list isn't complete until
realize time (I think for things like the GIC system registers).
>
> r~
--
Alex Bennée
Virtualisation Tech Lead @ Linaro
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index
2026-05-18 9:58 [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index Abdelrahman Elbehery
2026-06-02 15:19 ` abdelrahman elbehery
@ 2026-06-02 16:28 ` Alex Bennée
2026-06-03 9:55 ` abdelrahman elbehery
1 sibling, 1 reply; 8+ messages in thread
From: Alex Bennée @ 2026-06-02 16:28 UTC (permalink / raw)
To: Abdelrahman Elbehery; +Cc: qemu-devel, Peter Maydell, qemu-arm
Abdelrahman Elbehery <elbeheri95@gmail.com> writes:
> Profiling showed g_hash_table_lookup as a hot spot when running TCG
> plugins that log all CPU registers (e.g. execlog with reg=*).
> Replace it with a direct array index to avoid lookup overhead.
>
> Signed-off-by: Abdelrahman Elbehery <elbeheri95@gmail.com>
> ---
> While profiling (via callgrind) a TCG plugin that logs all the CPU registers
> (very identical to execlog with reg=*) i noticed that arm_gdb_get_sysreg is a hot spot,
> mostly while calling g_hash_table_lookup. Probably this is due to some hashtable collision
> that is handleded by glib internally.
>
> Currently each DynamicGDBFeatureInfo item holds keys for cp_regs, and the keys are always
> used to retrieve the ARMCPRegInfo pointer.
>
> By replacing the keys array with a GPtrArray, we now just access reg info directly from
> the given index.
>
> The benchmarking was done on c2d-highcpu-16 (16 vCPUs, 32 GB memory) GCP
>
> Setup was to run qemu-system-aarch64 against a buildroot kernel and rootfs
> with and without the patch mostly using:
> --plugin contrib/plugins/libstoptrigger.so,icount=2000000
> --plugin contrib/plugins/libexeclog.so,reg=*
> -M virt -cpu cortex-a710 -smp 1
>
> Benchmark showed ~15% performance gain when hashtable is not used.
> For benchmarking, i used hyperfine with 3 warmup rounds and 10 iterations each.
> ---
> target/arm/cpu.h | 6 +++---
> target/arm/gdbstub.c | 28 ++++++++++++++++------------
> 2 files changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 15a13b929276dad161032b61ba51ebbce7eeebc6..8816e94e3f8271ad403e77f6182fdb8cee31c527 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -121,15 +121,15 @@
> * DynamicGDBFeatureInfo:
> * @desc: Contains the feature descriptions.
> * @data: A union with data specific to the set of registers
> - * @cpregs_keys: Array that contains the corresponding Key of
> - * a given cpreg with the same order of the cpreg
> + * @cpregs_regs: Array that contains the corresponding Register info pointer
> + * of a given cpreg with the same order of the cpreg
> * in the XML description.
> */
> typedef struct DynamicGDBFeatureInfo {
> GDBFeature desc;
> union {
> struct {
> - uint32_t *keys;
> + GPtrArray *regs;
> } cpregs;
> } data;
> } DynamicGDBFeatureInfo;
> diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
> index d6e29c4cf46745dc7851bc5f8339c8d7c6857b8c..e66af8207b436fe8f1dd27441a587b1eb52ff928 100644
> --- a/target/arm/gdbstub.c
> +++ b/target/arm/gdbstub.c
> @@ -238,19 +238,23 @@ static int arm_gdb_get_sysreg(CPUState *cs, GByteArray *buf, int reg)
> ARMCPU *cpu = ARM_CPU(cs);
> CPUARMState *env = &cpu->env;
> const ARMCPRegInfo *ri;
> - uint32_t key;
>
> - key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg];
> - ri = get_arm_cp_reginfo(cpu->cp_regs, key);
> + ri = g_ptr_array_index(cpu->dyn_sysreg_feature.data.cpregs.regs,
> reg);
I was only re-using key because we still use get_arm_cp_reginfo but I
have no objection to making the access faster for plugins given the
system registers don't change after CPU reset.
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> if (ri) {
> switch (cpreg_field_type(ri)) {
> case MO_64:
> if (ri->vhe_redir_to_el2 &&
> (arm_hcr_el2_eff(env) & HCR_E2H) &&
> arm_current_el(env) == 2) {
> - ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el2);
> + ri = g_ptr_array_index(
> + cpu->dyn_sysreg_feature.data.cpregs.regs,
> + ri->vhe_redir_to_el2
> + );
> } else if (ri->vhe_redir_to_el01) {
> - ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el01);
> + ri = g_ptr_array_index(
> + cpu->dyn_sysreg_feature.data.cpregs.regs,
> + ri->vhe_redir_to_el01
> + );
> }
> return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri));
> case MO_32:
> @@ -269,19 +273,18 @@ static int arm_gdb_set_sysreg(CPUState *cs, uint8_t *buf, int reg)
>
> static void arm_gen_one_feature_sysreg(GDBFeatureBuilder *builder,
> DynamicGDBFeatureInfo *dyn_feature,
> - ARMCPRegInfo *ri, uint32_t ri_key,
> + ARMCPRegInfo *ri,
> int bitsize, int n)
> {
> gdb_feature_builder_append_reg(builder, ri->name, bitsize, n,
> "int", "cp_regs");
>
> - dyn_feature->data.cpregs.keys[n] = ri_key;
> + g_ptr_array_index(dyn_feature->data.cpregs.regs, n) = ri;
> }
>
> static void arm_register_sysreg_for_feature(gpointer key, gpointer value,
> gpointer p)
> {
> - uint32_t ri_key = (uintptr_t)key;
> ARMCPRegInfo *ri = value;
> RegisterSysregFeatureParam *param = p;
> ARMCPU *cpu = ARM_CPU(param->cs);
> @@ -292,7 +295,7 @@ static void arm_register_sysreg_for_feature(gpointer key, gpointer value,
> if (arm_feature(env, ARM_FEATURE_AARCH64)) {
> if (ri->state == ARM_CP_STATE_AA64) {
> arm_gen_one_feature_sysreg(¶m->builder, dyn_feature,
> - ri, ri_key, 64, param->n++);
> + ri, 64, param->n++);
> }
> } else {
> if (ri->state == ARM_CP_STATE_AA32) {
> @@ -302,10 +305,10 @@ static void arm_register_sysreg_for_feature(gpointer key, gpointer value,
> }
> if (ri->type & ARM_CP_64BIT) {
> arm_gen_one_feature_sysreg(¶m->builder, dyn_feature,
> - ri, ri_key, 64, param->n++);
> + ri, 64, param->n++);
> } else {
> arm_gen_one_feature_sysreg(¶m->builder, dyn_feature,
> - ri, ri_key, 32, param->n++);
> + ri, 32, param->n++);
> }
> }
> }
> @@ -323,7 +326,8 @@ static GDBFeature *arm_gen_dynamic_sysreg_feature(CPUState *cs, int base_reg)
> "org.qemu.gdb.arm.sys.regs",
> "system-registers.xml",
> base_reg);
> - cpu->dyn_sysreg_feature.data.cpregs.keys = g_new(uint32_t, num_regs);
> + cpu->dyn_sysreg_feature.data.cpregs.regs = g_ptr_array_new();
> + g_ptr_array_set_size(cpu->dyn_sysreg_feature.data.cpregs.regs, num_regs);
> g_hash_table_foreach(cpu->cp_regs, arm_register_sysreg_for_feature, ¶m);
> gdb_feature_builder_end(¶m.builder);
> return &cpu->dyn_sysreg_feature.desc;
>
> ---
> base-commit: ac6721b88df944ade0048822b2b74210f543d656
> change-id: 20260518-enhance_arm_gdb_get_sysreg_performance-4288474401d6
>
> Best regards,
--
Alex Bennée
Virtualisation Tech Lead @ Linaro
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index
2026-06-02 16:28 ` Alex Bennée
@ 2026-06-03 9:55 ` abdelrahman elbehery
0 siblings, 0 replies; 8+ messages in thread
From: abdelrahman elbehery @ 2026-06-03 9:55 UTC (permalink / raw)
To: Alex Bennée, richard.henderson; +Cc: qemu-devel, Peter Maydell, qemu-arm
[-- Attachment #1: Type: text/plain, Size: 8854 bytes --]
Thanks so much for pointing that out.
I am not aware if GDBFeatureBuilder shares the same lifetime as ARMCPU
struct or not. But noticed that builder object keeps a copy of register
object name pointer.
E.g. gdb_feature_builder_append_reg does
builder->regs->pdata[regnum] = (gpointer *)name;
I guess if GDBFeatureBuilder is not destructed per core, won't this imply
we have a similar stale pointer issue (ri->name referenced at
builder->regs->pdata, and ri is freed) in the use case you mentioned?
But if it has the same lifetime as ARMCPU, won't this also imply we need to
call again the GDBFeatureBuilder append_reg method?
I will try to read the docs/tests or try any examples that do this struct
re-creation at run-time and test the above ri->name scenario.
Thank you
On Tue, Jun 2, 2026 at 7:28 PM Alex Bennée <alex.bennee@linaro.org> wrote:
> Abdelrahman Elbehery <elbeheri95@gmail.com> writes:
>
> > Profiling showed g_hash_table_lookup as a hot spot when running TCG
> > plugins that log all CPU registers (e.g. execlog with reg=*).
> > Replace it with a direct array index to avoid lookup overhead.
> >
> > Signed-off-by: Abdelrahman Elbehery <elbeheri95@gmail.com>
> > ---
> > While profiling (via callgrind) a TCG plugin that logs all the CPU
> registers
> > (very identical to execlog with reg=*) i noticed that arm_gdb_get_sysreg
> is a hot spot,
> > mostly while calling g_hash_table_lookup. Probably this is due to some
> hashtable collision
> > that is handleded by glib internally.
> >
> > Currently each DynamicGDBFeatureInfo item holds keys for cp_regs, and
> the keys are always
> > used to retrieve the ARMCPRegInfo pointer.
> >
> > By replacing the keys array with a GPtrArray, we now just access reg
> info directly from
> > the given index.
> >
> > The benchmarking was done on c2d-highcpu-16 (16 vCPUs, 32 GB memory) GCP
> >
> > Setup was to run qemu-system-aarch64 against a buildroot kernel and
> rootfs
> > with and without the patch mostly using:
> > --plugin contrib/plugins/libstoptrigger.so,icount=2000000
> > --plugin contrib/plugins/libexeclog.so,reg=*
> > -M virt -cpu cortex-a710 -smp 1
> >
> > Benchmark showed ~15% performance gain when hashtable is not used.
> > For benchmarking, i used hyperfine with 3 warmup rounds and 10
> iterations each.
> > ---
> > target/arm/cpu.h | 6 +++---
> > target/arm/gdbstub.c | 28 ++++++++++++++++------------
> > 2 files changed, 19 insertions(+), 15 deletions(-)
> >
> > diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> > index
> 15a13b929276dad161032b61ba51ebbce7eeebc6..8816e94e3f8271ad403e77f6182fdb8cee31c527
> 100644
> > --- a/target/arm/cpu.h
> > +++ b/target/arm/cpu.h
> > @@ -121,15 +121,15 @@
> > * DynamicGDBFeatureInfo:
> > * @desc: Contains the feature descriptions.
> > * @data: A union with data specific to the set of registers
> > - * @cpregs_keys: Array that contains the corresponding Key of
> > - * a given cpreg with the same order of the cpreg
> > + * @cpregs_regs: Array that contains the corresponding Register info
> pointer
> > + * of a given cpreg with the same order of the cpreg
> > * in the XML description.
> > */
> > typedef struct DynamicGDBFeatureInfo {
> > GDBFeature desc;
> > union {
> > struct {
> > - uint32_t *keys;
> > + GPtrArray *regs;
> > } cpregs;
> > } data;
> > } DynamicGDBFeatureInfo;
> > diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
> > index
> d6e29c4cf46745dc7851bc5f8339c8d7c6857b8c..e66af8207b436fe8f1dd27441a587b1eb52ff928
> 100644
> > --- a/target/arm/gdbstub.c
> > +++ b/target/arm/gdbstub.c
> > @@ -238,19 +238,23 @@ static int arm_gdb_get_sysreg(CPUState *cs,
> GByteArray *buf, int reg)
> > ARMCPU *cpu = ARM_CPU(cs);
> > CPUARMState *env = &cpu->env;
> > const ARMCPRegInfo *ri;
> > - uint32_t key;
> >
> > - key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg];
> > - ri = get_arm_cp_reginfo(cpu->cp_regs, key);
> > + ri = g_ptr_array_index(cpu->dyn_sysreg_feature.data.cpregs.regs,
> > reg);
>
> I was only re-using key because we still use get_arm_cp_reginfo but I
> have no objection to making the access faster for plugins given the
> system registers don't change after CPU reset.
>
>
> Tested-by: Alex Bennée <alex.bennee@linaro.org>
> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
>
>
> > if (ri) {
> > switch (cpreg_field_type(ri)) {
> > case MO_64:
> > if (ri->vhe_redir_to_el2 &&
> > (arm_hcr_el2_eff(env) & HCR_E2H) &&
> > arm_current_el(env) == 2) {
> > - ri = get_arm_cp_reginfo(cpu->cp_regs,
> ri->vhe_redir_to_el2);
> > + ri = g_ptr_array_index(
> > + cpu->dyn_sysreg_feature.data.cpregs.regs,
> > + ri->vhe_redir_to_el2
> > + );
> > } else if (ri->vhe_redir_to_el01) {
> > - ri = get_arm_cp_reginfo(cpu->cp_regs,
> ri->vhe_redir_to_el01);
> > + ri = g_ptr_array_index(
> > + cpu->dyn_sysreg_feature.data.cpregs.regs,
> > + ri->vhe_redir_to_el01
> > + );
> > }
> > return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env,
> ri));
> > case MO_32:
> > @@ -269,19 +273,18 @@ static int arm_gdb_set_sysreg(CPUState *cs,
> uint8_t *buf, int reg)
> >
> > static void arm_gen_one_feature_sysreg(GDBFeatureBuilder *builder,
> > DynamicGDBFeatureInfo
> *dyn_feature,
> > - ARMCPRegInfo *ri, uint32_t
> ri_key,
> > + ARMCPRegInfo *ri,
> > int bitsize, int n)
> > {
> > gdb_feature_builder_append_reg(builder, ri->name, bitsize, n,
> > "int", "cp_regs");
> >
> > - dyn_feature->data.cpregs.keys[n] = ri_key;
> > + g_ptr_array_index(dyn_feature->data.cpregs.regs, n) = ri;
> > }
> >
> > static void arm_register_sysreg_for_feature(gpointer key, gpointer
> value,
> > gpointer p)
> > {
> > - uint32_t ri_key = (uintptr_t)key;
> > ARMCPRegInfo *ri = value;
> > RegisterSysregFeatureParam *param = p;
> > ARMCPU *cpu = ARM_CPU(param->cs);
> > @@ -292,7 +295,7 @@ static void arm_register_sysreg_for_feature(gpointer
> key, gpointer value,
> > if (arm_feature(env, ARM_FEATURE_AARCH64)) {
> > if (ri->state == ARM_CP_STATE_AA64) {
> > arm_gen_one_feature_sysreg(¶m->builder, dyn_feature,
> > - ri, ri_key, 64, param->n++);
> > + ri, 64, param->n++);
> > }
> > } else {
> > if (ri->state == ARM_CP_STATE_AA32) {
> > @@ -302,10 +305,10 @@ static void
> arm_register_sysreg_for_feature(gpointer key, gpointer value,
> > }
> > if (ri->type & ARM_CP_64BIT) {
> > arm_gen_one_feature_sysreg(¶m->builder,
> dyn_feature,
> > - ri, ri_key, 64,
> param->n++);
> > + ri, 64, param->n++);
> > } else {
> > arm_gen_one_feature_sysreg(¶m->builder,
> dyn_feature,
> > - ri, ri_key, 32,
> param->n++);
> > + ri, 32, param->n++);
> > }
> > }
> > }
> > @@ -323,7 +326,8 @@ static GDBFeature
> *arm_gen_dynamic_sysreg_feature(CPUState *cs, int base_reg)
> > "org.qemu.gdb.arm.sys.regs",
> > "system-registers.xml",
> > base_reg);
> > - cpu->dyn_sysreg_feature.data.cpregs.keys = g_new(uint32_t,
> num_regs);
> > + cpu->dyn_sysreg_feature.data.cpregs.regs = g_ptr_array_new();
> > + g_ptr_array_set_size(cpu->dyn_sysreg_feature.data.cpregs.regs,
> num_regs);
> > g_hash_table_foreach(cpu->cp_regs, arm_register_sysreg_for_feature,
> ¶m);
> > gdb_feature_builder_end(¶m.builder);
> > return &cpu->dyn_sysreg_feature.desc;
> >
> > ---
> > base-commit: ac6721b88df944ade0048822b2b74210f543d656
> > change-id: 20260518-enhance_arm_gdb_get_sysreg_performance-4288474401d6
> >
> > Best regards,
>
> --
> Alex Bennée
> Virtualisation Tech Lead @ Linaro
>
[-- Attachment #2: Type: text/html, Size: 11229 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2026-06-03 15:36 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-18 9:58 [PATCH] target/arm: replace cp_regs hashtable lookup with direct array index Abdelrahman Elbehery
2026-06-02 15:19 ` abdelrahman elbehery
2026-06-02 22:18 ` Richard Henderson
2026-06-03 8:00 ` Alex Bennée
2026-06-03 14:22 ` Richard Henderson
2026-06-03 15:35 ` Alex Bennée
2026-06-02 16:28 ` Alex Bennée
2026-06-03 9:55 ` abdelrahman elbehery
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.