* [PATCH RFC V4 01/33] arm/virt, target/arm: Add new ARMCPU {socket, cluster, core, thread}-id property
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-10 14:15 ` [PATCH RFC V4 01/33] arm/virt,target/arm: Add new ARMCPU {socket,cluster,core,thread}-id property Miguel Luis
2024-10-09 3:17 ` [PATCH RFC V4 02/33] cpu-common: Add common CPU utility for possible vCPUs Salil Mehta via
` (28 subsequent siblings)
29 siblings, 1 reply; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
These properties shall be used to store the user-specified topology
{socket,cluster,core,thread}, which shall be converted to a unique 'vcpu-id'
that is used as the slot index during hot(un)plug of a vCPU.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 10 ++++++++++
include/hw/arm/virt.h | 28 ++++++++++++++++++++++++++++
target/arm/cpu.c | 4 ++++
target/arm/cpu.h | 4 ++++
4 files changed, 46 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 7934b23651..a0aeb263dc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2240,6 +2240,14 @@ static void machvirt_init(MachineState *machine)
&error_fatal);
aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL);
+ object_property_set_int(cpuobj, "socket-id",
+ virt_get_socket_id(machine, n), NULL);
+ object_property_set_int(cpuobj, "cluster-id",
+ virt_get_cluster_id(machine, n), NULL);
+ object_property_set_int(cpuobj, "core-id",
+ virt_get_core_id(machine, n), NULL);
+ object_property_set_int(cpuobj, "thread-id",
+ virt_get_thread_id(machine, n), NULL);
if (!vms->secure) {
object_property_set_bool(cpuobj, "has_el3", false, NULL);
@@ -2763,6 +2771,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
{
int n;
unsigned int max_cpus = ms->smp.max_cpus;
+ unsigned int smp_threads = ms->smp.threads;
VirtMachineState *vms = VIRT_MACHINE(ms);
MachineClass *mc = MACHINE_GET_CLASS(vms);
@@ -2776,6 +2785,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
ms->possible_cpus->len = max_cpus;
for (n = 0; n < ms->possible_cpus->len; n++) {
ms->possible_cpus->cpus[n].type = ms->cpu_type;
+ ms->possible_cpus->cpus[n].vcpus_count = smp_threads;
ms->possible_cpus->cpus[n].arch_id =
virt_cpu_mp_affinity(vms, n);
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index aca4f8061b..db3e2aebb9 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -214,4 +214,32 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms)
vms->highmem_redists) ? 2 : 1;
}
+static inline int virt_get_socket_id(const MachineState *ms, int cpu_index)
+{
+ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
+
+ return ms->possible_cpus->cpus[cpu_index].props.socket_id;
+}
+
+static inline int virt_get_cluster_id(const MachineState *ms, int cpu_index)
+{
+ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
+
+ return ms->possible_cpus->cpus[cpu_index].props.cluster_id;
+}
+
+static inline int virt_get_core_id(const MachineState *ms, int cpu_index)
+{
+ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
+
+ return ms->possible_cpus->cpus[cpu_index].props.core_id;
+}
+
+static inline int virt_get_thread_id(const MachineState *ms, int cpu_index)
+{
+ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
+
+ return ms->possible_cpus->cpus[cpu_index].props.thread_id;
+}
+
#endif /* QEMU_ARM_VIRT_H */
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 19191c2391..bda95366d1 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2622,6 +2622,10 @@ static Property arm_cpu_properties[] = {
DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
mp_affinity, ARM64_AFFINITY_INVALID),
DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID),
+ DEFINE_PROP_INT32("socket-id", ARMCPU, socket_id, 0),
+ DEFINE_PROP_INT32("cluster-id", ARMCPU, cluster_id, 0),
+ DEFINE_PROP_INT32("core-id", ARMCPU, core_id, 0),
+ DEFINE_PROP_INT32("thread-id", ARMCPU, thread_id, 0),
DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1),
/* True to default to the backward-compat old CNTFRQ rather than 1Ghz */
DEFINE_PROP_BOOL("backcompat-cntfrq", ARMCPU, backcompat_cntfrq, false),
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index f065756c5c..1277a0ddfc 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1086,6 +1086,10 @@ struct ArchCPU {
QLIST_HEAD(, ARMELChangeHook) el_change_hooks;
int32_t node_id; /* NUMA node this CPU belongs to */
+ int32_t socket_id;
+ int32_t cluster_id;
+ int32_t core_id;
+ int32_t thread_id;
/* Used to synchronize KVM and QEMU in-kernel device levels */
uint8_t device_irq_level;
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [PATCH RFC V4 01/33] arm/virt,target/arm: Add new ARMCPU {socket,cluster,core,thread}-id property
2024-10-09 3:17 ` [PATCH RFC V4 01/33] arm/virt, target/arm: Add new ARMCPU {socket, cluster, core, thread}-id property Salil Mehta via
@ 2024-10-10 14:15 ` Miguel Luis
0 siblings, 0 replies; 37+ messages in thread
From: Miguel Luis @ 2024-10-10 14:15 UTC (permalink / raw)
To: Salil Mehta
Cc: qemu-devel@nongnu.org, qemu-arm@nongnu.org, Michael S . Tsirkin,
Marc Zyngier, Jean-Philippe Brucker, Jonathan Cameron,
Lorenzo Pieralisi, Peter Maydell, Richard Henderson,
Igor Mammedov, andrew.jones@linux.dev, david@redhat.com,
Philippe Mathieu-Daudé, Eric Auger, Will Deacon,
Ard Biesheuvel, oliver.upton@linux.dev, pbonzini@redhat.com,
gshan@redhat.com, rafael@kernel.org, borntraeger@linux.ibm.com,
alex.bennee@linaro.org, npiggin@gmail.com, harshpb@linux.ibm.com,
linux@armlinux.org.uk, darren@os.amperecomputing.com,
ilkka@os.amperecomputing.com, vishnu@os.amperecomputing.com,
Karl Heubaum, salil.mehta@opnsrc.net, zhukeqian1@huawei.com,
wangxiongfeng2@huawei.com, wangyanan55@huawei.com,
jiakernel2@gmail.com, maobibo@loongson.cn, lixianglai@loongson.cn,
shahuang@redhat.com, zhao1.liu@intel.com, linuxarm@huawei.com,
gustavo.romero@linaro.org
> On 9 Oct 2024, at 03:17, Salil Mehta <salil.mehta@huawei.com> wrote:
>
> This shall be used to store user specified topology{socket,cluster,core,thread}
> and shall be converted to a unique 'vcpu-id' which is used as slot-index during
> hot(un)plug of vCPU.
>
> Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> ---
> hw/arm/virt.c | 10 ++++++++++
> include/hw/arm/virt.h | 28 ++++++++++++++++++++++++++++
> target/arm/cpu.c | 4 ++++
> target/arm/cpu.h | 4 ++++
> 4 files changed, 46 insertions(+)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 7934b23651..a0aeb263dc 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -2240,6 +2240,14 @@ static void machvirt_init(MachineState *machine)
> &error_fatal);
>
> aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL);
> + object_property_set_int(cpuobj, "socket-id",
> + virt_get_socket_id(machine, n), NULL);
> + object_property_set_int(cpuobj, "cluster-id",
> + virt_get_cluster_id(machine, n), NULL);
> + object_property_set_int(cpuobj, "core-id",
> + virt_get_core_id(machine, n), NULL);
> + object_property_set_int(cpuobj, "thread-id",
> + virt_get_thread_id(machine, n), NULL);
>
> if (!vms->secure) {
> object_property_set_bool(cpuobj, "has_el3", false, NULL);
> @@ -2763,6 +2771,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
> {
> int n;
> unsigned int max_cpus = ms->smp.max_cpus;
> + unsigned int smp_threads = ms->smp.threads;
> VirtMachineState *vms = VIRT_MACHINE(ms);
> MachineClass *mc = MACHINE_GET_CLASS(vms);
>
> @@ -2776,6 +2785,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
> ms->possible_cpus->len = max_cpus;
> for (n = 0; n < ms->possible_cpus->len; n++) {
> ms->possible_cpus->cpus[n].type = ms->cpu_type;
> + ms->possible_cpus->cpus[n].vcpus_count = smp_threads;
> ms->possible_cpus->cpus[n].arch_id =
> virt_cpu_mp_affinity(vms, n);
>
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index aca4f8061b..db3e2aebb9 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -214,4 +214,32 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms)
> vms->highmem_redists) ? 2 : 1;
> }
>
> +static inline int virt_get_socket_id(const MachineState *ms, int cpu_index)
> +{
> + assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
> +
> + return ms->possible_cpus->cpus[cpu_index].props.socket_id;
> +}
> +
> +static inline int virt_get_cluster_id(const MachineState *ms, int cpu_index)
> +{
> + assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
> +
> + return ms->possible_cpus->cpus[cpu_index].props.cluster_id;
> +}
> +
> +static inline int virt_get_core_id(const MachineState *ms, int cpu_index)
> +{
> + assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
> +
> + return ms->possible_cpus->cpus[cpu_index].props.core_id;
> +}
> +
> +static inline int virt_get_thread_id(const MachineState *ms, int cpu_index)
> +{
> + assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len);
> +
> + return ms->possible_cpus->cpus[cpu_index].props.thread_id;
> +}
> +
> #endif /* QEMU_ARM_VIRT_H */
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index 19191c2391..bda95366d1 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -2622,6 +2622,10 @@ static Property arm_cpu_properties[] = {
> DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
> mp_affinity, ARM64_AFFINITY_INVALID),
> DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID),
> + DEFINE_PROP_INT32("socket-id", ARMCPU, socket_id, 0),
> + DEFINE_PROP_INT32("cluster-id", ARMCPU, cluster_id, 0),
> + DEFINE_PROP_INT32("core-id", ARMCPU, core_id, 0),
> + DEFINE_PROP_INT32("thread-id", ARMCPU, thread_id, 0),
> DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1),
> /* True to default to the backward-compat old CNTFRQ rather than 1Ghz */
> DEFINE_PROP_BOOL("backcompat-cntfrq", ARMCPU, backcompat_cntfrq, false),
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index f065756c5c..1277a0ddfc 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -1086,6 +1086,10 @@ struct ArchCPU {
> QLIST_HEAD(, ARMELChangeHook) el_change_hooks;
>
> int32_t node_id; /* NUMA node this CPU belongs to */
> + int32_t socket_id;
> + int32_t cluster_id;
> + int32_t core_id;
> + int32_t thread_id;
>
Reviewed-by: Miguel Luis <miguel.luis@oracle.com>
> /* Used to synchronize KVM and QEMU in-kernel device levels */
> uint8_t device_irq_level;
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH RFC V4 02/33] cpu-common: Add common CPU utility for possible vCPUs
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 01/33] arm/virt, target/arm: Add new ARMCPU {socket, cluster, core, thread}-id property Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-10 14:47 ` Miguel Luis
2024-10-09 3:17 ` [PATCH RFC V4 03/33] hw/arm/virt: Disable vCPU hotplug for *unsupported* Accel or GIC Type Salil Mehta via
` (27 subsequent siblings)
29 siblings, 1 reply; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
This patch adds various utility functions that may be required to fetch or check
the state of possible vCPUs. It also introduces the concept of *disabled* vCPUs,
which are part of the *possible* vCPUs but are not enabled. This state will be
used during machine initialization and later during the plugging or unplugging
of vCPUs. We release the QOM CPU objects for all disabled vCPUs.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
cpu-common.c | 21 ++++++++++++++++++++
include/hw/core/cpu.h | 46 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 67 insertions(+)
diff --git a/cpu-common.c b/cpu-common.c
index 6b262233a3..4a446f6f7f 100644
--- a/cpu-common.c
+++ b/cpu-common.c
@@ -24,6 +24,7 @@
#include "sysemu/cpus.h"
#include "qemu/lockable.h"
#include "trace/trace-root.h"
+#include "hw/boards.h"
QemuMutex qemu_cpu_list_lock;
static QemuCond exclusive_cond;
@@ -108,6 +109,26 @@ void cpu_list_remove(CPUState *cpu)
cpu_list_generation_id++;
}
+CPUState *qemu_get_possible_cpu(int index)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ const CPUArchIdList *possible_cpus = ms->possible_cpus;
+
+ assert((index >= 0) && (index < possible_cpus->len));
+
+ return CPU(possible_cpus->cpus[index].cpu);
+}
+
+bool qemu_present_cpu(CPUState *cpu)
+{
+ return cpu;
+}
+
+bool qemu_enabled_cpu(CPUState *cpu)
+{
+ return cpu && !cpu->disabled;
+}
+
CPUState *qemu_get_cpu(int index)
{
CPUState *cpu;
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 1c9c775df6..73a4e4cce1 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -538,6 +538,20 @@ struct CPUState {
CPUPluginState *plugin_state;
#endif
+ /*
+ * In the guest kernel, the presence of vCPUs is determined by information
+ * provided by the VMM or firmware via the ACPI MADT at boot time. Some
+ * architectures do not allow modifications to this configuration after
+ * the guest has booted. Therefore, for such architectures, hotpluggable
+ * vCPUs are exposed by the VMM as not 'ACPI Enabled' to the kernel.
+ * Within QEMU, such vCPUs (those that are 'yet-to-be-plugged' or have
+ * been hot-unplugged) may either have a `CPUState` object in a 'disabled'
+ * state or may not have a `CPUState` object at all.
+ *
+ * By default, `CPUState` objects are enabled across all architectures.
+ */
+ bool disabled;
+
/* TODO Move common fields from CPUArchState here. */
int cpu_index;
int cluster_index;
@@ -924,6 +938,38 @@ static inline bool cpu_in_exclusive_context(const CPUState *cpu)
*/
CPUState *qemu_get_cpu(int index);
+/**
+ * qemu_get_possible_cpu:
+ * @index: The CPUState@cpu_index value of the CPU to obtain.
+ * Input index MUST be in range [0, Max Possible CPUs)
+ *
+ * If CPUState object exists,then it gets a CPU matching
+ * @index in the possible CPU array.
+ *
+ * Returns: The possible CPU or %NULL if CPU does not exist.
+ */
+CPUState *qemu_get_possible_cpu(int index);
+
+/**
+ * qemu_present_cpu:
+ * @cpu: The vCPU to check
+ *
+ * Checks if the vCPU is amongst the present possible vcpus.
+ *
+ * Returns: True if it is present possible vCPU else false
+ */
+bool qemu_present_cpu(CPUState *cpu);
+
+/**
+ * qemu_enabled_cpu:
+ * @cpu: The vCPU to check
+ *
+ * Checks if the vCPU is enabled.
+ *
+ * Returns: True if it is 'enabled' else false
+ */
+bool qemu_enabled_cpu(CPUState *cpu);
+
/**
* cpu_exists:
* @id: Guest-exposed CPU ID to lookup.
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [PATCH RFC V4 02/33] cpu-common: Add common CPU utility for possible vCPUs
2024-10-09 3:17 ` [PATCH RFC V4 02/33] cpu-common: Add common CPU utility for possible vCPUs Salil Mehta via
@ 2024-10-10 14:47 ` Miguel Luis
2024-10-11 9:25 ` Salil Mehta via
0 siblings, 1 reply; 37+ messages in thread
From: Miguel Luis @ 2024-10-10 14:47 UTC (permalink / raw)
To: Salil Mehta
Cc: qemu-devel@nongnu.org, qemu-arm@nongnu.org, Michael S . Tsirkin,
Marc Zyngier, Jean-Philippe Brucker, Jonathan Cameron,
Lorenzo Pieralisi, Peter Maydell, Richard Henderson,
Igor Mammedov, andrew.jones@linux.dev, david@redhat.com,
Philippe Mathieu-Daudé, Eric Auger, Will Deacon,
Ard Biesheuvel, oliver.upton@linux.dev, pbonzini@redhat.com,
gshan@redhat.com, rafael@kernel.org, borntraeger@linux.ibm.com,
alex.bennee@linaro.org, npiggin@gmail.com, harshpb@linux.ibm.com,
linux@armlinux.org.uk, darren@os.amperecomputing.com,
ilkka@os.amperecomputing.com, vishnu@os.amperecomputing.com,
Karl Heubaum, salil.mehta@opnsrc.net, zhukeqian1@huawei.com,
wangxiongfeng2@huawei.com, wangyanan55@huawei.com,
jiakernel2@gmail.com, maobibo@loongson.cn, lixianglai@loongson.cn,
shahuang@redhat.com, zhao1.liu@intel.com, linuxarm@huawei.com,
gustavo.romero@linaro.org
Hi Salil,
> On 9 Oct 2024, at 03:17, Salil Mehta <salil.mehta@huawei.com> wrote:
>
> This patch adds various utility functions that may be required to fetch or check
> the state of possible vCPUs. It also introduces the concept of *disabled* vCPUs,
> which are part of the *possible* vCPUs but are not enabled. This state will be
> used during machine initialization and later during the plugging or unplugging
> of vCPUs. We release the QOM CPU objects for all disabled vCPUs.
>
> Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> ---
> cpu-common.c | 21 ++++++++++++++++++++
> include/hw/core/cpu.h | 46 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 67 insertions(+)
>
> diff --git a/cpu-common.c b/cpu-common.c
> index 6b262233a3..4a446f6f7f 100644
> --- a/cpu-common.c
> +++ b/cpu-common.c
> @@ -24,6 +24,7 @@
> #include "sysemu/cpus.h"
> #include "qemu/lockable.h"
> #include "trace/trace-root.h"
> +#include "hw/boards.h"
>
> QemuMutex qemu_cpu_list_lock;
> static QemuCond exclusive_cond;
> @@ -108,6 +109,26 @@ void cpu_list_remove(CPUState *cpu)
> cpu_list_generation_id++;
> }
>
> +CPUState *qemu_get_possible_cpu(int index)
> +{
> + MachineState *ms = MACHINE(qdev_get_machine());
> + const CPUArchIdList *possible_cpus = ms->possible_cpus;
> +
> + assert((index >= 0) && (index < possible_cpus->len));
> +
> + return CPU(possible_cpus->cpus[index].cpu);
> +}
> +
> +bool qemu_present_cpu(CPUState *cpu)
> +{
> + return cpu;
I don’t feel qemu_present_cpu should be needed, as CPUs are implicitly present as
an initialization premise and arm/virt is the only user of this now.
Thanks
Miguel
> +}
> +
> +bool qemu_enabled_cpu(CPUState *cpu)
> +{
> + return cpu && !cpu->disabled;
> +}
> +
> CPUState *qemu_get_cpu(int index)
> {
> CPUState *cpu;
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 1c9c775df6..73a4e4cce1 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -538,6 +538,20 @@ struct CPUState {
> CPUPluginState *plugin_state;
> #endif
>
> + /*
> + * In the guest kernel, the presence of vCPUs is determined by information
> + * provided by the VMM or firmware via the ACPI MADT at boot time. Some
> + * architectures do not allow modifications to this configuration after
> + * the guest has booted. Therefore, for such architectures, hotpluggable
> + * vCPUs are exposed by the VMM as not 'ACPI Enabled' to the kernel.
> + * Within QEMU, such vCPUs (those that are 'yet-to-be-plugged' or have
> + * been hot-unplugged) may either have a `CPUState` object in a 'disabled'
> + * state or may not have a `CPUState` object at all.
> + *
> + * By default, `CPUState` objects are enabled across all architectures.
> + */
> + bool disabled;
> +
> /* TODO Move common fields from CPUArchState here. */
> int cpu_index;
> int cluster_index;
> @@ -924,6 +938,38 @@ static inline bool cpu_in_exclusive_context(const CPUState *cpu)
> */
> CPUState *qemu_get_cpu(int index);
>
> +/**
> + * qemu_get_possible_cpu:
> + * @index: The CPUState@cpu_index value of the CPU to obtain.
> + * Input index MUST be in range [0, Max Possible CPUs)
> + *
> + * If CPUState object exists,then it gets a CPU matching
> + * @index in the possible CPU array.
> + *
> + * Returns: The possible CPU or %NULL if CPU does not exist.
> + */
> +CPUState *qemu_get_possible_cpu(int index);
> +
> +/**
> + * qemu_present_cpu:
> + * @cpu: The vCPU to check
> + *
> + * Checks if the vCPU is amongst the present possible vcpus.
> + *
> + * Returns: True if it is present possible vCPU else false
> + */
> +bool qemu_present_cpu(CPUState *cpu);
> +
> +/**
> + * qemu_enabled_cpu:
> + * @cpu: The vCPU to check
> + *
> + * Checks if the vCPU is enabled.
> + *
> + * Returns: True if it is 'enabled' else false
> + */
> +bool qemu_enabled_cpu(CPUState *cpu);
> +
> /**
> * cpu_exists:
> * @id: Guest-exposed CPU ID to lookup.
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 37+ messages in thread
* RE: [PATCH RFC V4 02/33] cpu-common: Add common CPU utility for possible vCPUs
2024-10-10 14:47 ` Miguel Luis
@ 2024-10-11 9:25 ` Salil Mehta via
0 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-11 9:25 UTC (permalink / raw)
To: Miguel Luis
Cc: qemu-devel@nongnu.org, qemu-arm@nongnu.org, Michael S . Tsirkin,
Marc Zyngier, Jean-Philippe Brucker, Jonathan Cameron,
Lorenzo Pieralisi, Peter Maydell, Richard Henderson,
Igor Mammedov, andrew.jones@linux.dev, david@redhat.com,
Philippe Mathieu-Daudé, Eric Auger, Will Deacon,
Ard Biesheuvel, oliver.upton@linux.dev, pbonzini@redhat.com,
gshan@redhat.com, rafael@kernel.org, borntraeger@linux.ibm.com,
alex.bennee@linaro.org, npiggin@gmail.com, harshpb@linux.ibm.com,
linux@armlinux.org.uk, darren@os.amperecomputing.com,
ilkka@os.amperecomputing.com, vishnu@os.amperecomputing.com,
Karl Heubaum, salil.mehta@opnsrc.net, zhukeqian,
wangxiongfeng (C), wangyanan (Y), jiakernel2@gmail.com,
maobibo@loongson.cn, lixianglai@loongson.cn, shahuang@redhat.com,
zhao1.liu@intel.com, Linuxarm, gustavo.romero@linaro.org
Hi Miguel,
> From: Miguel Luis <miguel.luis@oracle.com>
> Sent: Thursday, October 10, 2024 3:47 PM
> To: Salil Mehta <salil.mehta@huawei.com>
>
> Hi Salil,
>
> > On 9 Oct 2024, at 03:17, Salil Mehta <salil.mehta@huawei.com> wrote:
> >
> > This patch adds various utility functions that may be required to
> > fetch or check the state of possible vCPUs. It also introduces the
> > concept of *disabled* vCPUs, which are part of the *possible* vCPUs
> > but are not enabled. This state will be used during machine
> > initialization and later during the plugging or unplugging of vCPUs. We
> release the QOM CPU objects for all disabled vCPUs.
> >
> > Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
> > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> > ---
> > cpu-common.c | 21 ++++++++++++++++++++
> > include/hw/core/cpu.h | 46
> +++++++++++++++++++++++++++++++++++++++++++
> > 2 files changed, 67 insertions(+)
> >
> > diff --git a/cpu-common.c b/cpu-common.c index 6b262233a3..4a446f6f7f
> > 100644
> > --- a/cpu-common.c
> > +++ b/cpu-common.c
> > @@ -24,6 +24,7 @@
> > #include "sysemu/cpus.h"
> > #include "qemu/lockable.h"
> > #include "trace/trace-root.h"
> > +#include "hw/boards.h"
> >
> > QemuMutex qemu_cpu_list_lock;
> > static QemuCond exclusive_cond;
> > @@ -108,6 +109,26 @@ void cpu_list_remove(CPUState *cpu)
> > cpu_list_generation_id++;
> > }
> >
> > +CPUState *qemu_get_possible_cpu(int index) {
> > + MachineState *ms = MACHINE(qdev_get_machine());
> > + const CPUArchIdList *possible_cpus = ms->possible_cpus;
> > +
> > + assert((index >= 0) && (index < possible_cpus->len));
> > +
> > + return CPU(possible_cpus->cpus[index].cpu);
> > +}
> > +
> > +bool qemu_present_cpu(CPUState *cpu)
> > +{
> > + return cpu;
>
> I don’t feel qemu_present_cpu should be needed as cpus are implicitly
> present as an initialization premise and arm/virt being the only user of this
> now.
Yes, as explained to you earlier, there is a history to it. In the earlier prototypes,
I was planning to hide the persistence of the vCPU object behind this function,
but then the same function was also being used at other places within the code,
like GICv3. Later, I introduced qemu_enabled_cpu() and acpi_persistent_cpu(),
realizing that persistence of the vCPU is only required at the ACPI level, and hence
did away with most of the qemu_present_cpu() usages.
Gavin also commented on this earlier. Maybe we can deprecate it.
Thanks
Salil.
>
> Thanks
> Miguel
>
> > +}
> > +
> > +bool qemu_enabled_cpu(CPUState *cpu)
> > +{
> > + return cpu && !cpu->disabled;
> > +}
> > +
> > CPUState *qemu_get_cpu(int index)
> > {
> > CPUState *cpu;
> > diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index
> > 1c9c775df6..73a4e4cce1 100644
> > --- a/include/hw/core/cpu.h
> > +++ b/include/hw/core/cpu.h
> > @@ -538,6 +538,20 @@ struct CPUState {
> > CPUPluginState *plugin_state;
> > #endif
> >
> > + /*
> > + * In the guest kernel, the presence of vCPUs is determined by
> information
> > + * provided by the VMM or firmware via the ACPI MADT at boot time.
> Some
> > + * architectures do not allow modifications to this configuration after
> > + * the guest has booted. Therefore, for such architectures,
> hotpluggable
> > + * vCPUs are exposed by the VMM as not 'ACPI Enabled' to the kernel.
> > + * Within QEMU, such vCPUs (those that are 'yet-to-be-plugged' or
> have
> > + * been hot-unplugged) may either have a `CPUState` object in a
> 'disabled'
> > + * state or may not have a `CPUState` object at all.
> > + *
> > + * By default, `CPUState` objects are enabled across all architectures.
> > + */
> > + bool disabled;
> > +
> > /* TODO Move common fields from CPUArchState here. */
> > int cpu_index;
> > int cluster_index;
> > @@ -924,6 +938,38 @@ static inline bool cpu_in_exclusive_context(const
> > CPUState *cpu) */ CPUState *qemu_get_cpu(int index);
> >
> > +/**
> > + * qemu_get_possible_cpu:
> > + * @index: The CPUState@cpu_index value of the CPU to obtain.
> > + * Input index MUST be in range [0, Max Possible CPUs)
> > + *
> > + * If CPUState object exists,then it gets a CPU matching
> > + * @index in the possible CPU array.
> > + *
> > + * Returns: The possible CPU or %NULL if CPU does not exist.
> > + */
> > +CPUState *qemu_get_possible_cpu(int index);
> > +
> > +/**
> > + * qemu_present_cpu:
> > + * @cpu: The vCPU to check
> > + *
> > + * Checks if the vCPU is amongst the present possible vcpus.
> > + *
> > + * Returns: True if it is present possible vCPU else false */ bool
> > +qemu_present_cpu(CPUState *cpu);
> > +
> > +/**
> > + * qemu_enabled_cpu:
> > + * @cpu: The vCPU to check
> > + *
> > + * Checks if the vCPU is enabled.
> > + *
> > + * Returns: True if it is 'enabled' else false */ bool
> > +qemu_enabled_cpu(CPUState *cpu);
> > +
> > /**
> > * cpu_exists:
> > * @id: Guest-exposed CPU ID to lookup.
> > --
> > 2.34.1
> >
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH RFC V4 03/33] hw/arm/virt: Disable vCPU hotplug for *unsupported* Accel or GIC Type
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 01/33] arm/virt, target/arm: Add new ARMCPU {socket, cluster, core, thread}-id property Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 02/33] cpu-common: Add common CPU utility for possible vCPUs Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 04/33] hw/arm/virt: Move setting of common vCPU properties in a function Salil Mehta via
` (26 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
For unsupported acceleration types and GIC versions, explicitly disable vCPU
hotplug support and limit the number of possible vCPUs to those available at
boot time (i.e., SMP CPUs). This flag will be referenced at various points in
the code to verify the presence of vCPU hotplug functionality on this machine.
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 66 +++++++++++++++++++++++++++++----------------------
1 file changed, 38 insertions(+), 28 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a0aeb263dc..1c730f85c2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2107,8 +2107,6 @@ static void machvirt_init(MachineState *machine)
unsigned int smp_cpus = machine->smp.cpus;
unsigned int max_cpus = machine->smp.max_cpus;
- possible_cpus = mc->possible_cpu_arch_ids(machine);
-
/*
* In accelerated mode, the memory map is computed earlier in kvm_type()
* to create a VM with the right number of IPA bits.
@@ -2123,7 +2121,7 @@ static void machvirt_init(MachineState *machine)
* we are about to deal with. Once this is done, get rid of
* the object.
*/
- cpuobj = object_new(possible_cpus->cpus[0].type);
+ cpuobj = object_new(machine->cpu_type);
armcpu = ARM_CPU(cpuobj);
pa_bits = arm_pamax(armcpu);
@@ -2138,6 +2136,43 @@ static void machvirt_init(MachineState *machine)
*/
finalize_gic_version(vms);
+ /*
+ * The maximum number of CPUs depends on the GIC version, or on how
+ * many redistributors we can fit into the memory map (which in turn
+ * depends on whether this is a GICv3 or v4).
+ */
+ if (vms->gic_version == VIRT_GIC_VERSION_2) {
+ virt_max_cpus = GIC_NCPU;
+ } else {
+ virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST);
+ if (vms->highmem_redists) {
+ virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
+ }
+ }
+
+ if ((tcg_enabled() && !qemu_tcg_mttcg_enabled()) || hvf_enabled() ||
+ qtest_enabled() || (vms->gic_version < VIRT_GIC_VERSION_3)) {
+ max_cpus = machine->smp.max_cpus = smp_cpus;
+ mc->has_hotpluggable_cpus = false;
+ if (vms->gic_version >= VIRT_GIC_VERSION_3) {
+ warn_report("cpu hotplug feature has been disabled");
+ }
+ }
+
+ if (max_cpus > virt_max_cpus) {
+ error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
+ "supported by machine 'mach-virt' (%d)",
+ max_cpus, virt_max_cpus);
+ if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) {
+ error_printf("Try 'highmem-redists=on' for more CPUs\n");
+ }
+
+ exit(1);
+ }
+
+ /* uses smp.max_cpus to initialize all possible vCPUs */
+ possible_cpus = mc->possible_cpu_arch_ids(machine);
+
if (vms->secure) {
/*
* The Secure view of the world is the same as the NonSecure,
@@ -2172,31 +2207,6 @@ static void machvirt_init(MachineState *machine)
vms->psci_conduit = QEMU_PSCI_CONDUIT_HVC;
}
- /*
- * The maximum number of CPUs depends on the GIC version, or on how
- * many redistributors we can fit into the memory map (which in turn
- * depends on whether this is a GICv3 or v4).
- */
- if (vms->gic_version == VIRT_GIC_VERSION_2) {
- virt_max_cpus = GIC_NCPU;
- } else {
- virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST);
- if (vms->highmem_redists) {
- virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
- }
- }
-
- if (max_cpus > virt_max_cpus) {
- error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
- "supported by machine 'mach-virt' (%d)",
- max_cpus, virt_max_cpus);
- if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) {
- error_printf("Try 'highmem-redists=on' for more CPUs\n");
- }
-
- exit(1);
- }
-
if (vms->secure && (kvm_enabled() || hvf_enabled())) {
error_report("mach-virt: %s does not support providing "
"Security extensions (TrustZone) to the guest CPU",
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 04/33] hw/arm/virt: Move setting of common vCPU properties in a function
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (2 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 03/33] hw/arm/virt: Disable vCPU hotplug for *unsupported* Accel or GIC Type Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 05/33] arm/virt, target/arm: Machine init time change common to vCPU {cold|hot}-plug Salil Mehta via
` (25 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
Factor out the vCPU property-setting code that is common to {hot,cold}-plugged
vCPUs from machvirt_init() into a separate function. This allows code reuse.
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 219 ++++++++++++++++++++++++++----------------
include/hw/arm/virt.h | 4 +
2 files changed, 139 insertions(+), 84 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 1c730f85c2..c01489a22d 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2090,16 +2090,129 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
}
}
+static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot,
+ Error **errp)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ VirtMachineState *vms = VIRT_MACHINE(ms);
+ Error *local_err = NULL;
+ VirtMachineClass *vmc;
+
+ vmc = VIRT_MACHINE_GET_CLASS(ms);
+
+ /* now, set the cpu object property values */
+ numa_cpu_pre_plug(cpu_slot, DEVICE(cpuobj), &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ object_property_set_int(cpuobj, "mp-affinity", cpu_slot->arch_id, NULL);
+
+ if (!vms->secure) {
+ object_property_set_bool(cpuobj, "has_el3", false, NULL);
+ }
+
+ if (!vms->virt && object_property_find(cpuobj, "has_el2")) {
+ object_property_set_bool(cpuobj, "has_el2", false, NULL);
+ }
+
+ if (vmc->kvm_no_adjvtime &&
+ object_property_find(cpuobj, "kvm-no-adjvtime")) {
+ object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL);
+ }
+
+ if (vmc->no_kvm_steal_time &&
+ object_property_find(cpuobj, "kvm-steal-time")) {
+ object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL);
+ }
+
+ if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) {
+ object_property_set_bool(cpuobj, "pmu", false, NULL);
+ }
+
+ if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) {
+ object_property_set_bool(cpuobj, "lpa2", false, NULL);
+ }
+
+ if (object_property_find(cpuobj, "reset-cbar")) {
+ object_property_set_int(cpuobj, "reset-cbar",
+ vms->memmap[VIRT_CPUPERIPHS].base,
+ &local_err);
+ if (local_err) {
+ goto out;
+ }
+ }
+
+ /* link already initialized {secure,tag}-memory regions to this cpu */
+ object_property_set_link(cpuobj, "memory", OBJECT(vms->sysmem), &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ if (vms->secure) {
+ object_property_set_link(cpuobj, "secure-memory",
+ OBJECT(vms->secure_sysmem), &local_err);
+ if (local_err) {
+ goto out;
+ }
+ }
+
+ if (vms->mte) {
+ if (!object_property_find(cpuobj, "tag-memory")) {
+ error_setg(&local_err, "MTE requested, but not supported "
+ "by the guest CPU");
+ if (local_err) {
+ goto out;
+ }
+ }
+
+ object_property_set_link(cpuobj, "tag-memory", OBJECT(vms->tag_sysmem),
+ &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ if (vms->secure) {
+ object_property_set_link(cpuobj, "secure-tag-memory",
+ OBJECT(vms->secure_tag_sysmem),
+ &local_err);
+ if (local_err) {
+ goto out;
+ }
+ }
+ }
+
+ /*
+ * RFC: Question: this must only be called for the hotplugged cpus. For the
+ * cold booted secondary cpus this is being taken care in arm_load_kernel()
+ * in boot.c. Perhaps we should remove that code now?
+ */
+ if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) {
+ object_property_set_int(cpuobj, "psci-conduit", vms->psci_conduit,
+ NULL);
+
+ /* Secondary CPUs start in PSCI powered-down state */
+ if (CPU(cpuobj)->cpu_index > 0) {
+ object_property_set_bool(cpuobj, "start-powered-off", true, NULL);
+ }
+ }
+
+out:
+ if (local_err) {
+ error_propagate(errp, local_err);
+ }
+}
+
static void machvirt_init(MachineState *machine)
{
VirtMachineState *vms = VIRT_MACHINE(machine);
VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine);
MachineClass *mc = MACHINE_GET_CLASS(machine);
const CPUArchIdList *possible_cpus;
- MemoryRegion *sysmem = get_system_memory();
+ MemoryRegion *secure_tag_sysmem = NULL;
MemoryRegion *secure_sysmem = NULL;
MemoryRegion *tag_sysmem = NULL;
- MemoryRegion *secure_tag_sysmem = NULL;
+ MemoryRegion *sysmem;
int n, virt_max_cpus;
bool firmware_loaded;
bool aarch64 = true;
@@ -2173,6 +2286,8 @@ static void machvirt_init(MachineState *machine)
/* uses smp.max_cpus to initialize all possible vCPUs */
possible_cpus = mc->possible_cpu_arch_ids(machine);
+ sysmem = vms->sysmem = get_system_memory();
+
if (vms->secure) {
/*
* The Secure view of the world is the same as the NonSecure,
@@ -2180,7 +2295,7 @@ static void machvirt_init(MachineState *machine)
* containing the system memory at low priority; any secure-only
* devices go in at higher priority and take precedence.
*/
- secure_sysmem = g_new(MemoryRegion, 1);
+ secure_sysmem = vms->secure_sysmem = g_new(MemoryRegion, 1);
memory_region_init(secure_sysmem, OBJECT(machine), "secure-memory",
UINT64_MAX);
memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1);
@@ -2228,6 +2343,23 @@ static void machvirt_init(MachineState *machine)
exit(1);
}
+ if (vms->mte) {
+ /* Create the memory region only once, but link to all cpus later */
+ tag_sysmem = vms->tag_sysmem = g_new(MemoryRegion, 1);
+ memory_region_init(tag_sysmem, OBJECT(machine),
+ "tag-memory", UINT64_MAX / 32);
+
+ if (vms->secure) {
+ secure_tag_sysmem = vms->secure_tag_sysmem = g_new(MemoryRegion, 1);
+ memory_region_init(secure_tag_sysmem, OBJECT(machine),
+ "secure-tag-memory", UINT64_MAX / 32);
+
+ /* As with ram, secure-tag takes precedence over tag. */
+ memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
+ tag_sysmem, -1);
+ }
+ }
+
create_fdt(vms);
assert(possible_cpus->len == max_cpus);
@@ -2240,15 +2372,10 @@ static void machvirt_init(MachineState *machine)
}
cpuobj = object_new(possible_cpus->cpus[n].type);
- object_property_set_int(cpuobj, "mp-affinity",
- possible_cpus->cpus[n].arch_id, NULL);
cs = CPU(cpuobj);
cs->cpu_index = n;
- numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj),
- &error_fatal);
-
aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL);
object_property_set_int(cpuobj, "socket-id",
virt_get_socket_id(machine, n), NULL);
@@ -2259,82 +2386,6 @@ static void machvirt_init(MachineState *machine)
object_property_set_int(cpuobj, "thread-id",
virt_get_thread_id(machine, n), NULL);
- if (!vms->secure) {
- object_property_set_bool(cpuobj, "has_el3", false, NULL);
- }
-
- if (!vms->virt && object_property_find(cpuobj, "has_el2")) {
- object_property_set_bool(cpuobj, "has_el2", false, NULL);
- }
-
- if (vmc->kvm_no_adjvtime &&
- object_property_find(cpuobj, "kvm-no-adjvtime")) {
- object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL);
- }
-
- if (vmc->no_kvm_steal_time &&
- object_property_find(cpuobj, "kvm-steal-time")) {
- object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL);
- }
-
- if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) {
- object_property_set_bool(cpuobj, "pmu", false, NULL);
- }
-
- if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) {
- object_property_set_bool(cpuobj, "lpa2", false, NULL);
- }
-
- if (object_property_find(cpuobj, "reset-cbar")) {
- object_property_set_int(cpuobj, "reset-cbar",
- vms->memmap[VIRT_CPUPERIPHS].base,
- &error_abort);
- }
-
- object_property_set_link(cpuobj, "memory", OBJECT(sysmem),
- &error_abort);
- if (vms->secure) {
- object_property_set_link(cpuobj, "secure-memory",
- OBJECT(secure_sysmem), &error_abort);
- }
-
- if (vms->mte) {
- /* Create the memory region only once, but link to all cpus. */
- if (!tag_sysmem) {
- /*
- * The property exists only if MemTag is supported.
- * If it is, we must allocate the ram to back that up.
- */
- if (!object_property_find(cpuobj, "tag-memory")) {
- error_report("MTE requested, but not supported "
- "by the guest CPU");
- exit(1);
- }
-
- tag_sysmem = g_new(MemoryRegion, 1);
- memory_region_init(tag_sysmem, OBJECT(machine),
- "tag-memory", UINT64_MAX / 32);
-
- if (vms->secure) {
- secure_tag_sysmem = g_new(MemoryRegion, 1);
- memory_region_init(secure_tag_sysmem, OBJECT(machine),
- "secure-tag-memory", UINT64_MAX / 32);
-
- /* As with ram, secure-tag takes precedence over tag. */
- memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
- tag_sysmem, -1);
- }
- }
-
- object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem),
- &error_abort);
- if (vms->secure) {
- object_property_set_link(cpuobj, "secure-tag-memory",
- OBJECT(secure_tag_sysmem),
- &error_abort);
- }
- }
-
qdev_realize(DEVICE(cpuobj), NULL, &error_fatal);
object_unref(cpuobj);
}
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index db3e2aebb9..5300e8d2bc 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -143,6 +143,10 @@ struct VirtMachineState {
DeviceState *platform_bus_dev;
FWCfgState *fw_cfg;
PFlashCFI01 *flash[2];
+ MemoryRegion *sysmem;
+ MemoryRegion *secure_sysmem;
+ MemoryRegion *tag_sysmem;
+ MemoryRegion *secure_tag_sysmem;
bool secure;
bool highmem;
bool highmem_compact;
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 05/33] arm/virt, target/arm: Machine init time change common to vCPU {cold|hot}-plug
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (3 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 04/33] hw/arm/virt: Move setting of common vCPU properties in a function Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 06/33] arm/virt, kvm: Pre-create disabled possible vCPUs @machine init Salil Mehta via
` (24 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
Introduce the common logic required during the initialization of both cold and
hot-plugged vCPUs. Additionally, initialize the *disabled* state of the vCPUs,
which will be used further during the initialization phases of various other
components like GIC, PMU, ACPI, etc., as part of the virtual machine
initialization.
Reported-by: Gavin Shan <gshan@redhat.com>
[GS: pointed out the assertion caused by a wrong range check in the cpu_pre_plug phase]
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 92 ++++++++++++++++++++++++++++++++++++++++++-
include/hw/arm/virt.h | 26 ++++++++++++
target/arm/cpu.c | 7 ++++
target/arm/cpu64.c | 14 +++++++
4 files changed, 138 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index c01489a22d..3db4769289 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2828,6 +2828,26 @@ static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
return socket_id % ms->numa_state->num_nodes;
}
+static int
+virt_get_cpu_id_from_cpu_topo(const MachineState *ms, DeviceState *dev)
+{
+ int cpu_id, sock_vcpu_num, clus_vcpu_num, core_vcpu_num;
+ ARMCPU *cpu = ARM_CPU(dev);
+
+ /* calculate total logical cpus across socket/cluster/core */
+ sock_vcpu_num = cpu->socket_id * (ms->smp.threads * ms->smp.cores *
+ ms->smp.clusters);
+ clus_vcpu_num = cpu->cluster_id * (ms->smp.threads * ms->smp.cores);
+ core_vcpu_num = cpu->core_id * ms->smp.threads;
+
+ /* get vcpu-id(logical cpu index) for this vcpu from this topology */
+ cpu_id = (sock_vcpu_num + clus_vcpu_num + core_vcpu_num) + cpu->thread_id;
+
+ assert(cpu_id >= 0 && cpu_id < ms->possible_cpus->len);
+
+ return cpu_id;
+}
+
static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
{
int n;
@@ -2910,6 +2930,70 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev,
dev, &error_abort);
}
+static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ MachineState *ms = MACHINE(hotplug_dev);
+ ARMCPU *cpu = ARM_CPU(dev);
+ CPUState *cs = CPU(dev);
+ CPUArchId *cpu_slot;
+
+ /* sanity check the cpu */
+ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
+ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'",
+ ms->cpu_type);
+ return;
+ }
+
+ if ((cpu->thread_id < 0) || (cpu->thread_id >= ms->smp.threads)) {
+ error_setg(errp, "Invalid thread-id %u specified, correct range 0:%u",
+ cpu->thread_id, ms->smp.threads - 1);
+ return;
+ }
+
+ if ((cpu->core_id < 0) || (cpu->core_id >= ms->smp.cores)) {
+ error_setg(errp, "Invalid core-id %d specified, correct range 0:%u",
+ cpu->core_id, ms->smp.cores - 1);
+ return;
+ }
+
+ if ((cpu->cluster_id < 0) || (cpu->cluster_id >= ms->smp.clusters)) {
+ error_setg(errp, "Invalid cluster-id %u specified, correct range 0:%u",
+ cpu->cluster_id, ms->smp.clusters - 1);
+ return;
+ }
+
+ if ((cpu->socket_id < 0) || (cpu->socket_id >= ms->smp.sockets)) {
+ error_setg(errp, "Invalid socket-id %u specified, correct range 0:%u",
+ cpu->socket_id, ms->smp.sockets - 1);
+ return;
+ }
+
+ cs->cpu_index = virt_get_cpu_id_from_cpu_topo(ms, dev);
+
+ cpu_slot = virt_find_cpu_slot(cs);
+ if (qemu_present_cpu(CPU(cpu_slot->cpu))) {
+ error_setg(errp, "cpu(id%d=%d:%d:%d:%d) with arch-id %" PRIu64 " exist",
+ cs->cpu_index, cpu->socket_id, cpu->cluster_id, cpu->core_id,
+ cpu->thread_id, cpu_slot->arch_id);
+ return;
+ }
+ virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp);
+}
+
+static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ CPUState *cs = CPU(dev);
+ CPUArchId *cpu_slot;
+
+ /* insert the cold/hot-plugged vcpu in the slot */
+ cpu_slot = virt_find_cpu_slot(cs);
+ cpu_slot->cpu = CPU(dev);
+
+ cs->disabled = false;
+}
+
static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
@@ -2917,6 +3001,8 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
virt_memory_pre_plug(hotplug_dev, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ virt_cpu_pre_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
@@ -2973,6 +3059,8 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
virt_memory_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ virt_cpu_plug(hotplug_dev, dev, errp);
}
if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
@@ -3057,7 +3145,8 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
if (device_is_dynamic_sysbus(mc, dev) ||
object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) ||
- object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+ object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
+ object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
return HOTPLUG_HANDLER(machine);
}
return NULL;
@@ -3161,6 +3250,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
mc->valid_cpu_types = valid_cpu_types;
mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
mc->kvm_type = virt_kvm_type;
+ mc->has_hotpluggable_cpus = true;
assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
hc->pre_plug = virt_machine_device_pre_plug_cb;
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 5300e8d2bc..446c574c0d 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -246,4 +246,30 @@ static inline int virt_get_thread_id(const MachineState *ms, int cpu_index)
return ms->possible_cpus->cpus[cpu_index].props.thread_id;
}
+static inline CPUArchId *virt_find_cpu_slot(CPUState *cs)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ CPUArchId *cpu_slot;
+
+ assert(cs->cpu_index >= 0 && cs->cpu_index < ms->possible_cpus->len);
+
+ cpu_slot = &ms->possible_cpus->cpus[cs->cpu_index];
+
+ /*
+ * The slot ID refers to the index where a vCPU with a specific architecture
+ * ID (e.g., MPIDR or affinity) is plugged in. The slot ID is more closely
+ * related to the machine configuration, while the architecture ID is tied
+ * directly to the vCPU itself. Currently, the code assumes that the slot ID
+ * and architecture ID are the same, which can make the concept of a slot
+ * somewhat vague. However, it makes more sense to associate the
+ * hot-(un)plugging of vCPUs with a slot as a metaphor. This could represent
+ * the smallest granularity for vCPU hot-(un)plugging. That said, we cannot
+ * rule out the possibility of extending this concept to die or socket
+ * level hot-(un)plugging in the future, should the ARM specification allow
+ * for it.
+ */
+
+ return cpu_slot;
+}
+
#endif /* QEMU_ARM_VIRT_H */
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index bda95366d1..3de0cb346b 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2680,6 +2680,12 @@ static const TCGCPUOps arm_tcg_ops = {
};
#endif /* CONFIG_TCG */
+static int64_t arm_cpu_get_arch_id(CPUState *cs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ return cpu->mp_affinity;
+}
+
static void arm_cpu_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
@@ -2699,6 +2705,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
cc->has_work = arm_cpu_has_work;
cc->mmu_index = arm_cpu_mmu_index;
cc->dump_state = arm_cpu_dump_state;
+ cc->get_arch_id = arm_cpu_get_arch_id;
cc->set_pc = arm_cpu_set_pc;
cc->get_pc = arm_cpu_get_pc;
cc->gdb_read_register = arm_cpu_gdb_read_register;
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 262a1d6c0b..d36d3519df 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -784,6 +784,17 @@ static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp)
}
}
+static void aarch64_cpu_initfn(Object *obj)
+{
+ CPUState *cs = CPU(obj);
+
+ /*
+ * We start every ARM64 vCPU as a disabled, possible vCPU. It must be
+ * explicitly enabled.
+ */
+ cs->disabled = true;
+}
+
static void aarch64_cpu_finalizefn(Object *obj)
{
}
@@ -796,7 +807,9 @@ static const gchar *aarch64_gdb_arch_name(CPUState *cs)
static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
{
CPUClass *cc = CPU_CLASS(oc);
+ DeviceClass *dc = DEVICE_CLASS(oc);
+ dc->user_creatable = true;
cc->gdb_read_register = aarch64_cpu_gdb_read_register;
cc->gdb_write_register = aarch64_cpu_gdb_write_register;
cc->gdb_core_xml_file = "aarch64-core.xml";
@@ -841,6 +854,7 @@ void aarch64_cpu_register(const ARMCPUInfo *info)
static const TypeInfo aarch64_cpu_type_info = {
.name = TYPE_AARCH64_CPU,
.parent = TYPE_ARM_CPU,
+ .instance_init = aarch64_cpu_initfn,
.instance_finalize = aarch64_cpu_finalizefn,
.abstract = true,
.class_init = aarch64_cpu_class_init,
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 06/33] arm/virt, kvm: Pre-create disabled possible vCPUs @machine init
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (4 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 05/33] arm/virt, target/arm: Machine init time change common to vCPU {cold|hot}-plug Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 07/33] arm/virt, gicv3: Changes to pre-size GIC with " Salil Mehta via
` (23 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
In the ARMv8 architecture, the GIC must know all the CPUs it is connected to
during its initialization, and this cannot change afterward. This must be
ensured during the initialization of the VGIC in KVM, which requires all vCPUs
to be created and present during its initialization. This is necessary because:
1. The association between GICC and MPIDR must be fixed at VM initialization
time. This is represented by the register `GICR_TYPER(mp_affinity, proc_num)`.
2. GICC (CPU interfaces), GICR (redistributors), etc., must all be initialized
at boot time.
3. Memory regions associated with GICR, etc., cannot be changed (added, deleted,
or modified) after the VM has been initialized.
This patch adds support to pre-create all possible vCPUs within the host using
the KVM interface as part of the virtual machine initialization. These vCPUs can
later be attached to QOM/ACPI when they are actually hot-plugged and made
present.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Reported-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
[VP: Identified CPU stall issue & suggested probable fix]
---
hw/arm/virt.c | 69 ++++++++++++++++++++++++++++++++++++++-----
include/hw/core/cpu.h | 1 +
target/arm/cpu64.c | 1 +
target/arm/kvm.c | 41 ++++++++++++++++++++++++-
target/arm/kvm_arm.h | 11 +++++++
5 files changed, 114 insertions(+), 9 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 3db4769289..badde5ed7a 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2364,17 +2364,12 @@ static void machvirt_init(MachineState *machine)
assert(possible_cpus->len == max_cpus);
for (n = 0; n < possible_cpus->len; n++) {
+ CPUArchId *cpu_slot;
Object *cpuobj;
CPUState *cs;
- if (n >= smp_cpus) {
- break;
- }
-
cpuobj = object_new(possible_cpus->cpus[n].type);
-
cs = CPU(cpuobj);
- cs->cpu_index = n;
aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL);
object_property_set_int(cpuobj, "socket-id",
@@ -2386,8 +2381,57 @@ static void machvirt_init(MachineState *machine)
object_property_set_int(cpuobj, "thread-id",
virt_get_thread_id(machine, n), NULL);
- qdev_realize(DEVICE(cpuobj), NULL, &error_fatal);
- object_unref(cpuobj);
+ if (n < smp_cpus) {
+ qdev_realize(DEVICE(cpuobj), NULL, &error_fatal);
+ object_unref(cpuobj);
+ } else {
+ /* handling for vCPUs which are yet-to-be hot-plugged */
+ cs->cpu_index = n;
+ cpu_slot = virt_find_cpu_slot(cs);
+
+ /* GICv3 will need `mp-affinity` to derive `gicr_typer` */
+ virt_cpu_set_properties(cpuobj, cpu_slot, &error_fatal);
+
+ /*
+ * For KVM, we shall be pre-creating the now disabled/un-plugged
+ * possible host vcpus and park them till the time they are
+ * actually hot plugged. This is required to pre-size the host
+ * GICC and GICR with the all possible vcpus for this VM.
+ */
+ if (kvm_enabled()) {
+ kvm_arm_create_host_vcpu(ARM_CPU(cs));
+ /*
+ * Override the default architecture ID with the one fetched
+ * from KVM. After initialization, we will destroy the CPUState
+ * for disabled vCPUs; however, the CPU slot and its association
+ * with the architecture ID (and consequently the vCPU ID) will
+ * remain fixed for the entire lifetime of QEMU and cannot be
+ * altered. This is also an ARM CPU architectural constraint.
+ */
+ cpu_slot->arch_id = arm_cpu_mp_affinity(ARM_CPU(cs));
+ }
+ /*
+ * Add disabled vCPU to CPU slot during the init phase of the virt
+ * machine
+ * 1. We need this ARMCPU object during the GIC init. This object
+ * will facilitate in pre-realizing the GIC. Any info like
+ * mp-affinity(required to derive gicr_type) etc. could still be
+ * fetched while preserving QOM abstraction akin to realized
+ * vCPUs.
+ * 2. Now, after initialization of the virt machine is complete we
+ * could use two approaches to deal with this ARMCPU object:
+ * (i) re-use this ARMCPU object during hotplug of this vCPU.
+ * OR
+ * (ii) defer release this ARMCPU object after gic has been
+ * initialized or during pre-plug phase when a vCPU is
+ * hotplugged.
+ *
+ * We will use the (ii) approach and release the ARMCPU objects
+ * after GIC and machine has been fully initialized during
+ * machine_init_done() phase.
+ */
+ cpu_slot->cpu = cs;
+ }
}
/* Now we've created the CPUs we can see if they have the hypvirt timer */
@@ -2990,6 +3034,15 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
/* insert the cold/hot-plugged vcpu in the slot */
cpu_slot = virt_find_cpu_slot(cs);
cpu_slot->cpu = CPU(dev);
+ if (kvm_enabled()) {
+ /*
+ * Override the default architecture ID with the one fetched from KVM.
+ * Currently, KVM derives the architecture ID from the vCPU ID specified
+ * by QEMU. In the future, we might implement a change where the entire
+ * architecture ID can be configured directly by QEMU.
+ */
+ cpu_slot->arch_id = arm_cpu_mp_affinity(ARM_CPU(cs));
+ }
cs->disabled = false;
}
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 73a4e4cce1..bcc62fbf0c 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -530,6 +530,7 @@ struct CPUState {
uint64_t dirty_pages;
int kvm_vcpu_stats_fd;
bool vcpu_dirty;
+ VMChangeStateEntry *vmcse;
/* Use by accel-block: CPU is executing an ioctl() */
QemuLockCnt in_ioctl_lock;
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index d36d3519df..2a517fdb9f 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -793,6 +793,7 @@ static void aarch64_cpu_initfn(Object *obj)
* explicitly enabled.
*/
cs->disabled = true;
+ cs->thread_id = 0;
}
static void aarch64_cpu_finalizefn(Object *obj)
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 849e2e21b3..8ed160cbca 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1003,6 +1003,38 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu)
write_list_to_cpustate(cpu);
}
+void kvm_arm_create_host_vcpu(ARMCPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ unsigned long vcpu_id = cs->cpu_index;
+ int ret;
+
+ ret = kvm_create_vcpu(cs);
+ if (ret < 0) {
+ error_report("Failed to create host vcpu %ld", vcpu_id);
+ abort();
+ }
+
+ /*
+ * Initialize the vCPU in the host. This will reset the sys regs
+ * for this vCPU and related registers like MPIDR_EL1 etc. also
+ * gets programmed during this call to host. These are referred
+ * later while setting device attributes of the GICR during GICv3
+ * reset
+ */
+ ret = kvm_arch_init_vcpu(cs);
+ if (ret < 0) {
+ error_report("Failed to initialize host vcpu %ld", vcpu_id);
+ abort();
+ }
+
+ /*
+ * park the created vCPU. shall be used during kvm_get_vcpu() when
+ * threads are created during realization of ARM vCPUs.
+ */
+ kvm_park_vcpu(cs);
+}
+
/*
* Update KVM's MP_STATE based on what QEMU thinks it is
*/
@@ -1874,7 +1906,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
return -EINVAL;
}
- qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cpu);
+ /*
+ * Install VM change handler only when vCPU thread has been spawned
+ * i.e. vCPU is being realized
+ */
+ if (cs->thread_id) {
+ cs->vmcse = qemu_add_vm_change_state_handler(kvm_arm_vm_state_change,
+ cpu);
+ }
/* Determine init features for this CPU */
memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index cfaa0d9bc7..0be7e896d2 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -96,6 +96,17 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu);
*/
void kvm_arm_reset_vcpu(ARMCPU *cpu);
+/**
+ * kvm_arm_create_host_vcpu:
+ * @cpu: ARMCPU
+ *
+ * Called to pre-create all possible KVM vCPUs within the host at virt
+ * machine init time. This also initializes each pre-created vCPU and
+ * hence results in a vCPU reset at the host. These pre-created and
+ * initialized vCPUs are parked for use when ARM vCPUs are actually realized.
+ */
+void kvm_arm_create_host_vcpu(ARMCPU *cpu);
+
#ifdef CONFIG_KVM
/**
* kvm_arm_create_scratch_host_vcpu:
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 07/33] arm/virt, gicv3: Changes to pre-size GIC with possible vCPUs @machine init
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (5 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 06/33] arm/virt, kvm: Pre-create disabled possible vCPUs @machine init Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 08/33] arm/virt, gicv3: Introduce GICv3 CPU Interface *accessibility* flag and checks Salil Mehta via
` (22 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
The GIC must be pre-sized with the possible vCPUs during initialization. This is
essential because:
1. Memory regions and resources associated with GICC/GICR cannot be modified
(i.e., added, deleted, or changed) once the VM has been initialized.
2. Additionally, the `GICR_TYPER` must be initialized with the `mp_affinity` and
CPU interface number associations, which cannot be altered after
initialization.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 15 ++++++++-------
include/hw/arm/virt.h | 2 +-
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index badde5ed7a..822c7d3d14 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -768,6 +768,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
const char *gictype;
int i;
unsigned int smp_cpus = ms->smp.cpus;
+ unsigned int max_cpus = ms->smp.max_cpus;
uint32_t nb_redist_regions = 0;
int revision;
@@ -792,7 +793,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
}
vms->gic = qdev_new(gictype);
qdev_prop_set_uint32(vms->gic, "revision", revision);
- qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus);
+ qdev_prop_set_uint32(vms->gic, "num-cpu", max_cpus);
/* Note that the num-irq property counts both internal and external
* interrupts; there are always 32 of the former (mandated by GIC spec).
*/
@@ -804,7 +805,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
if (vms->gic_version != VIRT_GIC_VERSION_2) {
QList *redist_region_count;
uint32_t redist0_capacity = virt_redist_capacity(vms, VIRT_GIC_REDIST);
- uint32_t redist0_count = MIN(smp_cpus, redist0_capacity);
+ uint32_t redist0_count = MIN(max_cpus, redist0_capacity);
nb_redist_regions = virt_gicv3_redist_region_count(vms);
@@ -815,7 +816,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
qlist_append_int(redist_region_count,
- MIN(smp_cpus - redist0_count, redist1_capacity));
+ MIN(max_cpus - redist0_count, redist1_capacity));
}
qdev_prop_set_array(vms->gic, "redist-region-count",
redist_region_count);
@@ -888,7 +889,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
} else if (vms->virt) {
qemu_irq irq = qdev_get_gpio_in(vms->gic,
intidbase + ARCH_GIC_MAINT_IRQ);
- sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq);
+ sysbus_connect_irq(gicbusdev, i + 4 * max_cpus, irq);
}
qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
@@ -896,11 +897,11 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
+ VIRTUAL_PMU_IRQ));
sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
- sysbus_connect_irq(gicbusdev, i + smp_cpus,
+ sysbus_connect_irq(gicbusdev, i + max_cpus,
qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
- sysbus_connect_irq(gicbusdev, i + 2 * smp_cpus,
+ sysbus_connect_irq(gicbusdev, i + 2 * max_cpus,
qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
- sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus,
+ sysbus_connect_irq(gicbusdev, i + 3 * max_cpus,
qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
if (vms->gic_version != VIRT_GIC_VERSION_2) {
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 446c574c0d..362422413c 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -214,7 +214,7 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms)
assert(vms->gic_version != VIRT_GIC_VERSION_2);
- return (MACHINE(vms)->smp.cpus > redist0_capacity &&
+ return (MACHINE(vms)->smp.max_cpus > redist0_capacity &&
vms->highmem_redists) ? 2 : 1;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 08/33] arm/virt, gicv3: Introduce GICv3 CPU Interface *accessibility* flag and checks
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (6 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 07/33] arm/virt, gicv3: Changes to pre-size GIC with " Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 09/33] hw/intc/arm-gicv3*: Changes required to (re)init the GICv3 vCPU Interface Salil Mehta via
` (21 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
Introduce a `gicc_accessible` flag to indicate whether it is safe to access the
GICv3 CPU interface. This flag will determine the availability of the GICv3 CPU
interface based on whether the associated QOM vCPUs are enabled or disabled.
Additionally, implement checks throughout the GICv3 codebase to ensure that any
updates or accesses to GICC registers (e.g., `ICC_*_EL1`) occur only when the
`gicc_accessible` flag is set. This ensures that operations such as KVM VGIC
GICC register fetches or modifications are performed only for GICv3 CPU
interfaces that are valid and associated with active QOM vCPUs.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/intc/arm_gicv3_common.c | 9 ++++++--
hw/intc/arm_gicv3_cpuif.c | 8 +++++++
hw/intc/arm_gicv3_kvm.c | 23 ++++++++++++++++++-
include/hw/intc/arm_gicv3_common.h | 36 ++++++++++++++++++++++++++++++
4 files changed, 73 insertions(+), 3 deletions(-)
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index bd50a1b079..4f230257ef 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -436,10 +436,15 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
s->cpu = g_new0(GICv3CPUState, s->num_cpu);
for (i = 0; i < s->num_cpu; i++) {
- CPUState *cpu = qemu_get_cpu(i);
+ CPUState *cpu = qemu_get_possible_cpu(i);
uint64_t cpu_affid;
- s->cpu[i].cpu = cpu;
+ /*
+ * Accordingly, set the QOM `GICv3CPUState` as either accessible or
+ * inaccessible based on the `CPUState` of the associated QOM vCPU.
+ */
+ gicv3_set_cpustate(&s->cpu[i], cpu, qemu_enabled_cpu(cpu));
+
s->cpu[i].gic = s;
/* Store GICv3CPUState in CPUARMState gicv3state pointer */
gicv3_set_gicv3state(cpu, &s->cpu[i]);
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index bdb13b00e9..151f957d42 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -1052,6 +1052,10 @@ void gicv3_cpuif_update(GICv3CPUState *cs)
ARMCPU *cpu = ARM_CPU(cs->cpu);
CPUARMState *env = &cpu->env;
+ if (!gicv3_cpu_accessible(cs)) {
+ return;
+ }
+
g_assert(bql_locked());
trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq,
@@ -2036,6 +2040,10 @@ static void icc_generate_sgi(CPUARMState *env, GICv3CPUState *cs,
for (i = 0; i < s->num_cpu; i++) {
GICv3CPUState *ocs = &s->cpu[i];
+ if (!gicv3_cpu_accessible(ocs)) {
+ continue;
+ }
+
if (irm) {
/* IRM == 1 : route to all CPUs except self */
if (cs == ocs) {
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 9ea6b8e218..7e741ace50 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -24,6 +24,7 @@
#include "hw/intc/arm_gicv3_common.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "kvm_arm.h"
@@ -458,6 +459,16 @@ static void kvm_arm_gicv3_put(GICv3State *s)
GICv3CPUState *c = &s->cpu[ncpu];
int num_pri_bits;
+ /*
+ * We must ensure that we do not attempt to access or update KVM GICC
+ * registers if their corresponding QOM `GICv3CPUState` is marked as
+ * 'inaccessible', either because their corresponding QOM vCPU objects
+ * do not exist or are disabled due to hot-unplug action.
+ */
+ if (!gicv3_cpu_accessible(c)) {
+ continue;
+ }
+
kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true);
kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
&c->icc_ctlr_el1[GICV3_NS], true);
@@ -616,6 +627,14 @@ static void kvm_arm_gicv3_get(GICv3State *s)
GICv3CPUState *c = &s->cpu[ncpu];
int num_pri_bits;
+ /*
+ * don't attempt to access KVM VGIC for the disabled vCPUs where
+ * GICv3CPUState is inaccessible.
+ */
+ if (!gicv3_cpu_accessible(c)) {
+ continue;
+ }
+
kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false);
kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
&c->icc_ctlr_el1[GICV3_NS], false);
@@ -815,7 +834,9 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
for (i = 0; i < s->num_cpu; i++) {
ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
+ if (gicv3_cpu_accessible(&s->cpu[i])) {
+ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
+ }
}
/* Try to create the device via the device control API */
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index cd09bee3bc..73d9088768 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -190,6 +190,7 @@ struct GICv3CPUState {
uint64_t icc_apr[3][4];
uint64_t icc_igrpen[3];
uint64_t icc_ctlr_el3;
+ bool gicc_accessible;
/* Virtualization control interface */
uint64_t ich_apr[3][4]; /* ich_apr[GICV3_G1][x] never used */
@@ -353,4 +354,39 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
*/
const char *gicv3_class_name(void);
+/**
+ * gicv3_cpu_accessible
+ *
+ * The `GICv3CPUState` can become inaccessible if the associated `CPUState` is
+ * either unavailable or in a disabled state. This state is independent of the
+ * KVM VGIC and is not compliant with ARM CPU architecture (i.e. there is no
+ * way we can explicitly enable/disable ARM GIC CPU interface). This change
+ * is specific to QOM only.
+ *
+ * Returns: True if accessible otherwise False
+ */
+static inline bool gicv3_cpu_accessible(GICv3CPUState *gicc)
+{
+ assert(gicc);
+ return gicc->gicc_accessible;
+}
+
+/**
+ * gicv3_set_cpustate
+ *
+ * Binds `cpu` to the `GICv3CPUState` and marks it accessible when
+ * `gicc_accessible` is true; otherwise clears `cpu` and marks it inaccessible
+ */
+static inline void gicv3_set_cpustate(GICv3CPUState *s,
+ CPUState *cpu,
+ bool gicc_accessible)
+{
+ if (gicc_accessible) {
+ s->cpu = cpu;
+ s->gicc_accessible = true;
+ } else {
+ s->cpu = NULL;
+ s->gicc_accessible = false;
+ }
+}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 09/33] hw/intc/arm-gicv3*: Changes required to (re)init the GICv3 vCPU Interface
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (7 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 08/33] arm/virt, gicv3: Introduce GICv3 CPU Interface *accessibility* flag and checks Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 10/33] arm/acpi: Enable ACPI support for vCPU hotplug Salil Mehta via
` (20 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
The GICv3 CPU interface must be (re)initialized when a vCPU is either cold- or
hot-plugged. System registers need to be defined and registered with the
associated vCPU. For cold-plugged vCPUs, this occurs during the GICv3
realization phase, while for hot-plugged vCPUs, it happens during the GICv3
update notification. The latter will be addressed in subsequent patches.
This process must be implemented for both emulation/TCG and KVM cases. This
change adds the necessary support and refactors the existing code to maximize
reuse for both cold and hotplug vCPU initialization.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/intc/arm_gicv3.c | 1 +
hw/intc/arm_gicv3_cpuif.c | 245 ++++++++++++++---------------
hw/intc/arm_gicv3_cpuif_common.c | 13 ++
hw/intc/arm_gicv3_kvm.c | 14 +-
hw/intc/gicv3_internal.h | 1 +
include/hw/intc/arm_gicv3_common.h | 1 +
6 files changed, 143 insertions(+), 132 deletions(-)
diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c
index 58e18fff54..2a30625916 100644
--- a/hw/intc/arm_gicv3.c
+++ b/hw/intc/arm_gicv3.c
@@ -459,6 +459,7 @@ static void arm_gicv3_class_init(ObjectClass *klass, void *data)
ARMGICv3Class *agc = ARM_GICV3_CLASS(klass);
agcc->post_load = arm_gicv3_post_load;
+ agcc->init_cpu_reginfo = gicv3_init_cpu_reginfo;
device_class_set_parent_realize(dc, arm_gic_realize, &agc->parent_realize);
}
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index 151f957d42..453d1296ea 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -3033,143 +3033,138 @@ static void gicv3_cpuif_el_change_hook(ARMCPU *cpu, void *opaque)
gicv3_cpuif_virt_irq_fiq_update(cs);
}
-void gicv3_init_cpuif(GICv3State *s)
+void gicv3_init_cpu_reginfo(CPUState *cs)
{
- /* Called from the GICv3 realize function; register our system
- * registers with the CPU
- */
- int i;
+ ARMCPU *cpu = ARM_CPU(cs);
+ GICv3CPUState *gcs = icc_cs_from_env(&cpu->env);
- for (i = 0; i < s->num_cpu; i++) {
- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
- GICv3CPUState *cs = &s->cpu[i];
+ /*
+ * If the CPU doesn't define a GICv3 configuration, probably because
+ * in real hardware it doesn't have one, then we use default values
+ * matching the one used by most Arm CPUs. This applies to:
+ * cpu->gic_num_lrs
+ * cpu->gic_vpribits
+ * cpu->gic_vprebits
+ * cpu->gic_pribits
+ */
- /*
- * If the CPU doesn't define a GICv3 configuration, probably because
- * in real hardware it doesn't have one, then we use default values
- * matching the one used by most Arm CPUs. This applies to:
- * cpu->gic_num_lrs
- * cpu->gic_vpribits
- * cpu->gic_vprebits
- * cpu->gic_pribits
- */
+ /*
+ * Note that we can't just use the GICv3CPUState as an opaque pointer
+ * in define_arm_cp_regs_with_opaque(), because when we're called back
+ * it might be with code translated by CPU 0 but run by CPU 1, in
+ * which case we'd get the wrong value.
+ * So instead we define the regs with no ri->opaque info, and
+ * get back to the GICv3CPUState from the CPUARMState.
+ *
+ * These CP regs callbacks can be called from either TCG or HVF code.
+ */
+ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
- /* Note that we can't just use the GICv3CPUState as an opaque pointer
- * in define_arm_cp_regs_with_opaque(), because when we're called back
- * it might be with code translated by CPU 0 but run by CPU 1, in
- * which case we'd get the wrong value.
- * So instead we define the regs with no ri->opaque info, and
- * get back to the GICv3CPUState from the CPUARMState.
- *
- * These CP regs callbacks can be called from either TCG or HVF code.
- */
- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
+ /*
+ * If the CPU implements FEAT_NMI and FEAT_GICv3 it must also
+ * implement FEAT_GICv3_NMI, which is the CPU interface part
+ * of NMI support. This is distinct from whether the GIC proper
+ * (redistributors and distributor) have NMI support. In QEMU
+ * that is a property of the GIC device in s->nmi_support;
+ * cs->nmi_support indicates the CPU interface's support.
+ */
+ if (cpu_isar_feature(aa64_nmi, cpu)) {
+ gcs->nmi_support = true;
+ define_arm_cp_regs(cpu, gicv3_cpuif_gicv3_nmi_reginfo);
+ }
- /*
- * If the CPU implements FEAT_NMI and FEAT_GICv3 it must also
- * implement FEAT_GICv3_NMI, which is the CPU interface part
- * of NMI support. This is distinct from whether the GIC proper
- * (redistributors and distributor) have NMI support. In QEMU
- * that is a property of the GIC device in s->nmi_support;
- * cs->nmi_support indicates the CPU interface's support.
- */
- if (cpu_isar_feature(aa64_nmi, cpu)) {
- cs->nmi_support = true;
- define_arm_cp_regs(cpu, gicv3_cpuif_gicv3_nmi_reginfo);
- }
+ /*
+ * The CPU implementation specifies the number of supported
+ * bits of physical priority. For backwards compatibility
+ * of migration, we have a compat property that forces use
+ * of 8 priority bits regardless of what the CPU really has.
+ */
+ if (gcs->gic->force_8bit_prio) {
+ gcs->pribits = 8;
+ } else {
+ gcs->pribits = cpu->gic_pribits ?: 5;
+ }
- /*
- * The CPU implementation specifies the number of supported
- * bits of physical priority. For backwards compatibility
- * of migration, we have a compat property that forces use
- * of 8 priority bits regardless of what the CPU really has.
- */
- if (s->force_8bit_prio) {
- cs->pribits = 8;
- } else {
- cs->pribits = cpu->gic_pribits ?: 5;
- }
+ /*
+ * The GICv3 has separate ID register fields for virtual priority
+ * and preemption bit values, but only a single ID register field
+ * for the physical priority bits. The preemption bit count is
+ * always the same as the priority bit count, except that 8 bits
+ * of priority means 7 preemption bits. We precalculate the
+ * preemption bits because it simplifies the code and makes the
+ * parallels between the virtual and physical bits of the GIC
+ * a bit clearer.
+ */
+ gcs->prebits = gcs->pribits;
+ if (gcs->prebits == 8) {
+ gcs->prebits--;
+ }
+ /*
+ * Check that CPU code defining pribits didn't violate
+ * architectural constraints our implementation relies on.
+ */
+ g_assert(gcs->pribits >= 4 && gcs->pribits <= 8);
- /*
- * The GICv3 has separate ID register fields for virtual priority
- * and preemption bit values, but only a single ID register field
- * for the physical priority bits. The preemption bit count is
- * always the same as the priority bit count, except that 8 bits
- * of priority means 7 preemption bits. We precalculate the
- * preemption bits because it simplifies the code and makes the
- * parallels between the virtual and physical bits of the GIC
- * a bit clearer.
- */
- cs->prebits = cs->pribits;
- if (cs->prebits == 8) {
- cs->prebits--;
- }
- /*
- * Check that CPU code defining pribits didn't violate
- * architectural constraints our implementation relies on.
- */
- g_assert(cs->pribits >= 4 && cs->pribits <= 8);
+ /*
+ * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions
+ * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them.
+ */
+ if (gcs->prebits >= 6) {
+ define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr1_reginfo);
+ }
+ if (gcs->prebits == 7) {
+ define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr23_reginfo);
+ }
- /*
- * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions
- * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them.
- */
- if (cs->prebits >= 6) {
- define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr1_reginfo);
- }
- if (cs->prebits == 7) {
- define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr23_reginfo);
- }
+ if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) {
+ int j;
- if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) {
- int j;
+ gcs->num_list_regs = cpu->gic_num_lrs ?: 4;
+ gcs->vpribits = cpu->gic_vpribits ?: 5;
+ gcs->vprebits = cpu->gic_vprebits ?: 5;
- cs->num_list_regs = cpu->gic_num_lrs ?: 4;
- cs->vpribits = cpu->gic_vpribits ?: 5;
- cs->vprebits = cpu->gic_vprebits ?: 5;
- /* Check against architectural constraints: getting these
- * wrong would be a bug in the CPU code defining these,
- * and the implementation relies on them holding.
- */
- g_assert(cs->vprebits <= cs->vpribits);
- g_assert(cs->vprebits >= 5 && cs->vprebits <= 7);
- g_assert(cs->vpribits >= 5 && cs->vpribits <= 8);
+ /* Check against architectural constraints: getting these
+ * wrong would be a bug in the CPU code defining these,
+ * and the implementation relies on them holding.
+ */
+ g_assert(gcs->vprebits <= gcs->vpribits);
+ g_assert(gcs->vprebits >= 5 && gcs->vprebits <= 7);
+ g_assert(gcs->vpribits >= 5 && gcs->vpribits <= 8);
- define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo);
+ define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo);
- for (j = 0; j < cs->num_list_regs; j++) {
- /* Note that the AArch64 LRs are 64-bit; the AArch32 LRs
- * are split into two cp15 regs, LR (the low part, with the
- * same encoding as the AArch64 LR) and LRC (the high part).
- */
- ARMCPRegInfo lr_regset[] = {
- { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 12,
- .crm = 12 + (j >> 3), .opc2 = j & 7,
- .type = ARM_CP_IO | ARM_CP_NO_RAW,
- .nv2_redirect_offset = 0x400 + 8 * j,
- .access = PL2_RW,
- .readfn = ich_lr_read,
- .writefn = ich_lr_write,
- },
- { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 4, .crn = 12,
- .crm = 14 + (j >> 3), .opc2 = j & 7,
- .type = ARM_CP_IO | ARM_CP_NO_RAW,
- .access = PL2_RW,
- .readfn = ich_lr_read,
- .writefn = ich_lr_write,
- },
- };
- define_arm_cp_regs(cpu, lr_regset);
- }
- if (cs->vprebits >= 6) {
- define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo);
- }
- if (cs->vprebits == 7) {
- define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo);
- }
+ for (j = 0; j < gcs->num_list_regs; j++) {
+ /* Note that the AArch64 LRs are 64-bit; the AArch32 LRs
+ * are split into two cp15 regs, LR (the low part, with the
+ * same encoding as the AArch64 LR) and LRC (the high part).
+ */
+ ARMCPRegInfo lr_regset[] = {
+ { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 4, .crn = 12,
+ .crm = 12 + (j >> 3), .opc2 = j & 7,
+ .type = ARM_CP_IO | ARM_CP_NO_RAW,
+ .nv2_redirect_offset = 0x400 + 8 * j,
+ .access = PL2_RW,
+ .readfn = ich_lr_read,
+ .writefn = ich_lr_write,
+ },
+ { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 4, .crn = 12,
+ .crm = 14 + (j >> 3), .opc2 = j & 7,
+ .type = ARM_CP_IO | ARM_CP_NO_RAW,
+ .access = PL2_RW,
+ .readfn = ich_lr_read,
+ .writefn = ich_lr_write,
+ },
+ };
+ define_arm_cp_regs(cpu, lr_regset);
+ }
+ if (gcs->vprebits >= 6) {
+ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo);
+ }
+ if (gcs->vprebits == 7) {
+ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo);
}
if (tcg_enabled() || qtest_enabled()) {
/*
@@ -3177,7 +3172,7 @@ void gicv3_init_cpuif(GICv3State *s)
* state only changes on EL changes involving EL2 or EL3, so for
* the non-TCG case this is OK, as EL2 and EL3 can't exist.
*/
- arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs);
+ arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, gcs);
} else {
assert(!arm_feature(&cpu->env, ARM_FEATURE_EL2));
assert(!arm_feature(&cpu->env, ARM_FEATURE_EL3));
diff --git a/hw/intc/arm_gicv3_cpuif_common.c b/hw/intc/arm_gicv3_cpuif_common.c
index ff1239f65d..ef9eef3e01 100644
--- a/hw/intc/arm_gicv3_cpuif_common.c
+++ b/hw/intc/arm_gicv3_cpuif_common.c
@@ -20,3 +20,16 @@ void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s)
env->gicv3state = (void *)s;
};
+
+void gicv3_init_cpuif(GICv3State *s)
+{
+ ARMGICv3CommonClass *agcc = ARM_GICV3_COMMON_GET_CLASS(s);
+ int i;
+
+ /* define and register `system registers` with the vCPU */
+ for (i = 0; i < s->num_cpu; i++) {
+ if (gicv3_cpu_accessible(&s->cpu[i])) {
+ agcc->init_cpu_reginfo(s->cpu[i].cpu);
+ }
+ }
+}
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 7e741ace50..3e1e97d830 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -796,6 +796,10 @@ static void vm_change_state_handler(void *opaque, bool running,
}
}
+static void kvm_gicv3_init_cpu_reginfo(CPUState *cs)
+{
+ define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo);
+}
static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
{
@@ -831,13 +835,8 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
- for (i = 0; i < s->num_cpu; i++) {
- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
-
- if (gicv3_cpu_accessible(&s->cpu[i])) {
- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
- }
- }
+ /* initialize vCPU interface */
+ gicv3_init_cpuif(s);
/* Try to create the device via the device control API */
s->dev_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V3, false);
@@ -923,6 +922,7 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data)
agcc->pre_save = kvm_arm_gicv3_get;
agcc->post_load = kvm_arm_gicv3_put;
+ agcc->init_cpu_reginfo = kvm_gicv3_init_cpu_reginfo;
device_class_set_parent_realize(dc, kvm_arm_gicv3_realize,
&kgc->parent_realize);
resettable_class_set_parent_phases(rc, NULL, kvm_arm_gicv3_reset_hold, NULL,
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
index bc9f518fe8..cc8edc499b 100644
--- a/hw/intc/gicv3_internal.h
+++ b/hw/intc/gicv3_internal.h
@@ -722,6 +722,7 @@ void gicv3_redist_vinvall(GICv3CPUState *cs, uint64_t vptaddr);
void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns);
void gicv3_init_cpuif(GICv3State *s);
+void gicv3_init_cpu_reginfo(CPUState *cs);
/**
* gicv3_cpuif_update:
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index 73d9088768..c19eb8d3d0 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -339,6 +339,7 @@ struct ARMGICv3CommonClass {
void (*pre_save)(GICv3State *s);
void (*post_load)(GICv3State *s);
+ void (*init_cpu_reginfo)(CPUState *cs);
};
void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 10/33] arm/acpi: Enable ACPI support for vCPU hotplug
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (8 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 09/33] hw/intc/arm-gicv3*: Changes required to (re)init the GICv3 vCPU Interface Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 11/33] arm/virt: Enhance GED framework to handle vCPU hotplug events Salil Mehta via
` (19 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
ACPI is required to interface QEMU with the guest. Its role roughly falls into
the following cases:
1. Convey the possible vCPUs config at the machine init time to the guest
using various ACPI tables like MADT, DSDT etc.
2. Convey vCPU hotplug events to the guest (using GED)
3. Assist in evaluation of various ACPI methods (like _EVT, _STA, _OST, _EJ0,
_MAT etc.)
4. Provide ACPI CPU hotplug state and a 12-byte memory-mapped vCPU hotplug
control register interface to the OSPM/guest corresponding to each possible
vCPU. The register interface consists of various R/W fields and their
handling operations. These are called whenever register fields or memory
regions are accessed (i.e. read or written) by the OSPM while it evaluates
various ACPI methods.
(Note: a lot of this framework code is inherited from the changes already done
for x86, but some minor changes are still required to make it compatible with
ARM64.)
Enable the ACPI support switch for vCPU hotplug feature. Actual ACPI changes
required will follow in subsequent patches.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 1ad60da7aa..b6f0a65ead 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -33,6 +33,7 @@ config ARM_VIRT
select ACPI_HW_REDUCED
select ACPI_APEI
select ACPI_VIOT
+ select ACPI_CPU_HOTPLUG
select VIRTIO_MEM_SUPPORTED
select ACPI_CXL
select ACPI_HMAT
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 11/33] arm/virt: Enhance GED framework to handle vCPU hotplug events
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (9 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 10/33] arm/acpi: Enable ACPI support for vCPU hotplug Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 12/33] arm/virt: Create GED device before *disabled* vCPU Objects are destroyed Salil Mehta via
` (18 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
During GED device creation at Virt Machine initialization, add a new vCPU
hotplug event to the existing set of supported GED device events. Additionally,
initialize the memory map for the vCPU hotplug *control device*, which will
provide an interface to exchange ACPI events between QEMU/VMM and the Guest
Kernel.
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 5 ++++-
include/hw/arm/virt.h | 1 +
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 822c7d3d14..3986f6d9fc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -80,6 +80,7 @@
#include "hw/mem/pc-dimm.h"
#include "hw/mem/nvdimm.h"
#include "hw/acpi/generic_event_device.h"
+#include "hw/acpi/cpu_hotplug.h"
#include "hw/virtio/virtio-md-pci.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/char/pl011.h"
@@ -180,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
[VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN},
[VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
[VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
+ [VIRT_CPUHP_ACPI] = { 0x090c0000, ACPI_CPU_HOTPLUG_REG_LEN},
[VIRT_MMIO] = { 0x0a000000, 0x00000200 },
/* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
[VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
@@ -677,7 +679,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms)
DeviceState *dev;
MachineState *ms = MACHINE(vms);
int irq = vms->irqmap[VIRT_ACPI_GED];
- uint32_t event = ACPI_GED_PWR_DOWN_EVT;
+ uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_CPU_HOTPLUG_EVT;
if (ms->ram_slots) {
event |= ACPI_GED_MEM_HOTPLUG_EVT;
@@ -693,6 +695,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms)
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 3, vms->memmap[VIRT_CPUHP_ACPI].base);
sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq));
return dev;
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 362422413c..b5bfb75f71 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -78,6 +78,7 @@ enum {
VIRT_PCDIMM_ACPI,
VIRT_ACPI_GED,
VIRT_NVDIMM_ACPI,
+ VIRT_CPUHP_ACPI,
VIRT_PVTIME,
VIRT_LOWMEMMAP_LAST,
};
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 12/33] arm/virt: Create GED device before *disabled* vCPU Objects are destroyed
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (10 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 11/33] arm/virt: Enhance GED framework to handle vCPU hotplug events Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 13/33] arm/virt: Init PMU at host for all possible vCPUs Salil Mehta via
` (17 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
ACPI CPU hotplug state (is_present=_STA.PRESENT, is_enabled=_STA.ENABLED) for
all the possible vCPUs MUST be initialized during machine init. This is done
during the creation of the GED device. VMM/QEMU MUST always expose/fake the
ACPI state of the disabled vCPUs to the Guest kernel as 'present'
(_STA.PRESENT), i.e. ACPI persistent. If the 'disabled' vCPU objects are
destroyed before the GED device has been created, then their ACPI hotplug state
might not get initialized correctly, as the acpi_persistent flag is part of the
CPUState. This would expose the wrong status of the unplugged vCPUs to the
Guest kernel.
Hence, move the GED device creation ahead of the point where disabled vCPU
objects get destroyed as part of the post CPU init routine.
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 3986f6d9fc..e40e6c23e4 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2452,6 +2452,16 @@ static void machvirt_init(MachineState *machine)
create_gic(vms, sysmem);
+ /*
+ * ACPI CPU Hotplug state MUST be initialized before destroying disabled
+ * vCPUs in the cpu post init routine
+ */
+ if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
+ vms->acpi_dev = create_acpi_ged(vms);
+ } else {
+ create_gpio_devices(vms, VIRT_GPIO, sysmem);
+ }
+
virt_cpu_post_init(vms, sysmem);
fdt_add_pmu_nodes(vms);
@@ -2504,12 +2514,6 @@ static void machvirt_init(MachineState *machine)
create_pcie(vms);
- if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
- vms->acpi_dev = create_acpi_ged(vms);
- } else {
- create_gpio_devices(vms, VIRT_GPIO, sysmem);
- }
-
if (vms->secure && !vmc->no_secure_gpio) {
create_gpio_devices(vms, VIRT_SECURE_GPIO, secure_sysmem);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 13/33] arm/virt: Init PMU at host for all possible vCPUs
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (11 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 12/33] arm/virt: Create GED device before *disabled* vCPU Objects are destroyed Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 14/33] arm/virt: Release objects for *disabled* possible vCPUs after init Salil Mehta via
` (16 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
The PMU for all possible vCPUs must be initialized during VM initialization.
Refactor the existing code to accommodate possible vCPUs. This assumes that all
processors being used are identical. It is an architectural constraint of ARM
CPUs that all vCPUs MUST have identical feature sets, at least until the ARM
specification is updated to allow otherwise.
Past discussion for reference:
Link: https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg00131.html
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 9 +++++----
include/hw/arm/virt.h | 1 +
include/hw/core/cpu.h | 5 +++++
3 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index e40e6c23e4..696e0a9f75 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2033,12 +2033,13 @@ static void finalize_gic_version(VirtMachineState *vms)
*/
static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
{
+ CPUArchIdList *possible_cpus = vms->parent.possible_cpus;
int max_cpus = MACHINE(vms)->smp.max_cpus;
- bool aarch64, pmu, steal_time;
+ bool aarch64, steal_time;
CPUState *cpu;
aarch64 = object_property_get_bool(OBJECT(first_cpu), "aarch64", NULL);
- pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL);
+ vms->pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL);
steal_time = object_property_get_bool(OBJECT(first_cpu),
"kvm-steal-time", NULL);
@@ -2065,8 +2066,8 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime);
}
- CPU_FOREACH(cpu) {
- if (pmu) {
+ CPU_FOREACH_POSSIBLE(cpu, possible_cpus) {
+ if (vms->pmu) {
assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU));
if (kvm_irqchip_in_kernel()) {
kvm_arm_pmu_set_irq(ARM_CPU(cpu), VIRTUAL_PMU_IRQ);
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index b5bfb75f71..98ce68eae1 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -161,6 +161,7 @@ struct VirtMachineState {
bool mte;
bool dtb_randomness;
bool second_ns_uart_present;
+ bool pmu;
OnOffAuto acpi;
VirtGICType gic_version;
VirtIOMMUType iommu;
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index bcc62fbf0c..fa6f1dbec9 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -607,6 +607,11 @@ extern CPUTailQ cpus_queue;
#define CPU_FOREACH_SAFE(cpu, next_cpu) \
QTAILQ_FOREACH_SAFE_RCU(cpu, &cpus_queue, node, next_cpu)
+#define CPU_FOREACH_POSSIBLE(cpu, poslist) \
+ for (int iter = 0; \
+ iter < (poslist)->len && ((cpu) = (poslist)->cpus[iter].cpu, 1); \
+ iter++)
+
extern __thread CPUState *current_cpu;
/**
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 14/33] arm/virt: Release objects for *disabled* possible vCPUs after init
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (12 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 13/33] arm/virt: Init PMU at host for all possible vCPUs Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 15/33] arm/virt/acpi: Update ACPI DSDT Tbl to include CPUs AML with hotplug support Salil Mehta via
` (15 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
During `machvirt_init()`, QOM ARMCPU objects are pre-created along with the
corresponding KVM vCPUs in the host for all possible vCPUs. This is necessary
due to the architectural constraint that KVM restricts the deferred creation of
KVM vCPUs and VGIC initialization/sizing after VM initialization. Hence, VGIC is
pre-sized with possible vCPUs.
After the initialization of the machine is complete, the disabled possible KVM
vCPUs are parked in the per-virt-machine list "kvm_parked_vcpus," and we release
the QOM ARMCPU objects for the disabled vCPUs. These will be re-created when the
vCPU is hotplugged again. The QOM ARMCPU object is then re-attached to the
corresponding parked KVM vCPU.
Alternatively, we could have chosen not to release the QOM CPU objects and kept
reusing them. This approach might require some modifications to the
`qdev_device_add()` interface to retrieve the old ARMCPU object instead of creating
a new one for the hotplug request.
Each of these approaches has its own pros and cons. This prototype uses the
first approach (suggestions are welcome!).
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 696e0a9f75..d8cae70ab2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2093,6 +2093,36 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
}
}
}
+
+ if (kvm_enabled() || tcg_enabled()) {
+ CPU_FOREACH_POSSIBLE(cpu, possible_cpus) {
+ /*
+ * Now, GIC has been sized with possible CPUs and we dont require
+ * disabled vCPU objects to be represented in the QOM. Release the
+ * disabled ARMCPU objects earlier used during init for pre-sizing.
+ *
+ * We fake to the guest through ACPI about the presence(_STA.PRES=1)
+ * of these non-existent vCPUs at VMM/qemu and present these as
+ * disabled vCPUs(_STA.ENA=0) so that they cant be used. These vCPUs
+ * can be later added to the guest through hotplug exchanges when
+ * ARMCPU objects are created back again using 'device_add' QMP
+ * command.
+ */
+ /*
+ * RFC: Question: Other approach could've been to keep them forever
+ * and release it only once when qemu exits as part of finalize or
+ * when new vCPU is hotplugged. In the later old could be released
+ * for the newly created object for the same vCPU or just reuse the
+ * old QOM vCPU object?
+ */
+ if (!qemu_enabled_cpu(cpu)) {
+ CPUArchId *cpu_slot;
+ cpu_slot = virt_find_cpu_slot(cpu);
+ cpu_slot->cpu = NULL;
+ object_unref(OBJECT(cpu));
+ }
+ }
+ }
}
static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot,
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 15/33] arm/virt/acpi: Update ACPI DSDT Tbl to include CPUs AML with hotplug support
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (13 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 14/33] arm/virt: Release objects for *disabled* possible vCPUs after init Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 16/33] hw/acpi: Make _MAT method optional Salil Mehta via
` (14 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
Support for Virtual CPU Hotplug requires a sequence of ACPI handshakes between
QEMU and the guest kernel when a vCPU is plugged or unplugged. Most of the AML
code to support these handshakes already exists. This AML needs to be built
during VM initialization for the ARM architecture as well, if GED support
exists.
TODO (Peculiar Problem with AML):
Encountering the following ACPI namespace error when the GED AML is placed after
the CPU AML.
Error excerpt:
[ 75.795774] ACPI BIOS Error (bug): Could not resolve symbol [\_SB.GED.CSCN],
[ 75.797237] ACPI Error: Aborting method \_SB.GED._EVT due to previous error
[ 75.798530] acpi-ged ACPI0013:00: IRQ method execution failed
Preliminary analysis:
The error is definitely not related to a `forward declaration` issue but rather
to associating the `CSCN` (CPU Scan event handler) method with the `GED` scope
before the `\_SB.GED` scope is created. Therefore, it appears that the GED AML
should be initialized prior to the CPU AML.
As a result, I had to move the GED AML before the CPU AML to resolve the issue.
Everything about the two AML sections seems identical, except for their
location. Any insights?
==============================
Summary comparison of DSDT.dsl
==============================
[1] Both working and not working DSDT.dsl Files are placed at below path:
https://drive.google.com/drive/folders/1bbEvS18CtBn3vYFnGIVdgcSD_hggyODV?usp=drive_link
[2] Configuration: -smp cpu 4, maxcpus 6
DSDT.dsl (Not Working) DSDT.dsl (Working)
--------------------- ------------------
DefinitionBlock ("", "DSDT", 2, "BOCHS ", "BXPC ", 0x00000001) DefinitionBlock ("", "DSDT", 2, "BOCHS ", "BXPC ", 0x00000001)
{ {
Scope (\_SB) Scope (\_SB)
{ {
Scope (_SB) Device (\_SB.GED)
{ {
Device (\_SB.PRES) Name (_HID, "ACPI0013")
{ Name (_UID, "GED")
[...] Name (_CRS, ResourceTemplate ()
Device (\_SB.CPUS) [...]
{ Method (_EVT, 1, Serialized)
Name (_HID, "ACPI0010") {
Name (_CID, EisaId ("PNP0A05")) Local0 = ESEL /* \_SB_.GED_.ESEL */
Method (CTFY, 2, NotSerialized) If (((Local0 & 0x02) == 0x02))
{ {
[...] Notify (PWRB, 0x80)
Method (CSTA, 1, Serialized) }
{
[...] If (((Local0 & 0x08) == 0x08))
Method (CEJ0, 1, Serialized) {
{ \_SB.GED.CSCN ()
[...] }
Method (CSCN, 0, Serialized) }
{ }
[...]
Method (COST, 4, Serialized) Scope (_SB)
{ {
[...] Device (\_SB.PRES)
Device (C000) {
{ [...]
[...] Device (\_SB.CPUS)
Device (C001) {
{ Name (_HID, "ACPI0010")
[...] Name (_CID, EisaId ("PNP0A05"))
Device (C002) Method (CTFY, 2, NotSerialized)
{ {
[...] [...]
Device (C003) Method (CSTA, 1, Serialized)
{ {
[...] [...]
Device (C004) Method (CEJ0, 1, Serialized)
{ {
[...] [...]
Device (C005) Method (CSCN, 0, Serialized)
{ {
} [...]
} Method (COST, 4, Serialized)
{
Method (\_SB.GED.CSCN, 0, NotSerialized) [...]
{ Device (C000)
\_SB.CPUS.CSCN () {
} [...]
Device (C001)
Device (COM0) {
{ [...]
[...] Device (C002)
{
Device (\_SB.GED) [...]
{ Device (C003)
Name (_HID, "ACPI0013") {
Name (_UID, "GED") [...]
Name (_CRS, ResourceTemplate () Device (C004)
{ {
[...] [...]
OperationRegion (EREG, SystemMemory, 0x09080000, 0x04) Device (C005)
Field (EREG, DWordAcc, NoLock, WriteAsZeros) {
{ }
[...] }
Method (_EVT, 1, Serialized) Method (\_SB.GED.CSCN, 0, NotSerialized)
{ {
Local0 = ESEL \_SB.CPUS.CSCN ()
If (((Local0 & 0x02) == 0x02)) }
{
Notify (PWRB, 0x80) Device (COM0)
} {
[...]
If (((Local0 & 0x08) == 0x08)) }
{ }
\_SB.GED.CSCN ()
}
}
}
Device (PWRB)
{
[...]
}
}
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt-acpi-build.c | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index f76fb117ad..32238170ab 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -805,6 +805,7 @@ static void
build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
+ MachineClass *mc = MACHINE_GET_CLASS(vms);
Aml *scope, *dsdt;
MachineState *ms = MACHINE(vms);
const MemMapEntry *memmap = vms->memmap;
@@ -821,7 +822,28 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
* the RTC ACPI device at all when using UEFI.
*/
scope = aml_scope("\\_SB");
- acpi_dsdt_add_cpus(scope, vms);
+ if (vms->acpi_dev) {
+ build_ged_aml(scope, "\\_SB."GED_DEVICE,
+ HOTPLUG_HANDLER(vms->acpi_dev),
+ irqmap[VIRT_ACPI_GED] + ARM_SPI_BASE, AML_SYSTEM_MEMORY,
+ memmap[VIRT_ACPI_GED].base);
+ } else {
+ acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO],
+ (irqmap[VIRT_GPIO] + ARM_SPI_BASE));
+ }
+
+ /* if GED is enabled then cpus AML shall be added as part build_cpus_aml */
+ if (vms->acpi_dev && mc->has_hotpluggable_cpus) {
+ CPUHotplugFeatures opts = {
+ .acpi_1_compatible = false,
+ .has_legacy_cphp = false
+ };
+
+ build_cpus_aml(scope, ms, opts, NULL, memmap[VIRT_CPUHP_ACPI].base,
+ "\\_SB", AML_GED_EVT_CPU_SCAN_METHOD, AML_SYSTEM_MEMORY);
+ } else {
+ acpi_dsdt_add_cpus(scope, vms);
+ }
acpi_dsdt_add_uart(scope, &memmap[VIRT_UART0],
(irqmap[VIRT_UART0] + ARM_SPI_BASE), 0);
if (vms->second_ns_uart_present) {
@@ -836,15 +858,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
(irqmap[VIRT_MMIO] + ARM_SPI_BASE),
0, NUM_VIRTIO_TRANSPORTS);
acpi_dsdt_add_pci(scope, memmap, irqmap[VIRT_PCIE] + ARM_SPI_BASE, vms);
- if (vms->acpi_dev) {
- build_ged_aml(scope, "\\_SB."GED_DEVICE,
- HOTPLUG_HANDLER(vms->acpi_dev),
- irqmap[VIRT_ACPI_GED] + ARM_SPI_BASE, AML_SYSTEM_MEMORY,
- memmap[VIRT_ACPI_GED].base);
- } else {
- acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO],
- (irqmap[VIRT_GPIO] + ARM_SPI_BASE));
- }
if (vms->acpi_dev) {
uint32_t event = object_property_get_uint(OBJECT(vms->acpi_dev),
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 16/33] hw/acpi: Make _MAT method optional
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (14 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 15/33] arm/virt/acpi: Update ACPI DSDT Tbl to include CPUs AML with hotplug support Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:17 ` [PATCH RFC V4 17/33] hw/arm/acpi: MADT Tbl change to size the guest with possible vCPUs Salil Mehta via
` (13 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
The GICC interface on arm64 vCPUs is statically defined in the MADT, and
doesn't require a _MAT entry. Although the GICC is indicated as present
by the MADT entry, it can only be used from vCPU sysregs, which aren't
accessible until hot-add.
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/acpi/cpu.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 5cb60ca8bc..2d6afcfff7 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -671,10 +671,13 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
aml_append(dev, method);
/* build _MAT object */
- build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */
- aml_append(dev, aml_name_decl("_MAT",
+ if (build_madt_cpu) {
+ build_madt_cpu(i, arch_ids, madt_buf,
+ true); /* set enabled flag */
+ aml_append(dev, aml_name_decl("_MAT",
aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data)));
- g_array_free(madt_buf, true);
+ g_array_free(madt_buf, true);
+ }
if (CPU(arch_ids->cpus[i].cpu) != first_cpu) {
method = aml_method("_EJ0", 1, AML_NOTSERIALIZED);
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 17/33] hw/arm/acpi: MADT Tbl change to size the guest with possible vCPUs
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (15 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 16/33] hw/acpi: Make _MAT method optional Salil Mehta via
@ 2024-10-09 3:17 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 18/33] hw/acpi: Simulate *persistent* vCPU presence to Guest in ACPI _STA.{PRES, ENA} Bits Salil Mehta via
` (12 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:17 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
When QEMU builds the MADT table, modifications are needed to include information
about possible vCPUs that are exposed as ACPI-disabled (i.e., `_STA.Enabled=0`).
This new information will help the guest kernel pre-size its resources during
boot time. Pre-sizing based on possible vCPUs will facilitate the future
hot-plugging of the currently disabled vCPUs.
Additionally, this change addresses updates to the ACPI MADT GIC CPU interface
flags, as introduced in the UEFI ACPI 6.5 specification. These updates enable
deferred virtual CPU onlining in the guest kernel.
Link: https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#gic-cpu-interface-gicc-structure
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt-acpi-build.c | 36 ++++++++++++++++++++++++++++++------
include/hw/acpi/cpu.h | 18 ++++++++++++++++++
2 files changed, 48 insertions(+), 6 deletions(-)
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 32238170ab..ff104b3496 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -663,6 +663,29 @@ static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size)
build_append_int_noprefix(table_data, size, 4); /* Discovery Range Length */
}
+static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu)
+{
+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+
+ /* can only exist in 'enabled' state */
+ if (!mc->has_hotpluggable_cpus) {
+ return 1;
+ }
+
+ /*
+ * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot
+ * We MUST set 'online-capable' bit for all hotpluggable CPUs except the
+ * first/boot CPU. Cold-booted CPUs without 'Id' can also be unplugged.
+ * Though as-of-now this is only used as a debugging feature.
+ *
+ * UEFI ACPI Specification 6.5
+ * Section: 5.2.12.14. GIC CPU Interface (GICC) Structure
+ * Table: 5.37 GICC CPU Interface Flags
+ * Link: https://uefi.org/specs/ACPI/6.5
+ */
+ return cpu && !cpu->cpu_index ? 1 : (1 << 3);
+}
+
static void
build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
@@ -689,12 +712,13 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, vms->gic_version, 1);
build_append_int_noprefix(table_data, 0, 3); /* Reserved */
- for (i = 0; i < MACHINE(vms)->smp.cpus; i++) {
- ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i));
+ for (i = 0; i < MACHINE(vms)->smp.max_cpus; i++) {
+ CPUState *cpu = qemu_get_possible_cpu(i);
uint64_t physical_base_address = 0, gich = 0, gicv = 0;
uint32_t vgic_interrupt = vms->virt ? ARCH_GIC_MAINT_IRQ : 0;
- uint32_t pmu_interrupt = arm_feature(&armcpu->env, ARM_FEATURE_PMU) ?
- VIRTUAL_PMU_IRQ : 0;
+ uint32_t pmu_interrupt = vms->pmu ? VIRTUAL_PMU_IRQ : 0;
+ uint32_t flags = virt_acpi_get_gicc_flags(cpu);
+ uint64_t mpidr = acpi_get_cpu_archid(i);
if (vms->gic_version == VIRT_GIC_VERSION_2) {
physical_base_address = memmap[VIRT_GIC_CPU].base;
@@ -709,7 +733,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, i, 4); /* GIC ID */
build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */
/* Flags */
- build_append_int_noprefix(table_data, 1, 4); /* Enabled */
+ build_append_int_noprefix(table_data, flags, 4);
/* Parking Protocol Version */
build_append_int_noprefix(table_data, 0, 4);
/* Performance Interrupt GSIV */
@@ -723,7 +747,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, vgic_interrupt, 4);
build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/
/* MPIDR */
- build_append_int_noprefix(table_data, arm_cpu_mp_affinity(armcpu), 8);
+ build_append_int_noprefix(table_data, mpidr, 8);
/* Processor Power Efficiency Class */
build_append_int_noprefix(table_data, 0, 1);
/* Reserved */
diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h
index 32654dc274..4d6ae5453f 100644
--- a/include/hw/acpi/cpu.h
+++ b/include/hw/acpi/cpu.h
@@ -75,4 +75,22 @@ extern const VMStateDescription vmstate_cpu_hotplug;
VMSTATE_STRUCT(cpuhp, state, 1, \
vmstate_cpu_hotplug, CPUHotplugState)
+/**
+ * acpi_get_cpu_archid:
+ * @cpu_index: possible vCPU for which arch-id needs to be retrieved
+ *
+ * Fetches the vCPU arch-id of the possible vCPU. This should be the
+ * same as the one configured at KVM Host.
+ *
+ * Returns: arch-id of the possible vCPU
+ */
+static inline uint64_t acpi_get_cpu_archid(int cpu_index)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ const CPUArchIdList *possible_cpus = ms->possible_cpus;
+
+ assert((cpu_index >= 0) && (cpu_index < possible_cpus->len));
+
+ return possible_cpus->cpus[cpu_index].arch_id;
+}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 18/33] hw/acpi: Simulate *persistent* vCPU presence to Guest in ACPI _STA.{PRES, ENA} Bits
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (16 preceding siblings ...)
2024-10-09 3:17 ` [PATCH RFC V4 17/33] hw/arm/acpi: MADT Tbl change to size the guest with possible vCPUs Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 19/33] target/arm: Force ARM vCPU *present* status ACPI *persistent* Salil Mehta via
` (11 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
Certain CPU architecture specifications [1][2][3] prohibit changes to the CPUs
*presence* after the kernel has booted. This is because many system
initializations depend on the exact CPU count at boot time and do not expect it
to change afterward. For example, components like interrupt controllers that are
closely coupled with CPUs, or various per-CPU features, may not support
configuration changes once the kernel has been initialized.
This requirement poses a challenge for virtualization features like vCPU
hotplug. To address this, changes to the ACPI AML are necessary to update the
`_STA.PRES` (presence) and `_STA.ENA` (enabled) bits accordingly during guest
initialization, as well as when vCPUs are hot-plugged or hot-unplugged. The
presence of unplugged vCPUs may need to be deliberately *simulated* at the ACPI
level to maintain a *persistent* view of vCPUs for the guest kernel.
Introduce an `acpi_persistent` property, which can be used to initialize the
ACPI vCPU `presence` state accordingly. Architectures that require ACPI to
expose a persistent view of vCPUs can override its default value. Refer to the
subsequent patches for its usage.
References:
[1] KVMForum 2023 Presentation: Challenges Revisited in Supporting Virt CPU Hotplug on
architectures that don’t Support CPU Hotplug (like ARM64)
a. Kernel Link: https://kvm-forum.qemu.org/2023/KVM-forum-cpu-hotplug_7OJ1YyJ.pdf
b. Qemu Link: https://kvm-forum.qemu.org/2023/Challenges_Revisited_in_Supporting_Virt_CPU_Hotplug_-__ii0iNb3.pdf
[2] KVMForum 2020 Presentation: Challenges in Supporting Virtual CPU Hotplug on
SoC Based Systems (like ARM64)
Link: https://kvmforum2020.sched.com/event/eE4m
[3] Check comment 5 in the bugzilla entry
Link: https://bugzilla.tianocore.org/show_bug.cgi?id=4481#c5
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
cpu-target.c | 1 +
hw/acpi/cpu.c | 70 +++++++++++++++++++++++++++++++---
hw/acpi/generic_event_device.c | 11 ++++++
include/hw/acpi/cpu.h | 23 +++++++++++
include/hw/core/cpu.h | 10 +++++
5 files changed, 110 insertions(+), 5 deletions(-)
diff --git a/cpu-target.c b/cpu-target.c
index 499facf774..c8a29ab495 100644
--- a/cpu-target.c
+++ b/cpu-target.c
@@ -200,6 +200,7 @@ static Property cpu_common_props[] = {
*/
DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION,
MemoryRegion *),
+ DEFINE_PROP_BOOL("acpi-persistent", CPUState, acpi_persistent, false),
#endif
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 2d6afcfff7..d5d0363d08 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -63,10 +63,11 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size)
cdev = &cpu_st->devs[cpu_st->selector];
switch (addr) {
case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */
- val |= cdev->cpu ? 1 : 0;
+ val |= cdev->is_enabled ? 1 : 0;
val |= cdev->is_inserting ? 2 : 0;
val |= cdev->is_removing ? 4 : 0;
val |= cdev->fw_remove ? 16 : 0;
+ val |= cdev->is_present ? 32 : 0;
trace_cpuhp_acpi_read_flags(cpu_st->selector, val);
break;
case ACPI_CPU_CMD_DATA_OFFSET_RW:
@@ -225,7 +226,40 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
state->dev_count = id_list->len;
state->devs = g_new0(typeof(*state->devs), state->dev_count);
for (i = 0; i < id_list->len; i++) {
- state->devs[i].cpu = CPU(id_list->cpus[i].cpu);
+ struct CPUState *cpu = CPU(id_list->cpus[i].cpu);
+ /*
+ * In most architectures, CPUs that are marked as ACPI 'present' are
+ * also ACPI 'enabled' by default. These states remain consistent at
+ * both the QOM and ACPI levels.
+ */
+ if (qemu_enabled_cpu(cpu)) {
+ state->devs[i].is_enabled = true;
+ state->devs[i].is_present = true;
+ state->devs[i].cpu = cpu;
+ } else {
+ state->devs[i].is_enabled = false;
+ /*
+ * In some architectures, even 'unplugged' or 'disabled' QOM CPUs
+ * may be exposed as ACPI 'present.' This approach provides a
+ * persistent view of the vCPUs to the guest kernel. This could be
+ * due to an architectural constraint that requires every per-CPU
+ * component to be present at boot time, meaning the exact count of
+ * vCPUs must be known and cannot be altered after the kernel has
+ * booted. As a result, the vCPU states at the QOM and ACPI levels
+ * might become inconsistent. However, in such cases, the presence
+ * of vCPUs has been deliberately simulated at the ACPI level.
+ */
+ if (acpi_persistent_cpu(cpu)) {
+ state->devs[i].is_present = true;
+ /*
+ * `CPUHotplugState::AcpiCpuStatus::cpu` becomes insignificant
+ * in this case
+ */
+ } else {
+ state->devs[i].is_present = qemu_present_cpu(cpu);
+ state->devs[i].cpu = cpu;
+ }
+ }
state->devs[i].arch_id = id_list->cpus[i].arch_id;
}
memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state,
@@ -258,6 +292,8 @@ void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev,
}
cdev->cpu = CPU(dev);
+ cdev->is_present = true;
+ cdev->is_enabled = true;
if (dev->hotplugged) {
cdev->is_inserting = true;
acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS);
@@ -289,6 +325,11 @@ void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st,
return;
}
+ cdev->is_enabled = false;
+ if (!acpi_persistent_cpu(CPU(dev))) {
+ cdev->is_present = false;
+ }
+
cdev->cpu = NULL;
}
@@ -299,6 +340,8 @@ static const VMStateDescription vmstate_cpuhp_sts = {
.fields = (const VMStateField[]) {
VMSTATE_BOOL(is_inserting, AcpiCpuStatus),
VMSTATE_BOOL(is_removing, AcpiCpuStatus),
+ VMSTATE_BOOL(is_present, AcpiCpuStatus),
+ VMSTATE_BOOL(is_enabled, AcpiCpuStatus),
VMSTATE_UINT32(ost_event, AcpiCpuStatus),
VMSTATE_UINT32(ost_status, AcpiCpuStatus),
VMSTATE_END_OF_LIST()
@@ -336,6 +379,7 @@ const VMStateDescription vmstate_cpu_hotplug = {
#define CPU_REMOVE_EVENT "CRMV"
#define CPU_EJECT_EVENT "CEJ0"
#define CPU_FW_EJECT_EVENT "CEJF"
+#define CPU_PRESENT "CPRS"
void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
build_madt_cpu_fn build_madt_cpu, hwaddr base_addr,
@@ -396,7 +440,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
aml_append(field, aml_named_field(CPU_EJECT_EVENT, 1));
/* tell firmware to do device eject, write only */
aml_append(field, aml_named_field(CPU_FW_EJECT_EVENT, 1));
- aml_append(field, aml_reserved_field(3));
+ /* 1 if present, read only */
+ aml_append(field, aml_named_field(CPU_PRESENT, 1));
+ aml_append(field, aml_reserved_field(2));
aml_append(field, aml_named_field(CPU_COMMAND, 8));
aml_append(cpu_ctrl_dev, field);
@@ -426,6 +472,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
Aml *ctrl_lock = aml_name("%s.%s", cphp_res_path, CPU_LOCK);
Aml *cpu_selector = aml_name("%s.%s", cphp_res_path, CPU_SELECTOR);
Aml *is_enabled = aml_name("%s.%s", cphp_res_path, CPU_ENABLED);
+ Aml *is_present = aml_name("%s.%s", cphp_res_path, CPU_PRESENT);
Aml *cpu_cmd = aml_name("%s.%s", cphp_res_path, CPU_COMMAND);
Aml *cpu_data = aml_name("%s.%s", cphp_res_path, CPU_DATA);
Aml *ins_evt = aml_name("%s.%s", cphp_res_path, CPU_INSERT_EVENT);
@@ -454,13 +501,26 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
{
Aml *idx = aml_arg(0);
Aml *sta = aml_local(0);
+ Aml *ifctx2;
+ Aml *else_ctx;
aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
aml_append(method, aml_store(idx, cpu_selector));
aml_append(method, aml_store(zero, sta));
- ifctx = aml_if(aml_equal(is_enabled, one));
+ ifctx = aml_if(aml_equal(is_present, one));
{
- aml_append(ifctx, aml_store(aml_int(0xF), sta));
+ ifctx2 = aml_if(aml_equal(is_enabled, one));
+ {
+ /* cpu is present and enabled */
+ aml_append(ifctx2, aml_store(aml_int(0xF), sta));
+ }
+ aml_append(ifctx, ifctx2);
+ else_ctx = aml_else();
+ {
+ /* cpu is present but disabled */
+ aml_append(else_ctx, aml_store(aml_int(0xD), sta));
+ }
+ aml_append(ifctx, else_ctx);
}
aml_append(method, ifctx);
aml_append(method, aml_release(ctrl_lock));
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 15b4c3ebbf..a4d78a534c 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -331,6 +331,16 @@ static const VMStateDescription vmstate_memhp_state = {
}
};
+static const VMStateDescription vmstate_cpuhp_state = {
+ .name = "acpi-ged/cpuhp",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_CPU_HOTPLUG(cpuhp_state, AcpiGedState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_ged_state = {
.name = "acpi-ged-state",
.version_id = 1,
@@ -379,6 +389,7 @@ static const VMStateDescription vmstate_acpi_ged = {
},
.subsections = (const VMStateDescription * const []) {
&vmstate_memhp_state,
+ &vmstate_cpuhp_state,
&vmstate_ghes_state,
NULL
}
diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h
index 4d6ae5453f..1e2147a8ac 100644
--- a/include/hw/acpi/cpu.h
+++ b/include/hw/acpi/cpu.h
@@ -26,6 +26,8 @@ typedef struct AcpiCpuStatus {
uint64_t arch_id;
bool is_inserting;
bool is_removing;
+ bool is_present;
+ bool is_enabled;
bool fw_remove;
uint32_t ost_event;
uint32_t ost_status;
@@ -93,4 +95,25 @@ static inline uint64_t acpi_get_cpu_archid(int cpu_index)
return possible_cpus->cpus[cpu_index].arch_id;
}
+
+/**
+ * acpi_persistent_cpu:
+ * @cpu: The vCPU to check
+ *
+ * Checks if the vCPU state should always be reflected as *present* via ACPI
+ * to the Guest. By default, this is False on all architectures and has to be
+ * explicitly set during initialization.
+ *
+ * Returns: True if it is ACPI 'persistent' CPU
+ *
+ */
+static inline bool acpi_persistent_cpu(CPUState *cpu)
+{
+ assert(cpu);
+ /*
+ * returns if 'Presence' of the vCPU is persistent and should be simulated
+ * via ACPI even after vCPUs have been unplugged in QOM
+ */
+ return cpu->acpi_persistent;
+}
#endif
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index fa6f1dbec9..2e62d5f1a5 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -552,6 +552,16 @@ struct CPUState {
* By default, `CPUState` objects are enabled across all architectures.
*/
bool disabled;
+ /*
+ * On certain architectures, to provide the guest kernel with a persistent
+ * view of vCPU presence, even when the QOM might not have a corresponding
+ * vCPU object, ACPI may need to simulate the presence of vCPUs while
+ * keeping them ACPI-disabled for the guest. This is achieved by returning
+ * `_STA.PRES=True` and `_STA.Ena=False` for unplugged vCPUs in QEMU's QOM.
+ * By default, this flag is set to FALSE and must be explicitly set to TRUE
+ * on architectures like ARM.
+ */
+ bool acpi_persistent;
/* TODO Move common fields from CPUArchState here. */
int cpu_index;
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 19/33] target/arm: Force ARM vCPU *present* status ACPI *persistent*
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (17 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 18/33] hw/acpi: Simulate *persistent* vCPU presence to Guest in ACPI _STA.{PRES, ENA} Bits Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 20/33] arm/virt: Add/update basic hot-(un)plug framework Salil Mehta via
` (10 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
The ARM CPU architecture does not permit changes to CPU presence after the
kernel has booted. This is an immutable requirement from ARM and represents a
strict architectural constraint [1][2].
The ACPI update [3] reinforces this by specifying that the `_STA.Present` bit
in the ACPI specification cannot be modified once the system has booted.
Consequently, the firmware, ACPI, and QEMU must provide the guest kernel with a
persistent view of the vCPUs, even when they are not present in the QOM
(i.e., when they are unplugged or have yet to be plugged into the QOM after the
kernel has booted).
References:
[1] KVMForum 2023 Presentation: Challenges Revisited in Supporting Virt CPU Hotplug on
architectures that don’t Support CPU Hotplug (like ARM64)
a. Kernel Link: https://kvm-forum.qemu.org/2023/KVM-forum-cpu-hotplug_7OJ1YyJ.pdf
b. Qemu Link: https://kvm-forum.qemu.org/2023/Challenges_Revisited_in_Supporting_Virt_CPU_Hotplug_-__ii0iNb3.pdf
[2] KVMForum 2020 Presentation: Challenges in Supporting Virtual CPU Hotplug on
SoC Based Systems (like ARM64)
Link: https://kvmforum2020.sched.com/event/eE4m
[3] Check comment 5 in the bugzilla entry
Link: https://bugzilla.tianocore.org/show_bug.cgi?id=4481#c5
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
target/arm/cpu64.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 2a517fdb9f..a84f312ae2 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -794,6 +794,13 @@ static void aarch64_cpu_initfn(Object *obj)
*/
cs->disabled = true;
cs->thread_id = 0;
+ /*
+ * To provide the guest with a persistent view of vCPU presence, ACPI may
+ * need to simulate the presence of vCPUs even when they are not present in
+ * the QOM or are in a disabled state. This flag is utilized during the
+ * initialization of ACPI hotplug state and during vCPU hot-unplug events.
+ */
+ cs->acpi_persistent = true;
}
static void aarch64_cpu_finalizefn(Object *obj)
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 20/33] arm/virt: Add/update basic hot-(un)plug framework
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (18 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 19/33] target/arm: Force ARM vCPU *present* status ACPI *persistent* Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 21/33] arm/virt: Changes to (un)wire GICC<->vCPU IRQs during hot-(un)plug Salil Mehta via
` (9 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
Add CPU hot-unplug hooks and update hotplug hooks with additional sanity checks
for use in hotplug paths.
Note: The functional contents of the hooks (currently left as TODO comments)
will be filled in incrementally by subsequent patches, which will roughly
include the following:
1. (Un)wiring of interrupts between vCPU<->GIC.
2. Sending events to the guest for hot-(un)plug so that the guest can take
appropriate actions.
3. Notifying the GIC about the hot-(un)plug action so that the vCPU can be
(un)stitched to the GIC CPU interface.
4. Updating the guest with next boot information for this vCPU in the firmware.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 109 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index d8cae70ab2..6da71b0068 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -85,6 +85,7 @@
#include "hw/virtio/virtio-iommu.h"
#include "hw/char/pl011.h"
#include "qemu/guest-random.h"
+#include "qapi/qmp/qdict.h"
static GlobalProperty arm_virt_compat[] = {
{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" },
@@ -3016,11 +3017,23 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev,
static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
+ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
MachineState *ms = MACHINE(hotplug_dev);
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
ARMCPU *cpu = ARM_CPU(dev);
CPUState *cs = CPU(dev);
CPUArchId *cpu_slot;
+ if (dev->hotplugged && !vms->acpi_dev) {
+ error_setg(errp, "GED acpi device does not exists");
+ return;
+ }
+
+ if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
+ error_setg(errp, "CPU hotplug not supported on this machine");
+ return;
+ }
+
/* sanity check the cpu */
if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
error_setg(errp, "Invalid CPU type, expected cpu type: '%s'",
@@ -3062,11 +3075,30 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
return;
}
virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp);
+
+ /*
+ * Fix the GIC for the newly plugged vCPU. The QOM CPU object for this new
+ * vCPU needs to be updated in the corresponding QOM `GICv3CPUState` object.
+ * Additionally, the IRQs for this new CPU object must be re-wired. This
+ * update is confined to the QOM layer and does not affect KVM, as KVM was
+ * already pre-sized with possible CPUs during VM initialization. This
+ * serves as a workaround to the constraints posed by the ARM architecture
+ * in supporting CPU hotplug, for which no formal specification exists.
+ *
+ * This GIC IRQ patch-up is necessary for both cold- and hot-plugged vCPUs.
+ * Cold-initialized vCPUs have their GIC state initialized earlier during
+ * `machvirt_init()`.
+ */
+ if (vms->acpi_dev) {
+ /* TODO: update GIC about this hotplug change here */
+ /* TODO: wire the GIC<->CPU irqs */
+ }
}
static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
+ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
CPUState *cs = CPU(dev);
CPUArchId *cpu_slot;
@@ -3083,7 +3115,80 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
cpu_slot->arch_id = arm_cpu_mp_affinity(ARM_CPU(cs));
}
+ if (vms->acpi_dev) {
+ /* TODO: register cpu for reset & update F/W info for the next boot */
+ }
+
+ /*
+ * `Enable` the QOM vCPU before updating its ACPI `AcpiCpuStatus` and
+ * notifying the guest kernel about the availability of the new vCPU.
+ */
cs->disabled = false;
+
+ /*
+ * Update the ACPI hotplug state for vCPUs being both hot- and cold-plugged.
+ * vCPUs can be cold-plugged using the `-device` option. For vCPUs that are
+ * hot-plugged, the guest is also notified.
+ */
+ if (vms->acpi_dev) {
+ /* TODO: update acpi hotplug state. Send cpu hotplug event to guest */
+ }
+}
+
+static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
+ ARMCPU *cpu = ARM_CPU(dev);
+ CPUState *cs = CPU(dev);
+
+ if (!vms->acpi_dev) {
+ error_setg(errp, "GED does not exists or device is not realized!");
+ return;
+ }
+
+ if (!mc->has_hotpluggable_cpus) {
+ error_setg(errp, "CPU hot(un)plug not supported on this machine");
+ return;
+ }
+
+ if (cs->cpu_index == first_cpu->cpu_index) {
+ error_setg(errp, "Boot CPU(id%d=%d:%d:%d:%d) hot-unplug not supported",
+ first_cpu->cpu_index, cpu->socket_id, cpu->cluster_id,
+ cpu->core_id, cpu->thread_id);
+ return;
+ }
+
+ /* TODO: request cpu hotplug from guest */
+}
+
+static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
+ CPUState *cs = CPU(dev);
+ CPUArchId *cpu_slot;
+
+ if (!vms->acpi_dev) {
+ error_setg(errp, "GED does not exists or device is not realized!");
+ return;
+ }
+
+ cpu_slot = virt_find_cpu_slot(cs);
+
+ /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */
+
+ /* TODO: unwire the gic-cpu irqs here */
+ /* TODO: update the GIC about this hot unplug change */
+
+ /* TODO: unregister cpu for reset & update F/W info for the next boot */
+
+ qobject_unref(dev->opts);
+ dev->opts = NULL;
+
+ cpu_slot->cpu = NULL;
+ cs->disabled = true;
}
static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
@@ -3210,6 +3315,8 @@ static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev),
errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ virt_cpu_unplug_request(hotplug_dev, dev, errp);
} else {
error_setg(errp, "device unplug request for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -3223,6 +3330,8 @@ static void virt_machine_device_unplug_cb(HotplugHandler *hotplug_dev,
virt_dimm_unplug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ virt_cpu_unplug(hotplug_dev, dev, errp);
} else {
error_setg(errp, "virt: device unplug for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 21/33] arm/virt: Changes to (un)wire GICC<->vCPU IRQs during hot-(un)plug
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (19 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 20/33] arm/virt: Add/update basic hot-(un)plug framework Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 22/33] hw/arm, gicv3: Changes to notify GICv3 CPU state with vCPU hot-(un)plug event Salil Mehta via
` (8 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
Refactor the existing GIC creation code to extract the common code that wires
the vcpu<->gic interrupts into a helper function. This helper is used in the
cold-plug case and also when a vCPU is hot-plugged. Also introduce a new
function to unwire the vcpu<->gic interrupts for the vCPU hot-unplug case.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Reported-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
[4/05/2024: Issue with total number of PPI available during create GIC]
Suggested-by: Miguel Luis <miguel.luis@oracle.com>
[5/05/2024: Fix the total number of PPIs available as per ARM BSA to avoid overflow]
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 154 ++++++++++++++++++++++++++++-------------
hw/core/gpio.c | 2 +-
include/hw/qdev-core.h | 2 +
target/arm/cpu-qom.h | 18 +++--
4 files changed, 118 insertions(+), 58 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6da71b0068..dad5f7d40f 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -764,6 +764,107 @@ static bool gicv3_nmi_present(VirtMachineState *vms)
(vms->gic_version != VIRT_GIC_VERSION_2);
}
+/*
+ * Mapping from the output timer irq lines from the CPU to the GIC PPI inputs
+ * we use for the virt board.
+ */
+const int timer_irq[] = {
+ [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ,
+ [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ,
+ [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ,
+ [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ,
+};
+
+static void unwire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs)
+{
+ MachineState *ms = MACHINE(vms);
+ unsigned int max_cpus = ms->smp.max_cpus;
+ DeviceState *cpudev = DEVICE(cs);
+ DeviceState *gicdev = vms->gic;
+ int cpu = CPU(cs)->cpu_index;
+ int type = vms->gic_version;
+ int irq, num_gpio_in;
+
+ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
+ qdev_disconnect_gpio_out_named(cpudev, NULL, irq);
+ }
+
+ if (type != VIRT_GIC_VERSION_2) {
+ qdev_disconnect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt",
+ 0);
+ } else if (vms->virt) {
+ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ,
+ cpu + 4 * max_cpus);
+ }
+
+ /*
+ * RFC: Question: This currently does not take care of notifying the
+ * devices which might be sitting on the system bus. Do we need a
+ * sysbus_disconnect_irq() which also does the job of notification besides
+ * disconnection?
+ */
+ qdev_disconnect_gpio_out_named(cpudev, "pmu-interrupt", 0);
+
+ /* Unwire GIC's IRQ/FIQ/VIRQ/VFIQ/NMI/VINMI interrupt outputs to CPU */
+ num_gpio_in = (vms->gic_version != VIRT_GIC_VERSION_2) ?
+ NUM_GPIO_IN : NUM_GICV2_GPIO_IN;
+ for (irq = 0; irq < num_gpio_in; irq++) {
+ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ,
+ cpu + irq * max_cpus);
+ }
+}
+
+static void wire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs)
+{
+ MachineState *ms = MACHINE(vms);
+ unsigned int max_cpus = ms->smp.max_cpus;
+ DeviceState *cpudev = DEVICE(cs);
+ DeviceState *gicdev = vms->gic;
+ int cpu = CPU(cs)->cpu_index;
+ int type = vms->gic_version;
+ SysBusDevice *gicbusdev;
+ int intidbase;
+ int irqn;
+
+ intidbase = NUM_IRQS + cpu * GIC_INTERNAL;
+
+ for (irqn = 0; irqn < ARRAY_SIZE(timer_irq); irqn++) {
+ qdev_connect_gpio_out(cpudev, irqn,
+ qdev_get_gpio_in(gicdev,
+ intidbase + timer_irq[irqn]));
+ }
+
+ gicbusdev = SYS_BUS_DEVICE(gicdev);
+ if (type != VIRT_GIC_VERSION_2) {
+ qemu_irq irq = qdev_get_gpio_in(gicdev,
+ intidbase + ARCH_GIC_MAINT_IRQ);
+ qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt",
+ 0, irq);
+ } else if (vms->virt) {
+ qemu_irq irq = qdev_get_gpio_in(gicdev,
+ intidbase + ARCH_GIC_MAINT_IRQ);
+ sysbus_connect_irq(gicbusdev, cpu + 4 * max_cpus, irq);
+ }
+
+ qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
+ qdev_get_gpio_in(gicdev,
+ intidbase + VIRTUAL_PMU_IRQ));
+
+ sysbus_connect_irq(gicbusdev, cpu, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
+ sysbus_connect_irq(gicbusdev, cpu + max_cpus,
+ qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
+ sysbus_connect_irq(gicbusdev, cpu + 2 * max_cpus,
+ qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
+ sysbus_connect_irq(gicbusdev, cpu + 3 * max_cpus,
+ qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
+ if (vms->gic_version != VIRT_GIC_VERSION_2) {
+ sysbus_connect_irq(gicbusdev, cpu + 4 * max_cpus,
+ qdev_get_gpio_in(cpudev, ARM_CPU_NMI));
+ sysbus_connect_irq(gicbusdev, cpu + 5 * max_cpus,
+ qdev_get_gpio_in(cpudev, ARM_CPU_VINMI));
+ }
+}
+
static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
{
MachineState *ms = MACHINE(vms);
@@ -866,54 +967,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
* CPU's inputs.
*/
for (i = 0; i < smp_cpus; i++) {
- DeviceState *cpudev = DEVICE(qemu_get_cpu(i));
- int intidbase = NUM_IRQS + i * GIC_INTERNAL;
- /* Mapping from the output timer irq lines from the CPU to the
- * GIC PPI inputs we use for the virt board.
- */
- const int timer_irq[] = {
- [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ,
- [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ,
- [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ,
- [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ,
- [GTIMER_HYPVIRT] = ARCH_TIMER_NS_EL2_VIRT_IRQ,
- };
-
- for (unsigned irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
- qdev_connect_gpio_out(cpudev, irq,
- qdev_get_gpio_in(vms->gic,
- intidbase + timer_irq[irq]));
- }
-
- if (vms->gic_version != VIRT_GIC_VERSION_2) {
- qemu_irq irq = qdev_get_gpio_in(vms->gic,
- intidbase + ARCH_GIC_MAINT_IRQ);
- qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt",
- 0, irq);
- } else if (vms->virt) {
- qemu_irq irq = qdev_get_gpio_in(vms->gic,
- intidbase + ARCH_GIC_MAINT_IRQ);
- sysbus_connect_irq(gicbusdev, i + 4 * max_cpus, irq);
- }
-
- qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
- qdev_get_gpio_in(vms->gic, intidbase
- + VIRTUAL_PMU_IRQ));
-
- sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
- sysbus_connect_irq(gicbusdev, i + max_cpus,
- qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
- sysbus_connect_irq(gicbusdev, i + 2 * max_cpus,
- qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
- sysbus_connect_irq(gicbusdev, i + 3 * max_cpus,
- qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
-
- if (vms->gic_version != VIRT_GIC_VERSION_2) {
- sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus,
- qdev_get_gpio_in(cpudev, ARM_CPU_NMI));
- sysbus_connect_irq(gicbusdev, i + 5 * smp_cpus,
- qdev_get_gpio_in(cpudev, ARM_CPU_VINMI));
- }
+ wire_gic_cpu_irqs(vms, qemu_get_cpu(i));
}
fdt_add_gic_node(vms);
@@ -3091,7 +3145,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
*/
if (vms->acpi_dev) {
/* TODO: update GIC about this hotplug change here */
- /* TODO: wire the GIC<->CPU irqs */
+ wire_gic_cpu_irqs(vms, cs);
}
}
@@ -3179,7 +3233,7 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
/* TODO: update the acpi cpu hotplug state for cpu hot-unplug */
- /* TODO: unwire the gic-cpu irqs here */
+ unwire_gic_cpu_irqs(vms, cs);
/* TODO: update the GIC about this hot unplug change */
/* TODO: unregister cpu for reset & update F/W info for the next boot */
diff --git a/hw/core/gpio.c b/hw/core/gpio.c
index 80d07a6ec9..abb164d5c0 100644
--- a/hw/core/gpio.c
+++ b/hw/core/gpio.c
@@ -143,7 +143,7 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n)
/* disconnect a GPIO output, returning the disconnected input (if any) */
-static qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev,
+qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev,
const char *name, int n)
{
char *propname = g_strdup_printf("%s[%d]",
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 77bfcbdf73..ce4331cea4 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -739,6 +739,8 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n);
*/
qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt,
const char *name, int n);
+qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev,
+ const char *name, int n);
BusState *qdev_get_child_bus(DeviceState *dev, const char *name);
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index b497667d61..e49fb096de 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -37,13 +37,17 @@ DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU,
#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
/* Meanings of the ARMCPU object's seven inbound GPIO lines */
-#define ARM_CPU_IRQ 0
-#define ARM_CPU_FIQ 1
-#define ARM_CPU_VIRQ 2
-#define ARM_CPU_VFIQ 3
-#define ARM_CPU_NMI 4
-#define ARM_CPU_VINMI 5
-#define ARM_CPU_VFNMI 6
+enum {
+ ARM_CPU_IRQ = 0,
+ ARM_CPU_FIQ = 1,
+ ARM_CPU_VIRQ = 2,
+ ARM_CPU_VFIQ = 3,
+ NUM_GICV2_GPIO_IN = (ARM_CPU_VFIQ+1),
+ ARM_CPU_NMI = 4,
+ ARM_CPU_VINMI = 5,
+ /* ARM_CPU_VFNMI = 6, */ /* not used? */
+ NUM_GPIO_IN = (ARM_CPU_VINMI+1),
+};
/* For M profile, some registers are banked secure vs non-secure;
* these are represented as a 2-element array where the first element
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 22/33] hw/arm, gicv3: Changes to notify GICv3 CPU state with vCPU hot-(un)plug event
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (20 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 21/33] arm/virt: Changes to (un)wire GICC<->vCPU IRQs during hot-(un)plug Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 23/33] hw/arm: Changes required for reset and to support next boot Salil Mehta via
` (7 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
Virtual CPU hot-(un)plug events must be communicated to the GIC. Introduce a
notification mechanism to ensure these events are properly relayed to the GIC,
allowing it to update the accessibility of the GIC CPU interface and adjust the
vCPU-to-GIC CPU interface association accordingly.
This approach deviates from the standard ARM CPU architecture specification,
where the CPU-to-GIC interface is typically fixed and the accessibility of the
GIC CPU interface cannot be disabled. However, this workaround is necessary
to address limitations imposed by the ARM CPU architecture [1][2].
For more details regarding these constraints and the workarounds, please refer
to the slides below:
References:
[1] KVMForum 2023 Presentation: Challenges Revisited in Supporting Virt CPU Hotplug on
architectures that don’t Support CPU Hotplug (like ARM64)
Link: https://kvm-forum.qemu.org/2023/Challenges_Revisited_in_Supporting_Virt_CPU_Hotplug_-__ii0iNb3.pdf
(Slides 13,17,18)
[2] KVMForum 2020 Presentation: Challenges in Supporting Virtual CPU Hotplug on
SoC Based Systems (like ARM64)
Link: https://kvmforum2020.sched.com/event/eE4m
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 31 ++++++++++++++-
hw/intc/arm_gicv3_common.c | 60 +++++++++++++++++++++++++++++-
include/hw/arm/virt.h | 1 +
include/hw/intc/arm_gicv3_common.h | 23 ++++++++++++
4 files changed, 112 insertions(+), 3 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index dad5f7d40f..9634011ae7 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -702,6 +702,16 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms)
return dev;
}
+static void virt_add_gic_cpuhp_notifier(VirtMachineState *vms)
+{
+ MachineClass *mc = MACHINE_GET_CLASS(vms);
+
+ if (mc->has_hotpluggable_cpus) {
+ Notifier *cpuhp_notifier = gicv3_cpuhp_notifier(vms->gic);
+ notifier_list_add(&vms->cpuhp_notifiers, cpuhp_notifier);
+ }
+}
+
static void create_its(VirtMachineState *vms)
{
const char *itsclass = its_class_name();
@@ -977,6 +987,9 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
} else if (vms->gic_version == VIRT_GIC_VERSION_2) {
create_v2m(vms);
}
+
+ /* add GIC CPU hot(un)plug update notifier */
+ virt_add_gic_cpuhp_notifier(vms);
}
static void create_uart(const VirtMachineState *vms, int uart,
@@ -2452,6 +2465,8 @@ static void machvirt_init(MachineState *machine)
create_fdt(vms);
+ notifier_list_init(&vms->cpuhp_notifiers);
+
assert(possible_cpus->len == max_cpus);
for (n = 0; n < possible_cpus->len; n++) {
CPUArchId *cpu_slot;
@@ -3068,6 +3083,18 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev,
dev, &error_abort);
}
+static void virt_update_gic(VirtMachineState *vms, CPUState *cs, bool plugging)
+{
+ GICv3CPUHotplugInfo gic_info = {
+ .gic = vms->gic,
+ .cpu = cs,
+ .cpu_plugging = plugging
+ };
+
+ /* notify gic to stitch GICC to this new cpu */
+ notifier_list_notify(&vms->cpuhp_notifiers, &gic_info);
+}
+
static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
@@ -3144,7 +3171,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
* `machvirt_init()`.
*/
if (vms->acpi_dev) {
- /* TODO: update GIC about this hotplug change here */
+ virt_update_gic(vms, cs, true);
wire_gic_cpu_irqs(vms, cs);
}
}
@@ -3234,7 +3261,7 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
/* TODO: update the acpi cpu hotplug state for cpu hot-unplug */
unwire_gic_cpu_irqs(vms, cs);
- /* TODO: update the GIC about this hot unplug change */
+ virt_update_gic(vms, cs, false);
/* TODO: unregister cpu for reset & update F/W info for the next boot */
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 4f230257ef..e7b2d04358 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -33,7 +33,6 @@
#include "hw/arm/linux-boot-if.h"
#include "sysemu/kvm.h"
-
static void gicv3_gicd_no_migration_shift_bug_post_load(GICv3State *cs)
{
if (cs->gicd_no_migration_shift_bug) {
@@ -366,6 +365,62 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
}
}
+static int arm_gicv3_get_proc_num(GICv3State *s, CPUState *cpu)
+{
+ uint64_t mp_affinity;
+ uint64_t gicr_typer;
+ uint64_t cpu_affid;
+ int i;
+
+ mp_affinity = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL);
+ /* match the cpu mp-affinity to get the gic cpuif number */
+ for (i = 0; i < s->num_cpu; i++) {
+ gicr_typer = s->cpu[i].gicr_typer;
+ cpu_affid = (gicr_typer >> 32) & 0xFFFFFF;
+ if (cpu_affid == mp_affinity) {
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data)
+{
+ GICv3CPUHotplugInfo *gic_info = (GICv3CPUHotplugInfo *)data;
+ CPUState *cpu = gic_info->cpu;
+ ARMGICv3CommonClass *agcc;
+ int gic_cpuif_num;
+ GICv3State *s;
+
+ s = ARM_GICV3_COMMON(gic_info->gic);
+ agcc = ARM_GICV3_COMMON_GET_CLASS(s);
+
+ /* this shall get us mapped GICv3 CPU interface corresponding to MPIDR */
+ gic_cpuif_num = arm_gicv3_get_proc_num(s, cpu);
+ if (gic_cpuif_num < 0) {
+ error_report("Failed to associate cpu %d with any GIC cpuif",
+ cpu->cpu_index);
+ abort();
+ }
+
+ /* Update the GICv3 CPU interface accessibility accordingly */
+ gicv3_set_cpustate(&s->cpu[gic_cpuif_num], cpu, gic_info->cpu_plugging);
+
+ if (!gic_info->cpu_plugging) {
+ return;
+ }
+
+ /* re-stitch the GICv3 CPU interface to this new vCPU */
+ gicv3_set_gicv3state(cpu, &s->cpu[gic_cpuif_num]);
+
+ /*
+ * define and register the GICv3 CPU interface `system registers` for
+ * this new vCPU being hotplugged
+ */
+ agcc->init_cpu_reginfo(cpu);
+}
+
static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
{
GICv3State *s = ARM_GICV3_COMMON(dev);
@@ -490,6 +545,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
s->cpu[cpuidx - 1].gicr_typer |= GICR_TYPER_LAST;
}
+ s->cpu_update_notifier.notify = arm_gicv3_cpu_update_notifier;
+
s->itslist = g_ptr_array_new();
}
@@ -497,6 +554,7 @@ static void arm_gicv3_finalize(Object *obj)
{
GICv3State *s = ARM_GICV3_COMMON(obj);
+ notifier_remove(&s->cpu_update_notifier);
g_free(s->redist_region_count);
}
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 98ce68eae1..0202f0252c 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -186,6 +186,7 @@ struct VirtMachineState {
char *oem_id;
char *oem_table_id;
bool ns_el2_virt_timer_irq;
+ NotifierList cpuhp_notifiers;
};
#define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index c19eb8d3d0..170118f645 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -294,6 +294,7 @@ struct GICv3State {
GICv3CPUState *gicd_irouter_target[GICV3_MAXIRQ];
uint32_t gicd_nsacr[DIV_ROUND_UP(GICV3_MAXIRQ, 16)];
+ Notifier cpu_update_notifier;
GICv3CPUState *cpu;
/* List of all ITSes connected to this GIC */
GPtrArray *itslist;
@@ -344,6 +345,28 @@ struct ARMGICv3CommonClass {
void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
const MemoryRegionOps *ops);
+/**
+ * Structure used by GICv3 CPU hotplug notifier
+ */
+typedef struct GICv3CPUHotplugInfo {
+ DeviceState *gic; /* GICv3State */
+ CPUState *cpu;
+ bool cpu_plugging; /* CPU being plugged or unplugged */
+} GICv3CPUHotplugInfo;
+
+/**
+ * gicv3_cpuhp_notifier
+ *
+ * Returns CPU hotplug notifier which could be used to update GIC about any
+ * CPU hot(un)plug events.
+ *
+ * Returns: Notifier initialized with CPU Hot(un)plug update function
+ */
+static inline Notifier *gicv3_cpuhp_notifier(DeviceState *dev)
+{
+ GICv3State *s = ARM_GICV3_COMMON(dev);
+ return &s->cpu_update_notifier;
+}
/**
* gicv3_class_name
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 23/33] hw/arm: Changes required for reset and to support next boot
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (21 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 22/33] hw/arm, gicv3: Changes to notify GICv3 CPU state with vCPU hot-(un)plug event Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 24/33] arm/virt: Update the guest(via GED) about vCPU hot-(un)plug events Salil Mehta via
` (6 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
Update the firmware configuration with the number of CPUs for the next boot, and
register the reset callback that resets the CPU when the guest reboots.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/boot.c | 2 +-
hw/arm/virt.c | 18 +++++++++++++++---
include/hw/arm/boot.h | 2 ++
include/hw/arm/virt.h | 1 +
4 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 5301d8d318..8bf8d003eb 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -682,7 +682,7 @@ fail:
return -1;
}
-static void do_cpu_reset(void *opaque)
+void do_cpu_reset(void *opaque)
{
ARMCPU *cpu = opaque;
CPUState *cs = CPU(cpu);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9634011ae7..8cb66c11a1 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -45,6 +45,8 @@
#include "sysemu/device_tree.h"
#include "sysemu/numa.h"
#include "sysemu/runstate.h"
+#include "sysemu/reset.h"
+#include "sysemu/sysemu.h"
#include "sysemu/tpm.h"
#include "sysemu/tcg.h"
#include "sysemu/kvm.h"
@@ -1427,7 +1429,7 @@ static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as)
char *nodename;
fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as);
- fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)ms->smp.cpus);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus);
nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
qemu_fdt_add_subnode(ms->fdt, nodename);
@@ -3197,7 +3199,13 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
}
if (vms->acpi_dev) {
- /* TODO: register cpu for reset & update F/W info for the next boot */
+ qemu_register_reset(do_cpu_reset, ARM_CPU(cs));
+ }
+
+ /* update the firmware information for the next boot. */
+ vms->boot_cpus++;
+ if (vms->fw_cfg) {
+ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus);
}
/*
@@ -3263,7 +3271,11 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
unwire_gic_cpu_irqs(vms, cs);
virt_update_gic(vms, cs, false);
- /* TODO: unregister cpu for reset & update F/W info for the next boot */
+ qemu_unregister_reset(do_cpu_reset, ARM_CPU(cs));
+ vms->boot_cpus--;
+ if (vms->fw_cfg) {
+ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus);
+ }
qobject_unref(dev->opts);
dev->opts = NULL;
diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h
index 80c492d742..f81326a1dc 100644
--- a/include/hw/arm/boot.h
+++ b/include/hw/arm/boot.h
@@ -178,6 +178,8 @@ AddressSpace *arm_boot_address_space(ARMCPU *cpu,
int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
hwaddr addr_limit, AddressSpace *as, MachineState *ms);
+void do_cpu_reset(void *opaque);
+
/* Write a secure board setup routine with a dummy handler for SMCs */
void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu,
const struct arm_boot_info *info,
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 0202f0252c..073d18281e 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -172,6 +172,7 @@ struct VirtMachineState {
MemMapEntry *memmap;
char *pciehb_nodename;
const int *irqmap;
+ uint16_t boot_cpus;
int fdt_size;
uint32_t clock_phandle;
uint32_t gic_phandle;
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 24/33] arm/virt: Update the guest(via GED) about vCPU hot-(un)plug events
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (22 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 23/33] hw/arm: Changes required for reset and to support next boot Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 25/33] target/arm/cpu: Check if hotplugged ARM vCPU's FEAT match existing Salil Mehta via
` (5 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
During any vCPU hot-(un)plug operation, the running guest VM must be notified
about the addition of a new vCPU or the removal of an existing vCPU. This
notification is handled via an ACPI GED event, which is eventually demultiplexed
into a vCPU hotplug event, and then further into a specific hot-(un)plug event
for the *targeted* vCPU.
Introduce the required ACPI calls into the existing hot-(un)plug hooks, allowing
ACPI GED events to be triggered from QEMU to the guest VM.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 39 ++++++++++++++++++++++++++++++++++++---
1 file changed, 36 insertions(+), 3 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 8cb66c11a1..5d440f9121 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3183,6 +3183,7 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
{
VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
CPUState *cs = CPU(dev);
+ Error *local_err = NULL;
CPUArchId *cpu_slot;
/* insert the cold/hot-plugged vcpu in the slot */
@@ -3220,8 +3221,18 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
* hot-plugged, the guest is also notified.
*/
if (vms->acpi_dev) {
- /* TODO: update acpi hotplug state. Send cpu hotplug event to guest */
+ HotplugHandlerClass *hhc;
+ /* update acpi hotplug state and send cpu hotplug event to guest */
+ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev);
+ hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err);
+ if (local_err) {
+ goto fail;
+ }
}
+
+ return;
+fail:
+ error_propagate(errp, local_err);
}
static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev,
@@ -3230,7 +3241,9 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev,
MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
ARMCPU *cpu = ARM_CPU(dev);
+ HotplugHandlerClass *hhc;
CPUState *cs = CPU(dev);
+ Error *local_err = NULL;
if (!vms->acpi_dev) {
error_setg(errp, "GED does not exists or device is not realized!");
@@ -3249,14 +3262,25 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev,
return;
}
- /* TODO: request cpu hotplug from guest */
+ /* request cpu hot-unplug from guest */
+ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev);
+ hhc->unplug_request(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err);
+ if (local_err) {
+ goto fail;
+ }
+
+ return;
+fail:
+ error_propagate(errp, local_err);
}
static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
+ HotplugHandlerClass *hhc;
CPUState *cs = CPU(dev);
+ Error *local_err = NULL;
CPUArchId *cpu_slot;
if (!vms->acpi_dev) {
@@ -3266,7 +3290,12 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
cpu_slot = virt_find_cpu_slot(cs);
- /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */
+ /* update the acpi cpu hotplug state for cpu hot-unplug */
+ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev);
+ hhc->unplug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err);
+ if (local_err) {
+ goto fail;
+ }
unwire_gic_cpu_irqs(vms, cs);
virt_update_gic(vms, cs, false);
@@ -3282,6 +3311,10 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
cpu_slot->cpu = NULL;
cs->disabled = true;
+
+ return;
+fail:
+ error_propagate(errp, local_err);
}
static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 25/33] target/arm/cpu: Check if hotplugged ARM vCPU's FEAT match existing
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (23 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 24/33] arm/virt: Update the guest(via GED) about vCPU hot-(un)plug events Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 26/33] tcg: Update tcg_register_thread() leg to handle region alloc for hotplugged vCPU Salil Mehta via
` (4 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
The ARM extensions configuration *must* match the existing vCPUs already
initialized in KVM at VM initialization. ARM does not allow any per-vCPU
features to be changed once the system has fully initialized. This is an
immutable constraint of the ARM CPU architecture.
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
target/arm/cpu.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 3de0cb346b..14fcabc2c9 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1912,6 +1912,49 @@ static void arm_cpu_finalizefn(Object *obj)
#endif
}
+static void arm_cpu_check_features_change(ARMCPU *cpu, Error **errp)
+{
+#if defined(TARGET_AARCH64) && !defined(CONFIG_USER_ONLY)
+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+ ARMCPU *firstcpu = ARM_CPU(first_cpu);
+ DeviceState *dev = DEVICE(cpu);
+
+ if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ return;
+ }
+
+ /* For now, features of hotplugged CPU MUST match earlier booted CPUs */
+ if (!dev->hotplugged || !mc->has_hotpluggable_cpus) {
+ return;
+ }
+
+ if (cpu_isar_feature(aa64_sve, cpu) &&
+ (cpu->sve_max_vq != firstcpu->sve_max_vq ||
+ cpu->sve_vq.map != firstcpu->sve_vq.map)) {
+ error_setg(errp,
+ "CPU %d: 'SVE' feature didn't match with existing CPUs",
+ CPU(cpu)->cpu_index);
+ return;
+ }
+
+ if (cpu_isar_feature(aa64_sme, cpu) &&
+ (cpu->sme_vq.map != firstcpu->sme_vq.map)) {
+ error_setg(errp,
+ "CPU %d: 'SME' feature didn't match with exisitng CPUs",
+ CPU(cpu)->cpu_index);
+ return;
+ }
+
+ if (cpu_isar_feature(aa64_pauth, cpu) &&
+ (cpu->prop_pauth != firstcpu->prop_pauth)) {
+ error_setg(errp,
+ "CPU %d: 'PAuth' feature didn't match with exisitng CPUs",
+ CPU(cpu)->cpu_index);
+ return;
+ }
+#endif
+}
+
void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
{
Error *local_err = NULL;
@@ -1961,6 +2004,13 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
return;
}
}
+
+ /*
+ * As of now, we do not support heterogeneous computing, hence, features of
+ * all cpus should match. Hotplugged vCPUs are not allowed to have
+ * different features than the existing cold-plugged vCPUs
+ */
+ arm_cpu_check_features_change(cpu, &local_err);
}
static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 26/33] tcg: Update tcg_register_thread() leg to handle region alloc for hotplugged vCPU
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (24 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 25/33] target/arm/cpu: Check if hotplugged ARM vCPU's FEAT match existing Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 27/33] target/arm: Add support to *unrealize* ARMCPU during vCPU Hot-unplug Salil Mehta via
` (3 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
The TCG code cache consists of multiple regions shared among vCPUs in
multi-threaded TCG mode. For cold-plugged vCPUs, these regions are sized and
allocated during initialization in the `tcg_register_thread()` function when the
vCPUs are realized. Later, these regions must be reallocated for hot-plugged
vCPUs as well.
If region allocation fails for hot-plugged vCPUs—due to the code cache being
under stress—the TCG code cache must be flushed to create space for the newly
hot-plugged vCPU. The only safe way to perform `tb_flush()` is to execute it
synchronously within the `cpu_exec()` loop.
Reported-by: Miguel Luis <miguel.luis@oracle.com>
Signed-off-by: Miguel Luis <miguel.luis@oracle.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
accel/tcg/tcg-accel-ops-mttcg.c | 2 +-
accel/tcg/tcg-accel-ops-rr.c | 2 +-
include/tcg/startup.h | 6 ++++++
include/tcg/tcg.h | 1 +
tcg/region.c | 14 ++++++++++++++
tcg/tcg.c | 13 ++++++++++++-
6 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 49814ec4af..ab2f79d2c7 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -74,7 +74,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
force_rcu.notifier.notify = mttcg_force_rcu;
force_rcu.cpu = cpu;
rcu_add_force_rcu_notifier(&force_rcu.notifier);
- tcg_register_thread();
+ tcg_register_thread(cpu);
bql_lock();
qemu_thread_get_self(cpu->thread);
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 8ebadf8e9e..953231837c 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -186,7 +186,7 @@ static void *rr_cpu_thread_fn(void *arg)
rcu_register_thread();
force_rcu.notify = rr_force_rcu;
rcu_add_force_rcu_notifier(&force_rcu);
- tcg_register_thread();
+ tcg_register_thread(cpu);
bql_lock();
qemu_thread_get_self(cpu->thread);
diff --git a/include/tcg/startup.h b/include/tcg/startup.h
index f71305765c..a565071516 100644
--- a/include/tcg/startup.h
+++ b/include/tcg/startup.h
@@ -25,6 +25,8 @@
#ifndef TCG_STARTUP_H
#define TCG_STARTUP_H
+#include "hw/core/cpu.h"
+
/**
* tcg_init: Initialize the TCG runtime
* @tb_size: translation buffer size
@@ -43,7 +45,11 @@ void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus);
* accelerator's init_machine() method) must register with this
* function before initiating translation.
*/
+#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void);
+#else
+void tcg_register_thread(CPUState *cpu);
+#endif
/**
* tcg_prologue_init(): Generate the code for the TCG prologue
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 21d5884741..e3328cc600 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -518,6 +518,7 @@ struct TCGContext {
/* Track which vCPU triggers events */
CPUState *cpu; /* *_trans */
+ bool tbflush_pend; /* TB flush pending due to vCPU hotplug */
/* These structures are private to tcg-target.c.inc. */
#ifdef TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/region.c b/tcg/region.c
index 478ec051c4..9007bfd71e 100644
--- a/tcg/region.c
+++ b/tcg/region.c
@@ -393,6 +393,20 @@ bool tcg_region_alloc(TCGContext *s)
static void tcg_region_initial_alloc__locked(TCGContext *s)
{
bool err = tcg_region_alloc__locked(s);
+
+ /*
+ * Hotplugged vCPUs may initially fail to find even a single available
+ * region. This could be due to the TB cache being under stress from the
+ * existing vCPUs. To mitigate this, the TB cache should be flushed.
+ * Therefore, the region allocation failure should be ignored, and a flag
+ * set to mark `tb_flush()` as pending. The flush will be performed later,
+ * synchronously in the context of `cpu_exec_loop()`/`tb_gen_code()`.
+ */
+ if (err && s->cpu && DEVICE(s->cpu)->hotplugged) {
+ s->tbflush_pend = true;
+ return;
+ }
+
g_assert(!err);
}
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 34e3056380..5e9c6b2b4b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -783,12 +783,13 @@ void tcg_register_thread(void)
tcg_ctx = &tcg_init_ctx;
}
#else
-void tcg_register_thread(void)
+void tcg_register_thread(CPUState *cpu)
{
TCGContext *s = g_malloc(sizeof(*s));
unsigned int i, n;
*s = tcg_init_ctx;
+ s->cpu = cpu;
/* Relink mem_base. */
for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
@@ -1388,6 +1389,16 @@ TranslationBlock *tcg_tb_alloc(TCGContext *s)
TranslationBlock *tb;
void *next;
+ /*
+ * The hotplugged vCPU's TCG context might not have any regions allocated.
+ * If this condition is detected, we should flush the TB cache to ensure
+ * that regions can be allocated for the newly hotplugged vCPU's TCGContext.
+ */
+ if (s->tbflush_pend) {
+ s->tbflush_pend = false;
+ return NULL;
+ }
+
retry:
tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 27/33] target/arm: Add support to *unrealize* ARMCPU during vCPU Hot-unplug
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (25 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 26/33] tcg: Update tcg_register_thread() leg to handle region alloc for hotplugged vCPU Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 28/33] tcg/mttcg: Introduce MTTCG thread unregistration leg Salil Mehta via
` (2 subsequent siblings)
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
vCPU Hot-unplug will result in QOM CPU object unrealization which will do away
with all the vCPU thread creations, allocations, registrations that happened
as part of the realization process. This change introduces the ARM CPU unrealize
function taking care of exactly that.
Note: initialized KVM vCPUs are not destroyed in the host KVM; rather, their
QEMU context is parked at the QEMU KVM layer.
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Reported-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
[VP: Identified CPU stall issue & suggested probable fix]
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
system/physmem.c | 8 ++-
target/arm/cpu.c | 117 ++++++++++++++++++++++++++++++++++++++++-
target/arm/cpu.h | 14 +++++
target/arm/gdbstub.c | 6 +++
target/arm/helper.c | 25 +++++++++
target/arm/internals.h | 3 ++
target/arm/kvm.c | 5 ++
7 files changed, 176 insertions(+), 2 deletions(-)
diff --git a/system/physmem.c b/system/physmem.c
index d71a2b1bbd..1bec3049fe 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2613,8 +2613,14 @@ static void tcg_commit(MemoryListener *listener)
*
* That said, the listener is also called during realize, before
* all of the tcg machinery for run-on is initialized: thus halt_cond.
+ * Similarly, the listener can also be triggered during the *unrealize*
+ * operation. In such a case, we should avoid using `run_on_cpu` because the
+ * TCG vCPU thread might already be terminated. As a result, the CPU work
+ * will never get processed, and `tcg_commit_cpu` will not be called. This
+ * means that operations like `tlb_flush()` might not be executed,
+ * potentially leading to inconsistencies.
*/
- if (cpu->halt_cond) {
+ if (cpu->halt_cond && !cpu->unplug) {
async_run_on_cpu(cpu, tcg_commit_cpu, RUN_ON_CPU_HOST_PTR(cpuas));
} else {
tcg_commit_cpu(cpu, RUN_ON_CPU_HOST_PTR(cpuas));
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 14fcabc2c9..19d2f89f5f 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -157,6 +157,16 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node);
}
+void arm_unregister_pre_el_change_hooks(ARMCPU *cpu)
+{
+ ARMELChangeHook *entry, *next;
+
+ QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node, next) {
+ QLIST_REMOVE(entry, node);
+ g_free(entry);
+ }
+}
+
void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
void *opaque)
{
@@ -168,6 +178,16 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);
}
+void arm_unregister_el_change_hooks(ARMCPU *cpu)
+{
+ ARMELChangeHook *entry, *next;
+
+ QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) {
+ QLIST_REMOVE(entry, node);
+ g_free(entry);
+ }
+}
+
static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
{
/* Reset a single ARMCPRegInfo register */
@@ -2642,6 +2662,98 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
acc->parent_realize(dev, errp);
}
+#ifndef CONFIG_USER_ONLY
+static void arm_cpu_unrealizefn(DeviceState *dev)
+{
+ ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
+ ARMCPU *cpu = ARM_CPU(dev);
+ CPUARMState *env = &cpu->env;
+ CPUState *cs = CPU(dev);
+ bool has_secure;
+
+ /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */
+ destroy_cpreg_list(cpu);
+ arm_cpu_unregister_gdb_regs(cpu);
+ unregister_cp_regs_for_features(cpu);
+
+ if (cpu->sau_sregion && arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ g_free(env->sau.rbar);
+ g_free(env->sau.rlar);
+ }
+
+ if (arm_feature(env, ARM_FEATURE_PMSA) &&
+ arm_feature(env, ARM_FEATURE_V7) &&
+ cpu->pmsav7_dregion) {
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ g_free(env->pmsav8.rbar[M_REG_NS]);
+ g_free(env->pmsav8.rlar[M_REG_NS]);
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ g_free(env->pmsav8.rbar[M_REG_S]);
+ g_free(env->pmsav8.rlar[M_REG_S]);
+ }
+ } else {
+ g_free(env->pmsav7.drbar);
+ g_free(env->pmsav7.drsr);
+ g_free(env->pmsav7.dracr);
+ }
+ if (cpu->pmsav8r_hdregion) {
+ g_free(env->pmsav8.hprbar);
+ g_free(env->pmsav8.hprlar);
+ }
+ }
+
+ if (arm_feature(env, ARM_FEATURE_PMU)) {
+ if (!kvm_enabled()) {
+ arm_unregister_pre_el_change_hooks(cpu);
+ arm_unregister_el_change_hooks(cpu);
+ }
+
+ if (cpu->pmu_timer) {
+ timer_del(cpu->pmu_timer);
+ }
+ }
+
+ cpu_remove_sync(CPU(dev));
+
+ /*
+ * We are intentionally destroying the CPU address space after the vCPU
+ * threads have been joined. This ensures that for TCG, any pending TLB
+ * flushes associated with the CPU are completed. The destruction of the
+ * address space also removes associated listeners, and joining threads
+ * after the address space no longer exists can lead to race conditions with
+ * already queued work for this CPU, which may result in a segmentation
+ * fault (SEGV) in `tcg_commit_cpu()`.
+ *
+ * Alternatively, Peter Maydell has suggested moving the CPU address space
+ * destruction to `cpu_common_unrealize()`, which would be called in the
+ * context of `parent_unrealize()`. This would also address the race
+ * condition in TCG.
+ *
+ * RFC: Any additional thoughts or feedback on this approach would be
+ * appreciated.
+ */
+ has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY);
+ cpu_address_space_destroy(cs, ARMASIdx_NS);
+ if (cpu->tag_memory != NULL) {
+ cpu_address_space_destroy(cs, ARMASIdx_TagNS);
+ if (has_secure) {
+ cpu_address_space_destroy(cs, ARMASIdx_TagS);
+ }
+ }
+ if (has_secure) {
+ cpu_address_space_destroy(cs, ARMASIdx_S);
+ }
+
+ acc->parent_unrealize(dev);
+
+ timer_del(cpu->gt_timer[GTIMER_PHYS]);
+ timer_del(cpu->gt_timer[GTIMER_VIRT]);
+ timer_del(cpu->gt_timer[GTIMER_HYP]);
+ timer_del(cpu->gt_timer[GTIMER_SEC]);
+ timer_del(cpu->gt_timer[GTIMER_HYPVIRT]);
+}
+#endif
+
static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
{
ObjectClass *oc;
@@ -2745,7 +2857,10 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
device_class_set_parent_realize(dc, arm_cpu_realizefn,
&acc->parent_realize);
-
+#ifndef CONFIG_USER_ONLY
+ device_class_set_parent_unrealize(dc, arm_cpu_unrealizefn,
+ &acc->parent_unrealize);
+#endif
device_class_set_props(dc, arm_cpu_properties);
resettable_class_set_parent_phases(rc, NULL, arm_cpu_reset_hold, NULL,
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 1277a0ddfc..07bd7d6542 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1128,6 +1128,7 @@ struct ARMCPUClass {
const ARMCPUInfo *info;
DeviceRealize parent_realize;
+ DeviceUnrealize parent_unrealize;
ResettablePhases parent_phases;
};
@@ -3293,6 +3294,13 @@ static inline AddressSpace *arm_addressspace(CPUState *cs, MemTxAttrs attrs)
*/
void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
void *opaque);
+/**
+ * arm_unregister_pre_el_change_hooks:
+ * Unregister all pre-EL-change hook functions. Generally called during
+ * the unrealize leg.
+ */
+void arm_unregister_pre_el_change_hooks(ARMCPU *cpu);
+
/**
* arm_register_el_change_hook:
* Register a hook function which will be called immediately after this
@@ -3305,6 +3313,12 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
*/
void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void
*opaque);
+/**
+ * arm_unregister_el_change_hooks:
+ * Unregister all EL-change hook functions. Generally called during
+ * the unrealize leg.
+ */
+void arm_unregister_el_change_hooks(ARMCPU *cpu);
/**
* arm_rebuild_hflags:
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index 554b8736bb..58067e30a5 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -595,3 +595,9 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
}
#endif /* CONFIG_TCG */
}
+
+void arm_cpu_unregister_gdb_regs(ARMCPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ gdb_unregister_coprocessor_all(cs);
+}
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0a582c1cd3..a890f98445 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -264,6 +264,19 @@ void init_cpreg_list(ARMCPU *cpu)
g_list_free(keys);
}
+void destroy_cpreg_list(ARMCPU *cpu)
+{
+ assert(cpu->cpreg_indexes);
+ assert(cpu->cpreg_values);
+ assert(cpu->cpreg_vmstate_indexes);
+ assert(cpu->cpreg_vmstate_values);
+
+ g_free(cpu->cpreg_indexes);
+ g_free(cpu->cpreg_values);
+ g_free(cpu->cpreg_vmstate_indexes);
+ g_free(cpu->cpreg_vmstate_values);
+}
+
static bool arm_pan_enabled(CPUARMState *env)
{
if (is_a64(env)) {
@@ -9985,6 +9998,18 @@ void register_cp_regs_for_features(ARMCPU *cpu)
#endif
}
+void unregister_cp_regs_for_features(ARMCPU *cpu)
+{
+ CPUARMState *env = &cpu->env;
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ /* M profile has no coprocessor registers */
+ return;
+ }
+
+ /* empty it all. unregister all the coprocessor registers */
+ g_hash_table_remove_all(cpu->cp_regs);
+}
+
/*
* Private utility function for define_one_arm_cp_reg_with_opaque():
* add a single reginfo struct to the hash table.
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 203a2dae14..722c4dd00b 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -367,9 +367,12 @@ void arm_cpu_register(const ARMCPUInfo *info);
void aarch64_cpu_register(const ARMCPUInfo *info);
void register_cp_regs_for_features(ARMCPU *cpu);
+void unregister_cp_regs_for_features(ARMCPU *cpu);
void init_cpreg_list(ARMCPU *cpu);
+void destroy_cpreg_list(ARMCPU *cpu);
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
+void arm_cpu_unregister_gdb_regs(ARMCPU *cpu);
void arm_translate_init(void);
void arm_cpu_register_gdb_commands(ARMCPU *cpu);
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 8ed160cbca..369d7ad135 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1983,6 +1983,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
int kvm_arch_destroy_vcpu(CPUState *cs)
{
+ /* vCPUs which are yet to be realized will not have a handler */
+ if (cs->thread_id) {
+ qemu_del_vm_change_state_handler(cs->vmcse);
+ }
+
return 0;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 28/33] tcg/mttcg: Introduce MTTCG thread unregistration leg
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (26 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 27/33] target/arm: Add support to *unrealize* ARMCPU during vCPU Hot-unplug Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:18 ` [PATCH RFC V4 29/33] hw/intc/arm_gicv3_common: Add GICv3CPUState 'accessible' flag migration handling Salil Mehta via
2024-10-09 3:37 ` [PATCH RFC V4 30/33] target/arm/kvm, tcg: Handle SMCCC hypercall exits in VMM during PSCI_CPU_{ON, OFF} Salil Mehta via
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
From: Miguel Luis <miguel.luis@oracle.com>
Introduce the TCG thread unregistration leg, which shall be called in the
context of TCG vCPU unrealize.
Reported-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Miguel Luis <miguel.luis@oracle.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
accel/tcg/tcg-accel-ops-mttcg.c | 1 +
include/tcg/startup.h | 7 +++++++
tcg/tcg.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 41 insertions(+)
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index ab2f79d2c7..4b26164ffd 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -122,6 +122,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
bql_unlock();
rcu_remove_force_rcu_notifier(&force_rcu.notifier);
rcu_unregister_thread();
+ tcg_unregister_thread();
return NULL;
}
diff --git a/include/tcg/startup.h b/include/tcg/startup.h
index a565071516..c035d03f7e 100644
--- a/include/tcg/startup.h
+++ b/include/tcg/startup.h
@@ -51,6 +51,13 @@ void tcg_register_thread(void);
void tcg_register_thread(CPUState *cpu);
#endif
+/**
+ * tcg_unregister_thread: Unregister this thread with the TCG runtime
+ *
+ * This leg shall be called whenever TCG vCPU is hot-unplugged
+ */
+void tcg_unregister_thread(void);
+
/**
* tcg_prologue_init(): Generate the code for the TCG prologue
*
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 5e9c6b2b4b..3bdebdb332 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -811,6 +811,39 @@ void tcg_register_thread(CPUState *cpu)
tcg_ctx = s;
}
+
+static void tcg_free_plugin_context(TCGContext *s)
+{
+#ifdef CONFIG_PLUGIN
+    unsigned i;
+    /* Release the per-context plugin TB descriptor and its insn array. */
+    if (s->plugin_tb) {
+        for (i = 0; i < s->plugin_tb->insns->len; i++) {
+            g_free(g_ptr_array_index(s->plugin_tb->insns, i));
+        }
+        g_ptr_array_free(s->plugin_tb->insns, TRUE);
+
+        /* 'insns' is not NULLed by the free above; always free the TB */
+        g_free(s->plugin_tb);
+        s->plugin_tb = NULL;
+    }
+#endif
+}
+
+void tcg_unregister_thread(void)
+{
+ TCGContext *s = tcg_ctx;
+ unsigned int n;
+
+ /* Decrement the context count and clear the last tcg_ctxs slot (n - 1) */
+ n = qatomic_fetch_dec(&tcg_cur_ctxs);
+ g_assert(n > 1);
+ qatomic_store_release(&tcg_ctxs[n - 1], 0);
+ /* NOTE(review): slot n - 1 is not necessarily the one holding 's' - confirm */
+ tcg_free_plugin_context(s);
+
+ g_free(s);
+}
#endif /* !CONFIG_USER_ONLY */
/* pool based memory allocation */
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 29/33] hw/intc/arm_gicv3_common: Add GICv3CPUState 'accessible' flag migration handling
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (27 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 28/33] tcg/mttcg: Introduce MTTCG thread unregistration leg Salil Mehta via
@ 2024-10-09 3:18 ` Salil Mehta via
2024-10-09 3:37 ` [PATCH RFC V4 30/33] target/arm/kvm, tcg: Handle SMCCC hypercall exits in VMM during PSCI_CPU_{ON, OFF} Salil Mehta via
29 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:18 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
The QOM `GICv3CPUState` (and consequently the corresponding KVM VGIC `ICC_*_EL1`
registers) can be either 'accessible' or 'inaccessible', depending on the state
of the associated QOM vCPUs. This `gicc_accessible` state should be saved during
migration at the source and restored at the destination.
Ideally, the number of possible and enabled QOM vCPUs should match between the
source and destination. Ensuring this consistency is the responsibility of the
administrator. However, if the destination QEMU has more enabled vCPUs than the
source, we can either fail the migration or override the destination QEMU’s vCPU
configuration to match the source. We have adopted the latter approach as a
mitigation for the mismatch. Nonetheless, the administrator should still ensure
that the number of possible QOM vCPUs is consistent at both ends.
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/intc/arm_gicv3_common.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index e7b2d04358..5dda14ce1c 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -126,6 +126,29 @@ static int vmstate_gicv3_cpu_pre_load(void *opaque)
return 0;
}
+static int vmstate_gicv3_cpu_post_load(void *opaque, int version_id)
+{
+    GICv3CPUState *cs = opaque;
+
+    /*
+     * If the destination QEMU has more *enabled* vCPUs than the source, we can
+     * either *fail* the migration or override the destination QEMU's vCPU
+     * configuration to match the source. Since it is safe to override the
+     * `CPUState` of the extra *enabled* vCPUs at the destination, we have
+     * adopted the latter approach as a mitigation for the mismatch.
+     * RFC: Question: any suggestions on this are welcome?
+     */
+    if (!gicv3_cpu_accessible(cs) && qemu_enabled_cpu(cs->cpu)) {
+        warn_report("Found CPU %d enabled for incoming *disabled* GICC State",
+                    cs->cpu->cpu_index);
+        warn_report("Disabling CPU %d to match the incoming migrated state",
+                    cs->cpu->cpu_index);
+        qdev_unrealize(DEVICE(cs->cpu));
+    }
+
+    return 0;
+}
+
static bool icc_sre_el1_reg_needed(void *opaque)
{
GICv3CPUState *cs = opaque;
@@ -186,6 +209,7 @@ static const VMStateDescription vmstate_gicv3_cpu = {
.version_id = 1,
.minimum_version_id = 1,
.pre_load = vmstate_gicv3_cpu_pre_load,
+ .post_load = vmstate_gicv3_cpu_post_load,
.fields = (const VMStateField[]) {
VMSTATE_UINT32(level, GICv3CPUState),
VMSTATE_UINT32(gicr_ctlr, GICv3CPUState),
@@ -207,6 +231,7 @@ static const VMStateDescription vmstate_gicv3_cpu = {
VMSTATE_UINT64_2DARRAY(icc_apr, GICv3CPUState, 3, 4),
VMSTATE_UINT64_ARRAY(icc_igrpen, GICv3CPUState, 3),
VMSTATE_UINT64(icc_ctlr_el3, GICv3CPUState),
+ VMSTATE_BOOL(gicc_accessible, GICv3CPUState),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription * const []) {
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 30/33] target/arm/kvm, tcg: Handle SMCCC hypercall exits in VMM during PSCI_CPU_{ON, OFF}
2024-10-09 3:17 [PATCH RFC V4 00/33] Support of Virtual CPU Hotplug for ARMv8 Arch Salil Mehta via
` (28 preceding siblings ...)
2024-10-09 3:18 ` [PATCH RFC V4 29/33] hw/intc/arm_gicv3_common: Add GICv3CPUState 'accessible' flag migration handling Salil Mehta via
@ 2024-10-09 3:37 ` Salil Mehta via
2024-10-09 3:37 ` [PATCH RFC V4 31/33] target/arm/kvm: Write vCPU's state back to KVM on cold-reset Salil Mehta via
` (2 more replies)
29 siblings, 3 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:37 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
From: Author Salil Mehta <salil.mehta@huawei.com>
To support vCPU hotplug, we must trap any `HVC`/`SMC` `PSCI_CPU_{ON,OFF}`
hypercalls from the host KVM to QEMU for policy checks. This ensures the
following when a vCPU is brought online:
1. The vCPU is actually plugged in (i.e., present).
2. The vCPU is not disabled.
Implement the registration and handling of `HVC`/`SMC` hypercall exits within
the VMM, ensuring that proper policy checks and control flow are enforced during
the vCPU onlining and offlining processes.
Co-developed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
target/arm/arm-powerctl.c | 22 ++++++---
target/arm/helper.c | 2 +-
target/arm/internals.h | 11 -----
target/arm/kvm.c | 93 +++++++++++++++++++++++++++++++++++++
target/arm/kvm_arm.h | 14 ++++++
target/arm/meson.build | 1 +
target/arm/{tcg => }/psci.c | 8 ++++
target/arm/tcg/meson.build | 4 --
8 files changed, 132 insertions(+), 23 deletions(-)
rename target/arm/{tcg => }/psci.c (97%)
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index 2b2055c6ac..03bb8e7b8a 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -17,6 +17,7 @@
#include "qemu/main-loop.h"
#include "sysemu/tcg.h"
#include "target/arm/multiprocessing.h"
+#include "hw/boards.h"
#ifndef DEBUG_ARM_POWERCTL
#define DEBUG_ARM_POWERCTL 0
@@ -31,14 +32,14 @@
CPUState *arm_get_cpu_by_id(uint64_t id)
{
+ MachineState *ms = MACHINE(qdev_get_machine());
CPUState *cpu;
DPRINTF("cpu %" PRId64 "\n", id);
- CPU_FOREACH(cpu) {
- ARMCPU *armcpu = ARM_CPU(cpu);
-
- if (arm_cpu_mp_affinity(armcpu) == id) {
+ /* with vCPU hotplug support, we must now check for all possible vCPUs */
+ CPU_FOREACH_POSSIBLE(cpu, ms->possible_cpus) {
+ if (cpu && (arm_cpu_mp_affinity(ARM_CPU(cpu)) == id)) {
return cpu;
}
}
@@ -119,9 +120,16 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
/* Retrieve the cpu we are powering up */
target_cpu_state = arm_get_cpu_by_id(cpuid);
- if (!target_cpu_state) {
- /* The cpu was not found */
- return QEMU_ARM_POWERCTL_INVALID_PARAM;
+
+ if (!qemu_enabled_cpu(target_cpu_state)) {
+ /*
+ * The cpu is not plugged in or disabled. We should return appropriate
+ * value as introduced in DEN0022E PSCI 1.2 issue E
+ */
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "[ARM]%s: Denying attempt to online removed/disabled "
+ "CPU%" PRId64"\n", __func__, cpuid);
+ return QEMU_ARM_POWERCTL_IS_OFF;
}
target_cpu = ARM_CPU(target_cpu_state);
diff --git a/target/arm/helper.c b/target/arm/helper.c
index a890f98445..c121e3bc1a 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11840,7 +11840,7 @@ void arm_cpu_do_interrupt(CPUState *cs)
env->exception.syndrome);
}
- if (tcg_enabled() && arm_is_psci_call(cpu, cs->exception_index)) {
+ if (arm_is_psci_call(cpu, cs->exception_index)) {
arm_handle_psci_call(cpu);
qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
return;
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 722c4dd00b..e9c3ae4494 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -501,21 +501,10 @@ vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len);
/* Callback function for when a watchpoint or breakpoint triggers. */
void arm_debug_excp_handler(CPUState *cs);
-#if defined(CONFIG_USER_ONLY) || !defined(CONFIG_TCG)
-static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type)
-{
- return false;
-}
-static inline void arm_handle_psci_call(ARMCPU *cpu)
-{
- g_assert_not_reached();
-}
-#else
/* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. */
bool arm_is_psci_call(ARMCPU *cpu, int excp_type);
/* Actually handle a PSCI call */
void arm_handle_psci_call(ARMCPU *cpu);
-#endif
/**
* arm_clear_exclusive: clear the exclusive monitor
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 369d7ad135..9a51249a42 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -544,9 +544,51 @@ int kvm_arch_get_default_type(MachineState *ms)
return fixed_ipa ? 0 : size;
}
+static bool kvm_arm_set_vm_attr(struct kvm_device_attr *attr, const char *name)
+{
+ int err;
+ /* Probe for the attribute first, then set it; @name prefixes the errors */
+ err = kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, attr);
+ if (err != 0) {
+ error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err));
+ return false;
+ }
+
+ err = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, attr);
+ if (err != 0) {
+ error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err));
+ return false;
+ }
+
+ return true;
+}
+
+int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction)
+{
+ struct kvm_smccc_filter filter = {
+ .base = func,
+ .nr_functions = 1, /* the filter covers exactly one function ID */
+ .action = faction,
+ };
+ struct kvm_device_attr attr = {
+ .group = KVM_ARM_VM_SMCCC_CTRL,
+ .attr = KVM_ARM_VM_SMCCC_FILTER,
+ .flags = 0,
+ .addr = (uintptr_t)&filter,
+ };
+ /* Install the filter as a VM attribute: 0 on success, -1 on failure */
+ if (!kvm_arm_set_vm_attr(&attr, "SMCCC Filter")) {
+ error_report("failed to set SMCCC filter in KVM Host");
+ return -1;
+ }
+
+ return 0;
+}
+
int kvm_arch_init(MachineState *ms, KVMState *s)
{
int ret = 0;
+
/* For ARM interrupt delivery is always asynchronous,
* whether we are using an in-kernel VGIC or not.
*/
@@ -609,6 +651,22 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
hw_breakpoints = g_array_sized_new(true, true,
sizeof(HWBreakpoint), max_hw_bps);
+ /*
+ * To be able to handle PSCI CPU ON calls in QEMU, we need to install SMCCC
+ * filter in the Host KVM. This is required to support features like
+ * virtual CPU Hotplug on ARM platforms.
+ */
+ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON,
+ KVM_SMCCC_FILTER_FWD_TO_USER)) {
+ error_report("CPU On PSCI-to-user-space fwd filter install failed");
+ abort();
+ }
+ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF,
+ KVM_SMCCC_FILTER_FWD_TO_USER)) {
+ error_report("CPU Off PSCI-to-user-space fwd filter install failed");
+ abort();
+ }
+
return ret;
}
@@ -1452,6 +1510,38 @@ static bool kvm_arm_handle_debug(ARMCPU *cpu,
return false;
}
+static int kvm_arm_handle_hypercall(CPUState *cs, struct kvm_run *run)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    /* Turn a KVM hypercall exit into an SMC/HVC exception handled by QEMU. */
+    kvm_cpu_synchronize_state(cs);
+
+    /*
+     * 'flags' is a bit mask, so test the SMC bit rather than comparing for
+     * equality. The SMC/HVC immediate is hard-coded to 0 as no non-zero value
+     * is expected for now; reading it would need KVM_GET_ONE_REG (or state
+     * synchronization) to be enhanced to also fetch ESR_EL2.
+     */
+    if (run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC) {
+        cs->exception_index = EXCP_SMC;
+        env->exception.syndrome = syn_aa64_smc(0);
+    } else {
+        cs->exception_index = EXCP_HVC;
+        env->exception.syndrome = syn_aa64_hvc(0);
+    }
+    env->exception.target_el = 1;
+    bql_lock();
+    arm_cpu_do_interrupt(cs);
+    bql_unlock();
+
+    /*
+     * For PSCI, exit the kvm_run loop and process the work. Especially
+     * important if this was a CPU_OFF command and we can't return to the guest.
+     */
+    return EXCP_INTERRUPT;
+}
+
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
ARMCPU *cpu = ARM_CPU(cs);
@@ -1468,6 +1558,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
ret = kvm_arm_handle_dabt_nisv(cpu, run->arm_nisv.esr_iss,
run->arm_nisv.fault_ipa);
break;
+ case KVM_EXIT_HYPERCALL:
+ ret = kvm_arm_handle_hypercall(cs, run);
+ break;
default:
qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
__func__, run->exit_reason);
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 0be7e896d2..b9c2b0f501 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -225,6 +225,15 @@ void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa);
int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level);
+/**
+ * kvm_arm_set_smccc_filter
+ * @func: SMCCC function ID that the filter applies to
+ * @faction: SMCCC filter action (handle, deny, fwd-to-user) to be deployed
+ *
+ * Sets the Arm SMCCC filter in the KVM host for selective hypercall exits
+ */
+int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction);
+
#else
/*
@@ -294,6 +303,11 @@ static inline uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
g_assert_not_reached();
}
+static inline int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction)
+{
+ g_assert_not_reached(); /* stub in the #else branch: must never be called */
+}
+
#endif
#endif
diff --git a/target/arm/meson.build b/target/arm/meson.build
index 2e10464dbb..3e9f704f35 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -23,6 +23,7 @@ arm_system_ss.add(files(
'arm-qmp-cmds.c',
'cortex-regs.c',
'machine.c',
+ 'psci.c',
'ptw.c',
))
diff --git a/target/arm/tcg/psci.c b/target/arm/psci.c
similarity index 97%
rename from target/arm/tcg/psci.c
rename to target/arm/psci.c
index 51d2ca3d30..b3fcb85079 100644
--- a/target/arm/tcg/psci.c
+++ b/target/arm/psci.c
@@ -21,7 +21,9 @@
#include "exec/helper-proto.h"
#include "kvm-consts.h"
#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
#include "sysemu/runstate.h"
+#include "sysemu/tcg.h"
#include "internals.h"
#include "arm-powerctl.h"
#include "target/arm/multiprocessing.h"
@@ -158,6 +160,11 @@ void arm_handle_psci_call(ARMCPU *cpu)
case QEMU_PSCI_0_1_FN_CPU_SUSPEND:
case QEMU_PSCI_0_2_FN_CPU_SUSPEND:
case QEMU_PSCI_0_2_FN64_CPU_SUSPEND:
+ if (!tcg_enabled()) {
+ warn_report("CPU suspend not supported in non-tcg mode");
+ break;
+ }
+#ifdef CONFIG_TCG
/* Affinity levels are not supported in QEMU */
if (param[1] & 0xfffe0000) {
ret = QEMU_PSCI_RET_INVALID_PARAMS;
@@ -170,6 +177,7 @@ void arm_handle_psci_call(ARMCPU *cpu)
env->regs[0] = 0;
}
helper_wfi(env, 4);
+#endif
break;
case QEMU_PSCI_1_0_FN_PSCI_FEATURES:
switch (param[1]) {
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
index 508932a249..5b43c84c40 100644
--- a/target/arm/tcg/meson.build
+++ b/target/arm/tcg/meson.build
@@ -54,9 +54,5 @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
'sve_helper.c',
))
-arm_system_ss.add(files(
- 'psci.c',
-))
-
arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c'))
arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c'))
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 31/33] target/arm/kvm: Write vCPU's state back to KVM on cold-reset
2024-10-09 3:37 ` [PATCH RFC V4 30/33] target/arm/kvm, tcg: Handle SMCCC hypercall exits in VMM during PSCI_CPU_{ON, OFF} Salil Mehta via
@ 2024-10-09 3:37 ` Salil Mehta via
2024-10-09 3:37 ` [PATCH RFC V4 32/33] hw/intc/arm_gicv3_kvm: Pause all vCPU to ensure locking in KVM of resetting vCPU Salil Mehta via
2024-10-09 3:37 ` [PATCH RFC V4 33/33] hw/arm/virt: Expose cold-booted vCPUs as MADT GICC *Enabled* Salil Mehta via
2 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:37 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Previously, all `PSCI_CPU_{ON, OFF}` calls were handled directly by KVM.
However, with the introduction of vCPU hotplug, these hypervisor calls are now
trapped to QEMU for policy checks. This shift can lead to inconsistent vCPU
states between KVM and QEMU, particularly when the vCPU has been recently
plugged in and is transitioning from the unparked state in QOM. Therefore, it is
crucial to synchronize the vCPU state with KVM, especially in the context of a
cold reset of the QOM vCPU.
To ensure this synchronization, mark the QOM vCPU as "dirty" to trigger a call
to `kvm_arch_put_registers()`. This guarantees that KVM’s `MP_STATE` is updated
accordingly, forcing synchronization of the `mp_state` between QEMU and KVM.
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
target/arm/kvm.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 9a51249a42..a3c98fa213 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1038,6 +1038,7 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu)
void kvm_arm_reset_vcpu(ARMCPU *cpu)
{
int ret;
+ CPUState *cs = CPU(cpu);
/* Re-init VCPU so that all registers are set to
* their respective reset values.
@@ -1059,6 +1060,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu)
* for the same reason we do so in kvm_arch_get_registers().
*/
write_list_to_cpustate(cpu);
+
+ /*
+ * Ensure we call kvm_arch_put_registers(). The vCPU isn't marked dirty if
+ * it was parked in KVM and is now booting from a PSCI CPU_ON call.
+ */
+ cs->vcpu_dirty = true;
}
void kvm_arm_create_host_vcpu(ARMCPU *cpu)
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 32/33] hw/intc/arm_gicv3_kvm: Pause all vCPU to ensure locking in KVM of resetting vCPU
2024-10-09 3:37 ` [PATCH RFC V4 30/33] target/arm/kvm, tcg: Handle SMCCC hypercall exits in VMM during PSCI_CPU_{ON, OFF} Salil Mehta via
2024-10-09 3:37 ` [PATCH RFC V4 31/33] target/arm/kvm: Write vCPU's state back to KVM on cold-reset Salil Mehta via
@ 2024-10-09 3:37 ` Salil Mehta via
2024-10-09 3:37 ` [PATCH RFC V4 33/33] hw/arm/virt: Expose cold-booted vCPUs as MADT GICC *Enabled* Salil Mehta via
2 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:37 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
vCPU reset can result in device access to VGIC CPU system registers using the
`IOCTL KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS` interface. When accessing these
registers in the KVM host, it is necessary to acquire a lock on all vCPUs during
the `vgic_v3_attr_regs_access()` operation.
This operation may fail if KVM is unable to acquire the necessary locks on all
vCPUs. Therefore, to ensure proper locking of the vCPU being reset and prevent
failures, we need to *pause all vCPUs* during this operation to facilitate
successful locking within the host.
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/intc/arm_gicv3_kvm.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 3e1e97d830..bcdbf83897 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -714,10 +714,19 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
return;
}
+ /*
+ * This shall be called even when vcpu is being hotplugged or onlined and
+ * other vcpus might be running. Host kernel KVM code to handle device
+ * access of IOCTLs KVM_{GET|SET}_DEVICE_ATTR might fail due to inability to
+ * grab vcpu locks for all the vcpus. Hence, we need to pause all vcpus to
+ * facilitate locking within host.
+ */
+ pause_all_vcpus();
/* Initialize to actual HW supported configuration */
kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
KVM_VGIC_ATTR(ICC_CTLR_EL1, c->gicr_typer),
&c->icc_ctlr_el1[GICV3_NS], false, &error_abort);
+ resume_all_vcpus();
c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
}
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH RFC V4 33/33] hw/arm/virt: Expose cold-booted vCPUs as MADT GICC *Enabled*
2024-10-09 3:37 ` [PATCH RFC V4 30/33] target/arm/kvm, tcg: Handle SMCCC hypercall exits in VMM during PSCI_CPU_{ON, OFF} Salil Mehta via
2024-10-09 3:37 ` [PATCH RFC V4 31/33] target/arm/kvm: Write vCPU's state back to KVM on cold-reset Salil Mehta via
2024-10-09 3:37 ` [PATCH RFC V4 32/33] hw/intc/arm_gicv3_kvm: Pause all vCPU to ensure locking in KVM of resetting vCPU Salil Mehta via
@ 2024-10-09 3:37 ` Salil Mehta via
2 siblings, 0 replies; 37+ messages in thread
From: Salil Mehta via @ 2024-10-09 3:37 UTC (permalink / raw)
To: qemu-devel, qemu-arm, mst
Cc: salil.mehta, maz, jean-philippe, jonathan.cameron, lpieralisi,
peter.maydell, richard.henderson, imammedo, andrew.jones, david,
philmd, eric.auger, will, ardb, oliver.upton, pbonzini, gshan,
rafael, borntraeger, alex.bennee, npiggin, harshpb, linux, darren,
ilkka, vishnu, karl.heubaum, miguel.luis, salil.mehta, zhukeqian1,
wangxiongfeng2, wangyanan55, jiakernel2, maobibo, lixianglai,
shahuang, zhao1.liu, linuxarm, gustavo.romero
Hotpluggable vCPUs must be exposed as "online-capable" according to the new UEFI
specification [1][2]. However, marking cold-booted vCPUs as "online-capable"
during boot may cause them to go undetected by legacy operating systems,
potentially leading to compatibility issues. Hence, the 'online-capable' and
'Enabled' bits in the GIC CPU Interface flags should not be mutually exclusive,
as they are now.
Since implementing this specification change may take time, it is necessary to
temporarily *disable* support for *unplugging* cold-booted vCPUs to maintain
compatibility with legacy OS environments.
As an alternative and temporary mitigation, we could introduce a property that
controls whether cold-booted vCPUs are marked as unpluggable. Community feedback
on this approach would be appreciated.
References:
[1] Original UEFI/ACPI proposed Change Bugzilla – TianoCore
Link: https://bugzilla.tianocore.org/show_bug.cgi?id=3706
[2] Advanced Configuration and Power Interface (ACPI) Specification, Release 6.5, Aug 29, 2022
Section: 5.2.12.14 GIC CPU Interface (GICC) Structure / Table 5.37: GICC CPU Interface Flags
Link: https://uefi.org/sites/default/files/resources/ACPI_Spec_6_5_Aug29.pdf (Pages 138, 140)
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
hw/arm/virt.c | 16 ++++++++++++++++
include/hw/core/cpu.h | 2 ++
2 files changed, 18 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5d440f9121..208f4ecfe1 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3176,6 +3176,10 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
virt_update_gic(vms, cs, true);
wire_gic_cpu_irqs(vms, cs);
}
+
+ if (!dev->hotplugged) {
+ cs->cold_booted = true;
+ }
}
static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
@@ -3255,6 +3259,18 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev,
return;
}
+ /*
+ * UEFI ACPI standard change is required to make both 'enabled' and the
+ * 'online-capable' bit co-exist instead of being mutually exclusive.
+ * check virt_acpi_get_gicc_flags() for more details.
+ *
+ * Disable the unplugging of cold-booted vCPUs as a temporary mitigation.
+ */
+ if (cs->cold_booted) {
+ error_setg(errp, "Hot-unplug of cold-booted CPU not supported!");
+ return;
+ }
+
if (cs->cpu_index == first_cpu->cpu_index) {
error_setg(errp, "Boot CPU(id%d=%d:%d:%d:%d) hot-unplug not supported",
first_cpu->cpu_index, cpu->socket_id, cpu->cluster_id,
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 2e62d5f1a5..8dcca3bcb7 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -570,6 +570,8 @@ struct CPUState {
uint32_t halted;
int32_t exception_index;
+ bool cold_booted;
+
AccelCPUState *accel;
/* Used to keep track of an outstanding cpu throttle thread for migration
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread