From: Gleb Natapov <gleb@redhat.com>
To: kevin@koconnor.net
Cc: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 18/21] Add SRAT ACPI table support.
Date: Thu, 8 Oct 2009 17:59:23 +0200 [thread overview]
Message-ID: <1255017566-26220-19-git-send-email-gleb@redhat.com> (raw)
In-Reply-To: <1255017566-26220-1-git-send-email-gleb@redhat.com>
Take NUMA topology info from the QEMU firmware configuration interface
(number of nodes, node for each (V)CPU and amount of memory) and build
a SRAT table describing this topology for the guest OS. Handles more than
4 GB of RAM by including a hole for 32bit PCI memory mapping.
Qemu pcbios commit 444f1226c11082d374b7e1361c6f5696e479642a
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
src/acpi.c | 157 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
src/paravirt.c | 16 ++++++
src/paravirt.h | 2 +
3 files changed, 170 insertions(+), 5 deletions(-)
diff --git a/src/acpi.c b/src/acpi.c
index 3e7efc8..41ad0cb 100644
--- a/src/acpi.c
+++ b/src/acpi.c
@@ -151,7 +151,7 @@ struct multiple_apic_table
} PACKED;
-/* Values for Type in APIC_HEADER_DEF */
+/* Values for Type in APIC sub-headers */
#define APIC_PROCESSOR 0
#define APIC_IO 1
@@ -167,7 +167,7 @@ struct multiple_apic_table
/*
* MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
*/
-#define APIC_HEADER_DEF /* Common APIC sub-structure header */\
+#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\
u8 type; \
u8 length;
@@ -175,7 +175,7 @@ struct multiple_apic_table
struct madt_processor_apic
{
- APIC_HEADER_DEF
+ ACPI_SUB_HEADER_DEF
u8 processor_id; /* ACPI processor id */
u8 local_apic_id; /* Processor's local APIC id */
#if 0
@@ -188,7 +188,7 @@ struct madt_processor_apic
struct madt_io_apic
{
- APIC_HEADER_DEF
+ ACPI_SUB_HEADER_DEF
u8 io_apic_id; /* I/O APIC ID */
u8 reserved; /* Reserved - must be zero */
u32 address; /* APIC physical address */
@@ -199,7 +199,7 @@ struct madt_io_apic
#define PCI_ISA_IRQ_MASK 0x0e20
struct madt_intsrcovr {
- APIC_HEADER_DEF
+ ACPI_SUB_HEADER_DEF
u8 bus;
u8 source;
u32 gsi;
@@ -230,6 +230,43 @@ struct acpi_20_hpet {
} PACKED;
#define ACPI_HPET_ADDRESS 0xFED00000UL
+/*
+ * SRAT (NUMA topology description) table
+ */
+
+#define SRAT_PROCESSOR 0
+#define SRAT_MEMORY 1
+
+struct system_resource_affinity_table /* fixed leading part of the SRAT; build_srat() places the affinity entries directly after it */
+{
+ ACPI_TABLE_HEADER_DEF /* header fields come from a macro defined outside this hunk */
+ u32 reserved1; /* build_srat() stores 1 here */
+ u32 reserved2[2]; /* left zero by the memset in build_srat() */
+} PACKED;
+
+struct srat_processor_affinity /* SRAT entry tying one CPU to a node; build_srat() emits one per CPU with type SRAT_PROCESSOR */
+{
+ ACPI_SUB_HEADER_DEF /* expands to: u8 type; u8 length; */
+ u8 proximity_lo; /* low 8 bits of the node number read from QEMU */
+ u8 local_apic_id; /* build_srat() stores the CPU index here */
+ u32 flags; /* build_srat() writes 1 for every CPU it emits */
+ u8 local_sapic_eid; /* always 0 in build_srat() */
+ u8 proximity_hi[3]; /* presumably the upper bytes of the node number; build_srat() zeroes them */
+ u32 reserved; /* never written explicitly; stays zero from the table-wide memset */
+} PACKED;
+
+struct srat_memory_affinity /* SRAT entry tying one memory range to a node; filled in by acpi_build_srat_memory() */
+{
+ ACPI_SUB_HEADER_DEF /* expands to: u8 type; u8 length; */
+ u8 proximity[4]; /* node number; acpi_build_srat_memory() sets byte 0 and zeroes the rest */
+ u16 reserved1;
+ u32 base_addr_low,base_addr_high; /* 64-bit start address of the range, split into 32-bit halves */
+ u32 length_low,length_high; /* 64-bit size of the range in bytes, split into 32-bit halves */
+ u32 reserved2;
+ u32 flags; /* acpi_build_srat_memory() stores 1 when the entry is enabled, 0 otherwise */
+ u32 reserved3[2];
+} PACKED;
+
#include "acpi-dsdt.hex"
static inline u16 cpu_to_le16(u16 x)
@@ -447,6 +484,115 @@ build_hpet(void)
return hpet;
}
+/*
+ * Fill in one SRAT memory affinity entry.
+ *
+ *   numamem  entry to fill
+ *   base     start address of the range
+ *   len      size of the range in bytes
+ *   node     NUMA node number (only the low byte is stored)
+ *   enabled  non-zero marks the entry as enabled (flags = 1), zero as
+ *            disabled (flags = 0)
+ *
+ * The reserved fields are not written here; build_srat() zeroes the whole
+ * table before calling this function.
+ */
+static void
+acpi_build_srat_memory(struct srat_memory_affinity *numamem,
+                       u64 base, u64 len, int node, int enabled)
+{
+    numamem->type = SRAT_MEMORY;
+    numamem->length = sizeof(*numamem);
+    memset(numamem->proximity, 0, sizeof(numamem->proximity));
+    numamem->proximity[0] = node;
+    numamem->flags = cpu_to_le32(!!enabled);
+    /* Convert the address/length words the same way as flags so that every
+     * multi-byte field of the entry is stored consistently. */
+    numamem->base_addr_low = cpu_to_le32((u32)(base & 0xFFFFFFFF));
+    numamem->base_addr_high = cpu_to_le32((u32)(base >> 32));
+    numamem->length_low = cpu_to_le32((u32)(len & 0xFFFFFFFF));
+    numamem->length_high = cpu_to_le32((u32)(len >> 32));
+}
+
+#define SRAT_SIGNATURE 0x54415253 //SRAT
+static void *
+build_srat(void)
+{ /* Build the SRAT from the NUMA data supplied by QEMU: one processor
+ * entry per CPU, followed by nb_numa_nodes + 2 memory entries.
+ * Returns the table, or NULL when QEMU reports no NUMA nodes or an
+ * allocation fails.
+ */
+ int nb_numa_nodes = qemu_cfg_get_numa_nodes();
+
+ if (nb_numa_nodes == 0)
+ return NULL;
+
+ u64 *numadata = malloc_tmphigh(sizeof(u64) * (CountCPUs + nb_numa_nodes)); /* CountCPUs node ids first, then one memory size per node */
+ if (!numadata) {
+ dprintf(1, "Not enough memory for read numa data from VM!\n");
+ return NULL;
+ }
+
+ qemu_cfg_get_numa_data(numadata, CountCPUs + nb_numa_nodes);
+
+ struct system_resource_affinity_table *srat;
+ int srat_size = sizeof(*srat) +
+ sizeof(struct srat_processor_affinity) * CountCPUs +
+ sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2); /* +2: the 0-640k entry plus one extra for the node split at the PCI hole */
+
+ srat = malloc_high(srat_size);
+ if (!srat) {
+ dprintf(1, "Not enough memory for srat table!\n");
+ return NULL;
+ }
+
+ memset(srat, 0, srat_size);
+ srat->reserved1=1;
+ struct srat_processor_affinity *core = (void*)(srat + 1); /* entries start right after the fixed part of the table */
+ int i;
+ u64 curnode;
+
+ for (i = 0; i < CountCPUs; ++i) {
+ core->type = SRAT_PROCESSOR;
+ core->length = sizeof(*core);
+ core->local_apic_id = i;
+ curnode = *numadata++; /* consumes the per-CPU node ids; numadata no longer points at the start of the allocation */
+ core->proximity_lo = curnode; /* truncated to the low 8 bits */
+ memset(core->proximity_hi, 0, 3);
+ core->local_sapic_eid = 0;
+ if (i < CountCPUs) /* always true while the loop bound is CountCPUs, so the else branch is never taken here */
+ core->flags = cpu_to_le32(1);
+ else
+ core->flags = 0;
+ core++;
+ }
+
+
+ /* the memory map is a bit tricky, it contains at least one hole
+ * from 640k-1M and possibly another one from 3.5G-4G.
+ */
+ struct srat_memory_affinity *numamem = (void*)core; /* memory entries follow the processor entries */
+ int slots = 0;
+ u64 mem_len, mem_base, next_base = 0;
+
+ acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1); /* 0-640k, assigned to node 0 */
+ next_base = 1024 * 1024;
+ numamem++;
+ slots++;
+ for (i = 1; i < nb_numa_nodes + 1; ++i) { /* i is the node number plus one */
+ mem_base = next_base;
+ mem_len = *numadata++;
+ if (i == 1)
+ mem_len -= 1024 * 1024; /* node 0's size presumably counts from address 0, while this range starts at 1M */
+ next_base = mem_base + mem_len;
+
+ /* Cut out the PCI hole */
+ if (mem_base <= RamSize && next_base > RamSize) { /* this node's range crosses RamSize */
+ mem_len -= next_base - RamSize; /* keep only the part below RamSize */
+ if (mem_len > 0) {
+ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
+ numamem++;
+ slots++;
+ }
+ mem_base = 1ULL << 32; /* the remainder of the node is placed at 4G */
+ mem_len = next_base - RamSize;
+ next_base += (1ULL << 32) - RamSize;
+ }
+ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
+ numamem++;
+ slots++;
+ }
+ for (; slots < nb_numa_nodes + 2; slots++) { /* pad the unused slots with disabled, zero-length entries */
+ acpi_build_srat_memory(numamem, 0, 0, 0, 0);
+ numamem++;
+ }
+
+ build_header((void*)srat, SRAT_SIGNATURE, srat_size, 1);
+
+ return srat;
+}
+
struct rsdp_descriptor *RsdpAddr;
#define MAX_ACPI_TABLES 20
@@ -486,6 +632,7 @@ acpi_bios_init(void)
ACPI_INIT_TABLE(build_ssdt());
ACPI_INIT_TABLE(build_madt());
ACPI_INIT_TABLE(build_hpet());
+ ACPI_INIT_TABLE(build_srat());
u16 i, external_tables = qemu_cfg_acpi_additional_tables();
diff --git a/src/paravirt.c b/src/paravirt.c
index 8c08ce7..8fbeb9c 100644
--- a/src/paravirt.c
+++ b/src/paravirt.c
@@ -265,3 +265,19 @@ int qemu_cfg_smbios_load_external(int type, char **p, unsigned *nr_structs,
return 0;
}
+/*
+ * Read the 64-bit value stored under QEMU_CFG_NUMA and return it as an int.
+ * build_srat() uses the result as the number of NUMA nodes and builds no
+ * SRAT when it is 0.
+ */
+int qemu_cfg_get_numa_nodes(void)
+{
+    /* Start from 0 so that a read which leaves the buffer untouched is
+     * reported as "no NUMA nodes" instead of an indeterminate count. */
+    u64 cnt = 0;
+
+    qemu_cfg_read_entry(&cnt, QEMU_CFG_NUMA, sizeof(cnt));
+
+    return (int)cnt;
+}
+
+/*
+ * Read n consecutive 64-bit values from the QEMU config interface into
+ * data[0..n-1], one qemu_cfg_read() call per value.  Nothing is read when
+ * n <= 0.
+ *
+ * No config entry is selected here.  NOTE(review): presumably the caller
+ * has just read QEMU_CFG_NUMA through qemu_cfg_get_numa_nodes(), so the
+ * reads continue with the NUMA data that follows the count - confirm.
+ */
+void qemu_cfg_get_numa_data(u64 *data, int n)
+{
+    int idx = 0;
+
+    while (idx < n) {
+        qemu_cfg_read((u8*)&data[idx], sizeof(data[idx]));
+        idx++;
+    }
+}
diff --git a/src/paravirt.h b/src/paravirt.h
index 2b2f314..04a6907 100644
--- a/src/paravirt.h
+++ b/src/paravirt.h
@@ -49,5 +49,7 @@ u16 qemu_cfg_smbios_entries(void);
size_t qemu_cfg_smbios_load_field(int type, size_t offset, void *addr);
int qemu_cfg_smbios_load_external(int type, char **p, unsigned *nr_structs,
unsigned *max_struct_size, char *end);
+int qemu_cfg_get_numa_nodes(void);
+void qemu_cfg_get_numa_data(u64 *data, int n);
#endif
--
1.6.3.3
next prev parent reply other threads:[~2009-10-08 15:59 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-08 15:59 [Qemu-devel] [PATCH 00/21] Bring seabios and qemu pcbios closer together Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH v2 01/21] Add support for passing additional acpi tables from qemu Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH v2 02/21] Load SMBIOS entries and files " Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 03/21] Always create PCI interrupt override acpi tables Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 04/21] Correct default pci irq links Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 05/21] irq0override provided by qemu Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 06/21] Check at runtime if VM is KVM Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 07/21] Remove CONFIG_KVM compile option Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 08/21] Add rule to compile DSDT to make file Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 09/21] Use preprocessor for pci link routing Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 10/21] Advertise pci irqs as active high in DSDT Gleb Natapov
2009-10-09 10:56 ` Jamie Lokier
2009-10-09 11:37 ` Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 11/21] Restrict pci interrupts to irq 5/9/10/11 Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 12/21] Use extended interrupt descriptor for pci irqs Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 13/21] Remove irq 9 from the pci interrupt link resources Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 14/21] Provide gpe _L0x methods Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 15/21] Pci hotplug support Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 16/21] HPET support Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 17/21] Add 26 pci slots, bringing the total to 32 Gleb Natapov
2009-10-08 15:59 ` Gleb Natapov [this message]
2009-10-08 15:59 ` [Qemu-devel] [PATCH 19/21] Read max number of cpus from VM Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 20/21] Move qemu cfg init before smp init Gleb Natapov
2009-10-09 2:18 ` [Qemu-devel] " Kevin O'Connor
2009-10-08 15:59 ` [Qemu-devel] [PATCH 21/21] Use MaxCountCPUs during building of per cpu tables Gleb Natapov
2009-10-09 2:22 ` [Qemu-devel] " Kevin O'Connor
2009-10-09 6:37 ` Gleb Natapov
2009-10-09 13:44 ` Kevin O'Connor
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1255017566-26220-19-git-send-email-gleb@redhat.com \
--to=gleb@redhat.com \
--cc=kevin@koconnor.net \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).