qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Gleb Natapov <gleb@redhat.com>
To: kevin@koconnor.net
Cc: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 18/21] Add SRAT ACPI table support.
Date: Thu,  8 Oct 2009 17:59:23 +0200	[thread overview]
Message-ID: <1255017566-26220-19-git-send-email-gleb@redhat.com> (raw)
In-Reply-To: <1255017566-26220-1-git-send-email-gleb@redhat.com>

Take NUMA topology info from the QEMU firmware configuration interface
(number of nodes, node for each (V)CPU and amount of memory) and build
a SRAT table describing this topology for the guest OS. Handles more than
4 GB of RAM by including a hole for 32bit PCI memory mapping.

Qemu pcbios commit 444f1226c11082d374b7e1361c6f5696e479642a

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 src/acpi.c     |  157 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/paravirt.c |   16 ++++++
 src/paravirt.h |    2 +
 3 files changed, 170 insertions(+), 5 deletions(-)

diff --git a/src/acpi.c b/src/acpi.c
index 3e7efc8..41ad0cb 100644
--- a/src/acpi.c
+++ b/src/acpi.c
@@ -151,7 +151,7 @@ struct multiple_apic_table
 } PACKED;
 
 
-/* Values for Type in APIC_HEADER_DEF */
+/* Values for Type in APIC sub-headers */
 
 #define APIC_PROCESSOR          0
 #define APIC_IO                 1
@@ -167,7 +167,7 @@ struct multiple_apic_table
 /*
  * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
  */
-#define APIC_HEADER_DEF   /* Common APIC sub-structure header */\
+#define ACPI_SUB_HEADER_DEF   /* Common ACPI sub-structure header */\
     u8  type;                               \
     u8  length;
 
@@ -175,7 +175,7 @@ struct multiple_apic_table
 
 struct madt_processor_apic
 {
-    APIC_HEADER_DEF
+    ACPI_SUB_HEADER_DEF
     u8  processor_id;           /* ACPI processor id */
     u8  local_apic_id;          /* Processor's local APIC id */
 #if 0
@@ -188,7 +188,7 @@ struct madt_processor_apic
 
 struct madt_io_apic
 {
-    APIC_HEADER_DEF
+    ACPI_SUB_HEADER_DEF
     u8  io_apic_id;             /* I/O APIC ID */
     u8  reserved;               /* Reserved - must be zero */
     u32 address;                /* APIC physical address */
@@ -199,7 +199,7 @@ struct madt_io_apic
 #define PCI_ISA_IRQ_MASK    0x0e20
 
 struct madt_intsrcovr {
-    APIC_HEADER_DEF
+    ACPI_SUB_HEADER_DEF
     u8  bus;
     u8  source;
     u32 gsi;
@@ -230,6 +230,43 @@ struct acpi_20_hpet {
 } PACKED;
 #define ACPI_HPET_ADDRESS 0xFED00000UL
 
+/*
+ * SRAT (NUMA topology description) table
+ */
+
+#define SRAT_PROCESSOR          0
+#define SRAT_MEMORY             1
+
+struct system_resource_affinity_table   /* fixed part of the SRAT; build_srat() places the sub-structures right after it */
+{
+    ACPI_TABLE_HEADER_DEF
+    u32    reserved1;       /* build_srat() stores 1 here */
+    u32    reserved2[2];    /* left zero by the memset in build_srat() */
+} PACKED;
+
+struct srat_processor_affinity   /* SRAT sub-structure of type SRAT_PROCESSOR; build_srat() emits one per CPU */
+{
+    ACPI_SUB_HEADER_DEF
+    u8     proximity_lo;        /* low byte of the node number read for this CPU */
+    u8     local_apic_id;       /* build_srat() stores the CPU index here */
+    u32    flags;               /* build_srat() writes 1 -- presumably "enabled"; confirm against the ACPI spec */
+    u8     local_sapic_eid;     /* always written as 0 by build_srat() */
+    u8     proximity_hi[3];     /* zeroed by build_srat() */
+    u32    reserved;
+} PACKED;
+
+struct srat_memory_affinity   /* SRAT sub-structure of type SRAT_MEMORY; one per memory range */
+{
+    ACPI_SUB_HEADER_DEF
+    u8     proximity[4];        /* byte 0 holds the node number; acpi_build_srat_memory() zeroes the rest */
+    u16    reserved1;
+    u32    base_addr_low,base_addr_high;    /* 64-bit range base, split into 32-bit halves */
+    u32    length_low,length_high;          /* 64-bit range length, split into 32-bit halves */
+    u32    reserved2;
+    u32    flags;               /* 1 when the range is enabled, 0 otherwise (see acpi_build_srat_memory) */
+    u32    reserved3[2];
+} PACKED;
+
 #include "acpi-dsdt.hex"
 
 static inline u16 cpu_to_le16(u16 x)
@@ -447,6 +484,115 @@ build_hpet(void)
     return hpet;
 }
 
+static void
+acpi_build_srat_memory(struct srat_memory_affinity *numamem,
+                       u64 base, u64 len, int node, int enabled)
+{   /* Fill *numamem as an SRAT memory entry describing [base, base+len) for the given node. */
+    numamem->type = SRAT_MEMORY;
+    numamem->length = sizeof(*numamem);
+    memset(numamem->proximity, 0, sizeof(numamem->proximity));
+    numamem->proximity[0] = node;   /* only the low byte of node is stored */
+    numamem->flags = cpu_to_le32(!!enabled);   /* any non-zero enabled becomes 1 */
+    numamem->base_addr_low = cpu_to_le32(base & 0xFFFFFFFF);   /* same byte-order handling as flags */
+    numamem->base_addr_high = cpu_to_le32(base >> 32);
+    numamem->length_low = cpu_to_le32(len & 0xFFFFFFFF);
+    numamem->length_high = cpu_to_le32(len >> 32);
+}
+
+#define SRAT_SIGNATURE 0x54415253 // SRAT
+static void *
+build_srat(void)   /* Build the SRAT from QEMU's NUMA data; returns NULL when no nodes are configured or an allocation fails. */
+{
+    int nb_numa_nodes = qemu_cfg_get_numa_nodes();
+
+    if (nb_numa_nodes == 0)
+        return NULL;
+
+    u64 *numadata = malloc_tmphigh(sizeof(u64) * (CountCPUs + nb_numa_nodes));   /* one u64 per CPU (its node), then one per node (its memory size) */
+    if (!numadata) {
+        dprintf(1, "Not enough memory for read numa data from VM!\n");
+        return NULL;
+    }
+
+    qemu_cfg_get_numa_data(numadata, CountCPUs + nb_numa_nodes);
+
+    struct system_resource_affinity_table *srat;
+    int srat_size = sizeof(*srat) +
+        sizeof(struct srat_processor_affinity) * CountCPUs +
+        sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2);   /* +2: the 0-640k entry and a possible split at the PCI hole */
+
+    srat = malloc_high(srat_size);
+    if (!srat) {
+        dprintf(1, "Not enough memory for srat table!\n");
+        return NULL;
+    }
+
+    memset(srat, 0, srat_size);
+    srat->reserved1=1;
+    struct srat_processor_affinity *core = (void*)(srat + 1);   /* processor entries start right after the fixed table */
+    int i;
+    u64 curnode;
+
+    for (i = 0; i < CountCPUs; ++i) {
+        core->type = SRAT_PROCESSOR;
+        core->length = sizeof(*core);
+        core->local_apic_id = i;
+        curnode = *numadata++;   /* consumes the per-CPU part of numadata */
+        core->proximity_lo = curnode;
+        memset(core->proximity_hi, 0, 3);
+        core->local_sapic_eid = 0;
+        if (i < CountCPUs)   /* NOTE(review): always true given the loop bound, so the else arm is unreachable here */
+            core->flags = cpu_to_le32(1);
+        else
+            core->flags = 0;
+        core++;
+    }
+
+
+    /* the memory map is a bit tricky, it contains at least one hole
+     * from 640k-1M and possibly another one from 3.5G-4G.
+     */
+    struct srat_memory_affinity *numamem = (void*)core;   /* memory entries follow the last processor entry */
+    int slots = 0;
+    u64 mem_len, mem_base, next_base = 0;
+
+    acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);   /* low memory 0-640k, attributed to node 0 */
+    next_base = 1024 * 1024;
+    numamem++;
+    slots++;
+    for (i = 1; i < nb_numa_nodes + 1; ++i) {   /* i is node number + 1 */
+        mem_base = next_base;
+        mem_len = *numadata++;   /* memory size reported for this node */
+        if (i == 1)
+            mem_len -= 1024 * 1024;   /* the first node's range starts at 1M, so drop the first 1M from its size */
+        next_base = mem_base + mem_len;
+
+        /* Cut out the PCI hole */
+        if (mem_base <= RamSize && next_base > RamSize) {   /* range straddles RamSize -- presumably the end of RAM below 4G; confirm */
+            mem_len -= next_base - RamSize;   /* keep only the part below RamSize */
+            if (mem_len > 0) {
+                acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
+                numamem++;
+                slots++;
+            }
+            mem_base = 1ULL << 32;   /* the remainder continues at 4G */
+            mem_len = next_base - RamSize;
+            next_base += (1ULL << 32) - RamSize;
+        }
+        acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
+        numamem++;
+        slots++;
+    }
+    for (; slots < nb_numa_nodes + 2; slots++) {   /* pad with disabled zero-length entries up to the slot count used in srat_size */
+        acpi_build_srat_memory(numamem, 0, 0, 0, 0);
+        numamem++;
+    }
+
+    build_header((void*)srat, SRAT_SIGNATURE, srat_size, 1);
+
+    return srat;
+}
+
 struct rsdp_descriptor *RsdpAddr;
 
 #define MAX_ACPI_TABLES 20
@@ -486,6 +632,7 @@ acpi_bios_init(void)
     ACPI_INIT_TABLE(build_ssdt());
     ACPI_INIT_TABLE(build_madt());
     ACPI_INIT_TABLE(build_hpet());
+    ACPI_INIT_TABLE(build_srat());
 
     u16 i, external_tables = qemu_cfg_acpi_additional_tables();
 
diff --git a/src/paravirt.c b/src/paravirt.c
index 8c08ce7..8fbeb9c 100644
--- a/src/paravirt.c
+++ b/src/paravirt.c
@@ -265,3 +265,19 @@ int qemu_cfg_smbios_load_external(int type, char **p, unsigned *nr_structs,
     return 0;
 }
 
+int qemu_cfg_get_numa_nodes(void)
+{
+    u64 raw_count;   /* QEMU_CFG_NUMA entry; build_srat() treats it as the number of NUMA nodes */
+
+    qemu_cfg_read_entry(&raw_count, QEMU_CFG_NUMA, sizeof(raw_count));
+
+    return (int)raw_count;   /* callers get the value narrowed to int */
+}
+
+void qemu_cfg_get_numa_data(u64 *data, int n)
+{
+    int idx = 0;   /* next u64 slot of data to fill */
+
+    while (idx < n)   /* reads nothing when n <= 0 */
+        qemu_cfg_read((u8*)&data[idx++], sizeof(u64));
+}
diff --git a/src/paravirt.h b/src/paravirt.h
index 2b2f314..04a6907 100644
--- a/src/paravirt.h
+++ b/src/paravirt.h
@@ -49,5 +49,7 @@ u16 qemu_cfg_smbios_entries(void);
 size_t qemu_cfg_smbios_load_field(int type, size_t offset, void *addr);
 int qemu_cfg_smbios_load_external(int type, char **p, unsigned *nr_structs,
                                   unsigned *max_struct_size, char *end);
+int qemu_cfg_get_numa_nodes(void);   /* value of the QEMU_CFG_NUMA entry, narrowed to int */
+void qemu_cfg_get_numa_data(u64 *data, int n);   /* reads n consecutive u64 values into data */
 
 #endif
-- 
1.6.3.3

  parent reply	other threads:[~2009-10-08 15:59 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-08 15:59 [Qemu-devel] [PATCH 00/21] Bring seabios and qemu pcbios closer together Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH v2 01/21] Add support for passing additional acpi tables from qemu Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH v2 02/21] Load SMBIOS entries and files " Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 03/21] Always create PCI interrupt override acpi tables Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 04/21] Correct default pci irq links Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 05/21] irq0override provided by qemu Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 06/21] Check at runtime if VM is KVM Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 07/21] Remove CONFIG_KVM compile option Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 08/21] Add rule to compile DSDT to make file Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 09/21] Use preprocessor for pci link routing Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 10/21] Advertise pci irqs as active high in DSDT Gleb Natapov
2009-10-09 10:56   ` Jamie Lokier
2009-10-09 11:37     ` Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 11/21] Restrict pci interrupts to irq 5/9/10/11 Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 12/21] Use extended interrupt descriptor for pci irqs Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 13/21] Remove irq 9 from the pci interrupt link resources Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 14/21] Provide gpe _L0x methods Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 15/21] Pci hotplug support Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 16/21] HPET support Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 17/21] Add 26 pci slots, bringing the total to 32 Gleb Natapov
2009-10-08 15:59 ` Gleb Natapov [this message]
2009-10-08 15:59 ` [Qemu-devel] [PATCH 19/21] Read max number of cpus from VM Gleb Natapov
2009-10-08 15:59 ` [Qemu-devel] [PATCH 20/21] Move qemu cfg init before smp init Gleb Natapov
2009-10-09  2:18   ` [Qemu-devel] " Kevin O'Connor
2009-10-08 15:59 ` [Qemu-devel] [PATCH 21/21] Use MaxCountCPUs during building of per cpu tables Gleb Natapov
2009-10-09  2:22   ` [Qemu-devel] " Kevin O'Connor
2009-10-09  6:37     ` Gleb Natapov
2009-10-09 13:44       ` Kevin O'Connor

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1255017566-26220-19-git-send-email-gleb@redhat.com \
    --to=gleb@redhat.com \
    --cc=kevin@koconnor.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).