All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andre Przywara <andre.przywara@amd.com>
To: Anthony Liguori <anthony@codemonkey.ws>
Cc: qemu-devel@nongnu.org, Avi Kivity <avi@redhat.com>
Subject: [Qemu-devel] [PATCH 8/8] v2: add SRAT generation to BIOS
Date: Tue, 16 Dec 2008 15:21:03 +0100	[thread overview]
Message-ID: <4947B94F.4030206@amd.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 491 bytes --]

Signed-off-by: Andre Przywara <andre.przywara@amd.com>

-- 
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

[-- Attachment #2: qemunuma_v2_bios_srat.patch --]
[-- Type: text/x-patch, Size: 10099 bytes --]

# HG changeset patch
# User Andre Przywara <andre.przywara@amd.com>
# Date 1229435604 -3600
# Node ID eca1bcd2031b23e6ac744777571d63cff65126eb
# Parent  4f0a8ac2d88ffffc1dcd82785c1620553baa86da
add SRAT table generation to BIOS (preliminary patch)

diff -r 4f0a8ac2d88f -r eca1bcd2031b pc-bios/bios.diff
--- a/pc-bios/bios.diff	Tue Dec 16 14:52:48 2008 +0100
+++ b/pc-bios/bios.diff	Tue Dec 16 14:53:24 2008 +0100
@@ -130,7 +130,102 @@
                          regs.u.r32.ecx = 0x14;
 --- bochs-2.3.7.orig/bios/rombios32.c
 +++ bochs-2.3.7/bios/rombios32.c
-@@ -479,7 +479,12 @@
+@@ -393,6 +393,75 @@
+ unsigned long bios_table_cur_addr;
+ unsigned long bios_table_end_addr;
+ 
++static inline uint16_t le16_to_cpu(uint16_t x)
++{
++    return x; /* identity: no byte swap is performed (presumably the BIOS only runs little-endian) */
++}
++
++static inline uint32_t le32_to_cpu(uint32_t x)
++{
++    return x; /* identity, same as le16_to_cpu */
++}
++
++static inline uint64_t le64_to_cpu(uint64_t x)
++{
++    return x; /* identity, same as le16_to_cpu */
++}
++
++#ifdef BX_QEMU
++#define QEMU_CFG_CTL_PORT 0x510   /* port that qemu_cfg_select writes the item selector to */
++#define QEMU_CFG_DATA_PORT 0x511  /* port the selected item is read from, one byte per inb */
++#define QEMU_CFG_SIGNATURE  0x00  /* item checked against "QEMU" by qemu_cfg_port_probe */
++#define QEMU_CFG_ID         0x01
++#define QEMU_CFG_UUID       0x02
++#define QEMU_CFG_NUMA_NODES 0x07  /* read as a 16-bit node count by get_numa_nodes */
++#define QEMU_CFG_NUMA_VCPUS 0x08  /* read as one 64-bit CPU mask per node */
++#define QEMU_CFG_NUMA_MEM   0x09  /* read as one 64-bit memory length per node */
++
++int qemu_cfg_port; /* result of qemu_cfg_port_probe(): nonzero when the signature matched */
++
++void qemu_cfg_select(int f)
++{
++    outw(QEMU_CFG_CTL_PORT, f); /* write item selector f to the control port */
++}
++
++int qemu_cfg_port_probe(void)
++{
++    const char *sig = "QEMU";
++    int i;
++
++    /* Select the signature item, then expect the four bytes of sig in order. */
++    qemu_cfg_select(QEMU_CFG_SIGNATURE);
++    for (i = 0; i < 4; i++)
++        if (inb(QEMU_CFG_DATA_PORT) != sig[i])
++            return 0; /* mismatch: report the config interface as absent */
++
++    return 1;
++}
++
++void qemu_cfg_read(uint8_t *buf, int len)
++{
++    for (; len != 0; len--) /* each inb on the data port yields the next byte */
++        *buf++ = inb(QEMU_CFG_DATA_PORT);
++}
++
++uint32_t qemu_cfg_get32 (void)
++{
++    uint32_t raw; /* next four bytes of the currently selected item */
++
++    qemu_cfg_read ((uint8_t *)&raw, sizeof(raw));
++    return le32_to_cpu (raw);
++}
++
++uint64_t qemu_cfg_get64 (void)
++{
++    uint64_t raw; /* next eight bytes of the currently selected item */
++
++    qemu_cfg_read ((uint8_t *)&raw, sizeof(raw));
++    return le64_to_cpu (raw);
++}
++#endif
++
+ void uuid_probe(void)
+ {
+ #ifdef BX_QEMU
+@@ -420,6 +489,18 @@
+     }
+ }
+ 
++int get_numa_nodes(void)
++{
++    uint16_t count = 0; /* returned unchanged when no config port was detected */
++#ifdef BX_QEMU
++    if (qemu_cfg_port) {
++        qemu_cfg_select(QEMU_CFG_NUMA_NODES);
++        qemu_cfg_read((uint8_t *)&count, sizeof(count));
++    }
++#endif
++    return le16_to_cpu(count);
++}
++
+ void cpu_probe(void)
+ {
+     uint32_t eax, ebx, ecx, edx;
+@@ -479,7 +560,12 @@
          sipi_vector = AP_BOOT_ADDR >> 12;
          writel(APIC_BASE + APIC_ICR_LOW, 0x000C4600 | sipi_vector);
  
@@ -143,3 +238,242 @@
  
          smp_cpus = readw((void *)CPU_COUNT_ADDR);
      }
+@@ -1082,7 +1168,7 @@
+ struct rsdt_descriptor_rev1
+ {
+ 	ACPI_TABLE_HEADER_DEF                           /* ACPI common table header */
+-	uint32_t                             table_offset_entry [3]; /* Array of pointers to other */
++	uint32_t                             table_offset_entry [4]; /* Array of pointers to other */
+ 			 /* ACPI tables */
+ };
+ 
+@@ -1200,6 +1286,9 @@
+ #define APIC_XRUPT_SOURCE       8
+ #define APIC_RESERVED           9           /* 9 and greater are reserved */
+ 
++#define SRAT_PROCESSOR          0
++#define SRAT_MEMORY             1
++
+ /*
+  * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
+  */
+@@ -1207,6 +1296,40 @@
+ 	uint8_t                              type; \
+ 	uint8_t                              length;
+ 
++/*
++ * SRAT (NUMA topology description) table
++ */
++struct system_resource_affinity_table
++{
++    ACPI_TABLE_HEADER_DEF       /* common ACPI table header fields */
++    uint32_t    reserved1;      /* set to 1 by the SRAT builder in acpi_bios_init */
++    uint32_t    reserved2[2];
++};
++
++struct srat_processor_affinity
++{
++APIC_HEADER_DEF                      /* supplies the type and length members */
++	uint8_t     proximity_lo;        /* node number the SRAT builder assigns to this CPU */
++	uint8_t     local_apic_id;       /* the builder stores the CPU index here */
++	uint32_t    flags;               /* the builder writes 1 for every entry it emits */
++	uint8_t     local_sapic_eid;     /* written as 0 by the builder */
++	uint8_t     proximity_hi[3];     /* zeroed by the builder */
++	uint32_t    reserved;
++};
++
++struct srat_memory_affinity
++{
++	APIC_HEADER_DEF
++	uint8_t     proximity[4];   /* byte 0 holds the node number, bytes 1-3 are zeroed */
++	uint16_t    reserved1;
++	uint32_t    base_addr_low,base_addr_high;   /* 64-bit base split into 32-bit halves */
++	uint32_t    length_low,length_high;   /* 64-bit length split the same way */
++	uint32_t    reserved2;
++	uint32_t    flags;   /* 1 when the range is enabled, 0 otherwise */
++	uint32_t    reserved3[2];
++};
++	
++
+ /* Sub-structures for MADT */
+ 
+ struct madt_processor_apic
+@@ -1253,6 +1376,26 @@
+     return (-sum) & 0xff;
+ }
+ 
++static void read_config_numa_vcpus (uint32_t *nodes, int numnodes)
++{
++#ifdef BX_QEMU
++    uint64_t cpumask;
++    int node, cpu;
++
++    /* nodes[] must hold 64 entries; CPUs missing from every mask default to node 0 */
++    memset(nodes, 0, 64 * sizeof(*nodes));
++    qemu_cfg_select (QEMU_CFG_NUMA_VCPUS);
++    for (node = 0; node < numnodes; node++) {
++        /* one 64-bit mask per node: bit N set assigns CPU N to this node */
++        cpumask = qemu_cfg_get64();
++        for (cpu = 0; cpumask != 0 && cpu < 64; cpu++) {
++            if (cpumask & 1) nodes[cpu] = node;
++            cpumask >>= 1;
++        }
++    }
++#endif
++}
++
+ static void acpi_build_table_header(struct acpi_table_header *h,
+                                     char *sig, int len, uint8_t rev)
+ {
+@@ -1328,6 +1471,21 @@
+     return ssdt_ptr - ssdt;
+ }
+ 
++static void acpi_build_srat_memory(struct srat_memory_affinity *numamem,
++    uint64_t base, uint64_t len, int node, int enabled)
++{
++    /* Fill one SRAT memory entry; every 32-bit field goes through cpu_to_le32. */
++    numamem->type = SRAT_MEMORY;
++    numamem->length = sizeof(*numamem);
++    memset (numamem->proximity, 0 ,4);
++    numamem->proximity[0] = node;
++    numamem->flags = cpu_to_le32(!!enabled);
++    numamem->base_addr_low = cpu_to_le32(base & 0xFFFFFFFF);
++    numamem->base_addr_high = cpu_to_le32(base >> 32);
++    numamem->length_low = cpu_to_le32(len & 0xFFFFFFFF);
++    numamem->length_high = cpu_to_le32(len >> 32);
++}
++
+ /* base_addr must be a multiple of 4KB */
+ void acpi_bios_init(void)
+ {
+@@ -1336,10 +1494,12 @@
+     struct fadt_descriptor_rev1 *fadt;
+     struct facs_descriptor_rev1 *facs;
+     struct multiple_apic_table *madt;
++    struct system_resource_affinity_table *srat;
+     uint8_t *dsdt, *ssdt;
+     uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr;
+     uint32_t acpi_tables_size, madt_addr, madt_size;
+-    int i;
++    uint32_t srat_addr, srat_size;
++    int i, numanodes;
+ 
+     /* reserve memory space for tables */
+ #ifdef BX_USE_EBDA_TABLES
+@@ -1375,6 +1535,21 @@
+     ssdt = (void *)(addr);
+     addr += acpi_build_processor_ssdt(ssdt);
+ 
++    numanodes = get_numa_nodes();
++    if (numanodes > 0) {
++        addr = (addr + 7) & ~7;
++        srat_addr = addr;
++        srat_size = sizeof(*srat) +
++            sizeof(struct srat_processor_affinity) * smp_cpus +
++            sizeof(struct srat_memory_affinity) * (numanodes + 2);
++        srat = (void *)(addr);
++        addr += srat_size;
++    } else {
++        srat_addr = addr;
++        srat = (void*)(addr);
++        srat_size = 0;
++    }
++
+     addr = (addr + 7) & ~7;
+     madt_addr = addr;
+     madt_size = sizeof(*madt) +
+@@ -1405,8 +1580,10 @@
+     rsdt->table_offset_entry[0] = cpu_to_le32(fadt_addr);
+     rsdt->table_offset_entry[1] = cpu_to_le32(madt_addr);
+     rsdt->table_offset_entry[2] = cpu_to_le32(ssdt_addr);
+-    acpi_build_table_header((struct acpi_table_header *)rsdt,
+-                            "RSDT", sizeof(*rsdt), 1);
++    if (numanodes > 0)
++        rsdt->table_offset_entry[3] = cpu_to_le32(srat_addr);
++    acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT",
++        sizeof(*rsdt) - (numanodes > 0? 0: sizeof(uint32_t)), 1);
+ 
+     /* FADT */
+     memset(fadt, 0, sizeof(*fadt));
+@@ -1466,6 +1643,69 @@
+         acpi_build_table_header((struct acpi_table_header *)madt,
+                                 "APIC", madt_size, 1);
+     }
++
++    /* SRAT */
++#ifdef BX_QEMU
++    if (numanodes > 0) {
++        struct srat_processor_affinity *core;
++        struct srat_memory_affinity *numamem;
++        int slots;
++        uint64_t mem_len, mem_base, next_base = 0;
++        uint32_t nodes[64];
++
++        memset (srat, 0 , srat_size);
++        srat->reserved1=1;
++
++        read_config_numa_vcpus (nodes, numanodes);
++        core = (void*)(srat + 1);
++        for (i = 0; i < smp_cpus; ++i) {
++            core->type = SRAT_PROCESSOR;
++            core->length = sizeof(*core);
++            core->local_apic_id = i;
++            core->proximity_lo = nodes[i];
++            memset (core->proximity_hi, 0, 3);
++            core->local_sapic_eid = 0;
++            if (i < smp_cpus)
++                core->flags = cpu_to_le32(1);
++            else
++                core->flags = 0;
++            core++;
++        }
++        /* the memory map is a bit tricky, it contains at least one hole
++           from 640k-1M and possibly another one from 3.5G-4G. */
++        numamem = (void*)core; slots = 0;
++        qemu_cfg_select (QEMU_CFG_NUMA_MEM);
++        acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
++        next_base = 1024 * 1024; numamem++;slots++;
++        for (i = 1; i < numanodes + 1; ++i) {
++            mem_base = next_base;
++            mem_len = qemu_cfg_get64();
++            if (i == 1) mem_len -= 1024 * 1024;
++            next_base = mem_base + mem_len;
++
++            /* Cut out the PCI hole */
++            if (mem_base <= ram_size && next_base > ram_size) {
++                mem_len -= next_base - ram_size;
++                if (mem_len > 0) {
++                    acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
++                    numamem++; slots++;
++                }
++                mem_base = 1ULL << 32;
++                mem_len = next_base - ram_size;
++                next_base += (1ULL << 32) - ram_size;
++            }
++            acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
++            numamem++; slots++;
++        }
++        for (; slots < numanodes + 2; slots++) {
++            acpi_build_srat_memory(numamem, 0, 0, 0, 0);
++            numamem++;
++        }
++
++        acpi_build_table_header((struct acpi_table_header *)srat,
++                                "SRAT", srat_size, 1);
++    }
++#endif
+ }
+ 
+ /* SMBIOS entry point -- must be written to a 16-bit aligned address
+@@ -1982,6 +2222,10 @@
+ {
+     BX_INFO("Starting rombios32\n");
+ 
++    #ifdef BX_QEMU
++        qemu_cfg_port = qemu_cfg_port_probe();
++    #endif
++
+     ram_probe();
+ 
+     cpu_probe();

             reply	other threads:[~2008-12-16 14:20 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-12-16 14:21 Andre Przywara [this message]
2008-12-16 21:22 ` [Qemu-devel] Re: [PATCH 8/8] v2: add SRAT generation to BIOS Anthony Liguori
2008-12-16 22:57   ` Andre Przywara
2008-12-17  0:03     ` Anthony Liguori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4947B94F.4030206@amd.com \
    --to=andre.przywara@amd.com \
    --cc=anthony@codemonkey.ws \
    --cc=avi@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.