qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC PATCH v1 1/3] spapr: Introduce pseries-2.7 machine type
       [not found] <1464932984-26623-1-git-send-email-bharata@linux.vnet.ibm.com>
@ 2016-06-03  5:49 ` Bharata B Rao
  2016-06-03  5:49 ` [Qemu-devel] [RFC PATCH v1 2/3] spapr: Remove alignment gap b/n RAM and hotplug regions Bharata B Rao
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Bharata B Rao @ 2016-06-03  5:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: david, mdroth, nfont, aik, qemu-ppc, Bharata B Rao

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 44e401a..30b9731 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2344,18 +2344,36 @@ static const TypeInfo spapr_machine_info = {
     type_init(spapr_machine_register_##suffix)
 
 /*
+ * pseries-2.7
+ */
+static void spapr_machine_2_7_instance_options(MachineState *machine)
+{
+}
+
+static void spapr_machine_2_7_class_options(MachineClass *mc)
+{
+    /* Defaults for the latest behaviour inherited from the base class */
+}
+
+DEFINE_SPAPR_MACHINE(2_7, "2.7", true);
+
+/*
  * pseries-2.6
  */
+#define SPAPR_COMPAT_2_6 \
+    HW_COMPAT_2_6
+
 static void spapr_machine_2_6_instance_options(MachineState *machine)
 {
 }
 
 static void spapr_machine_2_6_class_options(MachineClass *mc)
 {
-    /* Defaults for the latest behaviour inherited from the base class */
+    spapr_machine_2_7_class_options(mc);
+    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_6);
 }
 
-DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
+DEFINE_SPAPR_MACHINE(2_6, "2.6", false);
 
 /*
  * pseries-2.5
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Qemu-devel] [RFC PATCH v1 2/3] spapr: Remove alignment gap b/n RAM and hotplug regions
       [not found] <1464932984-26623-1-git-send-email-bharata@linux.vnet.ibm.com>
  2016-06-03  5:49 ` [Qemu-devel] [RFC PATCH v1 1/3] spapr: Introduce pseries-2.7 machine type Bharata B Rao
@ 2016-06-03  5:49 ` Bharata B Rao
  2016-06-03  5:49 ` [Qemu-devel] [RFC PATCH v1 3/3] spapr: spapr: Work around the memory hotplug failure with DDW Bharata B Rao
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Bharata B Rao @ 2016-06-03  5:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: david, mdroth, nfont, aik, qemu-ppc, Bharata B Rao

Let the alignment b/n RAM and memory hotplug region be equal to
LMB size (256MB) so that there is no gap b/n RAM and hotplug region.
This new alignment is true for only pseries-2.7 onwards and the older
machine types continue to have the earlier 1GB alignment.

Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c         | 11 ++++++++++-
 include/hw/ppc/spapr.h |  5 ++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 30b9731..623c35f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1825,7 +1825,7 @@ static void ppc_spapr_init(MachineState *machine)
         }
 
         spapr->hotplug_memory.base = ROUND_UP(machine->ram_size,
-                                              SPAPR_HOTPLUG_MEM_ALIGN);
+                                              smc->hotplug_alignment);
         memory_region_init(&spapr->hotplug_memory.mr, OBJECT(spapr),
                            "hotplug-memory", hotplug_mem_size);
         memory_region_add_subregion(sysmem, spapr->hotplug_memory.base,
@@ -2294,6 +2294,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
 
     smc->dr_lmb_enabled = true;
+    smc->hotplug_alignment = SPAPR_MEMORY_BLOCK_SIZE;
     fwc->get_dev_path = spapr_get_fw_dev_path;
     nc->nmi_monitor_handler = spapr_nmi;
 }
@@ -2369,8 +2370,16 @@ static void spapr_machine_2_6_instance_options(MachineState *machine)
 
 static void spapr_machine_2_6_class_options(MachineClass *mc)
 {
+    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
     spapr_machine_2_7_class_options(mc);
     SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_6);
+
+    /*
+     * 2.6 and older types supported 1GB alignment gap b/n RAM
+     * and hotplug memory region.
+     */
+    smc->hotplug_alignment = G_BYTE;
 }
 
 DEFINE_SPAPR_MACHINE(2_6, "2.6", false);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 971df3d..b2aeb15 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -37,6 +37,8 @@ struct sPAPRMachineClass {
     /*< public >*/
     bool dr_lmb_enabled;       /* enable dynamic-reconfig/hotplug of LMBs */
     bool use_ohci_by_default;  /* use USB-OHCI instead of XHCI */
+    hwaddr hotplug_alignment;  /* controls the alignment b/n RAM and hotplug
+                                  regions */
 };
 
 /**
@@ -610,9 +612,6 @@ int spapr_rng_populate_dt(void *fdt);
  */
 #define SPAPR_MAX_RAM_SLOTS     32
 
-/* 1GB alignment for hotplug memory region */
-#define SPAPR_HOTPLUG_MEM_ALIGN (1ULL << 30)
-
 /*
  * Number of 32 bit words in each LMB list entry in ibm,dynamic-memory
  * property under ibm,dynamic-reconfiguration-memory node.
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Qemu-devel] [RFC PATCH v1 3/3] spapr: spapr: Work around the memory hotplug failure with DDW
       [not found] <1464932984-26623-1-git-send-email-bharata@linux.vnet.ibm.com>
  2016-06-03  5:49 ` [Qemu-devel] [RFC PATCH v1 1/3] spapr: Introduce pseries-2.7 machine type Bharata B Rao
  2016-06-03  5:49 ` [Qemu-devel] [RFC PATCH v1 2/3] spapr: Remove alignment gap b/n RAM and hotplug regions Bharata B Rao
@ 2016-06-03  5:49 ` Bharata B Rao
       [not found] ` <201606030550.u535joke039889@mx0a-001b2d01.pphosted.com>
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Bharata B Rao @ 2016-06-03  5:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: david, mdroth, nfont, aik, qemu-ppc, Bharata B Rao

Memory hotplug can fail for some combinations of RAM and maxmem when
DDW is enabled in the presence of devices like nec-usb-xhci. DDW depends
on maximum addressable memory returned by guest and this value is currently
being calculated wrongly by the guest kernel routine memory_hotplug_max().
While there is an attempt to fix the guest kernel, this patch works
around the problem within QEMU itself.

memory_hotplug_max() routine in the guest kernel arrives at max
addressable memory by multiplying lmb-size with the lmb-count obtained
from ibm,dynamic-memory property. There are two assumptions here:

- All LMBs are part of ibm,dynamic memory: This is not true for PowerKVM
  where only hot-pluggable LMBs are present in this property.
- The memory area comprising of RAM and hotplug region is contiguous: This
  needn't be true always for PowerKVM as there can be gap between
  boot time RAM and hotplug region.

This work around involves having all the LMBs (RMA, rest of the boot time
LMBs and hot-pluggable LMBs) as part of ibm,dynamic-memory so that
guest kernel's calculation of max addressable memory comes out correct
resulting in correct DDW value which prevents memory hotplug failures.
memory@0 is created for RMA, but RMA LMBs are also represented as
"reserved" LMBs in ibm,dynamic-memory. Parts of this are essenitally a
revert of e8f986fc57a664a74b9f685b466506366a15201b.

In addition to this, the alignment of hotplug memory region is reduced from
current 1G to 256M (LMB size in PowerKVM) so that we don't end up with any
gaps between boot time RAM and hotplug region.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c         | 59 +++++++++++++++++++++++++++++++++++---------------
 include/hw/ppc/spapr.h |  5 +++--
 2 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 623c35f..3dfbc37 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -569,7 +569,6 @@ static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt)
         }
         if (!mem_start) {
             /* ppc_spapr_init() checks for rma_size <= node0_size already */
-            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
             mem_start += spapr->rma_size;
             node_size -= spapr->rma_size;
         }
@@ -762,18 +761,13 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
     int ret, i, offset;
     uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
     uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
-    uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
+    uint32_t nr_rma_lmbs = spapr->rma_size / lmb_size;
+    uint32_t nr_lmbs = machine->maxram_size / lmb_size;
+    uint32_t nr_assigned_lmbs = machine->ram_size / lmb_size;
     uint32_t *int_buf, *cur_index, buf_len;
     int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
 
     /*
-     * Don't create the node if there are no DR LMBs.
-     */
-    if (!nr_lmbs) {
-        return 0;
-    }
-
-    /*
      * Allocate enough buffer size to fit in ibm,dynamic-memory
      * or ibm,associativity-lookup-arrays
      */
@@ -805,9 +799,15 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
     for (i = 0; i < nr_lmbs; i++) {
         sPAPRDRConnector *drc;
         sPAPRDRConnectorClass *drck;
-        uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;;
+        uint64_t addr;
         uint32_t *dynamic_memory = cur_index;
 
+        if (i < nr_assigned_lmbs) {
+            addr = i * lmb_size;
+        } else {
+            addr = (i - nr_assigned_lmbs) * lmb_size +
+                    spapr->hotplug_memory.base;
+        }
         drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
                                        addr/lmb_size);
         g_assert(drc);
@@ -820,7 +820,11 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
         dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
         if (addr < machine->ram_size ||
                     memory_region_present(get_system_memory(), addr)) {
-            dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
+            if (i < nr_rma_lmbs) {
+                dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED);
+            } else {
+                dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
+            }
         } else {
             dynamic_memory[5] = cpu_to_be32(0);
         }
@@ -882,6 +886,8 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
     /* Generate ibm,dynamic-reconfiguration-memory node if required */
     if (memory_update && smc->dr_lmb_enabled) {
         _FDT((spapr_populate_drconf_memory(spapr, fdt)));
+    } else {
+        _FDT((spapr_populate_memory(spapr, fdt)));
     }
 
     /* Pack resulting tree */
@@ -919,10 +925,23 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
     /* open out the base tree into a temp buffer for the final tweaks */
     _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
 
-    ret = spapr_populate_memory(spapr, fdt);
-    if (ret < 0) {
-        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
-        exit(1);
+    /*
+     * Add memory@0 node to represent RMA. Rest of the memory is either
+     * represented by memory nodes or ibm,dynamic-reconfiguration-memory
+     * node later during ibm,client-architecture-support call.
+     *
+     * If NUMA is configured, ensure that memory@0 ends up in the
+     * first memory-less node.
+     */
+    if (nb_numa_nodes) {
+        for (i = 0; i < nb_numa_nodes; ++i) {
+            if (numa_info[i].node_mem) {
+                spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
+                break;
+            }
+        }
+    } else {
+        spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size);
     }
 
     ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
@@ -1654,14 +1673,20 @@ static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr)
 {
     MachineState *machine = MACHINE(spapr);
     uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
-    uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
+    uint32_t nr_lmbs = machine->maxram_size / lmb_size;
+    uint32_t nr_assigned_lmbs = machine->ram_size / lmb_size;
     int i;
 
     for (i = 0; i < nr_lmbs; i++) {
         sPAPRDRConnector *drc;
         uint64_t addr;
 
-        addr = i * lmb_size + spapr->hotplug_memory.base;
+        if (i < nr_assigned_lmbs) {
+            addr = i * lmb_size;
+        } else {
+            addr = (i - nr_assigned_lmbs) * lmb_size +
+                    spapr->hotplug_memory.base;
+        }
         drc = spapr_dr_connector_new(OBJECT(spapr), SPAPR_DR_CONNECTOR_TYPE_LMB,
                                      addr/lmb_size);
         qemu_register_reset(spapr_drc_reset, drc);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index b2aeb15..e5ef979 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -619,9 +619,10 @@ int spapr_rng_populate_dt(void *fdt);
 #define SPAPR_DR_LMB_LIST_ENTRY_SIZE 6
 
 /*
- * This flag value defines the LMB as assigned in ibm,dynamic-memory
- * property under ibm,dynamic-reconfiguration-memory node.
+ * Defines for flag value in ibm,dynamic-memory property under
+ * ibm,dynamic-reconfiguration-memory node.
  */
 #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008
+#define SPAPR_LMB_FLAGS_RESERVED 0x00000080
 
 #endif /* !defined (__HW_SPAPR_H__) */
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v1 1/3] spapr: Introduce pseries-2.7 machine type
       [not found] ` <201606030550.u535joke039889@mx0a-001b2d01.pphosted.com>
@ 2016-06-03  6:59   ` David Gibson
  0 siblings, 0 replies; 7+ messages in thread
From: David Gibson @ 2016-06-03  6:59 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: qemu-devel, mdroth, nfont, aik, qemu-ppc

[-- Attachment #1: Type: text/plain, Size: 1661 bytes --]

On Fri, Jun 03, 2016 at 11:19:42AM +0530, Bharata B Rao wrote:
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>

Applied to ppc-for-2.7, we were always going to need this at some point.

> ---
>  hw/ppc/spapr.c | 22 ++++++++++++++++++++--
>  1 file changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 44e401a..30b9731 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2344,18 +2344,36 @@ static const TypeInfo spapr_machine_info = {
>      type_init(spapr_machine_register_##suffix)
>  
>  /*
> + * pseries-2.7
> + */
> +static void spapr_machine_2_7_instance_options(MachineState *machine)
> +{
> +}
> +
> +static void spapr_machine_2_7_class_options(MachineClass *mc)
> +{
> +    /* Defaults for the latest behaviour inherited from the base class */
> +}
> +
> +DEFINE_SPAPR_MACHINE(2_7, "2.7", true);
> +
> +/*
>   * pseries-2.6
>   */
> +#define SPAPR_COMPAT_2_6 \
> +    HW_COMPAT_2_6
> +
>  static void spapr_machine_2_6_instance_options(MachineState *machine)
>  {
>  }
>  
>  static void spapr_machine_2_6_class_options(MachineClass *mc)
>  {
> -    /* Defaults for the latest behaviour inherited from the base class */
> +    spapr_machine_2_7_class_options(mc);
> +    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_6);
>  }
>  
> -DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
> +DEFINE_SPAPR_MACHINE(2_6, "2.6", false);
>  
>  /*
>   * pseries-2.5

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v1 2/3] spapr: Remove alignment gap b/n RAM and hotplug regions
       [not found] ` <201606030550.u535jnsX039604@mx0a-001b2d01.pphosted.com>
@ 2016-06-03  7:09   ` David Gibson
  0 siblings, 0 replies; 7+ messages in thread
From: David Gibson @ 2016-06-03  7:09 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: qemu-devel, mdroth, nfont, aik, qemu-ppc

[-- Attachment #1: Type: text/plain, Size: 3460 bytes --]

On Fri, Jun 03, 2016 at 11:19:43AM +0530, Bharata B Rao wrote:
> Let the alignment b/n RAM and memory hotplug region be equal to
> LMB size (256MB) so that there is no gap b/n RAM and hotplug region.
> This new alignment is true for only pseries-2.7 onwards and the older
> machine types continue to have the earlier 1GB alignment.
> 
> Suggested-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>

Code looks fine, but the commit message needs to explain the rationale
for this (IIUC it's essentially due to a guest bug processing the dt
information describing the gap).

> ---
>  hw/ppc/spapr.c         | 11 ++++++++++-
>  include/hw/ppc/spapr.h |  5 ++---
>  2 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 30b9731..623c35f 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1825,7 +1825,7 @@ static void ppc_spapr_init(MachineState *machine)
>          }
>  
>          spapr->hotplug_memory.base = ROUND_UP(machine->ram_size,
> -                                              SPAPR_HOTPLUG_MEM_ALIGN);
> +                                              smc->hotplug_alignment);
>          memory_region_init(&spapr->hotplug_memory.mr, OBJECT(spapr),
>                             "hotplug-memory", hotplug_mem_size);
>          memory_region_add_subregion(sysmem, spapr->hotplug_memory.base,
> @@ -2294,6 +2294,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
>  
>      smc->dr_lmb_enabled = true;
> +    smc->hotplug_alignment = SPAPR_MEMORY_BLOCK_SIZE;
>      fwc->get_dev_path = spapr_get_fw_dev_path;
>      nc->nmi_monitor_handler = spapr_nmi;
>  }
> @@ -2369,8 +2370,16 @@ static void spapr_machine_2_6_instance_options(MachineState *machine)
>  
>  static void spapr_machine_2_6_class_options(MachineClass *mc)
>  {
> +    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> +
>      spapr_machine_2_7_class_options(mc);
>      SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_6);
> +
> +    /*
> +     * 2.6 and older types supported 1GB alignment gap b/n RAM
> +     * and hotplug memory region.
> +     */
> +    smc->hotplug_alignment = G_BYTE;
>  }
>  
>  DEFINE_SPAPR_MACHINE(2_6, "2.6", false);
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 971df3d..b2aeb15 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -37,6 +37,8 @@ struct sPAPRMachineClass {
>      /*< public >*/
>      bool dr_lmb_enabled;       /* enable dynamic-reconfig/hotplug of LMBs */
>      bool use_ohci_by_default;  /* use USB-OHCI instead of XHCI */
> +    hwaddr hotplug_alignment;  /* controls the alignment b/n RAM and hotplug
> +                                  regions */
>  };
>  
>  /**
> @@ -610,9 +612,6 @@ int spapr_rng_populate_dt(void *fdt);
>   */
>  #define SPAPR_MAX_RAM_SLOTS     32
>  
> -/* 1GB alignment for hotplug memory region */
> -#define SPAPR_HOTPLUG_MEM_ALIGN (1ULL << 30)
> -
>  /*
>   * Number of 32 bit words in each LMB list entry in ibm,dynamic-memory
>   * property under ibm,dynamic-reconfiguration-memory node.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v1 3/3] spapr: spapr: Work around the memory hotplug failure with DDW
       [not found] ` <201606030550.u535jnDh039622@mx0a-001b2d01.pphosted.com>
@ 2016-06-03  7:10   ` David Gibson
  2016-06-06  3:48     ` Bharata B Rao
  0 siblings, 1 reply; 7+ messages in thread
From: David Gibson @ 2016-06-03  7:10 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: qemu-devel, mdroth, nfont, aik, qemu-ppc

[-- Attachment #1: Type: text/plain, Size: 8457 bytes --]

On Fri, Jun 03, 2016 at 11:19:44AM +0530, Bharata B Rao wrote:
> Memory hotplug can fail for some combinations of RAM and maxmem when
> DDW is enabled in the presence of devices like nec-usb-xhci. DDW depends
> on maximum addressable memory returned by guest and this value is currently
> being calculated wrongly by the guest kernel routine memory_hotplug_max().
> While there is an attempt to fix the guest kernel, this patch works
> around the problem within QEMU itself.
> 
> memory_hotplug_max() routine in the guest kernel arrives at max
> addressable memory by multiplying lmb-size with the lmb-count obtained
> from ibm,dynamic-memory property. There are two assumptions here:
> 
> - All LMBs are part of ibm,dynamic memory: This is not true for PowerKVM
>   where only hot-pluggable LMBs are present in this property.
> - The memory area comprising of RAM and hotplug region is contiguous: This
>   needn't be true always for PowerKVM as there can be gap between
>   boot time RAM and hotplug region.
> 
> This work around involves having all the LMBs (RMA, rest of the boot time
> LMBs and hot-pluggable LMBs) as part of ibm,dynamic-memory so that
> guest kernel's calculation of max addressable memory comes out correct
> resulting in correct DDW value which prevents memory hotplug failures.
> memory@0 is created for RMA, but RMA LMBs are also represented as
> "reserved" LMBs in ibm,dynamic-memory. Parts of this are essenitally a
> revert of e8f986fc57a664a74b9f685b466506366a15201b.
> 
> In addition to this, the alignment of hotplug memory region is reduced from
> current 1G to 256M (LMB size in PowerKVM) so that we don't end up with any
> gaps between boot time RAM and hotplug region.

Hmm.. could we work around the problem without altering the memory
alignment by inserting extra dummy reserved LMBs covering the gap?

> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c         | 59 +++++++++++++++++++++++++++++++++++---------------
>  include/hw/ppc/spapr.h |  5 +++--
>  2 files changed, 45 insertions(+), 19 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 623c35f..3dfbc37 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -569,7 +569,6 @@ static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt)
>          }
>          if (!mem_start) {
>              /* ppc_spapr_init() checks for rma_size <= node0_size already */
> -            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
>              mem_start += spapr->rma_size;
>              node_size -= spapr->rma_size;
>          }
> @@ -762,18 +761,13 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
>      int ret, i, offset;
>      uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
>      uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
> -    uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
> +    uint32_t nr_rma_lmbs = spapr->rma_size / lmb_size;
> +    uint32_t nr_lmbs = machine->maxram_size / lmb_size;
> +    uint32_t nr_assigned_lmbs = machine->ram_size / lmb_size;
>      uint32_t *int_buf, *cur_index, buf_len;
>      int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
>  
>      /*
> -     * Don't create the node if there are no DR LMBs.
> -     */
> -    if (!nr_lmbs) {
> -        return 0;
> -    }
> -
> -    /*
>       * Allocate enough buffer size to fit in ibm,dynamic-memory
>       * or ibm,associativity-lookup-arrays
>       */
> @@ -805,9 +799,15 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
>      for (i = 0; i < nr_lmbs; i++) {
>          sPAPRDRConnector *drc;
>          sPAPRDRConnectorClass *drck;
> -        uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;;
> +        uint64_t addr;
>          uint32_t *dynamic_memory = cur_index;
>  
> +        if (i < nr_assigned_lmbs) {
> +            addr = i * lmb_size;
> +        } else {
> +            addr = (i - nr_assigned_lmbs) * lmb_size +
> +                    spapr->hotplug_memory.base;
> +        }
>          drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
>                                         addr/lmb_size);
>          g_assert(drc);
> @@ -820,7 +820,11 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
>          dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
>          if (addr < machine->ram_size ||
>                      memory_region_present(get_system_memory(), addr)) {
> -            dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
> +            if (i < nr_rma_lmbs) {
> +                dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED);
> +            } else {
> +                dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
> +            }
>          } else {
>              dynamic_memory[5] = cpu_to_be32(0);
>          }
> @@ -882,6 +886,8 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
>      /* Generate ibm,dynamic-reconfiguration-memory node if required */
>      if (memory_update && smc->dr_lmb_enabled) {
>          _FDT((spapr_populate_drconf_memory(spapr, fdt)));
> +    } else {
> +        _FDT((spapr_populate_memory(spapr, fdt)));
>      }
>  
>      /* Pack resulting tree */
> @@ -919,10 +925,23 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
>      /* open out the base tree into a temp buffer for the final tweaks */
>      _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
>  
> -    ret = spapr_populate_memory(spapr, fdt);
> -    if (ret < 0) {
> -        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
> -        exit(1);
> +    /*
> +     * Add memory@0 node to represent RMA. Rest of the memory is either
> +     * represented by memory nodes or ibm,dynamic-reconfiguration-memory
> +     * node later during ibm,client-architecture-support call.
> +     *
> +     * If NUMA is configured, ensure that memory@0 ends up in the
> +     * first memory-less node.
> +     */
> +    if (nb_numa_nodes) {
> +        for (i = 0; i < nb_numa_nodes; ++i) {
> +            if (numa_info[i].node_mem) {
> +                spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
> +                break;
> +            }
> +        }
> +    } else {
> +        spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size);
>      }
>  
>      ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
> @@ -1654,14 +1673,20 @@ static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr)
>  {
>      MachineState *machine = MACHINE(spapr);
>      uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
> -    uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
> +    uint32_t nr_lmbs = machine->maxram_size / lmb_size;
> +    uint32_t nr_assigned_lmbs = machine->ram_size / lmb_size;
>      int i;
>  
>      for (i = 0; i < nr_lmbs; i++) {
>          sPAPRDRConnector *drc;
>          uint64_t addr;
>  
> -        addr = i * lmb_size + spapr->hotplug_memory.base;
> +        if (i < nr_assigned_lmbs) {
> +            addr = i * lmb_size;
> +        } else {
> +            addr = (i - nr_assigned_lmbs) * lmb_size +
> +                    spapr->hotplug_memory.base;
> +        }
>          drc = spapr_dr_connector_new(OBJECT(spapr), SPAPR_DR_CONNECTOR_TYPE_LMB,
>                                       addr/lmb_size);
>          qemu_register_reset(spapr_drc_reset, drc);
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index b2aeb15..e5ef979 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -619,9 +619,10 @@ int spapr_rng_populate_dt(void *fdt);
>  #define SPAPR_DR_LMB_LIST_ENTRY_SIZE 6
>  
>  /*
> - * This flag value defines the LMB as assigned in ibm,dynamic-memory
> - * property under ibm,dynamic-reconfiguration-memory node.
> + * Defines for flag value in ibm,dynamic-memory property under
> + * ibm,dynamic-reconfiguration-memory node.
>   */
>  #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008
> +#define SPAPR_LMB_FLAGS_RESERVED 0x00000080
>  
>  #endif /* !defined (__HW_SPAPR_H__) */

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v1 3/3] spapr: spapr: Work around the memory hotplug failure with DDW
  2016-06-03  7:10   ` [Qemu-devel] [RFC PATCH v1 3/3] spapr: spapr: Work around the memory hotplug failure with DDW David Gibson
@ 2016-06-06  3:48     ` Bharata B Rao
  0 siblings, 0 replies; 7+ messages in thread
From: Bharata B Rao @ 2016-06-06  3:48 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-devel, mdroth, nfont, aik, qemu-ppc

On Fri, Jun 03, 2016 at 05:10:48PM +1000, David Gibson wrote:
> On Fri, Jun 03, 2016 at 11:19:44AM +0530, Bharata B Rao wrote:
> > Memory hotplug can fail for some combinations of RAM and maxmem when
> > DDW is enabled in the presence of devices like nec-usb-xhci. DDW depends
> > on maximum addressable memory returned by guest and this value is currently
> > being calculated wrongly by the guest kernel routine memory_hotplug_max().
> > While there is an attempt to fix the guest kernel, this patch works
> > around the problem within QEMU itself.
> > 
> > memory_hotplug_max() routine in the guest kernel arrives at max
> > addressable memory by multiplying lmb-size with the lmb-count obtained
> > from ibm,dynamic-memory property. There are two assumptions here:
> > 
> > - All LMBs are part of ibm,dynamic memory: This is not true for PowerKVM
> >   where only hot-pluggable LMBs are present in this property.
> > - The memory area comprising of RAM and hotplug region is contiguous: This
> >   needn't be true always for PowerKVM as there can be gap between
> >   boot time RAM and hotplug region.
> > 
> > This work around involves having all the LMBs (RMA, rest of the boot time
> > LMBs and hot-pluggable LMBs) as part of ibm,dynamic-memory so that
> > guest kernel's calculation of max addressable memory comes out correct
> > resulting in correct DDW value which prevents memory hotplug failures.
> > memory@0 is created for RMA, but RMA LMBs are also represented as
> > "reserved" LMBs in ibm,dynamic-memory. Parts of this are essenitally a
> > revert of e8f986fc57a664a74b9f685b466506366a15201b.
> > 
> > In addition to this, the alignment of hotplug memory region is reduced from
> > current 1G to 256M (LMB size in PowerKVM) so that we don't end up with any
> > gaps between boot time RAM and hotplug region.
> 
> Hmm.. could we work around the problem without altering the memory
> alignment by inserting extra dummy reserved LMBs covering the gap?

Appears possible, let me see if it can be done by w/o creating the
DRC objects for such reserved LMBs.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-06-06  3:49 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1464932984-26623-1-git-send-email-bharata@linux.vnet.ibm.com>
2016-06-03  5:49 ` [Qemu-devel] [RFC PATCH v1 1/3] spapr: Introduce pseries-2.7 machine type Bharata B Rao
2016-06-03  5:49 ` [Qemu-devel] [RFC PATCH v1 2/3] spapr: Remove alignment gap b/n RAM and hotplug regions Bharata B Rao
2016-06-03  5:49 ` [Qemu-devel] [RFC PATCH v1 3/3] spapr: spapr: Work around the memory hotplug failure with DDW Bharata B Rao
     [not found] ` <201606030550.u535joke039889@mx0a-001b2d01.pphosted.com>
2016-06-03  6:59   ` [Qemu-devel] [RFC PATCH v1 1/3] spapr: Introduce pseries-2.7 machine type David Gibson
     [not found] ` <201606030550.u535jnsX039604@mx0a-001b2d01.pphosted.com>
2016-06-03  7:09   ` [Qemu-devel] [RFC PATCH v1 2/3] spapr: Remove alignment gap b/n RAM and hotplug regions David Gibson
     [not found] ` <201606030550.u535jnDh039622@mx0a-001b2d01.pphosted.com>
2016-06-03  7:10   ` [Qemu-devel] [RFC PATCH v1 3/3] spapr: spapr: Work around the memory hotplug failure with DDW David Gibson
2016-06-06  3:48     ` Bharata B Rao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).