qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/2] ps: memhp: enforce gaps between DIMMs
@ 2015-09-25 13:53 Igor Mammedov
  2015-09-25 13:53 ` [Qemu-devel] [PATCH 1/2] memhp: extend address auto assignment to support gaps Igor Mammedov
  2015-09-25 13:53 ` [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA Igor Mammedov
  0 siblings, 2 replies; 12+ messages in thread
From: Igor Mammedov @ 2015-09-25 13:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pkrempa, ehabkost, mst, bharata, pbonzini, david

it's a simplier way suggested by Michael S. Tsirkin
to workaround virtio bug reported earlier:
http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
where virtio can't handle buffer that crosses border
between 2 DIMM's (i.e. 2 MemoryRegions).

idea is to leave gaps between DIMMs, making their GPAs
non contiguous, which effectively forces kmalloc
to not use DIMM if buffer doesn't fit inside of it.

Igor Mammedov (2):
  memhp: extend address auto assignment to support gaps
  pc: memhp: force gaps between DIMM's GPA

 hw/i386/pc.c             |  4 +++-
 hw/i386/pc_piix.c        |  3 +++
 hw/i386/pc_q35.c         |  3 +++
 hw/mem/pc-dimm.c         | 13 +++++++------
 hw/ppc/spapr.c           |  2 +-
 include/hw/i386/pc.h     |  2 ++
 include/hw/mem/pc-dimm.h |  7 ++++---
 7 files changed, 23 insertions(+), 11 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH 1/2] memhp: extend address auto assignment to support gaps
  2015-09-25 13:53 [Qemu-devel] [PATCH 0/2] ps: memhp: enforce gaps between DIMMs Igor Mammedov
@ 2015-09-25 13:53 ` Igor Mammedov
  2015-09-25 13:53 ` [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA Igor Mammedov
  1 sibling, 0 replies; 12+ messages in thread
From: Igor Mammedov @ 2015-09-25 13:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pkrempa, ehabkost, mst, bharata, pbonzini, david

setting gap to non 0 value will make sparse DIMM
address auto allocation, leaving gaps between
a new DIMM address and preceeding existing DIMM.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/i386/pc.c             |  2 +-
 hw/mem/pc-dimm.c         | 13 +++++++------
 hw/ppc/spapr.c           |  2 +-
 include/hw/mem/pc-dimm.h |  7 ++++---
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 461c128..91d134c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1644,7 +1644,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
+    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, 0, &local_err);
     if (local_err) {
         goto out;
     }
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index bb04862..1e5a8ea 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -32,7 +32,8 @@ typedef struct pc_dimms_capacity {
 } pc_dimms_capacity;
 
 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-                         MemoryRegion *mr, uint64_t align, Error **errp)
+                         MemoryRegion *mr, uint64_t align, uint64_t gap,
+                         Error **errp)
 {
     int slot;
     MachineState *machine = MACHINE(qdev_get_machine());
@@ -48,7 +49,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
 
     addr = pc_dimm_get_free_addr(hpms->base,
                                  memory_region_size(&hpms->mr),
-                                 !addr ? NULL : &addr, align,
+                                 !addr ? NULL : &addr, align, gap,
                                  memory_region_size(mr), &local_err);
     if (local_err) {
         goto out;
@@ -287,8 +288,8 @@ static int pc_dimm_built_list(Object *obj, void *opaque)
 
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
-                               uint64_t *hint, uint64_t align, uint64_t size,
-                               Error **errp)
+                               uint64_t *hint, uint64_t align, uint64_t gap,
+                               uint64_t size, Error **errp)
 {
     GSList *list = NULL, *item;
     uint64_t new_addr, ret = 0;
@@ -333,13 +334,13 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
             goto out;
         }
 
-        if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) {
+        if (ranges_overlap(dimm->addr, dimm_size, new_addr, size + gap)) {
             if (hint) {
                 DeviceState *d = DEVICE(dimm);
                 error_setg(errp, "address range conflicts with '%s'", d->id);
                 goto out;
             }
-            new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align);
+            new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + gap, align);
         }
     }
     ret = new_addr;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 7f4f196..db35156 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2096,7 +2096,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
+    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, 0, &local_err);
     if (local_err) {
         goto out;
     }
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index d83bf30..aa784f8 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h
@@ -83,15 +83,16 @@ typedef struct MemoryHotplugState {
 
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
-                               uint64_t *hint, uint64_t align, uint64_t size,
-                               Error **errp);
+                               uint64_t *hint, uint64_t align, uint64_t gap,
+                               uint64_t size, Error **errp);
 
 int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
 
 int qmp_pc_dimm_device_list(Object *obj, void *opaque);
 uint64_t pc_existing_dimms_capacity(Error **errp);
 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-                         MemoryRegion *mr, uint64_t align, Error **errp);
+                         MemoryRegion *mr, uint64_t align, uint64_t gap,
+                         Error **errp);
 void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
                            MemoryRegion *mr);
 #endif
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-25 13:53 [Qemu-devel] [PATCH 0/2] ps: memhp: enforce gaps between DIMMs Igor Mammedov
  2015-09-25 13:53 ` [Qemu-devel] [PATCH 1/2] memhp: extend address auto assignment to support gaps Igor Mammedov
@ 2015-09-25 13:53 ` Igor Mammedov
  2015-09-27 10:48   ` Michael S. Tsirkin
  1 sibling, 1 reply; 12+ messages in thread
From: Igor Mammedov @ 2015-09-25 13:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pkrempa, ehabkost, mst, bharata, pbonzini, david

mapping DIMMs non contiguously allows to workaround
virtio bug reported earlier:
http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
in this case guest kernel doesn't allocate buffers
that can cross DIMM boundary keeping each buffer
local to a DIMM.

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
benefit of this workaround is that no guest side
changes are required.
---
 hw/i386/pc.c         | 4 +++-
 hw/i386/pc_piix.c    | 3 +++
 hw/i386/pc_q35.c     | 3 +++
 include/hw/i386/pc.h | 2 ++
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 91d134c..c462c4e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1629,6 +1629,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
+    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *mr = ddc->get_memory_region(dimm);
@@ -1644,7 +1645,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, 0, &local_err);
+    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
+                        pcmc->inter_dimm_gap, &local_err);
     if (local_err) {
         goto out;
     }
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 3ffb05f..3165667 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -457,11 +457,13 @@ static void pc_xen_hvm_init(MachineState *machine)
 
 static void pc_i440fx_machine_options(MachineClass *m)
 {
+    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
     m->family = "pc_piix";
     m->desc = "Standard PC (i440FX + PIIX, 1996)";
     m->hot_add_cpu = pc_hot_add_cpu;
     m->default_machine_opts = "firmware=bios-256k.bin";
     m->default_display = "std";
+    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
 }
 
 static void pc_i440fx_2_5_machine_options(MachineClass *m)
@@ -482,6 +484,7 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
     m->alias = NULL;
     m->is_default = 0;
     pcmc->broken_reserved_end = true;
+    pcmc->inter_dimm_gap = 0;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
 }
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 1b7d3b6..8ad6687 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -360,6 +360,7 @@ static void pc_compat_1_4(MachineState *machine)
 
 static void pc_q35_machine_options(MachineClass *m)
 {
+    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
     m->family = "pc_q35";
     m->desc = "Standard PC (Q35 + ICH9, 2009)";
     m->hot_add_cpu = pc_hot_add_cpu;
@@ -368,6 +369,7 @@ static void pc_q35_machine_options(MachineClass *m)
     m->default_display = "std";
     m->no_floppy = 1;
     m->no_tco = 0;
+    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
 }
 
 static void pc_q35_2_5_machine_options(MachineClass *m)
@@ -385,6 +387,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
     pc_q35_2_5_machine_options(m);
     m->alias = NULL;
     pcmc->broken_reserved_end = true;
+    pcmc->inter_dimm_gap = 0;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
 }
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 6896328..dd6b34a 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -50,6 +50,7 @@ struct PCMachineState {
 #define PC_MACHINE_SMM              "smm"
 #define PC_MACHINE_ENFORCE_ALIGNED_DIMM "enforce-aligned-dimm"
 
+#define PC_2MB_DIMM_GAP (1ULL << 21)
 /**
  * PCMachineClass:
  * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler
@@ -60,6 +61,7 @@ struct PCMachineClass {
 
     /*< public >*/
     bool broken_reserved_end;
+    uint64_t inter_dimm_gap;
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
 };
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-25 13:53 ` [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA Igor Mammedov
@ 2015-09-27 10:48   ` Michael S. Tsirkin
  2015-09-27 13:06     ` Igor Mammedov
  0 siblings, 1 reply; 12+ messages in thread
From: Michael S. Tsirkin @ 2015-09-27 10:48 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: pkrempa, ehabkost, qemu-devel, bharata, pbonzini, david

On Fri, Sep 25, 2015 at 03:53:12PM +0200, Igor Mammedov wrote:
> mapping DIMMs non contiguously allows to workaround
> virtio bug reported earlier:
> http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> in this case guest kernel doesn't allocate buffers
> that can cross DIMM boundary keeping each buffer
> local to a DIMM.
> 
> Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
> benefit of this workaround is that no guest side
> changes are required.

That's a hard requirement, I agree.


> ---
>  hw/i386/pc.c         | 4 +++-
>  hw/i386/pc_piix.c    | 3 +++
>  hw/i386/pc_q35.c     | 3 +++
>  include/hw/i386/pc.h | 2 ++
>  4 files changed, 11 insertions(+), 1 deletion(-)

Aren't other architectures besides PC ever affected?
Do they all allocate all of memory contigious in HVA space?

Also - does the issue only affect hotplugged memory?

Can't the patch be local to pc-dimm (except maybe the
backwards compatibility thing)?


> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 91d134c..c462c4e 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1629,6 +1629,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
>      HotplugHandlerClass *hhc;
>      Error *local_err = NULL;
>      PCMachineState *pcms = PC_MACHINE(hotplug_dev);
> +    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>      PCDIMMDevice *dimm = PC_DIMM(dev);
>      PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
>      MemoryRegion *mr = ddc->get_memory_region(dimm);
> @@ -1644,7 +1645,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
>          goto out;
>      }
>  
> -    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, 0, &local_err);
> +    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
> +                        pcmc->inter_dimm_gap, &local_err);
>      if (local_err) {
>          goto out;
>      }
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 3ffb05f..3165667 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -457,11 +457,13 @@ static void pc_xen_hvm_init(MachineState *machine)
>  
>  static void pc_i440fx_machine_options(MachineClass *m)
>  {
> +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>      m->family = "pc_piix";
>      m->desc = "Standard PC (i440FX + PIIX, 1996)";
>      m->hot_add_cpu = pc_hot_add_cpu;
>      m->default_machine_opts = "firmware=bios-256k.bin";
>      m->default_display = "std";
> +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
>  }
>  
>  static void pc_i440fx_2_5_machine_options(MachineClass *m)
> @@ -482,6 +484,7 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
>      m->alias = NULL;
>      m->is_default = 0;
>      pcmc->broken_reserved_end = true;
> +    pcmc->inter_dimm_gap = 0;
>      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
>  }
>  
> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index 1b7d3b6..8ad6687 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -360,6 +360,7 @@ static void pc_compat_1_4(MachineState *machine)
>  
>  static void pc_q35_machine_options(MachineClass *m)
>  {
> +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>      m->family = "pc_q35";
>      m->desc = "Standard PC (Q35 + ICH9, 2009)";
>      m->hot_add_cpu = pc_hot_add_cpu;
> @@ -368,6 +369,7 @@ static void pc_q35_machine_options(MachineClass *m)
>      m->default_display = "std";
>      m->no_floppy = 1;
>      m->no_tco = 0;
> +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
>  }
>  
>  static void pc_q35_2_5_machine_options(MachineClass *m)
> @@ -385,6 +387,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
>      pc_q35_2_5_machine_options(m);
>      m->alias = NULL;
>      pcmc->broken_reserved_end = true;
> +    pcmc->inter_dimm_gap = 0;
>      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
>  }
>  
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 6896328..dd6b34a 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -50,6 +50,7 @@ struct PCMachineState {
>  #define PC_MACHINE_SMM              "smm"
>  #define PC_MACHINE_ENFORCE_ALIGNED_DIMM "enforce-aligned-dimm"
>  
> +#define PC_2MB_DIMM_GAP (1ULL << 21)
>  /**
>   * PCMachineClass:
>   * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler

Seems somewhat arbitrary. It's aligned later - so won't a 1 byte gap be enough?

> @@ -60,6 +61,7 @@ struct PCMachineClass {
>  
>      /*< public >*/
>      bool broken_reserved_end;
> +    uint64_t inter_dimm_gap;
>      HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
>                                             DeviceState *dev);
>  };
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-27 10:48   ` Michael S. Tsirkin
@ 2015-09-27 13:06     ` Igor Mammedov
  2015-09-27 13:11       ` Michael S. Tsirkin
  0 siblings, 1 reply; 12+ messages in thread
From: Igor Mammedov @ 2015-09-27 13:06 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: pkrempa, ehabkost, qemu-devel, bharata, pbonzini, david

On Sun, 27 Sep 2015 13:48:21 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Fri, Sep 25, 2015 at 03:53:12PM +0200, Igor Mammedov wrote:
> > mapping DIMMs non contiguously allows to workaround
> > virtio bug reported earlier:
> > http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> > in this case guest kernel doesn't allocate buffers
> > that can cross DIMM boundary keeping each buffer
> > local to a DIMM.
> > 
> > Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> > benefit of this workaround is that no guest side
> > changes are required.
> 
> That's a hard requirement, I agree.
> 
> 
> > ---
> >  hw/i386/pc.c         | 4 +++-
> >  hw/i386/pc_piix.c    | 3 +++
> >  hw/i386/pc_q35.c     | 3 +++
> >  include/hw/i386/pc.h | 2 ++
> >  4 files changed, 11 insertions(+), 1 deletion(-)
> 
> Aren't other architectures besides PC ever affected?
> Do they all allocate all of memory contigious in HVA space?
I'm not sure about other targets I've CCed interested parties.

> 
> Also - does the issue only affect hotplugged memory?
Potentially it affects -numa memdev=foo, but however I've
tried I wasn't able to reproduce. We could do it as
separate workaround later if it would affect someone
and virtio is not fixed to handle split buffers by that time.

 
> Can't the patch be local to pc-dimm (except maybe the
> backwards compatibility thing)?
I think decision about using gaps and its size
should be done by board and not generic pc-dimm.


> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 91d134c..c462c4e 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -1629,6 +1629,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
> >      HotplugHandlerClass *hhc;
> >      Error *local_err = NULL;
> >      PCMachineState *pcms = PC_MACHINE(hotplug_dev);
> > +    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> >      PCDIMMDevice *dimm = PC_DIMM(dev);
> >      PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
> >      MemoryRegion *mr = ddc->get_memory_region(dimm);
> > @@ -1644,7 +1645,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
> >          goto out;
> >      }
> >  
> > -    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, 0, &local_err);
> > +    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
> > +                        pcmc->inter_dimm_gap, &local_err);
> >      if (local_err) {
> >          goto out;
> >      }
> > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > index 3ffb05f..3165667 100644
> > --- a/hw/i386/pc_piix.c
> > +++ b/hw/i386/pc_piix.c
> > @@ -457,11 +457,13 @@ static void pc_xen_hvm_init(MachineState *machine)
> >  
> >  static void pc_i440fx_machine_options(MachineClass *m)
> >  {
> > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> >      m->family = "pc_piix";
> >      m->desc = "Standard PC (i440FX + PIIX, 1996)";
> >      m->hot_add_cpu = pc_hot_add_cpu;
> >      m->default_machine_opts = "firmware=bios-256k.bin";
> >      m->default_display = "std";
> > +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
> >  }
> >  
> >  static void pc_i440fx_2_5_machine_options(MachineClass *m)
> > @@ -482,6 +484,7 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
> >      m->alias = NULL;
> >      m->is_default = 0;
> >      pcmc->broken_reserved_end = true;
> > +    pcmc->inter_dimm_gap = 0;
> >      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
> >  }
> >  
> > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > index 1b7d3b6..8ad6687 100644
> > --- a/hw/i386/pc_q35.c
> > +++ b/hw/i386/pc_q35.c
> > @@ -360,6 +360,7 @@ static void pc_compat_1_4(MachineState *machine)
> >  
> >  static void pc_q35_machine_options(MachineClass *m)
> >  {
> > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> >      m->family = "pc_q35";
> >      m->desc = "Standard PC (Q35 + ICH9, 2009)";
> >      m->hot_add_cpu = pc_hot_add_cpu;
> > @@ -368,6 +369,7 @@ static void pc_q35_machine_options(MachineClass *m)
> >      m->default_display = "std";
> >      m->no_floppy = 1;
> >      m->no_tco = 0;
> > +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
> >  }
> >  
> >  static void pc_q35_2_5_machine_options(MachineClass *m)
> > @@ -385,6 +387,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
> >      pc_q35_2_5_machine_options(m);
> >      m->alias = NULL;
> >      pcmc->broken_reserved_end = true;
> > +    pcmc->inter_dimm_gap = 0;
> >      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
> >  }
> >  
> > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > index 6896328..dd6b34a 100644
> > --- a/include/hw/i386/pc.h
> > +++ b/include/hw/i386/pc.h
> > @@ -50,6 +50,7 @@ struct PCMachineState {
> >  #define PC_MACHINE_SMM              "smm"
> >  #define PC_MACHINE_ENFORCE_ALIGNED_DIMM "enforce-aligned-dimm"
> >  
> > +#define PC_2MB_DIMM_GAP (1ULL << 21)
> >  /**
> >   * PCMachineClass:
> >   * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler
> 
> Seems somewhat arbitrary. It's aligned later - so won't a 1 byte gap be enough?
1 byte should be also enough, since effectively it would kick alignment adjustment.

The reason why I've picked 2Mb is that QEMU ram allocator allocates
2Mb granularity.

> 
> > @@ -60,6 +61,7 @@ struct PCMachineClass {
> >  
> >      /*< public >*/
> >      bool broken_reserved_end;
> > +    uint64_t inter_dimm_gap;
> >      HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
> >                                             DeviceState *dev);
> >  };
> > -- 
> > 1.8.3.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-27 13:06     ` Igor Mammedov
@ 2015-09-27 13:11       ` Michael S. Tsirkin
  2015-09-27 14:04         ` Igor Mammedov
  0 siblings, 1 reply; 12+ messages in thread
From: Michael S. Tsirkin @ 2015-09-27 13:11 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: pkrempa, ehabkost, qemu-devel, bharata, pbonzini, david

On Sun, Sep 27, 2015 at 03:06:24PM +0200, Igor Mammedov wrote:
> On Sun, 27 Sep 2015 13:48:21 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Fri, Sep 25, 2015 at 03:53:12PM +0200, Igor Mammedov wrote:
> > > mapping DIMMs non contiguously allows to workaround
> > > virtio bug reported earlier:
> > > http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> > > in this case guest kernel doesn't allocate buffers
> > > that can cross DIMM boundary keeping each buffer
> > > local to a DIMM.
> > > 
> > > Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > ---
> > > benefit of this workaround is that no guest side
> > > changes are required.
> > 
> > That's a hard requirement, I agree.
> > 
> > 
> > > ---
> > >  hw/i386/pc.c         | 4 +++-
> > >  hw/i386/pc_piix.c    | 3 +++
> > >  hw/i386/pc_q35.c     | 3 +++
> > >  include/hw/i386/pc.h | 2 ++
> > >  4 files changed, 11 insertions(+), 1 deletion(-)
> > 
> > Aren't other architectures besides PC ever affected?
> > Do they all allocate all of memory contigious in HVA space?
> I'm not sure about other targets I've CCed interested parties.
> 
> > 
> > Also - does the issue only affect hotplugged memory?
> Potentially it affects -numa memdev=foo, but however I've
> tried I wasn't able to reproduce.
> We could do it as
> separate workaround later if it would affect someone
> and virtio is not fixed to handle split buffers by that time.
> 

You can't reproduce a crash or you can't reproduce getting contigious
GPA with fragmented HVA?
If you can see fragmentation that's enough to assume guest crash can
be triggered, even if it doesn't with Linux.

>  
> > Can't the patch be local to pc-dimm (except maybe the
> > backwards compatibility thing)?
> I think decision about using gaps and its size
> should be done by board and not generic pc-dimm.
> 

Well virtio is generic and can be used by all boards.


> > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > index 91d134c..c462c4e 100644
> > > --- a/hw/i386/pc.c
> > > +++ b/hw/i386/pc.c
> > > @@ -1629,6 +1629,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
> > >      HotplugHandlerClass *hhc;
> > >      Error *local_err = NULL;
> > >      PCMachineState *pcms = PC_MACHINE(hotplug_dev);
> > > +    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> > >      PCDIMMDevice *dimm = PC_DIMM(dev);
> > >      PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
> > >      MemoryRegion *mr = ddc->get_memory_region(dimm);
> > > @@ -1644,7 +1645,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
> > >          goto out;
> > >      }
> > >  
> > > -    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, 0, &local_err);
> > > +    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
> > > +                        pcmc->inter_dimm_gap, &local_err);
> > >      if (local_err) {
> > >          goto out;
> > >      }
> > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > index 3ffb05f..3165667 100644
> > > --- a/hw/i386/pc_piix.c
> > > +++ b/hw/i386/pc_piix.c
> > > @@ -457,11 +457,13 @@ static void pc_xen_hvm_init(MachineState *machine)
> > >  
> > >  static void pc_i440fx_machine_options(MachineClass *m)
> > >  {
> > > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> > >      m->family = "pc_piix";
> > >      m->desc = "Standard PC (i440FX + PIIX, 1996)";
> > >      m->hot_add_cpu = pc_hot_add_cpu;
> > >      m->default_machine_opts = "firmware=bios-256k.bin";
> > >      m->default_display = "std";
> > > +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
> > >  }
> > >  
> > >  static void pc_i440fx_2_5_machine_options(MachineClass *m)
> > > @@ -482,6 +484,7 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
> > >      m->alias = NULL;
> > >      m->is_default = 0;
> > >      pcmc->broken_reserved_end = true;
> > > +    pcmc->inter_dimm_gap = 0;
> > >      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
> > >  }
> > >  
> > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > index 1b7d3b6..8ad6687 100644
> > > --- a/hw/i386/pc_q35.c
> > > +++ b/hw/i386/pc_q35.c
> > > @@ -360,6 +360,7 @@ static void pc_compat_1_4(MachineState *machine)
> > >  
> > >  static void pc_q35_machine_options(MachineClass *m)
> > >  {
> > > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> > >      m->family = "pc_q35";
> > >      m->desc = "Standard PC (Q35 + ICH9, 2009)";
> > >      m->hot_add_cpu = pc_hot_add_cpu;
> > > @@ -368,6 +369,7 @@ static void pc_q35_machine_options(MachineClass *m)
> > >      m->default_display = "std";
> > >      m->no_floppy = 1;
> > >      m->no_tco = 0;
> > > +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
> > >  }
> > >  
> > >  static void pc_q35_2_5_machine_options(MachineClass *m)
> > > @@ -385,6 +387,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
> > >      pc_q35_2_5_machine_options(m);
> > >      m->alias = NULL;
> > >      pcmc->broken_reserved_end = true;
> > > +    pcmc->inter_dimm_gap = 0;
> > >      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
> > >  }
> > >  
> > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > > index 6896328..dd6b34a 100644
> > > --- a/include/hw/i386/pc.h
> > > +++ b/include/hw/i386/pc.h
> > > @@ -50,6 +50,7 @@ struct PCMachineState {
> > >  #define PC_MACHINE_SMM              "smm"
> > >  #define PC_MACHINE_ENFORCE_ALIGNED_DIMM "enforce-aligned-dimm"
> > >  
> > > +#define PC_2MB_DIMM_GAP (1ULL << 21)
> > >  /**
> > >   * PCMachineClass:
> > >   * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler
> > 
> > Seems somewhat arbitrary. It's aligned later - so won't a 1 byte gap be enough?
> 1 byte should be also enough, since effectively it would kick alignment adjustment.
> 
> The reason why I've picked 2Mb is that QEMU ram allocator allocates
> 2Mb granularity.

It will align it itself. We don't want that logic to spread out to
unrelated parts of QEMU.

> > 
> > > @@ -60,6 +61,7 @@ struct PCMachineClass {
> > >  
> > >      /*< public >*/
> > >      bool broken_reserved_end;
> > > +    uint64_t inter_dimm_gap;
> > >      HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
> > >                                             DeviceState *dev);
> > >  };
> > > -- 
> > > 1.8.3.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-27 13:11       ` Michael S. Tsirkin
@ 2015-09-27 14:04         ` Igor Mammedov
  2015-09-27 14:18           ` Michael S. Tsirkin
  2015-09-28  4:39           ` Bharata B Rao
  0 siblings, 2 replies; 12+ messages in thread
From: Igor Mammedov @ 2015-09-27 14:04 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: pkrempa, ehabkost, qemu-devel, bharata, pbonzini, david

On Sun, 27 Sep 2015 16:11:02 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Sun, Sep 27, 2015 at 03:06:24PM +0200, Igor Mammedov wrote:
> > On Sun, 27 Sep 2015 13:48:21 +0300
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > 
> > > On Fri, Sep 25, 2015 at 03:53:12PM +0200, Igor Mammedov wrote:
> > > > mapping DIMMs non contiguously allows to workaround
> > > > virtio bug reported earlier:
> > > > http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> > > > in this case guest kernel doesn't allocate buffers
> > > > that can cross DIMM boundary keeping each buffer
> > > > local to a DIMM.
> > > > 
> > > > Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > ---
> > > > benefit of this workaround is that no guest side
> > > > changes are required.
> > > 
> > > That's a hard requirement, I agree.
> > > 
> > > 
> > > > ---
> > > >  hw/i386/pc.c         | 4 +++-
> > > >  hw/i386/pc_piix.c    | 3 +++
> > > >  hw/i386/pc_q35.c     | 3 +++
> > > >  include/hw/i386/pc.h | 2 ++
> > > >  4 files changed, 11 insertions(+), 1 deletion(-)
> > > 
> > > Aren't other architectures besides PC ever affected?
> > > Do they all allocate all of memory contigious in HVA space?
> > I'm not sure about other targets I've CCed interested parties.
> > 
> > > 
> > > Also - does the issue only affect hotplugged memory?
> > Potentially it affects -numa memdev=foo, but however I've
> > tried I wasn't able to reproduce.
> > We could do it as
> > separate workaround later if it would affect someone
> > and virtio is not fixed to handle split buffers by that time.
> > 
> 
> You can't reproduce a crash or you can't reproduce getting contigious
> GPA with fragmented HVA?
> If you can see fragmentation that's enough to assume guest crash can
> be triggered, even if it doesn't with Linux.
I'll check it.

> 
> >  
> > > Can't the patch be local to pc-dimm (except maybe the
> > > backwards compatibility thing)?
> > I think decision about using gaps and its size
> > should be done by board and not generic pc-dimm.
> > 
> 
> Well virtio is generic and can be used by all boards.
Still pc-dimm.addr is not allocation is not part of pc-dimm
device. it's just helper functions that happen to live in
the same file source file.

But more importantly every target might have it's own
notion how it partitions hotplug address space so making
the same gap global might break them.

It's safer to enable gaps per target, I think ppc guys
will make their own patch on top of this to taking
in account their target specific and compat stuff.

> 
> 
> > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > > index 91d134c..c462c4e 100644
> > > > --- a/hw/i386/pc.c
> > > > +++ b/hw/i386/pc.c
> > > > @@ -1629,6 +1629,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
> > > >      HotplugHandlerClass *hhc;
> > > >      Error *local_err = NULL;
> > > >      PCMachineState *pcms = PC_MACHINE(hotplug_dev);
> > > > +    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> > > >      PCDIMMDevice *dimm = PC_DIMM(dev);
> > > >      PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
> > > >      MemoryRegion *mr = ddc->get_memory_region(dimm);
> > > > @@ -1644,7 +1645,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
> > > >          goto out;
> > > >      }
> > > >  
> > > > -    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, 0, &local_err);
> > > > +    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
> > > > +                        pcmc->inter_dimm_gap, &local_err);
> > > >      if (local_err) {
> > > >          goto out;
> > > >      }
> > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > > index 3ffb05f..3165667 100644
> > > > --- a/hw/i386/pc_piix.c
> > > > +++ b/hw/i386/pc_piix.c
> > > > @@ -457,11 +457,13 @@ static void pc_xen_hvm_init(MachineState *machine)
> > > >  
> > > >  static void pc_i440fx_machine_options(MachineClass *m)
> > > >  {
> > > > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> > > >      m->family = "pc_piix";
> > > >      m->desc = "Standard PC (i440FX + PIIX, 1996)";
> > > >      m->hot_add_cpu = pc_hot_add_cpu;
> > > >      m->default_machine_opts = "firmware=bios-256k.bin";
> > > >      m->default_display = "std";
> > > > +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
> > > >  }
> > > >  
> > > >  static void pc_i440fx_2_5_machine_options(MachineClass *m)
> > > > @@ -482,6 +484,7 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
> > > >      m->alias = NULL;
> > > >      m->is_default = 0;
> > > >      pcmc->broken_reserved_end = true;
> > > > +    pcmc->inter_dimm_gap = 0;
> > > >      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
> > > >  }
> > > >  
> > > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > > index 1b7d3b6..8ad6687 100644
> > > > --- a/hw/i386/pc_q35.c
> > > > +++ b/hw/i386/pc_q35.c
> > > > @@ -360,6 +360,7 @@ static void pc_compat_1_4(MachineState *machine)
> > > >  
> > > >  static void pc_q35_machine_options(MachineClass *m)
> > > >  {
> > > > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> > > >      m->family = "pc_q35";
> > > >      m->desc = "Standard PC (Q35 + ICH9, 2009)";
> > > >      m->hot_add_cpu = pc_hot_add_cpu;
> > > > @@ -368,6 +369,7 @@ static void pc_q35_machine_options(MachineClass *m)
> > > >      m->default_display = "std";
> > > >      m->no_floppy = 1;
> > > >      m->no_tco = 0;
> > > > +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
> > > >  }
> > > >  
> > > >  static void pc_q35_2_5_machine_options(MachineClass *m)
> > > > @@ -385,6 +387,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
> > > >      pc_q35_2_5_machine_options(m);
> > > >      m->alias = NULL;
> > > >      pcmc->broken_reserved_end = true;
> > > > +    pcmc->inter_dimm_gap = 0;
> > > >      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
> > > >  }
> > > >  
> > > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > > > index 6896328..dd6b34a 100644
> > > > --- a/include/hw/i386/pc.h
> > > > +++ b/include/hw/i386/pc.h
> > > > @@ -50,6 +50,7 @@ struct PCMachineState {
> > > >  #define PC_MACHINE_SMM              "smm"
> > > >  #define PC_MACHINE_ENFORCE_ALIGNED_DIMM "enforce-aligned-dimm"
> > > >  
> > > > +#define PC_2MB_DIMM_GAP (1ULL << 21)
> > > >  /**
> > > >   * PCMachineClass:
> > > >   * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler
> > > 
> > > Seems somewhat arbitrary. It's aligned later - so won't a 1 byte gap be enough?
> > 1 byte should be also enough, since effectively it would kick alignment adjustment.
> > 
> > The reason why I've picked 2Mb is that QEMU ram allocator allocates
> > 2Mb granularity.
> 
> It will align it itself. We don't want that logic to spread out to
> unrelated parts of QEMU.
Ok, I'll make it boolean and just use hardcoded 1
in pc_dimm_plug()->pc_dimm_memory_plug(...,1,...)

> 
> > > 
> > > > @@ -60,6 +61,7 @@ struct PCMachineClass {
> > > >  
> > > >      /*< public >*/
> > > >      bool broken_reserved_end;
> > > > +    uint64_t inter_dimm_gap;
> > > >      HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
> > > >                                             DeviceState *dev);
> > > >  };
> > > > -- 
> > > > 1.8.3.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-27 14:04         ` Igor Mammedov
@ 2015-09-27 14:18           ` Michael S. Tsirkin
  2015-09-28  9:18             ` Igor Mammedov
  2015-09-28  4:39           ` Bharata B Rao
  1 sibling, 1 reply; 12+ messages in thread
From: Michael S. Tsirkin @ 2015-09-27 14:18 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: pkrempa, ehabkost, qemu-devel, bharata, pbonzini, david

On Sun, Sep 27, 2015 at 04:04:06PM +0200, Igor Mammedov wrote:
> On Sun, 27 Sep 2015 16:11:02 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Sun, Sep 27, 2015 at 03:06:24PM +0200, Igor Mammedov wrote:
> > > On Sun, 27 Sep 2015 13:48:21 +0300
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > 
> > > > On Fri, Sep 25, 2015 at 03:53:12PM +0200, Igor Mammedov wrote:
> > > > > mapping DIMMs non contiguously allows to workaround
> > > > > virtio bug reported earlier:
> > > > > http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> > > > > in this case guest kernel doesn't allocate buffers
> > > > > that can cross DIMM boundary keeping each buffer
> > > > > local to a DIMM.
> > > > > 
> > > > > Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > ---
> > > > > benefit of this workaround is that no guest side
> > > > > changes are required.
> > > > 
> > > > That's a hard requirement, I agree.
> > > > 
> > > > 
> > > > > ---
> > > > >  hw/i386/pc.c         | 4 +++-
> > > > >  hw/i386/pc_piix.c    | 3 +++
> > > > >  hw/i386/pc_q35.c     | 3 +++
> > > > >  include/hw/i386/pc.h | 2 ++
> > > > >  4 files changed, 11 insertions(+), 1 deletion(-)
> > > > 
> > > > Aren't other architectures besides PC ever affected?
> > > > Do they all allocate all of memory contigious in HVA space?
> > > I'm not sure about other targets I've CCed interested parties.
> > > 
> > > > 
> > > > Also - does the issue only affect hotplugged memory?
> > > Potentially it affects -numa memdev=foo, but however I've
> > > tried I wasn't able to reproduce.
> > > We could do it as
> > > separate workaround later if it would affect someone
> > > and virtio is not fixed to handle split buffers by that time.
> > > 
> > 
> > You can't reproduce a crash or you can't reproduce getting contigious
> > GPA with fragmented HVA?
> > If you can see fragmentation that's enough to assume guest crash can
> > be triggered, even if it doesn't with Linux.
> I'll check it.
> 
> > 
> > >  
> > > > Can't the patch be local to pc-dimm (except maybe the
> > > > backwards compatibility thing)?
> > > I think decision about using gaps and its size
> > > should be done by board and not generic pc-dimm.
> > > 
> > 
> > Well virtio is generic and can be used by all boards.
> Still pc-dimm.addr is not allocation is not part of pc-dimm
> device. it's just helper functions that happen to live in
> the same file source file.
> 
> But more importantly every target might have it's own
> notion how it partitions hotplug address space so making
> the same gap global might break them.

That's why each target has it's own alignment, no?

> It's safer to enable gaps per target, I think ppc guys
> will make their own patch on top of this to taking
> in account their target specific and compat stuff.

Sure, it can be split up but we need to address at least
the kvm platforms.

> > 
> > 
> > > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > > > index 91d134c..c462c4e 100644
> > > > > --- a/hw/i386/pc.c
> > > > > +++ b/hw/i386/pc.c
> > > > > @@ -1629,6 +1629,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
> > > > >      HotplugHandlerClass *hhc;
> > > > >      Error *local_err = NULL;
> > > > >      PCMachineState *pcms = PC_MACHINE(hotplug_dev);
> > > > > +    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> > > > >      PCDIMMDevice *dimm = PC_DIMM(dev);
> > > > >      PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
> > > > >      MemoryRegion *mr = ddc->get_memory_region(dimm);
> > > > > @@ -1644,7 +1645,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
> > > > >          goto out;
> > > > >      }
> > > > >  
> > > > > -    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, 0, &local_err);
> > > > > +    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
> > > > > +                        pcmc->inter_dimm_gap, &local_err);
> > > > >      if (local_err) {
> > > > >          goto out;
> > > > >      }
> > > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > > > index 3ffb05f..3165667 100644
> > > > > --- a/hw/i386/pc_piix.c
> > > > > +++ b/hw/i386/pc_piix.c
> > > > > @@ -457,11 +457,13 @@ static void pc_xen_hvm_init(MachineState *machine)
> > > > >  
> > > > >  static void pc_i440fx_machine_options(MachineClass *m)
> > > > >  {
> > > > > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> > > > >      m->family = "pc_piix";
> > > > >      m->desc = "Standard PC (i440FX + PIIX, 1996)";
> > > > >      m->hot_add_cpu = pc_hot_add_cpu;
> > > > >      m->default_machine_opts = "firmware=bios-256k.bin";
> > > > >      m->default_display = "std";
> > > > > +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
> > > > >  }
> > > > >  
> > > > >  static void pc_i440fx_2_5_machine_options(MachineClass *m)
> > > > > @@ -482,6 +484,7 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
> > > > >      m->alias = NULL;
> > > > >      m->is_default = 0;
> > > > >      pcmc->broken_reserved_end = true;
> > > > > +    pcmc->inter_dimm_gap = 0;
> > > > >      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
> > > > >  }
> > > > >  
> > > > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > > > index 1b7d3b6..8ad6687 100644
> > > > > --- a/hw/i386/pc_q35.c
> > > > > +++ b/hw/i386/pc_q35.c
> > > > > @@ -360,6 +360,7 @@ static void pc_compat_1_4(MachineState *machine)
> > > > >  
> > > > >  static void pc_q35_machine_options(MachineClass *m)
> > > > >  {
> > > > > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> > > > >      m->family = "pc_q35";
> > > > >      m->desc = "Standard PC (Q35 + ICH9, 2009)";
> > > > >      m->hot_add_cpu = pc_hot_add_cpu;
> > > > > @@ -368,6 +369,7 @@ static void pc_q35_machine_options(MachineClass *m)
> > > > >      m->default_display = "std";
> > > > >      m->no_floppy = 1;
> > > > >      m->no_tco = 0;
> > > > > +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
> > > > >  }
> > > > >  
> > > > >  static void pc_q35_2_5_machine_options(MachineClass *m)
> > > > > @@ -385,6 +387,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
> > > > >      pc_q35_2_5_machine_options(m);
> > > > >      m->alias = NULL;
> > > > >      pcmc->broken_reserved_end = true;
> > > > > +    pcmc->inter_dimm_gap = 0;
> > > > >      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
> > > > >  }
> > > > >  
> > > > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > > > > index 6896328..dd6b34a 100644
> > > > > --- a/include/hw/i386/pc.h
> > > > > +++ b/include/hw/i386/pc.h
> > > > > @@ -50,6 +50,7 @@ struct PCMachineState {
> > > > >  #define PC_MACHINE_SMM              "smm"
> > > > >  #define PC_MACHINE_ENFORCE_ALIGNED_DIMM "enforce-aligned-dimm"
> > > > >  
> > > > > +#define PC_2MB_DIMM_GAP (1ULL << 21)
> > > > >  /**
> > > > >   * PCMachineClass:
> > > > >   * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler
> > > > 
> > > > Seems somewhat arbitrary. It's aligned later - so won't a 1 byte gap be enough?
> > > 1 byte should be also enough, since effectively it would kick alignment adjustment.
> > > 
> > > The reason why I've picked 2Mb is that QEMU ram allocator allocates
> > > 2Mb granularity.
> > 
> > It will align it itself. We don't want that logic to spread out to
> > unrelated parts of QEMU.
> Ok, I'll make it boolean and just use hardcoded 1
> in pc_dimm_plug()->pc_dimm_memory_plug(...,1,...)
> 
> > 
> > > > 
> > > > > @@ -60,6 +61,7 @@ struct PCMachineClass {
> > > > >  
> > > > >      /*< public >*/
> > > > >      bool broken_reserved_end;
> > > > > +    uint64_t inter_dimm_gap;
> > > > >      HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
> > > > >                                             DeviceState *dev);
> > > > >  };
> > > > > -- 
> > > > > 1.8.3.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-27 14:04         ` Igor Mammedov
  2015-09-27 14:18           ` Michael S. Tsirkin
@ 2015-09-28  4:39           ` Bharata B Rao
  2015-09-28  9:13             ` Igor Mammedov
  1 sibling, 1 reply; 12+ messages in thread
From: Bharata B Rao @ 2015-09-28  4:39 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: pkrempa, ehabkost, Michael S. Tsirkin, qemu-devel, pbonzini,
	david

On Sun, Sep 27, 2015 at 04:04:06PM +0200, Igor Mammedov wrote:
> On Sun, 27 Sep 2015 16:11:02 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Sun, Sep 27, 2015 at 03:06:24PM +0200, Igor Mammedov wrote:
> > > On Sun, 27 Sep 2015 13:48:21 +0300
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > 
> > > > On Fri, Sep 25, 2015 at 03:53:12PM +0200, Igor Mammedov wrote:
> > > > > mapping DIMMs non contiguously allows to workaround
> > > > > virtio bug reported earlier:
> > > > > http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> > > > > in this case guest kernel doesn't allocate buffers
> > > > > that can cross DIMM boundary keeping each buffer
> > > > > local to a DIMM.
> > > > > 
> > > > > Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > ---
> > > > > benefit of this workaround is that no guest side
> > > > > changes are required.
> > > > 
> > > > That's a hard requirement, I agree.
> > > > 
> > > > 
> > > > > ---
> > > > >  hw/i386/pc.c         | 4 +++-
> > > > >  hw/i386/pc_piix.c    | 3 +++
> > > > >  hw/i386/pc_q35.c     | 3 +++
> > > > >  include/hw/i386/pc.h | 2 ++
> > > > >  4 files changed, 11 insertions(+), 1 deletion(-)
> > > > 
> > > > Aren't other architectures besides PC ever affected?
> > > > Do they all allocate all of memory contigious in HVA space?
> > > I'm not sure about other targets I've CCed interested parties.
> > > 
> > > > 
> > > > Also - does the issue only affect hotplugged memory?
> > > Potentially it affects -numa memdev=foo, but however I've
> > > tried I wasn't able to reproduce.
> > > We could do it as
> > > separate workaround later if it would affect someone
> > > and virtio is not fixed to handle split buffers by that time.
> > > 
> > 
> > You can't reproduce a crash or you can't reproduce getting contigious
> > GPA with fragmented HVA?
> > If you can see fragmentation that's enough to assume guest crash can
> > be triggered, even if it doesn't with Linux.
> I'll check it.
> 
> > 
> > >  
> > > > Can't the patch be local to pc-dimm (except maybe the
> > > > backwards compatibility thing)?
> > > I think decision about using gaps and its size
> > > should be done by board and not generic pc-dimm.
> > > 
> > 
> > Well virtio is generic and can be used by all boards.
> Still pc-dimm.addr is not allocation is not part of pc-dimm
> device. it's just helper functions that happen to live in
> the same file source file.
> 
> But more importantly every target might have it's own
> notion how it partitions hotplug address space so making
> the same gap global might break them.
> 
> It's safer to enable gaps per target, I think ppc guys
> will make their own patch on top of this to taking
> in account their target specific and compat stuff.

I have never seen this issue that you mention at

http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html

in PowerPC. I have not been able to reproduce the QEMU crash with the
commandline suggested there.

(# ./ppc64-softmmu/qemu-system-ppc64 --enable-kvm --nographic -machine pseries -m 8G,slots=32,maxmem=32G -device virtio-blk-pci,drive=rootdisk -drive file=/home/bharata/F20-snap1,if=none,cache=none,id=rootdisk,format=qcow2 -monitor telnet:localhost:1235,server,nowait -vga none -bios /home/bharata/slof/slof.bin -smp 16,maxcpus=32 -netdev tap,id=foo,ifname=tap0,script=/home/bharata/qemu-ifup -device virtio-net-pci,id=n1,netdev=foo `for i in $(seq 0 15); do echo -n "-object memory-backend-ram,id=m$i,size=256M -device pc-dimm,id=dimm$i,memdev=m$i "; done` -snapshot)

PowerPC sPAPR memory hotplug enforces memory alignment of 256MB
for both boottime as well as hotplugged memory.

So not sure if anything other than the default gap=0 which you have
done in this patchset for PowerPC is necessary.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-28  4:39           ` Bharata B Rao
@ 2015-09-28  9:13             ` Igor Mammedov
  2015-10-05  8:44               ` Bharata B Rao
  0 siblings, 1 reply; 12+ messages in thread
From: Igor Mammedov @ 2015-09-28  9:13 UTC (permalink / raw)
  To: Bharata B Rao
  Cc: pkrempa, ehabkost, Michael S. Tsirkin, qemu-devel, pbonzini,
	david

On Mon, 28 Sep 2015 10:09:26 +0530
Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> On Sun, Sep 27, 2015 at 04:04:06PM +0200, Igor Mammedov wrote:
> > On Sun, 27 Sep 2015 16:11:02 +0300
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > 
> > > On Sun, Sep 27, 2015 at 03:06:24PM +0200, Igor Mammedov wrote:
> > > > On Sun, 27 Sep 2015 13:48:21 +0300
> > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > 
> > > > > On Fri, Sep 25, 2015 at 03:53:12PM +0200, Igor Mammedov wrote:
> > > > > > mapping DIMMs non contiguously allows to workaround
> > > > > > virtio bug reported earlier:
> > > > > > http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> > > > > > in this case guest kernel doesn't allocate buffers
> > > > > > that can cross DIMM boundary keeping each buffer
> > > > > > local to a DIMM.
> > > > > > 
> > > > > > Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > ---
> > > > > > benefit of this workaround is that no guest side
> > > > > > changes are required.
> > > > > 
> > > > > That's a hard requirement, I agree.
> > > > > 
> > > > > 
> > > > > > ---
> > > > > >  hw/i386/pc.c         | 4 +++-
> > > > > >  hw/i386/pc_piix.c    | 3 +++
> > > > > >  hw/i386/pc_q35.c     | 3 +++
> > > > > >  include/hw/i386/pc.h | 2 ++
> > > > > >  4 files changed, 11 insertions(+), 1 deletion(-)
> > > > > 
> > > > > Aren't other architectures besides PC ever affected?
> > > > > Do they all allocate all of memory contigious in HVA space?
> > > > I'm not sure about other targets I've CCed interested parties.
> > > > 
> > > > > 
> > > > > Also - does the issue only affect hotplugged memory?
> > > > Potentially it affects -numa memdev=foo, but however I've
> > > > tried I wasn't able to reproduce.
> > > > We could do it as
> > > > separate workaround later if it would affect someone
> > > > and virtio is not fixed to handle split buffers by that time.
> > > > 
> > > 
> > > You can't reproduce a crash or you can't reproduce getting
> > > contigious GPA with fragmented HVA?
> > > If you can see fragmentation that's enough to assume guest crash
> > > can be triggered, even if it doesn't with Linux.
> > I'll check it.
> > 
> > > 
> > > >  
> > > > > Can't the patch be local to pc-dimm (except maybe the
> > > > > backwards compatibility thing)?
> > > > I think decision about using gaps and its size
> > > > should be done by board and not generic pc-dimm.
> > > > 
> > > 
> > > Well virtio is generic and can be used by all boards.
> > Still pc-dimm.addr is not allocation is not part of pc-dimm
> > device. it's just helper functions that happen to live in
> > the same file source file.
> > 
> > But more importantly every target might have it's own
> > notion how it partitions hotplug address space so making
> > the same gap global might break them.
> > 
> > It's safer to enable gaps per target, I think ppc guys
> > will make their own patch on top of this to taking
> > in account their target specific and compat stuff.
> 
> I have never seen this issue that you mention at
> 
> http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> 
> in PowerPC. I have not been able to reproduce the QEMU crash with the
> commandline suggested there.
> 
> (# ./ppc64-softmmu/qemu-system-ppc64 --enable-kvm --nographic
> -machine pseries -m 8G,slots=32,maxmem=32G -device
> virtio-blk-pci,drive=rootdisk -drive
> file=/home/bharata/F20-snap1,if=none,cache=none,id=rootdisk,format=qcow2
> -monitor telnet:localhost:1235,server,nowait -vga none
> -bios /home/bharata/slof/slof.bin -smp 16,maxcpus=32 -netdev
> tap,id=foo,ifname=tap0,script=/home/bharata/qemu-ifup -device
> virtio-net-pci,id=n1,netdev=foo `for i in $(seq 0 15); do echo -n
> "-object memory-backend-ram,id=m$i,size=256M -device
> pc-dimm,id=dimm$i,memdev=m$i "; done` -snapshot)
> 
> PowerPC sPAPR memory hotplug enforces memory alignment of 256MB
> for both boottime as well as hotplugged memory.
> 
> So not sure if anything other than the default gap=0 which you have
> done in this patchset for PowerPC is necessary.
The bigger initial memory and dimm sizes the less likelihood of
triggering the bug. You don't see it mostly due to luck, but it doesn't
rule out possibility of it happening in production.
So please consider turning on gaps for ppc machine.

Looking at how hotplug_mem_size is sized in hw/ppc/spapr.c it doesn't
look that just turning on gaps would work since it doesn't have a space
for alignment adjustment.

try to plug dimm device in following order:
  -m 8G,slots=2,maxmem=1256M \

  -object memory-backend-ram,id=m1,size=256M -device pc-dimm,memdev=m1 \

  -object memory-backend-file,id=hugepage1g,size=1G,file=/path/to1Gb/hugepagefs \
  -device pc-dimm,memdev=hugepage1g

it should fail when adding second dimm since alignment for 1Gb huge page
would be 1Gb but hotplug_mem container size is only 1256M total.

in PC target we apply following adjustment to container size:
            /* size hotplug region assuming 1G page max alignment per slot */
            hotplug_mem_size += (1ULL << 30) * machine->ram_slots;

so it's fine to add small gap which will trigger and consume
extra GPA offset for alignment adjustment in hotplug_mem container,
which was over-provisioned size-wise in advance for the case of mixed
memory backends.

> Regards,
> Bharata.
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-27 14:18           ` Michael S. Tsirkin
@ 2015-09-28  9:18             ` Igor Mammedov
  0 siblings, 0 replies; 12+ messages in thread
From: Igor Mammedov @ 2015-09-28  9:18 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: pkrempa, ehabkost, qemu-devel, bharata, pbonzini, david

On Sun, 27 Sep 2015 17:18:48 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Sun, Sep 27, 2015 at 04:04:06PM +0200, Igor Mammedov wrote:
> > On Sun, 27 Sep 2015 16:11:02 +0300
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > 
> > > On Sun, Sep 27, 2015 at 03:06:24PM +0200, Igor Mammedov wrote:
> > > > On Sun, 27 Sep 2015 13:48:21 +0300
> > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > 
> > > > > On Fri, Sep 25, 2015 at 03:53:12PM +0200, Igor Mammedov wrote:
> > > > > > mapping DIMMs non contiguously allows to workaround
> > > > > > virtio bug reported earlier:
> > > > > > http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> > > > > > in this case guest kernel doesn't allocate buffers
> > > > > > that can cross DIMM boundary keeping each buffer
> > > > > > local to a DIMM.
> > > > > > 
> > > > > > Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > ---
> > > > > > benefit of this workaround is that no guest side
> > > > > > changes are required.
> > > > > 
> > > > > That's a hard requirement, I agree.
> > > > > 
> > > > > 
> > > > > > ---
> > > > > >  hw/i386/pc.c         | 4 +++-
> > > > > >  hw/i386/pc_piix.c    | 3 +++
> > > > > >  hw/i386/pc_q35.c     | 3 +++
> > > > > >  include/hw/i386/pc.h | 2 ++
> > > > > >  4 files changed, 11 insertions(+), 1 deletion(-)
> > > > > 
> > > > > Aren't other architectures besides PC ever affected?
> > > > > Do they all allocate all of memory contigious in HVA space?
> > > > I'm not sure about other targets I've CCed interested parties.
> > > > 
> > > > > 
> > > > > Also - does the issue only affect hotplugged memory?
> > > > Potentially it affects -numa memdev=foo, but however I've
> > > > tried I wasn't able to reproduce.
> > > > We could do it as
> > > > separate workaround later if it would affect someone
> > > > and virtio is not fixed to handle split buffers by that time.
> > > > 
> > > 
> > > You can't reproduce a crash or you can't reproduce getting
> > > contigious GPA with fragmented HVA?
> > > If you can see fragmentation that's enough to assume guest crash
> > > can be triggered, even if it doesn't with Linux.
> > I'll check it.
> > 
> > > 
> > > >  
> > > > > Can't the patch be local to pc-dimm (except maybe the
> > > > > backwards compatibility thing)?
> > > > I think decision about using gaps and its size
> > > > should be done by board and not generic pc-dimm.
> > > > 
> > > 
> > > Well virtio is generic and can be used by all boards.
> > Still pc-dimm.addr is not allocation is not part of pc-dimm
> > device. it's just helper functions that happen to live in
> > the same file source file.
> > 
> > But more importantly every target might have it's own
> > notion how it partitions hotplug address space so making
> > the same gap global might break them.
> 
> That's why each target has it's own alignment, no?
yep, it's target specific how and where it would map hotplugged
memory.

> 
> > It's safer to enable gaps per target, I think ppc guys
> > will make their own patch on top of this to taking
> > in account their target specific and compat stuff.
> 
> Sure, it can be split up but we need to address at least
> the kvm platforms.
I've put Bharata in the loop, so that if issue is applicable to PPC it
would be fixed.

> > > 
> > > 
> > > > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > > > > index 91d134c..c462c4e 100644
> > > > > > --- a/hw/i386/pc.c
> > > > > > +++ b/hw/i386/pc.c
> > > > > > @@ -1629,6 +1629,7 @@ static void
> > > > > > pc_dimm_plug(HotplugHandler *hotplug_dev,
> > > > > > HotplugHandlerClass *hhc; Error *local_err = NULL;
> > > > > >      PCMachineState *pcms = PC_MACHINE(hotplug_dev);
> > > > > > +    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> > > > > >      PCDIMMDevice *dimm = PC_DIMM(dev);
> > > > > >      PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
> > > > > >      MemoryRegion *mr = ddc->get_memory_region(dimm);
> > > > > > @@ -1644,7 +1645,8 @@ static void
> > > > > > pc_dimm_plug(HotplugHandler *hotplug_dev, goto out;
> > > > > >      }
> > > > > >  
> > > > > > -    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr,
> > > > > > align, 0, &local_err);
> > > > > > +    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr,
> > > > > > align,
> > > > > > +                        pcmc->inter_dimm_gap, &local_err);
> > > > > >      if (local_err) {
> > > > > >          goto out;
> > > > > >      }
> > > > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > > > > index 3ffb05f..3165667 100644
> > > > > > --- a/hw/i386/pc_piix.c
> > > > > > +++ b/hw/i386/pc_piix.c
> > > > > > @@ -457,11 +457,13 @@ static void
> > > > > > pc_xen_hvm_init(MachineState *machine) 
> > > > > >  static void pc_i440fx_machine_options(MachineClass *m)
> > > > > >  {
> > > > > > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> > > > > >      m->family = "pc_piix";
> > > > > >      m->desc = "Standard PC (i440FX + PIIX, 1996)";
> > > > > >      m->hot_add_cpu = pc_hot_add_cpu;
> > > > > >      m->default_machine_opts = "firmware=bios-256k.bin";
> > > > > >      m->default_display = "std";
> > > > > > +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
> > > > > >  }
> > > > > >  
> > > > > >  static void pc_i440fx_2_5_machine_options(MachineClass *m)
> > > > > > @@ -482,6 +484,7 @@ static void
> > > > > > pc_i440fx_2_4_machine_options(MachineClass *m) m->alias =
> > > > > > NULL; m->is_default = 0;
> > > > > >      pcmc->broken_reserved_end = true;
> > > > > > +    pcmc->inter_dimm_gap = 0;
> > > > > >      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
> > > > > >  }
> > > > > >  
> > > > > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > > > > index 1b7d3b6..8ad6687 100644
> > > > > > --- a/hw/i386/pc_q35.c
> > > > > > +++ b/hw/i386/pc_q35.c
> > > > > > @@ -360,6 +360,7 @@ static void pc_compat_1_4(MachineState
> > > > > > *machine) 
> > > > > >  static void pc_q35_machine_options(MachineClass *m)
> > > > > >  {
> > > > > > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> > > > > >      m->family = "pc_q35";
> > > > > >      m->desc = "Standard PC (Q35 + ICH9, 2009)";
> > > > > >      m->hot_add_cpu = pc_hot_add_cpu;
> > > > > > @@ -368,6 +369,7 @@ static void
> > > > > > pc_q35_machine_options(MachineClass *m) m->default_display
> > > > > > = "std"; m->no_floppy = 1;
> > > > > >      m->no_tco = 0;
> > > > > > +    pcmc->inter_dimm_gap = PC_2MB_DIMM_GAP;
> > > > > >  }
> > > > > >  
> > > > > >  static void pc_q35_2_5_machine_options(MachineClass *m)
> > > > > > @@ -385,6 +387,7 @@ static void
> > > > > > pc_q35_2_4_machine_options(MachineClass *m)
> > > > > > pc_q35_2_5_machine_options(m); m->alias = NULL;
> > > > > >      pcmc->broken_reserved_end = true;
> > > > > > +    pcmc->inter_dimm_gap = 0;
> > > > > >      SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
> > > > > >  }
> > > > > >  
> > > > > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > > > > > index 6896328..dd6b34a 100644
> > > > > > --- a/include/hw/i386/pc.h
> > > > > > +++ b/include/hw/i386/pc.h
> > > > > > @@ -50,6 +50,7 @@ struct PCMachineState {
> > > > > >  #define PC_MACHINE_SMM              "smm"
> > > > > >  #define PC_MACHINE_ENFORCE_ALIGNED_DIMM
> > > > > > "enforce-aligned-dimm" 
> > > > > > +#define PC_2MB_DIMM_GAP (1ULL << 21)
> > > > > >  /**
> > > > > >   * PCMachineClass:
> > > > > >   * @get_hotplug_handler: pointer to parent class callback
> > > > > > @get_hotplug_handler
> > > > > 
> > > > > Seems somewhat arbitrary. It's aligned later - so won't a 1
> > > > > byte gap be enough?
> > > > 1 byte should be also enough, since effectively it would kick
> > > > alignment adjustment.
> > > > 
> > > > The reason why I've picked 2Mb is that QEMU ram allocator
> > > > allocates 2Mb granularity.
> > > 
> > > It will align it itself. We don't want that logic to spread out to
> > > unrelated parts of QEMU.
> > Ok, I'll make it boolean and just use hardcoded 1
> > in pc_dimm_plug()->pc_dimm_memory_plug(...,1,...)
> > 
> > > 
> > > > > 
> > > > > > @@ -60,6 +61,7 @@ struct PCMachineClass {
> > > > > >  
> > > > > >      /*< public >*/
> > > > > >      bool broken_reserved_end;
> > > > > > +    uint64_t inter_dimm_gap;
> > > > > >      HotplugHandler *(*get_hotplug_handler)(MachineState
> > > > > > *machine, DeviceState *dev);
> > > > > >  };
> > > > > > -- 
> > > > > > 1.8.3.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-28  9:13             ` Igor Mammedov
@ 2015-10-05  8:44               ` Bharata B Rao
  0 siblings, 0 replies; 12+ messages in thread
From: Bharata B Rao @ 2015-10-05  8:44 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: pkrempa, ehabkost, Michael S. Tsirkin, qemu-devel, pbonzini,
	david

On Mon, Sep 28, 2015 at 11:13:42AM +0200, Igor Mammedov wrote:
> On Mon, 28 Sep 2015 10:09:26 +0530
> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> 
> > On Sun, Sep 27, 2015 at 04:04:06PM +0200, Igor Mammedov wrote:
> > > On Sun, 27 Sep 2015 16:11:02 +0300
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > 
> > > > On Sun, Sep 27, 2015 at 03:06:24PM +0200, Igor Mammedov wrote:
> > > > > On Sun, 27 Sep 2015 13:48:21 +0300
> > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > 
> > > > > > On Fri, Sep 25, 2015 at 03:53:12PM +0200, Igor Mammedov wrote:
> > > > > > > mapping DIMMs non contiguously allows to workaround
> > > > > > > virtio bug reported earlier:
> > > > > > > http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> > > > > > > in this case guest kernel doesn't allocate buffers
> > > > > > > that can cross DIMM boundary keeping each buffer
> > > > > > > local to a DIMM.
> > > > > > > 
> > > > > > > Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > > ---
> > > > > > > benefit of this workaround is that no guest side
> > > > > > > changes are required.
> > > > > > 
> > > > > > That's a hard requirement, I agree.
> > > > > > 
> > > > > > 
> > > > > > > ---
> > > > > > >  hw/i386/pc.c         | 4 +++-
> > > > > > >  hw/i386/pc_piix.c    | 3 +++
> > > > > > >  hw/i386/pc_q35.c     | 3 +++
> > > > > > >  include/hw/i386/pc.h | 2 ++
> > > > > > >  4 files changed, 11 insertions(+), 1 deletion(-)
> > > > > > 
> > > > > > Aren't other architectures besides PC ever affected?
> > > > > > Do they all allocate all of memory contigious in HVA space?
> > > > > I'm not sure about other targets I've CCed interested parties.
> > > > > 
> > > > > > 
> > > > > > Also - does the issue only affect hotplugged memory?
> > > > > Potentially it affects -numa memdev=foo, but however I've
> > > > > tried I wasn't able to reproduce.
> > > > > We could do it as
> > > > > separate workaround later if it would affect someone
> > > > > and virtio is not fixed to handle split buffers by that time.
> > > > > 
> > > > 
> > > > You can't reproduce a crash or you can't reproduce getting
> > > > contigious GPA with fragmented HVA?
> > > > If you can see fragmentation that's enough to assume guest crash
> > > > can be triggered, even if it doesn't with Linux.
> > > I'll check it.
> > > 
> > > > 
> > > > >  
> > > > > > Can't the patch be local to pc-dimm (except maybe the
> > > > > > backwards compatibility thing)?
> > > > > I think decision about using gaps and its size
> > > > > should be done by board and not generic pc-dimm.
> > > > > 
> > > > 
> > > > Well virtio is generic and can be used by all boards.
> > > Still pc-dimm.addr is not allocation is not part of pc-dimm
> > > device. it's just helper functions that happen to live in
> > > the same file source file.
> > > 
> > > But more importantly every target might have it's own
> > > notion how it partitions hotplug address space so making
> > > the same gap global might break them.
> > > 
> > > It's safer to enable gaps per target, I think ppc guys
> > > will make their own patch on top of this to taking
> > > in account their target specific and compat stuff.
> > 
> > I have never seen this issue that you mention at
> > 
> > http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
> > 
> > in PowerPC. I have not been able to reproduce the QEMU crash with the
> > commandline suggested there.
> > 
> > (# ./ppc64-softmmu/qemu-system-ppc64 --enable-kvm --nographic
> > -machine pseries -m 8G,slots=32,maxmem=32G -device
> > virtio-blk-pci,drive=rootdisk -drive
> > file=/home/bharata/F20-snap1,if=none,cache=none,id=rootdisk,format=qcow2
> > -monitor telnet:localhost:1235,server,nowait -vga none
> > -bios /home/bharata/slof/slof.bin -smp 16,maxcpus=32 -netdev
> > tap,id=foo,ifname=tap0,script=/home/bharata/qemu-ifup -device
> > virtio-net-pci,id=n1,netdev=foo `for i in $(seq 0 15); do echo -n
> > "-object memory-backend-ram,id=m$i,size=256M -device
> > pc-dimm,id=dimm$i,memdev=m$i "; done` -snapshot)
> > 
> > PowerPC sPAPR memory hotplug enforces memory alignment of 256MB
> > for both boottime as well as hotplugged memory.
> > 
> > So not sure if anything other than the default gap=0 which you have
> > done in this patchset for PowerPC is necessary.
> The bigger initial memory and dimm sizes the less likelihood of
> triggering the bug. You don't see it mostly due to luck, but it doesn't
> rule out possibility of it happening in production.
> So please consider turning on gaps for ppc machine.
> 
> Looking at how hotplug_mem_size is sized in hw/ppc/spapr.c it doesn't
> look that just turning on gaps would work since it doesn't have a space
> for alignment adjustment.
> 
> try to plug dimm device in following order:
>   -m 8G,slots=2,maxmem=1256M \
> 
>   -object memory-backend-ram,id=m1,size=256M -device pc-dimm,memdev=m1 \
> 
>   -object memory-backend-file,id=hugepage1g,size=1G,file=/path/to1Gb/hugepagefs \
>   -device pc-dimm,memdev=hugepage1g
> 
> it should fail when adding second dimm since alignment for 1Gb huge page
> would be 1Gb but hotplug_mem container size is only 1256M total.

PowerKVM supports only 16MB huge page size. Given that we enforce 256MB
alignment on RAM size, individual node mem sizes, maxmem and DIMM size,
I don't think we will encounter the problem similar to the above.

However to take care of the virtio bug, I just posted a patchset to
force gaps between DIMMs.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2015-10-05  8:45 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-09-25 13:53 [Qemu-devel] [PATCH 0/2] ps: memhp: enforce gaps between DIMMs Igor Mammedov
2015-09-25 13:53 ` [Qemu-devel] [PATCH 1/2] memhp: extend address auto assignment to support gaps Igor Mammedov
2015-09-25 13:53 ` [Qemu-devel] [PATCH 2/2] pc: memhp: force gaps between DIMM's GPA Igor Mammedov
2015-09-27 10:48   ` Michael S. Tsirkin
2015-09-27 13:06     ` Igor Mammedov
2015-09-27 13:11       ` Michael S. Tsirkin
2015-09-27 14:04         ` Igor Mammedov
2015-09-27 14:18           ` Michael S. Tsirkin
2015-09-28  9:18             ` Igor Mammedov
2015-09-28  4:39           ` Bharata B Rao
2015-09-28  9:13             ` Igor Mammedov
2015-10-05  8:44               ` Bharata B Rao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).