qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/2] pc: memhp: enforce gaps between DIMMs
@ 2015-09-28  9:54 Igor Mammedov
  2015-09-28  9:54 ` [Qemu-devel] [PATCH v2 1/2] memhp: extend address auto assignment to support gaps Igor Mammedov
  2015-09-28  9:54 ` [Qemu-devel] [PATCH v2 2/2] pc: memhp: force gaps between DIMM's GPA Igor Mammedov
  0 siblings, 2 replies; 4+ messages in thread
From: Igor Mammedov @ 2015-09-28  9:54 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, bharata, ehabkost, david, mst

v2:
   make inter_dimm_gap a boolean and insert a gap of 1 byte
   instead of 2MB; due to alignment, that gap is extended
   up to the backend's natural alignment value.

This is a simpler approach, suggested by Michael S. Tsirkin,
to work around the virtio bug reported earlier:
http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
where virtio can't handle a buffer that crosses the border
between 2 DIMMs (i.e. 2 MemoryRegions).

The idea is to leave gaps between DIMMs, making their GPAs
non-contiguous, which effectively forces kmalloc
not to use a DIMM if the buffer doesn't fit inside it.

Simpler reproducer:
 qemu-system-x86_64 -enable-kvm  -m 256,slots=250,maxmem=32G  -drive if=virtio,file=/dev/slow/rhel72  \
  `for i in $(seq 0 58); do echo -n "-object memory-backend-ram,id=m$i,size=10M -device pc-dimm,id=dimm$i,memdev=m$i "; done` \
   -nodefaults -snapshot -serial stdio -nographic -M pc-i440fx-2.4

If the guest manages to boot, just log in and run:
  dd if=/dev/vda of=/dev/null bs=128M

It will crash QEMU in 99% of cases with the following message:
  qemu-system-x86_64: virtio: error trying to map MMIO memory

Igor Mammedov (2):
  memhp: extend address auto assignment to support gaps
  pc: memhp: force gaps between DIMM's GPA

 hw/i386/pc.c             |  4 +++-
 hw/i386/pc_piix.c        |  3 +++
 hw/i386/pc_q35.c         |  3 +++
 hw/mem/pc-dimm.c         | 13 +++++++------
 hw/ppc/spapr.c           |  2 +-
 include/hw/i386/pc.h     |  1 +
 include/hw/mem/pc-dimm.h |  7 ++++---
 7 files changed, 22 insertions(+), 11 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH v2 1/2] memhp: extend address auto assignment to support gaps
  2015-09-28  9:54 [Qemu-devel] [PATCH v2 0/2] pc: memhp: enforce gaps between DIMMs Igor Mammedov
@ 2015-09-28  9:54 ` Igor Mammedov
  2015-09-28  9:54 ` [Qemu-devel] [PATCH v2 2/2] pc: memhp: force gaps between DIMM's GPA Igor Mammedov
  1 sibling, 0 replies; 4+ messages in thread
From: Igor Mammedov @ 2015-09-28  9:54 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, bharata, ehabkost, david, mst

Setting gap to a non-zero value makes DIMM address
auto-allocation sparse, leaving a gap between
a new DIMM's address and the preceding existing DIMM.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/i386/pc.c             |  2 +-
 hw/mem/pc-dimm.c         | 13 +++++++------
 hw/ppc/spapr.c           |  2 +-
 include/hw/mem/pc-dimm.h |  7 ++++---
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 461c128..91d134c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1644,7 +1644,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
+    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, 0, &local_err);
     if (local_err) {
         goto out;
     }
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index bb04862..1e5a8ea 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -32,7 +32,8 @@ typedef struct pc_dimms_capacity {
 } pc_dimms_capacity;
 
 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-                         MemoryRegion *mr, uint64_t align, Error **errp)
+                         MemoryRegion *mr, uint64_t align, uint64_t gap,
+                         Error **errp)
 {
     int slot;
     MachineState *machine = MACHINE(qdev_get_machine());
@@ -48,7 +49,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
 
     addr = pc_dimm_get_free_addr(hpms->base,
                                  memory_region_size(&hpms->mr),
-                                 !addr ? NULL : &addr, align,
+                                 !addr ? NULL : &addr, align, gap,
                                  memory_region_size(mr), &local_err);
     if (local_err) {
         goto out;
@@ -287,8 +288,8 @@ static int pc_dimm_built_list(Object *obj, void *opaque)
 
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
-                               uint64_t *hint, uint64_t align, uint64_t size,
-                               Error **errp)
+                               uint64_t *hint, uint64_t align, uint64_t gap,
+                               uint64_t size, Error **errp)
 {
     GSList *list = NULL, *item;
     uint64_t new_addr, ret = 0;
@@ -333,13 +334,13 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
             goto out;
         }
 
-        if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) {
+        if (ranges_overlap(dimm->addr, dimm_size, new_addr, size + gap)) {
             if (hint) {
                 DeviceState *d = DEVICE(dimm);
                 error_setg(errp, "address range conflicts with '%s'", d->id);
                 goto out;
             }
-            new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align);
+            new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + gap, align);
         }
     }
     ret = new_addr;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a9b5f2a..a40ada5 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2096,7 +2096,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
+    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, 0, &local_err);
     if (local_err) {
         goto out;
     }
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index d83bf30..aa784f8 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h
@@ -83,15 +83,16 @@ typedef struct MemoryHotplugState {
 
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
-                               uint64_t *hint, uint64_t align, uint64_t size,
-                               Error **errp);
+                               uint64_t *hint, uint64_t align, uint64_t gap,
+                               uint64_t size, Error **errp);
 
 int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
 
 int qmp_pc_dimm_device_list(Object *obj, void *opaque);
 uint64_t pc_existing_dimms_capacity(Error **errp);
 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-                         MemoryRegion *mr, uint64_t align, Error **errp);
+                         MemoryRegion *mr, uint64_t align, uint64_t gap,
+                         Error **errp);
 void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
                            MemoryRegion *mr);
 #endif
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH v2 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-28  9:54 [Qemu-devel] [PATCH v2 0/2] pc: memhp: enforce gaps between DIMMs Igor Mammedov
  2015-09-28  9:54 ` [Qemu-devel] [PATCH v2 1/2] memhp: extend address auto assignment to support gaps Igor Mammedov
@ 2015-09-28  9:54 ` Igor Mammedov
  2015-09-28 17:44   ` Eduardo Habkost
  1 sibling, 1 reply; 4+ messages in thread
From: Igor Mammedov @ 2015-09-28  9:54 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, bharata, ehabkost, david, mst

Mapping DIMMs non-contiguously makes it possible to work around
the virtio bug reported earlier:
http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
In this case the guest kernel doesn't allocate buffers
that can cross a DIMM boundary, keeping each buffer
local to a DIMM.

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
v2:
  make inter_dimm_gap boolean

The benefit of this workaround is that no guest-side
changes are required.
---
 hw/i386/pc.c         | 4 +++-
 hw/i386/pc_piix.c    | 3 +++
 hw/i386/pc_q35.c     | 3 +++
 include/hw/i386/pc.h | 1 +
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 91d134c..fc97463 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1629,6 +1629,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
+    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *mr = ddc->get_memory_region(dimm);
@@ -1644,7 +1645,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, 0, &local_err);
+    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
+                        pcmc->inter_dimm_gap ? 1 : 0, &local_err);
     if (local_err) {
         goto out;
     }
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 3ffb05f..ece735d 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -457,11 +457,13 @@ static void pc_xen_hvm_init(MachineState *machine)
 
 static void pc_i440fx_machine_options(MachineClass *m)
 {
+    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
     m->family = "pc_piix";
     m->desc = "Standard PC (i440FX + PIIX, 1996)";
     m->hot_add_cpu = pc_hot_add_cpu;
     m->default_machine_opts = "firmware=bios-256k.bin";
     m->default_display = "std";
+    pcmc->inter_dimm_gap = true;
 }
 
 static void pc_i440fx_2_5_machine_options(MachineClass *m)
@@ -482,6 +484,7 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
     m->alias = NULL;
     m->is_default = 0;
     pcmc->broken_reserved_end = true;
+    pcmc->inter_dimm_gap = false;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
 }
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 1b7d3b6..ca73f25 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -360,6 +360,7 @@ static void pc_compat_1_4(MachineState *machine)
 
 static void pc_q35_machine_options(MachineClass *m)
 {
+    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
     m->family = "pc_q35";
     m->desc = "Standard PC (Q35 + ICH9, 2009)";
     m->hot_add_cpu = pc_hot_add_cpu;
@@ -368,6 +369,7 @@ static void pc_q35_machine_options(MachineClass *m)
     m->default_display = "std";
     m->no_floppy = 1;
     m->no_tco = 0;
+    pcmc->inter_dimm_gap = true;
 }
 
 static void pc_q35_2_5_machine_options(MachineClass *m)
@@ -385,6 +387,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
     pc_q35_2_5_machine_options(m);
     m->alias = NULL;
     pcmc->broken_reserved_end = true;
+    pcmc->inter_dimm_gap = false;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
 }
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index ab5413f..c13e91d 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -60,6 +60,7 @@ struct PCMachineClass {
 
     /*< public >*/
     bool broken_reserved_end;
+    bool inter_dimm_gap;
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
 };
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/2] pc: memhp: force gaps between DIMM's GPA
  2015-09-28  9:54 ` [Qemu-devel] [PATCH v2 2/2] pc: memhp: force gaps between DIMM's GPA Igor Mammedov
@ 2015-09-28 17:44   ` Eduardo Habkost
  0 siblings, 0 replies; 4+ messages in thread
From: Eduardo Habkost @ 2015-09-28 17:44 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: pbonzini, bharata, qemu-devel, david, mst

On Mon, Sep 28, 2015 at 11:54:59AM +0200, Igor Mammedov wrote:
[...]
>  static void pc_i440fx_machine_options(MachineClass *m)
>  {
> +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>      m->family = "pc_piix";
>      m->desc = "Standard PC (i440FX + PIIX, 1996)";
>      m->hot_add_cpu = pc_hot_add_cpu;
>      m->default_machine_opts = "firmware=bios-256k.bin";
>      m->default_display = "std";
> +    pcmc->inter_dimm_gap = true;
[...]
>  static void pc_q35_machine_options(MachineClass *m)
>  {
> +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>      m->family = "pc_q35";
>      m->desc = "Standard PC (Q35 + ICH9, 2009)";
>      m->hot_add_cpu = pc_hot_add_cpu;
> @@ -368,6 +369,7 @@ static void pc_q35_machine_options(MachineClass *m)
>      m->default_display = "std";
>      m->no_floppy = 1;
>      m->no_tco = 0;
> +    pcmc->inter_dimm_gap = true;
>  }

You can initialize this at pc_machine_class_init() and avoid duplication
between pc_i440fx_machine_options() and pc_q35_machine_options().

But this is not incorrect, so if you prefer it this way:

Acked-by: Eduardo Habkost <ehabkost@redhat.com>

-- 
Eduardo

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-09-28 17:44 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-09-28  9:54 [Qemu-devel] [PATCH v2 0/2] pc: memhp: enforce gaps between DIMMs Igor Mammedov
2015-09-28  9:54 ` [Qemu-devel] [PATCH v2 1/2] memhp: extend address auto assignment to support gaps Igor Mammedov
2015-09-28  9:54 ` [Qemu-devel] [PATCH v2 2/2] pc: memhp: force gaps between DIMM's GPA Igor Mammedov
2015-09-28 17:44   ` Eduardo Habkost

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).