[Qemu-devel] [PATCH v3] virtio-mmio: implement modern (v2) personality (virtio-1)

All of lore.kernel.org
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH v3] virtio-mmio: implement modern (v2) personality (virtio-1)
@ 2019-09-17 15:11 ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Sergio Lopez @ 2019-09-13 12:06 UTC (permalink / raw)
  To: mst; +Cc: peter.maydell, Sergio Lopez, stefanha, cohuck, qemu-devel,
	abologna

Implement the modern (v2) personality, according to the VirtIO 1.0
specification.

Support for v2 among guests is not as widespread as would be
desirable. While the Linux driver has had it for a while, support is
missing from, at least, Tianocore EDK II, NetBSD and FreeBSD.

For this reason, the v2 personality is disabled by default, keeping the
legacy behavior. Machine types that want to use v2 can enable it by
turning off the "force-legacy" property through MachineClass's compat_props.

Signed-off-by: Sergio Lopez <slp@redhat.com>
---
Changelog:

v3:
 - Use %HWADDR_PRIx instead of %x. (Stefan Hajnoczi)
 - Return 0 if host_features_sel > 0 for legacy mode. (Cornelia Huck)
 - Mask out legacy features in non-legacy mode. (Cornelia Huck)
 - Log an error on guest attempts to write guest_features with
   guest_features_sel > 0 in legacy mode. (Cornelia Huck)

v2:
 - Switch from RFC to PATCH.
 - Avoid the modern vs. legacy dichotomy. Use legacy or non-legacy
   instead. (Andrea Bolognani, Cornelia Huck)
 - Include the register offset in the warning messages. (Stefan
   Hajnoczi)
 - Fix device endianness for the non-legacy mode. (Michael S. Tsirkin)
 - Honor the specs in VIRTIO_MMIO_QUEUE_READY. (Michael S. Tsirkin)
---
 hw/virtio/virtio-mmio.c | 342 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 319 insertions(+), 23 deletions(-)

diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 97b7f35496..3578ae37be 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -47,14 +47,24 @@
         OBJECT_CHECK(VirtIOMMIOProxy, (obj), TYPE_VIRTIO_MMIO)
 
 #define VIRT_MAGIC 0x74726976 /* 'virt' */
-#define VIRT_VERSION 1
+#define VIRT_VERSION 2
+#define VIRT_VERSION_LEGACY 1
 #define VIRT_VENDOR 0x554D4551 /* 'QEMU' */
 
+typedef struct VirtIOMMIOQueue {
+    uint16_t num;
+    bool enabled;
+    uint32_t desc[2];
+    uint32_t avail[2];
+    uint32_t used[2];
+} VirtIOMMIOQueue;
+
 typedef struct {
     /* Generic */
     SysBusDevice parent_obj;
     MemoryRegion iomem;
     qemu_irq irq;
+    bool legacy;
     /* Guest accessible state needing migration and reset */
     uint32_t host_features_sel;
     uint32_t guest_features_sel;
@@ -62,6 +72,9 @@ typedef struct {
     /* virtio-bus */
     VirtioBusState bus;
     bool format_transport_address;
+    /* Fields only used for non-legacy (v2) devices */
+    uint32_t guest_features[2];
+    VirtIOMMIOQueue vqs[VIRTIO_QUEUE_MAX];
 } VirtIOMMIOProxy;
 
 static bool virtio_mmio_ioeventfd_enabled(DeviceState *d)
@@ -115,7 +128,11 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
         case VIRTIO_MMIO_MAGIC_VALUE:
             return VIRT_MAGIC;
         case VIRTIO_MMIO_VERSION:
-            return VIRT_VERSION;
+            if (proxy->legacy) {
+                return VIRT_VERSION_LEGACY;
+            } else {
+                return VIRT_VERSION;
+            }
         case VIRTIO_MMIO_VENDOR_ID:
             return VIRT_VENDOR;
         default:
@@ -146,28 +163,64 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
     case VIRTIO_MMIO_MAGIC_VALUE:
         return VIRT_MAGIC;
     case VIRTIO_MMIO_VERSION:
-        return VIRT_VERSION;
+        if (proxy->legacy) {
+            return VIRT_VERSION_LEGACY;
+        } else {
+            return VIRT_VERSION;
+        }
     case VIRTIO_MMIO_DEVICE_ID:
         return vdev->device_id;
     case VIRTIO_MMIO_VENDOR_ID:
         return VIRT_VENDOR;
     case VIRTIO_MMIO_DEVICE_FEATURES:
-        if (proxy->host_features_sel) {
-            return 0;
+        if (proxy->legacy) {
+            if (proxy->host_features_sel) {
+                return 0;
+            } else {
+                return vdev->host_features;
+            }
+        } else {
+            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+            return (vdev->host_features & ~vdc->legacy_features)
+                >> (32 * proxy->host_features_sel);
         }
-        return vdev->host_features;
     case VIRTIO_MMIO_QUEUE_NUM_MAX:
         if (!virtio_queue_get_num(vdev, vdev->queue_sel)) {
             return 0;
         }
         return VIRTQUEUE_MAX_SIZE;
     case VIRTIO_MMIO_QUEUE_PFN:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
         return virtio_queue_get_addr(vdev, vdev->queue_sel)
             >> proxy->guest_page_shift;
+    case VIRTIO_MMIO_QUEUE_READY:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
+        return proxy->vqs[vdev->queue_sel].enabled;
     case VIRTIO_MMIO_INTERRUPT_STATUS:
         return atomic_read(&vdev->isr);
     case VIRTIO_MMIO_STATUS:
         return vdev->status;
+    case VIRTIO_MMIO_CONFIG_GENERATION:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
+        return vdev->generation;
     case VIRTIO_MMIO_DEVICE_FEATURES_SEL:
     case VIRTIO_MMIO_DRIVER_FEATURES:
     case VIRTIO_MMIO_DRIVER_FEATURES_SEL:
@@ -177,12 +230,20 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
     case VIRTIO_MMIO_QUEUE_ALIGN:
     case VIRTIO_MMIO_QUEUE_NOTIFY:
     case VIRTIO_MMIO_INTERRUPT_ACK:
+    case VIRTIO_MMIO_QUEUE_DESC_LOW:
+    case VIRTIO_MMIO_QUEUE_DESC_HIGH:
+    case VIRTIO_MMIO_QUEUE_AVAIL_LOW:
+    case VIRTIO_MMIO_QUEUE_AVAIL_HIGH:
+    case VIRTIO_MMIO_QUEUE_USED_LOW:
+    case VIRTIO_MMIO_QUEUE_USED_HIGH:
         qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: read of write-only register\n",
-                      __func__);
+                      "%s: read of write-only register (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
         return 0;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad register offset\n", __func__);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: bad register offset (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
         return 0;
     }
     return 0;
@@ -229,17 +290,41 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
     }
     switch (offset) {
     case VIRTIO_MMIO_DEVICE_FEATURES_SEL:
-        proxy->host_features_sel = value;
+        if (value) {
+            proxy->host_features_sel = 1;
+        } else {
+            proxy->host_features_sel = 0;
+        }
         break;
     case VIRTIO_MMIO_DRIVER_FEATURES:
-        if (!proxy->guest_features_sel) {
-            virtio_set_features(vdev, value);
+        if (proxy->legacy) {
+            if (proxy->guest_features_sel) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "%s: attempt to write guest features with "
+                              "guest_features_sel > 0 in legacy mode\n",
+                              __func__);
+            } else {
+                virtio_set_features(vdev, value);
+            }
+        } else {
+            proxy->guest_features[proxy->guest_features_sel] = value;
         }
         break;
     case VIRTIO_MMIO_DRIVER_FEATURES_SEL:
-        proxy->guest_features_sel = value;
+        if (value) {
+            proxy->guest_features_sel = 1;
+        } else {
+            proxy->guest_features_sel = 0;
+        }
         break;
     case VIRTIO_MMIO_GUEST_PAGE_SIZE:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
         proxy->guest_page_shift = ctz32(value);
         if (proxy->guest_page_shift > 31) {
             proxy->guest_page_shift = 0;
@@ -253,15 +338,31 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
         break;
     case VIRTIO_MMIO_QUEUE_NUM:
         trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE);
-        virtio_queue_set_num(vdev, vdev->queue_sel, value);
-        /* Note: only call this function for legacy devices */
-        virtio_queue_update_rings(vdev, vdev->queue_sel);
+        if (proxy->legacy) {
+            virtio_queue_set_num(vdev, vdev->queue_sel, value);
+            virtio_queue_update_rings(vdev, vdev->queue_sel);
+        } else {
+            proxy->vqs[vdev->queue_sel].num = value;
+        }
         break;
     case VIRTIO_MMIO_QUEUE_ALIGN:
-        /* Note: this is only valid for legacy devices */
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
         virtio_queue_set_align(vdev, vdev->queue_sel, value);
         break;
     case VIRTIO_MMIO_QUEUE_PFN:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
         if (value == 0) {
             virtio_reset(vdev);
         } else {
@@ -269,6 +370,29 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
                                   value << proxy->guest_page_shift);
         }
         break;
+    case VIRTIO_MMIO_QUEUE_READY:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        if (value) {
+            virtio_queue_set_num(vdev, vdev->queue_sel,
+                                 proxy->vqs[vdev->queue_sel].num);
+            virtio_queue_set_rings(vdev, vdev->queue_sel,
+                ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].desc[0],
+                ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].avail[0],
+                ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].used[0]);
+            proxy->vqs[vdev->queue_sel].enabled = 1;
+        } else {
+            proxy->vqs[vdev->queue_sel].enabled = 0;
+        }
+        break;
     case VIRTIO_MMIO_QUEUE_NOTIFY:
         if (value < VIRTIO_QUEUE_MAX) {
             virtio_queue_notify(vdev, value);
@@ -283,6 +407,12 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
             virtio_mmio_stop_ioeventfd(proxy);
         }
 
+        if (!proxy->legacy && (value & VIRTIO_CONFIG_S_FEATURES_OK)) {
+            virtio_set_features(vdev,
+                                ((uint64_t)proxy->guest_features[1]) << 32 |
+                                proxy->guest_features[0]);
+        }
+
         virtio_set_status(vdev, value & 0xff);
 
         if (value & VIRTIO_CONFIG_S_DRIVER_OK) {
@@ -293,6 +423,66 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
             virtio_reset(vdev);
         }
         break;
+    case VIRTIO_MMIO_QUEUE_DESC_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].desc[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_DESC_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].desc[1] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_AVAIL_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].avail[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_AVAIL_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].avail[1] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_USED_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].used[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_USED_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].used[1] = value;
+        break;
     case VIRTIO_MMIO_MAGIC_VALUE:
     case VIRTIO_MMIO_VERSION:
     case VIRTIO_MMIO_DEVICE_ID:
@@ -300,22 +490,31 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
     case VIRTIO_MMIO_DEVICE_FEATURES:
     case VIRTIO_MMIO_QUEUE_NUM_MAX:
     case VIRTIO_MMIO_INTERRUPT_STATUS:
+    case VIRTIO_MMIO_CONFIG_GENERATION:
         qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: write to readonly register\n",
-                      __func__);
+                      "%s: write to read-only register (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
         break;
 
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad register offset\n", __func__);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: bad register offset (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
     }
 }
 
-static const MemoryRegionOps virtio_mem_ops = {
+static const MemoryRegionOps virtio_legacy_mem_ops = {
     .read = virtio_mmio_read,
     .write = virtio_mmio_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static const MemoryRegionOps virtio_mem_ops = {
+    .read = virtio_mmio_read,
+    .write = virtio_mmio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
 static void virtio_mmio_update_irq(DeviceState *opaque, uint16_t vector)
 {
     VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
@@ -349,15 +548,90 @@ static void virtio_mmio_save_config(DeviceState *opaque, QEMUFile *f)
     qemu_put_be32(f, proxy->guest_page_shift);
 }
 
+static const VMStateDescription vmstate_virtio_mmio_queue_state = {
+    .name = "virtio_mmio/queue_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16(num, VirtIOMMIOQueue),
+        VMSTATE_BOOL(enabled, VirtIOMMIOQueue),
+        VMSTATE_UINT32_ARRAY(desc, VirtIOMMIOQueue, 2),
+        VMSTATE_UINT32_ARRAY(avail, VirtIOMMIOQueue, 2),
+        VMSTATE_UINT32_ARRAY(used, VirtIOMMIOQueue, 2),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_virtio_mmio_state_sub = {
+    .name = "virtio_mmio/state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(guest_features, VirtIOMMIOProxy, 2),
+        VMSTATE_STRUCT_ARRAY(vqs, VirtIOMMIOProxy, VIRTIO_QUEUE_MAX, 0,
+                             vmstate_virtio_mmio_queue_state,
+                             VirtIOMMIOQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_virtio_mmio = {
+    .name = "virtio_mmio",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_mmio_state_sub,
+        NULL
+    }
+};
+
+static void virtio_mmio_save_extra_state(DeviceState *opaque, QEMUFile *f)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL);
+}
+
+static int virtio_mmio_load_extra_state(DeviceState *opaque, QEMUFile *f)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1);
+}
+
+static bool virtio_mmio_has_extra_state(DeviceState *opaque)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    return !proxy->legacy;
+}
+
 static void virtio_mmio_reset(DeviceState *d)
 {
     VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    int i;
 
     virtio_mmio_stop_ioeventfd(proxy);
     virtio_bus_reset(&proxy->bus);
     proxy->host_features_sel = 0;
     proxy->guest_features_sel = 0;
     proxy->guest_page_shift = 0;
+
+    if (!proxy->legacy) {
+        proxy->guest_features[0] = proxy->guest_features[1] = 0;
+
+        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+            proxy->vqs[i].enabled = 0;
+            proxy->vqs[i].num = 0;
+            proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
+            proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
+            proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
+        }
+    }
 }
 
 static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
@@ -420,11 +694,22 @@ assign_error:
     return r;
 }
 
+static void virtio_mmio_pre_plugged(DeviceState *d, Error **errp)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (!proxy->legacy) {
+        virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
+    }
+}
+
 /* virtio-mmio device */
 
 static Property virtio_mmio_properties[] = {
     DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy,
                      format_transport_address, true),
+    DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -436,8 +721,15 @@ static void virtio_mmio_realizefn(DeviceState *d, Error **errp)
     qbus_create_inplace(&proxy->bus, sizeof(proxy->bus), TYPE_VIRTIO_MMIO_BUS,
                         d, NULL);
     sysbus_init_irq(sbd, &proxy->irq);
-    memory_region_init_io(&proxy->iomem, OBJECT(d), &virtio_mem_ops, proxy,
-                          TYPE_VIRTIO_MMIO, 0x200);
+    if (proxy->legacy) {
+        memory_region_init_io(&proxy->iomem, OBJECT(d),
+                              &virtio_legacy_mem_ops, proxy,
+                              TYPE_VIRTIO_MMIO, 0x200);
+    } else {
+        memory_region_init_io(&proxy->iomem, OBJECT(d),
+                              &virtio_mem_ops, proxy,
+                              TYPE_VIRTIO_MMIO, 0x200);
+    }
     sysbus_init_mmio(sbd, &proxy->iomem);
 }
 
@@ -508,9 +800,13 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data)
     k->notify = virtio_mmio_update_irq;
     k->save_config = virtio_mmio_save_config;
     k->load_config = virtio_mmio_load_config;
+    k->save_extra_state = virtio_mmio_save_extra_state;
+    k->load_extra_state = virtio_mmio_load_extra_state;
+    k->has_extra_state = virtio_mmio_has_extra_state;
     k->set_guest_notifiers = virtio_mmio_set_guest_notifiers;
     k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled;
     k->ioeventfd_assign = virtio_mmio_ioeventfd_assign;
+    k->pre_plugged = virtio_mmio_pre_plugged;
     k->has_variable_vring_alignment = true;
     bus_class->max_dev = 1;
     bus_class->get_dev_path = virtio_mmio_bus_get_dev_path;
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 10/10] virtio-mmio: implement modern (v2) personality (virtio-1)
@ 2019-09-17 15:11 ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Cornelia Huck, Sergio Lopez

From: Sergio Lopez <slp@redhat.com>

Implement the modern (v2) personality, according to the VirtIO 1.0
specification.

Support for v2 among guests is not as widespread as would be
desirable. While the Linux driver has had it for a while, support is
missing from, at least, Tianocore EDK II, NetBSD and FreeBSD.

For this reason, the v2 personality is disabled by default, keeping the
legacy behavior. Machine types that want to use v2 can enable it by
turning off the "force-legacy" property through MachineClass's compat_props.

Signed-off-by: Sergio Lopez <slp@redhat.com>
Message-Id: <20190913120559.40835-1-slp@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/virtio-mmio.c | 342 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 319 insertions(+), 23 deletions(-)

diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index eccc795f28..3d5ca0f667 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -50,14 +50,24 @@
         OBJECT_CHECK(VirtIOMMIOProxy, (obj), TYPE_VIRTIO_MMIO)
 
 #define VIRT_MAGIC 0x74726976 /* 'virt' */
-#define VIRT_VERSION 1
+#define VIRT_VERSION 2
+#define VIRT_VERSION_LEGACY 1
 #define VIRT_VENDOR 0x554D4551 /* 'QEMU' */
 
+typedef struct VirtIOMMIOQueue {
+    uint16_t num;
+    bool enabled;
+    uint32_t desc[2];
+    uint32_t avail[2];
+    uint32_t used[2];
+} VirtIOMMIOQueue;
+
 typedef struct {
     /* Generic */
     SysBusDevice parent_obj;
     MemoryRegion iomem;
     qemu_irq irq;
+    bool legacy;
     /* Guest accessible state needing migration and reset */
     uint32_t host_features_sel;
     uint32_t guest_features_sel;
@@ -65,6 +75,9 @@ typedef struct {
     /* virtio-bus */
     VirtioBusState bus;
     bool format_transport_address;
+    /* Fields only used for non-legacy (v2) devices */
+    uint32_t guest_features[2];
+    VirtIOMMIOQueue vqs[VIRTIO_QUEUE_MAX];
 } VirtIOMMIOProxy;
 
 static bool virtio_mmio_ioeventfd_enabled(DeviceState *d)
@@ -118,7 +131,11 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
         case VIRTIO_MMIO_MAGIC_VALUE:
             return VIRT_MAGIC;
         case VIRTIO_MMIO_VERSION:
-            return VIRT_VERSION;
+            if (proxy->legacy) {
+                return VIRT_VERSION_LEGACY;
+            } else {
+                return VIRT_VERSION;
+            }
         case VIRTIO_MMIO_VENDOR_ID:
             return VIRT_VENDOR;
         default:
@@ -149,28 +166,64 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
     case VIRTIO_MMIO_MAGIC_VALUE:
         return VIRT_MAGIC;
     case VIRTIO_MMIO_VERSION:
-        return VIRT_VERSION;
+        if (proxy->legacy) {
+            return VIRT_VERSION_LEGACY;
+        } else {
+            return VIRT_VERSION;
+        }
     case VIRTIO_MMIO_DEVICE_ID:
         return vdev->device_id;
     case VIRTIO_MMIO_VENDOR_ID:
         return VIRT_VENDOR;
     case VIRTIO_MMIO_DEVICE_FEATURES:
-        if (proxy->host_features_sel) {
-            return 0;
+        if (proxy->legacy) {
+            if (proxy->host_features_sel) {
+                return 0;
+            } else {
+                return vdev->host_features;
+            }
+        } else {
+            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+            return (vdev->host_features & ~vdc->legacy_features)
+                >> (32 * proxy->host_features_sel);
         }
-        return vdev->host_features;
     case VIRTIO_MMIO_QUEUE_NUM_MAX:
         if (!virtio_queue_get_num(vdev, vdev->queue_sel)) {
             return 0;
         }
         return VIRTQUEUE_MAX_SIZE;
     case VIRTIO_MMIO_QUEUE_PFN:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
         return virtio_queue_get_addr(vdev, vdev->queue_sel)
             >> proxy->guest_page_shift;
+    case VIRTIO_MMIO_QUEUE_READY:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
+        return proxy->vqs[vdev->queue_sel].enabled;
     case VIRTIO_MMIO_INTERRUPT_STATUS:
         return atomic_read(&vdev->isr);
     case VIRTIO_MMIO_STATUS:
         return vdev->status;
+    case VIRTIO_MMIO_CONFIG_GENERATION:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
+        return vdev->generation;
     case VIRTIO_MMIO_DEVICE_FEATURES_SEL:
     case VIRTIO_MMIO_DRIVER_FEATURES:
     case VIRTIO_MMIO_DRIVER_FEATURES_SEL:
@@ -180,12 +233,20 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
     case VIRTIO_MMIO_QUEUE_ALIGN:
     case VIRTIO_MMIO_QUEUE_NOTIFY:
     case VIRTIO_MMIO_INTERRUPT_ACK:
+    case VIRTIO_MMIO_QUEUE_DESC_LOW:
+    case VIRTIO_MMIO_QUEUE_DESC_HIGH:
+    case VIRTIO_MMIO_QUEUE_AVAIL_LOW:
+    case VIRTIO_MMIO_QUEUE_AVAIL_HIGH:
+    case VIRTIO_MMIO_QUEUE_USED_LOW:
+    case VIRTIO_MMIO_QUEUE_USED_HIGH:
         qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: read of write-only register\n",
-                      __func__);
+                      "%s: read of write-only register (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
         return 0;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad register offset\n", __func__);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: bad register offset (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
         return 0;
     }
     return 0;
@@ -232,17 +293,41 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
     }
     switch (offset) {
     case VIRTIO_MMIO_DEVICE_FEATURES_SEL:
-        proxy->host_features_sel = value;
+        if (value) {
+            proxy->host_features_sel = 1;
+        } else {
+            proxy->host_features_sel = 0;
+        }
         break;
     case VIRTIO_MMIO_DRIVER_FEATURES:
-        if (!proxy->guest_features_sel) {
-            virtio_set_features(vdev, value);
+        if (proxy->legacy) {
+            if (proxy->guest_features_sel) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "%s: attempt to write guest features with "
+                              "guest_features_sel > 0 in legacy mode\n",
+                              __func__);
+            } else {
+                virtio_set_features(vdev, value);
+            }
+        } else {
+            proxy->guest_features[proxy->guest_features_sel] = value;
         }
         break;
     case VIRTIO_MMIO_DRIVER_FEATURES_SEL:
-        proxy->guest_features_sel = value;
+        if (value) {
+            proxy->guest_features_sel = 1;
+        } else {
+            proxy->guest_features_sel = 0;
+        }
         break;
     case VIRTIO_MMIO_GUEST_PAGE_SIZE:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
         proxy->guest_page_shift = ctz32(value);
         if (proxy->guest_page_shift > 31) {
             proxy->guest_page_shift = 0;
@@ -256,15 +341,31 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
         break;
     case VIRTIO_MMIO_QUEUE_NUM:
         trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE);
-        virtio_queue_set_num(vdev, vdev->queue_sel, value);
-        /* Note: only call this function for legacy devices */
-        virtio_queue_update_rings(vdev, vdev->queue_sel);
+        if (proxy->legacy) {
+            virtio_queue_set_num(vdev, vdev->queue_sel, value);
+            virtio_queue_update_rings(vdev, vdev->queue_sel);
+        } else {
+            proxy->vqs[vdev->queue_sel].num = value;
+        }
         break;
     case VIRTIO_MMIO_QUEUE_ALIGN:
-        /* Note: this is only valid for legacy devices */
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
         virtio_queue_set_align(vdev, vdev->queue_sel, value);
         break;
     case VIRTIO_MMIO_QUEUE_PFN:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
         if (value == 0) {
             virtio_reset(vdev);
         } else {
@@ -272,6 +373,29 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
                                   value << proxy->guest_page_shift);
         }
         break;
+    case VIRTIO_MMIO_QUEUE_READY:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        if (value) {
+            virtio_queue_set_num(vdev, vdev->queue_sel,
+                                 proxy->vqs[vdev->queue_sel].num);
+            virtio_queue_set_rings(vdev, vdev->queue_sel,
+                ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].desc[0],
+                ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].avail[0],
+                ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].used[0]);
+            proxy->vqs[vdev->queue_sel].enabled = 1;
+        } else {
+            proxy->vqs[vdev->queue_sel].enabled = 0;
+        }
+        break;
     case VIRTIO_MMIO_QUEUE_NOTIFY:
         if (value < VIRTIO_QUEUE_MAX) {
             virtio_queue_notify(vdev, value);
@@ -286,6 +410,12 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
             virtio_mmio_stop_ioeventfd(proxy);
         }
 
+        if (!proxy->legacy && (value & VIRTIO_CONFIG_S_FEATURES_OK)) {
+            virtio_set_features(vdev,
+                                ((uint64_t)proxy->guest_features[1]) << 32 |
+                                proxy->guest_features[0]);
+        }
+
         virtio_set_status(vdev, value & 0xff);
 
         if (value & VIRTIO_CONFIG_S_DRIVER_OK) {
@@ -296,6 +426,66 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
             virtio_reset(vdev);
         }
         break;
+    case VIRTIO_MMIO_QUEUE_DESC_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].desc[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_DESC_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].desc[1] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_AVAIL_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].avail[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_AVAIL_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].avail[1] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_USED_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].used[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_USED_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].used[1] = value;
+        break;
     case VIRTIO_MMIO_MAGIC_VALUE:
     case VIRTIO_MMIO_VERSION:
     case VIRTIO_MMIO_DEVICE_ID:
@@ -303,22 +493,31 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
     case VIRTIO_MMIO_DEVICE_FEATURES:
     case VIRTIO_MMIO_QUEUE_NUM_MAX:
     case VIRTIO_MMIO_INTERRUPT_STATUS:
+    case VIRTIO_MMIO_CONFIG_GENERATION:
         qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: write to readonly register\n",
-                      __func__);
+                      "%s: write to read-only register (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
         break;
 
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad register offset\n", __func__);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: bad register offset (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
     }
 }
 
-static const MemoryRegionOps virtio_mem_ops = {
+static const MemoryRegionOps virtio_legacy_mem_ops = {
     .read = virtio_mmio_read,
     .write = virtio_mmio_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static const MemoryRegionOps virtio_mem_ops = {
+    .read = virtio_mmio_read,
+    .write = virtio_mmio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
 static void virtio_mmio_update_irq(DeviceState *opaque, uint16_t vector)
 {
     VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
@@ -352,15 +551,90 @@ static void virtio_mmio_save_config(DeviceState *opaque, QEMUFile *f)
     qemu_put_be32(f, proxy->guest_page_shift);
 }
 
+static const VMStateDescription vmstate_virtio_mmio_queue_state = {
+    .name = "virtio_mmio/queue_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16(num, VirtIOMMIOQueue),
+        VMSTATE_BOOL(enabled, VirtIOMMIOQueue),
+        VMSTATE_UINT32_ARRAY(desc, VirtIOMMIOQueue, 2),
+        VMSTATE_UINT32_ARRAY(avail, VirtIOMMIOQueue, 2),
+        VMSTATE_UINT32_ARRAY(used, VirtIOMMIOQueue, 2),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_virtio_mmio_state_sub = {
+    .name = "virtio_mmio/state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(guest_features, VirtIOMMIOProxy, 2),
+        VMSTATE_STRUCT_ARRAY(vqs, VirtIOMMIOProxy, VIRTIO_QUEUE_MAX, 0,
+                             vmstate_virtio_mmio_queue_state,
+                             VirtIOMMIOQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_virtio_mmio = {
+    .name = "virtio_mmio",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_mmio_state_sub,
+        NULL
+    }
+};
+
+static void virtio_mmio_save_extra_state(DeviceState *opaque, QEMUFile *f)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL);
+}
+
+static int virtio_mmio_load_extra_state(DeviceState *opaque, QEMUFile *f)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1);
+}
+
+static bool virtio_mmio_has_extra_state(DeviceState *opaque)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    return !proxy->legacy;
+}
+
 static void virtio_mmio_reset(DeviceState *d)
 {
     VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    int i;
 
     virtio_mmio_stop_ioeventfd(proxy);
     virtio_bus_reset(&proxy->bus);
     proxy->host_features_sel = 0;
     proxy->guest_features_sel = 0;
     proxy->guest_page_shift = 0;
+
+    if (!proxy->legacy) {
+        proxy->guest_features[0] = proxy->guest_features[1] = 0;
+
+        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+            proxy->vqs[i].enabled = 0;
+            proxy->vqs[i].num = 0;
+            proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
+            proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
+            proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
+        }
+    }
 }
 
 static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
@@ -423,11 +697,22 @@ assign_error:
     return r;
 }
 
+static void virtio_mmio_pre_plugged(DeviceState *d, Error **errp)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (!proxy->legacy) {
+        virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
+    }
+}
+
 /* virtio-mmio device */
 
 static Property virtio_mmio_properties[] = {
     DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy,
                      format_transport_address, true),
+    DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -439,8 +724,15 @@ static void virtio_mmio_realizefn(DeviceState *d, Error **errp)
     qbus_create_inplace(&proxy->bus, sizeof(proxy->bus), TYPE_VIRTIO_MMIO_BUS,
                         d, NULL);
     sysbus_init_irq(sbd, &proxy->irq);
-    memory_region_init_io(&proxy->iomem, OBJECT(d), &virtio_mem_ops, proxy,
-                          TYPE_VIRTIO_MMIO, 0x200);
+    if (proxy->legacy) {
+        memory_region_init_io(&proxy->iomem, OBJECT(d),
+                              &virtio_legacy_mem_ops, proxy,
+                              TYPE_VIRTIO_MMIO, 0x200);
+    } else {
+        memory_region_init_io(&proxy->iomem, OBJECT(d),
+                              &virtio_mem_ops, proxy,
+                              TYPE_VIRTIO_MMIO, 0x200);
+    }
     sysbus_init_mmio(sbd, &proxy->iomem);
 }
 
@@ -511,9 +803,13 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data)
     k->notify = virtio_mmio_update_irq;
     k->save_config = virtio_mmio_save_config;
     k->load_config = virtio_mmio_load_config;
+    k->save_extra_state = virtio_mmio_save_extra_state;
+    k->load_extra_state = virtio_mmio_load_extra_state;
+    k->has_extra_state = virtio_mmio_has_extra_state;
     k->set_guest_notifiers = virtio_mmio_set_guest_notifiers;
     k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled;
     k->ioeventfd_assign = virtio_mmio_ioeventfd_assign;
+    k->pre_plugged = virtio_mmio_pre_plugged;
     k->has_variable_vring_alignment = true;
     bus_class->max_dev = 1;
     bus_class->get_dev_path = virtio_mmio_bus_get_dev_path;
-- 
MST



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH v3] virtio-mmio: implement modern (v2) personality (virtio-1)
  2019-09-17 15:11 ` [Qemu-devel] [PULL 10/10] " Michael S. Tsirkin
  (?)
@ 2019-09-16 14:40 ` Cornelia Huck
  -1 siblings, 0 replies; 35+ messages in thread
From: Cornelia Huck @ 2019-09-16 14:40 UTC (permalink / raw)
  To: Sergio Lopez; +Cc: stefanha, peter.maydell, qemu-devel, abologna, mst

On Fri, 13 Sep 2019 14:06:01 +0200
Sergio Lopez <slp@redhat.com> wrote:

> Implement the modern (v2) personality, according to the VirtIO 1.0
> specification.
> 
> Support for v2 among guests is not as widespread as it'd be
> desirable. While the Linux driver has had it for a while, support is
> missing, at least, from Tianocore EDK II, NetBSD and FreeBSD.
> 
> For this reason, the v2 personality is disabled, keeping the legacy
> behavior as default. Machine types willing to use v2, can enable it
> using MachineClass's compat_props.
> 
> Signed-off-by: Sergio Lopez <slp@redhat.com>
> ---
> Changelog:
> 
> v3:
>  - Use %HWADDR_PRIx instead of %x. (Stefan Hajnoczi)
>  - Return 0 if host_features_sel > 0 for legacy mode. (Cornelia Huck)
>  - Mask out legacy features in non-legacy mode. (Cornelia Huck)
>  - Log an error in guest attempts to write guest_features with
>    guest_features_sel > 0 in legacy mode. (Cornelia Huck)
> 
> v2:
>  - Switch from RFC to PATCH.
>  - Avoid the modern vs. legacy dichotomy. Use legacy or non-legacy
>    instead. (Andrea Bolognani, Cornelia Huck)
>  - Include the register offset in the warning messages. (Stefan
>    Hajnoczi)
>  - Fix device endianness for the non-legacy mode. (Michael S. Tsirkin)
>  - Honor the specs in VIRTIO_MMIO_QUEUE_READY. (Michael S. Tsirkin)
> ---
>  hw/virtio/virtio-mmio.c | 342 +++++++++++++++++++++++++++++++++++++---
>  1 file changed, 319 insertions(+), 23 deletions(-)
> 

Looks good to me now.

Reviewed-by: Cornelia Huck <cohuck@redhat.com>


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH v3] virtio-mmio: implement modern (v2), personality (virtio-1)
  2019-09-17 15:11 ` [Qemu-devel] [PULL 10/10] " Michael S. Tsirkin
  (?)
  (?)
@ 2019-09-21  7:06 ` Vasyl Vavrychuk
  2019-09-23 13:58   ` Sergio Lopez
  -1 siblings, 1 reply; 35+ messages in thread
From: Vasyl Vavrychuk @ 2019-09-21  7:06 UTC (permalink / raw)
  To: Sergio Lopez; +Cc: qemu-devel

Hi, Sergio,

 > For this reason, the v2 personality is disabled, keeping the legacy
 > behavior as default. Machine types willing to use v2, can enable it
 > using MachineClass's compat_props.
...
 > +    DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true),

Currently, I am not able to set "force-legacy" to false from the
command line for the virt machine.

I think, the "force-legacy" and compat_props should work the other way 
around.

The "force-legacy" should be set to false by default to select a new 
behaviour.
Instead of this hw_compat_4_1 should be modified to keep the old behaviour:

--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -29,6 +29,7 @@

  GlobalProperty hw_compat_4_1[] = {
      { "virtio-pci", "x-pcie-flr-init", "off" },
+    { "virtio-mmio", "force-legacy", "on" },
  };
  const size_t hw_compat_4_1_len = G_N_ELEMENTS(hw_compat_4_1);


This way, one can keep the old behaviour by doing "-M virt-4.1"

Best regards,
Vasyl



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH v3] virtio-mmio: implement modern (v2), personality (virtio-1)
  2019-09-21  7:06 ` [Qemu-devel] [PATCH v3] virtio-mmio: implement modern (v2), " Vasyl Vavrychuk
@ 2019-09-23 13:58   ` Sergio Lopez
  0 siblings, 0 replies; 35+ messages in thread
From: Sergio Lopez @ 2019-09-23 13:58 UTC (permalink / raw)
  To: Vasyl Vavrychuk; +Cc: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1062 bytes --]


Vasyl Vavrychuk <vvavrychuk@gmail.com> writes:

> Hi, Sergio,
>
>> For this reason, the v2 personality is disabled, keeping the legacy
>> behavior as default. Machine types willing to use v2, can enable it
>> using MachineClass's compat_props.
> ...
>> +    DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true),
>
> Currently, I am not able to set "force-legacy" to false from the
> command line for the virt machine.

You can disable force-legacy from the command line by passing "-global
virtio-mmio.force-legacy=false".

> I think, the "force-legacy" and compat_props should work the other way
> around.
>
> The "force-legacy" should be set to false by default to select a new
> behaviour.
> Instead of this hw_compat_4_1 should be modified to keep the old behaviour:

The decision to keep the legacy behavior as the default was made because
support for the non-legacy personality is not as widespread as would be
desirable. Basically, only the Linux guest driver supports it (it's
missing from every *BSD I checked, and also from TianoCore's EDK2).

Sergio.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 00/10] virtio, vhost, pc: features, fixes, cleanups.
@ 2019-09-17 15:10 Michael S. Tsirkin
  2019-09-19 10:13 ` Peter Maydell
  0 siblings, 1 reply; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell

The following changes since commit 138985c1ef8b66e4e5b383354e133e05d01d0b5f:

  Merge remote-tracking branch 'remotes/amarkovic/tags/mips-queue-sep-12-2019' into staging (2019-09-13 16:04:46 +0100)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to 44e687a4d9ab327761e221844ced7dc9c23350a5:

  virtio-mmio: implement modern (v2) personality (virtio-1) (2019-09-16 11:17:06 -0400)

----------------------------------------------------------------
virtio,vhost,pc: features, fixes, cleanups.

Virtio 1.0 support for virtio-mmio.
Misc fixes, cleanups.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

----------------------------------------------------------------
Laurent Vivier (1):
      MAINTAINERS: update virtio-rng and virtio-serial maintainer

Pankaj Gupta (1):
      virtio pmem: user document

Peter Xu (4):
      intel_iommu: Sanity check vfio-pci config on machine init done
      qdev/machine: Introduce hotplug_allowed hook
      pc/q35: Disallow vfio-pci hotplug without VT-d caching mode
      intel_iommu: Remove the caching-mode check during flag change

Raphael Norwitz (2):
      vhost-user-blk: prevent using uninitialized vqs
      backends/vhost-user.c: prevent using uninitialized vqs

Sergio Lopez (1):
      virtio-mmio: implement modern (v2) personality (virtio-1)

Wei Yang (1):
      docs/nvdimm: add example on persistent backend setup

 docs/nvdimm.txt           |  31 +++++
 include/hw/boards.h       |   9 ++
 include/hw/qdev-core.h    |   1 +
 backends/vhost-user.c     |   2 +-
 hw/block/vhost-user-blk.c |   2 +-
 hw/core/qdev.c            |  17 +++
 hw/i386/intel_iommu.c     |  41 +++++-
 hw/i386/pc.c              |  21 +++
 hw/virtio/virtio-mmio.c   | 342 ++++++++++++++++++++++++++++++++++++++++++----
 qdev-monitor.c            |   7 +
 MAINTAINERS               |   6 +-
 docs/virtio-pmem.rst      |  75 ++++++++++
 12 files changed, 521 insertions(+), 33 deletions(-)
 create mode 100644 docs/virtio-pmem.rst



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PULL 00/10] virtio, vhost, pc: features, fixes, cleanups.
  2019-09-17 15:10 [Qemu-devel] [PULL 00/10] virtio, vhost, pc: features, fixes, cleanups Michael S. Tsirkin
@ 2019-09-19 10:13 ` Peter Maydell
  0 siblings, 0 replies; 35+ messages in thread
From: Peter Maydell @ 2019-09-19 10:13 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: QEMU Developers

On Tue, 17 Sep 2019 at 16:10, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> The following changes since commit 138985c1ef8b66e4e5b383354e133e05d01d0b5f:
>
>   Merge remote-tracking branch 'remotes/amarkovic/tags/mips-queue-sep-12-2019' into staging (2019-09-13 16:04:46 +0100)
>
> are available in the Git repository at:
>
>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
>
> for you to fetch changes up to 44e687a4d9ab327761e221844ced7dc9c23350a5:
>
>   virtio-mmio: implement modern (v2) personality (virtio-1) (2019-09-16 11:17:06 -0400)
>
> ----------------------------------------------------------------
> virtio,vhost,pc: features, fixes, cleanups.
>
> Virtio 1.0 support for virtio-mmio.
> Misc fixes, cleanups.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>
> ----------------------------------------------------------------
> Laurent Vivier (1):
>       MAINTAINERS: update virtio-rng and virtio-serial maintainer
>
> Pankaj Gupta (1):
>       virtio pmem: user document
>
> Peter Xu (4):
>       intel_iommu: Sanity check vfio-pci config on machine init done
>       qdev/machine: Introduce hotplug_allowed hook
>       pc/q35: Disallow vfio-pci hotplug without VT-d caching mode
>       intel_iommu: Remove the caching-mode check during flag change
>
> Raphael Norwitz (2):
>       vhost-user-blk: prevent using uninitialized vqs
>       backends/vhost-user.c: prevent using uninitialized vqs
>
> Sergio Lopez (1):
>       virtio-mmio: implement modern (v2) personality (virtio-1)
>
> Wei Yang (1):
>       docs/nvdimm: add example on persistent backend setup



Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/4.2
for any user-visible changes.

-- PMM


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH v3 0/4] intel_iommu: Do sanity check of vfio-pci earlier
@ 2019-09-16  8:07 Peter Xu
  2019-09-17 15:10   ` [Qemu-devel] [PULL 05/10] " Michael S. Tsirkin
                   ` (3 more replies)
  0 siblings, 4 replies; 35+ messages in thread
From: Peter Xu @ 2019-09-16  8:07 UTC (permalink / raw)
  To: qemu-devel
  Cc: Daniel P. Berrangé, Eduardo Habkost, Michael S. Tsirkin,
	Jason Wang, Alex Williamson, peterx, Eric Auger, Bandan Das,
	Igor Mammedov, Paolo Bonzini, Richard Henderson

v3:
- repost with the correct tree

v2:
- rebase to master [Eric]
- add r-bs for Eric
- remove RFC tag

The VT-d code has some defects, one of them is that we cannot detect
the misuse of vIOMMU and vfio-pci early enough.

For example, logically this is not allowed:

  -device intel-iommu,caching-mode=off \
  -device vfio-pci,host=05:00.0

Because the caching mode is required to make vfio-pci devices
functional.

Previously we did this sanity check in vtd_iommu_notify_flag_changed(),
i.e. when the memory regions change their attributes.  However, that's
too late in most cases, because the memory region layouts will only
change after the IOMMU is enabled, and in most cases that happens while
the guest OS boots.  So when the configuration is wrong, we will only
bail out during guest boot rather than simply telling the user before
QEMU starts.

The same problem happens on device hotplug, say, when we have this:

  -device intel-iommu,caching-mode=off

Then we do something like:

  (HMP) device_add vfio-pci,host=05:00.0,bus=pcie.1

If at that time the vIOMMU is enabled in the guest then the QEMU
process will simply quit directly due to this hotplug event.  This is
a bit insane...

This series tries to solve above two problems by introducing two
sanity checks upon these places separately:

  - machine done
  - hotplug device

This is a bit awkward but I hope this could be better than before.
There are of course other solutions, like hard-coding the check into
vfio-pci, but I feel that would be even less pretty.  I couldn't think
of any better way to do this; if there is one, please kindly shout out.

Please have a look to see whether this would be acceptable, thanks.

Peter Xu (4):
  intel_iommu: Sanity check vfio-pci config on machine init done
  qdev/machine: Introduce hotplug_allowed hook
  pc/q35: Disallow vfio-pci hotplug without VT-d caching mode
  intel_iommu: Remove the caching-mode check during flag change

 hw/core/qdev.c         | 17 +++++++++++++++++
 hw/i386/intel_iommu.c  | 41 +++++++++++++++++++++++++++++++++++------
 hw/i386/pc.c           | 21 +++++++++++++++++++++
 include/hw/boards.h    |  9 +++++++++
 include/hw/qdev-core.h |  1 +
 qdev-monitor.c         |  7 +++++++
 6 files changed, 90 insertions(+), 6 deletions(-)

-- 
2.21.0

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH v3 1/4] intel_iommu: Sanity check vfio-pci config on machine init done
@ 2019-09-17 15:10   ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Peter Xu @ 2019-09-16  8:07 UTC (permalink / raw)
  To: qemu-devel
  Cc: Daniel P. Berrangé, Eduardo Habkost, Michael S. Tsirkin,
	Jason Wang, Alex Williamson, peterx, Eric Auger, Bandan Das,
	Igor Mammedov, Paolo Bonzini, Richard Henderson

Previously, this check only happened when the IOMMU was enabled in
the guest.  That was always too late because the enabling of the IOMMU
normally only happens during the boot of the guest OS.  It means that we
can bail out and exit directly while the guest OS boots if the
configuration of devices is not supported.  Or, if the guest didn't
enable the vIOMMU at all, then the user can use the guest normally, but
as soon as they reconfigure the guest OS to enable the vIOMMU and reboot,
the user will see the panic right after the reset when the next boot
starts.

Let's make this failure happen even earlier so that we force the user to
use caching-mode for vfio-pci devices when a vIOMMU is present.  That way
the user at least won't be surprised during execution of the guest, which
seems a bit nicer.

This will affect some user who didn't enable vIOMMU in the guest OS
but was using vfio-pci and the vtd device in the past.  However I hope
it's not a majority because not enabling vIOMMU with the device
attached is actually meaningless.

We still keep the old assertion for safety so far because the hotplug
path could still reach it, so far.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu.c | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 75ca6f9c70..bed8ffe446 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
 #include "hw/i386/x86-iommu.h"
 #include "hw/pci-host/q35.h"
 #include "sysemu/kvm.h"
+#include "sysemu/sysemu.h"
 #include "hw/i386/apic_internal.h"
 #include "kvm_i386.h"
 #include "migration/vmstate.h"
@@ -64,6 +65,13 @@
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
 
+static void vtd_panic_require_caching_mode(void)
+{
+    error_report("We need to set caching-mode=on for intel-iommu to enable "
+                 "device assignment with IOMMU protection.");
+    exit(1);
+}
+
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                             uint64_t wmask, uint64_t w1cmask)
 {
@@ -2929,9 +2937,7 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
     IntelIOMMUState *s = vtd_as->iommu_state;
 
     if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
-        error_report("We need to set caching-mode=on for intel-iommu to enable "
-                     "device assignment with IOMMU protection.");
-        exit(1);
+        vtd_panic_require_caching_mode();
     }
 
     /* Update per-address-space notifier flags */
@@ -3699,6 +3705,32 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
     return true;
 }
 
+static int vtd_machine_done_notify_one(Object *child, void *unused)
+{
+    IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
+
+    /*
+     * We hard-coded here because vfio-pci is the only special case
+     * here.  Let's be more elegant in the future when we can, but so
+     * far there seems to be no better way.
+     */
+    if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {
+        vtd_panic_require_caching_mode();
+    }
+
+    return 0;
+}
+
+static void vtd_machine_done_hook(Notifier *notifier, void *unused)
+{
+    object_child_foreach_recursive(object_get_root(),
+                                   vtd_machine_done_notify_one, NULL);
+}
+
+static Notifier vtd_machine_done_notify = {
+    .notify = vtd_machine_done_hook,
+};
+
 static void vtd_realize(DeviceState *dev, Error **errp)
 {
     MachineState *ms = MACHINE(qdev_get_machine());
@@ -3744,6 +3776,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
     /* Pseudo address space under root PCI bus. */
     pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
+    qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
 }
 
 static void vtd_class_init(ObjectClass *klass, void *data)
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 05/10] intel_iommu: Sanity check vfio-pci config on machine init done
@ 2019-09-17 15:10   ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:10 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Eduardo Habkost, Peter Xu, Eric Auger,
	Paolo Bonzini, Richard Henderson

From: Peter Xu <peterx@redhat.com>

Previously, this check only happened when the IOMMU was enabled in
the guest.  That was always too late because the enabling of the IOMMU
normally only happens during the boot of the guest OS.  It means that we
can bail out and exit directly while the guest OS boots if the
configuration of devices is not supported.  Or, if the guest didn't
enable the vIOMMU at all, then the user can use the guest normally, but
as soon as they reconfigure the guest OS to enable the vIOMMU and reboot,
the user will see the panic right after the reset when the next boot
starts.

Let's make this failure happen even earlier so that we force the user to
use caching-mode for vfio-pci devices when a vIOMMU is present.  That way
the user at least won't be surprised during execution of the guest, which
seems a bit nicer.

This will affect some user who didn't enable vIOMMU in the guest OS
but was using vfio-pci and the vtd device in the past.  However I hope
it's not a majority because not enabling vIOMMU with the device
attached is actually meaningless.

We still keep the old assertion for safety so far because the hotplug
path could still reach it, so far.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20190916080718.3299-2-peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/intel_iommu.c | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 75ca6f9c70..bed8ffe446 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
 #include "hw/i386/x86-iommu.h"
 #include "hw/pci-host/q35.h"
 #include "sysemu/kvm.h"
+#include "sysemu/sysemu.h"
 #include "hw/i386/apic_internal.h"
 #include "kvm_i386.h"
 #include "migration/vmstate.h"
@@ -64,6 +65,13 @@
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
 
+static void vtd_panic_require_caching_mode(void)
+{
+    error_report("We need to set caching-mode=on for intel-iommu to enable "
+                 "device assignment with IOMMU protection.");
+    exit(1);
+}
+
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                             uint64_t wmask, uint64_t w1cmask)
 {
@@ -2929,9 +2937,7 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
     IntelIOMMUState *s = vtd_as->iommu_state;
 
     if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
-        error_report("We need to set caching-mode=on for intel-iommu to enable "
-                     "device assignment with IOMMU protection.");
-        exit(1);
+        vtd_panic_require_caching_mode();
     }
 
     /* Update per-address-space notifier flags */
@@ -3699,6 +3705,32 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
     return true;
 }
 
+static int vtd_machine_done_notify_one(Object *child, void *unused)
+{
+    IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
+
+    /*
+     * We hard-coded here because vfio-pci is the only special case
+     * here.  Let's be more elegant in the future when we can, but so
+     * far there seems to be no better way.
+     */
+    if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {
+        vtd_panic_require_caching_mode();
+    }
+
+    return 0;
+}
+
+static void vtd_machine_done_hook(Notifier *notifier, void *unused)
+{
+    object_child_foreach_recursive(object_get_root(),
+                                   vtd_machine_done_notify_one, NULL);
+}
+
+static Notifier vtd_machine_done_notify = {
+    .notify = vtd_machine_done_hook,
+};
+
 static void vtd_realize(DeviceState *dev, Error **errp)
 {
     MachineState *ms = MACHINE(qdev_get_machine());
@@ -3744,6 +3776,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
     /* Pseudo address space under root PCI bus. */
     pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
+    qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
 }
 
 static void vtd_class_init(ObjectClass *klass, void *data)
-- 
MST



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH v3 2/4] qdev/machine: Introduce hotplug_allowed hook
@ 2019-09-17 15:10   ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Peter Xu @ 2019-09-16  8:07 UTC (permalink / raw)
  To: qemu-devel
  Cc: Daniel P. Berrangé, Eduardo Habkost, Michael S. Tsirkin,
	Jason Wang, Alex Williamson, peterx, Eric Auger, Bandan Das,
	Igor Mammedov, Paolo Bonzini, Richard Henderson

Introduce this new per-machine hook to give any machine class a chance
to do a sanity check on the to-be-hotplugged device.
This will be used for x86 to try to detect some illegal configurations
of devices, e.g., possible conflicts between vfio-pci and x86
vIOMMU.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/core/qdev.c         | 17 +++++++++++++++++
 include/hw/boards.h    |  9 +++++++++
 include/hw/qdev-core.h |  1 +
 qdev-monitor.c         |  7 +++++++
 4 files changed, 34 insertions(+)

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 60d66c2f39..cbad6c1d55 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -237,6 +237,23 @@ HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev)
     return NULL;
 }
 
+bool qdev_hotplug_allowed(DeviceState *dev, Error **errp)
+{
+    MachineState *machine;
+    MachineClass *mc;
+    Object *m_obj = qdev_get_machine();
+
+    if (object_dynamic_cast(m_obj, TYPE_MACHINE)) {
+        machine = MACHINE(m_obj);
+        mc = MACHINE_GET_CLASS(machine);
+        if (mc->hotplug_allowed) {
+            return mc->hotplug_allowed(machine, dev, errp);
+        }
+    }
+
+    return true;
+}
+
 HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev)
 {
     if (dev->parent_bus) {
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 2289536e48..be18a5c032 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -166,6 +166,13 @@ typedef struct {
  *    The function pointer to hook different machine specific functions for
  *    parsing "smp-opts" from QemuOpts to MachineState::CpuTopology and more
  *    machine specific topology fields, such as smp_dies for PCMachine.
+ * @hotplug_allowed:
+ *    If the hook is provided, then it'll be called for each device
+ *    hotplug to check whether the device hotplug is allowed.  Return
+ *    true to grant allowance or false to reject the hotplug.  When
+ *    false is returned, an error must be set to show the reason of
+ *    the rejection.  If the hook is not provided, all hotplug will be
+ *    allowed.
  */
 struct MachineClass {
     /*< private >*/
@@ -224,6 +231,8 @@ struct MachineClass {
 
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
+    bool (*hotplug_allowed)(MachineState *state, DeviceState *dev,
+                            Error **errp);
     CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine,
                                                          unsigned cpu_index);
     const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index de70b7a19a..aa123f88cb 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -280,6 +280,7 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
                                  int required_for_version);
 HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev);
 HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev);
+bool qdev_hotplug_allowed(DeviceState *dev, Error **errp);
 /**
  * qdev_get_hotplug_handler: Get handler responsible for device wiring
  *
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 8fe5c2cad2..148df9cacf 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -615,6 +615,13 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
     /* create device */
     dev = DEVICE(object_new(driver));
 
+    /* Check whether the hotplug is allowed by the machine */
+    if (qdev_hotplug && !qdev_hotplug_allowed(dev, &err)) {
+        /* Error must be set in the machine hook */
+        assert(err);
+        goto err_del_dev;
+    }
+
     if (bus) {
         qdev_set_parent_bus(dev, bus);
     } else if (qdev_hotplug && !qdev_get_machine_hotplug_handler(dev)) {
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 06/10] qdev/machine: Introduce hotplug_allowed hook
@ 2019-09-17 15:10   ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:10 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Daniel P. Berrangé, Eduardo Habkost, Peter Xu,
	Eric Auger, Paolo Bonzini

From: Peter Xu <peterx@redhat.com>

Introduce this new per-machine hook to give any machine class a chance
to do a sanity check on the to-be-hotplugged device.
This will be used for x86 to try to detect some illegal configurations
of devices, e.g., possible conflicts between vfio-pci and x86
vIOMMU.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20190916080718.3299-3-peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/core/qdev.c         | 17 +++++++++++++++++
 include/hw/boards.h    |  9 +++++++++
 include/hw/qdev-core.h |  1 +
 qdev-monitor.c         |  7 +++++++
 4 files changed, 34 insertions(+)

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 60d66c2f39..cbad6c1d55 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -237,6 +237,23 @@ HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev)
     return NULL;
 }
 
+bool qdev_hotplug_allowed(DeviceState *dev, Error **errp)
+{
+    MachineState *machine;
+    MachineClass *mc;
+    Object *m_obj = qdev_get_machine();
+
+    if (object_dynamic_cast(m_obj, TYPE_MACHINE)) {
+        machine = MACHINE(m_obj);
+        mc = MACHINE_GET_CLASS(machine);
+        if (mc->hotplug_allowed) {
+            return mc->hotplug_allowed(machine, dev, errp);
+        }
+    }
+
+    return true;
+}
+
 HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev)
 {
     if (dev->parent_bus) {
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 2289536e48..be18a5c032 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -166,6 +166,13 @@ typedef struct {
  *    The function pointer to hook different machine specific functions for
  *    parsing "smp-opts" from QemuOpts to MachineState::CpuTopology and more
  *    machine specific topology fields, such as smp_dies for PCMachine.
+ * @hotplug_allowed:
+ *    If the hook is provided, then it'll be called for each device
+ *    hotplug to check whether the device hotplug is allowed.  Return
+ *    true to grant allowance or false to reject the hotplug.  When
+ *    false is returned, an error must be set to show the reason of
+ *    the rejection.  If the hook is not provided, all hotplug will be
+ *    allowed.
  */
 struct MachineClass {
     /*< private >*/
@@ -224,6 +231,8 @@ struct MachineClass {
 
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
+    bool (*hotplug_allowed)(MachineState *state, DeviceState *dev,
+                            Error **errp);
     CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine,
                                                          unsigned cpu_index);
     const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index de70b7a19a..aa123f88cb 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -280,6 +280,7 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
                                  int required_for_version);
 HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev);
 HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev);
+bool qdev_hotplug_allowed(DeviceState *dev, Error **errp);
 /**
  * qdev_get_hotplug_handler: Get handler responsible for device wiring
  *
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 8fe5c2cad2..148df9cacf 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -615,6 +615,13 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
     /* create device */
     dev = DEVICE(object_new(driver));
 
+    /* Check whether the hotplug is allowed by the machine */
+    if (qdev_hotplug && !qdev_hotplug_allowed(dev, &err)) {
+        /* Error must be set in the machine hook */
+        assert(err);
+        goto err_del_dev;
+    }
+
     if (bus) {
         qdev_set_parent_bus(dev, bus);
     } else if (qdev_hotplug && !qdev_get_machine_hotplug_handler(dev)) {
-- 
MST



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH v3 3/4] pc/q35: Disallow vfio-pci hotplug without VT-d caching mode
@ 2019-09-17 15:11   ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Peter Xu @ 2019-09-16  8:07 UTC (permalink / raw)
  To: qemu-devel
  Cc: Daniel P. Berrangé, Eduardo Habkost, Michael S. Tsirkin,
	Jason Wang, Alex Williamson, peterx, Eric Auger, Bandan Das,
	Igor Mammedov, Paolo Bonzini, Richard Henderson

Instead of bailing out when trying to hotplug a vfio-pci device with
the configuration below:

  -device intel-iommu,caching-mode=off

we can now return an error message to the user via QMP/HMP, and
the VM will continue to work after the hotplug fails:

  (qemu) device_add vfio-pci,bus=root.3,host=05:00.0,id=vfio1
  Error: Device assignment is not allowed without enabling caching-mode=on for Intel IOMMU.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/pc.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index bad866fe44..0a6fa6e549 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2944,6 +2944,26 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp)
     }
 }
 
+
+static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)
+{
+    X86IOMMUState *iommu = x86_iommu_get_default();
+    IntelIOMMUState *intel_iommu;
+
+    if (iommu &&
+        object_dynamic_cast((Object *)iommu, TYPE_INTEL_IOMMU_DEVICE) &&
+        object_dynamic_cast((Object *)dev, "vfio-pci")) {
+        intel_iommu = INTEL_IOMMU_DEVICE(iommu);
+        if (!intel_iommu->caching_mode) {
+            error_setg(errp, "Device assignment is not allowed without "
+                       "enabling caching-mode=on for Intel IOMMU.");
+            return false;
+        }
+    }
+
+    return true;
+}
+
 static void pc_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -2968,6 +2988,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     pcmc->pvh_enabled = true;
     assert(!mc->get_hotplug_handler);
     mc->get_hotplug_handler = pc_get_hotplug_handler;
+    mc->hotplug_allowed = pc_hotplug_allowed;
     mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
     mc->get_default_cpu_node_id = pc_get_default_cpu_node_id;
     mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 07/10] pc/q35: Disallow vfio-pci hotplug without VT-d caching mode
@ 2019-09-17 15:11   ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:11 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Eduardo Habkost, Peter Xu, Eric Auger,
	Paolo Bonzini, Richard Henderson

From: Peter Xu <peterx@redhat.com>

Instead of bailing out when trying to hotplug a vfio-pci device with
the configuration below:

  -device intel-iommu,caching-mode=off

we can now return an error message to the user via QMP/HMP, and
the VM will continue to work after the hotplug fails:

  (qemu) device_add vfio-pci,bus=root.3,host=05:00.0,id=vfio1
  Error: Device assignment is not allowed without enabling caching-mode=on for Intel IOMMU.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20190916080718.3299-4-peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/pc.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index bad866fe44..0a6fa6e549 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2944,6 +2944,26 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp)
     }
 }
 
+
+static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)
+{
+    X86IOMMUState *iommu = x86_iommu_get_default();
+    IntelIOMMUState *intel_iommu;
+
+    if (iommu &&
+        object_dynamic_cast((Object *)iommu, TYPE_INTEL_IOMMU_DEVICE) &&
+        object_dynamic_cast((Object *)dev, "vfio-pci")) {
+        intel_iommu = INTEL_IOMMU_DEVICE(iommu);
+        if (!intel_iommu->caching_mode) {
+            error_setg(errp, "Device assignment is not allowed without "
+                       "enabling caching-mode=on for Intel IOMMU.");
+            return false;
+        }
+    }
+
+    return true;
+}
+
 static void pc_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -2968,6 +2988,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     pcmc->pvh_enabled = true;
     assert(!mc->get_hotplug_handler);
     mc->get_hotplug_handler = pc_get_hotplug_handler;
+    mc->hotplug_allowed = pc_hotplug_allowed;
     mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
     mc->get_default_cpu_node_id = pc_get_default_cpu_node_id;
     mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
-- 
MST



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH v3 4/4] intel_iommu: Remove the caching-mode check during flag change
@ 2019-09-17 15:11   ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Peter Xu @ 2019-09-16  8:07 UTC (permalink / raw)
  To: qemu-devel
  Cc: Daniel P. Berrangé, Eduardo Habkost, Michael S. Tsirkin,
	Jason Wang, Alex Williamson, peterx, Eric Auger, Bandan Das,
	Igor Mammedov, Paolo Bonzini, Richard Henderson

That's never a good place to stop the QEMU process... Since now we have
both the machine done sanity check and also the hotplug handler, we
can safely remove this to avoid that.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index bed8ffe446..f1de8fdb75 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2936,10 +2936,6 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
     IntelIOMMUState *s = vtd_as->iommu_state;
 
-    if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
-        vtd_panic_require_caching_mode();
-    }
-
     /* Update per-address-space notifier flags */
     vtd_as->notifier_flags = new;
 
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 08/10] intel_iommu: Remove the caching-mode check during flag change
@ 2019-09-17 15:11   ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:11 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Eduardo Habkost, Peter Xu, Eric Auger,
	Paolo Bonzini, Richard Henderson

From: Peter Xu <peterx@redhat.com>

That's never a good place to stop the QEMU process... Since now we have
both the machine done sanity check and also the hotplug handler, we
can safely remove this to avoid that.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20190916080718.3299-5-peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/intel_iommu.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index bed8ffe446..f1de8fdb75 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2936,10 +2936,6 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
     IntelIOMMUState *s = vtd_as->iommu_state;
 
-    if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
-        vtd_panic_require_caching_mode();
-    }
-
     /* Update per-address-space notifier flags */
     vtd_as->notifier_flags = new;
 
-- 
MST



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH v2] MAINTAINERS: update virtio-rng and virtio-serial maintainer
@ 2019-09-17 15:10 ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Laurent Vivier @ 2019-09-10 14:03 UTC (permalink / raw)
  To: qemu-devel
  Cc: Laurent Vivier, Peter Maydell, Michael S . Tsirkin, Amit Shah,
	Amit Shah

As discussed with Amit, I volunteer to maintain virtio-rng and virtio-serial
previously maintained by Amit.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Acked-by: Amit Shah <amit@kernel.org>
---

Notes:
    v2: CC Michael
        add Acked-by from Amit

 MAINTAINERS | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 50eaf005f40e..db916ade55cd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1554,7 +1554,8 @@ F: include/hw/virtio/virtio-input.h
 F: contrib/vhost-user-input/*
 
 virtio-serial
-M: Amit Shah <amit@kernel.org>
+M: Laurent Vivier <lvivier@redhat.com>
+R: Amit Shah <amit@kernel.org>
 S: Supported
 F: hw/char/virtio-serial-bus.c
 F: hw/char/virtio-console.c
@@ -1563,7 +1564,8 @@ F: tests/virtio-console-test.c
 F: tests/virtio-serial-test.c
 
 virtio-rng
-M: Amit Shah <amit@kernel.org>
+M: Laurent Vivier <lvivier@redhat.com>
+R: Amit Shah <amit@kernel.org>
 S: Supported
 F: hw/virtio/virtio-rng.c
 F: include/hw/virtio/virtio-rng.h
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 01/10] MAINTAINERS: update virtio-rng and virtio-serial maintainer
@ 2019-09-17 15:10 ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:10 UTC (permalink / raw)
  To: qemu-devel
  Cc: Laurent Vivier, Peter Maydell, Amit Shah, Thomas Huth,
	Markus Armbruster, Philippe Mathieu-Daudé

From: Laurent Vivier <lvivier@redhat.com>

As discussed with Amit, I volunteer to maintain virtio-rng and virtio-serial
previously maintained by Amit.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Acked-by: Amit Shah <amit@kernel.org>
Message-Id: <20190910140350.2931-1-lvivier@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 MAINTAINERS | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 50eaf005f4..db916ade55 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1554,7 +1554,8 @@ F: include/hw/virtio/virtio-input.h
 F: contrib/vhost-user-input/*
 
 virtio-serial
-M: Amit Shah <amit@kernel.org>
+M: Laurent Vivier <lvivier@redhat.com>
+R: Amit Shah <amit@kernel.org>
 S: Supported
 F: hw/char/virtio-serial-bus.c
 F: hw/char/virtio-console.c
@@ -1563,7 +1564,8 @@ F: tests/virtio-console-test.c
 F: tests/virtio-serial-test.c
 
 virtio-rng
-M: Amit Shah <amit@kernel.org>
+M: Laurent Vivier <lvivier@redhat.com>
+R: Amit Shah <amit@kernel.org>
 S: Supported
 F: hw/virtio/virtio-rng.c
 F: include/hw/virtio/virtio-rng.h
-- 
MST



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 1/2] vhost-user-blk: prevent using uninitialized vqs
@ 2019-09-17 15:10 ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Raphael Norwitz @ 2019-08-22 18:34 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Michael S. Tsirkin, Max Reitz,
	open list:Block layer core, Raphael Norwitz

Same rationale as: e6cc11d64fc998c11a4dfcde8fda3fc33a74d844

Of the 3 virtqueues, seabios only sets cmd, leaving ctrl
and event without a physical address. This can cause
vhost_verify_ring_part_mapping to return ENOMEM, causing
the following logs:

qemu-system-x86_64: Unable to map available ring for ring 0
qemu-system-x86_64: Verify ring failure on region 0

This has already been fixed for vhost scsi devices and was
recently fixed for vhost-user scsi devices. This commit fixes it for
vhost-user-blk devices.

Suggested-by: Phillippe Mathieu-Daude <philmd@redhat.com>
Signed-off-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
---
 hw/block/vhost-user-blk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 0b8c5df..63da9bb 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -421,7 +421,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
     }
 
     s->inflight = g_new0(struct vhost_inflight, 1);
-    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
+    s->vqs = g_new0(struct vhost_virtqueue, s->num_queues);
     s->watch = 0;
     s->connected = false;
 
-- 
1.9.4



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 03/10] vhost-user-blk: prevent using uninitialized vqs
@ 2019-09-17 15:10 ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:10 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Peter Maydell, qemu-block, Max Reitz, Stefan Hajnoczi,
	Raphael Norwitz, Phillippe Mathieu-Daude

From: Raphael Norwitz <raphael.norwitz@nutanix.com>

Same rationale as: e6cc11d64fc998c11a4dfcde8fda3fc33a74d844

Of the 3 virtqueues, seabios only sets cmd, leaving ctrl
and event without a physical address. This can cause
vhost_verify_ring_part_mapping to return ENOMEM, causing
the following logs:

qemu-system-x86_64: Unable to map available ring for ring 0
qemu-system-x86_64: Verify ring failure on region 0

This has already been fixed for vhost scsi devices and was
recently fixed for vhost-user scsi devices. This commit fixes it for
vhost-user-blk devices.

Suggested-by: Phillippe Mathieu-Daude <philmd@redhat.com>
Signed-off-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
Message-Id: <1566498865-55506-1-git-send-email-raphael.norwitz@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/vhost-user-blk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 0b8c5dfeab..63da9bb619 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -421,7 +421,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
     }
 
     s->inflight = g_new0(struct vhost_inflight, 1);
-    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
+    s->vqs = g_new0(struct vhost_virtqueue, s->num_queues);
     s->watch = 0;
     s->connected = false;
 
-- 
MST



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 2/2] backends/vhost-user.c: prevent using uninitialized vqs
@ 2019-09-17 15:10   ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Raphael Norwitz @ 2019-08-22 18:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Michael S. Tsirkin, Raphael Norwitz

Similar rationale to: e6cc11d64fc998c11a4dfcde8fda3fc33a74d844

For vhost scsi and vhost-user-scsi an issue was observed
where, of the 3 virtqueues, seabios would only set cmd,
leaving ctrl and event without a physical address.
This can cause vhost_verify_ring_part_mapping to return
ENOMEM, causing the following logs:

qemu-system-x86_64: Unable to map available ring for ring 0
qemu-system-x86_64: Verify ring failure on region 0

The issue has already been fixed elsewhere, but it was noted
that in backends/vhost-user.c, the vhost_user_backend_dev_init()
function, which other vdevs use in their realize() to initialize
their vqs, was not properly zeroing out the queues. This
commit ensures hardware modules using the
vhost_user_backend_dev_init() API properly zero out their vqs on
initialization.

Suggested-by: Philippe Mathieu-Daude <philmd@redhat.com>
Signed-off-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
---
 backends/vhost-user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/vhost-user.c b/backends/vhost-user.c
index 0a13506..2bf3406 100644
--- a/backends/vhost-user.c
+++ b/backends/vhost-user.c
@@ -46,7 +46,7 @@ vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev,

     b->vdev = vdev;
     b->dev.nvqs = nvqs;
-    b->dev.vqs = g_new(struct vhost_virtqueue, nvqs);
+    b->dev.vqs = g_new0(struct vhost_virtqueue, nvqs);

     ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
     if (ret < 0) {
-- 
1.9.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 04/10] backends/vhost-user.c: prevent using uninitialized vqs
@ 2019-09-17 15:10   ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:10 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Philippe Mathieu-Daude, Stefan Hajnoczi,
	Raphael Norwitz

From: Raphael Norwitz <raphael.norwitz@nutanix.com>

Similar rationale to: e6cc11d64fc998c11a4dfcde8fda3fc33a74d844

For vhost scsi and vhost-user-scsi an issue was observed
where, of the 3 virtqueues, seabios would only set cmd,
leaving ctrl and event without a physical address.
This can cause vhost_verify_ring_part_mapping to return
ENOMEM, causing the following logs:

qemu-system-x86_64: Unable to map available ring for ring 0
qemu-system-x86_64: Verify ring failure on region 0

The issue has already been fixed elsewhere, but it was noted
that in backends/vhost-user.c, the vhost_user_backend_dev_init()
function, which other vdevs use in their realize() to initialize
their vqs, was not properly zeroing out the queues. This
commit ensures hardware modules using the
vhost_user_backend_dev_init() API properly zero out their vqs on
initialization.

Suggested-by: Philippe Mathieu-Daude <philmd@redhat.com>
Signed-off-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
Message-Id: <1566498865-55506-2-git-send-email-raphael.norwitz@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 backends/vhost-user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/vhost-user.c b/backends/vhost-user.c
index 0a13506c98..2bf3406525 100644
--- a/backends/vhost-user.c
+++ b/backends/vhost-user.c
@@ -46,7 +46,7 @@ vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev,
 
     b->vdev = vdev;
     b->dev.nvqs = nvqs;
-    b->dev.vqs = g_new(struct vhost_virtqueue, nvqs);
+    b->dev.vqs = g_new0(struct vhost_virtqueue, nvqs);
 
     ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
     if (ret < 0) {
-- 
MST



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] backends/vhost-user.c: prevent using uninitialized vqs
  2019-09-17 15:10   ` [Qemu-devel] [PULL 04/10] " Michael S. Tsirkin
  (?)
@ 2019-08-28  8:29   ` Stefan Hajnoczi
  -1 siblings, 0 replies; 35+ messages in thread
From: Stefan Hajnoczi @ 2019-08-28  8:29 UTC (permalink / raw)
  To: Raphael Norwitz; +Cc: qemu-devel, Michael S. Tsirkin

[-- Attachment #1: Type: text/plain, Size: 1203 bytes --]

On Thu, Aug 22, 2019 at 11:34:25AM -0700, Raphael Norwitz wrote:
> Similar rational to: e6cc11d64fc998c11a4dfcde8fda3fc33a74d844
> 
> For vhost scsi and vhost-user-scsi an issue was observed
> where, of the 3 virtqueues, seabios would only set cmd,
> leaving ctrl and event without a physical address.
> This can caused vhost_verify_ring_part_mapping to return
> ENOMEM, causing the following logs:
> 
> qemu-system-x86_64: Unable to map available ring for ring 0
> qemu-system-x86_64: Verify ring failure on region 0
> 
> The issue has already been fixed elsewhere, but it was noted
> that in backends/vhost-user.c, the vhost_user_backend_dev_init()
> function, which other vdevs use in their realize() to initialize
> their vqs, was not being properly zeroing out the queues. This
> commit ensures hardware modules using the
> vhost_user_backend_dev_init() API properly zero out their vqs on
> initialization.
> 
> Suggested-by: Philippe Mathieu-Daude <philmd@redhat.com>
> Signed-off-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
> ---
>  backends/vhost-user.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [Qemu-block] [PATCH 1/2] vhost-user-blk: prevent using uninitialized vqs
  2019-09-17 15:10 ` [Qemu-devel] [PULL 03/10] " Michael S. Tsirkin
  (?)
  (?)
@ 2019-08-23  3:43 ` yuchenlin via Qemu-devel
  -1 siblings, 0 replies; 35+ messages in thread
From: yuchenlin via Qemu-devel @ 2019-08-23  3:43 UTC (permalink / raw)
  To: Raphael Norwitz
  Cc: Kevin Wolf, open list:Block layer core, Michael S. Tsirkin,
	qemu-devel, Raphael Norwitz, Max Reitz

Raphael Norwitz <raphael.norwitz@nutanix.com> 於 2019-08-23 04:16 寫道： > > Same rational as: e6cc11d64fc998c11a4dfcde8fda3fc33a74d844 > > Of the 3 virtqueues, seabios only sets cmd, leaving ctrl > and event without a physical address. This can cause > vhost_verify_ring_part_mapping to return ENOMEM, causing > the following logs: > > qemu-system-x86_64: Unable to map available ring for ring 0 > qemu-system-x86_64: Verify ring failure on region 0 > > This has already been fixed for vhost scsi devices and was > recently vhost-user scsi devices. This commit fixes it for > vhost-user-blk devices. > > Suggested-by: Phillippe Mathieu-Daude <philmd@redhat.com> > Signed-off-by: Raphael Norwitz <raphael.norwitz@nutanix.com> Reviewed-by: yuchenlin <yuchenlin@synology.com> Thanks. > > > --- > hw/block/vhost-user-blk.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > index 0b8c5df..63da9bb 100644 > --- a/hw/block/vhost-user-blk.c > +++ b/hw/block/vhost-user-blk.c > @@ -421,7 +421,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > } > > s->inflight = g_new0(struct vhost_inflight, 1); > - s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > + s->vqs = g_new0(struct vhost_virtqueue, s->num_queues); > s->watch = 0; > s->connected = false; > > -- > 1.9.4 > >

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] vhost-user-blk: prevent using uninitialized vqs
  2019-09-17 15:10 ` [Qemu-devel] [PULL 03/10] " Michael S. Tsirkin
                   ` (2 preceding siblings ...)
  (?)
@ 2019-08-28  8:28 ` Stefan Hajnoczi
  -1 siblings, 0 replies; 35+ messages in thread
From: Stefan Hajnoczi @ 2019-08-28  8:28 UTC (permalink / raw)
  To: Raphael Norwitz
  Cc: Kevin Wolf, Max Reitz, qemu-devel, open list:Block layer core,
	Michael S. Tsirkin

[-- Attachment #1: Type: text/plain, Size: 885 bytes --]

On Thu, Aug 22, 2019 at 11:34:24AM -0700, Raphael Norwitz wrote:
> Same rationale as: e6cc11d64fc998c11a4dfcde8fda3fc33a74d844
> 
> Of the 3 virtqueues, seabios only sets cmd, leaving ctrl
> and event without a physical address. This can cause
> vhost_verify_ring_part_mapping to return ENOMEM, causing
> the following logs:
> 
> qemu-system-x86_64: Unable to map available ring for ring 0
> qemu-system-x86_64: Verify ring failure on region 0
> 
> This has already been fixed for vhost scsi devices and was
> recently fixed for vhost-user scsi devices. This commit fixes it for
> vhost-user-blk devices.
> 
> Suggested-by: Phillippe Mathieu-Daude <philmd@redhat.com>
> Signed-off-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
> ---
>  hw/block/vhost-user-blk.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH v3] virtio pmem: user document
@ 2019-09-17 15:11 ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Pankaj Gupta @ 2019-08-21 12:16 UTC (permalink / raw)
  To: qemu-devel
  Cc: pagupta, david, cohuck, lcapitulino, mst, stefanha, riel, nilal

This patch documents the steps to use virtio pmem.
It also documents other useful information about
virtio pmem, e.g. use-case, comparison with QEMU NVDIMM
backend and current limitations.

Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
---
v2->v3
 - Text format fixes - Cornelia
v1->v2
 - Fixes on text format and 'Guest Data persistence'
   section - Cornelia

 docs/virtio-pmem.rst | 75 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 docs/virtio-pmem.rst

diff --git a/docs/virtio-pmem.rst b/docs/virtio-pmem.rst
new file mode 100644
index 0000000000..e77881b26f
--- /dev/null
+++ b/docs/virtio-pmem.rst
@@ -0,0 +1,75 @@
+
+========================
+QEMU virtio pmem
+========================
+
+ This document explains the setup and usage of the virtio pmem device
+ which is available since QEMU v4.1.0.
+
+ The virtio pmem device is a paravirtualized persistent memory device
+ on regular (i.e. non-NVDIMM) storage.
+
+Usecase
+--------
+
+  Virtio pmem allows bypassing the guest page cache and directly using the
+  host page cache. This reduces guest memory footprint as the host can
+  make efficient memory reclaim decisions under memory pressure.
+
+o How does virtio-pmem compare to the nvdimm emulation supported by QEMU?
+
+  NVDIMM emulation on regular (i.e. non-NVDIMM) host storage does not
+  persist the guest writes as there are no defined semantics in the device
+  specification. The virtio pmem device provides guest write persistence
+  on non-NVDIMM host storage.
+
+virtio pmem usage
+-----------------
+
+  A virtio pmem device backed by a memory-backend-file can be created on
+  the QEMU command line as in the following example:
+
+  -object memory-backend-file,id=mem1,share,mem-path=./virtio_pmem.img,size=4G
+  -device virtio-pmem-pci,memdev=mem1,id=nv1
+
+   where:
+   - "object memory-backend-file,id=mem1,share,mem-path=<image>, size=<image size>"
+     creates a backend file with the specified size.
+
+   - "device virtio-pmem-pci,id=nv1,memdev=mem1" creates a virtio pmem
+     pci device whose storage is provided by above memory backend device.
+
+  Multiple virtio pmem devices can be created if multiple pairs of "-object"
+  and "-device" are provided.
+
+Hotplug
+-------
+
+Virtio pmem devices can be hotplugged via the QEMU monitor. First, the
+memory backing has to be added via 'object_add'; afterwards, the virtio
+pmem device can be added via 'device_add'.
+
+For example, the following commands add another 4GB virtio pmem device to
+the guest:
+
+ (qemu) object_add memory-backend-file,id=mem2,share=on,mem-path=virtio_pmem2.img,size=4G
+ (qemu) device_add virtio-pmem-pci,id=virtio_pmem2,memdev=mem2
+
+Guest Data Persistence
+----------------------
+
+ Guest data persistence on non-NVDIMM requires guest userspace applications
+ to perform fsync/msync. This is different from a real nvdimm backend where
+ no additional fsync/msync is required. This is to persist guest writes in
+ host backing file which otherwise remains in host page cache and there is
+ risk of losing the data in case of power failure.
+
+ With the virtio pmem device, the MAP_SYNC mmap flag is not supported. This
+ provides a hint to the application to perform fsync for write persistence.
+
+Limitations
+------------
+- Real nvdimm device backend is not supported.
+- virtio pmem hotunplug is not supported.
+- ACPI NVDIMM features like regions/namespaces are not supported.
+- ndctl command is not supported.
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 09/10] virtio pmem: user document
@ 2019-09-17 15:11 ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Cornelia Huck, Pankaj Gupta

From: Pankaj Gupta <pagupta@redhat.com>

This patch documents the steps to use virtio pmem.
It also documents other useful information about
virtio pmem, e.g. use-case, comparison with QEMU NVDIMM
backend and current limitations.

Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
Message-Id: <20190821121624.5382-1-pagupta@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 docs/virtio-pmem.rst | 75 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 docs/virtio-pmem.rst

diff --git a/docs/virtio-pmem.rst b/docs/virtio-pmem.rst
new file mode 100644
index 0000000000..e77881b26f
--- /dev/null
+++ b/docs/virtio-pmem.rst
@@ -0,0 +1,75 @@
+
+========================
+QEMU virtio pmem
+========================
+
+ This document explains the setup and usage of the virtio pmem device
+ which is available since QEMU v4.1.0.
+
+ The virtio pmem device is a paravirtualized persistent memory device
+ on regular (i.e. non-NVDIMM) storage.
+
+Usecase
+--------
+
+  Virtio pmem allows bypassing the guest page cache and directly using the
+  host page cache. This reduces guest memory footprint as the host can
+  make efficient memory reclaim decisions under memory pressure.
+
+o How does virtio-pmem compare to the nvdimm emulation supported by QEMU?
+
+  NVDIMM emulation on regular (i.e. non-NVDIMM) host storage does not
+  persist the guest writes as there are no defined semantics in the device
+  specification. The virtio pmem device provides guest write persistence
+  on non-NVDIMM host storage.
+
+virtio pmem usage
+-----------------
+
+  A virtio pmem device backed by a memory-backend-file can be created on
+  the QEMU command line as in the following example:
+
+  -object memory-backend-file,id=mem1,share,mem-path=./virtio_pmem.img,size=4G
+  -device virtio-pmem-pci,memdev=mem1,id=nv1
+
+   where:
+   - "object memory-backend-file,id=mem1,share,mem-path=<image>, size=<image size>"
+     creates a backend file with the specified size.
+
+   - "device virtio-pmem-pci,id=nv1,memdev=mem1" creates a virtio pmem
+     pci device whose storage is provided by above memory backend device.
+
+  Multiple virtio pmem devices can be created if multiple pairs of "-object"
+  and "-device" are provided.
+
+Hotplug
+-------
+
+Virtio pmem devices can be hotplugged via the QEMU monitor. First, the
+memory backing has to be added via 'object_add'; afterwards, the virtio
+pmem device can be added via 'device_add'.
+
+For example, the following commands add another 4GB virtio pmem device to
+the guest:
+
+ (qemu) object_add memory-backend-file,id=mem2,share=on,mem-path=virtio_pmem2.img,size=4G
+ (qemu) device_add virtio-pmem-pci,id=virtio_pmem2,memdev=mem2
+
+Guest Data Persistence
+----------------------
+
+ Guest data persistence on non-NVDIMM requires guest userspace applications
+ to perform fsync/msync. This is different from a real nvdimm backend where
+ no additional fsync/msync is required. This is to persist guest writes in
+ host backing file which otherwise remains in host page cache and there is
+ risk of losing the data in case of power failure.
+
+ With the virtio pmem device, the MAP_SYNC mmap flag is not supported. This
+ provides a hint to the application to perform fsync for write persistence.
+
+Limitations
+------------
+- Real nvdimm device backend is not supported.
+- virtio pmem hotunplug is not supported.
+- ACPI NVDIMM features like regions/namespaces are not supported.
+- ndctl command is not supported.
-- 
MST



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH v3] virtio pmem: user document
  2019-09-17 15:11 ` [Qemu-devel] [PULL 09/10] " Michael S. Tsirkin
  (?)
@ 2019-08-26 12:46 ` Cornelia Huck
  2019-09-16  6:30   ` Pankaj Gupta
  -1 siblings, 1 reply; 35+ messages in thread
From: Cornelia Huck @ 2019-08-26 12:46 UTC (permalink / raw)
  To: Pankaj Gupta; +Cc: mst, david, qemu-devel, lcapitulino, stefanha, riel, nilal

On Wed, 21 Aug 2019 17:46:24 +0530
Pankaj Gupta <pagupta@redhat.com> wrote:

> This patch documents the steps to use virtio pmem.
> It also documents other useful information about
> virtio pmem e.g use-case, comparison with Qemu NVDIMM
> backend and current limitations.
> 
> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
> ---
> v3->v3
>  - Text format fixes - Cornerlia
> v1->v2
>  - Fixes on text format and 'Guest Data persistence'
>    section - Cornelia
> 
>  docs/virtio-pmem.rst | 75 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 75 insertions(+)
>  create mode 100644 docs/virtio-pmem.rst

Looks good to me now.

Reviewed-by: Cornelia Huck <cohuck@redhat.com>


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH v3] virtio pmem: user document
  2019-08-26 12:46 ` [Qemu-devel] [PATCH v3] " Cornelia Huck
@ 2019-09-16  6:30   ` Pankaj Gupta
  0 siblings, 0 replies; 35+ messages in thread
From: Pankaj Gupta @ 2019-09-16  6:30 UTC (permalink / raw)
  To: mst; +Cc: riel, Cornelia Huck, david, qemu-devel, lcapitulino, stefanha,
	nilal


Gentle ping.

Can we please merge this patch.

Thanks,
Pankaj

> 
> > This patch documents the steps to use virtio pmem.
> > It also documents other useful information about
> > virtio pmem e.g use-case, comparison with Qemu NVDIMM
> > backend and current limitations.
> > 
> > Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
> > ---
> > v3->v3
> >  - Text format fixes - Cornerlia
> > v1->v2
> >  - Fixes on text format and 'Guest Data persistence'
> >    section - Cornelia
> > 
> >  docs/virtio-pmem.rst | 75 ++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 75 insertions(+)
> >  create mode 100644 docs/virtio-pmem.rst
> 
> Looks good to me now.
> 
> Reviewed-by: Cornelia Huck <cohuck@redhat.com>
> 
> 


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH v2] docs/nvdimm: add example on persistent backend setup
@ 2019-09-17 15:10 ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Wei Yang @ 2019-08-01  0:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: pagupta, Wei Yang, xiaoguangrong.eric, stefanha, mst

Persistent backend setup requires some knowledge about nvdimm and the ndctl
tool. Some users report that they struggle to gather this knowledge and
have difficulty setting it up properly.

Here we provide two examples for persistent backends and give the link
to ndctl. By doing so, users can try it directly and do more
investigation on persistent backend setup with ndctl.

Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
Reviewed-by: Pankaj Gupta <pagupta@redhat.com>

---
v2: rephrase the doc based on Stefan Hajnoczi's suggestion
---
 docs/nvdimm.txt | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index b531cacd35..362e99109e 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -171,6 +171,35 @@ guest software that this vNVDIMM device contains a region that cannot
 accept persistent writes. In result, for example, the guest Linux
 NVDIMM driver, marks such vNVDIMM device as read-only.
 
+Backend File Setup Example
+--------------------------
+
+Here are two examples showing how to set up these persistent backends on
+Linux using the tool ndctl [3].
+
+A. DAX device
+
+Use the following command to set up /dev/dax0.0 so that the entirety of
+namespace0.0 can be exposed as an emulated NVDIMM to the guest:
+
+    ndctl create-namespace -f -e namespace0.0 -m devdax
+
+The /dev/dax0.0 could be used directly in "mem-path" option.
+
+B. DAX file
+
+Individual files on a DAX host file system can be exposed as emulated
+NVDIMMS.  First an fsdax block device is created, partitioned, and then
+mounted with the "dax" mount option:
+
+    ndctl create-namespace -f -e namespace0.0 -m fsdax
+    (partition /dev/pmem0 with name pmem0p1)
+    mount -o dax /dev/pmem0p1 /mnt
+    (create or copy a disk image file with qemu-img(1), cp(1), or dd(1)
+     in /mnt)
+
+Then the new file in /mnt could be used in "mem-path" option.
+
 NVDIMM Persistence
 ------------------
 
@@ -212,3 +241,5 @@ References
     https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
 [2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
     http://pmem.io/pmdk/
+[3] ndctl-create-namespace - provision or reconfigure a namespace
+    http://pmem.io/ndctl/ndctl-create-namespace.html
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PULL 02/10] docs/nvdimm: add example on persistent backend setup
@ 2019-09-17 15:10 ` Michael S. Tsirkin
  0 siblings, 0 replies; 35+ messages in thread
From: Michael S. Tsirkin @ 2019-09-17 15:10 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Xiao Guangrong, Wei Yang, Stefan Hajnoczi,
	Pankaj Gupta

From: Wei Yang <richardw.yang@linux.intel.com>

Persistent backend setup requires some knowledge about nvdimm and the ndctl
tool. Some users report that they struggle to gather this knowledge and
have difficulty setting it up properly.

Here we provide two examples for persistent backends and give the link
to ndctl. By doing so, users can try it directly and do more
investigation on persistent backend setup with ndctl.

Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
Reviewed-by: Pankaj Gupta <pagupta@redhat.com>

Message-Id: <20190801004053.7021-1-richardw.yang@linux.intel.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 docs/nvdimm.txt | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index b531cacd35..362e99109e 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -171,6 +171,35 @@ guest software that this vNVDIMM device contains a region that cannot
 accept persistent writes. In result, for example, the guest Linux
 NVDIMM driver, marks such vNVDIMM device as read-only.
 
+Backend File Setup Example
+--------------------------
+
+Here are two examples showing how to set up these persistent backends on
+Linux using the tool ndctl [3].
+
+A. DAX device
+
+Use the following command to set up /dev/dax0.0 so that the entirety of
+namespace0.0 can be exposed as an emulated NVDIMM to the guest:
+
+    ndctl create-namespace -f -e namespace0.0 -m devdax
+
+The /dev/dax0.0 could be used directly in "mem-path" option.
+
+B. DAX file
+
+Individual files on a DAX host file system can be exposed as emulated
+NVDIMMS.  First an fsdax block device is created, partitioned, and then
+mounted with the "dax" mount option:
+
+    ndctl create-namespace -f -e namespace0.0 -m fsdax
+    (partition /dev/pmem0 with name pmem0p1)
+    mount -o dax /dev/pmem0p1 /mnt
+    (create or copy a disk image file with qemu-img(1), cp(1), or dd(1)
+     in /mnt)
+
+Then the new file in /mnt could be used in "mem-path" option.
+
 NVDIMM Persistence
 ------------------
 
@@ -212,3 +241,5 @@ References
     https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
 [2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
     http://pmem.io/pmdk/
+[3] ndctl-create-namespace - provision or reconfigure a namespace
+    http://pmem.io/ndctl/ndctl-create-namespace.html
-- 
MST



^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH v2] docs/nvdimm: add example on persistent backend setup
  2019-09-17 15:10 ` [Qemu-devel] [PULL 02/10] " Michael S. Tsirkin
  (?)
@ 2019-08-01  8:05 ` Stefan Hajnoczi
  -1 siblings, 0 replies; 35+ messages in thread
From: Stefan Hajnoczi @ 2019-08-01  8:05 UTC (permalink / raw)
  To: Wei Yang
  Cc: Pankaj Gupta, Michael S. Tsirkin, qemu-devel, Stefan Hajnoczi,
	Xiao Guangrong

On Thu, Aug 1, 2019 at 1:41 AM Wei Yang <richardw.yang@linux.intel.com> wrote:
> Persistent backend setup requires some knowledge about nvdimm and ndctl
> tool. Some users report they may struggle to gather these knowledge and
> have difficulty to setup it properly.
>
> Here we provide two examples for persistent backend and gives the link
> to ndctl. By doing so, user could try it directly and do more
> investigation on persistent backend setup with ndctl.
>
> Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
> Reviewed-by: Pankaj Gupta <pagupta@redhat.com>
>
> ---
> v2: rephrase the doc based on Stefan Hajnoczi's suggestion
> ---
>  docs/nvdimm.txt | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH v2] docs/nvdimm: add example on persistent backend setup
  2019-09-17 15:10 ` [Qemu-devel] [PULL 02/10] " Michael S. Tsirkin
  (?)
  (?)
@ 2019-09-11  8:51 ` Wei Yang
  -1 siblings, 0 replies; 35+ messages in thread
From: Wei Yang @ 2019-09-11  8:51 UTC (permalink / raw)
  To: Wei Yang; +Cc: pagupta, mst, qemu-devel, stefanha, xiaoguangrong.eric

On Thu, Aug 01, 2019 at 08:40:53AM +0800, Wei Yang wrote:
>Persistent backend setup requires some knowledge about nvdimm and ndctl
>tool. Some users report they may struggle to gather these knowledge and
>have difficulty to setup it properly.
>
>Here we provide two examples for persistent backend and gives the link
>to ndctl. By doing so, user could try it directly and do more
>investigation on persistent backend setup with ndctl.
>
>Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
>Reviewed-by: Pankaj Gupta <pagupta@redhat.com>
>

Would someone pick up this one?

Thanks ~


-- 
Wei Yang
Help you, Help me


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH v2] docs/nvdimm: add example on persistent backend setup
  2019-09-17 15:10 ` [Qemu-devel] [PULL 02/10] " Michael S. Tsirkin
                   ` (2 preceding siblings ...)
  (?)
@ 2019-09-12 12:16 ` Stefan Hajnoczi
  2019-09-12 21:44   ` Wei Yang
  -1 siblings, 1 reply; 35+ messages in thread
From: Stefan Hajnoczi @ 2019-09-12 12:16 UTC (permalink / raw)
  To: Wei Yang; +Cc: pagupta, mst, qemu-devel, stefanha, xiaoguangrong.eric

[-- Attachment #1: Type: text/plain, Size: 945 bytes --]

On Thu, Aug 01, 2019 at 08:40:53AM +0800, Wei Yang wrote:
> Persistent backend setup requires some knowledge about nvdimm and ndctl
> tool. Some users report they may struggle to gather these knowledge and
> have difficulty to setup it properly.
> 
> Here we provide two examples for persistent backend and gives the link
> to ndctl. By doing so, user could try it directly and do more
> investigation on persistent backend setup with ndctl.
> 
> Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
> Reviewed-by: Pankaj Gupta <pagupta@redhat.com>
> 
> ---
> v2: rephrase the doc based on Stefan Hajnoczi's suggestion
> ---
>  docs/nvdimm.txt | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)

Sorry, I was expecting someone else to pick this patch up.  But since
there have been no takers...

Thanks, applied to my block-next tree:
https://github.com/stefanha/qemu/commits/block-next

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH v2] docs/nvdimm: add example on persistent backend setup
  2019-09-12 12:16 ` Stefan Hajnoczi
@ 2019-09-12 21:44   ` Wei Yang
  0 siblings, 0 replies; 35+ messages in thread
From: Wei Yang @ 2019-09-12 21:44 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: pagupta, xiaoguangrong.eric, mst, qemu-devel, Wei Yang, stefanha

On Thu, Sep 12, 2019 at 02:16:00PM +0200, Stefan Hajnoczi wrote:
>On Thu, Aug 01, 2019 at 08:40:53AM +0800, Wei Yang wrote:
>> Persistent backend setup requires some knowledge about nvdimm and ndctl
>> tool. Some users report they may struggle to gather these knowledge and
>> have difficulty to setup it properly.
>> 
>> Here we provide two examples for persistent backend and gives the link
>> to ndctl. By doing so, user could try it directly and do more
>> investigation on persistent backend setup with ndctl.
>> 
>> Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
>> Reviewed-by: Pankaj Gupta <pagupta@redhat.com>
>> 
>> ---
>> v2: rephrase the doc based on Stefan Hajnoczi's suggestion
>> ---
>>  docs/nvdimm.txt | 31 +++++++++++++++++++++++++++++++
>>  1 file changed, 31 insertions(+)
>
>Sorry, I was expecting someone else to pick this patch up.  But since
>there have been no takers...
>
>Thanks, applied to my block-next tree:
>https://github.com/stefanha/qemu/commits/block-next
>

Thanks :-)

>Stefan



-- 
Wei Yang
Help you, Help me


^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2019-09-23 14:01 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-09-13 12:06 [Qemu-devel] [PATCH v3] virtio-mmio: implement modern (v2) personality (virtio-1) Sergio Lopez
2019-09-17 15:11 ` [Qemu-devel] [PULL 10/10] " Michael S. Tsirkin
2019-09-16 14:40 ` [Qemu-devel] [PATCH v3] " Cornelia Huck
2019-09-21  7:06 ` [Qemu-devel] [PATCH v3] virtio-mmio: implement modern (v2), " Vasyl Vavrychuk
2019-09-23 13:58   ` Sergio Lopez
  -- strict thread matches above, loose matches on Subject: below --
2019-09-17 15:10 [Qemu-devel] [PULL 00/10] virtio, vhost, pc: features, fixes, cleanups Michael S. Tsirkin
2019-09-19 10:13 ` Peter Maydell
2019-09-16  8:07 [Qemu-devel] [PATCH v3 0/4] intel_iommu: Do sanity check of vfio-pci earlier Peter Xu
2019-09-16  8:07 ` [Qemu-devel] [PATCH v3 1/4] intel_iommu: Sanity check vfio-pci config on machine init done Peter Xu
2019-09-17 15:10   ` [Qemu-devel] [PULL 05/10] " Michael S. Tsirkin
2019-09-16  8:07 ` [Qemu-devel] [PATCH v3 2/4] qdev/machine: Introduce hotplug_allowed hook Peter Xu
2019-09-17 15:10   ` [Qemu-devel] [PULL 06/10] " Michael S. Tsirkin
2019-09-16  8:07 ` [Qemu-devel] [PATCH v3 3/4] pc/q35: Disallow vfio-pci hotplug without VT-d caching mode Peter Xu
2019-09-17 15:11   ` [Qemu-devel] [PULL 07/10] " Michael S. Tsirkin
2019-09-16  8:07 ` [Qemu-devel] [PATCH v3 4/4] intel_iommu: Remove the caching-mode check during flag change Peter Xu
2019-09-17 15:11   ` [Qemu-devel] [PULL 08/10] " Michael S. Tsirkin
2019-09-10 14:03 [Qemu-devel] [PATCH v2] MAINTAINERS: update virtio-rng and virtio-serial maintainer Laurent Vivier
2019-09-17 15:10 ` [Qemu-devel] [PULL 01/10] " Michael S. Tsirkin
2019-08-22 18:34 [Qemu-devel] [PATCH 1/2] vhost-user-blk: prevent using uninitialized vqs Raphael Norwitz
2019-09-17 15:10 ` [Qemu-devel] [PULL 03/10] " Michael S. Tsirkin
2019-08-22 18:34 ` [Qemu-devel] [PATCH 2/2] backends/vhost-user.c: " Raphael Norwitz
2019-09-17 15:10   ` [Qemu-devel] [PULL 04/10] " Michael S. Tsirkin
2019-08-28  8:29   ` [Qemu-devel] [PATCH 2/2] " Stefan Hajnoczi
2019-08-23  3:43 ` [Qemu-devel] [Qemu-block] [PATCH 1/2] vhost-user-blk: " yuchenlin via Qemu-devel
2019-08-28  8:28 ` [Qemu-devel] " Stefan Hajnoczi
2019-08-21 12:16 [Qemu-devel] [PATCH v3] virtio pmem: user document Pankaj Gupta
2019-09-17 15:11 ` [Qemu-devel] [PULL 09/10] " Michael S. Tsirkin
2019-08-26 12:46 ` [Qemu-devel] [PATCH v3] " Cornelia Huck
2019-09-16  6:30   ` Pankaj Gupta
2019-08-01  0:40 [Qemu-devel] [PATCH v2] docs/nvdimm: add example on persistent backend setup Wei Yang
2019-09-17 15:10 ` [Qemu-devel] [PULL 02/10] " Michael S. Tsirkin
2019-08-01  8:05 ` [Qemu-devel] [PATCH v2] " Stefan Hajnoczi
2019-09-11  8:51 ` Wei Yang
2019-09-12 12:16 ` Stefan Hajnoczi
2019-09-12 21:44   ` Wei Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.