Michael S. Tsirkin writes: > On Mon, Jul 29, 2019 at 02:57:55PM +0200, Sergio Lopez wrote: >> Implement the modern (v2) personality, according to the VirtIO 1.0 >> specification. >> >> Support for v2 among guests is not as widespread as it'd be >> desirable. While the Linux driver has had it for a while, support is >> missing, at least, from Tianocore EDK II, NetBSD and FreeBSD. > > The fact that there was no open source hypervisor implementation has > probably contributed to this :) > >> For this reason, the v2 personality is disabled, keeping the legacy >> behavior as default. > > I agree it's a good default for existing machine types. > >> Machine types willing to use v2, can enable it >> using MachineClass's compat_props. > > Hmm. Are compat_props really the recommended mechanism to > tweak defaults? I was under the impression it's > only for compatibility with old machine types. > Eduardo, any opinion on this? Stefan suggested using something like "-global virtio-mmio.modern=true" which does the trick for the command line, but I'd also like a way to set it to true by default on microvm. We can discuss the best way to achieve that (if using compat_props isn't acceptable) on the next microvm patch series. >> >> Signed-off-by: Sergio Lopez > > Endian-ness seems to be wrong: > > static const MemoryRegionOps virtio_mem_ops = { > .read = virtio_mmio_read, > .write = virtio_mmio_write, > .endianness = DEVICE_NATIVE_ENDIAN, > }; > > you will see this if you test a big endian guest. Interesting, a Linux kernel compiled for aarch64_be works just fine. Looking further, seems like, on ARM, Linux assumes all memory I/O operations are little-endian and swaps the data if necessary: arch/arm64/include/asm/io.h: /* * Relaxed I/O memory access primitives. These follow the Device memory * ordering rules but do not guarantee any ordering relative to Normal memory * accesses. */ #define readb_relaxed(c) ({ u8 __r = __raw_readb(c); __r; }) #define readw_relaxed(c) ({ u16 __r = le16_to_cpu((__force __le16)__raw_readw(c)); __r; }) #define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; }) #define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; }) #define writeb_relaxed(v,c) ((void)__raw_writeb((v),(c))) #define writew_relaxed(v,c) ((void)__raw_writew((__force u16)cpu_to_le16(v),(c))) #define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) #define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c))) The Apendix X from virtio-0.9.5 specs states that "The endianness of the registers follows the native endianness of the Guest". Luckily for us, this isn't the case, as with DEVICE_NATIVE_ENDIAN memory.c:adjust_endianness doesn't attempt any kind of transformation. In any, I guess we should follow the spec, and keep DEVICE_NATIVE_ENDIAN for the legacy mode while using DEVICE_LITTLE_ENDIAN for virtio-mmio-2/virtio-1. >> --- >> hw/virtio/virtio-mmio.c | 264 ++++++++++++++++++++++++++++++++++++++-- >> 1 file changed, 254 insertions(+), 10 deletions(-) >> >> diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c >> index 97b7f35496..1da841336f 100644 >> --- a/hw/virtio/virtio-mmio.c >> +++ b/hw/virtio/virtio-mmio.c >> @@ -47,14 +47,24 @@ >> OBJECT_CHECK(VirtIOMMIOProxy, (obj), TYPE_VIRTIO_MMIO) >> >> #define VIRT_MAGIC 0x74726976 /* 'virt' */ >> -#define VIRT_VERSION 1 >> +#define VIRT_VERSION_LEGACY 1 >> +#define VIRT_VERSION_MODERN 2 >> #define VIRT_VENDOR 0x554D4551 /* 'QEMU' */ >> >> +typedef struct VirtIOMMIOQueue { >> + uint16_t num; >> + bool enabled; >> + uint32_t desc[2]; >> + uint32_t avail[2]; >> + uint32_t used[2]; >> +} VirtIOMMIOQueue; >> + >> typedef struct { >> /* Generic */ >> SysBusDevice parent_obj; >> MemoryRegion iomem; >> qemu_irq irq; >> + bool modern; >> /* Guest accessible state needing migration and reset */ >> uint32_t host_features_sel; >> uint32_t guest_features_sel; >> @@ -62,6 +72,9 @@ typedef struct { >> /* virtio-bus */ >> VirtioBusState bus; >> bool format_transport_address; >> + /* Fields only used for v2 (modern) devices */ >> + uint32_t guest_features[2]; >> + VirtIOMMIOQueue vqs[VIRTIO_QUEUE_MAX]; >> } VirtIOMMIOProxy; >> >> static bool virtio_mmio_ioeventfd_enabled(DeviceState *d) >> @@ -115,7 +128,11 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) >> case VIRTIO_MMIO_MAGIC_VALUE: >> return VIRT_MAGIC; >> case VIRTIO_MMIO_VERSION: >> - return VIRT_VERSION; >> + if (proxy->modern) { >> + return VIRT_VERSION_MODERN; >> + } else { >> + return VIRT_VERSION_LEGACY; >> + } >> case VIRTIO_MMIO_VENDOR_ID: >> return VIRT_VENDOR; >> default: >> @@ -146,14 +163,18 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) >> case VIRTIO_MMIO_MAGIC_VALUE: >> return VIRT_MAGIC; >> case VIRTIO_MMIO_VERSION: >> - return VIRT_VERSION; >> + if (proxy->modern) { >> + return VIRT_VERSION_MODERN; >> + } else { >> + return VIRT_VERSION_LEGACY; >> + } >> case VIRTIO_MMIO_DEVICE_ID: >> return vdev->device_id; >> case VIRTIO_MMIO_VENDOR_ID: >> return VIRT_VENDOR; >> case VIRTIO_MMIO_DEVICE_FEATURES: >> if (proxy->host_features_sel) { >> - return 0; >> + return vdev->host_features >> 32; > > I'd do vdev->host_features >> (32 * proxy->host_features_sel); OK, looks nicer. I'll sanitize the value host_features_sel on VIRTIO_MMIO_DEVICE_FEATURES too, as I'm already doing with guest_features_sel. >> } >> return vdev->host_features; >> case VIRTIO_MMIO_QUEUE_NUM_MAX: >> @@ -162,12 +183,34 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) >> } >> return VIRTQUEUE_MAX_SIZE; >> case VIRTIO_MMIO_QUEUE_PFN: >> + if (proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: read from legacy register in modern mode\n", >> + __func__); >> + return 0; >> + } >> return virtio_queue_get_addr(vdev, vdev->queue_sel) >> >> proxy->guest_page_shift; >> + case VIRTIO_MMIO_QUEUE_READY: >> + if (!proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: read from modern register in legacy mode\n", >> + __func__); >> + return 0; >> + } >> + return proxy->vqs[vdev->queue_sel].enabled; >> case VIRTIO_MMIO_INTERRUPT_STATUS: >> return atomic_read(&vdev->isr); >> case VIRTIO_MMIO_STATUS: >> return vdev->status; >> + case VIRTIO_MMIO_CONFIG_GENERATION: >> + if (!proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: read from modern register in legacy mode\n", >> + __func__); >> + return 0; >> + } >> + return vdev->generation; >> case VIRTIO_MMIO_DEVICE_FEATURES_SEL: >> case VIRTIO_MMIO_DRIVER_FEATURES: >> case VIRTIO_MMIO_DRIVER_FEATURES_SEL: >> @@ -177,6 +220,12 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) >> case VIRTIO_MMIO_QUEUE_ALIGN: >> case VIRTIO_MMIO_QUEUE_NOTIFY: >> case VIRTIO_MMIO_INTERRUPT_ACK: >> + case VIRTIO_MMIO_QUEUE_DESC_LOW: >> + case VIRTIO_MMIO_QUEUE_DESC_HIGH: >> + case VIRTIO_MMIO_QUEUE_AVAIL_LOW: >> + case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: >> + case VIRTIO_MMIO_QUEUE_USED_LOW: >> + case VIRTIO_MMIO_QUEUE_USED_HIGH: >> qemu_log_mask(LOG_GUEST_ERROR, >> "%s: read of write-only register\n", >> __func__); >> @@ -232,14 +281,26 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, >> proxy->host_features_sel = value; >> break; >> case VIRTIO_MMIO_DRIVER_FEATURES: >> - if (!proxy->guest_features_sel) { >> + if (proxy->modern) { >> + proxy->guest_features[proxy->guest_features_sel] = value; >> + } else if (!proxy->guest_features_sel) { >> virtio_set_features(vdev, value); >> } >> break; >> case VIRTIO_MMIO_DRIVER_FEATURES_SEL: >> - proxy->guest_features_sel = value; >> + if (value) { >> + proxy->guest_features_sel = 1; >> + } else { >> + proxy->guest_features_sel = 0; >> + } >> break; >> case VIRTIO_MMIO_GUEST_PAGE_SIZE: >> + if (proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: write to legacy register in modern mode\n", >> + __func__); >> + return; >> + } >> proxy->guest_page_shift = ctz32(value); >> if (proxy->guest_page_shift > 31) { >> proxy->guest_page_shift = 0; >> @@ -253,15 +314,29 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, >> break; >> case VIRTIO_MMIO_QUEUE_NUM: >> trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE); >> - virtio_queue_set_num(vdev, vdev->queue_sel, value); >> - /* Note: only call this function for legacy devices */ >> - virtio_queue_update_rings(vdev, vdev->queue_sel); >> + if (proxy->modern) { >> + proxy->vqs[vdev->queue_sel].num = value; >> + } else { >> + virtio_queue_set_num(vdev, vdev->queue_sel, value); >> + virtio_queue_update_rings(vdev, vdev->queue_sel); >> + } >> break; >> case VIRTIO_MMIO_QUEUE_ALIGN: >> - /* Note: this is only valid for legacy devices */ >> + if (proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: write to legacy register in modern mode\n", >> + __func__); >> + return; >> + } >> virtio_queue_set_align(vdev, vdev->queue_sel, value); >> break; >> case VIRTIO_MMIO_QUEUE_PFN: >> + if (proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: write to legacy register in modern mode\n", >> + __func__); >> + return; >> + } >> if (value == 0) { >> virtio_reset(vdev); >> } else { >> @@ -269,6 +344,24 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, >> value << proxy->guest_page_shift); >> } >> break; >> + case VIRTIO_MMIO_QUEUE_READY: >> + if (!proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: write to modern register in legacy mode\n", >> + __func__); >> + return; >> + } >> + virtio_queue_set_num(vdev, vdev->queue_sel, >> + proxy->vqs[vdev->queue_sel].num); >> + virtio_queue_set_rings(vdev, vdev->queue_sel, >> + ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 | >> + proxy->vqs[vdev->queue_sel].desc[0], >> + ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 | >> + proxy->vqs[vdev->queue_sel].avail[0], >> + ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 | >> + proxy->vqs[vdev->queue_sel].used[0]); >> + proxy->vqs[vdev->queue_sel].enabled = 1; >> + break; > > This one seems out of spec. > In this respect virtio mmio is more advanced that virtio pci: > it allows setting the ready status to 0. You're right, I'll fix it. >> case VIRTIO_MMIO_QUEUE_NOTIFY: >> if (value < VIRTIO_QUEUE_MAX) { >> virtio_queue_notify(vdev, value); >> @@ -283,6 +376,12 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, >> virtio_mmio_stop_ioeventfd(proxy); >> } >> >> + if (proxy->modern && (value & VIRTIO_CONFIG_S_FEATURES_OK)) { >> + virtio_set_features(vdev, >> + ((uint64_t)proxy->guest_features[1]) << 32 | >> + proxy->guest_features[0]); >> + } >> + >> virtio_set_status(vdev, value & 0xff); >> >> if (value & VIRTIO_CONFIG_S_DRIVER_OK) { >> @@ -293,6 +392,60 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, >> virtio_reset(vdev); >> } >> break; >> + case VIRTIO_MMIO_QUEUE_DESC_LOW: >> + if (!proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: write to legacy register in modern mode\n", >> + __func__); >> + return; >> + } >> + proxy->vqs[vdev->queue_sel].desc[0] = value; >> + break; >> + case VIRTIO_MMIO_QUEUE_DESC_HIGH: >> + if (!proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: write to legacy register in modern mode\n", >> + __func__); >> + return; >> + } >> + proxy->vqs[vdev->queue_sel].desc[1] = value; >> + break; >> + case VIRTIO_MMIO_QUEUE_AVAIL_LOW: >> + if (!proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: write to legacy register in modern mode\n", >> + __func__); >> + return; >> + } >> + proxy->vqs[vdev->queue_sel].avail[0] = value; >> + break; >> + case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: >> + if (!proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: write to legacy register in modern mode\n", >> + __func__); >> + return; >> + } >> + proxy->vqs[vdev->queue_sel].avail[1] = value; >> + break; >> + case VIRTIO_MMIO_QUEUE_USED_LOW: >> + if (!proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: write to legacy register in modern mode\n", >> + __func__); >> + return; >> + } >> + proxy->vqs[vdev->queue_sel].used[0] = value; >> + break; >> + case VIRTIO_MMIO_QUEUE_USED_HIGH: >> + if (!proxy->modern) { >> + qemu_log_mask(LOG_GUEST_ERROR, >> + "%s: write to legacy register in modern mode\n", >> + __func__); >> + return; >> + } >> + proxy->vqs[vdev->queue_sel].used[1] = value; >> + break; >> case VIRTIO_MMIO_MAGIC_VALUE: >> case VIRTIO_MMIO_VERSION: >> case VIRTIO_MMIO_DEVICE_ID: >> @@ -300,6 +453,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, >> case VIRTIO_MMIO_DEVICE_FEATURES: >> case VIRTIO_MMIO_QUEUE_NUM_MAX: >> case VIRTIO_MMIO_INTERRUPT_STATUS: >> + case VIRTIO_MMIO_CONFIG_GENERATION: >> qemu_log_mask(LOG_GUEST_ERROR, >> "%s: write to readonly register\n", >> __func__); >> @@ -349,15 +503,90 @@ static void virtio_mmio_save_config(DeviceState *opaque, QEMUFile *f) >> qemu_put_be32(f, proxy->guest_page_shift); >> } >> >> +static const VMStateDescription vmstate_virtio_mmio_modern_queue_state = { >> + .name = "virtio_mmio/modern_queue_state", >> + .version_id = 1, >> + .minimum_version_id = 1, >> + .fields = (VMStateField[]) { >> + VMSTATE_UINT16(num, VirtIOMMIOQueue), >> + VMSTATE_BOOL(enabled, VirtIOMMIOQueue), >> + VMSTATE_UINT32_ARRAY(desc, VirtIOMMIOQueue, 2), >> + VMSTATE_UINT32_ARRAY(avail, VirtIOMMIOQueue, 2), >> + VMSTATE_UINT32_ARRAY(used, VirtIOMMIOQueue, 2), >> + VMSTATE_END_OF_LIST() >> + } >> +}; >> + >> +static const VMStateDescription vmstate_virtio_mmio_modern_state_sub = { >> + .name = "virtio_mmio/modern_state", >> + .version_id = 1, >> + .minimum_version_id = 1, >> + .fields = (VMStateField[]) { >> + VMSTATE_UINT32_ARRAY(guest_features, VirtIOMMIOProxy, 2), >> + VMSTATE_STRUCT_ARRAY(vqs, VirtIOMMIOProxy, VIRTIO_QUEUE_MAX, 0, >> + vmstate_virtio_mmio_modern_queue_state, >> + VirtIOMMIOQueue), >> + VMSTATE_END_OF_LIST() >> + } >> +}; >> + >> +static const VMStateDescription vmstate_virtio_mmio = { >> + .name = "virtio_mmio", >> + .version_id = 1, >> + .minimum_version_id = 1, >> + .minimum_version_id_old = 1, >> + .fields = (VMStateField[]) { >> + VMSTATE_END_OF_LIST() >> + }, >> + .subsections = (const VMStateDescription*[]) { >> + &vmstate_virtio_mmio_modern_state_sub, >> + NULL >> + } >> +}; >> + >> +static void virtio_mmio_save_extra_state(DeviceState *opaque, QEMUFile *f) >> +{ >> + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque); >> + >> + vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL); >> +} >> + >> +static int virtio_mmio_load_extra_state(DeviceState *opaque, QEMUFile *f) >> +{ >> + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque); >> + >> + return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1); >> +} >> + >> +static bool virtio_mmio_has_extra_state(DeviceState *opaque) >> +{ >> + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque); >> + >> + return proxy->modern; >> +} >> + >> static void virtio_mmio_reset(DeviceState *d) >> { >> VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); >> + int i; >> >> virtio_mmio_stop_ioeventfd(proxy); >> virtio_bus_reset(&proxy->bus); >> proxy->host_features_sel = 0; >> proxy->guest_features_sel = 0; >> proxy->guest_page_shift = 0; >> + >> + if (proxy->modern) { >> + proxy->guest_features[0] = proxy->guest_features[1] = 0; >> + >> + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { >> + proxy->vqs[i].enabled = 0; >> + proxy->vqs[i].num = 0; >> + proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0; >> + proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0; >> + proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0; >> + } >> + } >> } >> >> static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign, >> @@ -420,11 +649,22 @@ assign_error: >> return r; >> } >> >> +static void virtio_mmio_pre_plugged(DeviceState *d, Error **errp) >> +{ >> + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); >> + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); >> + >> + if (proxy->modern) { >> + virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1); >> + } >> +} >> + >> /* virtio-mmio device */ >> >> static Property virtio_mmio_properties[] = { >> DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy, >> format_transport_address, true), >> + DEFINE_PROP_BOOL("modern", VirtIOMMIOProxy, modern, false), >> DEFINE_PROP_END_OF_LIST(), >> }; >> >> @@ -508,9 +748,13 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) >> k->notify = virtio_mmio_update_irq; >> k->save_config = virtio_mmio_save_config; >> k->load_config = virtio_mmio_load_config; >> + k->save_extra_state = virtio_mmio_save_extra_state; >> + k->load_extra_state = virtio_mmio_load_extra_state; >> + k->has_extra_state = virtio_mmio_has_extra_state; >> k->set_guest_notifiers = virtio_mmio_set_guest_notifiers; >> k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled; >> k->ioeventfd_assign = virtio_mmio_ioeventfd_assign; >> + k->pre_plugged = virtio_mmio_pre_plugged; >> k->has_variable_vring_alignment = true; >> bus_class->max_dev = 1; >> bus_class->get_dev_path = virtio_mmio_bus_get_dev_path; >> -- >> 2.21.0