[Qemu-devel] [RFC PATCH] virtio: Map virtqueue rings instead of referencing guest phys addrs
From: Alex Williamson @ 2010-09-17 17:30 UTC
To: qemu-devel; +Cc: alex.williamson, kvm
Nearly every operation on a virtqueue requires multiple reads and
writes of the virtqueue ring descriptors using guest physical
addresses (ld*/st*_phys). These are expensive and result in
phys_page_find_alloc() and qemu_get_ram_ptr() showing up at the top
of profiles run under virtio net/block workloads.
We can avoid much of this by mapping the ring descriptors once with
cpu_physical_memory_map() and referencing them through virtual
addresses. This does a good job of dropping these high hitters down
in the profile, and generally shows a minor improvement in block and
net throughput and latency.
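In outline, the change turns per-field guest-physical loads into
plain host-memory loads. A condensed sketch of the accessor pattern
in the diff below (types and helpers are the existing QEMU ones):

  /* Before: every access pays a guest-physical translation
   * (phys_page_find_alloc()/qemu_get_ram_ptr()) before the load. */
  static uint16_t avail_flags_old(VirtQueue *vq)
  {
      return lduw_phys(vq->vring.avail + offsetof(VRingAvail, flags));
  }

  /* After: vring.avail_va was mapped once in virtqueue_init() via
   * cpu_physical_memory_map(); this is now an ordinary host load. */
  static uint16_t avail_flags_new(VirtQueue *vq)
  {
      return lduw_p(vq->vring.avail_va + offsetof(VRingAvail, flags));
  }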
I'm posting this as an RFC because this isn't nearly the slam-dunk
performance improvement that the virtio-net TX bottom-half handler
brought. On average, I see just shy of a 7% improvement in overall
network performance. I'm having a hard time pinning down any numbers
for block, since the gains typically seem quite a bit smaller than my
standard deviation. I'm getting tired of testing it, so I'll toss it
out and see if anyone thinks this is a good idea. I have tested this
with local migrations and haven't seen any issues.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
Makefile.objs | 3 -
Makefile.target | 5 +
hw/virtio.c | 230 +++++++++++++++++++++++++++++++++++++++----------------
3 files changed, 164 insertions(+), 74 deletions(-)
diff --git a/Makefile.objs b/Makefile.objs
index 4a1eaa1..f362b8f 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -145,7 +145,6 @@ user-obj-y += cutils.o cache-utils.o
hw-obj-y =
hw-obj-y += vl.o loader.o
-hw-obj-y += virtio.o virtio-console.o
hw-obj-y += fw_cfg.o pci.o pci_host.o pcie_host.o
hw-obj-y += watchdog.o
hw-obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
@@ -244,8 +243,6 @@ sound-obj-$(CONFIG_CS4231A) += cs4231a.o
adlib.o fmopl.o: QEMU_CFLAGS += -DBUILD_Y8950=0
hw-obj-$(CONFIG_SOUND) += $(sound-obj-y)
-hw-obj-$(CONFIG_VIRTFS) += virtio-9p-debug.o virtio-9p-local.o
-
######################################################################
# libdis
# NOTE: the disassembler code is only needed for debugging
diff --git a/Makefile.target b/Makefile.target
index 18826bb..4e0cf8d 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -165,11 +165,12 @@ ifdef CONFIG_SOFTMMU
obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o
# virtio has to be here due to weird dependency between PCI and virtio-net.
# need to fix this properly
-obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-serial-bus.o
+obj-y += virtio.o virtio-console.o virtio-blk.o virtio-balloon.o
+obj-y += virtio-net.o virtio-serial-bus.o
obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
obj-y += vhost_net.o
obj-$(CONFIG_VHOST_NET) += vhost.o
-obj-$(CONFIG_VIRTFS) += virtio-9p.o
+obj-$(CONFIG_VIRTFS) += virtio-9p.o virtio-9p-debug.o virtio-9p-local.o
obj-y += rwhandler.o
obj-$(CONFIG_KVM) += kvm.o kvm-all.o
obj-$(CONFIG_NO_KVM) += kvm-stub.o
diff --git a/hw/virtio.c b/hw/virtio.c
index 85312b3..0dcf375 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -15,6 +15,8 @@
#include "virtio.h"
#include "sysemu.h"
+#include "cpu.h"
+#include "exec-all.h"
/* The alignment to use between consumer and producer parts of vring.
* x86 pagesize again. */
@@ -61,8 +63,11 @@ typedef struct VRing
{
unsigned int num;
target_phys_addr_t desc;
+ void *desc_va;
target_phys_addr_t avail;
+ void *avail_va;
target_phys_addr_t used;
+ void *used_va;
} VRing;
struct VirtQueue
@@ -78,107 +83,145 @@ struct VirtQueue
EventNotifier host_notifier;
};
+static void *virtqueue_map(target_phys_addr_t addr,
+ target_phys_addr_t len, int write)
+{
+ void *map;
+ target_phys_addr_t mapped = len;
+
+ map = cpu_physical_memory_map(addr, &mapped, write);
+ if (!map) {
+ return NULL;
+ }
+ if (mapped != len) {
+ cpu_physical_memory_unmap(map, mapped, write, 0);
+ return NULL;
+ }
+ return map;
+}
+
/* virt queue functions */
static void virtqueue_init(VirtQueue *vq)
{
- target_phys_addr_t pa = vq->pa;
+ target_phys_addr_t size, pa = vq->pa;
vq->vring.desc = pa;
vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
vq->vring.used = vring_align(vq->vring.avail +
offsetof(VRingAvail, ring[vq->vring.num]),
VIRTIO_PCI_VRING_ALIGN);
+
+ size = vq->vring.num * sizeof(VRingDesc);
+ if (!(vq->vring.desc_va = virtqueue_map(vq->vring.desc, size, 0))) {
+ fprintf(stderr, "Failed to map vring.desc\n");
+ exit(1); /* Ugh, wishing for -ENOMEM */
+ }
+ size = offsetof(VRingAvail, ring[vq->vring.num]);
+ if (!(vq->vring.avail_va = virtqueue_map(vq->vring.avail, size, 0))) {
+ fprintf(stderr, "Failed to map vring.avail\n");
+ exit(1); /* Ugh, wishing for -ENOMEM */
+ }
+ size = offsetof(VRingUsed, ring[vq->vring.num]);
+ if (!(vq->vring.used_va = virtqueue_map(vq->vring.used, size, 1))) {
+ fprintf(stderr, "Failed to map vring.used\n");
+ exit(1); /* Ugh, wishing for -ENOMEM */
+ }
}
-static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i)
+static inline void sync_dirty_bytes(void *ptr, int bytes)
{
- target_phys_addr_t pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
- return ldq_phys(pa);
+ if (cpu_physical_memory_get_dirty_tracking()) {
+ ram_addr_t addr = qemu_ram_addr_from_host(ptr);
+
+ if (!cpu_physical_memory_is_dirty(addr)) {
+ tb_invalidate_phys_page_range(addr, addr + bytes, 0);
+ cpu_physical_memory_set_dirty_flags(addr,
+ (0xff & ~CODE_DIRTY_FLAG));
+ }
+ }
}
-static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i)
+static inline uint64_t vring_desc_addr(void *desc_va, int i)
{
- target_phys_addr_t pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
- return ldl_phys(pa);
+ void *ptr = desc_va + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
+ return ldq_p(ptr);
}
-static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i)
+static inline uint32_t vring_desc_len(void *desc_va, int i)
{
- target_phys_addr_t pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
- return lduw_phys(pa);
+ void *ptr = desc_va + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
+ return ldl_p(ptr);
}
-static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, int i)
+static inline uint16_t vring_desc_flags(void *desc_va, int i)
{
- target_phys_addr_t pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
- return lduw_phys(pa);
+ void *ptr = desc_va + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
+ return lduw_p(ptr);
+}
+
+static inline uint16_t vring_desc_next(void *desc_va, int i)
+{
+ void *ptr = desc_va + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
+ return lduw_p(ptr);
}
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
- target_phys_addr_t pa;
- pa = vq->vring.avail + offsetof(VRingAvail, flags);
- return lduw_phys(pa);
+ void *ptr = vq->vring.avail_va + offsetof(VRingAvail, flags);
+ return lduw_p(ptr);
}
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
- target_phys_addr_t pa;
- pa = vq->vring.avail + offsetof(VRingAvail, idx);
- return lduw_phys(pa);
+ void *ptr = vq->vring.avail_va + offsetof(VRingAvail, idx);
+ return lduw_p(ptr);
}
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
- target_phys_addr_t pa;
- pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
- return lduw_phys(pa);
+ void *ptr = vq->vring.avail_va + offsetof(VRingAvail, ring[i]);
+ return lduw_p(ptr);
}
static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
- target_phys_addr_t pa;
- pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
- stl_phys(pa, val);
+ void *ptr = vq->vring.used_va + offsetof(VRingUsed, ring[i].id);
+ stl_p(ptr, val);
+ sync_dirty_bytes(ptr, 4);
}
static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
- target_phys_addr_t pa;
- pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
- stl_phys(pa, val);
+ void *ptr = vq->vring.used_va + offsetof(VRingUsed, ring[i].len);
+ stl_p(ptr, val);
+ sync_dirty_bytes(ptr, 4);
}
static uint16_t vring_used_idx(VirtQueue *vq)
{
- target_phys_addr_t pa;
- pa = vq->vring.used + offsetof(VRingUsed, idx);
- return lduw_phys(pa);
+ void *ptr = vq->vring.used_va + offsetof(VRingUsed, idx);
+ return lduw_p(ptr);
}
static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val)
{
- target_phys_addr_t pa;
- pa = vq->vring.used + offsetof(VRingUsed, idx);
- stw_phys(pa, vring_used_idx(vq) + val);
+ void *ptr = vq->vring.used_va + offsetof(VRingUsed, idx);
+ stw_p(ptr, vring_used_idx(vq) + val);
+ sync_dirty_bytes(ptr, 2);
}
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
- target_phys_addr_t pa;
- pa = vq->vring.used + offsetof(VRingUsed, flags);
- stw_phys(pa, lduw_phys(pa) | mask);
+ void *ptr = vq->vring.used_va + offsetof(VRingUsed, flags);
+ stw_p(ptr, lduw_p(ptr) | mask);
+ sync_dirty_bytes(ptr, 2);
}
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
- target_phys_addr_t pa;
- pa = vq->vring.used + offsetof(VRingUsed, flags);
- stw_phys(pa, lduw_phys(pa) & ~mask);
+ void *ptr = vq->vring.used_va + offsetof(VRingUsed, flags);
+ stw_p(ptr, lduw_p(ptr) & ~mask);
+ sync_dirty_bytes(ptr, 2);
}
void virtio_queue_set_notification(VirtQueue *vq, int enable)
@@ -274,17 +317,17 @@ static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
return head;
}
-static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
+static unsigned virtqueue_next_desc(void *desc_va,
unsigned int i, unsigned int max)
{
unsigned int next;
/* If this descriptor says it doesn't chain, we're done. */
- if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
+ if (!(vring_desc_flags(desc_va, i) & VRING_DESC_F_NEXT))
return max;
/* Check they're not leading us off end of descriptors. */
- next = vring_desc_next(desc_pa, i);
+ next = vring_desc_next(desc_va, i);
/* Make sure compiler knows to grab that: we don't want it changing! */
wmb();
@@ -306,16 +349,18 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
total_bufs = in_total = out_total = 0;
while (virtqueue_num_heads(vq, idx)) {
unsigned int max, num_bufs, indirect = 0;
- target_phys_addr_t desc_pa;
+ target_phys_addr_t len = 0;
+ void *desc_va;
int i;
max = vq->vring.num;
num_bufs = total_bufs;
i = virtqueue_get_head(vq, idx++);
- desc_pa = vq->vring.desc;
+ desc_va = vq->vring.desc_va;
- if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
- if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
+ if (vring_desc_flags(desc_va, i) & VRING_DESC_F_INDIRECT) {
+ len = vring_desc_len(desc_va, i);
+ if (len % sizeof(VRingDesc)) {
fprintf(stderr, "Invalid size for indirect buffer table\n");
exit(1);
}
@@ -328,9 +373,13 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
/* loop over the indirect descriptor table */
indirect = 1;
- max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
+ max = len / sizeof(VRingDesc);
num_bufs = i = 0;
- desc_pa = vring_desc_addr(desc_pa, i);
+ desc_va = virtqueue_map(vring_desc_addr(desc_va, i), len, 0);
+ if (!desc_va) {
+ fprintf(stderr, "Failed to map indirect virtqueue\n");
+ exit(1);
+ }
}
do {
@@ -340,21 +389,31 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
exit(1);
}
- if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
+ if (vring_desc_flags(desc_va, i) & VRING_DESC_F_WRITE) {
if (in_bytes > 0 &&
- (in_total += vring_desc_len(desc_pa, i)) >= in_bytes)
+ (in_total += vring_desc_len(desc_va, i)) >= in_bytes) {
+ if (indirect) {
+ cpu_physical_memory_unmap(desc_va, len, 0, 0);
+ }
return 1;
+ }
} else {
if (out_bytes > 0 &&
- (out_total += vring_desc_len(desc_pa, i)) >= out_bytes)
+ (out_total += vring_desc_len(desc_va, i)) >= out_bytes) {
+ if (indirect) {
+ cpu_physical_memory_unmap(desc_va, len, 0, 0);
+ }
return 1;
+ }
}
- } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
+ } while ((i = virtqueue_next_desc(desc_va, i, max)) != max);
- if (!indirect)
+ if (!indirect) {
total_bufs = num_bufs;
- else
+ } else {
+ cpu_physical_memory_unmap(desc_va, len, 0, 0);
total_bufs++;
+ }
}
return 0;
@@ -379,7 +438,9 @@ void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr,
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
unsigned int i, head, max;
- target_phys_addr_t desc_pa = vq->vring.desc;
+ void *desc_va = vq->vring.desc_va;
+ target_phys_addr_t len = 0;
+ int indirect = 0;
if (!virtqueue_num_heads(vq, vq->last_avail_idx))
return 0;
@@ -391,15 +452,21 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
- if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
- if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
+ if (vring_desc_flags(desc_va, i) & VRING_DESC_F_INDIRECT) {
+ len = vring_desc_len(desc_va, i);
+ if (len % sizeof(VRingDesc)) {
fprintf(stderr, "Invalid size for indirect buffer table\n");
exit(1);
}
/* loop over the indirect descriptor table */
- max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
- desc_pa = vring_desc_addr(desc_pa, i);
+ max = len / sizeof(VRingDesc);
+ desc_va = virtqueue_map(vring_desc_addr(desc_va, i), len, 0);
+ if (!desc_va) {
+ fprintf(stderr, "Failed to map indirect virtqueue\n");
+ exit(1);
+ }
+ indirect = 1;
i = 0;
}
@@ -407,22 +474,26 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
do {
struct iovec *sg;
- if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
- elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
+ if (vring_desc_flags(desc_va, i) & VRING_DESC_F_WRITE) {
+ elem->in_addr[elem->in_num] = vring_desc_addr(desc_va, i);
sg = &elem->in_sg[elem->in_num++];
} else {
- elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
+ elem->out_addr[elem->out_num] = vring_desc_addr(desc_va, i);
sg = &elem->out_sg[elem->out_num++];
}
- sg->iov_len = vring_desc_len(desc_pa, i);
+ sg->iov_len = vring_desc_len(desc_va, i);
/* If we've got too many, that implies a descriptor loop. */
if ((elem->in_num + elem->out_num) > max) {
fprintf(stderr, "Looped descriptor");
exit(1);
}
- } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
+ } while ((i = virtqueue_next_desc(desc_va, i, max)) != max);
+
+ if (indirect) {
+ cpu_physical_memory_unmap(desc_va, len, 0, 0);
+ }
/* Now map what we have collected */
virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
@@ -467,6 +538,27 @@ void virtio_reset(void *opaque)
vdev->vq[i].vring.desc = 0;
vdev->vq[i].vring.avail = 0;
vdev->vq[i].vring.used = 0;
+ if (vdev->vq[i].vring.desc_va) {
+ cpu_physical_memory_unmap(vdev->vq[i].vring.desc_va,
+ vdev->vq[i].vring.num *
+ sizeof(VRingDesc), 0, 0);
+ vdev->vq[i].vring.desc_va = NULL;
+ }
+ if (vdev->vq[i].vring.avail_va) {
+ cpu_physical_memory_unmap(vdev->vq[i].vring.avail_va,
+ offsetof(VRingAvail,
+ ring[vdev->vq[i].vring.num]),
+ 0, 0);
+ vdev->vq[i].vring.avail_va = NULL;
+ }
+ if (vdev->vq[i].vring.used_va) {
+ cpu_physical_memory_unmap(vdev->vq[i].vring.used_va,
+ offsetof(VRingUsed,
+ ring[vdev->vq[i].vring.num]), 1,
+ offsetof(VRingUsed,
+ ring[vdev->vq[i].vring.num]));
+ vdev->vq[i].vring.used_va = NULL;
+ }
vdev->vq[i].last_avail_idx = 0;
vdev->vq[i].pa = 0;
vdev->vq[i].vector = VIRTIO_NO_VECTOR;
Re: [Qemu-devel] [RFC PATCH] virtio: Map virtqueue rings instead of referencing guest phys addrs
From: Anthony Liguori @ 2010-09-17 18:04 UTC
To: Alex Williamson; +Cc: qemu-devel, kvm
On 09/17/2010 12:30 PM, Alex Williamson wrote:
> +static void *virtqueue_map(target_phys_addr_t addr,
> + target_phys_addr_t len, int write)
> +{
> + void *map;
> + target_phys_addr_t mapped = len;
> +
> + map = cpu_physical_memory_map(addr, &mapped, write);
> + if (!map) {
> + return NULL;
> + }
> + if (mapped != len) {
> + cpu_physical_memory_unmap(map, mapped, write, 0);
> + return NULL;
> + }
> + return map;
> +}
>
You can't hold a reference from memory_map() once you fall back to the
main loop.
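A minimal sketch of the pattern that constraint implies, with
hypothetical helper names (process_descriptors stands in for the real
work): the mapping has to be taken and released within a single
request, not held from virtqueue_init() across main-loop iterations,
since the host mapping backing a guest-physical range can change
underneath a long-lived pointer:

  static void handle_one_request(target_phys_addr_t pa,
                                 target_phys_addr_t len)
  {
      target_phys_addr_t mapped = len;
      void *va = cpu_physical_memory_map(pa, &mapped, 0);

      if (!va || mapped != len) {
          if (va) {
              cpu_physical_memory_unmap(va, mapped, 0, 0);
          }
          return; /* fall back to the slower *_phys accessors */
      }

      process_descriptors(va, len);     /* hypothetical */

      /* Release the mapping before returning to the main loop. */
      cpu_physical_memory_unmap(va, len, 0, 0);
  }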
Regards,
Anthony Liguori