* [Qemu-devel] [PATCH 0/2] virtio: indirect ring entries @ 2008-12-18 17:24 Mark McLoughlin 2008-12-18 17:24 ` [Qemu-devel] [PATCH 1/2] virtio: make vring_desc_*() take phys addrs Mark McLoughlin 0 siblings, 1 reply; 4+ messages in thread From: Mark McLoughlin @ 2008-12-18 17:24 UTC (permalink / raw) To: qemu-devel Hi, I've just submitted some virtio patches to lkml: http://lkml.org/lkml/2008/12/18/213 Here are the patches to support it in qemu; just posting here for completeness' sake for now. Cheers, Mark. ^ permalink raw reply [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH 1/2] virtio: make vring_desc_*() take phys addrs 2008-12-18 17:24 [Qemu-devel] [PATCH 0/2] virtio: indirect ring entries Mark McLoughlin @ 2008-12-18 17:24 ` Mark McLoughlin 2008-12-18 17:24 ` [Qemu-devel] [PATCH 2/2] virtio: add support for indirect ring entries Mark McLoughlin 0 siblings, 1 reply; 4+ messages in thread From: Mark McLoughlin @ 2008-12-18 17:24 UTC (permalink / raw) To: qemu-devel; +Cc: Mark McLoughlin Change the vring descriptor helpers to take the physical address of the descriptor table rather than a virtqueue. This is needed in order to allow these helpers to be used with an indirect descriptor table. Signed-off-by: Mark McLoughlin <markmc@redhat.com> --- hw/virtio.c | 62 ++++++++++++++++++++++++++++++++-------------------------- 1 files changed, 34 insertions(+), 28 deletions(-) diff --git a/hw/virtio.c b/hw/virtio.c index dba80f8..e997a5e 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -159,31 +159,31 @@ static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa) VIRTIO_PCI_VRING_ALIGN); } -static inline uint64_t vring_desc_addr(VirtQueue *vq, int i) +static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i) { target_phys_addr_t pa; - pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); + pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); return ldq_phys(pa); } -static inline uint32_t vring_desc_len(VirtQueue *vq, int i) +static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i) { target_phys_addr_t pa; - pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); + pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); return ldl_phys(pa); } -static inline uint16_t vring_desc_flags(VirtQueue *vq, int i) +static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i) { target_phys_addr_t pa; - pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); + pa = desc_pa + sizeof(VRingDesc) * i + 
offsetof(VRingDesc, flags); return lduw_phys(pa); } -static inline uint16_t vring_desc_next(VirtQueue *vq, int i) +static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, int i) { target_phys_addr_t pa; - pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); + pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); return lduw_phys(pa); } @@ -356,20 +356,21 @@ static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) return head; } -static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) +static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa, + unsigned int i, unsigned int max) { unsigned int next; /* If this descriptor says it doesn't chain, we're done. */ - if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT)) - return vq->vring.num; + if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT)) + return max; /* Check they're not leading us off end of descriptors. */ - next = vring_desc_next(vq, i); + next = vring_desc_next(desc_pa, i); /* Make sure compiler knows to grab that: we don't want it changing! */ wmb(); - if (next >= vq->vring.num) { + if (next >= max) { fprintf(stderr, "Desc next is %u", next); exit(1); } @@ -379,10 +380,12 @@ static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) { - unsigned int idx; + target_phys_addr_t desc_pa = vq->vring.desc; + unsigned int idx, max; int num_bufs, in_total, out_total; idx = vq->last_avail_idx; + max = vq->vring.num; num_bufs = in_total = out_total = 0; while (virtqueue_num_heads(vq, idx)) { @@ -391,21 +394,21 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) i = virtqueue_get_head(vq, idx++); do { /* If we've got too many, that implies a descriptor loop. 
*/ - if (++num_bufs > vq->vring.num) { + if (++num_bufs > max) { fprintf(stderr, "Looped descriptor"); exit(1); } - if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { + if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) { if (in_bytes > 0 && - (in_total += vring_desc_len(vq, i)) >= in_bytes) + (in_total += vring_desc_len(desc_pa, i)) >= in_bytes) return 1; } else { if (out_bytes > 0 && - (out_total += vring_desc_len(vq, i)) >= out_bytes) + (out_total += vring_desc_len(desc_pa, i)) >= out_bytes) return 1; } - } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); + } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); } return 0; @@ -413,7 +416,8 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) { - unsigned int i, head; + target_phys_addr_t desc_pa = vq->vring.desc; + unsigned int i, head, max; if (!virtqueue_num_heads(vq, vq->last_avail_idx)) return 0; @@ -421,21 +425,23 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) /* When we start there are none of either input nor output. */ elem->out_num = elem->in_num = 0; + max = vq->vring.num; + i = head = virtqueue_get_head(vq, vq->last_avail_idx++); do { struct iovec *sg; - if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { - elem->in_addr[elem->in_num] = vring_desc_addr(vq, i); + if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) { + elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i); sg = &elem->in_sg[elem->in_num++]; } else sg = &elem->out_sg[elem->out_num++]; /* Grab the first descriptor, and check it's OK. */ - sg->iov_len = vring_desc_len(vq, i); + sg->iov_len = vring_desc_len(desc_pa, i); #ifdef VIRTIO_ZERO_COPY - sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len); + sg->iov_base = virtio_map_gpa(vring_desc_addr(desc_pa, i), sg->iov_len); #else /* cap individual scatter element size to prevent unbounded allocations of memory from the guest. 
Practically speaking, no virtio driver @@ -448,8 +454,8 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) sg->iov_base = qemu_malloc(sg->iov_len); if (sg->iov_base && - !(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) { - cpu_physical_memory_read(vring_desc_addr(vq, i), + !(vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE)) { + cpu_physical_memory_read(vring_desc_addr(desc_pa, i), sg->iov_base, sg->iov_len); } @@ -460,11 +466,11 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) } /* If we've got too many, that implies a descriptor loop. */ - if ((elem->in_num + elem->out_num) > vq->vring.num) { + if ((elem->in_num + elem->out_num) > max) { fprintf(stderr, "Looped descriptor"); exit(1); } - } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); + } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); elem->index = head; -- 1.6.0.5 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH 2/2] virtio: add support for indirect ring entries 2008-12-18 17:24 ` [Qemu-devel] [PATCH 1/2] virtio: make vring_desc_*() take phys addrs Mark McLoughlin @ 2008-12-18 17:24 ` Mark McLoughlin 0 siblings, 0 replies; 4+ messages in thread From: Mark McLoughlin @ 2008-12-18 17:24 UTC (permalink / raw) To: qemu-devel; +Cc: Mark McLoughlin Support a new feature flag for indirect ring entries. These are ring entries which point to a table of buffer descriptors. The idea here is to increase the ring capacity by allowing a larger effective ring size whereby the ring size dictates the number of requests that may be outstanding, rather than the size of those requests. This should be most effective in the case of block I/O where we can potentially benefit by concurrently dispatching a large number of large requests. Even in the simple case of single segment block requests, this results in a threefold increase in ring capacity. Signed-off-by: Mark McLoughlin <markmc@redhat.com> --- hw/virtio.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- hw/virtio.h | 4 ++++ 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/hw/virtio.c b/hw/virtio.c index e997a5e..1f0f3eb 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -380,18 +380,41 @@ static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa, int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) { - target_phys_addr_t desc_pa = vq->vring.desc; - unsigned int idx, max; - int num_bufs, in_total, out_total; + unsigned int idx; + int total_bufs, in_total, out_total; idx = vq->last_avail_idx; - max = vq->vring.num; - num_bufs = in_total = out_total = 0; + total_bufs = in_total = out_total = 0; while (virtqueue_num_heads(vq, idx)) { + unsigned int max, num_bufs, indirect = 0; + target_phys_addr_t desc_pa; int i; + max = vq->vring.num; + num_bufs = total_bufs; i = virtqueue_get_head(vq, idx++); + desc_pa = vq->vring.desc; + + if (vring_desc_flags(desc_pa, i) & 
VRING_DESC_F_INDIRECT) { + if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { + fprintf(stderr, "Invalid size for indirect buffer table\n"); + exit(1); + } + + /* If we've got too many, that implies a descriptor loop. */ + if (num_bufs >= max) { + fprintf(stderr, "Looped descriptor"); + exit(1); + } + + /* loop over the indirect descriptor table */ + indirect = 1; + max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc); + num_bufs = i = 0; + desc_pa = vring_desc_addr(desc_pa, i); + } + do { /* If we've got too many, that implies a descriptor loop. */ if (++num_bufs > max) { @@ -409,6 +432,11 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) return 1; } } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); + + if (!indirect) + total_bufs = num_bufs; + else + total_bufs++; } return 0; @@ -428,6 +456,19 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) max = vq->vring.num; i = head = virtqueue_get_head(vq, vq->last_avail_idx++); + + if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { + if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { + fprintf(stderr, "Invalid size for indirect buffer table\n"); + exit(1); + } + + /* loop over the indirect descriptor table */ + max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc); + desc_pa = vring_desc_addr(desc_pa, i); + i = 0; + } + do { struct iovec *sg; @@ -563,6 +604,7 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr) case VIRTIO_PCI_HOST_FEATURES: ret = vdev->get_features(vdev); ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY); + ret |= (1 << VIRTIO_RING_F_INDIRECT_DESC); break; case VIRTIO_PCI_GUEST_FEATURES: ret = vdev->features; diff --git a/hw/virtio.h b/hw/virtio.h index 83511e2..52252fc 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -32,6 +32,8 @@ /* We notify when the ring is completely used, even if the guest is supressing * callbacks */ #define VIRTIO_F_NOTIFY_ON_EMPTY 24 +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 /* 
from Linux's linux/virtio_ring.h */ @@ -39,6 +41,8 @@ #define VRING_DESC_F_NEXT 1 /* This marks a buffer as write-only (otherwise read-only). */ #define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 /* This means don't notify other side when buffer added. */ #define VRING_USED_F_NO_NOTIFY 1 -- 1.6.0.5 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH 0/2] virtio indirect ring entries @ 2009-06-17 10:35 Mark McLoughlin 2009-06-17 10:37 ` [Qemu-devel] [PATCH 1/2] virtio: make vring_desc_*() take phys addrs Mark McLoughlin 0 siblings, 1 reply; 4+ messages in thread From: Mark McLoughlin @ 2009-06-17 10:35 UTC (permalink / raw) To: qemu-devel Hi, Indirect ring entries have been merged for 2.6.31; these patches implement the qemu side. Cheers, Mark. ^ permalink raw reply [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH 1/2] virtio: make vring_desc_*() take phys addrs 2009-06-17 10:35 [Qemu-devel] [PATCH 0/2] virtio " Mark McLoughlin @ 2009-06-17 10:37 ` Mark McLoughlin 0 siblings, 0 replies; 4+ messages in thread From: Mark McLoughlin @ 2009-06-17 10:37 UTC (permalink / raw) To: qemu-devel Change the vring descriptor helpers to take the physical address of the descriptor table rather than a virtqueue. This is needed in order to allow these helpers to be used with an indirect descriptor table. Signed-off-by: Mark McLoughlin <markmc@redhat.com> --- hw/virtio.c | 59 +++++++++++++++++++++++++++++++++-------------------------- 1 files changed, 33 insertions(+), 26 deletions(-) diff --git a/hw/virtio.c b/hw/virtio.c index 45a49fa..1e8376d 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -85,31 +85,31 @@ static void virtqueue_init(VirtQueue *vq) VIRTIO_PCI_VRING_ALIGN); } -static inline uint64_t vring_desc_addr(VirtQueue *vq, int i) +static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i) { target_phys_addr_t pa; - pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); + pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); return ldq_phys(pa); } -static inline uint32_t vring_desc_len(VirtQueue *vq, int i) +static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i) { target_phys_addr_t pa; - pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); + pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); return ldl_phys(pa); } -static inline uint16_t vring_desc_flags(VirtQueue *vq, int i) +static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i) { target_phys_addr_t pa; - pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); + pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); return lduw_phys(pa); } -static inline uint16_t vring_desc_next(VirtQueue *vq, int i) +static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, 
int i) { target_phys_addr_t pa; - pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); + pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); return lduw_phys(pa); } @@ -269,20 +269,21 @@ static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) return head; } -static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) +static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa, + unsigned int i, unsigned int max) { unsigned int next; /* If this descriptor says it doesn't chain, we're done. */ - if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT)) - return vq->vring.num; + if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT)) + return max; /* Check they're not leading us off end of descriptors. */ - next = vring_desc_next(vq, i); + next = vring_desc_next(desc_pa, i); /* Make sure compiler knows to grab that: we don't want it changing! */ wmb(); - if (next >= vq->vring.num) { + if (next >= max) { fprintf(stderr, "Desc next is %u", next); exit(1); } @@ -292,10 +293,12 @@ static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) { - unsigned int idx; + target_phys_addr_t desc_pa = vq->vring.desc; + unsigned int idx, max; int num_bufs, in_total, out_total; idx = vq->last_avail_idx; + max = vq->vring.num; num_bufs = in_total = out_total = 0; while (virtqueue_num_heads(vq, idx)) { @@ -304,21 +307,21 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) i = virtqueue_get_head(vq, idx++); do { /* If we've got too many, that implies a descriptor loop. 
*/ - if (++num_bufs > vq->vring.num) { + if (++num_bufs > max) { fprintf(stderr, "Looped descriptor"); exit(1); } - if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { + if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) { if (in_bytes > 0 && - (in_total += vring_desc_len(vq, i)) >= in_bytes) + (in_total += vring_desc_len(desc_pa, i)) >= in_bytes) return 1; } else { if (out_bytes > 0 && - (out_total += vring_desc_len(vq, i)) >= out_bytes) + (out_total += vring_desc_len(desc_pa, i)) >= out_bytes) return 1; } - } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); + } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); } return 0; @@ -326,7 +329,8 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) { - unsigned int i, head; + unsigned int i, head, max; + target_phys_addr_t desc_pa = vq->vring.desc; target_phys_addr_t len; if (!virtqueue_num_heads(vq, vq->last_avail_idx)) @@ -335,23 +339,26 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) /* When we start there are none of either input nor output. */ elem->out_num = elem->in_num = 0; + max = vq->vring.num; + i = head = virtqueue_get_head(vq, vq->last_avail_idx++); do { struct iovec *sg; int is_write = 0; - if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { - elem->in_addr[elem->in_num] = vring_desc_addr(vq, i); + if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) { + elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i); sg = &elem->in_sg[elem->in_num++]; is_write = 1; } else sg = &elem->out_sg[elem->out_num++]; /* Grab the first descriptor, and check it's OK. 
*/ - sg->iov_len = vring_desc_len(vq, i); + sg->iov_len = vring_desc_len(desc_pa, i); len = sg->iov_len; - sg->iov_base = cpu_physical_memory_map(vring_desc_addr(vq, i), &len, is_write); + sg->iov_base = cpu_physical_memory_map(vring_desc_addr(desc_pa, i), + &len, is_write); if (sg->iov_base == NULL || len != sg->iov_len) { fprintf(stderr, "virtio: trying to map MMIO memory\n"); @@ -359,11 +366,11 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) } /* If we've got too many, that implies a descriptor loop. */ - if ((elem->in_num + elem->out_num) > vq->vring.num) { + if ((elem->in_num + elem->out_num) > max) { fprintf(stderr, "Looped descriptor"); exit(1); } - } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); + } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); elem->index = head; -- 1.6.0.6 ^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2009-06-17 10:37 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2008-12-18 17:24 [Qemu-devel] [PATCH 0/2] virtio: indirect ring entries Mark McLoughlin 2008-12-18 17:24 ` [Qemu-devel] [PATCH 1/2] virtio: make vring_desc_*() take phys addrs Mark McLoughlin 2008-12-18 17:24 ` [Qemu-devel] [PATCH 2/2] virtio: add support for indirect ring entries Mark McLoughlin -- strict thread matches above, loose matches on Subject: below -- 2009-06-17 10:35 [Qemu-devel] [PATCH 0/2] virtio " Mark McLoughlin 2009-06-17 10:37 ` [Qemu-devel] [PATCH 1/2] virtio: make vring_desc_*() take phys addrs Mark McLoughlin
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).