From: Jason Wang <jasowang@redhat.com>
To: wexu@redhat.com, qemu-devel@nongnu.org
Cc: tiwei.bie@intel.com, jfreimann@redhat.com, maxime.coquelin@redhat.com
Subject: Re: [Qemu-devel] [[RFC v3 07/12] virtio: fill/flush/pop for packed ring
Date: Mon, 15 Oct 2018 14:14:11 +0800 [thread overview]
Message-ID: <59562881-dba6-1843-9ebb-3f80a1d37e15@redhat.com> (raw)
In-Reply-To: <1539266915-15216-8-git-send-email-wexu@redhat.com>
On 2018年10月11日 22:08, wexu@redhat.com wrote:
> From: Wei Xu <wexu@redhat.com>
>
> Signed-off-by: Wei Xu <wexu@redhat.com>
> ---
> hw/virtio/virtio.c | 258 ++++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 244 insertions(+), 14 deletions(-)
>
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 13c6c98..d12a7e3 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -386,6 +386,21 @@ static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
> virtio_tswap16s(vdev, &desc->id);
> }
>
> +static void vring_packed_desc_write(VirtIODevice *vdev, VRingPackedDesc *desc,
> + MemoryRegionCache *cache, int i)
> +{
> + virtio_tswap64s(vdev, &desc->addr);
> + virtio_tswap32s(vdev, &desc->len);
> + virtio_tswap16s(vdev, &desc->id);
> + virtio_tswap16s(vdev, &desc->flags);
> + address_space_write_cached(cache,
> + sizeof(VRingPackedDesc) * i, desc,
> + sizeof(VRingPackedDesc));
> + address_space_cache_invalidate(cache,
> + sizeof(VRingPackedDesc) * i,
> + sizeof(VRingPackedDesc));
> +}
> +
> static void vring_packed_desc_read_flags(VirtIODevice *vdev,
> VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
> {
> @@ -559,19 +574,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
> }
>
> /* Called within rcu_read_lock(). */
> -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> +static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
> unsigned int len, unsigned int idx)
> {
> VRingUsedElem uelem;
>
> - trace_virtqueue_fill(vq, elem, len, idx);
> -
> - virtqueue_unmap_sg(vq, elem, len);
> -
> - if (unlikely(vq->vdev->broken)) {
> - return;
> - }
> -
> if (unlikely(!vq->vring.used)) {
> return;
> }
> @@ -583,16 +590,64 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> vring_used_write(vq, &uelem, idx);
> }
>
> -/* Called within rcu_read_lock(). */
> -void virtqueue_flush(VirtQueue *vq, unsigned int count)
> +static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
> + unsigned int len, unsigned int idx)
> {
> - uint16_t old, new;
> + uint16_t w, head;
> + VRingMemoryRegionCaches *caches;
> + VRingPackedDesc desc = {
> + .addr = 0,
> + .flags = 0,
> + };
> +
> + if (unlikely(!vq->vring.desc)) {
> + return;
> + }
> +
> + caches = vring_get_region_caches(vq);
> + head = vq->used_idx + idx;
> + head = head >= vq->vring.num ? (head - vq->vring.num) : head;
> + vring_packed_desc_read(vq->vdev, &desc, &caches->desc, head);
Is this a must for reading descriptor once more? Shouldn't device
maintain its own used_wrap_counter?
> +
> + w = (desc.flags & AVAIL_DESC_PACKED(1)) >> 7;
> + desc.flags &= ~(AVAIL_DESC_PACKED(1) | USED_DESC_PACKED(1));
> + desc.flags |= AVAIL_DESC_PACKED(w) | USED_DESC_PACKED(w);
> + if (!(desc.flags & VRING_DESC_F_INDIRECT)) {
> + if (!(desc.flags & VRING_DESC_F_WRITE)) {
> + desc.len = 0;
> + } else {
> + desc.len = len;
> + }
> + }
> + vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head);
> +
> + /* Make sure flags has been updated */
> + smp_mb();
This is wrong in two places:
- What you want is to make sure flag was updated after all other fields,
you can do it in vring_packed_desc_write()
- A write barrier instead of a full barrier is needed.
> +}
> +
> +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> + unsigned int len, unsigned int idx)
> +{
> + trace_virtqueue_fill(vq, elem, len, idx);
> +
> + virtqueue_unmap_sg(vq, elem, len);
>
> if (unlikely(vq->vdev->broken)) {
> - vq->inuse -= count;
> return;
> }
>
> + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
> + virtqueue_packed_fill(vq, elem, len, idx);
> + } else {
> + virtqueue_split_fill(vq, elem, len, idx);
> + }
> +}
> +
> +/* Called within rcu_read_lock(). */
> +static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
> +{
> + uint16_t old, new;
> +
> if (unlikely(!vq->vring.used)) {
> return;
> }
> @@ -608,6 +663,33 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
> vq->signalled_used_valid = false;
> }
>
> +static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
> +{
> + if (unlikely(!vq->vring.desc)) {
> + return;
> + }
> +
> + vq->inuse -= count;
> + vq->used_idx += count;
> + if (vq->used_idx >= vq->vring.num) {
> + vq->used_idx -= vq->vring.num;
> + }
> +}
> +
> +void virtqueue_flush(VirtQueue *vq, unsigned int count)
> +{
> + if (unlikely(vq->vdev->broken)) {
> + vq->inuse -= count;
> + return;
> + }
> +
> + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
> + virtqueue_packed_flush(vq, count);
> + } else {
> + virtqueue_split_flush(vq, count);
> + }
> +}
> +
> void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
> unsigned int len)
> {
> @@ -1091,7 +1173,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu
> return elem;
> }
>
> -void *virtqueue_pop(VirtQueue *vq, size_t sz)
> +static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
> {
> unsigned int i, head, max;
> VRingMemoryRegionCaches *caches;
> @@ -1226,6 +1308,154 @@ err_undo_map:
> goto done;
> }
>
> +static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
> +{
> + unsigned int i, head, max;
> + VRingMemoryRegionCaches *caches;
> + MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> + MemoryRegionCache *cache;
> + int64_t len;
> + VirtIODevice *vdev = vq->vdev;
> + VirtQueueElement *elem = NULL;
> + unsigned out_num, in_num, elem_entries;
> + hwaddr addr[VIRTQUEUE_MAX_SIZE];
> + struct iovec iov[VIRTQUEUE_MAX_SIZE];
> + VRingPackedDesc desc;
> +
> + if (unlikely(vdev->broken)) {
> + return NULL;
> + }
> +
> + rcu_read_lock();
> + if (virtio_queue_packed_empty_rcu(vq)) {
> + goto done;
> + }
> +
> + /* When we start there are none of either input nor output. */
> + out_num = in_num = elem_entries = 0;
> +
> + max = vq->vring.num;
> +
> + if (vq->inuse >= vq->vring.num) {
> + virtio_error(vdev, "Virtqueue size exceeded");
> + goto done;
> + }
The above is duplicated with split version, please unify them.
> +
> + head = vq->last_avail_idx;
> + i = head;
> +
> + caches = vring_get_region_caches(vq);
> + cache = &caches->desc;
> + vring_packed_desc_read(vdev, &desc, cache, i);
> + /* Make sure we see all the fields*/
> + smp_rmb();
This is wrong, as I've pointed out.
> + if (desc.flags & VRING_DESC_F_INDIRECT) {
> + if (desc.len % sizeof(VRingPackedDesc)) {
> + virtio_error(vdev, "Invalid size for indirect buffer table");
> + goto done;
> + }
> +
> + /* loop over the indirect descriptor table */
> + len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
> + desc.addr, desc.len, false);
> + cache = &indirect_desc_cache;
> + if (len < desc.len) {
> + virtio_error(vdev, "Cannot map indirect buffer");
> + goto done;
> + }
> +
> + max = desc.len / sizeof(VRingPackedDesc);
> + i = 0;
> + vring_packed_desc_read(vdev, &desc, cache, i);
> + /* Make sure we see all the fields*/
> + smp_rmb();
> + }
> +
> + /* Collect all the descriptors */
> + while (1) {
> + bool map_ok;
> +
> + if (desc.flags & VRING_DESC_F_WRITE) {
> + map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
> + iov + out_num,
> + VIRTQUEUE_MAX_SIZE - out_num, true,
> + desc.addr, desc.len);
> + } else {
> + if (in_num) {
> + virtio_error(vdev, "Incorrect order for descriptors");
> + goto err_undo_map;
> + }
> + map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
> + VIRTQUEUE_MAX_SIZE, false,
> + desc.addr, desc.len);
> + }
> + if (!map_ok) {
> + goto err_undo_map;
> + }
> +
> + /* If we've got too many, that implies a descriptor loop. */
> + if (++elem_entries > max) {
> + virtio_error(vdev, "Looped descriptor");
> + goto err_undo_map;
> + }
> +
> + if (++i >= vq->vring.num) {
> + i -= vq->vring.num;
> + }
> +
> + if (desc.flags & VRING_DESC_F_NEXT) {
> + vring_packed_desc_read(vq->vdev, &desc, cache, i);
> + /* Make sure we see all the fields*/
> + smp_rmb();
This looks unnecessary.
Thanks
> + } else {
> + break;
> + }
> + }
> +
> + /* Now copy what we have collected and mapped */
> + elem = virtqueue_alloc_element(sz, out_num, in_num);
> + for (i = 0; i < out_num; i++) {
> + elem->out_addr[i] = addr[i];
> + elem->out_sg[i] = iov[i];
> + }
> + for (i = 0; i < in_num; i++) {
> + elem->in_addr[i] = addr[head + out_num + i];
> + elem->in_sg[i] = iov[out_num + i];
> + }
> +
> + vq->last_avail_idx += (cache == &indirect_desc_cache) ?
> + 1 : out_num + in_num;
> + if (vq->last_avail_idx >= vq->vring.num) {
> + vq->last_avail_idx -= vq->vring.num;
> + vq->avail_wrap_counter = !vq->avail_wrap_counter;
> + }
> + vq->inuse++;
> +
> + vq->event_idx = vq->last_avail_idx;
> + vq->event_wrap_counter = vq->avail_wrap_counter;
> +
> + trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
> +done:
> + address_space_cache_destroy(&indirect_desc_cache);
> + rcu_read_unlock();
> +
> + return elem;
> +
> +err_undo_map:
> + virtqueue_undo_map_desc(out_num, in_num, iov);
> + g_free(elem);
> + goto done;
> +}
> +
> +void *virtqueue_pop(VirtQueue *vq, size_t sz)
> +{
> + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
> + return virtqueue_packed_pop(vq, sz);
> + } else {
> + return virtqueue_split_pop(vq, sz);
> + }
> +}
> +
> /* virtqueue_drop_all:
> * @vq: The #VirtQueue
> * Drops all queued buffers and indicates them to the guest
next prev parent reply other threads:[~2018-10-15 6:14 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-10-11 14:08 [Qemu-devel] [RFC v3 00/12] packed ring virtio-net userspace backend support wexu
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 01/12] virtio: introduce packed ring definitions wexu
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 02/12] virtio: redefine structure & memory cache for packed ring wexu
2018-10-15 3:03 ` Jason Wang
2018-10-15 7:26 ` Wei Xu
2018-10-15 8:03 ` Jason Wang
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 03/12] virtio: init " wexu
2018-10-15 3:10 ` Jason Wang
2018-10-15 7:09 ` Wei Xu
2018-10-15 7:54 ` Jason Wang
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 04/12] virtio: init wrap counter " wexu
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 05/12] virtio: init and desc empty check " wexu
2018-10-15 3:18 ` Jason Wang
2018-10-15 7:04 ` Wei Xu
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 06/12] virtio: get avail bytes " wexu
2018-10-15 3:47 ` Jason Wang
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 07/12] virtio: fill/flush/pop " wexu
2018-10-15 6:14 ` Jason Wang [this message]
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 08/12] virtio: event suppression support " wexu
2018-10-15 6:55 ` Jason Wang
2018-10-15 6:59 ` Jason Wang
2018-10-15 8:20 ` Wei Xu
2018-10-15 9:11 ` Jason Wang
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 09/12] virtio-net: fill head desc after done all in a chain wexu
2018-10-15 7:45 ` Jason Wang
2018-10-15 8:03 ` Wei Xu
2018-10-15 8:05 ` Jason Wang
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 10/12] virtio: packed ring feature bit for userspace backend wexu
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 11/12] virtio: enable packed ring via a new command line wexu
2018-10-11 14:08 ` [Qemu-devel] [[RFC v3 12/12] virtio: feature vhost-net support for packed ring wexu
2018-10-15 7:50 ` Jason Wang
2018-10-15 8:11 ` Wei Xu
2018-11-21 13:03 ` Maxime Coquelin
2018-11-22 3:46 ` Wei Xu
2018-11-21 14:39 ` [Qemu-devel] [RFC v3 00/12] packed ring virtio-net userspace backend support Tiwei Bie
2018-11-22 3:43 ` Wei Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=59562881-dba6-1843-9ebb-3f80a1d37e15@redhat.com \
--to=jasowang@redhat.com \
--cc=jfreimann@redhat.com \
--cc=maxime.coquelin@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=tiwei.bie@intel.com \
--cc=wexu@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).