linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Rusty Russell <rusty@rustcorp.com.au>
Cc: netdev@vger.kernel.org,
	virtualization@lists.linux-foundation.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, mingo@elte.hu, linux-mm@kvack.org,
	akpm@linux-foundation.org, hpa@zytor.com,
	gregory.haskins@gmail.com, s.hetze@linux-ag.com,
	Daniel Walker <dwalker@fifo99.com>,
	Eric Dumazet <eric.dumazet@gmail.com>
Subject: Re: virtio: put last_used and last_avail index into ring itself.
Date: Tue, 4 May 2010 21:22:36 +0300	[thread overview]
Message-ID: <20100504182236.GA14141@redhat.com> (raw)
In-Reply-To: <200911091647.29655.rusty@rustcorp.com.au>

> virtio: put last_used and last_avail index into ring itself.
> 
> Generally, the other end of the virtio ring doesn't need to see where
> you're up to in consuming the ring.  However, to completely understand
> what's going on from the outside, this information must be exposed.
> For example, you may want to save and restore a virtio_ring even though
> you're not the consumer, because the kernel is using it directly.
> 
> Fortunately, we have room to expand: the ring is always a whole number
> of pages and there's hundreds of bytes of padding after the avail ring
> and the used ring, whatever the number of descriptors (which must be a
> power of 2).
> 
> We add a feature bit so the guest can tell the host that it's writing
> out the current value there, if it wants to use that.
> 
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

I've been looking at this patch some more (more on why
later), and I wonder: would it be better to add some
alignment to the last used index address, so that
if we later add more stuff at the tail, it all
fits in a single cache line?

We use a new feature bit anyway, so layout change should not be
a problem.

Since I raised the question of caches: for the used ring,
the ring is not aligned to 64 bits, so on CPUs with 64 bit
or larger cache lines, used entries will often cross
cache line boundaries. Am I right, and if so, might it
have been better to align ring entries to cache line boundaries?

What do you think?

> ---
>  drivers/virtio/virtio_ring.c |   23 +++++++++++++++--------
>  include/linux/virtio_ring.h  |   12 +++++++++++-
>  2 files changed, 26 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -71,9 +71,6 @@ struct vring_virtqueue
>  	/* Number we've added since last sync. */
>  	unsigned int num_added;
>  
> -	/* Last used index we've seen. */
> -	u16 last_used_idx;
> -
>  	/* How to notify other side. FIXME: commonalize hcalls! */
>  	void (*notify)(struct virtqueue *vq);
>  
> @@ -278,12 +275,13 @@ static void detach_buf(struct vring_virt
>  
>  static inline bool more_used(const struct vring_virtqueue *vq)
>  {
> -	return vq->last_used_idx != vq->vring.used->idx;
> +	return vring_last_used(&vq->vring) != vq->vring.used->idx;
>  }
>  
>  static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len)
>  {
>  	struct vring_virtqueue *vq = to_vvq(_vq);
> +	struct vring_used_elem *u;
>  	void *ret;
>  	unsigned int i;
>  
> @@ -300,8 +298,11 @@ static void *vring_get_buf(struct virtqu
>  		return NULL;
>  	}
>  
> -	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
> -	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
> +	u = &vq->vring.used->ring[vring_last_used(&vq->vring) % vq->vring.num];
> +	i = u->id;
> +	*len = u->len;
> +	/* Make sure we don't reload i after doing checks. */
> +	rmb();
>  
>  	if (unlikely(i >= vq->vring.num)) {
>  		BAD_RING(vq, "id %u out of range\n", i);
> @@ -315,7 +316,8 @@ static void *vring_get_buf(struct virtqu
>  	/* detach_buf clears data, so grab it now. */
>  	ret = vq->data[i];
>  	detach_buf(vq, i);
> -	vq->last_used_idx++;
> +	vring_last_used(&vq->vring)++;
> +
>  	END_USE(vq);
>  	return ret;
>  }
> @@ -402,7 +404,6 @@ struct virtqueue *vring_new_virtqueue(un
>  	vq->vq.name = name;
>  	vq->notify = notify;
>  	vq->broken = false;
> -	vq->last_used_idx = 0;
>  	vq->num_added = 0;
>  	list_add_tail(&vq->vq.list, &vdev->vqs);
>  #ifdef DEBUG
> @@ -413,6 +414,10 @@ struct virtqueue *vring_new_virtqueue(un
>  
>  	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
>  
> +	/* We publish indices whether they offer it or not: if not, it's junk
> +	 * space anyway.  But calling this acknowledges the feature. */
> +	virtio_has_feature(vdev, VIRTIO_RING_F_PUBLISH_INDICES);
> +
>  	/* No callback?  Tell other side not to bother us. */
>  	if (!callback)
>  		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
> @@ -443,6 +448,8 @@ void vring_transport_features(struct vir
>  		switch (i) {
>  		case VIRTIO_RING_F_INDIRECT_DESC:
>  			break;
> +		case VIRTIO_RING_F_PUBLISH_INDICES:
> +			break;
>  		default:
>  			/* We don't understand this bit. */
>  			clear_bit(i, vdev->features);
> diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
> --- a/include/linux/virtio_ring.h
> +++ b/include/linux/virtio_ring.h
> @@ -29,6 +29,9 @@
>  /* We support indirect buffer descriptors */
>  #define VIRTIO_RING_F_INDIRECT_DESC	28
>  
> +/* We publish our last-seen used index at the end of the avail ring. */
> +#define VIRTIO_RING_F_PUBLISH_INDICES	29
> +
>  /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
>  struct vring_desc
>  {
> @@ -87,6 +90,7 @@ struct vring {
>   *	__u16 avail_flags;
>   *	__u16 avail_idx;
>   *	__u16 available[num];
> + *	__u16 last_used_idx;
>   *
>   *	// Padding to the next align boundary.
>   *	char pad[];
> @@ -95,6 +99,7 @@ struct vring {
>   *	__u16 used_flags;
>   *	__u16 used_idx;
>   *	struct vring_used_elem used[num];
> + *	__u16 last_avail_idx;
>   * };
>   */
>  static inline void vring_init(struct vring *vr, unsigned int num, void *p,
> @@ -111,9 +116,14 @@ static inline unsigned vring_size(unsign
>  {
>  	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
>  		 + align - 1) & ~(align - 1))
> -		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
> +		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num + 2;
>  }
>  
> +/* We publish the last-seen used index at the end of the available ring, and
> + * vice-versa.  These are at the end for backwards compatibility. */
> +#define vring_last_used(vr) ((vr)->avail->ring[(vr)->num])
> +#define vring_last_avail(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num])
> +
>  #ifdef __KERNEL__
>  #include <linux/irqreturn.h>
>  struct virtio_device;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2010-05-04 18:26 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <cover.1257349249.git.mst@redhat.com>
2009-11-04 15:55 ` [PATCHv8 1/3] tun: export underlying socket Michael S. Tsirkin
2009-11-04 15:55 ` [PATCHv8 2/3] mm: export use_mm/unuse_mm to modules Michael S. Tsirkin
2009-11-04 15:57 ` [PATCHv8 3/3] vhost_net: a kernel-level virtio server Michael S. Tsirkin
2009-11-06  4:59   ` Rusty Russell
2009-11-08 11:35     ` Michael S. Tsirkin
2009-11-09  6:17       ` Rusty Russell
2009-11-09  7:10         ` Michael S. Tsirkin
2009-11-10  1:08           ` Rusty Russell
2009-11-09  7:20         ` Michael S. Tsirkin
2009-11-09 11:55         ` Michael S. Tsirkin
2010-05-04 18:22         ` Michael S. Tsirkin [this message]
2010-05-06  0:52           ` virtio: put last_used and last_avail index into ring itself Rusty Russell
2010-05-06  6:27             ` Michael S. Tsirkin
2010-05-07  3:05               ` Rusty Russell
2010-05-09  8:57                 ` Michael S. Tsirkin
2010-05-10  3:11                   ` Rusty Russell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100504182236.GA14141@redhat.com \
    --to=mst@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=dwalker@fifo99.com \
    --cc=eric.dumazet@gmail.com \
    --cc=gregory.haskins@gmail.com \
    --cc=hpa@zytor.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=netdev@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    --cc=s.hetze@linux-ag.com \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).