Re: [RFC][ PATCH 3/3] vhost-net: Add mergeable RX buffer support to vhost-net

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: "Michael S. Tsirkin" <mst@redhat.com>
To: David Stevens <dlstevens@us.ibm.com>
Cc: rusty@rustcorp.com.au, netdev@vger.kernel.org,
	kvm@vger.kernel.org, virtualization@lists.osdl.org
Subject: Re: [RFC][ PATCH 3/3] vhost-net: Add mergeable RX buffer support to vhost-net
Date: Sun, 7 Mar 2010 18:26:33 +0200	[thread overview]
Message-ID: <20100307162633.GC24997@redhat.com> (raw)
In-Reply-To: <OFD112C680.DB48ADA2-ON882576DB.0000CE4F-882576DB.0001E192@us.ibm.com>

On Tue, Mar 02, 2010 at 05:20:34PM -0700, David Stevens wrote:
> This patch glues them all together and makes sure we
> notify whenever we don't have enough buffers to receive
> a max-sized packet, and adds the feature bit.
> 
> Signed-off-by: David L Stevens <dlstevens@us.ibm.com>

Maybe split this up?

> diff -ruN net-next-p2/drivers/vhost/net.c net-next-p3/drivers/vhost/net.c
> --- net-next-p2/drivers/vhost/net.c     2010-03-02 13:01:34.000000000 
> -0800
> +++ net-next-p3/drivers/vhost/net.c     2010-03-02 15:25:15.000000000 
> -0800
> @@ -54,26 +54,6 @@
>         enum vhost_net_poll_state tx_poll_state;
>  };
>  
> -/* Pop first len bytes from iovec. Return number of segments used. */
> -static int move_iovec_hdr(struct iovec *from, struct iovec *to,
> -                         size_t len, int iov_count)
> -{
> -       int seg = 0;
> -       size_t size;
> -       while (len && seg < iov_count) {
> -               size = min(from->iov_len, len);
> -               to->iov_base = from->iov_base;
> -               to->iov_len = size;
> -               from->iov_len -= size;
> -               from->iov_base += size;
> -               len -= size;
> -               ++from;
> -               ++to;
> -               ++seg;
> -       }
> -       return seg;
> -}
> -
>  /* Caller must have TX VQ lock */
>  static void tx_poll_stop(struct vhost_net *net)
>  {
> @@ -97,7 +77,7 @@
>  static void handle_tx(struct vhost_net *net)
>  {
>         struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
> -       unsigned out, in, s;
> +       unsigned out, in;
>         struct iovec head;
>         struct msghdr msg = {
>                 .msg_name = NULL,
> @@ -110,6 +90,7 @@
>         size_t len, total_len = 0;
>         int err, wmem;
>         struct socket *sock = rcu_dereference(vq->private_data);
> +

I tend not to add empty lines if line below it is already short.

>         if (!sock)
>                 return;
>  
> @@ -166,11 +147,11 @@
>                 /* Skip header. TODO: support TSO. */
>                 msg.msg_iovlen = out;
>                 head.iov_len = len = iov_length(vq->iov, out);
> +

I tend not to add empty lines if line below it is a comment.

>                 /* Sanity check */
>                 if (!len) {
>                         vq_err(vq, "Unexpected header len for TX: "
> -                              "%zd expected %zd\n",
> -                              len, vq->guest_hlen);
> +                              "%zd expected %zd\n", len, vq->guest_hlen);
>                         break;
>                 }
>                 /* TODO: Check specific error and bomb out unless ENOBUFS? 
> */
> @@ -214,7 +195,7 @@
>  static void handle_rx(struct vhost_net *net)
>  {
>         struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
> -       unsigned in, log, s;
> +       unsigned in, log;
>         struct vhost_log *vq_log;
>         struct msghdr msg = {
>                 .msg_name = NULL,
> @@ -245,30 +226,36 @@
>                 if (!headcount) {
>                         vhost_enable_notify(vq);
>                         break;
> -               }
> +               } else if (vq->maxheadcount < headcount)
> +                       vq->maxheadcount = headcount;
>                 /* Skip header. TODO: support TSO/mergeable rx buffers. */
>                 msg.msg_iovlen = in;
>                 len = iov_length(vq->iov, in);
> -
>                 /* Sanity check */
>                 if (!len) {
>                         vq_err(vq, "Unexpected header len for RX: "
> -                              "%zd expected %zd\n",
> -                              len, vq->guest_hlen);
> +                              "%zd expected %zd\n", len, vq->guest_hlen);
>                         break;
>                 }
>                 err = sock->ops->recvmsg(NULL, sock, &msg,
>                                          len, MSG_DONTWAIT | MSG_TRUNC);
> -               /* TODO: Check specific error and bomb out unless EAGAIN? 
> */
>                 if (err < 0) {
> -                       vhost_discard(vq, 1);
> +                       vhost_discard(vq, headcount);
>                         break;
>                 }
>                 /* TODO: Should check and handle checksum. */
> +               if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF)) 
> {
> +                       struct virtio_net_hdr_mrg_rxbuf *vhdr =
> +                               (struct virtio_net_hdr_mrg_rxbuf *)
> +                               vq->iov[0].iov_base;
> +                       /* add num_bufs */
> +                       vq->iov[0].iov_len = vq->guest_hlen;
> +                       vhdr->num_buffers = headcount;

I don't understand this. iov_base is a userspace pointer, isn't it.
How can you assign values to it like that?
Rusty also commented earlier that it's not a good idea to assume
specific layout, such as first chunk being large enough to
include virtio_net_hdr_mrg_rxbuf.

I think we need to use memcpy to/from iovec etc.

> +               }
>                 if (err > len) {
>                         pr_err("Discarded truncated rx packet: "
>                                " len %d > %zd\n", err, len);
> -                       vhost_discard(vq, 1);
> +                       vhost_discard(vq, headcount);
>                         continue;
>                 }
>                 len = err;
> @@ -573,8 +560,6 @@
>  
>  static int vhost_net_set_features(struct vhost_net *n, u64 features)
>  {
> -       size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ?
> -               sizeof(struct virtio_net_hdr) : 0;
>         int i;
>         mutex_lock(&n->dev.mutex);
>         if ((features & (1 << VHOST_F_LOG_ALL)) &&
> diff -ruN net-next-p2/drivers/vhost/vhost.c 
> net-next-p3/drivers/vhost/vhost.c
> --- net-next-p2/drivers/vhost/vhost.c   2010-03-02 12:53:02.000000000 
> -0800
> +++ net-next-p3/drivers/vhost/vhost.c   2010-03-02 15:24:50.000000000 
> -0800
> @@ -115,6 +115,7 @@
>         vq->log_addr = -1ull;
>         vq->guest_hlen = 0;
>         vq->sock_hlen = 0;
> +       vq->maxheadcount = 0;
>         vq->private_data = NULL;
>         vq->log_base = NULL;
>         vq->error_ctx = NULL;
> @@ -410,6 +411,7 @@
>                 vq->last_avail_idx = s.num;
>                 /* Forget the cached index value. */
>                 vq->avail_idx = vq->last_avail_idx;
> +               vq->maxheadcount = 0;
>                 break;
>         case VHOST_GET_VRING_BASE:
>                 s.index = idx;
> @@ -1114,10 +1116,23 @@
>         return 0;
>  }
>  
> +int vhost_available(struct vhost_virtqueue *vq)
> +{
> +       int avail;
> +
> +       if (!vq->maxheadcount)  /* haven't got any yet */
> +               return 1;
> +       avail = vq->avail_idx - vq->last_avail_idx;
> +       if (avail < 0)
> +               avail += 0x10000; /* wrapped */
> +       return avail;
> +}
> +
>  /* This actually signals the guest, using eventfd. */
>  void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
>  {
>         __u16 flags = 0;
> +

I tend not to add empty lines if a line above it is already short.

>         if (get_user(flags, &vq->avail->flags)) {
>                 vq_err(vq, "Failed to get flags");
>                 return;
> @@ -1125,7 +1140,7 @@
>  
>         /* If they don't want an interrupt, don't signal, unless empty. */
>         if ((flags & VRING_AVAIL_F_NO_INTERRUPT) &&
> -           (vq->avail_idx != vq->last_avail_idx ||
> +           (vhost_available(vq) > vq->maxheadcount ||

I don't understand this change. It seems to make
code not match the comments.

>              !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY)))
>                 return;
>  
> diff -ruN net-next-p2/drivers/vhost/vhost.h 
> net-next-p3/drivers/vhost/vhost.h
> --- net-next-p2/drivers/vhost/vhost.h   2010-03-02 13:02:03.000000000 
> -0800
> +++ net-next-p3/drivers/vhost/vhost.h   2010-03-02 14:29:44.000000000 
> -0800
> @@ -85,6 +85,7 @@
>         struct iovec iov[VHOST_NET_MAX_SG+1]; /* an extra for vnet hdr */
>         struct iovec heads[VHOST_NET_MAX_SG];
>         size_t guest_hlen, sock_hlen;
> +       int maxheadcount;

I don't completely understand what does this field does.
It seems to be only set on rx? Maybe name should reflect this?

>         /* We use a kind of RCU to access private pointer.
>          * All readers access it from workqueue, which makes it possible 
> to
>          * flush the workqueue instead of synchronize_rcu. Therefore 
> readers do
> @@ -151,7 +152,8 @@
>         VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
>                          (1 << VIRTIO_RING_F_INDIRECT_DESC) |
>                          (1 << VHOST_F_LOG_ALL) |
> -                        (1 << VHOST_NET_F_VIRTIO_NET_HDR),
> +                        (1 << VHOST_NET_F_VIRTIO_NET_HDR) |
> +                        (1 << VIRTIO_NET_F_MRG_RXBUF),
>  };
>  
>  static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
>

next prev parent reply	other threads:[~2010-03-07 16:26 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-03  0:20 [RFC][ PATCH 3/3] vhost-net: Add mergeable RX buffer support to vhost-net David Stevens
2010-03-07 16:26 ` Michael S. Tsirkin [this message]
2010-03-08  2:06   ` David Stevens
2010-03-08  8:07     ` Michael S. Tsirkin
2010-03-31  1:23       ` [PATCH v2] Add Mergeable RX buffer feature to vhost_net David Stevens
2010-03-31 12:02         ` Michael S. Tsirkin
2010-03-31 22:04           ` David Stevens
2010-04-01 10:54             ` Michael S. Tsirkin
2010-04-01 18:22               ` David Stevens
2010-04-04  8:55                 ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100307162633.GC24997@redhat.com \
    --to=mst@redhat.com \
    --cc=dlstevens@us.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    --cc=virtualization@lists.osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).