All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Asias He <asias@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>,
	penberg@kernel.org, will.deacon@arm.com, marc.zyngier@arm.com,
	kvm@vger.kernel.org, jasowang@redhat.com, rusty@rustcorp.com.au
Subject: Re: [PATCH] kvm tools: virtio-net mergable rx buffers
Date: Wed, 24 Apr 2013 12:35:58 +0300	[thread overview]
Message-ID: <20130424093558.GE11245@redhat.com> (raw)
In-Reply-To: <20130424053256.GA12077@hj.localdomain>

On Wed, Apr 24, 2013 at 01:32:56PM +0800, Asias He wrote:
> On Mon, Apr 22, 2013 at 08:32:27PM -0400, Sasha Levin wrote:
> > Support mergable rx buffers for virtio-net. This helps reduce the amount
> > of memory the guest kernel has to allocate per rx vq.
> 
> With this, we always do an extra copy of the rx data into a linear buffer.
> I am thinking about whether we should keep the non MRG_RXBUF code. For
> tap use case, it is fine, user can use vhost. But for uip use case, we
> can not use vhost.

You have to keep it but it might be ok not to optimize it.

> > Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
> > ---
> >  tools/kvm/include/kvm/uip.h  |  4 ++--
> >  tools/kvm/include/kvm/util.h |  3 +++
> >  tools/kvm/net/uip/core.c     | 54 +++++---------------------------------------
> >  tools/kvm/net/uip/tcp.c      |  2 +-
> >  tools/kvm/net/uip/udp.c      |  2 +-
> >  tools/kvm/util/util.c        | 15 ++++++++++++
> >  tools/kvm/virtio/net.c       | 37 ++++++++++++++++++++++--------
> >  7 files changed, 55 insertions(+), 62 deletions(-)
> > 
> > diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
> > index ac248d2..fa82f10 100644
> > --- a/tools/kvm/include/kvm/uip.h
> > +++ b/tools/kvm/include/kvm/uip.h
> > @@ -252,7 +252,7 @@ struct uip_tcp_socket {
> >  };
> >  
> >  struct uip_tx_arg {
> > -	struct virtio_net_hdr *vnet;
> > +	struct virtio_net_hdr_mrg_rxbuf *vnet;
> >  	struct uip_info *info;
> >  	struct uip_eth *eth;
> >  	int vnet_len;
> > @@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth)
> >  }
> >  
> >  int uip_tx(struct iovec *iov, u16 out, struct uip_info *info);
> > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info);
> > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info);
> >  int uip_init(struct uip_info *info);
> >  
> >  int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg);
> > diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
> > index 0df9f0d..6f8ac83 100644
> > --- a/tools/kvm/include/kvm/util.h
> > +++ b/tools/kvm/include/kvm/util.h
> > @@ -22,6 +22,7 @@
> >  #include <sys/param.h>
> >  #include <sys/types.h>
> >  #include <linux/types.h>
> > +#include <sys/uio.h>
> >  
> >  #ifdef __GNUC__
> >  #define NORETURN __attribute__((__noreturn__))
> > @@ -94,4 +95,6 @@ struct kvm;
> >  void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
> >  void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
> >  
> > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len);
> > +
> >  #endif /* KVM__UTIL_H */
> > diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c
> > index 4e5bb82..d9e9993 100644
> > --- a/tools/kvm/net/uip/core.c
> > +++ b/tools/kvm/net/uip/core.c
> > @@ -7,7 +7,7 @@
> >  
> >  int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
> >  {
> > -	struct virtio_net_hdr *vnet;
> > +	struct virtio_net_hdr_mrg_rxbuf *vnet;
> >  	struct uip_tx_arg arg;
> >  	int eth_len, vnet_len;
> >  	struct uip_eth *eth;
> > @@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
> >  	return vnet_len + eth_len;
> >  }
> >  
> > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info)
> > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info)
> >  {
> > -	struct virtio_net_hdr *vnet;
> > -	struct uip_eth *eth;
> >  	struct uip_buf *buf;
> > -	int vnet_len;
> > -	int eth_len;
> > -	char *p;
> >  	int len;
> > -	int cnt;
> > -	int i;
> >  
> >  	/*
> >  	 * Sleep until there is a buffer for guest
> >  	 */
> >  	buf = uip_buf_get_used(info);
> >  
> > -	/*
> > -	 * Fill device to guest buffer, vnet hdr fisrt
> > -	 */
> > -	vnet_len = iov[0].iov_len;
> > -	vnet = iov[0].iov_base;
> > -	if (buf->vnet_len > vnet_len) {
> > -		len = -1;
> > -		goto out;
> > -	}
> > -	memcpy(vnet, buf->vnet, buf->vnet_len);
> > -
> > -	/*
> > -	 * Then, the real eth data
> > -	 * Note: Be sure buf->eth_len is not bigger than the buffer len that guest provides
> > -	 */
> > -	cnt = buf->eth_len;
> > -	p = buf->eth;
> > -	for (i = 1; i < in; i++) {
> > -		eth_len = iov[i].iov_len;
> > -		eth = iov[i].iov_base;
> > -		if (cnt > eth_len) {
> > -			memcpy(eth, p, eth_len);
> > -			cnt -= eth_len;
> > -			p += eth_len;
> > -		} else {
> > -			memcpy(eth, p, cnt);
> > -			cnt -= cnt;
> > -			break;
> > -		}
> > -	}
> > -
> > -	if (cnt) {
> > -		pr_warning("uip_rx error");
> > -		len = -1;
> > -		goto out;
> > -	}
> > +	memcpy(buffer, buf->vnet, buf->vnet_len);
> > +	memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len);
> >  
> >  	len = buf->vnet_len + buf->eth_len;
> >  
> > -out:
> >  	uip_buf_set_free(info, buf);
> >  	return len;
> >  }
> > @@ -172,8 +130,8 @@ int uip_init(struct uip_info *info)
> >  	}
> >  
> >  	list_for_each_entry(buf, buf_head, list) {
> > -		buf->vnet	= malloc(sizeof(struct virtio_net_hdr));
> > -		buf->vnet_len	= sizeof(struct virtio_net_hdr);
> > +		buf->vnet	= malloc(sizeof(struct virtio_net_hdr_mrg_rxbuf));
> > +		buf->vnet_len	= sizeof(struct virtio_net_hdr_mrg_rxbuf);
> >  		buf->eth	= malloc(1024*64 + sizeof(struct uip_pseudo_hdr));
> >  		buf->eth_len	= 1024*64 + sizeof(struct uip_pseudo_hdr);
> >  
> > diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
> > index 9044f40..a99b95e 100644
> > --- a/tools/kvm/net/uip/tcp.c
> > +++ b/tools/kvm/net/uip/tcp.c
> > @@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_
> >  	/*
> >  	 * virtio_net_hdr
> >  	 */
> > -	buf->vnet_len	= sizeof(struct virtio_net_hdr);
> > +	buf->vnet_len	= sizeof(struct virtio_net_hdr_mrg_rxbuf);
> >  	memset(buf->vnet, 0, buf->vnet_len);
> >  
> >  	buf->eth_len	= ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> > diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c
> > index 31c417c..083c221 100644
> > --- a/tools/kvm/net/uip/udp.c
> > +++ b/tools/kvm/net/uip/udp.c
> > @@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct uip_udp_socket *sk, struct ui
> >  	/*
> >  	 * virtio_net_hdr
> >  	 */
> > -	buf->vnet_len	= sizeof(struct virtio_net_hdr);
> > +	buf->vnet_len	= sizeof(struct virtio_net_hdr_mrg_rxbuf);
> >  	memset(buf->vnet, 0, buf->vnet_len);
> >  
> >  	buf->eth_len	= ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> > diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c
> > index c11a15a..4e0069c 100644
> > --- a/tools/kvm/util/util.c
> > +++ b/tools/kvm/util/util.c
> > @@ -9,6 +9,7 @@
> >  #include <sys/mman.h>
> >  #include <sys/stat.h>
> >  #include <sys/statfs.h>
> > +#include <linux/kernel.h>
> >  
> >  static void report(const char *prefix, const char *err, va_list params)
> >  {
> > @@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 si
> >  		return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
> >  	}
> >  }
> > +
> > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len)
> > +{
> > +	size_t copy, start_len = len;
> > +
> > +	for (; len > 0 && iovlen > 0; ++iov, --iovlen) {
> > +		copy = min(iov->iov_len, len);
> > +		memcpy(iov->iov_base, kdata, copy);
> > +		kdata += copy;
> > +		len -= copy;
> > +	}
> > +
> > +	return start_len - len;
> > +}
> > diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
> > index c0a8f12..c99c5a6 100644
> > --- a/tools/kvm/virtio/net.c
> > +++ b/tools/kvm/virtio/net.c
> > @@ -32,8 +32,8 @@
> >  struct net_dev;
> >  
> >  struct net_dev_operations {
> > -	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> > -	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> > +	int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev);
> > +	int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
> >  };
> >  
> >  struct net_dev {
> > @@ -63,6 +63,8 @@ struct net_dev {
> >  static LIST_HEAD(ndevs);
> >  static int compat_id = -1;
> >  
> > +#define MAX_PACKET_SIZE ((u16)(-1))
> 
> Why 64KB - 1? The package can be 65550 bytes which is max TCP or UDP
> plus 14 bytes eth header.
> 
> > +
> >  static void *virtio_net_rx_thread(void *p)
> >  {
> >  	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
> > @@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p)
> >  		mutex_unlock(&ndev->io_lock[id]);
> >  
> >  		while (virt_queue__available(vq)) {
> > +			unsigned char buffer[MAX_PACKET_SIZE];
> 
> The buffer size should at least be:
> 
> MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)
> 
> > +			struct virtio_net_hdr_mrg_rxbuf *hdr;
> > +
> > +			len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev);
> >  			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
> > -			len = ndev->ops->rx(iov, in, ndev);
> > -			virt_queue__set_used_elem(vq, head, len);
> > +			hdr = (void *)iov[0].iov_base;
> > +			while (len) {
> > +				int copied;
> > +
> > +				copied = memcpy_toiovecend(iov, in, buffer, len);
> > +				len -= copied;
> > +				hdr->num_buffers++;
> > +				virt_queue__set_used_elem(vq, head, copied);
> > +				if (len == 0)
> > +					break;
> > +				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
> 
> Here, need to check if we still have available buffer from guest. If not
> need to wait for it.
> 
> > +			}
> >  
> >  			/* We should interrupt guest right now, otherwise latency is huge. */
> >  			if (virtio_queue__should_signal(vq))
> > @@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct virtio_net_params *params,
> >  		goto fail;
> >  	}
> >  
> > -	hdr_len = sizeof(struct virtio_net_hdr);
> > +	hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> >  	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
> >  		pr_warning("Config tap device TUNSETVNETHDRSZ error");
> >  
> > @@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> >  	return writev(ndev->tap_fd, iov, out);
> >  }
> >  
> > -static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> > +static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
> >  {
> > -	return readv(ndev->tap_fd, iov, in);
> > +	return read(ndev->tap_fd, buffer, length);
> >  }
> >  
> >  static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> > @@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> >  	return uip_tx(iov, out, &ndev->info);
> >  }
> >  
> > -static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> > +static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
> >  {
> > -	return uip_rx(iov, in, &ndev->info);
> > +	return uip_rx(buffer, length, &ndev->info);
> >  }
> >  
> >  static struct net_dev_operations tap_ops = {
> > @@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev)
> >  		| 1UL << VIRTIO_RING_F_EVENT_IDX
> >  		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
> >  		| 1UL << VIRTIO_NET_F_CTRL_VQ
> > -		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
> > +		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0)
> > +		| 1UL << VIRTIO_NET_F_MRG_RXBUF;
> >  }
> >  
> >  static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
> > -- 
> > 1.8.2.1
> > 
> 
> -- 
> Asias

      reply	other threads:[~2013-04-24  9:36 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-23  0:32 [PATCH] kvm tools: virtio-net mergable rx buffers Sasha Levin
2013-04-23  9:06 ` Pekka Enberg
2013-04-23 14:19   ` Asias He
2013-04-23 15:01 ` Sasha Levin
2013-04-23 16:35 ` Eric Northup
2013-04-24  2:51   ` Sasha Levin
2013-04-24  6:51     ` Pekka Enberg
2013-04-24  9:23       ` Michael S. Tsirkin
2013-04-29  0:44         ` Rusty Russell
2013-04-29  2:36           ` Sasha Levin
2013-04-24  5:32 ` Asias He
2013-04-24  9:35   ` Michael S. Tsirkin [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130424093558.GE11245@redhat.com \
    --to=mst@redhat.com \
    --cc=asias@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=marc.zyngier@arm.com \
    --cc=penberg@kernel.org \
    --cc=rusty@rustcorp.com.au \
    --cc=sasha.levin@oracle.com \
    --cc=will.deacon@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.