From: "Michael S. Tsirkin" <mst@redhat.com>
To: Asias He <asias@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>,
penberg@kernel.org, will.deacon@arm.com, marc.zyngier@arm.com,
kvm@vger.kernel.org, jasowang@redhat.com, rusty@rustcorp.com.au
Subject: Re: [PATCH] kvm tools: virtio-net mergable rx buffers
Date: Wed, 24 Apr 2013 12:35:58 +0300 [thread overview]
Message-ID: <20130424093558.GE11245@redhat.com> (raw)
In-Reply-To: <20130424053256.GA12077@hj.localdomain>
On Wed, Apr 24, 2013 at 01:32:56PM +0800, Asias He wrote:
> On Mon, Apr 22, 2013 at 08:32:27PM -0400, Sasha Levin wrote:
> > Support mergable rx buffers for virtio-net. This helps reduce the amount
> > of memory the guest kernel has to allocate per rx vq.
>
> With this, we always do an extra copy of the rx data into a linear buffer.
> I am thinking about whether we should keep the non MRG_RXBUF code. For
> tap use case, it is fine, user can use vhost. But for uip use case, we
> can not use vhost.
You have to keep it but it might be ok not to optimize it.
> > Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
> > ---
> > tools/kvm/include/kvm/uip.h | 4 ++--
> > tools/kvm/include/kvm/util.h | 3 +++
> > tools/kvm/net/uip/core.c | 54 +++++---------------------------------------
> > tools/kvm/net/uip/tcp.c | 2 +-
> > tools/kvm/net/uip/udp.c | 2 +-
> > tools/kvm/util/util.c | 15 ++++++++++++
> > tools/kvm/virtio/net.c | 37 ++++++++++++++++++++++--------
> > 7 files changed, 55 insertions(+), 62 deletions(-)
> >
> > diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
> > index ac248d2..fa82f10 100644
> > --- a/tools/kvm/include/kvm/uip.h
> > +++ b/tools/kvm/include/kvm/uip.h
> > @@ -252,7 +252,7 @@ struct uip_tcp_socket {
> > };
> >
> > struct uip_tx_arg {
> > - struct virtio_net_hdr *vnet;
> > + struct virtio_net_hdr_mrg_rxbuf *vnet;
> > struct uip_info *info;
> > struct uip_eth *eth;
> > int vnet_len;
> > @@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth)
> > }
> >
> > int uip_tx(struct iovec *iov, u16 out, struct uip_info *info);
> > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info);
> > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info);
> > int uip_init(struct uip_info *info);
> >
> > int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg);
> > diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
> > index 0df9f0d..6f8ac83 100644
> > --- a/tools/kvm/include/kvm/util.h
> > +++ b/tools/kvm/include/kvm/util.h
> > @@ -22,6 +22,7 @@
> > #include <sys/param.h>
> > #include <sys/types.h>
> > #include <linux/types.h>
> > +#include <sys/uio.h>
> >
> > #ifdef __GNUC__
> > #define NORETURN __attribute__((__noreturn__))
> > @@ -94,4 +95,6 @@ struct kvm;
> > void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
> > void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
> >
> > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len);
> > +
> > #endif /* KVM__UTIL_H */
> > diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c
> > index 4e5bb82..d9e9993 100644
> > --- a/tools/kvm/net/uip/core.c
> > +++ b/tools/kvm/net/uip/core.c
> > @@ -7,7 +7,7 @@
> >
> > int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
> > {
> > - struct virtio_net_hdr *vnet;
> > + struct virtio_net_hdr_mrg_rxbuf *vnet;
> > struct uip_tx_arg arg;
> > int eth_len, vnet_len;
> > struct uip_eth *eth;
> > @@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
> > return vnet_len + eth_len;
> > }
> >
> > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info)
> > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info)
> > {
> > - struct virtio_net_hdr *vnet;
> > - struct uip_eth *eth;
> > struct uip_buf *buf;
> > - int vnet_len;
> > - int eth_len;
> > - char *p;
> > int len;
> > - int cnt;
> > - int i;
> >
> > /*
> > * Sleep until there is a buffer for guest
> > */
> > buf = uip_buf_get_used(info);
> >
> > - /*
> > - * Fill device to guest buffer, vnet hdr fisrt
> > - */
> > - vnet_len = iov[0].iov_len;
> > - vnet = iov[0].iov_base;
> > - if (buf->vnet_len > vnet_len) {
> > - len = -1;
> > - goto out;
> > - }
> > - memcpy(vnet, buf->vnet, buf->vnet_len);
> > -
> > - /*
> > - * Then, the real eth data
> > - * Note: Be sure buf->eth_len is not bigger than the buffer len that guest provides
> > - */
> > - cnt = buf->eth_len;
> > - p = buf->eth;
> > - for (i = 1; i < in; i++) {
> > - eth_len = iov[i].iov_len;
> > - eth = iov[i].iov_base;
> > - if (cnt > eth_len) {
> > - memcpy(eth, p, eth_len);
> > - cnt -= eth_len;
> > - p += eth_len;
> > - } else {
> > - memcpy(eth, p, cnt);
> > - cnt -= cnt;
> > - break;
> > - }
> > - }
> > -
> > - if (cnt) {
> > - pr_warning("uip_rx error");
> > - len = -1;
> > - goto out;
> > - }
> > + memcpy(buffer, buf->vnet, buf->vnet_len);
> > + memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len);
> >
> > len = buf->vnet_len + buf->eth_len;
> >
> > -out:
> > uip_buf_set_free(info, buf);
> > return len;
> > }
> > @@ -172,8 +130,8 @@ int uip_init(struct uip_info *info)
> > }
> >
> > list_for_each_entry(buf, buf_head, list) {
> > - buf->vnet = malloc(sizeof(struct virtio_net_hdr));
> > - buf->vnet_len = sizeof(struct virtio_net_hdr);
> > + buf->vnet = malloc(sizeof(struct virtio_net_hdr_mrg_rxbuf));
> > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > buf->eth = malloc(1024*64 + sizeof(struct uip_pseudo_hdr));
> > buf->eth_len = 1024*64 + sizeof(struct uip_pseudo_hdr);
> >
> > diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
> > index 9044f40..a99b95e 100644
> > --- a/tools/kvm/net/uip/tcp.c
> > +++ b/tools/kvm/net/uip/tcp.c
> > @@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_
> > /*
> > * virtio_net_hdr
> > */
> > - buf->vnet_len = sizeof(struct virtio_net_hdr);
> > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > memset(buf->vnet, 0, buf->vnet_len);
> >
> > buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> > diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c
> > index 31c417c..083c221 100644
> > --- a/tools/kvm/net/uip/udp.c
> > +++ b/tools/kvm/net/uip/udp.c
> > @@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct uip_udp_socket *sk, struct ui
> > /*
> > * virtio_net_hdr
> > */
> > - buf->vnet_len = sizeof(struct virtio_net_hdr);
> > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > memset(buf->vnet, 0, buf->vnet_len);
> >
> > buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> > diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c
> > index c11a15a..4e0069c 100644
> > --- a/tools/kvm/util/util.c
> > +++ b/tools/kvm/util/util.c
> > @@ -9,6 +9,7 @@
> > #include <sys/mman.h>
> > #include <sys/stat.h>
> > #include <sys/statfs.h>
> > +#include <linux/kernel.h>
> >
> > static void report(const char *prefix, const char *err, va_list params)
> > {
> > @@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 si
> > return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
> > }
> > }
> > +
> > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len)
> > +{
> > + size_t copy, start_len = len;
> > +
> > + for (; len > 0 && iovlen > 0; ++iov, --iovlen) {
> > + copy = min(iov->iov_len, len);
> > + memcpy(iov->iov_base, kdata, copy);
> > + kdata += copy;
> > + len -= copy;
> > + }
> > +
> > + return start_len - len;
> > +}
> > diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
> > index c0a8f12..c99c5a6 100644
> > --- a/tools/kvm/virtio/net.c
> > +++ b/tools/kvm/virtio/net.c
> > @@ -32,8 +32,8 @@
> > struct net_dev;
> >
> > struct net_dev_operations {
> > - int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> > - int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> > + int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev);
> > + int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
> > };
> >
> > struct net_dev {
> > @@ -63,6 +63,8 @@ struct net_dev {
> > static LIST_HEAD(ndevs);
> > static int compat_id = -1;
> >
> > +#define MAX_PACKET_SIZE ((u16)(-1))
>
> Why 64KB - 1? The package can be 65550 bytes which is max TCP or UDP
> plus 14 bytes eth header.
>
> > +
> > static void *virtio_net_rx_thread(void *p)
> > {
> > struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
> > @@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p)
> > mutex_unlock(&ndev->io_lock[id]);
> >
> > while (virt_queue__available(vq)) {
> > + unsigned char buffer[MAX_PACKET_SIZE];
>
> The buffer size should at least be:
>
> MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)
>
> > + struct virtio_net_hdr_mrg_rxbuf *hdr;
> > +
> > + len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev);
> > head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
> > - len = ndev->ops->rx(iov, in, ndev);
> > - virt_queue__set_used_elem(vq, head, len);
> > + hdr = (void *)iov[0].iov_base;
> > + while (len) {
> > + int copied;
> > +
> > + copied = memcpy_toiovecend(iov, in, buffer, len);
> > + len -= copied;
> > + hdr->num_buffers++;
> > + virt_queue__set_used_elem(vq, head, copied);
> > + if (len == 0)
> > + break;
> > + head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
>
> Here, need to check if we still have available buffer from guest. If not
> need to wait for it.
>
> > + }
> >
> > /* We should interrupt guest right now, otherwise latency is huge. */
> > if (virtio_queue__should_signal(vq))
> > @@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct virtio_net_params *params,
> > goto fail;
> > }
> >
> > - hdr_len = sizeof(struct virtio_net_hdr);
> > + hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
> > pr_warning("Config tap device TUNSETVNETHDRSZ error");
> >
> > @@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> > return writev(ndev->tap_fd, iov, out);
> > }
> >
> > -static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> > +static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
> > {
> > - return readv(ndev->tap_fd, iov, in);
> > + return read(ndev->tap_fd, buffer, length);
> > }
> >
> > static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> > @@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> > return uip_tx(iov, out, &ndev->info);
> > }
> >
> > -static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> > +static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
> > {
> > - return uip_rx(iov, in, &ndev->info);
> > + return uip_rx(buffer, length, &ndev->info);
> > }
> >
> > static struct net_dev_operations tap_ops = {
> > @@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev)
> > | 1UL << VIRTIO_RING_F_EVENT_IDX
> > | 1UL << VIRTIO_RING_F_INDIRECT_DESC
> > | 1UL << VIRTIO_NET_F_CTRL_VQ
> > - | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
> > + | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0)
> > + | 1UL << VIRTIO_NET_F_MRG_RXBUF;
> > }
> >
> > static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
> > --
> > 1.8.2.1
> >
>
> --
> Asias
prev parent reply other threads:[~2013-04-24 9:36 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-04-23 0:32 [PATCH] kvm tools: virtio-net mergable rx buffers Sasha Levin
2013-04-23 9:06 ` Pekka Enberg
2013-04-23 14:19 ` Asias He
2013-04-23 15:01 ` Sasha Levin
2013-04-23 16:35 ` Eric Northup
2013-04-24 2:51 ` Sasha Levin
2013-04-24 6:51 ` Pekka Enberg
2013-04-24 9:23 ` Michael S. Tsirkin
2013-04-29 0:44 ` Rusty Russell
2013-04-29 2:36 ` Sasha Levin
2013-04-24 5:32 ` Asias He
2013-04-24 9:35 ` Michael S. Tsirkin [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130424093558.GE11245@redhat.com \
--to=mst@redhat.com \
--cc=asias@redhat.com \
--cc=jasowang@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=marc.zyngier@arm.com \
--cc=penberg@kernel.org \
--cc=rusty@rustcorp.com.au \
--cc=sasha.levin@oracle.com \
--cc=will.deacon@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox