* [PATCH] kvm tools: virtio-net mergable rx buffers
@ 2013-04-23 0:32 Sasha Levin
2013-04-23 9:06 ` Pekka Enberg
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Sasha Levin @ 2013-04-23 0:32 UTC (permalink / raw)
To: penberg
Cc: will.deacon, marc.zyngier, kvm, asias, jasowang, rusty, mst,
Sasha Levin
Support mergable rx buffers for virtio-net. This helps reduce the amount
of memory the guest kernel has to allocate per rx vq.
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
---
tools/kvm/include/kvm/uip.h | 4 ++--
tools/kvm/include/kvm/util.h | 3 +++
tools/kvm/net/uip/core.c | 54 +++++---------------------------------------
tools/kvm/net/uip/tcp.c | 2 +-
tools/kvm/net/uip/udp.c | 2 +-
tools/kvm/util/util.c | 15 ++++++++++++
tools/kvm/virtio/net.c | 37 ++++++++++++++++++++++--------
7 files changed, 55 insertions(+), 62 deletions(-)
diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
index ac248d2..fa82f10 100644
--- a/tools/kvm/include/kvm/uip.h
+++ b/tools/kvm/include/kvm/uip.h
@@ -252,7 +252,7 @@ struct uip_tcp_socket {
};
struct uip_tx_arg {
- struct virtio_net_hdr *vnet;
+ struct virtio_net_hdr_mrg_rxbuf *vnet;
struct uip_info *info;
struct uip_eth *eth;
int vnet_len;
@@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth)
}
int uip_tx(struct iovec *iov, u16 out, struct uip_info *info);
-int uip_rx(struct iovec *iov, u16 in, struct uip_info *info);
+int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info);
int uip_init(struct uip_info *info);
int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg);
diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
index 0df9f0d..6f8ac83 100644
--- a/tools/kvm/include/kvm/util.h
+++ b/tools/kvm/include/kvm/util.h
@@ -22,6 +22,7 @@
#include <sys/param.h>
#include <sys/types.h>
#include <linux/types.h>
+#include <sys/uio.h>
#ifdef __GNUC__
#define NORETURN __attribute__((__noreturn__))
@@ -94,4 +95,6 @@ struct kvm;
void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
+int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len);
+
#endif /* KVM__UTIL_H */
diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c
index 4e5bb82..d9e9993 100644
--- a/tools/kvm/net/uip/core.c
+++ b/tools/kvm/net/uip/core.c
@@ -7,7 +7,7 @@
int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
{
- struct virtio_net_hdr *vnet;
+ struct virtio_net_hdr_mrg_rxbuf *vnet;
struct uip_tx_arg arg;
int eth_len, vnet_len;
struct uip_eth *eth;
@@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
return vnet_len + eth_len;
}
-int uip_rx(struct iovec *iov, u16 in, struct uip_info *info)
+int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info)
{
- struct virtio_net_hdr *vnet;
- struct uip_eth *eth;
struct uip_buf *buf;
- int vnet_len;
- int eth_len;
- char *p;
int len;
- int cnt;
- int i;
/*
* Sleep until there is a buffer for guest
*/
buf = uip_buf_get_used(info);
- /*
- * Fill device to guest buffer, vnet hdr fisrt
- */
- vnet_len = iov[0].iov_len;
- vnet = iov[0].iov_base;
- if (buf->vnet_len > vnet_len) {
- len = -1;
- goto out;
- }
- memcpy(vnet, buf->vnet, buf->vnet_len);
-
- /*
- * Then, the real eth data
- * Note: Be sure buf->eth_len is not bigger than the buffer len that guest provides
- */
- cnt = buf->eth_len;
- p = buf->eth;
- for (i = 1; i < in; i++) {
- eth_len = iov[i].iov_len;
- eth = iov[i].iov_base;
- if (cnt > eth_len) {
- memcpy(eth, p, eth_len);
- cnt -= eth_len;
- p += eth_len;
- } else {
- memcpy(eth, p, cnt);
- cnt -= cnt;
- break;
- }
- }
-
- if (cnt) {
- pr_warning("uip_rx error");
- len = -1;
- goto out;
- }
+ memcpy(buffer, buf->vnet, buf->vnet_len);
+ memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len);
len = buf->vnet_len + buf->eth_len;
-out:
uip_buf_set_free(info, buf);
return len;
}
@@ -172,8 +130,8 @@ int uip_init(struct uip_info *info)
}
list_for_each_entry(buf, buf_head, list) {
- buf->vnet = malloc(sizeof(struct virtio_net_hdr));
- buf->vnet_len = sizeof(struct virtio_net_hdr);
+ buf->vnet = malloc(sizeof(struct virtio_net_hdr_mrg_rxbuf));
+ buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
buf->eth = malloc(1024*64 + sizeof(struct uip_pseudo_hdr));
buf->eth_len = 1024*64 + sizeof(struct uip_pseudo_hdr);
diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
index 9044f40..a99b95e 100644
--- a/tools/kvm/net/uip/tcp.c
+++ b/tools/kvm/net/uip/tcp.c
@@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_
/*
* virtio_net_hdr
*/
- buf->vnet_len = sizeof(struct virtio_net_hdr);
+ buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
memset(buf->vnet, 0, buf->vnet_len);
buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c
index 31c417c..083c221 100644
--- a/tools/kvm/net/uip/udp.c
+++ b/tools/kvm/net/uip/udp.c
@@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct uip_udp_socket *sk, struct ui
/*
* virtio_net_hdr
*/
- buf->vnet_len = sizeof(struct virtio_net_hdr);
+ buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
memset(buf->vnet, 0, buf->vnet_len);
buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c
index c11a15a..4e0069c 100644
--- a/tools/kvm/util/util.c
+++ b/tools/kvm/util/util.c
@@ -9,6 +9,7 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/statfs.h>
+#include <linux/kernel.h>
static void report(const char *prefix, const char *err, va_list params)
{
@@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 si
return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
}
}
+
+int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len)
+{
+ size_t copy, start_len = len;
+
+ for (; len > 0 && iovlen > 0; ++iov, --iovlen) {
+ copy = min(iov->iov_len, len);
+ memcpy(iov->iov_base, kdata, copy);
+ kdata += copy;
+ len -= copy;
+ }
+
+ return start_len - len;
+}
diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
index c0a8f12..c99c5a6 100644
--- a/tools/kvm/virtio/net.c
+++ b/tools/kvm/virtio/net.c
@@ -32,8 +32,8 @@
struct net_dev;
struct net_dev_operations {
- int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
- int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
+ int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev);
+ int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
};
struct net_dev {
@@ -63,6 +63,8 @@ struct net_dev {
static LIST_HEAD(ndevs);
static int compat_id = -1;
+#define MAX_PACKET_SIZE ((u16)(-1))
+
static void *virtio_net_rx_thread(void *p)
{
struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
@@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p)
mutex_unlock(&ndev->io_lock[id]);
while (virt_queue__available(vq)) {
+ unsigned char buffer[MAX_PACKET_SIZE];
+ struct virtio_net_hdr_mrg_rxbuf *hdr;
+
+ len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev);
head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
- len = ndev->ops->rx(iov, in, ndev);
- virt_queue__set_used_elem(vq, head, len);
+ hdr = (void *)iov[0].iov_base;
+ while (len) {
+ int copied;
+
+ copied = memcpy_toiovecend(iov, in, buffer, len);
+ len -= copied;
+ hdr->num_buffers++;
+ virt_queue__set_used_elem(vq, head, copied);
+ if (len == 0)
+ break;
+ head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
+ }
/* We should interrupt guest right now, otherwise latency is huge. */
if (virtio_queue__should_signal(vq))
@@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct virtio_net_params *params,
goto fail;
}
- hdr_len = sizeof(struct virtio_net_hdr);
+ hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
pr_warning("Config tap device TUNSETVNETHDRSZ error");
@@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
return writev(ndev->tap_fd, iov, out);
}
-static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
+static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
{
- return readv(ndev->tap_fd, iov, in);
+ return read(ndev->tap_fd, buffer, length);
}
static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
@@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
return uip_tx(iov, out, &ndev->info);
}
-static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
+static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
{
- return uip_rx(iov, in, &ndev->info);
+ return uip_rx(buffer, length, &ndev->info);
}
static struct net_dev_operations tap_ops = {
@@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev)
| 1UL << VIRTIO_RING_F_EVENT_IDX
| 1UL << VIRTIO_RING_F_INDIRECT_DESC
| 1UL << VIRTIO_NET_F_CTRL_VQ
- | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
+ | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0)
+ | 1UL << VIRTIO_NET_F_MRG_RXBUF;
}
static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
--
1.8.2.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-23 0:32 [PATCH] kvm tools: virtio-net mergable rx buffers Sasha Levin
@ 2013-04-23 9:06 ` Pekka Enberg
2013-04-23 14:19 ` Asias He
2013-04-23 15:01 ` Sasha Levin
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Pekka Enberg @ 2013-04-23 9:06 UTC (permalink / raw)
To: Sasha Levin
Cc: Will Deacon, Marc Zyngier, KVM General, Asias He, jasowang,
Rusty Russell, Michael S. Tsirkin
On Tue, Apr 23, 2013 at 3:32 AM, Sasha Levin <sasha.levin@oracle.com> wrote:
> @@ -94,4 +95,6 @@ struct kvm;
> void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
> void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
>
> +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len);
> +
I hate the name. Maybe append_to_iovec or something?
Otherwise looks OK to me. Asias?
Pekka
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-23 9:06 ` Pekka Enberg
@ 2013-04-23 14:19 ` Asias He
0 siblings, 0 replies; 12+ messages in thread
From: Asias He @ 2013-04-23 14:19 UTC (permalink / raw)
To: Pekka Enberg
Cc: Sasha Levin, Will Deacon, Marc Zyngier, KVM General, jasowang,
Rusty Russell, Michael S. Tsirkin
On Tue, Apr 23, 2013 at 12:06:56PM +0300, Pekka Enberg wrote:
> On Tue, Apr 23, 2013 at 3:32 AM, Sasha Levin <sasha.levin@oracle.com> wrote:
> > @@ -94,4 +95,6 @@ struct kvm;
> > void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
> > void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
> >
> > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len);
> > +
>
> I hate the name. Maybe append_to_iovec or something?
>
> Otherwise looks OK to me. Asias?
I will look into it tomorrow.
> Pekka
--
Asias
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-23 0:32 [PATCH] kvm tools: virtio-net mergable rx buffers Sasha Levin
2013-04-23 9:06 ` Pekka Enberg
@ 2013-04-23 15:01 ` Sasha Levin
2013-04-23 16:35 ` Eric Northup
2013-04-24 5:32 ` Asias He
3 siblings, 0 replies; 12+ messages in thread
From: Sasha Levin @ 2013-04-23 15:01 UTC (permalink / raw)
To: Sasha Levin; +Cc: penberg, will.deacon, marc.zyngier, kvm, asias
(off list)
On 04/22/2013 08:32 PM, Sasha Levin wrote:
> Support mergable rx buffers for virtio-net. This helps reduce the amount
> of memory the guest kernel has to allocate per rx vq.
One of the benefits of this is that even one virtio-net device with just
one pair of vqs will require much less memory - so you could try booting
smaller guests than before!
Thanks,
Sasha
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-23 0:32 [PATCH] kvm tools: virtio-net mergable rx buffers Sasha Levin
2013-04-23 9:06 ` Pekka Enberg
2013-04-23 15:01 ` Sasha Levin
@ 2013-04-23 16:35 ` Eric Northup
2013-04-24 2:51 ` Sasha Levin
2013-04-24 5:32 ` Asias He
3 siblings, 1 reply; 12+ messages in thread
From: Eric Northup @ 2013-04-23 16:35 UTC (permalink / raw)
To: Sasha Levin
Cc: Pekka Enberg, will.deacon, marc.zyngier, KVM, asias, jasowang,
rusty, Michael S. Tsirkin
Do you care about guests with drivers that don't negotiate
VIRTIO_NET_F_MRG_RXBUF?
On Mon, Apr 22, 2013 at 5:32 PM, Sasha Levin <sasha.levin@oracle.com> wrote:
>
> Support mergable rx buffers for virtio-net. This helps reduce the amount
> of memory the guest kernel has to allocate per rx vq.
>
> Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
> ---
> tools/kvm/include/kvm/uip.h | 4 ++--
> tools/kvm/include/kvm/util.h | 3 +++
> tools/kvm/net/uip/core.c | 54 +++++---------------------------------------
> tools/kvm/net/uip/tcp.c | 2 +-
> tools/kvm/net/uip/udp.c | 2 +-
> tools/kvm/util/util.c | 15 ++++++++++++
> tools/kvm/virtio/net.c | 37 ++++++++++++++++++++++--------
> 7 files changed, 55 insertions(+), 62 deletions(-)
>
> diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
> index ac248d2..fa82f10 100644
> --- a/tools/kvm/include/kvm/uip.h
> +++ b/tools/kvm/include/kvm/uip.h
> @@ -252,7 +252,7 @@ struct uip_tcp_socket {
> };
>
> struct uip_tx_arg {
> - struct virtio_net_hdr *vnet;
> + struct virtio_net_hdr_mrg_rxbuf *vnet;
> struct uip_info *info;
> struct uip_eth *eth;
> int vnet_len;
> @@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth)
> }
>
> int uip_tx(struct iovec *iov, u16 out, struct uip_info *info);
> -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info);
> +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info);
> int uip_init(struct uip_info *info);
>
> int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg);
> diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
> index 0df9f0d..6f8ac83 100644
> --- a/tools/kvm/include/kvm/util.h
> +++ b/tools/kvm/include/kvm/util.h
> @@ -22,6 +22,7 @@
> #include <sys/param.h>
> #include <sys/types.h>
> #include <linux/types.h>
> +#include <sys/uio.h>
>
> #ifdef __GNUC__
> #define NORETURN __attribute__((__noreturn__))
> @@ -94,4 +95,6 @@ struct kvm;
> void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
> void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
>
> +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len);
> +
> #endif /* KVM__UTIL_H */
> diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c
> index 4e5bb82..d9e9993 100644
> --- a/tools/kvm/net/uip/core.c
> +++ b/tools/kvm/net/uip/core.c
> @@ -7,7 +7,7 @@
>
> int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
> {
> - struct virtio_net_hdr *vnet;
> + struct virtio_net_hdr_mrg_rxbuf *vnet;
> struct uip_tx_arg arg;
> int eth_len, vnet_len;
> struct uip_eth *eth;
> @@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
> return vnet_len + eth_len;
> }
>
> -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info)
> +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info)
> {
> - struct virtio_net_hdr *vnet;
> - struct uip_eth *eth;
> struct uip_buf *buf;
> - int vnet_len;
> - int eth_len;
> - char *p;
> int len;
> - int cnt;
> - int i;
>
> /*
> * Sleep until there is a buffer for guest
> */
> buf = uip_buf_get_used(info);
>
> - /*
> - * Fill device to guest buffer, vnet hdr fisrt
> - */
> - vnet_len = iov[0].iov_len;
> - vnet = iov[0].iov_base;
> - if (buf->vnet_len > vnet_len) {
> - len = -1;
> - goto out;
> - }
> - memcpy(vnet, buf->vnet, buf->vnet_len);
> -
> - /*
> - * Then, the real eth data
> - * Note: Be sure buf->eth_len is not bigger than the buffer len that guest provides
> - */
> - cnt = buf->eth_len;
> - p = buf->eth;
> - for (i = 1; i < in; i++) {
> - eth_len = iov[i].iov_len;
> - eth = iov[i].iov_base;
> - if (cnt > eth_len) {
> - memcpy(eth, p, eth_len);
> - cnt -= eth_len;
> - p += eth_len;
> - } else {
> - memcpy(eth, p, cnt);
> - cnt -= cnt;
> - break;
> - }
> - }
> -
> - if (cnt) {
> - pr_warning("uip_rx error");
> - len = -1;
> - goto out;
> - }
> + memcpy(buffer, buf->vnet, buf->vnet_len);
> + memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len);
>
> len = buf->vnet_len + buf->eth_len;
>
> -out:
> uip_buf_set_free(info, buf);
> return len;
> }
> @@ -172,8 +130,8 @@ int uip_init(struct uip_info *info)
> }
>
> list_for_each_entry(buf, buf_head, list) {
> - buf->vnet = malloc(sizeof(struct virtio_net_hdr));
> - buf->vnet_len = sizeof(struct virtio_net_hdr);
> + buf->vnet = malloc(sizeof(struct virtio_net_hdr_mrg_rxbuf));
> + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> buf->eth = malloc(1024*64 + sizeof(struct uip_pseudo_hdr));
> buf->eth_len = 1024*64 + sizeof(struct uip_pseudo_hdr);
>
> diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
> index 9044f40..a99b95e 100644
> --- a/tools/kvm/net/uip/tcp.c
> +++ b/tools/kvm/net/uip/tcp.c
> @@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_
> /*
> * virtio_net_hdr
> */
> - buf->vnet_len = sizeof(struct virtio_net_hdr);
> + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> memset(buf->vnet, 0, buf->vnet_len);
>
> buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c
> index 31c417c..083c221 100644
> --- a/tools/kvm/net/uip/udp.c
> +++ b/tools/kvm/net/uip/udp.c
> @@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct uip_udp_socket *sk, struct ui
> /*
> * virtio_net_hdr
> */
> - buf->vnet_len = sizeof(struct virtio_net_hdr);
> + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> memset(buf->vnet, 0, buf->vnet_len);
>
> buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c
> index c11a15a..4e0069c 100644
> --- a/tools/kvm/util/util.c
> +++ b/tools/kvm/util/util.c
> @@ -9,6 +9,7 @@
> #include <sys/mman.h>
> #include <sys/stat.h>
> #include <sys/statfs.h>
> +#include <linux/kernel.h>
>
> static void report(const char *prefix, const char *err, va_list params)
> {
> @@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 si
> return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
> }
> }
> +
> +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len)
> +{
> + size_t copy, start_len = len;
> +
> + for (; len > 0 && iovlen > 0; ++iov, --iovlen) {
> + copy = min(iov->iov_len, len);
> + memcpy(iov->iov_base, kdata, copy);
> + kdata += copy;
> + len -= copy;
> + }
> +
> + return start_len - len;
> +}
> diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
> index c0a8f12..c99c5a6 100644
> --- a/tools/kvm/virtio/net.c
> +++ b/tools/kvm/virtio/net.c
> @@ -32,8 +32,8 @@
> struct net_dev;
>
> struct net_dev_operations {
> - int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> - int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> + int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev);
> + int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
> };
>
> struct net_dev {
> @@ -63,6 +63,8 @@ struct net_dev {
> static LIST_HEAD(ndevs);
> static int compat_id = -1;
>
> +#define MAX_PACKET_SIZE ((u16)(-1))
> +
> static void *virtio_net_rx_thread(void *p)
> {
> struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
> @@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p)
> mutex_unlock(&ndev->io_lock[id]);
>
> while (virt_queue__available(vq)) {
> + unsigned char buffer[MAX_PACKET_SIZE];
> + struct virtio_net_hdr_mrg_rxbuf *hdr;
> +
> + len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev);
> head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
> - len = ndev->ops->rx(iov, in, ndev);
> - virt_queue__set_used_elem(vq, head, len);
> + hdr = (void *)iov[0].iov_base;
> + while (len) {
> + int copied;
> +
> + copied = memcpy_toiovecend(iov, in, buffer, len);
> + len -= copied;
> + hdr->num_buffers++;
> + virt_queue__set_used_elem(vq, head, copied);
> + if (len == 0)
> + break;
> + head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
Need to check that virt_queue__available(vq) first?
> + }
>
> /* We should interrupt guest right now, otherwise latency is huge. */
> if (virtio_queue__should_signal(vq))
> @@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct virtio_net_params *params,
> goto fail;
> }
>
> - hdr_len = sizeof(struct virtio_net_hdr);
> + hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
> pr_warning("Config tap device TUNSETVNETHDRSZ error");
>
> @@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> return writev(ndev->tap_fd, iov, out);
> }
>
> -static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> +static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
> {
> - return readv(ndev->tap_fd, iov, in);
> + return read(ndev->tap_fd, buffer, length);
> }
>
> static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> @@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> return uip_tx(iov, out, &ndev->info);
> }
>
> -static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> +static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
> {
> - return uip_rx(iov, in, &ndev->info);
> + return uip_rx(buffer, length, &ndev->info);
> }
>
> static struct net_dev_operations tap_ops = {
> @@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev)
> | 1UL << VIRTIO_RING_F_EVENT_IDX
> | 1UL << VIRTIO_RING_F_INDIRECT_DESC
> | 1UL << VIRTIO_NET_F_CTRL_VQ
> - | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
> + | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0)
> + | 1UL << VIRTIO_NET_F_MRG_RXBUF;
> }
>
> static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
> --
> 1.8.2.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-23 16:35 ` Eric Northup
@ 2013-04-24 2:51 ` Sasha Levin
2013-04-24 6:51 ` Pekka Enberg
0 siblings, 1 reply; 12+ messages in thread
From: Sasha Levin @ 2013-04-24 2:51 UTC (permalink / raw)
To: Eric Northup
Cc: Pekka Enberg, will.deacon, marc.zyngier, KVM, asias, jasowang,
rusty, Michael S. Tsirkin
On 04/23/2013 12:35 PM, Eric Northup wrote:
> Do you care about guests with drivers that don't negotiate
> VIRTIO_NET_F_MRG_RXBUF?
We usually try to keep backward compatibility, but in this case
mergable RX buffers are about 5 years old now, so it's safe to
assume they'll be running in any guest.
Unless there is a specific reason to allow working without them
I'd rather keep the code simple in this case.
> On Mon, Apr 22, 2013 at 5:32 PM, Sasha Levin <sasha.levin@oracle.com> wrote:
>> + copied = memcpy_toiovecend(iov, in, buffer, len);
>> + len -= copied;
>> + hdr->num_buffers++;
>> + virt_queue__set_used_elem(vq, head, copied);
>> + if (len == 0)
>> + break;
>> + head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
>
> Need to check that virt_queue__available(vq) first?
Yup. I wonder why it didn't blow up running 'ping -f' with a huge packet size.
Thanks,
Sasha
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-23 0:32 [PATCH] kvm tools: virtio-net mergable rx buffers Sasha Levin
` (2 preceding siblings ...)
2013-04-23 16:35 ` Eric Northup
@ 2013-04-24 5:32 ` Asias He
2013-04-24 9:35 ` Michael S. Tsirkin
3 siblings, 1 reply; 12+ messages in thread
From: Asias He @ 2013-04-24 5:32 UTC (permalink / raw)
To: Sasha Levin; +Cc: penberg, will.deacon, marc.zyngier, kvm, jasowang, rusty, mst
On Mon, Apr 22, 2013 at 08:32:27PM -0400, Sasha Levin wrote:
> Support mergable rx buffers for virtio-net. This helps reduce the amount
> of memory the guest kernel has to allocate per rx vq.
With this, we always do an extra copy of the rx data into a linear buffer.
I am thinking about whether we should keep the non MRG_RXBUF code. For
tap use case, it is fine, user can use vhost. But for uip use case, we
can not use vhost.
> Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
> ---
> tools/kvm/include/kvm/uip.h | 4 ++--
> tools/kvm/include/kvm/util.h | 3 +++
> tools/kvm/net/uip/core.c | 54 +++++---------------------------------------
> tools/kvm/net/uip/tcp.c | 2 +-
> tools/kvm/net/uip/udp.c | 2 +-
> tools/kvm/util/util.c | 15 ++++++++++++
> tools/kvm/virtio/net.c | 37 ++++++++++++++++++++++--------
> 7 files changed, 55 insertions(+), 62 deletions(-)
>
> diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
> index ac248d2..fa82f10 100644
> --- a/tools/kvm/include/kvm/uip.h
> +++ b/tools/kvm/include/kvm/uip.h
> @@ -252,7 +252,7 @@ struct uip_tcp_socket {
> };
>
> struct uip_tx_arg {
> - struct virtio_net_hdr *vnet;
> + struct virtio_net_hdr_mrg_rxbuf *vnet;
> struct uip_info *info;
> struct uip_eth *eth;
> int vnet_len;
> @@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth)
> }
>
> int uip_tx(struct iovec *iov, u16 out, struct uip_info *info);
> -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info);
> +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info);
> int uip_init(struct uip_info *info);
>
> int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg);
> diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
> index 0df9f0d..6f8ac83 100644
> --- a/tools/kvm/include/kvm/util.h
> +++ b/tools/kvm/include/kvm/util.h
> @@ -22,6 +22,7 @@
> #include <sys/param.h>
> #include <sys/types.h>
> #include <linux/types.h>
> +#include <sys/uio.h>
>
> #ifdef __GNUC__
> #define NORETURN __attribute__((__noreturn__))
> @@ -94,4 +95,6 @@ struct kvm;
> void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
> void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
>
> +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len);
> +
> #endif /* KVM__UTIL_H */
> diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c
> index 4e5bb82..d9e9993 100644
> --- a/tools/kvm/net/uip/core.c
> +++ b/tools/kvm/net/uip/core.c
> @@ -7,7 +7,7 @@
>
> int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
> {
> - struct virtio_net_hdr *vnet;
> + struct virtio_net_hdr_mrg_rxbuf *vnet;
> struct uip_tx_arg arg;
> int eth_len, vnet_len;
> struct uip_eth *eth;
> @@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
> return vnet_len + eth_len;
> }
>
> -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info)
> +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info)
> {
> - struct virtio_net_hdr *vnet;
> - struct uip_eth *eth;
> struct uip_buf *buf;
> - int vnet_len;
> - int eth_len;
> - char *p;
> int len;
> - int cnt;
> - int i;
>
> /*
> * Sleep until there is a buffer for guest
> */
> buf = uip_buf_get_used(info);
>
> - /*
> - * Fill device to guest buffer, vnet hdr fisrt
> - */
> - vnet_len = iov[0].iov_len;
> - vnet = iov[0].iov_base;
> - if (buf->vnet_len > vnet_len) {
> - len = -1;
> - goto out;
> - }
> - memcpy(vnet, buf->vnet, buf->vnet_len);
> -
> - /*
> - * Then, the real eth data
> - * Note: Be sure buf->eth_len is not bigger than the buffer len that guest provides
> - */
> - cnt = buf->eth_len;
> - p = buf->eth;
> - for (i = 1; i < in; i++) {
> - eth_len = iov[i].iov_len;
> - eth = iov[i].iov_base;
> - if (cnt > eth_len) {
> - memcpy(eth, p, eth_len);
> - cnt -= eth_len;
> - p += eth_len;
> - } else {
> - memcpy(eth, p, cnt);
> - cnt -= cnt;
> - break;
> - }
> - }
> -
> - if (cnt) {
> - pr_warning("uip_rx error");
> - len = -1;
> - goto out;
> - }
> + memcpy(buffer, buf->vnet, buf->vnet_len);
> + memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len);
>
> len = buf->vnet_len + buf->eth_len;
>
> -out:
> uip_buf_set_free(info, buf);
> return len;
> }
> @@ -172,8 +130,8 @@ int uip_init(struct uip_info *info)
> }
>
> list_for_each_entry(buf, buf_head, list) {
> - buf->vnet = malloc(sizeof(struct virtio_net_hdr));
> - buf->vnet_len = sizeof(struct virtio_net_hdr);
> + buf->vnet = malloc(sizeof(struct virtio_net_hdr_mrg_rxbuf));
> + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> buf->eth = malloc(1024*64 + sizeof(struct uip_pseudo_hdr));
> buf->eth_len = 1024*64 + sizeof(struct uip_pseudo_hdr);
>
> diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
> index 9044f40..a99b95e 100644
> --- a/tools/kvm/net/uip/tcp.c
> +++ b/tools/kvm/net/uip/tcp.c
> @@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_
> /*
> * virtio_net_hdr
> */
> - buf->vnet_len = sizeof(struct virtio_net_hdr);
> + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> memset(buf->vnet, 0, buf->vnet_len);
>
> buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c
> index 31c417c..083c221 100644
> --- a/tools/kvm/net/uip/udp.c
> +++ b/tools/kvm/net/uip/udp.c
> @@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct uip_udp_socket *sk, struct ui
> /*
> * virtio_net_hdr
> */
> - buf->vnet_len = sizeof(struct virtio_net_hdr);
> + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> memset(buf->vnet, 0, buf->vnet_len);
>
> buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c
> index c11a15a..4e0069c 100644
> --- a/tools/kvm/util/util.c
> +++ b/tools/kvm/util/util.c
> @@ -9,6 +9,7 @@
> #include <sys/mman.h>
> #include <sys/stat.h>
> #include <sys/statfs.h>
> +#include <linux/kernel.h>
>
> static void report(const char *prefix, const char *err, va_list params)
> {
> @@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 si
> return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
> }
> }
> +
> +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len)
> +{
> + size_t copy, start_len = len;
> +
> + for (; len > 0 && iovlen > 0; ++iov, --iovlen) {
> + copy = min(iov->iov_len, len);
> + memcpy(iov->iov_base, kdata, copy);
> + kdata += copy;
> + len -= copy;
> + }
> +
> + return start_len - len;
> +}
> diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
> index c0a8f12..c99c5a6 100644
> --- a/tools/kvm/virtio/net.c
> +++ b/tools/kvm/virtio/net.c
> @@ -32,8 +32,8 @@
> struct net_dev;
>
> struct net_dev_operations {
> - int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> - int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> + int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev);
> + int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
> };
>
> struct net_dev {
> @@ -63,6 +63,8 @@ struct net_dev {
> static LIST_HEAD(ndevs);
> static int compat_id = -1;
>
> +#define MAX_PACKET_SIZE ((u16)(-1))
Why 64KB - 1? The package can be 65550 bytes which is max TCP or UDP
plus 14 bytes eth header.
> +
> static void *virtio_net_rx_thread(void *p)
> {
> struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
> @@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p)
> mutex_unlock(&ndev->io_lock[id]);
>
> while (virt_queue__available(vq)) {
> + unsigned char buffer[MAX_PACKET_SIZE];
The buffer size should at least be:
MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)
> + struct virtio_net_hdr_mrg_rxbuf *hdr;
> +
> + len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev);
> head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
> - len = ndev->ops->rx(iov, in, ndev);
> - virt_queue__set_used_elem(vq, head, len);
> + hdr = (void *)iov[0].iov_base;
> + while (len) {
> + int copied;
> +
> + copied = memcpy_toiovecend(iov, in, buffer, len);
> + len -= copied;
> + hdr->num_buffers++;
> + virt_queue__set_used_elem(vq, head, copied);
> + if (len == 0)
> + break;
> + head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
Here, need to check if we still have available buffer from guest. If not
need to wait for it.
> + }
>
> /* We should interrupt guest right now, otherwise latency is huge. */
> if (virtio_queue__should_signal(vq))
> @@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct virtio_net_params *params,
> goto fail;
> }
>
> - hdr_len = sizeof(struct virtio_net_hdr);
> + hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
> pr_warning("Config tap device TUNSETVNETHDRSZ error");
>
> @@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> return writev(ndev->tap_fd, iov, out);
> }
>
> -static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> +static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
> {
> - return readv(ndev->tap_fd, iov, in);
> + return read(ndev->tap_fd, buffer, length);
> }
>
> static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> @@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> return uip_tx(iov, out, &ndev->info);
> }
>
> -static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> +static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
> {
> - return uip_rx(iov, in, &ndev->info);
> + return uip_rx(buffer, length, &ndev->info);
> }
>
> static struct net_dev_operations tap_ops = {
> @@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev)
> | 1UL << VIRTIO_RING_F_EVENT_IDX
> | 1UL << VIRTIO_RING_F_INDIRECT_DESC
> | 1UL << VIRTIO_NET_F_CTRL_VQ
> - | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
> + | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0)
> + | 1UL << VIRTIO_NET_F_MRG_RXBUF;
> }
>
> static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
> --
> 1.8.2.1
>
--
Asias
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-24 2:51 ` Sasha Levin
@ 2013-04-24 6:51 ` Pekka Enberg
2013-04-24 9:23 ` Michael S. Tsirkin
0 siblings, 1 reply; 12+ messages in thread
From: Pekka Enberg @ 2013-04-24 6:51 UTC (permalink / raw)
To: Sasha Levin
Cc: Eric Northup, Will Deacon, Marc Zyngier, KVM, Asias He, jasowang,
Rusty Russell, Michael S. Tsirkin
Hi,
On 04/23/2013 12:35 PM, Eric Northup wrote:
>> Do you care about guests with drivers that don't negotiate
>> VIRTIO_NET_F_MRG_RXBUF?
On Wed, Apr 24, 2013 at 5:51 AM, Sasha Levin <sasha.levin@oracle.com> wrote:
> We usually try to keep backward compatibility, but in this case
> mergable RX buffers are about 5 years old now, so it's safe to
> assume they'll be running in any guest.
>
> Unless there is a specific reason to allow working without them
> I'd rather keep the code simple in this case.
Are there such guests around? What's the failure scenario for them
after this patch?
Pekka
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-24 6:51 ` Pekka Enberg
@ 2013-04-24 9:23 ` Michael S. Tsirkin
2013-04-29 0:44 ` Rusty Russell
0 siblings, 1 reply; 12+ messages in thread
From: Michael S. Tsirkin @ 2013-04-24 9:23 UTC (permalink / raw)
To: Pekka Enberg
Cc: Sasha Levin, Eric Northup, Will Deacon, Marc Zyngier, KVM,
Asias He, jasowang, Rusty Russell
On Wed, Apr 24, 2013 at 09:51:57AM +0300, Pekka Enberg wrote:
> Hi,
>
> On 04/23/2013 12:35 PM, Eric Northup wrote:
> >> Do you care about guests with drivers that don't negotiate
> >> VIRTIO_NET_F_MRG_RXBUF?
>
> On Wed, Apr 24, 2013 at 5:51 AM, Sasha Levin <sasha.levin@oracle.com> wrote:
> > We usually try to keep backward compatibility, but in this case
> > mergable RX buffers are about 5 years old now, so it's safe to
> > assume they'll be running in any guest.
> >
> > Unless there is a specific reason to allow working without them
> > I'd rather keep the code simple in this case.
>
> Are there such guests around? What's the failure scenario for them
> after this patch?
>
> Pekka
Warning: have not looked at the patch, just a general comment.
I think it's reasonable to assume embedded guests such as PXE won't
negotiate any features. And, running old guests is one of the reasons
people use virtualization at all. So 5 years is not a lot.
In any case, stick to the device spec please, if you want it changed
please send a spec patch, don't deviate from it randomly.
--
MST
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-24 5:32 ` Asias He
@ 2013-04-24 9:35 ` Michael S. Tsirkin
0 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2013-04-24 9:35 UTC (permalink / raw)
To: Asias He
Cc: Sasha Levin, penberg, will.deacon, marc.zyngier, kvm, jasowang,
rusty
On Wed, Apr 24, 2013 at 01:32:56PM +0800, Asias He wrote:
> On Mon, Apr 22, 2013 at 08:32:27PM -0400, Sasha Levin wrote:
> > Support mergable rx buffers for virtio-net. This helps reduce the amount
> > of memory the guest kernel has to allocate per rx vq.
>
> With this, we always do an extra copy of the rx data into a linear buffer.
> I am thinking about whether we should keep the non MRG_RXBUF code. For
> tap use case, it is fine, user can use vhost. But for uip use case, we
> can not use vhost.
You have to keep it but it might be ok not to optimize it.
> > Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
> > ---
> > tools/kvm/include/kvm/uip.h | 4 ++--
> > tools/kvm/include/kvm/util.h | 3 +++
> > tools/kvm/net/uip/core.c | 54 +++++---------------------------------------
> > tools/kvm/net/uip/tcp.c | 2 +-
> > tools/kvm/net/uip/udp.c | 2 +-
> > tools/kvm/util/util.c | 15 ++++++++++++
> > tools/kvm/virtio/net.c | 37 ++++++++++++++++++++++--------
> > 7 files changed, 55 insertions(+), 62 deletions(-)
> >
> > diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
> > index ac248d2..fa82f10 100644
> > --- a/tools/kvm/include/kvm/uip.h
> > +++ b/tools/kvm/include/kvm/uip.h
> > @@ -252,7 +252,7 @@ struct uip_tcp_socket {
> > };
> >
> > struct uip_tx_arg {
> > - struct virtio_net_hdr *vnet;
> > + struct virtio_net_hdr_mrg_rxbuf *vnet;
> > struct uip_info *info;
> > struct uip_eth *eth;
> > int vnet_len;
> > @@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth)
> > }
> >
> > int uip_tx(struct iovec *iov, u16 out, struct uip_info *info);
> > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info);
> > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info);
> > int uip_init(struct uip_info *info);
> >
> > int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg);
> > diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
> > index 0df9f0d..6f8ac83 100644
> > --- a/tools/kvm/include/kvm/util.h
> > +++ b/tools/kvm/include/kvm/util.h
> > @@ -22,6 +22,7 @@
> > #include <sys/param.h>
> > #include <sys/types.h>
> > #include <linux/types.h>
> > +#include <sys/uio.h>
> >
> > #ifdef __GNUC__
> > #define NORETURN __attribute__((__noreturn__))
> > @@ -94,4 +95,6 @@ struct kvm;
> > void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
> > void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
> >
> > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len);
> > +
> > #endif /* KVM__UTIL_H */
> > diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c
> > index 4e5bb82..d9e9993 100644
> > --- a/tools/kvm/net/uip/core.c
> > +++ b/tools/kvm/net/uip/core.c
> > @@ -7,7 +7,7 @@
> >
> > int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
> > {
> > - struct virtio_net_hdr *vnet;
> > + struct virtio_net_hdr_mrg_rxbuf *vnet;
> > struct uip_tx_arg arg;
> > int eth_len, vnet_len;
> > struct uip_eth *eth;
> > @@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info)
> > return vnet_len + eth_len;
> > }
> >
> > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info)
> > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info)
> > {
> > - struct virtio_net_hdr *vnet;
> > - struct uip_eth *eth;
> > struct uip_buf *buf;
> > - int vnet_len;
> > - int eth_len;
> > - char *p;
> > int len;
> > - int cnt;
> > - int i;
> >
> > /*
> > * Sleep until there is a buffer for guest
> > */
> > buf = uip_buf_get_used(info);
> >
> > - /*
> > - * Fill device to guest buffer, vnet hdr fisrt
> > - */
> > - vnet_len = iov[0].iov_len;
> > - vnet = iov[0].iov_base;
> > - if (buf->vnet_len > vnet_len) {
> > - len = -1;
> > - goto out;
> > - }
> > - memcpy(vnet, buf->vnet, buf->vnet_len);
> > -
> > - /*
> > - * Then, the real eth data
> > - * Note: Be sure buf->eth_len is not bigger than the buffer len that guest provides
> > - */
> > - cnt = buf->eth_len;
> > - p = buf->eth;
> > - for (i = 1; i < in; i++) {
> > - eth_len = iov[i].iov_len;
> > - eth = iov[i].iov_base;
> > - if (cnt > eth_len) {
> > - memcpy(eth, p, eth_len);
> > - cnt -= eth_len;
> > - p += eth_len;
> > - } else {
> > - memcpy(eth, p, cnt);
> > - cnt -= cnt;
> > - break;
> > - }
> > - }
> > -
> > - if (cnt) {
> > - pr_warning("uip_rx error");
> > - len = -1;
> > - goto out;
> > - }
> > + memcpy(buffer, buf->vnet, buf->vnet_len);
> > + memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len);
> >
> > len = buf->vnet_len + buf->eth_len;
> >
> > -out:
> > uip_buf_set_free(info, buf);
> > return len;
> > }
> > @@ -172,8 +130,8 @@ int uip_init(struct uip_info *info)
> > }
> >
> > list_for_each_entry(buf, buf_head, list) {
> > - buf->vnet = malloc(sizeof(struct virtio_net_hdr));
> > - buf->vnet_len = sizeof(struct virtio_net_hdr);
> > + buf->vnet = malloc(sizeof(struct virtio_net_hdr_mrg_rxbuf));
> > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > buf->eth = malloc(1024*64 + sizeof(struct uip_pseudo_hdr));
> > buf->eth_len = 1024*64 + sizeof(struct uip_pseudo_hdr);
> >
> > diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
> > index 9044f40..a99b95e 100644
> > --- a/tools/kvm/net/uip/tcp.c
> > +++ b/tools/kvm/net/uip/tcp.c
> > @@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_
> > /*
> > * virtio_net_hdr
> > */
> > - buf->vnet_len = sizeof(struct virtio_net_hdr);
> > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > memset(buf->vnet, 0, buf->vnet_len);
> >
> > buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> > diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c
> > index 31c417c..083c221 100644
> > --- a/tools/kvm/net/uip/udp.c
> > +++ b/tools/kvm/net/uip/udp.c
> > @@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct uip_udp_socket *sk, struct ui
> > /*
> > * virtio_net_hdr
> > */
> > - buf->vnet_len = sizeof(struct virtio_net_hdr);
> > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > memset(buf->vnet, 0, buf->vnet_len);
> >
> > buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);
> > diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c
> > index c11a15a..4e0069c 100644
> > --- a/tools/kvm/util/util.c
> > +++ b/tools/kvm/util/util.c
> > @@ -9,6 +9,7 @@
> > #include <sys/mman.h>
> > #include <sys/stat.h>
> > #include <sys/statfs.h>
> > +#include <linux/kernel.h>
> >
> > static void report(const char *prefix, const char *err, va_list params)
> > {
> > @@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 si
> > return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
> > }
> > }
> > +
> > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len)
> > +{
> > + size_t copy, start_len = len;
> > +
> > + for (; len > 0 && iovlen > 0; ++iov, --iovlen) {
> > + copy = min(iov->iov_len, len);
> > + memcpy(iov->iov_base, kdata, copy);
> > + kdata += copy;
> > + len -= copy;
> > + }
> > +
> > + return start_len - len;
> > +}
> > diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
> > index c0a8f12..c99c5a6 100644
> > --- a/tools/kvm/virtio/net.c
> > +++ b/tools/kvm/virtio/net.c
> > @@ -32,8 +32,8 @@
> > struct net_dev;
> >
> > struct net_dev_operations {
> > - int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> > - int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
> > + int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev);
> > + int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
> > };
> >
> > struct net_dev {
> > @@ -63,6 +63,8 @@ struct net_dev {
> > static LIST_HEAD(ndevs);
> > static int compat_id = -1;
> >
> > +#define MAX_PACKET_SIZE ((u16)(-1))
>
> Why 64KB - 1? The package can be 65550 bytes which is max TCP or UDP
> plus 14 bytes eth header.
>
> > +
> > static void *virtio_net_rx_thread(void *p)
> > {
> > struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
> > @@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p)
> > mutex_unlock(&ndev->io_lock[id]);
> >
> > while (virt_queue__available(vq)) {
> > + unsigned char buffer[MAX_PACKET_SIZE];
>
> The buffer size should at least be:
>
> MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)
>
> > + struct virtio_net_hdr_mrg_rxbuf *hdr;
> > +
> > + len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev);
> > head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
> > - len = ndev->ops->rx(iov, in, ndev);
> > - virt_queue__set_used_elem(vq, head, len);
> > + hdr = (void *)iov[0].iov_base;
> > + while (len) {
> > + int copied;
> > +
> > + copied = memcpy_toiovecend(iov, in, buffer, len);
> > + len -= copied;
> > + hdr->num_buffers++;
> > + virt_queue__set_used_elem(vq, head, copied);
> > + if (len == 0)
> > + break;
> > + head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
>
> Here, need to check if we still have available buffer from guest. If not
> need to wait for it.
>
> > + }
> >
> > /* We should interrupt guest right now, otherwise latency is huge. */
> > if (virtio_queue__should_signal(vq))
> > @@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct virtio_net_params *params,
> > goto fail;
> > }
> >
> > - hdr_len = sizeof(struct virtio_net_hdr);
> > + hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> > if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
> > pr_warning("Config tap device TUNSETVNETHDRSZ error");
> >
> > @@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> > return writev(ndev->tap_fd, iov, out);
> > }
> >
> > -static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> > +static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
> > {
> > - return readv(ndev->tap_fd, iov, in);
> > + return read(ndev->tap_fd, buffer, length);
> > }
> >
> > static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> > @@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> > return uip_tx(iov, out, &ndev->info);
> > }
> >
> > -static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
> > +static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev)
> > {
> > - return uip_rx(iov, in, &ndev->info);
> > + return uip_rx(buffer, length, &ndev->info);
> > }
> >
> > static struct net_dev_operations tap_ops = {
> > @@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev)
> > | 1UL << VIRTIO_RING_F_EVENT_IDX
> > | 1UL << VIRTIO_RING_F_INDIRECT_DESC
> > | 1UL << VIRTIO_NET_F_CTRL_VQ
> > - | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
> > + | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0)
> > + | 1UL << VIRTIO_NET_F_MRG_RXBUF;
> > }
> >
> > static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
> > --
> > 1.8.2.1
> >
>
> --
> Asias
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-24 9:23 ` Michael S. Tsirkin
@ 2013-04-29 0:44 ` Rusty Russell
2013-04-29 2:36 ` Sasha Levin
0 siblings, 1 reply; 12+ messages in thread
From: Rusty Russell @ 2013-04-29 0:44 UTC (permalink / raw)
To: Michael S. Tsirkin, Pekka Enberg
Cc: Sasha Levin, Eric Northup, Will Deacon, Marc Zyngier, KVM,
Asias He, jasowang
"Michael S. Tsirkin" <mst@redhat.com> writes:
> On Wed, Apr 24, 2013 at 09:51:57AM +0300, Pekka Enberg wrote:
>> Hi,
>>
>> On 04/23/2013 12:35 PM, Eric Northup wrote:
>> >> Do you care about guests with drivers that don't negotiate
>> >> VIRTIO_NET_F_MRG_RXBUF?
>>
>> On Wed, Apr 24, 2013 at 5:51 AM, Sasha Levin <sasha.levin@oracle.com> wrote:
>> > We usually try to keep backward compatibility, but in this case
>> > mergable RX buffers are about 5 years old now, so it's safe to
>> > assume they'll be running in any guest.
>> >
>> > Unless there is a specific reason to allow working without them
>> > I'd rather keep the code simple in this case.
>>
>> Are there such guests around? What's the failure scenario for them
>> after this patch?
>>
>> Pekka
>
> Warning: have not looked at the patch, just a general comment.
>
> I think it's reasonable to assume embedded guests such as PXE won't
> negotiate any features. And, running old guests is one of the reasons
> people use virtualization at all. So 5 years is not a lot.
>
> In any case, stick to the device spec please, if you want it changed
> please send a spec patch, don't deviate from it randomly.
Supporting old guests is an quality of implementation issue. It's like
any ABI: if noone will notice, you can remove stuff.
But the case of "I can receive GSO packets but I don't support mergeable
buffers" is a trivial one: you can "support" it by pretending the guest
can't handle GSO :)
If you want to support non-Linux guests (eg. bootloaders), you probably
want to keep support for very dumb drivers with no mergable rxbufs
though.
Cheers,
Rusty.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] kvm tools: virtio-net mergable rx buffers
2013-04-29 0:44 ` Rusty Russell
@ 2013-04-29 2:36 ` Sasha Levin
0 siblings, 0 replies; 12+ messages in thread
From: Sasha Levin @ 2013-04-29 2:36 UTC (permalink / raw)
To: Rusty Russell
Cc: Michael S. Tsirkin, Pekka Enberg, Eric Northup, Will Deacon,
Marc Zyngier, KVM, Asias He, jasowang
On 04/28/2013 08:44 PM, Rusty Russell wrote:
> "Michael S. Tsirkin" <mst@redhat.com> writes:
>> > On Wed, Apr 24, 2013 at 09:51:57AM +0300, Pekka Enberg wrote:
>>> >> Hi,
>>> >>
>>> >> On 04/23/2013 12:35 PM, Eric Northup wrote:
>>>>> >> >> Do you care about guests with drivers that don't negotiate
>>>>> >> >> VIRTIO_NET_F_MRG_RXBUF?
>>> >>
>>> >> On Wed, Apr 24, 2013 at 5:51 AM, Sasha Levin <sasha.levin@oracle.com> wrote:
>>>> >> > We usually try to keep backward compatibility, but in this case
>>>> >> > mergable RX buffers are about 5 years old now, so it's safe to
>>>> >> > assume they'll be running in any guest.
>>>> >> >
>>>> >> > Unless there is a specific reason to allow working without them
>>>> >> > I'd rather keep the code simple in this case.
>>> >>
>>> >> Are there such guests around? What's the failure scenario for them
>>> >> after this patch?
>>> >>
>>> >> Pekka
>> >
>> > Warning: have not looked at the patch, just a general comment.
>> >
>> > I think it's reasonable to assume embedded guests such as PXE won't
>> > negotiate any features. And, running old guests is one of the reasons
>> > people use virtualization at all. So 5 years is not a lot.
>> >
>> > In any case, stick to the device spec please, if you want it changed
>> > please send a spec patch, don't deviate from it randomly.
> Supporting old guests is an quality of implementation issue. It's like
> any ABI: if noone will notice, you can remove stuff.
>
> But the case of "I can receive GSO packets but I don't support mergeable
> buffers" is a trivial one: you can "support" it by pretending the guest
> can't handle GSO :)
>
> If you want to support non-Linux guests (eg. bootloaders), you probably
> want to keep support for very dumb drivers with no mergable rxbufs
> though.
Yup, I'm planning on sending a version that supports older guests soonish.
Thanks,
Sasha
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2013-04-29 2:37 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-04-23 0:32 [PATCH] kvm tools: virtio-net mergable rx buffers Sasha Levin
2013-04-23 9:06 ` Pekka Enberg
2013-04-23 14:19 ` Asias He
2013-04-23 15:01 ` Sasha Levin
2013-04-23 16:35 ` Eric Northup
2013-04-24 2:51 ` Sasha Levin
2013-04-24 6:51 ` Pekka Enberg
2013-04-24 9:23 ` Michael S. Tsirkin
2013-04-29 0:44 ` Rusty Russell
2013-04-29 2:36 ` Sasha Levin
2013-04-24 5:32 ` Asias He
2013-04-24 9:35 ` Michael S. Tsirkin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox