* [RFC] kvm tools: Implement multiple VQ for virtio-net
@ 2011-11-11 22:12 Sasha Levin
  2011-11-13 10:24 ` Michael S. Tsirkin
  0 siblings, 1 reply; 17+ messages in thread
From: Sasha Levin @ 2011-11-11 22:12 UTC (permalink / raw)
  To: penberg
  Cc: Krishna Kumar, kvm, Michael S. Tsirkin, asias.hejun,
	virtualization, gorcunov, Sasha Levin, netdev, mingo

This patch, based on Krishna Kumar's patch series, implements multiple
VQ support for virtio-net.

It was tested against v3 of that series.

Cc: Krishna Kumar <krkumar2@in.ibm.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: virtualization@lists.linux-foundation.org
Cc: netdev@vger.kernel.org
Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---
 tools/kvm/include/kvm/virtio-pci.h |    2 +-
 tools/kvm/virtio/net.c             |   94 +++++++++++++++++++----------------
 2 files changed, 52 insertions(+), 44 deletions(-)

diff --git a/tools/kvm/include/kvm/virtio-pci.h b/tools/kvm/include/kvm/virtio-pci.h
index 2bbb271..94d20ee 100644
--- a/tools/kvm/include/kvm/virtio-pci.h
+++ b/tools/kvm/include/kvm/virtio-pci.h
@@ -6,7 +6,7 @@
 
 #include <linux/types.h>
 
-#define VIRTIO_PCI_MAX_VQ	3
+#define VIRTIO_PCI_MAX_VQ	16
 #define VIRTIO_PCI_MAX_CONFIG	1
 
 struct kvm;
diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
index cee2b5b..0754795 100644
--- a/tools/kvm/virtio/net.c
+++ b/tools/kvm/virtio/net.c
@@ -27,9 +27,8 @@
 #include <sys/wait.h>
 
 #define VIRTIO_NET_QUEUE_SIZE		128
-#define VIRTIO_NET_NUM_QUEUES		2
-#define VIRTIO_NET_RX_QUEUE		0
-#define VIRTIO_NET_TX_QUEUE		1
+#define VIRTIO_NET_NUM_QUEUES		16
+#define VIRTIO_NET_IS_RX_QUEUE(x)	(((x) % 2) == 0)
 
 struct net_dev;
 
@@ -49,14 +48,13 @@ struct net_dev {
 	struct virtio_net_config	config;
 	u32				features;
 
-	pthread_t			io_rx_thread;
-	pthread_mutex_t			io_rx_lock;
-	pthread_cond_t			io_rx_cond;
-
-	pthread_t			io_tx_thread;
-	pthread_mutex_t			io_tx_lock;
-	pthread_cond_t			io_tx_cond;
+	pthread_t			io_thread[VIRTIO_NET_NUM_QUEUES];
+	pthread_mutex_t			io_lock[VIRTIO_NET_NUM_QUEUES];
+	pthread_cond_t			io_cond[VIRTIO_NET_NUM_QUEUES];
 
+	int				rx_vq_num;
+	int				tx_vq_num;
+	int				vq_num;
 	int				tap_fd;
 	char				tap_name[IFNAMSIZ];
 
@@ -78,17 +76,22 @@ static void *virtio_net_rx_thread(void *p)
 	struct net_dev *ndev = p;
 	u16 out, in;
 	u16 head;
-	int len;
+	int len, queue_num;
+
+	mutex_lock(&ndev->mutex);
+	queue_num = ndev->rx_vq_num * 2;
+	ndev->rx_vq_num++;
+	mutex_unlock(&ndev->mutex);
 
 	kvm	= ndev->kvm;
-	vq	= &ndev->vqs[VIRTIO_NET_RX_QUEUE];
+	vq	= &ndev->vqs[queue_num];
 
 	while (1) {
 
-		mutex_lock(&ndev->io_rx_lock);
+		mutex_lock(&ndev->io_lock[queue_num]);
 		if (!virt_queue__available(vq))
-			pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock);
-		mutex_unlock(&ndev->io_rx_lock);
+			pthread_cond_wait(&ndev->io_cond[queue_num], &ndev->io_lock[queue_num]);
+		mutex_unlock(&ndev->io_lock[queue_num]);
 
 		while (virt_queue__available(vq)) {
 
@@ -99,7 +102,7 @@ static void *virtio_net_rx_thread(void *p)
 			virt_queue__set_used_elem(vq, head, len);
 
 			/* We should interrupt guest right now, otherwise latency is huge. */
-			ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_RX_QUEUE);
+			ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, queue_num);
 		}
 
 	}
@@ -117,16 +120,21 @@ static void *virtio_net_tx_thread(void *p)
 	struct net_dev *ndev = p;
 	u16 out, in;
 	u16 head;
-	int len;
+	int len, queue_num;
+
+	mutex_lock(&ndev->mutex);
+	queue_num = ndev->tx_vq_num * 2 + 1;
+	ndev->tx_vq_num++;
+	mutex_unlock(&ndev->mutex);
 
 	kvm	= ndev->kvm;
-	vq	= &ndev->vqs[VIRTIO_NET_TX_QUEUE];
+	vq	= &ndev->vqs[queue_num];
 
 	while (1) {
-		mutex_lock(&ndev->io_tx_lock);
+		mutex_lock(&ndev->io_lock[queue_num]);
 		if (!virt_queue__available(vq))
-			pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock);
-		mutex_unlock(&ndev->io_tx_lock);
+			pthread_cond_wait(&ndev->io_cond[queue_num], &ndev->io_lock[queue_num]);
+		mutex_unlock(&ndev->io_lock[queue_num]);
 
 		while (virt_queue__available(vq)) {
 
@@ -137,7 +145,7 @@ static void *virtio_net_tx_thread(void *p)
 			virt_queue__set_used_elem(vq, head, len);
 		}
 
-		ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_TX_QUEUE);
+		ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, queue_num);
 	}
 
 	pthread_exit(NULL);
@@ -148,20 +156,9 @@ static void *virtio_net_tx_thread(void *p)
 
 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
 {
-	switch (queue) {
-	case VIRTIO_NET_TX_QUEUE:
-		mutex_lock(&ndev->io_tx_lock);
-		pthread_cond_signal(&ndev->io_tx_cond);
-		mutex_unlock(&ndev->io_tx_lock);
-		break;
-	case VIRTIO_NET_RX_QUEUE:
-		mutex_lock(&ndev->io_rx_lock);
-		pthread_cond_signal(&ndev->io_rx_cond);
-		mutex_unlock(&ndev->io_rx_lock);
-		break;
-	default:
-		pr_warning("Unknown queue index %u", queue);
-	}
+	mutex_lock(&ndev->io_lock[queue]);
+	pthread_cond_signal(&ndev->io_cond[queue]);
+	mutex_unlock(&ndev->io_lock[queue]);
 }
 
 static bool virtio_net__tap_init(const struct virtio_net_params *params,
@@ -248,14 +245,17 @@ fail:
 
 static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
 {
-	pthread_mutex_init(&ndev->io_tx_lock, NULL);
-	pthread_mutex_init(&ndev->io_rx_lock, NULL);
+	int i;
 
-	pthread_cond_init(&ndev->io_tx_cond, NULL);
-	pthread_cond_init(&ndev->io_rx_cond, NULL);
+	for (i = 0; i < ndev->vq_num; i++) {
+		pthread_mutex_init(&ndev->io_lock[i], NULL);
+		pthread_cond_init(&ndev->io_cond[i], NULL);
+	}
 
-	pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev);
-	pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev);
+	for (i = 0; i < ndev->vq_num; i += 2) {
+		pthread_create(&ndev->io_thread[i], NULL, virtio_net_tx_thread, ndev);
+		pthread_create(&ndev->io_thread[i + 1], NULL, virtio_net_rx_thread, ndev);
+	}
 }
 
 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
@@ -311,13 +311,19 @@ static u32 get_host_features(struct kvm *kvm, void *dev)
 		| 1UL << VIRTIO_NET_F_HOST_TSO6
 		| 1UL << VIRTIO_NET_F_GUEST_UFO
 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
-		| 1UL << VIRTIO_NET_F_GUEST_TSO6;
+		| 1UL << VIRTIO_NET_F_GUEST_TSO6
+		| 1UL << VIRTIO_NET_F_MULTIQUEUE;
 }
 
 static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
 {
 	struct net_dev *ndev = dev;
 
+	if (features & (1UL << VIRTIO_NET_F_MULTIQUEUE))
+		ndev->vq_num = ndev->config.num_queues;
+	else
+		ndev->vq_num = 2;
+
 	ndev->features = features;
 }
 
@@ -395,6 +401,8 @@ void virtio_net__init(const struct virtio_net_params *params)
 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
 	}
 
+	ndev->config.num_queues = VIRTIO_NET_NUM_QUEUES;
+
 	ndev->mode = params->mode;
 	if (ndev->mode == NET_MODE_TAP) {
 		if (!virtio_net__tap_init(params, ndev))
-- 
1.7.7.2
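
For reference, the queue layout the patch relies on pairs the virtqueues: RX
sits on even indices and TX on odd ones (queue_num = n * 2 in the RX threads,
n * 2 + 1 in the TX threads, which is what VIRTIO_NET_IS_RX_QUEUE encodes).
A minimal illustrative sketch of that convention, with hypothetical helper
names that are not part of the patch:

/* Illustrative only -- hypothetical helpers, not part of the patch.
 * Virtqueues come in pairs: RX at 2*i, TX at 2*i + 1. */
static inline int vq_is_rx(int vq)      { return (vq % 2) == 0; }
static inline int rx_vq_index(int pair) { return pair * 2; }     /* RX side of pair */
static inline int tx_vq_index(int pair) { return pair * 2 + 1; } /* TX side of pair */
static inline int vq_pair(int vq)       { return vq / 2; }       /* pair a vq belongs to */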


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-11 22:12 [RFC] kvm tools: Implement multiple VQ for virtio-net Sasha Levin
@ 2011-11-13 10:24 ` Michael S. Tsirkin
  2011-11-13 15:00   ` Sasha Levin
  0 siblings, 1 reply; 17+ messages in thread
From: Michael S. Tsirkin @ 2011-11-13 10:24 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Krishna Kumar, gorcunov, kvm, asias.hejun, virtualization,
	penberg, netdev, mingo

On Sat, Nov 12, 2011 at 12:12:01AM +0200, Sasha Levin wrote:
> This is a patch based on Krishna Kumar's patch series which implements
> multiple VQ support for virtio-net.
> 
> The patch was tested with ver3 of the patch.
> 
> Cc: Krishna Kumar <krkumar2@in.ibm.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: virtualization@lists.linux-foundation.org
> Cc: netdev@vger.kernel.org
> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>

Any performance numbers?

> ---
>  tools/kvm/include/kvm/virtio-pci.h |    2 +-
>  tools/kvm/virtio/net.c             |   94 +++++++++++++++++++----------------
>  2 files changed, 52 insertions(+), 44 deletions(-)
> 
> diff --git a/tools/kvm/include/kvm/virtio-pci.h b/tools/kvm/include/kvm/virtio-pci.h
> index 2bbb271..94d20ee 100644
> --- a/tools/kvm/include/kvm/virtio-pci.h
> +++ b/tools/kvm/include/kvm/virtio-pci.h
> @@ -6,7 +6,7 @@
>  
>  #include <linux/types.h>
>  
> -#define VIRTIO_PCI_MAX_VQ	3
> +#define VIRTIO_PCI_MAX_VQ	16
>  #define VIRTIO_PCI_MAX_CONFIG	1
>  
>  struct kvm;
> diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
> index cee2b5b..0754795 100644
> --- a/tools/kvm/virtio/net.c
> +++ b/tools/kvm/virtio/net.c
> @@ -27,9 +27,8 @@
>  #include <sys/wait.h>
>  
>  #define VIRTIO_NET_QUEUE_SIZE		128
> -#define VIRTIO_NET_NUM_QUEUES		2
> -#define VIRTIO_NET_RX_QUEUE		0
> -#define VIRTIO_NET_TX_QUEUE		1
> +#define VIRTIO_NET_NUM_QUEUES		16
> +#define VIRTIO_NET_IS_RX_QUEUE(x)	(((x) % 2) == 0)
>  
>  struct net_dev;
>  
> @@ -49,14 +48,13 @@ struct net_dev {
>  	struct virtio_net_config	config;
>  	u32				features;
>  
> -	pthread_t			io_rx_thread;
> -	pthread_mutex_t			io_rx_lock;
> -	pthread_cond_t			io_rx_cond;
> -
> -	pthread_t			io_tx_thread;
> -	pthread_mutex_t			io_tx_lock;
> -	pthread_cond_t			io_tx_cond;
> +	pthread_t			io_thread[VIRTIO_NET_NUM_QUEUES];
> +	pthread_mutex_t			io_lock[VIRTIO_NET_NUM_QUEUES];
> +	pthread_cond_t			io_cond[VIRTIO_NET_NUM_QUEUES];
>  
> +	int				rx_vq_num;
> +	int				tx_vq_num;
> +	int				vq_num;
>  	int				tap_fd;
>  	char				tap_name[IFNAMSIZ];
>  
> @@ -78,17 +76,22 @@ static void *virtio_net_rx_thread(void *p)
>  	struct net_dev *ndev = p;
>  	u16 out, in;
>  	u16 head;
> -	int len;
> +	int len, queue_num;
> +
> +	mutex_lock(&ndev->mutex);
> +	queue_num = ndev->rx_vq_num * 2;
> +	ndev->tx_vq_num++;
> +	mutex_unlock(&ndev->mutex);
>  
>  	kvm	= ndev->kvm;
> -	vq	= &ndev->vqs[VIRTIO_NET_RX_QUEUE];
> +	vq	= &ndev->vqs[queue_num];
>  
>  	while (1) {
>  
> -		mutex_lock(&ndev->io_rx_lock);
> +		mutex_lock(&ndev->io_lock[queue_num]);
>  		if (!virt_queue__available(vq))
> -			pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock);
> -		mutex_unlock(&ndev->io_rx_lock);
> +			pthread_cond_wait(&ndev->io_cond[queue_num], &ndev->io_lock[queue_num]);
> +		mutex_unlock(&ndev->io_lock[queue_num]);
>  
>  		while (virt_queue__available(vq)) {
>  
> @@ -99,7 +102,7 @@ static void *virtio_net_rx_thread(void *p)
>  			virt_queue__set_used_elem(vq, head, len);
>  
>  			/* We should interrupt guest right now, otherwise latency is huge. */
> -			ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_RX_QUEUE);
> +			ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, queue_num);
>  		}
>  
>  	}
> @@ -117,16 +120,21 @@ static void *virtio_net_tx_thread(void *p)
>  	struct net_dev *ndev = p;
>  	u16 out, in;
>  	u16 head;
> -	int len;
> +	int len, queue_num;
> +
> +	mutex_lock(&ndev->mutex);
> +	queue_num = ndev->tx_vq_num * 2 + 1;
> +	ndev->tx_vq_num++;
> +	mutex_unlock(&ndev->mutex);
>  
>  	kvm	= ndev->kvm;
> -	vq	= &ndev->vqs[VIRTIO_NET_TX_QUEUE];
> +	vq	= &ndev->vqs[queue_num];
>  
>  	while (1) {
> -		mutex_lock(&ndev->io_tx_lock);
> +		mutex_lock(&ndev->io_lock[queue_num]);
>  		if (!virt_queue__available(vq))
> -			pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock);
> -		mutex_unlock(&ndev->io_tx_lock);
> +			pthread_cond_wait(&ndev->io_cond[queue_num], &ndev->io_lock[queue_num]);
> +		mutex_unlock(&ndev->io_lock[queue_num]);
>  
>  		while (virt_queue__available(vq)) {
>  
> @@ -137,7 +145,7 @@ static void *virtio_net_tx_thread(void *p)
>  			virt_queue__set_used_elem(vq, head, len);
>  		}
>  
> -		ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_TX_QUEUE);
> +		ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, queue_num);
>  	}
>  
>  	pthread_exit(NULL);
> @@ -148,20 +156,9 @@ static void *virtio_net_tx_thread(void *p)
>  
>  static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
>  {
> -	switch (queue) {
> -	case VIRTIO_NET_TX_QUEUE:
> -		mutex_lock(&ndev->io_tx_lock);
> -		pthread_cond_signal(&ndev->io_tx_cond);
> -		mutex_unlock(&ndev->io_tx_lock);
> -		break;
> -	case VIRTIO_NET_RX_QUEUE:
> -		mutex_lock(&ndev->io_rx_lock);
> -		pthread_cond_signal(&ndev->io_rx_cond);
> -		mutex_unlock(&ndev->io_rx_lock);
> -		break;
> -	default:
> -		pr_warning("Unknown queue index %u", queue);
> -	}
> +	mutex_lock(&ndev->io_lock[queue]);
> +	pthread_cond_signal(&ndev->io_cond[queue]);
> +	mutex_unlock(&ndev->io_lock[queue]);
>  }
>  
>  static bool virtio_net__tap_init(const struct virtio_net_params *params,
> @@ -248,14 +245,17 @@ fail:
>  
>  static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
>  {
> -	pthread_mutex_init(&ndev->io_tx_lock, NULL);
> -	pthread_mutex_init(&ndev->io_rx_lock, NULL);
> +	int i;
>  
> -	pthread_cond_init(&ndev->io_tx_cond, NULL);
> -	pthread_cond_init(&ndev->io_rx_cond, NULL);
> +	for (i = 0; i < ndev->vq_num; i++) {
> +		pthread_mutex_init(&ndev->io_lock[i], NULL);
> +		pthread_cond_init(&ndev->io_cond[i], NULL);
> +	}
>  
> -	pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev);
> -	pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev);
> +	for (i = 0; i < ndev->vq_num; i += 2) {
> +		pthread_create(&ndev->io_thread[i], NULL, virtio_net_tx_thread, ndev);
> +		pthread_create(&ndev->io_thread[i + 1], NULL, virtio_net_rx_thread, ndev);
> +	}
>  }
>  
>  static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
> @@ -311,13 +311,19 @@ static u32 get_host_features(struct kvm *kvm, void *dev)
>  		| 1UL << VIRTIO_NET_F_HOST_TSO6
>  		| 1UL << VIRTIO_NET_F_GUEST_UFO
>  		| 1UL << VIRTIO_NET_F_GUEST_TSO4
> -		| 1UL << VIRTIO_NET_F_GUEST_TSO6;
> +		| 1UL << VIRTIO_NET_F_GUEST_TSO6
> +		| 1UL << VIRTIO_NET_F_MULTIQUEUE;
>  }
>  
>  static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
>  {
>  	struct net_dev *ndev = dev;
>  
> +	if (features & (1UL << VIRTIO_NET_F_MULTIQUEUE))
> +		ndev->vq_num = ndev->config.num_queues;
> +	else
> +		ndev->vq_num = 2;
> +
>  	ndev->features = features;
>  }
>  
> @@ -395,6 +401,8 @@ void virtio_net__init(const struct virtio_net_params *params)
>  		ndev->info.host_mac.addr[i]	= params->host_mac[i];
>  	}
>  
> +	ndev->config.num_queues = VIRTIO_NET_NUM_QUEUES;
> +
>  	ndev->mode = params->mode;
>  	if (ndev->mode == NET_MODE_TAP) {
>  		if (!virtio_net__tap_init(params, ndev))
> -- 
> 1.7.7.2


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-13 10:24 ` Michael S. Tsirkin
@ 2011-11-13 15:00   ` Sasha Levin
  2011-11-13 15:32     ` Sasha Levin
  2011-11-14  2:04     ` Asias He
  0 siblings, 2 replies; 17+ messages in thread
From: Sasha Levin @ 2011-11-13 15:00 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: penberg, kvm, mingo, asias.hejun, gorcunov, Krishna Kumar,
	Rusty Russell, virtualization, netdev

On Sun, 2011-11-13 at 12:24 +0200, Michael S. Tsirkin wrote:
> On Sat, Nov 12, 2011 at 12:12:01AM +0200, Sasha Levin wrote:
> > This is a patch based on Krishna Kumar's patch series which implements
> > multiple VQ support for virtio-net.
> > 
> > The patch was tested with ver3 of the patch.
> > 
> > Cc: Krishna Kumar <krkumar2@in.ibm.com>
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Cc: Rusty Russell <rusty@rustcorp.com.au>
> > Cc: virtualization@lists.linux-foundation.org
> > Cc: netdev@vger.kernel.org
> > Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> 
> Any performance numbers?

I tried finding a box with more than two cores so I could test it on
something like that as well.

From what I see, this patch causes a performance regression on my 2-core
box.

I'll send an updated KVM tools patch in a bit as well.

Before:

# netperf -H 192.168.33.4,ipv4 -t TCP_RR
MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
Local /Remote
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size     Size    Time     Rate         
bytes  Bytes  bytes    bytes   secs.    per sec   

16384  87380  1        1       10.00    11160.63   
16384  87380

# netperf -H 192.168.33.4,ipv4 -t UDP_RR
MIGRATED UDP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
Local /Remote
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size     Size    Time     Rate         
bytes  Bytes  bytes    bytes   secs.    per sec   

122880 122880 1        1       10.00    12072.64   
229376 229376

# netperf -H 192.168.33.4,ipv4 -t TCP_STREAM
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.33.4 (192.168.33.4) port 0 AF_INET
Recv   Send    Send                          
Socket Socket  Message  Elapsed              
Size   Size    Size     Time     Throughput  
bytes  bytes   bytes    secs.    10^6bits/sec  

 87380  16384  16384    10.00    4654.50

netperf -H 192.168.33.4,ipv4 -t TCP_STREAM -- -m 128
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.33.4 (192.168.33.4) port 0 AF_INET
Recv   Send    Send                          
Socket Socket  Message  Elapsed              
Size   Size    Size     Time     Throughput  
bytes  bytes   bytes    secs.    10^6bits/sec  

 87380  16384    128    10.00     635.45

# netperf -H 192.168.33.4,ipv4 -t UDP_STREAM           
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.33.4 (192.168.33.4) port 0 AF_INET
Socket  Message  Elapsed      Messages                
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

122880   65507   10.00      113894      0    5968.54
229376           10.00       89373           4683.54

# netperf -H 192.168.33.4,ipv4 -t UDP_STREAM -- -m 128
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.33.4 (192.168.33.4) port 0 AF_INET
Socket  Message  Elapsed      Messages                
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

122880     128   10.00      550634      0      56.38
229376           10.00      398786             40.84


After:

# netperf -H 192.168.33.4,ipv4 -t TCP_RR
MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
Local /Remote
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size     Size    Time     Rate         
bytes  Bytes  bytes    bytes   secs.    per sec   

16384  87380  1        1       10.00    8952.47   
16384  87380

# netperf -H 192.168.33.4,ipv4 -t UDP_RR
MIGRATED UDP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
Local /Remote
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size     Size    Time     Rate         
bytes  Bytes  bytes    bytes   secs.    per sec   

122880 122880 1        1       10.00    9534.52   
229376 229376

# netperf -H 192.168.33.4,ipv4 -t TCP_STREAM
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.33.4 (192.168.33.4) port 0 AF_INET
Recv   Send    Send                          
Socket Socket  Message  Elapsed              
Size   Size    Size     Time     Throughput  
bytes  bytes   bytes    secs.    10^6bits/sec  

 87380  16384  16384    10.13    2278.23

# netperf -H 192.168.33.4,ipv4 -t TCP_STREAM -- -m 128
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.33.4 (192.168.33.4) port 0 AF_INET
Recv   Send    Send                          
Socket Socket  Message  Elapsed              
Size   Size    Size     Time     Throughput  
bytes  bytes   bytes    secs.    10^6bits/sec  

 87380  16384    128    10.00     623.27   

# netperf -H 192.168.33.4,ipv4 -t UDP_STREAM
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.33.4 (192.168.33.4) port 0 AF_INET
Socket  Message  Elapsed      Messages                
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

122880   65507   10.00      136930      0    7175.72
229376           10.00       16726            876.51

# netperf -H 192.168.33.4,ipv4 -t UDP_STREAM -- -m 128
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.33.4 (192.168.33.4) port 0 AF_INET
Socket  Message  Elapsed      Messages                
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

122880     128   10.00      982492      0     100.61
229376           10.00      249597             25.56

-- 

Sasha.



* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-13 15:00   ` Sasha Levin
@ 2011-11-13 15:32     ` Sasha Levin
  2011-11-14  2:04     ` Asias He
  1 sibling, 0 replies; 17+ messages in thread
From: Sasha Levin @ 2011-11-13 15:32 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Krishna Kumar, gorcunov, kvm, asias.hejun, virtualization,
	penberg, netdev, mingo

On Sun, 2011-11-13 at 17:00 +0200, Sasha Levin wrote:
> On Sun, 2011-11-13 at 12:24 +0200, Michael S. Tsirkin wrote:
> > On Sat, Nov 12, 2011 at 12:12:01AM +0200, Sasha Levin wrote:
> > > This is a patch based on Krishna Kumar's patch series which implements
> > > multiple VQ support for virtio-net.
> > > 
> > > The patch was tested with ver3 of the patch.
> > > 
> > > Cc: Krishna Kumar <krkumar2@in.ibm.com>
> > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > Cc: Rusty Russell <rusty@rustcorp.com.au>
> > > Cc: virtualization@lists.linux-foundation.org
> > > Cc: netdev@vger.kernel.org
> > > Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> > 
> > Any performance numbers?
> 
> I tried finding a box with more than two cores so I could test it on
> something like that as well.
> 
> From what I see this patch causes a performance regression on my 2 core
> box.
> 

[snip]

After discussing it with Michael, we found out that for a single TCP
flow the guest signals the same TX VQ, but the RX VQ keeps changing,
which means host-guest synchronization of the flow hash is missing.
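
For illustration only (this instrumentation is not in the patch): a crude way
to confirm where the notifications land is to count guest kicks per virtqueue
in virtio_net_handle_callback. For a single netperf flow the TX kicks should
all pile up on one odd-numbered VQ, while activity on the even (RX) VQs keeps
moving around:

/* Hypothetical debug counter, not part of the patch; pr_warning() is used
 * only because the tool already provides it. */
static unsigned long vq_kicks[VIRTIO_NET_NUM_QUEUES];

static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
{
	if ((++vq_kicks[queue] % 100000) == 0)
		pr_warning("vq %d: %lu notifications", queue, vq_kicks[queue]);

	mutex_lock(&ndev->io_lock[queue]);
	pthread_cond_signal(&ndev->io_cond[queue]);
	mutex_unlock(&ndev->io_lock[queue]);
}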

-- 

Sasha.


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-13 15:00   ` Sasha Levin
  2011-11-13 15:32     ` Sasha Levin
@ 2011-11-14  2:04     ` Asias He
  2011-11-14 10:15       ` Sasha Levin
  2011-11-14 12:25       ` Pekka Enberg
  1 sibling, 2 replies; 17+ messages in thread
From: Asias He @ 2011-11-14  2:04 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Michael S. Tsirkin, penberg, kvm, mingo, gorcunov, Krishna Kumar,
	Rusty Russell, virtualization, netdev

Hi, Sasha

On 11/13/2011 11:00 PM, Sasha Levin wrote:
> On Sun, 2011-11-13 at 12:24 +0200, Michael S. Tsirkin wrote:
>> On Sat, Nov 12, 2011 at 12:12:01AM +0200, Sasha Levin wrote:
>>> This is a patch based on Krishna Kumar's patch series which implements
>>> multiple VQ support for virtio-net.
>>>
>>> The patch was tested with ver3 of the patch.
>>>
>>> Cc: Krishna Kumar<krkumar2@in.ibm.com>
>>> Cc: Michael S. Tsirkin<mst@redhat.com>
>>> Cc: Rusty Russell<rusty@rustcorp.com.au>
>>> Cc: virtualization@lists.linux-foundation.org
>>> Cc: netdev@vger.kernel.org
>>> Signed-off-by: Sasha Levin<levinsasha928@gmail.com>
>>
>> Any performance numbers?
>
> I tried finding a box with more than two cores so I could test it on
> something like that as well.
>
>> From what I see this patch causes a performance regression on my 2 core
> box.
>
> I'll send an updated KVM tools patch in a bit as well.
>
> Before:
>
> # netperf -H 192.168.33.4,ipv4 -t TCP_RR
> MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
> Local /Remote
> Socket Size   Request  Resp.   Elapsed  Trans.
> Send   Recv   Size     Size    Time     Rate
> bytes  Bytes  bytes    bytes   secs.    per sec
>
> 16384  87380  1        1       10.00    11160.63
> 16384  87380
>
> # netperf -H 192.168.33.4,ipv4 -t UDP_RR
> MIGRATED UDP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
> Local /Remote
> Socket Size   Request  Resp.   Elapsed  Trans.
> Send   Recv   Size     Size    Time     Rate
> bytes  Bytes  bytes    bytes   secs.    per sec
>
> 122880 122880 1        1       10.00    12072.64
> 229376 229376
>
> # netperf -H 192.168.33.4,ipv4 -t TCP_STREAM
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 192.168.33.4 (192.168.33.4) port 0 AF_INET
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
>
>   87380  16384  16384    10.00    4654.50
>
> netperf -H 192.168.33.4,ipv4 -t TCP_STREAM -- -m 128
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 192.168.33.4 (192.168.33.4) port 0 AF_INET
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
>
>   87380  16384    128    10.00     635.45
>
> # netperf -H 192.168.33.4,ipv4 -t UDP_STREAM
> MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 192.168.33.4 (192.168.33.4) port 0 AF_INET
> Socket  Message  Elapsed      Messages
> Size    Size     Time         Okay Errors   Throughput
> bytes   bytes    secs            #      #   10^6bits/sec
>
> 122880   65507   10.00      113894      0    5968.54
> 229376           10.00       89373           4683.54
>
> # netperf -H 192.168.33.4,ipv4 -t UDP_STREAM -- -m 128
> MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 192.168.33.4 (192.168.33.4) port 0 AF_INET
> Socket  Message  Elapsed      Messages
> Size    Size     Time         Okay Errors   Throughput
> bytes   bytes    secs            #      #   10^6bits/sec
>
> 122880     128   10.00      550634      0      56.38
> 229376           10.00      398786             40.84
>
>
> After:
>
> # netperf -H 192.168.33.4,ipv4 -t TCP_RR
> MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
> Local /Remote
> Socket Size   Request  Resp.   Elapsed  Trans.
> Send   Recv   Size     Size    Time     Rate
> bytes  Bytes  bytes    bytes   secs.    per sec
>
> 16384  87380  1        1       10.00    8952.47
> 16384  87380
>
> # netperf -H 192.168.33.4,ipv4 -t UDP_RR
> MIGRATED UDP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
> Local /Remote
> Socket Size   Request  Resp.   Elapsed  Trans.
> Send   Recv   Size     Size    Time     Rate
> bytes  Bytes  bytes    bytes   secs.    per sec
>
> 122880 122880 1        1       10.00    9534.52
> 229376 229376
>
> # netperf -H 192.168.33.4,ipv4 -t TCP_STREAM
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 192.168.33.4 (192.168.33.4) port 0 AF_INET
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
>
>   87380  16384  16384    10.13    2278.23
>
> # netperf -H 192.168.33.4,ipv4 -t TCP_STREAM -- -m 128
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 192.168.33.4 (192.168.33.4) port 0 AF_INET
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
>
>   87380  16384    128    10.00     623.27
>
> # netperf -H 192.168.33.4,ipv4 -t UDP_STREAM
> MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 192.168.33.4 (192.168.33.4) port 0 AF_INET
> Socket  Message  Elapsed      Messages
> Size    Size     Time         Okay Errors   Throughput
> bytes   bytes    secs            #      #   10^6bits/sec
>
> 122880   65507   10.00      136930      0    7175.72
> 229376           10.00       16726            876.51
>
> # netperf -H 192.168.33.4,ipv4 -t UDP_STREAM -- -m 128
> MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 192.168.33.4 (192.168.33.4) port 0 AF_INET
> Socket  Message  Elapsed      Messages
> Size    Size     Time         Okay Errors   Throughput
> bytes   bytes    secs            #      #   10^6bits/sec
>
> 122880     128   10.00      982492      0     100.61
> 229376           10.00      249597             25.56
>

Why are both the bandwidth and the latency dropping so dramatically
with multiple VQs?

-- 
Asias He


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-14  2:04     ` Asias He
@ 2011-11-14 10:15       ` Sasha Levin
  2011-11-15  4:44         ` Krishna Kumar2
  2011-11-14 12:25       ` Pekka Enberg
  1 sibling, 1 reply; 17+ messages in thread
From: Sasha Levin @ 2011-11-14 10:15 UTC (permalink / raw)
  To: Asias He
  Cc: Michael S. Tsirkin, penberg, kvm, mingo, gorcunov, Krishna Kumar,
	Rusty Russell, virtualization, netdev

On Mon, 2011-11-14 at 10:04 +0800, Asias He wrote:
> Hi, Shsha
> 
> On 11/13/2011 11:00 PM, Sasha Levin wrote:
> > On Sun, 2011-11-13 at 12:24 +0200, Michael S. Tsirkin wrote:
> >> On Sat, Nov 12, 2011 at 12:12:01AM +0200, Sasha Levin wrote:
> >>> This is a patch based on Krishna Kumar's patch series which implements
> >>> multiple VQ support for virtio-net.
> >>>
> >>> The patch was tested with ver3 of the patch.
> >>>
> >>> Cc: Krishna Kumar<krkumar2@in.ibm.com>
> >>> Cc: Michael S. Tsirkin<mst@redhat.com>
> >>> Cc: Rusty Russell<rusty@rustcorp.com.au>
> >>> Cc: virtualization@lists.linux-foundation.org
> >>> Cc: netdev@vger.kernel.org
> >>> Signed-off-by: Sasha Levin<levinsasha928@gmail.com>
> >>
> >> Any performance numbers?
> >
> > I tried finding a box with more than two cores so I could test it on
> > something like that as well.
> >
> >> From what I see this patch causes a performance regression on my 2 core
> > box.
> >
> > I'll send an updated KVM tools patch in a bit as well.
> >
> > Before:
> >
> > # netperf -H 192.168.33.4,ipv4 -t TCP_RR
> > MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> > to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
> > Local /Remote
> > Socket Size   Request  Resp.   Elapsed  Trans.
> > Send   Recv   Size     Size    Time     Rate
> > bytes  Bytes  bytes    bytes   secs.    per sec
> >
> > 16384  87380  1        1       10.00    11160.63
> > 16384  87380
> >
> > # netperf -H 192.168.33.4,ipv4 -t UDP_RR
> > MIGRATED UDP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> > to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
> > Local /Remote
> > Socket Size   Request  Resp.   Elapsed  Trans.
> > Send   Recv   Size     Size    Time     Rate
> > bytes  Bytes  bytes    bytes   secs.    per sec
> >
> > 122880 122880 1        1       10.00    12072.64
> > 229376 229376
> >
> > # netperf -H 192.168.33.4,ipv4 -t TCP_STREAM
> > MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> > 192.168.33.4 (192.168.33.4) port 0 AF_INET
> > Recv   Send    Send
> > Socket Socket  Message  Elapsed
> > Size   Size    Size     Time     Throughput
> > bytes  bytes   bytes    secs.    10^6bits/sec
> >
> >   87380  16384  16384    10.00    4654.50
> >
> > netperf -H 192.168.33.4,ipv4 -t TCP_STREAM -- -m 128
> > MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> > 192.168.33.4 (192.168.33.4) port 0 AF_INET
> > Recv   Send    Send
> > Socket Socket  Message  Elapsed
> > Size   Size    Size     Time     Throughput
> > bytes  bytes   bytes    secs.    10^6bits/sec
> >
> >   87380  16384    128    10.00     635.45
> >
> > # netperf -H 192.168.33.4,ipv4 -t UDP_STREAM
> > MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> > 192.168.33.4 (192.168.33.4) port 0 AF_INET
> > Socket  Message  Elapsed      Messages
> > Size    Size     Time         Okay Errors   Throughput
> > bytes   bytes    secs            #      #   10^6bits/sec
> >
> > 122880   65507   10.00      113894      0    5968.54
> > 229376           10.00       89373           4683.54
> >
> > # netperf -H 192.168.33.4,ipv4 -t UDP_STREAM -- -m 128
> > MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> > 192.168.33.4 (192.168.33.4) port 0 AF_INET
> > Socket  Message  Elapsed      Messages
> > Size    Size     Time         Okay Errors   Throughput
> > bytes   bytes    secs            #      #   10^6bits/sec
> >
> > 122880     128   10.00      550634      0      56.38
> > 229376           10.00      398786             40.84
> >
> >
> > After:
> >
> > # netperf -H 192.168.33.4,ipv4 -t TCP_RR
> > MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> > to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
> > Local /Remote
> > Socket Size   Request  Resp.   Elapsed  Trans.
> > Send   Recv   Size     Size    Time     Rate
> > bytes  Bytes  bytes    bytes   secs.    per sec
> >
> > 16384  87380  1        1       10.00    8952.47
> > 16384  87380
> >
> > # netperf -H 192.168.33.4,ipv4 -t UDP_RR
> > MIGRATED UDP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> > to 192.168.33.4 (192.168.33.4) port 0 AF_INET : first burst 0
> > Local /Remote
> > Socket Size   Request  Resp.   Elapsed  Trans.
> > Send   Recv   Size     Size    Time     Rate
> > bytes  Bytes  bytes    bytes   secs.    per sec
> >
> > 122880 122880 1        1       10.00    9534.52
> > 229376 229376
> >
> > # netperf -H 192.168.33.4,ipv4 -t TCP_STREAM
> > MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> > 192.168.33.4 (192.168.33.4) port 0 AF_INET
> > Recv   Send    Send
> > Socket Socket  Message  Elapsed
> > Size   Size    Size     Time     Throughput
> > bytes  bytes   bytes    secs.    10^6bits/sec
> >
> >   87380  16384  16384    10.13    2278.23
> >
> > # netperf -H 192.168.33.4,ipv4 -t TCP_STREAM -- -m 128
> > MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> > 192.168.33.4 (192.168.33.4) port 0 AF_INET
> > Recv   Send    Send
> > Socket Socket  Message  Elapsed
> > Size   Size    Size     Time     Throughput
> > bytes  bytes   bytes    secs.    10^6bits/sec
> >
> >   87380  16384    128    10.00     623.27
> >
> > # netperf -H 192.168.33.4,ipv4 -t UDP_STREAM
> > MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> > 192.168.33.4 (192.168.33.4) port 0 AF_INET
> > Socket  Message  Elapsed      Messages
> > Size    Size     Time         Okay Errors   Throughput
> > bytes   bytes    secs            #      #   10^6bits/sec
> >
> > 122880   65507   10.00      136930      0    7175.72
> > 229376           10.00       16726            876.51
> >
> > # netperf -H 192.168.33.4,ipv4 -t UDP_STREAM -- -m 128
> > MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> > 192.168.33.4 (192.168.33.4) port 0 AF_INET
> > Socket  Message  Elapsed      Messages
> > Size    Size     Time         Okay Errors   Throughput
> > bytes   bytes    secs            #      #   10^6bits/sec
> >
> > 122880     128   10.00      982492      0     100.61
> > 229376           10.00      249597             25.56
> >
> 
> Why both the bandwidth and latency performance are dropping so 
> dramatically with multiple VQ?

It looks like there's no hash sync between host and guest, which makes
the RX VQ change for every packet. This is my guess.

-- 

Sasha.



* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-14  2:04     ` Asias He
  2011-11-14 10:15       ` Sasha Levin
@ 2011-11-14 12:25       ` Pekka Enberg
  2011-11-14 13:05         ` Michael S. Tsirkin
  1 sibling, 1 reply; 17+ messages in thread
From: Pekka Enberg @ 2011-11-14 12:25 UTC (permalink / raw)
  To: Asias He
  Cc: Sasha Levin, Michael S. Tsirkin, kvm, mingo, gorcunov,
	Krishna Kumar, Rusty Russell, virtualization, netdev

On Mon, Nov 14, 2011 at 4:04 AM, Asias He <asias.hejun@gmail.com> wrote:
> Why both the bandwidth and latency performance are dropping so dramatically
> with multiple VQ?

What's the expected benefit from multiple VQs, i.e. why are we doing
these patches, Sasha?


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-14 12:25       ` Pekka Enberg
@ 2011-11-14 13:05         ` Michael S. Tsirkin
  2011-11-16  0:04           ` Rusty Russell
  0 siblings, 1 reply; 17+ messages in thread
From: Michael S. Tsirkin @ 2011-11-14 13:05 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Krishna Kumar, kvm, Asias He, virtualization, gorcunov,
	Sasha Levin, netdev, mingo

On Mon, Nov 14, 2011 at 02:25:17PM +0200, Pekka Enberg wrote:
> On Mon, Nov 14, 2011 at 4:04 AM, Asias He <asias.hejun@gmail.com> wrote:
> > Why both the bandwidth and latency performance are dropping so dramatically
> > with multiple VQ?
> 
> What's the expected benefit from multiple VQs

Heh, the original patchset didn't mention this :) It really should.
They are supposed to speed up networking for high smp guests.

> i.e. why are doing the patches Sasha?

-- 
MST


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-14 10:15       ` Sasha Levin
@ 2011-11-15  4:44         ` Krishna Kumar2
  2011-11-15 15:30           ` Sasha Levin
  2011-11-16  6:10           ` jason wang
  0 siblings, 2 replies; 17+ messages in thread
From: Krishna Kumar2 @ 2011-11-15  4:44 UTC (permalink / raw)
  To: Sasha Levin
  Cc: penberg, kvm, Michael S. Tsirkin, Asias He, virtualization,
	gorcunov, netdev, mingo

Sasha Levin <levinsasha928@gmail.com> wrote on 11/14/2011 03:45:40 PM:

> > Why both the bandwidth and latency performance are dropping so
> > dramatically with multiple VQ?
>
> It looks like theres no hash sync between host and guest, which makes
> the RX VQ change for every packet. This is my guess.

Yes, I confirmed this happens for macvtap. I am
using ixgbe - it calls skb_record_rx_queue when
an skb is allocated, but sets rxhash only when a
packet arrives. Macvtap checks record_rx_queue
first, ahead of rxhash (as part of my patch making
macvtap multiqueue), hence different skbs result
in macvtap selecting different VQs.

Reordering macvtap to use rxhash first results in
all packets going to the same VQ. The code snippet
is:

{
	...
	if (!numvtaps)
                goto out;

	rxq = skb_get_rxhash(skb);
	if (rxq) {
		tap = rcu_dereference(vlan->taps[rxq % numvtaps]);
		if (tap)
			goto out;
	}

	if (likely(skb_rx_queue_recorded(skb))) {
		rxq = skb_get_rx_queue(skb);

	while (unlikely(rxq >= numvtaps))
		rxq -= numvtaps;

	tap = rcu_dereference(vlan->taps[rxq]);
	if (tap)
		goto out;
	}
}

I will submit a patch for macvtap separately. I am working
towards the other issue pointed out - different vhost
threads handling rx/tx of a single flow.

thanks,

- KK


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-15  4:44         ` Krishna Kumar2
@ 2011-11-15 15:30           ` Sasha Levin
  2011-11-16  6:10           ` jason wang
  1 sibling, 0 replies; 17+ messages in thread
From: Sasha Levin @ 2011-11-15 15:30 UTC (permalink / raw)
  To: Krishna Kumar2
  Cc: Asias He, gorcunov, kvm, mingo, Michael S. Tsirkin, netdev,
	penberg, Rusty Russell, virtualization

On Tue, 2011-11-15 at 10:14 +0530, Krishna Kumar2 wrote:
> Sasha Levin <levinsasha928@gmail.com> wrote on 11/14/2011 03:45:40 PM:
> 
> > > Why both the bandwidth and latency performance are dropping so
> > > dramatically with multiple VQ?
> >
> > It looks like theres no hash sync between host and guest, which makes
> > the RX VQ change for every packet. This is my guess.
> 
> Yes, I confirmed this happens for macvtap. I am
> using ixgbe - it calls skb_record_rx_queue when
> a skb is allocated, but sets rxhash when a packet
> arrives. Macvtap is relying on record_rx_queue
> first ahead of rxhash (as part of my patch making
> macvtap multiqueue), hence different skbs result
> in macvtap selecting different vq's.

I'm seeing this behavior in a non-macvtap-related setup as well (simple
tap <-> virtio-net).

-- 

Sasha.



* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-14 13:05         ` Michael S. Tsirkin
@ 2011-11-16  0:04           ` Rusty Russell
  2011-11-16  7:23             ` Michael S. Tsirkin
  0 siblings, 1 reply; 17+ messages in thread
From: Rusty Russell @ 2011-11-16  0:04 UTC (permalink / raw)
  To: Michael S. Tsirkin, Pekka Enberg
  Cc: Krishna Kumar, kvm, Asias He, virtualization, gorcunov,
	Sasha Levin, netdev, mingo, Stephen Hemminger

On Mon, 14 Nov 2011 15:05:07 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Mon, Nov 14, 2011 at 02:25:17PM +0200, Pekka Enberg wrote:
> > On Mon, Nov 14, 2011 at 4:04 AM, Asias He <asias.hejun@gmail.com> wrote:
> > > Why both the bandwidth and latency performance are dropping so dramatically
> > > with multiple VQ?
> > 
> > What's the expected benefit from multiple VQs
> 
> Heh, the original patchset didn't mention this :) It really should.
> They are supposed to speed up networking for high smp guests.

If we have one queue per guest CPU, does this allow us to run lockless?

Thanks,
Rusty.


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-15  4:44         ` Krishna Kumar2
  2011-11-15 15:30           ` Sasha Levin
@ 2011-11-16  6:10           ` jason wang
  2011-11-16  9:09             ` Krishna Kumar2
  1 sibling, 1 reply; 17+ messages in thread
From: jason wang @ 2011-11-16  6:10 UTC (permalink / raw)
  To: Krishna Kumar2
  Cc: penberg, kvm, Michael S. Tsirkin, Asias He, virtualization,
	gorcunov, Sasha Levin, netdev, mingo

On 11/15/2011 12:44 PM, Krishna Kumar2 wrote:
> Sasha Levin <levinsasha928@gmail.com> wrote on 11/14/2011 03:45:40 PM:
>
>>> Why both the bandwidth and latency performance are dropping so
>>> dramatically with multiple VQ?
>> It looks like theres no hash sync between host and guest, which makes
>> the RX VQ change for every packet. This is my guess.
> Yes, I confirmed this happens for macvtap. I am
> using ixgbe - it calls skb_record_rx_queue when
> a skb is allocated, but sets rxhash when a packet
> arrives. Macvtap is relying on record_rx_queue
> first ahead of rxhash (as part of my patch making
> macvtap multiqueue), hence different skbs result
> in macvtap selecting different vq's.
>
> Reordering macvtap to use rxhash first results in
> all packets going to the same VQ. The code snippet
> is:
>
> {
> 	...
> 	if (!numvtaps)
>                 goto out;
>
> 	rxq = skb_get_rxhash(skb);
> 	if (rxq) {
> 		tap = rcu_dereference(vlan->taps[rxq % numvtaps]);
> 		if (tap)
> 			goto out;
> 	}
>
> 	if (likely(skb_rx_queue_recorded(skb))) {
> 		rxq = skb_get_rx_queue(skb);
>
> 		while (unlikely(rxq >= numvtaps))
> 			rxq -= numvtaps;
> 			tap = rcu_dereference(vlan->taps[rxq]);
> 			if (tap)
> 				goto out;
> 	}
> }
>
> I will submit a patch for macvtap separately. I am working
> towards the other issue pointed out - different vhost
> threads handling rx/tx of a single flow.
Hello Krishna:

Do you have any thoughts in mind on how to solve the issue of flow handling?

Maybe some performance numbers first would be better; they would let us know
where we are. During testing of my patchset I found a big regression in
small-packet transmission, and more retransmissions were noticed. This may
also be an issue of flow affinity. One interesting thing would be to see
whether this happens with your patches :)

I've played with a basic flow director implementation based on my series,
which tries to make sure the packets of a flow are handled by the same
vhost thread/guest vcpu. This is done by:

- binding each virtqueue to a guest cpu
- recording the hash-to-queue mapping when the guest sends packets, and
using that mapping to choose the virtqueue when forwarding packets to the
guest (a rough sketch of this mapping follows below)

Tests show some help for receiving packets from an external host and for
sending packets to the local host, but it hurts the performance of
sending packets to a remote host. This is not a perfect solution as it
cannot handle the guest moving processes among vcpus; I plan to try
accelerated RFS and sharing the mapping between host and guest.

Anyway, this is just for receiving; small-packet sending needs more
thought.
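
For illustration, a very rough sketch of the "record on TX, reuse on RX"
mapping described above. The names, table size and layout are hypothetical
(this is not the actual implementation), and it deliberately ignores stale
entries and vcpu migration:

#include <linux/types.h>

#define FLOW_TABLE_SIZE	1024	/* assumed size, power of two */

struct flow_dir {
	u16	txq[FLOW_TABLE_SIZE];	/* last TX queue pair seen per hash bucket */
};

/* Transmit path: remember which queue pair (vq / 2) the guest used
 * for this flow hash. */
static inline void flow_dir_record(struct flow_dir *fd, u32 hash, u16 pair)
{
	fd->txq[hash & (FLOW_TABLE_SIZE - 1)] = pair;
}

/* Receive path: steer the packet back to the RX virtqueue (pair * 2 in
 * the even/odd layout) of the pair this flow last transmitted on, so a
 * single vhost thread / guest vcpu keeps handling the whole flow. */
static inline int flow_dir_rx_vq(struct flow_dir *fd, u32 hash)
{
	return fd->txq[hash & (FLOW_TABLE_SIZE - 1)] * 2;
}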

Thanks

>
> thanks,
>
> - KK
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-16  0:04           ` Rusty Russell
@ 2011-11-16  7:23             ` Michael S. Tsirkin
  2011-11-21  0:41               ` Rusty Russell
  0 siblings, 1 reply; 17+ messages in thread
From: Michael S. Tsirkin @ 2011-11-16  7:23 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Krishna Kumar, gorcunov, kvm, Asias He, virtualization,
	Pekka Enberg, Sasha Levin, netdev, mingo, Stephen Hemminger

On Wed, Nov 16, 2011 at 10:34:42AM +1030, Rusty Russell wrote:
> On Mon, 14 Nov 2011 15:05:07 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Mon, Nov 14, 2011 at 02:25:17PM +0200, Pekka Enberg wrote:
> > > On Mon, Nov 14, 2011 at 4:04 AM, Asias He <asias.hejun@gmail.com> wrote:
> > > > Why both the bandwidth and latency performance are dropping so dramatically
> > > > with multiple VQ?
> > > 
> > > What's the expected benefit from multiple VQs
> > 
> > Heh, the original patchset didn't mention this :) It really should.
> > They are supposed to speed up networking for high smp guests.
> 
> If we have one queue per guest CPU, does this allow us to run lockless?
> 
> Thanks,
> Rusty.

LLTX? It's supposed to be deprecated, isn't it?

-- 
MST


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-16  6:10           ` jason wang
@ 2011-11-16  9:09             ` Krishna Kumar2
  2011-11-16 10:05               ` jason wang
  0 siblings, 1 reply; 17+ messages in thread
From: Krishna Kumar2 @ 2011-11-16  9:09 UTC (permalink / raw)
  To: jason wang
  Cc: penberg, kvm, Michael S. Tsirkin, Asias He, virtualization,
	gorcunov, Sasha Levin, netdev, mingo

jason wang <jasowang@redhat.com> wrote on 11/16/2011 11:40:45 AM:

Hi Jason,

> Have any thought in mind to solve the issue of flow handling?

So far nothing concrete.

> Maybe some performance numbers first is better, it would let us know
> where we are. During the test of my patchset, I find big regression of
> small packet transmission, and more retransmissions were noticed. This
> maybe also the issue of flow affinity. One interesting things is to see
> whether this happens in your patches :)

I haven't got any results for small packets, but will run tests this week
and send an update. I remember my earlier patches having a regression
for small packets.

> I've played with a basic flow director implementation based on my series
> which want to make sure the packets of a flow was handled by the same
> vhost thread/guest vcpu. This is done by:
>
> - bind virtqueue to guest cpu
> - record the hash to queue mapping when guest sending packets and use
> this mapping to choose the virtqueue when forwarding packets to guest
>
> Test shows some help during for receiving packets from external host and
> packet sending to local host. But it would hurt the performance of
> sending packets to remote host. This is not the perfect solution as it
> can not handle guest moving processes among vcpus, I plan to try
> accelerate RFS and sharing the mapping between host and guest.
>
> Anyway this is just for receiving, the small packet sending need more
> thoughts.

I don't recollect small-packet performance for guest->local host.
Also, using multiple tun devices on the bridge (instead of mq-tun)
keeps the rx/tx of a flow on a single VQ. Then you can avoid
mq-tun with its queue selector function, etc. Have you tried it?
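
For illustration (a hypothetical helper, not from any of the patches): each
plain tap device is created the usual way, one per queue pair, and then
enslaved to the bridge, so no queue-selector hook in the tun driver is
involved.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

/* Open one ordinary (single-queue) tap device by name; error handling
 * kept minimal for brevity. */
static int open_tap(const char *name)
{
	struct ifreq ifr;
	int fd;

	fd = open("/dev/net/tun", O_RDWR);
	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);

	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
		close(fd);
		return -1;
	}

	return fd;	/* the device is then added to the bridge as usual */
}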

I will run my tests this week and get back.

thanks,

- KK


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-16  9:09             ` Krishna Kumar2
@ 2011-11-16 10:05               ` jason wang
  0 siblings, 0 replies; 17+ messages in thread
From: jason wang @ 2011-11-16 10:05 UTC (permalink / raw)
  To: Krishna Kumar2
  Cc: penberg, kvm, Michael S. Tsirkin, Asias He, virtualization,
	gorcunov, Sasha Levin, netdev, mingo

On 11/16/2011 05:09 PM, Krishna Kumar2 wrote:
> jason wang <jasowang@redhat.com> wrote on 11/16/2011 11:40:45 AM:
>
> Hi Jason,
>
>> Have any thought in mind to solve the issue of flow handling?
> So far nothing concrete.
>
>> Maybe some performance numbers first is better, it would let us know
>> where we are. During the test of my patchset, I find big regression of
>> small packet transmission, and more retransmissions were noticed. This
>> maybe also the issue of flow affinity. One interesting things is to see
>> whether this happens in your patches :)
> I haven't got any results for small packet, but will run this week
> and send an update. I remember my earlier patches having regression
> for small packets.
>
>> I've played with a basic flow director implementation based on my series
>> which want to make sure the packets of a flow was handled by the same
>> vhost thread/guest vcpu. This is done by:
>>
>> - bind virtqueue to guest cpu
>> - record the hash to queue mapping when guest sending packets and use
>> this mapping to choose the virtqueue when forwarding packets to guest
>>
>> Test shows some help during for receiving packets from external host and
>> packet sending to local host. But it would hurt the performance of
>> sending packets to remote host. This is not the perfect solution as it
>> can not handle guest moving processes among vcpus, I plan to try
>> accelerate RFS and sharing the mapping between host and guest.
>>
>> Anyway this is just for receiving, the small packet sending need more
>> thoughts.
> I don't recollect small packet performance for guest->local host.
> Also, using multiple tuns devices on the bridge (instead of mq-tun)
> balances the rx/tx of a flow to a single vq. Then you can avoid
> mq-tun with it's queue selector function, etc. Have you tried it?

I remember it worked when I tested your patchset early this year, but I
didn't measure its performance. If multiple tun devices were used, the MAC
address table would be updated very frequently and packets could not be
forwarded in parallel (unless we make the bridge support multiqueue).

>
> I will run my tests this week and get back.
>
> thanks,
>
> - KK
>


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-16  7:23             ` Michael S. Tsirkin
@ 2011-11-21  0:41               ` Rusty Russell
  2011-11-22 18:14                 ` Stephen Hemminger
  0 siblings, 1 reply; 17+ messages in thread
From: Rusty Russell @ 2011-11-21  0:41 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Krishna Kumar, gorcunov, kvm, Asias He, virtualization,
	Pekka Enberg, Sasha Levin, netdev, mingo, Stephen Hemminger

On Wed, 16 Nov 2011 09:23:17 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Wed, Nov 16, 2011 at 10:34:42AM +1030, Rusty Russell wrote:
> > On Mon, 14 Nov 2011 15:05:07 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Mon, Nov 14, 2011 at 02:25:17PM +0200, Pekka Enberg wrote:
> > > > On Mon, Nov 14, 2011 at 4:04 AM, Asias He <asias.hejun@gmail.com> wrote:
> > > > > Why both the bandwidth and latency performance are dropping so dramatically
> > > > > with multiple VQ?
> > > > 
> > > > What's the expected benefit from multiple VQs
> > > 
> > > Heh, the original patchset didn't mention this :) It really should.
> > > They are supposed to speed up networking for high smp guests.
> > 
> > If we have one queue per guest CPU, does this allow us to run lockless?
> > 
> > Thanks,
> > Rusty.
> 
> LLTX? It's supposed to be deprecated, isn't it?

I was referring back to "Subject: virtio net lockless ring" which
Stephen sent back in June, nothing more specific.

I assumed from his query that this was still an active area of
exploration...

Stephen?

Thanks,
Rusty.


* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
  2011-11-21  0:41               ` Rusty Russell
@ 2011-11-22 18:14                 ` Stephen Hemminger
  0 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2011-11-22 18:14 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Michael S. Tsirkin, Pekka Enberg, Asias He, Sasha Levin, kvm,
	mingo, gorcunov, Krishna Kumar, virtualization, netdev

I have been playing with userspace-rcu, which has a number of neat
lockless routines for queuing and hashing. But there aren't kernel versions,
and several of them may require cmpxchg to work.
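
As a minimal sketch of the kind of cmpxchg-based primitive being discussed
(not taken from userspace-rcu): a bare Treiber-style lock-free push using
GCC's compare-and-swap builtin. Pop is deliberately omitted, since it needs
ABA protection, which is part of why these structures are not trivial to
carry over to the kernel.

struct lf_node {
	struct lf_node *next;
};

/* Lock-free push onto a singly linked stack: retry the compare-and-swap
 * until no other thread has changed the head underneath us. */
static void lf_push(struct lf_node **head, struct lf_node *node)
{
	struct lf_node *old;

	do {
		old = *head;
		node->next = old;
	} while (!__sync_bool_compare_and_swap(head, old, node));
}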




Thread overview: 17+ messages
2011-11-11 22:12 [RFC] kvm tools: Implement multiple VQ for virtio-net Sasha Levin
2011-11-13 10:24 ` Michael S. Tsirkin
2011-11-13 15:00   ` Sasha Levin
2011-11-13 15:32     ` Sasha Levin
2011-11-14  2:04     ` Asias He
2011-11-14 10:15       ` Sasha Levin
2011-11-15  4:44         ` Krishna Kumar2
2011-11-15 15:30           ` Sasha Levin
2011-11-16  6:10           ` jason wang
2011-11-16  9:09             ` Krishna Kumar2
2011-11-16 10:05               ` jason wang
2011-11-14 12:25       ` Pekka Enberg
2011-11-14 13:05         ` Michael S. Tsirkin
2011-11-16  0:04           ` Rusty Russell
2011-11-16  7:23             ` Michael S. Tsirkin
2011-11-21  0:41               ` Rusty Russell
2011-11-22 18:14                 ` Stephen Hemminger
