public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC v2 0/3] virtio_net: enabling tx interrupts
@ 2014-10-15 14:32 Michael S. Tsirkin
  2014-10-15 14:32 ` [PATCH RFC v2 1/3] virtio_net: enable tx interrupt Michael S. Tsirkin
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Michael S. Tsirkin @ 2014-10-15 14:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Jason Wang

RFC patches to enable tx interrupts.
This is to demonstrate how this can be done without
core virtio changes, and to make sure I understand
the new APIs correctly.

Testing TBD: I'm going offline for several days, will only be able to
test afterwards (unless someone beats me to this).
So I thought I'd post this early.

Applies on top of patch: "virtio_net: fix use after free"
that I recently sent.

Changes:
	address comments by Jason Wang, use delayed cb everywhere
	rebased Jason's patch on top of mine and include it (with some tweaks)

Jason, feel free to either use this, try it out, or take ideas out
of this patchset and reuse in any other way you see fit.

Jason Wang (1):
  virtio-net: optimize free_old_xmit_skbs stats

Michael S. Tsirkin (2):
  virtio_net: enable tx interrupt
  virtio_net: bql

 drivers/net/virtio_net.c | 153 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 111 insertions(+), 42 deletions(-)

-- 
MST


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH RFC v2 1/3] virtio_net: enable tx interrupt
  2014-10-15 14:32 [PATCH RFC v2 0/3] virtio_net: enabling tx interrupts Michael S. Tsirkin
@ 2014-10-15 14:32 ` Michael S. Tsirkin
  2014-10-17  5:11   ` Jason Wang
  2014-10-15 14:32 ` [PATCH RFC v2 2/3] virtio_net: bql Michael S. Tsirkin
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 7+ messages in thread
From: Michael S. Tsirkin @ 2014-10-15 14:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Jason Wang, Rusty Russell, virtualization, netdev

On newer hosts that support delayed tx interrupts,
we probably don't have much to gain from orphaning
packets early.

Based on patch by Jason Wang.

Note: this might degrade performance for
hosts without event idx support.
Should be addressed by the next patch.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 137 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 94 insertions(+), 43 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 13d0a8b..a9bf178 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -72,6 +72,8 @@ struct send_queue {
 
 	/* Name of the send queue: output.$index */
 	char name[40];
+
+	struct napi_struct napi;
 };
 
 /* Internal representation of a receive virtqueue */
@@ -217,15 +219,37 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 	return p;
 }
 
+static unsigned int free_old_xmit_skbs(struct send_queue *sq, int budget)
+{
+	struct sk_buff *skb;
+	unsigned int len;
+	struct virtnet_info *vi = sq->vq->vdev->priv;
+	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
+	unsigned int packets = 0;
+
+	while (packets < budget &&
+	       (skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		pr_debug("Sent skb %p\n", skb);
+
+		u64_stats_update_begin(&stats->tx_syncp);
+		stats->tx_bytes += skb->len;
+		stats->tx_packets++;
+		u64_stats_update_end(&stats->tx_syncp);
+
+		dev_kfree_skb_any(skb);
+		packets++;
+	}
+
+	return packets;
+}
+
 static void skb_xmit_done(struct virtqueue *vq)
 {
 	struct virtnet_info *vi = vq->vdev->priv;
+	struct send_queue *sq = &vi->sq[vq2txq(vq)];
 
-	/* Suppress further interrupts. */
-	virtqueue_disable_cb(vq);
-
-	/* We were probably waiting for more output buffers. */
-	netif_wake_subqueue(vi->dev, vq2txq(vq));
+	if (napi_schedule_prep(&sq->napi))
+		__napi_schedule(&sq->napi);
 }
 
 static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
@@ -774,6 +798,37 @@ again:
 	return received;
 }
 
+static int virtnet_poll_tx(struct napi_struct *napi, int budget)
+{
+	struct send_queue *sq =
+		container_of(napi, struct send_queue, napi);
+	struct virtnet_info *vi = sq->vq->vdev->priv;
+	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
+	unsigned int sent = 0;
+	bool enable_done;
+
+again:
+	__netif_tx_lock(txq, smp_processor_id());
+	virtqueue_disable_cb(sq->vq);
+	sent += free_old_xmit_skbs(sq, budget - sent);
+
+	if (sent < budget) {
+		enable_done = virtqueue_enable_cb_delayed(sq->vq);
+		napi_complete(napi);
+		__netif_tx_unlock(txq);
+		if (unlikely(enable_done) && napi_schedule_prep(napi)) {
+			virtqueue_disable_cb(sq->vq);
+			__napi_schedule(napi);
+			goto again;
+		}
+	} else {
+		__netif_tx_unlock(txq);
+	}
+
+	netif_wake_subqueue(vi->dev, vq2txq(sq->vq));
+	return sent;
+}
+
 #ifdef CONFIG_NET_RX_BUSY_POLL
 /* must be called with local_bh_disable()d */
 static int virtnet_busy_poll(struct napi_struct *napi)
@@ -822,30 +877,12 @@ static int virtnet_open(struct net_device *dev)
 			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
 				schedule_delayed_work(&vi->refill, 0);
 		virtnet_napi_enable(&vi->rq[i]);
+		napi_enable(&vi->sq[i].napi);
 	}
 
 	return 0;
 }
 
-static void free_old_xmit_skbs(struct send_queue *sq)
-{
-	struct sk_buff *skb;
-	unsigned int len;
-	struct virtnet_info *vi = sq->vq->vdev->priv;
-	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
-
-	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-		pr_debug("Sent skb %p\n", skb);
-
-		u64_stats_update_begin(&stats->tx_syncp);
-		stats->tx_bytes += skb->len;
-		stats->tx_packets++;
-		u64_stats_update_end(&stats->tx_syncp);
-
-		dev_kfree_skb_any(skb);
-	}
-}
-
 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 {
 	struct skb_vnet_hdr *hdr;
@@ -911,7 +948,9 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 		sg_set_buf(sq->sg, hdr, hdr_len);
 		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
 	}
-	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
+
+	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb,
+				    GFP_ATOMIC);
 }
 
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -919,12 +958,16 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct virtnet_info *vi = netdev_priv(dev);
 	int qnum = skb_get_queue_mapping(skb);
 	struct send_queue *sq = &vi->sq[qnum];
-	int err;
+	int err, qsize = virtqueue_get_vring_size(sq->vq);
 	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
 	bool kick = !skb->xmit_more;
+	bool stopped;
+
+	virtqueue_disable_cb(sq->vq);
 
-	/* Free up any pending old buffers before queueing new ones. */
-	free_old_xmit_skbs(sq);
+	/* We are going to push one skb.
+	 * Try to pop one off to free space for it. */
+	free_old_xmit_skbs(sq, 1);
 
 	/* Try to transmit */
 	err = xmit_skb(sq, skb);
@@ -940,27 +983,25 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_OK;
 	}
 
-	/* Don't wait up for transmitted skbs to be freed. */
-	skb_orphan(skb);
-	nf_reset(skb);
-
 	/* Apparently nice girls don't return TX_BUSY; stop the queue
 	 * before it gets out of hand.  Naturally, this wastes entries. */
 	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
 		netif_stop_subqueue(dev, qnum);
-		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
-			/* More just got used, free them then recheck. */
-			free_old_xmit_skbs(sq);
-			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
-				netif_start_subqueue(dev, qnum);
-				virtqueue_disable_cb(sq->vq);
-			}
-		}
+		stopped = true;
+	} else {
+		stopped = false;
 	}
 
 	if (kick || netif_xmit_stopped(txq))
 		virtqueue_kick(sq->vq);
 
+	if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
+		/* More just got used, free them then recheck. */
+		free_old_xmit_skbs(sq, qsize);
+		if (stopped && sq->vq->num_free >= 2+MAX_SKB_FRAGS)
+			netif_start_subqueue(dev, qnum);
+	}
+
 	return NETDEV_TX_OK;
 }
 
@@ -1137,8 +1178,10 @@ static int virtnet_close(struct net_device *dev)
 	/* Make sure refill_work doesn't re-enable napi! */
 	cancel_delayed_work_sync(&vi->refill);
 
-	for (i = 0; i < vi->max_queue_pairs; i++)
+	for (i = 0; i < vi->max_queue_pairs; i++) {
 		napi_disable(&vi->rq[i].napi);
+		napi_disable(&vi->sq[i].napi);
+	}
 
 	return 0;
 }
@@ -1457,8 +1500,10 @@ static void virtnet_free_queues(struct virtnet_info *vi)
 {
 	int i;
 
-	for (i = 0; i < vi->max_queue_pairs; i++)
+	for (i = 0; i < vi->max_queue_pairs; i++) {
 		netif_napi_del(&vi->rq[i].napi);
+		netif_napi_del(&vi->sq[i].napi);
+	}
 
 	kfree(vi->rq);
 	kfree(vi->sq);
@@ -1612,6 +1657,8 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
 			       napi_weight);
 		napi_hash_add(&vi->rq[i].napi);
+		netif_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
+			       napi_weight);
 
 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
 		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
@@ -1916,8 +1963,10 @@ static int virtnet_freeze(struct virtio_device *vdev)
 	if (netif_running(vi->dev)) {
 		for (i = 0; i < vi->max_queue_pairs; i++) {
 			napi_disable(&vi->rq[i].napi);
+			napi_disable(&vi->sq[i].napi);
 			napi_hash_del(&vi->rq[i].napi);
 			netif_napi_del(&vi->rq[i].napi);
+			netif_napi_del(&vi->sq[i].napi);
 		}
 	}
 
@@ -1942,8 +1991,10 @@ static int virtnet_restore(struct virtio_device *vdev)
 			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
 				schedule_delayed_work(&vi->refill, 0);
 
-		for (i = 0; i < vi->max_queue_pairs; i++)
+		for (i = 0; i < vi->max_queue_pairs; i++) {
 			virtnet_napi_enable(&vi->rq[i]);
+			napi_enable(&vi->sq[i].napi);
+		}
 	}
 
 	netif_device_attach(vi->dev);
-- 
MST


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH RFC v2 2/3] virtio_net: bql
  2014-10-15 14:32 [PATCH RFC v2 0/3] virtio_net: enabling tx interrupts Michael S. Tsirkin
  2014-10-15 14:32 ` [PATCH RFC v2 1/3] virtio_net: enable tx interrupt Michael S. Tsirkin
@ 2014-10-15 14:32 ` Michael S. Tsirkin
  2014-10-17  5:16   ` Jason Wang
  2014-10-15 14:32 ` [PATCH RFC v2 3/3] virtio-net: optimize free_old_xmit_skbs stats Michael S. Tsirkin
  2014-10-16  0:31 ` [PATCH RFC v2 0/3] virtio_net: enabling tx interrupts Jason Wang
  3 siblings, 1 reply; 7+ messages in thread
From: Michael S. Tsirkin @ 2014-10-15 14:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Jason Wang, Rusty Russell, virtualization, netdev

Improve tx batching using byte queue limits.
Should be especially effective for MQ.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index a9bf178..8dea411 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -219,13 +219,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 	return p;
 }
 
-static unsigned int free_old_xmit_skbs(struct send_queue *sq, int budget)
+static unsigned int free_old_xmit_skbs(struct netdev_queue *txq,
+				       struct send_queue *sq, int budget)
 {
 	struct sk_buff *skb;
 	unsigned int len;
 	struct virtnet_info *vi = sq->vq->vdev->priv;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 	unsigned int packets = 0;
+	unsigned int bytes = 0;
 
 	while (packets < budget &&
 	       (skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
@@ -233,6 +235,7 @@ static unsigned int free_old_xmit_skbs(struct send_queue *sq, int budget)
 
 		u64_stats_update_begin(&stats->tx_syncp);
 		stats->tx_bytes += skb->len;
+		bytes += skb->len;
 		stats->tx_packets++;
 		u64_stats_update_end(&stats->tx_syncp);
 
@@ -240,6 +243,8 @@ static unsigned int free_old_xmit_skbs(struct send_queue *sq, int budget)
 		packets++;
 	}
 
+	netdev_tx_completed_queue(txq, packets, bytes);
+
 	return packets;
 }
 
@@ -810,7 +815,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
 again:
 	__netif_tx_lock(txq, smp_processor_id());
 	virtqueue_disable_cb(sq->vq);
-	sent += free_old_xmit_skbs(sq, budget - sent);
+	sent += free_old_xmit_skbs(txq, sq, budget - sent);
 
 	if (sent < budget) {
 		enable_done = virtqueue_enable_cb_delayed(sq->vq);
@@ -962,12 +967,13 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
 	bool kick = !skb->xmit_more;
 	bool stopped;
+	unsigned int bytes = skb->len;
 
 	virtqueue_disable_cb(sq->vq);
 
 	/* We are going to push one skb.
 	 * Try to pop one off to free space for it. */
-	free_old_xmit_skbs(sq, 1);
+	free_old_xmit_skbs(txq, sq, 1);
 
 	/* Try to transmit */
 	err = xmit_skb(sq, skb);
@@ -983,6 +989,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_OK;
 	}
 
+	netdev_tx_sent_queue(txq, bytes);
+
+	/* Kick early so device can process descriptors in parallel with us. */
+	if (kick)
+		virtqueue_kick(sq->vq);
+
 	/* Apparently nice girls don't return TX_BUSY; stop the queue
 	 * before it gets out of hand.  Naturally, this wastes entries. */
 	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
@@ -997,7 +1009,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
 		/* More just got used, free them then recheck. */
-		free_old_xmit_skbs(sq, qsize);
+		free_old_xmit_skbs(txq, sq, qsize);
 		if (stopped && sq->vq->num_free >= 2+MAX_SKB_FRAGS)
 			netif_start_subqueue(dev, qnum);
 	}
-- 
MST


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH RFC v2 3/3] virtio-net: optimize free_old_xmit_skbs stats
  2014-10-15 14:32 [PATCH RFC v2 0/3] virtio_net: enabling tx interrupts Michael S. Tsirkin
  2014-10-15 14:32 ` [PATCH RFC v2 1/3] virtio_net: enable tx interrupt Michael S. Tsirkin
  2014-10-15 14:32 ` [PATCH RFC v2 2/3] virtio_net: bql Michael S. Tsirkin
@ 2014-10-15 14:32 ` Michael S. Tsirkin
  2014-10-16  0:31 ` [PATCH RFC v2 0/3] virtio_net: enabling tx interrupts Jason Wang
  3 siblings, 0 replies; 7+ messages in thread
From: Michael S. Tsirkin @ 2014-10-15 14:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Jason Wang, Rusty Russell, virtualization, netdev

From: Jason Wang <jasowang@redhat.com>

We already have counters for sent packets and sent bytes.
Use them to reduce the number of u64_stats_update_begin/end().

Take care not to bother with stats update when called
speculatively.

Based on a patch by Jason Wang.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8dea411..4e12023 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -233,16 +233,22 @@ static unsigned int free_old_xmit_skbs(struct netdev_queue *txq,
 	       (skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
 		pr_debug("Sent skb %p\n", skb);
 
-		u64_stats_update_begin(&stats->tx_syncp);
-		stats->tx_bytes += skb->len;
 		bytes += skb->len;
-		stats->tx_packets++;
-		u64_stats_update_end(&stats->tx_syncp);
+		packets++;
 
 		dev_kfree_skb_any(skb);
-		packets++;
 	}
 
+	/* Avoid overhead when no packets have been processed
+	 * happens when called speculatively from start_xmit. */
+	if (!packets)
+		return 0;
+
+	u64_stats_update_begin(&stats->tx_syncp);
+	stats->tx_bytes += bytes;
+	stats->tx_packets += packets;
+	u64_stats_update_end(&stats->tx_syncp);
+
 	netdev_tx_completed_queue(txq, packets, bytes);
 
 	return packets;
-- 
MST


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC v2 0/3] virtio_net: enabling tx interrupts
  2014-10-15 14:32 [PATCH RFC v2 0/3] virtio_net: enabling tx interrupts Michael S. Tsirkin
                   ` (2 preceding siblings ...)
  2014-10-15 14:32 ` [PATCH RFC v2 3/3] virtio-net: optimize free_old_xmit_skbs stats Michael S. Tsirkin
@ 2014-10-16  0:31 ` Jason Wang
  3 siblings, 0 replies; 7+ messages in thread
From: Jason Wang @ 2014-10-16  0:31 UTC (permalink / raw)
  To: Michael S. Tsirkin, linux-kernel

On 10/15/2014 10:32 PM, Michael S. Tsirkin wrote:
> RFC patches to enable tx interrupts.
> This is to demonstrate how this can be done without
> core virtio changes, and to make sure I understand
> the new APIs correctly.
>
> Testing TBD: I'm going offline for several days, will only be able to
> test afterwards (unless someone beats me to this).
> So I thought I'd post this early.
>
> Applies on top of patch: "virtio_net: fix use after free"
> that I recently sent.
>
> Changes:
> 	address comments by Jason Wang, use delayed cb everywhere
> 	rebased Jason's patch on top of mine and include it (with some tweaks)
>
> Jason, feel free to either use this, try it out, or take ideas out
> of this patchset and reuse in any other way you see fit.

Sure, will do some tests first.

Thanks
>
> Jason Wang (1):
>   virtio-net: optimize free_old_xmit_skbs stats
>
> Michael S. Tsirkin (2):
>   virtio_net: enable tx interrupt
>   virtio_net: bql
>
>  drivers/net/virtio_net.c | 153 ++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 111 insertions(+), 42 deletions(-)
>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC v2 1/3] virtio_net: enable tx interrupt
  2014-10-15 14:32 ` [PATCH RFC v2 1/3] virtio_net: enable tx interrupt Michael S. Tsirkin
@ 2014-10-17  5:11   ` Jason Wang
  0 siblings, 0 replies; 7+ messages in thread
From: Jason Wang @ 2014-10-17  5:11 UTC (permalink / raw)
  To: Michael S. Tsirkin, linux-kernel; +Cc: Rusty Russell, virtualization, netdev

On 10/15/2014 10:32 PM, Michael S. Tsirkin wrote:
> On newer hosts that support delayed tx interrupts,
> we probably don't have much to gain from orphaning
> packets early.
>
> Based on patch by Jason Wang.
>
> Note: this might degrade performance for
> hosts without event idx support.
> Should be addressed by the next patch.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  drivers/net/virtio_net.c | 137 ++++++++++++++++++++++++++++++++---------------
>  1 file changed, 94 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 13d0a8b..a9bf178 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -72,6 +72,8 @@ struct send_queue {
>  
>  	/* Name of the send queue: output.$index */
>  	char name[40];
> +
> +	struct napi_struct napi;
>  };
>  
>  /* Internal representation of a receive virtqueue */
> @@ -217,15 +219,37 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
>  	return p;
>  }
>  
> +static unsigned int free_old_xmit_skbs(struct send_queue *sq, int budget)
> +{
> +	struct sk_buff *skb;
> +	unsigned int len;
> +	struct virtnet_info *vi = sq->vq->vdev->priv;
> +	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
> +	unsigned int packets = 0;
> +
> +	while (packets < budget &&
> +	       (skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> +		pr_debug("Sent skb %p\n", skb);
> +
> +		u64_stats_update_begin(&stats->tx_syncp);
> +		stats->tx_bytes += skb->len;
> +		stats->tx_packets++;
> +		u64_stats_update_end(&stats->tx_syncp);
> +
> +		dev_kfree_skb_any(skb);
> +		packets++;
> +	}
> +
> +	return packets;
> +}
> +
>  static void skb_xmit_done(struct virtqueue *vq)
>  {
>  	struct virtnet_info *vi = vq->vdev->priv;
> +	struct send_queue *sq = &vi->sq[vq2txq(vq)];
>  
> -	/* Suppress further interrupts. */
> -	virtqueue_disable_cb(vq);
> -
> -	/* We were probably waiting for more output buffers. */
> -	netif_wake_subqueue(vi->dev, vq2txq(vq));
> +	if (napi_schedule_prep(&sq->napi))
> +		__napi_schedule(&sq->napi);
>  }
>  
>  static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
> @@ -774,6 +798,37 @@ again:
>  	return received;
>  }
>  
> +static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> +{
> +	struct send_queue *sq =
> +		container_of(napi, struct send_queue, napi);
> +	struct virtnet_info *vi = sq->vq->vdev->priv;
> +	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
> +	unsigned int sent = 0;
> +	bool enable_done;
> +
> +again:
> +	__netif_tx_lock(txq, smp_processor_id());
> +	virtqueue_disable_cb(sq->vq);
> +	sent += free_old_xmit_skbs(sq, budget - sent);
> +
> +	if (sent < budget) {
> +		enable_done = virtqueue_enable_cb_delayed(sq->vq);
> +		napi_complete(napi);
> +		__netif_tx_unlock(txq);
> +		if (unlikely(enable_done) && napi_schedule_prep(napi)) {

I think you mean unlikely(!enable_done) here?
> +			virtqueue_disable_cb(sq->vq);
> +			__napi_schedule(napi);
> +			goto again;
> +		}
> +	} else {
> +		__netif_tx_unlock(txq);
> +	}
> +
> +	netif_wake_subqueue(vi->dev, vq2txq(sq->vq));
> +	return sent;
> +}
> +
>  #ifdef CONFIG_NET_RX_BUSY_POLL
>  /* must be called with local_bh_disable()d */
>  static int virtnet_busy_poll(struct napi_struct *napi)
> @@ -822,30 +877,12 @@ static int virtnet_open(struct net_device *dev)
>  			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
>  				schedule_delayed_work(&vi->refill, 0);
>  		virtnet_napi_enable(&vi->rq[i]);
> +		napi_enable(&vi->sq[i].napi);
>  	}
>  
>  	return 0;
>  }
>  
> -static void free_old_xmit_skbs(struct send_queue *sq)
> -{
> -	struct sk_buff *skb;
> -	unsigned int len;
> -	struct virtnet_info *vi = sq->vq->vdev->priv;
> -	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
> -
> -	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> -		pr_debug("Sent skb %p\n", skb);
> -
> -		u64_stats_update_begin(&stats->tx_syncp);
> -		stats->tx_bytes += skb->len;
> -		stats->tx_packets++;
> -		u64_stats_update_end(&stats->tx_syncp);
> -
> -		dev_kfree_skb_any(skb);
> -	}
> -}
> -
>  static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
>  {
>  	struct skb_vnet_hdr *hdr;
> @@ -911,7 +948,9 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
>  		sg_set_buf(sq->sg, hdr, hdr_len);
>  		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
>  	}
> -	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
> +
> +	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb,
> +				    GFP_ATOMIC);
>  }
>  
>  static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> @@ -919,12 +958,16 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>  	struct virtnet_info *vi = netdev_priv(dev);
>  	int qnum = skb_get_queue_mapping(skb);
>  	struct send_queue *sq = &vi->sq[qnum];
> -	int err;
> +	int err, qsize = virtqueue_get_vring_size(sq->vq);
>  	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
>  	bool kick = !skb->xmit_more;
> +	bool stopped;
> +
> +	virtqueue_disable_cb(sq->vq);
>  
> -	/* Free up any pending old buffers before queueing new ones. */
> -	free_old_xmit_skbs(sq);
> +	/* We are going to push one skb.
> +	 * Try to pop one off to free space for it. */
> +	free_old_xmit_skbs(sq, 1);

Looks like qsize instead of 1 is better? The more skbs freed in
ndo_start_xmit() the less chance tx napi will be scheduled.
>  
>  	/* Try to transmit */
>  	err = xmit_skb(sq, skb);
> @@ -940,27 +983,25 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>  		return NETDEV_TX_OK;
>  	}
>  
> -	/* Don't wait up for transmitted skbs to be freed. */
> -	skb_orphan(skb);
> -	nf_reset(skb);
> -
>  	/* Apparently nice girls don't return TX_BUSY; stop the queue
>  	 * before it gets out of hand.  Naturally, this wastes entries. */
>  	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
>  		netif_stop_subqueue(dev, qnum);
> -		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
> -			/* More just got used, free them then recheck. */
> -			free_old_xmit_skbs(sq);
> -			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
> -				netif_start_subqueue(dev, qnum);
> -				virtqueue_disable_cb(sq->vq);
> -			}
> -		}
> +		stopped = true;
> +	} else {
> +		stopped = false;
>  	}
>  
>  	if (kick || netif_xmit_stopped(txq))
>  		virtqueue_kick(sq->vq);

Looks like we'd better move this in the end in case the queue could be
restarted by following lines?
>  
> +	if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
> +		/* More just got used, free them then recheck. */
> +		free_old_xmit_skbs(sq, qsize);
> +		if (stopped && sq->vq->num_free >= 2+MAX_SKB_FRAGS)
> +			netif_start_subqueue(dev, qnum);

Why drop virtqueue_disable_cb() here?
> +	}
> +
>  	return NETDEV_TX_OK;
>  }
>  
> @@ -1137,8 +1178,10 @@ static int virtnet_close(struct net_device *dev)
>  	/* Make sure refill_work doesn't re-enable napi! */
>  	cancel_delayed_work_sync(&vi->refill);
>  
> -	for (i = 0; i < vi->max_queue_pairs; i++)
> +	for (i = 0; i < vi->max_queue_pairs; i++) {
>  		napi_disable(&vi->rq[i].napi);
> +		napi_disable(&vi->sq[i].napi);
> +	}
>  
>  	return 0;
>  }
> @@ -1457,8 +1500,10 @@ static void virtnet_free_queues(struct virtnet_info *vi)
>  {
>  	int i;
>  
> -	for (i = 0; i < vi->max_queue_pairs; i++)
> +	for (i = 0; i < vi->max_queue_pairs; i++) {
>  		netif_napi_del(&vi->rq[i].napi);
> +		netif_napi_del(&vi->sq[i].napi);
> +	}
>  
>  	kfree(vi->rq);
>  	kfree(vi->sq);
> @@ -1612,6 +1657,8 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
>  		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
>  			       napi_weight);
>  		napi_hash_add(&vi->rq[i].napi);
> +		netif_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
> +			       napi_weight);
>  
>  		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
>  		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
> @@ -1916,8 +1963,10 @@ static int virtnet_freeze(struct virtio_device *vdev)
>  	if (netif_running(vi->dev)) {
>  		for (i = 0; i < vi->max_queue_pairs; i++) {
>  			napi_disable(&vi->rq[i].napi);
> +			napi_disable(&vi->sq[i].napi);
>  			napi_hash_del(&vi->rq[i].napi);
>  			netif_napi_del(&vi->rq[i].napi);
> +			netif_napi_del(&vi->sq[i].napi);
>  		}
>  	}
>  
> @@ -1942,8 +1991,10 @@ static int virtnet_restore(struct virtio_device *vdev)
>  			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
>  				schedule_delayed_work(&vi->refill, 0);
>  
> -		for (i = 0; i < vi->max_queue_pairs; i++)
> +		for (i = 0; i < vi->max_queue_pairs; i++) {
>  			virtnet_napi_enable(&vi->rq[i]);
> +			napi_enable(&vi->sq[i].napi);
> +		}
>  	}
>  
>  	netif_device_attach(vi->dev);


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC v2 2/3] virtio_net: bql
  2014-10-15 14:32 ` [PATCH RFC v2 2/3] virtio_net: bql Michael S. Tsirkin
@ 2014-10-17  5:16   ` Jason Wang
  0 siblings, 0 replies; 7+ messages in thread
From: Jason Wang @ 2014-10-17  5:16 UTC (permalink / raw)
  To: Michael S. Tsirkin, linux-kernel; +Cc: Rusty Russell, virtualization, netdev

On 10/15/2014 10:32 PM, Michael S. Tsirkin wrote:
> Improve tx batching using byte queue limits.
> Should be especially effective for MQ.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  drivers/net/virtio_net.c | 20 ++++++++++++++++----
>  1 file changed, 16 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index a9bf178..8dea411 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -219,13 +219,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
>  	return p;
>  }
>  
> -static unsigned int free_old_xmit_skbs(struct send_queue *sq, int budget)
> +static unsigned int free_old_xmit_skbs(struct netdev_queue *txq,
> +				       struct send_queue *sq, int budget)
>  {
>  	struct sk_buff *skb;
>  	unsigned int len;
>  	struct virtnet_info *vi = sq->vq->vdev->priv;
>  	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
>  	unsigned int packets = 0;
> +	unsigned int bytes = 0;
>  
>  	while (packets < budget &&
>  	       (skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> @@ -233,6 +235,7 @@ static unsigned int free_old_xmit_skbs(struct send_queue *sq, int budget)
>  
>  		u64_stats_update_begin(&stats->tx_syncp);
>  		stats->tx_bytes += skb->len;
> +		bytes += skb->len;
>  		stats->tx_packets++;
>  		u64_stats_update_end(&stats->tx_syncp);
>  
> @@ -240,6 +243,8 @@ static unsigned int free_old_xmit_skbs(struct send_queue *sq, int budget)
>  		packets++;
>  	}
>  
> +	netdev_tx_completed_queue(txq, packets, bytes);
> +
>  	return packets;
>  }
>  
> @@ -810,7 +815,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
>  again:
>  	__netif_tx_lock(txq, smp_processor_id());
>  	virtqueue_disable_cb(sq->vq);
> -	sent += free_old_xmit_skbs(sq, budget - sent);
> +	sent += free_old_xmit_skbs(txq, sq, budget - sent);
>  
>  	if (sent < budget) {
>  		enable_done = virtqueue_enable_cb_delayed(sq->vq);
> @@ -962,12 +967,13 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>  	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
>  	bool kick = !skb->xmit_more;
>  	bool stopped;
> +	unsigned int bytes = skb->len;
>  
>  	virtqueue_disable_cb(sq->vq);
>  
>  	/* We are going to push one skb.
>  	 * Try to pop one off to free space for it. */
> -	free_old_xmit_skbs(sq, 1);
> +	free_old_xmit_skbs(txq, sq, 1);
>  
>  	/* Try to transmit */
>  	err = xmit_skb(sq, skb);
> @@ -983,6 +989,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>  		return NETDEV_TX_OK;
>  	}
>  
> +	netdev_tx_sent_queue(txq, bytes);
> +
> +	/* Kick early so device can process descriptors in parallel with us. */
> +	if (kick)
> +		virtqueue_kick(sq->vq);

Haven't figured out how this will help for bql, consider only a
netif_stop_subqueue() may be called during two possible kicks. And since
we don't add any buffer between the two kicks, the send kick is almost
useless.
> +
>  	/* Apparently nice girls don't return TX_BUSY; stop the queue
>  	 * before it gets out of hand.  Naturally, this wastes entries. */
>  	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
> @@ -997,7 +1009,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>  
>  	if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
>  		/* More just got used, free them then recheck. */
> -		free_old_xmit_skbs(sq, qsize);
> +		free_old_xmit_skbs(txq, sq, qsize);
>  		if (stopped && sq->vq->num_free >= 2+MAX_SKB_FRAGS)
>  			netif_start_subqueue(dev, qnum);
>  	}


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-10-17  5:16 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-15 14:32 [PATCH RFC v2 0/3] virtio_net: enabling tx interrupts Michael S. Tsirkin
2014-10-15 14:32 ` [PATCH RFC v2 1/3] virtio_net: enable tx interrupt Michael S. Tsirkin
2014-10-17  5:11   ` Jason Wang
2014-10-15 14:32 ` [PATCH RFC v2 2/3] virtio_net: bql Michael S. Tsirkin
2014-10-17  5:16   ` Jason Wang
2014-10-15 14:32 ` [PATCH RFC v2 3/3] virtio-net: optimize free_old_xmit_skbs stats Michael S. Tsirkin
2014-10-16  0:31 ` [PATCH RFC v2 0/3] virtio_net: enabling tx interrupts Jason Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox