From: "Michael S. Tsirkin" <mst@redhat.com>
To: Jiri Pirko <jiri@resnulli.us>
Cc: netdev@vger.kernel.org, davem@davemloft.net, edumazet@google.com,
kuba@kernel.org, pabeni@redhat.com, jasowang@redhat.com,
xuanzhuo@linux.alibaba.com, virtualization@lists.linux.dev,
ast@kernel.org, daniel@iogearbox.net, hawk@kernel.org,
john.fastabend@gmail.com, dave.taht@gmail.com,
kerneljasonxing@gmail.com, hengqi@linux.alibaba.com
Subject: Re: [PATCH net-next v2] virtio_net: add support for Byte Queue Limits
Date: Mon, 17 Jun 2024 12:14:11 -0400 [thread overview]
Message-ID: <20240617121234-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20240612170851.1004604-1-jiri@resnulli.us>
On Wed, Jun 12, 2024 at 07:08:51PM +0200, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@nvidia.com>
>
> Add support for Byte Queue Limits (BQL).
>
> Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
> Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
> running in background. Netperf TCP_RR results:
>
> NOBQL FQC 1q: 159.56 159.33 158.50 154.31 agv: 157.925
> NOBQL FQC 2q: 184.64 184.96 174.73 174.15 agv: 179.62
> NOBQL FQC 4q: 994.46 441.96 416.50 499.56 agv: 588.12
> NOBQL PFF 1q: 148.68 148.92 145.95 149.48 agv: 148.2575
> NOBQL PFF 2q: 171.86 171.20 170.42 169.42 agv: 170.725
> NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 agv: 2159.7875
> BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 agv: 1282.4375
> BQL FQC 2q: 768.30 817.72 864.43 974.40 agv: 856.2125
> BQL FQC 4q: 945.66 942.68 878.51 822.82 agv: 897.4175
> BQL PFF 1q: 149.69 151.49 149.40 147.47 agv: 149.5125
> BQL PFF 2q: 2059.32 798.74 1844.12 381.80 agv: 1270.995
> BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 agv: 6119.1875
>
> Signed-off-by: Jiri Pirko <jiri@nvidia.com>
> ---
> v1->v2:
> - moved netdev_tx_completed_queue() call into __free_old_xmit(),
> propagate use_napi flag to __free_old_xmit() and only call
> netdev_tx_completed_queue() in case it is true
> - added forgotten call to netdev_tx_reset_queue()
> - fixed stats for xdp packets
> - fixed bql accounting when __free_old_xmit() is called from xdp path
> - handle the !use_napi case in start_xmit() kick section
> ---
> drivers/net/virtio_net.c | 50 +++++++++++++++++++++++++---------------
> 1 file changed, 32 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 61a57d134544..5863c663ccab 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -84,7 +84,9 @@ struct virtnet_stat_desc {
>
> struct virtnet_sq_free_stats {
> u64 packets;
> + u64 xdp_packets;
> u64 bytes;
> + u64 xdp_bytes;
> };
>
> struct virtnet_sq_stats {
> @@ -506,29 +508,33 @@ static struct xdp_frame *ptr_to_xdp(void *ptr)
> return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
> }
>
> -static void __free_old_xmit(struct send_queue *sq, bool in_napi,
> +static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> + bool in_napi, bool use_napi,
> struct virtnet_sq_free_stats *stats)
> {
> unsigned int len;
> void *ptr;
>
> while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> - ++stats->packets;
> -
> if (!is_xdp_frame(ptr)) {
> struct sk_buff *skb = ptr;
>
> pr_debug("Sent skb %p\n", skb);
>
> + stats->packets++;
> stats->bytes += skb->len;
> napi_consume_skb(skb, in_napi);
> } else {
> struct xdp_frame *frame = ptr_to_xdp(ptr);
>
> - stats->bytes += xdp_get_frame_len(frame);
> + stats->xdp_packets++;
> + stats->xdp_bytes += xdp_get_frame_len(frame);
> xdp_return_frame(frame);
> }
> }
> + if (use_napi)
> + netdev_tx_completed_queue(txq, stats->packets, stats->bytes);
> +
> }
>
> /* Converting between virtqueue no. and kernel tx/rx queue no.
> @@ -955,21 +961,22 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
> virtnet_rq_free_buf(vi, rq, buf);
> }
>
> -static void free_old_xmit(struct send_queue *sq, bool in_napi)
> +static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> + bool in_napi, bool use_napi)
> {
> struct virtnet_sq_free_stats stats = {0};
>
> - __free_old_xmit(sq, in_napi, &stats);
> + __free_old_xmit(sq, txq, in_napi, use_napi, &stats);
>
> /* Avoid overhead when no packets have been processed
> * happens when called speculatively from start_xmit.
> */
> - if (!stats.packets)
> + if (!stats.packets && !stats.xdp_packets)
> return;
>
> u64_stats_update_begin(&sq->stats.syncp);
> - u64_stats_add(&sq->stats.bytes, stats.bytes);
> - u64_stats_add(&sq->stats.packets, stats.packets);
> + u64_stats_add(&sq->stats.bytes, stats.bytes + stats.xdp_bytes);
> + u64_stats_add(&sq->stats.packets, stats.packets + stats.xdp_packets);
> u64_stats_update_end(&sq->stats.syncp);
> }
>
> @@ -1003,7 +1010,9 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
> * early means 16 slots are typically wasted.
> */
> if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
> - netif_stop_subqueue(dev, qnum);
> + struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
> +
> + netif_tx_stop_queue(txq);
> u64_stats_update_begin(&sq->stats.syncp);
> u64_stats_inc(&sq->stats.stop);
> u64_stats_update_end(&sq->stats.syncp);
> @@ -1012,7 +1021,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
> virtqueue_napi_schedule(&sq->napi, sq->vq);
> } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
> /* More just got used, free them then recheck. */
> - free_old_xmit(sq, false);
> + free_old_xmit(sq, txq, false, use_napi);
> if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
> netif_start_subqueue(dev, qnum);
> u64_stats_update_begin(&sq->stats.syncp);
> @@ -1138,7 +1147,8 @@ static int virtnet_xdp_xmit(struct net_device *dev,
> }
>
> /* Free up any pending old buffers before queueing new ones. */
> - __free_old_xmit(sq, false, &stats);
> + __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
> + false, sq->napi.weight, &stats);
So what happens if sq->napi.weight changes while packet is in the queue?
What prevents that?
>
> for (i = 0; i < n; i++) {
> struct xdp_frame *xdpf = frames[i];
> @@ -2313,7 +2323,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
>
> do {
> virtqueue_disable_cb(sq->vq);
> - free_old_xmit(sq, true);
> + free_old_xmit(sq, txq, true, true);
> } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
>
> if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
> @@ -2412,6 +2422,7 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> goto err_xdp_reg_mem_model;
>
> virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
> + netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
> virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
>
> return 0;
> @@ -2471,7 +2482,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> txq = netdev_get_tx_queue(vi->dev, index);
> __netif_tx_lock(txq, raw_smp_processor_id());
> virtqueue_disable_cb(sq->vq);
> - free_old_xmit(sq, true);
> + free_old_xmit(sq, txq, true, true);
>
> if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
> if (netif_tx_queue_stopped(txq)) {
> @@ -2559,17 +2570,18 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> struct send_queue *sq = &vi->sq[qnum];
> int err;
> struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
> - bool kick = !netdev_xmit_more();
> + bool xmit_more = netdev_xmit_more();
> bool use_napi = sq->napi.weight;
> + bool kick;
>
> /* Free up any pending old buffers before queueing new ones. */
> do {
> if (use_napi)
> virtqueue_disable_cb(sq->vq);
>
> - free_old_xmit(sq, false);
> + free_old_xmit(sq, txq, false, use_napi);
>
> - } while (use_napi && kick &&
> + } while (use_napi && !xmit_more &&
> unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
>
> /* timestamp packet in software */
> @@ -2598,7 +2610,9 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>
> check_sq_full_and_disable(vi, dev, sq);
>
> - if (kick || netif_xmit_stopped(txq)) {
> + kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) :
> + !xmit_more && netif_xmit_stopped(txq);
> + if (kick) {
> if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
> u64_stats_update_begin(&sq->stats.syncp);
> u64_stats_inc(&sq->stats.kicks);
> --
> 2.45.1
next prev parent reply other threads:[~2024-06-17 16:14 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-12 17:08 [PATCH net-next v2] virtio_net: add support for Byte Queue Limits Jiri Pirko
2024-06-12 22:30 ` Michael S. Tsirkin
2024-06-14 9:54 ` Jason Xing
2024-06-17 9:15 ` Jiri Pirko
2024-06-17 15:47 ` Jason Xing
2024-06-17 2:34 ` Jason Wang
2024-06-17 9:18 ` Jiri Pirko
2024-06-18 0:53 ` Jason Wang
2024-06-18 7:52 ` Jiri Pirko
2024-06-17 16:14 ` Michael S. Tsirkin [this message]
2024-06-18 7:52 ` Jiri Pirko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240617121234-mutt-send-email-mst@kernel.org \
--to=mst@redhat.com \
--cc=ast@kernel.org \
--cc=daniel@iogearbox.net \
--cc=dave.taht@gmail.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hawk@kernel.org \
--cc=hengqi@linux.alibaba.com \
--cc=jasowang@redhat.com \
--cc=jiri@resnulli.us \
--cc=john.fastabend@gmail.com \
--cc=kerneljasonxing@gmail.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=virtualization@lists.linux.dev \
--cc=xuanzhuo@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.