* [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
@ 2024-06-18 14:44 ` Jiri Pirko
2024-06-18 18:18 ` Michael S. Tsirkin
` (2 more replies)
0 siblings, 3 replies; 19+ messages in thread
From: Jiri Pirko @ 2024-06-18 14:44 UTC (permalink / raw)
To: netdev
Cc: davem, edumazet, kuba, pabeni, mst, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
From: Jiri Pirko <jiri@nvidia.com>
Add support for Byte Queue Limits (BQL).
Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
running in background. Netperf TCP_RR results:
NOBQL FQC 1q: 159.56 159.33 158.50 154.31 avg: 157.925
NOBQL FQC 2q: 184.64 184.96 174.73 174.15 avg: 179.62
NOBQL FQC 4q: 994.46 441.96 416.50 499.56 avg: 588.12
NOBQL PFF 1q: 148.68 148.92 145.95 149.48 avg: 148.2575
NOBQL PFF 2q: 171.86 171.20 170.42 169.42 avg: 170.725
NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 avg: 2159.7875
BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 avg: 1282.4375
BQL FQC 2q: 768.30 817.72 864.43 974.40 avg: 856.2125
BQL FQC 4q: 945.66 942.68 878.51 822.82 avg: 897.4175
BQL PFF 1q: 149.69 151.49 149.40 147.47 avg: 149.5125
BQL PFF 2q: 2059.32 798.74 1844.12 381.80 avg: 1270.995
BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 avg: 6119.1875
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
v2->v3:
- fixed the switch from/to orphan mode while skbs are yet to be
completed by using the second least significant bit in virtqueue
token pointer to indicate skb is orphan. Don't account orphan
skbs in completion.
- reorganized parallel skb/xdp free stats accounting to napi/others.
- fixed kick condition check in orphan mode
v1->v2:
- moved netdev_tx_completed_queue() call into __free_old_xmit(),
propagate use_napi flag to __free_old_xmit() and only call
netdev_tx_completed_queue() in case it is true
- added forgotten call to netdev_tx_reset_queue()
- fixed stats for xdp packets
- fixed bql accounting when __free_old_xmit() is called from xdp path
- handle the !use_napi case in start_xmit() kick section
---
drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++------------
1 file changed, 57 insertions(+), 24 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 61a57d134544..9f9b86874173 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -47,7 +47,8 @@ module_param(napi_tx, bool, 0644);
#define VIRTIO_XDP_TX BIT(0)
#define VIRTIO_XDP_REDIR BIT(1)
-#define VIRTIO_XDP_FLAG BIT(0)
+#define VIRTIO_XDP_FLAG BIT(0)
+#define VIRTIO_ORPHAN_FLAG BIT(1)
/* RX packet size EWMA. The average packet size is used to determine the packet
* buffer size when refilling RX rings. As the entire RX ring may be refilled
@@ -85,6 +86,8 @@ struct virtnet_stat_desc {
struct virtnet_sq_free_stats {
u64 packets;
u64 bytes;
+ u64 napi_packets;
+ u64 napi_bytes;
};
struct virtnet_sq_stats {
@@ -506,29 +509,50 @@ static struct xdp_frame *ptr_to_xdp(void *ptr)
return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
}
-static void __free_old_xmit(struct send_queue *sq, bool in_napi,
- struct virtnet_sq_free_stats *stats)
+static bool is_orphan_skb(void *ptr)
+{
+ return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG;
+}
+
+static void *skb_to_ptr(struct sk_buff *skb, bool orphan)
+{
+ return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0));
+}
+
+static struct sk_buff *ptr_to_skb(void *ptr)
+{
+ return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG);
+}
+
+static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
+ bool in_napi, struct virtnet_sq_free_stats *stats)
{
unsigned int len;
void *ptr;
while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
- ++stats->packets;
-
if (!is_xdp_frame(ptr)) {
- struct sk_buff *skb = ptr;
+ struct sk_buff *skb = ptr_to_skb(ptr);
pr_debug("Sent skb %p\n", skb);
- stats->bytes += skb->len;
+ if (is_orphan_skb(ptr)) {
+ stats->packets++;
+ stats->bytes += skb->len;
+ } else {
+ stats->napi_packets++;
+ stats->napi_bytes += skb->len;
+ }
napi_consume_skb(skb, in_napi);
} else {
struct xdp_frame *frame = ptr_to_xdp(ptr);
+ stats->packets++;
stats->bytes += xdp_get_frame_len(frame);
xdp_return_frame(frame);
}
}
+ netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
}
/* Converting between virtqueue no. and kernel tx/rx queue no.
@@ -955,21 +979,22 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
virtnet_rq_free_buf(vi, rq, buf);
}
-static void free_old_xmit(struct send_queue *sq, bool in_napi)
+static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
+ bool in_napi)
{
struct virtnet_sq_free_stats stats = {0};
- __free_old_xmit(sq, in_napi, &stats);
+ __free_old_xmit(sq, txq, in_napi, &stats);
/* Avoid overhead when no packets have been processed
* happens when called speculatively from start_xmit.
*/
- if (!stats.packets)
+ if (!stats.packets && !stats.napi_packets)
return;
u64_stats_update_begin(&sq->stats.syncp);
- u64_stats_add(&sq->stats.bytes, stats.bytes);
- u64_stats_add(&sq->stats.packets, stats.packets);
+ u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
+ u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
u64_stats_update_end(&sq->stats.syncp);
}
@@ -1003,7 +1028,9 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
* early means 16 slots are typically wasted.
*/
if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
- netif_stop_subqueue(dev, qnum);
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
+
+ netif_tx_stop_queue(txq);
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.stop);
u64_stats_update_end(&sq->stats.syncp);
@@ -1012,7 +1039,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
virtqueue_napi_schedule(&sq->napi, sq->vq);
} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
/* More just got used, free them then recheck. */
- free_old_xmit(sq, false);
+ free_old_xmit(sq, txq, false);
if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
netif_start_subqueue(dev, qnum);
u64_stats_update_begin(&sq->stats.syncp);
@@ -1138,7 +1165,8 @@ static int virtnet_xdp_xmit(struct net_device *dev,
}
/* Free up any pending old buffers before queueing new ones. */
- __free_old_xmit(sq, false, &stats);
+ __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
+ false, &stats);
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
@@ -2313,7 +2341,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
do {
virtqueue_disable_cb(sq->vq);
- free_old_xmit(sq, true);
+ free_old_xmit(sq, txq, true);
} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
@@ -2412,6 +2440,7 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
goto err_xdp_reg_mem_model;
virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
+ netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
return 0;
@@ -2471,7 +2500,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
- free_old_xmit(sq, true);
+ free_old_xmit(sq, txq, true);
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
if (netif_tx_queue_stopped(txq)) {
@@ -2505,7 +2534,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
return 0;
}
-static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
+static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
@@ -2549,7 +2578,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
return num_sg;
num_sg++;
}
- return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
+ return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
+ skb_to_ptr(skb, orphan), GFP_ATOMIC);
}
static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -2559,24 +2589,25 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
struct send_queue *sq = &vi->sq[qnum];
int err;
struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
- bool kick = !netdev_xmit_more();
+ bool xmit_more = netdev_xmit_more();
bool use_napi = sq->napi.weight;
+ bool kick;
/* Free up any pending old buffers before queueing new ones. */
do {
if (use_napi)
virtqueue_disable_cb(sq->vq);
- free_old_xmit(sq, false);
+ free_old_xmit(sq, txq, false);
- } while (use_napi && kick &&
+ } while (use_napi && !xmit_more &&
unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
/* timestamp packet in software */
skb_tx_timestamp(skb);
/* Try to transmit */
- err = xmit_skb(sq, skb);
+ err = xmit_skb(sq, skb, !use_napi);
/* This should not happen! */
if (unlikely(err)) {
@@ -2598,7 +2629,9 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
check_sq_full_and_disable(vi, dev, sq);
- if (kick || netif_xmit_stopped(txq)) {
+ kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) :
+ !xmit_more || netif_xmit_stopped(txq);
+ if (kick) {
if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.kicks);
--
2.45.1
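For readers of the diff above (an illustrative aside, not part of the patch):
the new kick condition in start_xmit() relies on __netdev_tx_sent_queue(),
which folds the BQL byte accounting and the doorbell decision into one call.
A simplified sketch of that helper's behaviour, paraphrased from
include/linux/netdevice.h with the CONFIG_BQL details elided:

	static inline bool __netdev_tx_sent_queue(struct netdev_queue *txq,
						  unsigned int bytes,
						  bool xmit_more)
	{
		if (xmit_more) {
			/* account the bytes, but only ask for a kick if BQL
			 * has meanwhile stopped this queue
			 */
			dql_queued(&txq->dql, bytes);
			return netif_tx_queue_stopped(txq);
		}
		/* last skb of the batch: account and always kick */
		netdev_tx_sent_queue(txq, bytes);
		return true;
	}

In the !use_napi (orphan) path the skb bytes are deliberately never reported
to BQL, which is why that path keeps the plain "!xmit_more ||
netif_xmit_stopped(txq)" kick condition.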
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-06-18 14:44 ` [PATCH net-next v3] virtio_net: add support for Byte Queue Limits Jiri Pirko
@ 2024-06-18 18:18 ` Michael S. Tsirkin
2024-06-19 5:45 ` Jiri Pirko
2024-06-20 0:40 ` patchwork-bot+netdevbpf
2024-08-12 14:57 ` Marek Szyprowski
2 siblings, 1 reply; 19+ messages in thread
From: Michael S. Tsirkin @ 2024-06-18 18:18 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, edumazet, kuba, pabeni, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
This looks like a sensible way to do this.
Yet something to improve:
On Tue, Jun 18, 2024 at 04:44:56PM +0200, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@nvidia.com>
>
> Add support for Byte Queue Limits (BQL).
>
> Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
> Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
> running in background. Netperf TCP_RR results:
>
> NOBQL FQC 1q: 159.56 159.33 158.50 154.31 agv: 157.925
> NOBQL FQC 2q: 184.64 184.96 174.73 174.15 agv: 179.62
> NOBQL FQC 4q: 994.46 441.96 416.50 499.56 agv: 588.12
> NOBQL PFF 1q: 148.68 148.92 145.95 149.48 agv: 148.2575
> NOBQL PFF 2q: 171.86 171.20 170.42 169.42 agv: 170.725
> NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 agv: 2159.7875
> BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 agv: 1282.4375
> BQL FQC 2q: 768.30 817.72 864.43 974.40 agv: 856.2125
> BQL FQC 4q: 945.66 942.68 878.51 822.82 agv: 897.4175
> BQL PFF 1q: 149.69 151.49 149.40 147.47 agv: 149.5125
> BQL PFF 2q: 2059.32 798.74 1844.12 381.80 agv: 1270.995
> BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 agv: 6119.1875
>
> Signed-off-by: Jiri Pirko <jiri@nvidia.com>
> ---
> v2->v3:
> - fixed the switch from/to orphan mode while skbs are yet to be
> completed by using the second least significant bit in virtqueue
> token pointer to indicate skb is orphan. Don't account orphan
> skbs in completion.
> - reorganized parallel skb/xdp free stats accounting to napi/others.
> - fixed kick condition check in orphan mode
> v1->v2:
> - moved netdev_tx_completed_queue() call into __free_old_xmit(),
> propagate use_napi flag to __free_old_xmit() and only call
> netdev_tx_completed_queue() in case it is true
> - added forgotten call to netdev_tx_reset_queue()
> - fixed stats for xdp packets
> - fixed bql accounting when __free_old_xmit() is called from xdp path
> - handle the !use_napi case in start_xmit() kick section
> ---
> drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++------------
> 1 file changed, 57 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 61a57d134544..9f9b86874173 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -47,7 +47,8 @@ module_param(napi_tx, bool, 0644);
> #define VIRTIO_XDP_TX BIT(0)
> #define VIRTIO_XDP_REDIR BIT(1)
>
> -#define VIRTIO_XDP_FLAG BIT(0)
> +#define VIRTIO_XDP_FLAG BIT(0)
> +#define VIRTIO_ORPHAN_FLAG BIT(1)
>
> /* RX packet size EWMA. The average packet size is used to determine the packet
> * buffer size when refilling RX rings. As the entire RX ring may be refilled
> @@ -85,6 +86,8 @@ struct virtnet_stat_desc {
> struct virtnet_sq_free_stats {
> u64 packets;
> u64 bytes;
> + u64 napi_packets;
> + u64 napi_bytes;
> };
>
> struct virtnet_sq_stats {
> @@ -506,29 +509,50 @@ static struct xdp_frame *ptr_to_xdp(void *ptr)
> return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
> }
>
> -static void __free_old_xmit(struct send_queue *sq, bool in_napi,
> - struct virtnet_sq_free_stats *stats)
> +static bool is_orphan_skb(void *ptr)
> +{
> + return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG;
> +}
> +
> +static void *skb_to_ptr(struct sk_buff *skb, bool orphan)
> +{
> + return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0));
> +}
> +
> +static struct sk_buff *ptr_to_skb(void *ptr)
> +{
> + return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG);
> +}
> +
> +static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> + bool in_napi, struct virtnet_sq_free_stats *stats)
> {
> unsigned int len;
> void *ptr;
>
> while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> - ++stats->packets;
> -
> if (!is_xdp_frame(ptr)) {
> - struct sk_buff *skb = ptr;
> + struct sk_buff *skb = ptr_to_skb(ptr);
>
> pr_debug("Sent skb %p\n", skb);
>
> - stats->bytes += skb->len;
> + if (is_orphan_skb(ptr)) {
> + stats->packets++;
> + stats->bytes += skb->len;
> + } else {
> + stats->napi_packets++;
> + stats->napi_bytes += skb->len;
> + }
> napi_consume_skb(skb, in_napi);
> } else {
> struct xdp_frame *frame = ptr_to_xdp(ptr);
>
> + stats->packets++;
> stats->bytes += xdp_get_frame_len(frame);
> xdp_return_frame(frame);
> }
> }
> + netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
Are you sure it's right? You are completing a larger and larger
number of bytes and packets each time.
For example, won't this eventually trigger this check inside dql_completed:
BUG_ON(count > num_queued - dql->num_completed);
?
If I am right the perf testing has to be redone with this fixed ...
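For reference, the contract behind this question, as a minimal sketch (not
this driver's code): every byte accounted with netdev_tx_sent_queue() on the
xmit path has to be reported back exactly once, and
netdev_tx_completed_queue() takes the amount completed since the previous
call - a delta, not a running total:

	/* example_pop_completed() stands in for the device-specific walk of
	 * the completion ring and is purely hypothetical
	 */
	static struct sk_buff *example_pop_completed(void);

	static void example_tx_clean(struct netdev_queue *txq)
	{
		unsigned int pkts = 0, bytes = 0;
		struct sk_buff *skb;

		while ((skb = example_pop_completed()) != NULL) {
			pkts++;
			bytes += skb->len;
			dev_consume_skb_any(skb);
		}
		/* report only this batch; dql_completed() BUG()s if the
		 * completed side ever gets ahead of what was queued
		 */
		netdev_tx_completed_queue(txq, pkts, bytes);
	}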
> }
>
> /* Converting between virtqueue no. and kernel tx/rx queue no.
> @@ -955,21 +979,22 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
> virtnet_rq_free_buf(vi, rq, buf);
> }
>
> -static void free_old_xmit(struct send_queue *sq, bool in_napi)
> +static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> + bool in_napi)
> {
> struct virtnet_sq_free_stats stats = {0};
>
> - __free_old_xmit(sq, in_napi, &stats);
> + __free_old_xmit(sq, txq, in_napi, &stats);
>
> /* Avoid overhead when no packets have been processed
> * happens when called speculatively from start_xmit.
> */
> - if (!stats.packets)
> + if (!stats.packets && !stats.napi_packets)
> return;
>
> u64_stats_update_begin(&sq->stats.syncp);
> - u64_stats_add(&sq->stats.bytes, stats.bytes);
> - u64_stats_add(&sq->stats.packets, stats.packets);
> + u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
> + u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
> u64_stats_update_end(&sq->stats.syncp);
> }
>
> @@ -1003,7 +1028,9 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
> * early means 16 slots are typically wasted.
> */
> if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
> - netif_stop_subqueue(dev, qnum);
> + struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
> +
> + netif_tx_stop_queue(txq);
> u64_stats_update_begin(&sq->stats.syncp);
> u64_stats_inc(&sq->stats.stop);
> u64_stats_update_end(&sq->stats.syncp);
> @@ -1012,7 +1039,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
> virtqueue_napi_schedule(&sq->napi, sq->vq);
> } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
> /* More just got used, free them then recheck. */
> - free_old_xmit(sq, false);
> + free_old_xmit(sq, txq, false);
> if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
> netif_start_subqueue(dev, qnum);
> u64_stats_update_begin(&sq->stats.syncp);
> @@ -1138,7 +1165,8 @@ static int virtnet_xdp_xmit(struct net_device *dev,
> }
>
> /* Free up any pending old buffers before queueing new ones. */
> - __free_old_xmit(sq, false, &stats);
> + __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
> + false, &stats);
>
> for (i = 0; i < n; i++) {
> struct xdp_frame *xdpf = frames[i];
> @@ -2313,7 +2341,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
>
> do {
> virtqueue_disable_cb(sq->vq);
> - free_old_xmit(sq, true);
> + free_old_xmit(sq, txq, true);
> } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
>
> if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
> @@ -2412,6 +2440,7 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> goto err_xdp_reg_mem_model;
>
> virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
> + netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
> virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
>
> return 0;
> @@ -2471,7 +2500,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> txq = netdev_get_tx_queue(vi->dev, index);
> __netif_tx_lock(txq, raw_smp_processor_id());
> virtqueue_disable_cb(sq->vq);
> - free_old_xmit(sq, true);
> + free_old_xmit(sq, txq, true);
>
> if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
> if (netif_tx_queue_stopped(txq)) {
> @@ -2505,7 +2534,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> return 0;
> }
>
> -static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
> +static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
> {
> struct virtio_net_hdr_mrg_rxbuf *hdr;
> const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
> @@ -2549,7 +2578,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
> return num_sg;
> num_sg++;
> }
> - return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
> + return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
> + skb_to_ptr(skb, orphan), GFP_ATOMIC);
> }
>
> static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> @@ -2559,24 +2589,25 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> struct send_queue *sq = &vi->sq[qnum];
> int err;
> struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
> - bool kick = !netdev_xmit_more();
> + bool xmit_more = netdev_xmit_more();
> bool use_napi = sq->napi.weight;
> + bool kick;
>
> /* Free up any pending old buffers before queueing new ones. */
> do {
> if (use_napi)
> virtqueue_disable_cb(sq->vq);
>
> - free_old_xmit(sq, false);
> + free_old_xmit(sq, txq, false);
>
> - } while (use_napi && kick &&
> + } while (use_napi && !xmit_more &&
> unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
>
> /* timestamp packet in software */
> skb_tx_timestamp(skb);
>
> /* Try to transmit */
> - err = xmit_skb(sq, skb);
> + err = xmit_skb(sq, skb, !use_napi);
>
> /* This should not happen! */
> if (unlikely(err)) {
> @@ -2598,7 +2629,9 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>
> check_sq_full_and_disable(vi, dev, sq);
>
> - if (kick || netif_xmit_stopped(txq)) {
> + kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) :
> + !xmit_more || netif_xmit_stopped(txq);
> + if (kick) {
> if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
> u64_stats_update_begin(&sq->stats.syncp);
> u64_stats_inc(&sq->stats.kicks);
> --
> 2.45.1
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-06-18 18:18 ` Michael S. Tsirkin
@ 2024-06-19 5:45 ` Jiri Pirko
2024-06-19 7:26 ` Michael S. Tsirkin
0 siblings, 1 reply; 19+ messages in thread
From: Jiri Pirko @ 2024-06-19 5:45 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: netdev, davem, edumazet, kuba, pabeni, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
Tue, Jun 18, 2024 at 08:18:12PM CEST, mst@redhat.com wrote:
>This looks like a sensible way to do this.
>Yet something to improve:
>
>
>On Tue, Jun 18, 2024 at 04:44:56PM +0200, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@nvidia.com>
>>
[...]
>> +static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
>> + bool in_napi, struct virtnet_sq_free_stats *stats)
>> {
>> unsigned int len;
>> void *ptr;
>>
>> while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
>> - ++stats->packets;
>> -
>> if (!is_xdp_frame(ptr)) {
>> - struct sk_buff *skb = ptr;
>> + struct sk_buff *skb = ptr_to_skb(ptr);
>>
>> pr_debug("Sent skb %p\n", skb);
>>
>> - stats->bytes += skb->len;
>> + if (is_orphan_skb(ptr)) {
>> + stats->packets++;
>> + stats->bytes += skb->len;
>> + } else {
>> + stats->napi_packets++;
>> + stats->napi_bytes += skb->len;
>> + }
>> napi_consume_skb(skb, in_napi);
>> } else {
>> struct xdp_frame *frame = ptr_to_xdp(ptr);
>>
>> + stats->packets++;
>> stats->bytes += xdp_get_frame_len(frame);
>> xdp_return_frame(frame);
>> }
>> }
>> + netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
>
>Are you sure it's right? You are completing larger and larger
>number of bytes and packets each time.
Not sure I get you. __free_old_xmit() is always called with stats
zeroed. So this is just the sum for one queue completion run.
I don't see how this could become "larger and larger number" as you
describe.
>
>For example as won't this eventually trigger this inside dql_completed:
>
> BUG_ON(count > num_queued - dql->num_completed);
Nope, I don't see how we can hit it. We don't complete anything beyond
what was started in xmit(). Am I missing something?
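To spell out the balance being argued, condensed from the patch above:

	/* napi (BQL) mode - the xmit side accounts each skb exactly once: */
	kick = __netdev_tx_sent_queue(txq, skb->len, xmit_more);

	/* the completion side reports back only non-orphan skbs, summed per
	 * __free_old_xmit() call into freshly zeroed stats:
	 */
	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);

	/* orphan-mode skbs carry VIRTIO_ORPHAN_FLAG and are counted on
	 * neither side, which is what keeps completed from running ahead of
	 * queued.
	 */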
>
>?
>
>
>If I am right the perf testing has to be redone with this fixed ...
>
>
>> }
>>
[...]
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-06-19 5:45 ` Jiri Pirko
@ 2024-06-19 7:26 ` Michael S. Tsirkin
2024-06-19 8:05 ` Jiri Pirko
0 siblings, 1 reply; 19+ messages in thread
From: Michael S. Tsirkin @ 2024-06-19 7:26 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, edumazet, kuba, pabeni, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
On Wed, Jun 19, 2024 at 07:45:16AM +0200, Jiri Pirko wrote:
> Tue, Jun 18, 2024 at 08:18:12PM CEST, mst@redhat.com wrote:
> >This looks like a sensible way to do this.
> >Yet something to improve:
> >
> >
> >On Tue, Jun 18, 2024 at 04:44:56PM +0200, Jiri Pirko wrote:
> >> From: Jiri Pirko <jiri@nvidia.com>
> >>
>
> [...]
>
>
> >> +static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> >> + bool in_napi, struct virtnet_sq_free_stats *stats)
> >> {
> >> unsigned int len;
> >> void *ptr;
> >>
> >> while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> >> - ++stats->packets;
> >> -
> >> if (!is_xdp_frame(ptr)) {
> >> - struct sk_buff *skb = ptr;
> >> + struct sk_buff *skb = ptr_to_skb(ptr);
> >>
> >> pr_debug("Sent skb %p\n", skb);
> >>
> >> - stats->bytes += skb->len;
> >> + if (is_orphan_skb(ptr)) {
> >> + stats->packets++;
> >> + stats->bytes += skb->len;
> >> + } else {
> >> + stats->napi_packets++;
> >> + stats->napi_bytes += skb->len;
> >> + }
> >> napi_consume_skb(skb, in_napi);
> >> } else {
> >> struct xdp_frame *frame = ptr_to_xdp(ptr);
> >>
> >> + stats->packets++;
> >> stats->bytes += xdp_get_frame_len(frame);
> >> xdp_return_frame(frame);
> >> }
> >> }
> >> + netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
> >
> >Are you sure it's right? You are completing larger and larger
> >number of bytes and packets each time.
>
> Not sure I get you. __free_old_xmit() is always called with stats
> zeroed. So this is just sum-up of one queue completion run.
> I don't see how this could become "larger and larger number" as you
> describe.
Oh. Right, of course. Worth a comment maybe? Just to make sure
we remember not to call __free_old_xmit twice in a row
without reinitializing stats.
Or move the initialization into __free_old_xmit to make it
self-contained...
WDYT?
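E.g. the comment alternative could be as simple as (a sketch):

	/* Note: stats are only accumulated here and the napi_* totals are fed
	 * to netdev_tx_completed_queue() as a delta, so callers must pass a
	 * freshly zeroed struct and must not call this twice in a row without
	 * re-zeroing it.
	 */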
>
> >
> >For example as won't this eventually trigger this inside dql_completed:
> >
> > BUG_ON(count > num_queued - dql->num_completed);
>
> Nope, I don't see how we can hit it. Do not complete anything else
> in addition to what was started in xmit(). Am I missing something?
>
>
> >
> >?
> >
> >
> >If I am right the perf testing has to be redone with this fixed ...
> >
> >
> >> }
> >>
>
> [...]
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-06-19 7:26 ` Michael S. Tsirkin
@ 2024-06-19 8:05 ` Jiri Pirko
2024-06-19 8:17 ` Michael S. Tsirkin
2024-06-19 8:23 ` Michael S. Tsirkin
0 siblings, 2 replies; 19+ messages in thread
From: Jiri Pirko @ 2024-06-19 8:05 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: netdev, davem, edumazet, kuba, pabeni, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
Wed, Jun 19, 2024 at 09:26:22AM CEST, mst@redhat.com wrote:
>On Wed, Jun 19, 2024 at 07:45:16AM +0200, Jiri Pirko wrote:
>> Tue, Jun 18, 2024 at 08:18:12PM CEST, mst@redhat.com wrote:
>> >This looks like a sensible way to do this.
>> >Yet something to improve:
>> >
>> >
>> >On Tue, Jun 18, 2024 at 04:44:56PM +0200, Jiri Pirko wrote:
>> >> From: Jiri Pirko <jiri@nvidia.com>
>> >>
>>
>> [...]
>>
>>
>> >> +static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
>> >> + bool in_napi, struct virtnet_sq_free_stats *stats)
>> >> {
>> >> unsigned int len;
>> >> void *ptr;
>> >>
>> >> while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
>> >> - ++stats->packets;
>> >> -
>> >> if (!is_xdp_frame(ptr)) {
>> >> - struct sk_buff *skb = ptr;
>> >> + struct sk_buff *skb = ptr_to_skb(ptr);
>> >>
>> >> pr_debug("Sent skb %p\n", skb);
>> >>
>> >> - stats->bytes += skb->len;
>> >> + if (is_orphan_skb(ptr)) {
>> >> + stats->packets++;
>> >> + stats->bytes += skb->len;
>> >> + } else {
>> >> + stats->napi_packets++;
>> >> + stats->napi_bytes += skb->len;
>> >> + }
>> >> napi_consume_skb(skb, in_napi);
>> >> } else {
>> >> struct xdp_frame *frame = ptr_to_xdp(ptr);
>> >>
>> >> + stats->packets++;
>> >> stats->bytes += xdp_get_frame_len(frame);
>> >> xdp_return_frame(frame);
>> >> }
>> >> }
>> >> + netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
>> >
>> >Are you sure it's right? You are completing larger and larger
>> >number of bytes and packets each time.
>>
>> Not sure I get you. __free_old_xmit() is always called with stats
>> zeroed. So this is just sum-up of one queue completion run.
>> I don't see how this could become "larger and larger number" as you
>> describe.
>
>Oh. Right of course. Worth a comment maybe? Just to make sure
>we remember not to call __free_old_xmit twice in a row
>without reinitializing stats.
>Or move the initialization into __free_old_xmit to make it
>self-contained ..
Well, the initialization happens in the caller by {0}. Wouldn't a
memset in __free_old_xmit() add extra overhead? IDK.
Perhaps a small comment in __free_old_xmit() would do better.
One way or another, I think this is parallel to this patchset. Will
handle it separately if you don't mind.
>WDYT?
>
>>
>> >
>> >For example as won't this eventually trigger this inside dql_completed:
>> >
>> > BUG_ON(count > num_queued - dql->num_completed);
>>
>> Nope, I don't see how we can hit it. Do not complete anything else
>> in addition to what was started in xmit(). Am I missing something?
>>
>>
>> >
>> >?
>> >
>> >
>> >If I am right the perf testing has to be redone with this fixed ...
>> >
>> >
>> >> }
>> >>
>>
>> [...]
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-06-19 8:05 ` Jiri Pirko
@ 2024-06-19 8:17 ` Michael S. Tsirkin
2024-06-19 8:23 ` Michael S. Tsirkin
1 sibling, 0 replies; 19+ messages in thread
From: Michael S. Tsirkin @ 2024-06-19 8:17 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, edumazet, kuba, pabeni, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
On Wed, Jun 19, 2024 at 10:05:41AM +0200, Jiri Pirko wrote:
> Wed, Jun 19, 2024 at 09:26:22AM CEST, mst@redhat.com wrote:
> >On Wed, Jun 19, 2024 at 07:45:16AM +0200, Jiri Pirko wrote:
> >> Tue, Jun 18, 2024 at 08:18:12PM CEST, mst@redhat.com wrote:
> >> >This looks like a sensible way to do this.
> >> >Yet something to improve:
> >> >
> >> >
> >> >On Tue, Jun 18, 2024 at 04:44:56PM +0200, Jiri Pirko wrote:
> >> >> From: Jiri Pirko <jiri@nvidia.com>
> >> >>
> >>
> >> [...]
> >>
> >>
> >> >> +static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> >> >> + bool in_napi, struct virtnet_sq_free_stats *stats)
> >> >> {
> >> >> unsigned int len;
> >> >> void *ptr;
> >> >>
> >> >> while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> >> >> - ++stats->packets;
> >> >> -
> >> >> if (!is_xdp_frame(ptr)) {
> >> >> - struct sk_buff *skb = ptr;
> >> >> + struct sk_buff *skb = ptr_to_skb(ptr);
> >> >>
> >> >> pr_debug("Sent skb %p\n", skb);
> >> >>
> >> >> - stats->bytes += skb->len;
> >> >> + if (is_orphan_skb(ptr)) {
> >> >> + stats->packets++;
> >> >> + stats->bytes += skb->len;
> >> >> + } else {
> >> >> + stats->napi_packets++;
> >> >> + stats->napi_bytes += skb->len;
> >> >> + }
> >> >> napi_consume_skb(skb, in_napi);
> >> >> } else {
> >> >> struct xdp_frame *frame = ptr_to_xdp(ptr);
> >> >>
> >> >> + stats->packets++;
> >> >> stats->bytes += xdp_get_frame_len(frame);
> >> >> xdp_return_frame(frame);
> >> >> }
> >> >> }
> >> >> + netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
> >> >
> >> >Are you sure it's right? You are completing larger and larger
> >> >number of bytes and packets each time.
> >>
> >> Not sure I get you. __free_old_xmit() is always called with stats
> >> zeroed. So this is just sum-up of one queue completion run.
> >> I don't see how this could become "larger and larger number" as you
> >> describe.
> >
> >Oh. Right of course. Worth a comment maybe? Just to make sure
> >we remember not to call __free_old_xmit twice in a row
> >without reinitializing stats.
> >Or move the initialization into __free_old_xmit to make it
> >self-contained ..
>
> Well, the initialization happens in the caller by {0}, Wouldn't
> memset in __free_old_xmit() add an extra overhead? IDK.
> Perhaps a small comment in __free_old_xmit() would do better.
>
> One way or another, I think this is parallel to this patchset. Will
> handle it separatelly if you don't mind.
Okay.
Acked-by: Michael S. Tsirkin <mst@redhat.com>
> >WDYT?
> >
> >>
> >> >
> >> >For example as won't this eventually trigger this inside dql_completed:
> >> >
> >> > BUG_ON(count > num_queued - dql->num_completed);
> >>
> >> Nope, I don't see how we can hit it. Do not complete anything else
> >> in addition to what was started in xmit(). Am I missing something?
> >>
> >>
> >> >
> >> >?
> >> >
> >> >
> >> >If I am right the perf testing has to be redone with this fixed ...
> >> >
> >> >
> >> >> }
> >> >>
> >>
> >> [...]
> >
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-06-19 8:05 ` Jiri Pirko
2024-06-19 8:17 ` Michael S. Tsirkin
@ 2024-06-19 8:23 ` Michael S. Tsirkin
2024-06-19 10:09 ` Jiri Pirko
1 sibling, 1 reply; 19+ messages in thread
From: Michael S. Tsirkin @ 2024-06-19 8:23 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, edumazet, kuba, pabeni, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
On Wed, Jun 19, 2024 at 10:05:41AM +0200, Jiri Pirko wrote:
> >Oh. Right of course. Worth a comment maybe? Just to make sure
> >we remember not to call __free_old_xmit twice in a row
> >without reinitializing stats.
> >Or move the initialization into __free_old_xmit to make it
> >self-contained ..
>
> Well, the initialization happens in the caller by {0}, Wouldn't
> memset in __free_old_xmit() add an extra overhead? IDK.
Well, when I did the below, the binary came out a bit smaller.
If you have to respin, you can include it.
If not, I can submit it separately.
----
virtio-net: cleanup __free_old_xmit
Two call sites of __free_old_xmit zero-initialize stats;
doing it inside __free_old_xmit seems to make the compiler's
job a bit easier:
$ size /tmp/orig/virtio_net.o
text data bss dec hex filename
65857 3892 100 69849 110d9 /tmp/orig/virtio_net.o
$ size /tmp/new/virtio_net.o
text data bss dec hex filename
65760 3892 100 69752 11078 /tmp/new/virtio_net.o
Couldn't measure any performance impact, unsurprisingly.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 283b34d50296..c2ce8de340f7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -383,6 +383,8 @@ static void __free_old_xmit(struct send_queue *sq, bool in_napi,
unsigned int len;
void *ptr;
+ stats->bytes = stats->packets = 0;
+
while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
++stats->packets;
@@ -828,7 +830,7 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
static void free_old_xmit(struct send_queue *sq, bool in_napi)
{
- struct virtnet_sq_free_stats stats = {0};
+ struct virtnet_sq_free_stats stats;
__free_old_xmit(sq, in_napi, &stats);
@@ -979,7 +981,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
int n, struct xdp_frame **frames, u32 flags)
{
struct virtnet_info *vi = netdev_priv(dev);
- struct virtnet_sq_free_stats stats = {0};
+ struct virtnet_sq_free_stats stats;
struct receive_queue *rq = vi->rq;
struct bpf_prog *xdp_prog;
struct send_queue *sq;
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-06-19 8:23 ` Michael S. Tsirkin
@ 2024-06-19 10:09 ` Jiri Pirko
2024-06-20 7:43 ` Michael S. Tsirkin
0 siblings, 1 reply; 19+ messages in thread
From: Jiri Pirko @ 2024-06-19 10:09 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: netdev, davem, edumazet, kuba, pabeni, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
Wed, Jun 19, 2024 at 10:23:25AM CEST, mst@redhat.com wrote:
>On Wed, Jun 19, 2024 at 10:05:41AM +0200, Jiri Pirko wrote:
>> >Oh. Right of course. Worth a comment maybe? Just to make sure
>> >we remember not to call __free_old_xmit twice in a row
>> >without reinitializing stats.
>> >Or move the initialization into __free_old_xmit to make it
>> >self-contained ..
>>
>> Well, the initialization happens in the caller by {0}, Wouldn't
>> memset in __free_old_xmit() add an extra overhead? IDK.
>
>
>Well if I did the below the binary is a bit smaller.
>
>If you have to respin you can include it.
>If not I can submit separately.
Please send it separately. It should be a separate patch.
Thanks!
>
>----
>
>
>virtio-net: cleanup __free_old_xmit
>
>Two call sites of __free_old_xmit zero-initialize stats,
>doing it inside __free_old_xmit seems to make compiler's
>job a bit easier:
>
>$ size /tmp/orig/virtio_net.o
> text data bss dec hex filename
> 65857 3892 100 69849 110d9 /tmp/orig/virtio_net.o
>$ size /tmp/new/virtio_net.o
> text data bss dec hex filename
> 65760 3892 100 69752 11078 /tmp/new/virtio_net.o
>
>Couldn't measure any performance impact, unsurprizingly.
>
>Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>
>---
>
>diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>index 283b34d50296..c2ce8de340f7 100644
>--- a/drivers/net/virtio_net.c
>+++ b/drivers/net/virtio_net.c
>@@ -383,6 +383,8 @@ static void __free_old_xmit(struct send_queue *sq, bool in_napi,
> unsigned int len;
> void *ptr;
>
>+ stats->bytes = stats->packets = 0;
Memset perhaps?
>+
> while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> ++stats->packets;
>
>@@ -828,7 +830,7 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
>
> static void free_old_xmit(struct send_queue *sq, bool in_napi)
> {
>- struct virtnet_sq_free_stats stats = {0};
>+ struct virtnet_sq_free_stats stats;
>
> __free_old_xmit(sq, in_napi, &stats);
>
>@@ -979,7 +981,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
> int n, struct xdp_frame **frames, u32 flags)
> {
> struct virtnet_info *vi = netdev_priv(dev);
>- struct virtnet_sq_free_stats stats = {0};
>+ struct virtnet_sq_free_stats stats;
> struct receive_queue *rq = vi->rq;
> struct bpf_prog *xdp_prog;
> struct send_queue *sq;
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-06-18 14:44 ` [PATCH net-next v3] virtio_net: add support for Byte Queue Limits Jiri Pirko
2024-06-18 18:18 ` Michael S. Tsirkin
@ 2024-06-20 0:40 ` patchwork-bot+netdevbpf
2024-08-12 14:57 ` Marek Szyprowski
2 siblings, 0 replies; 19+ messages in thread
From: patchwork-bot+netdevbpf @ 2024-06-20 0:40 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, edumazet, kuba, pabeni, mst, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
Hello:
This patch was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:
On Tue, 18 Jun 2024 16:44:56 +0200 you wrote:
> From: Jiri Pirko <jiri@nvidia.com>
>
> Add support for Byte Queue Limits (BQL).
>
> Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
> Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
> running in background. Netperf TCP_RR results:
>
> [...]
Here is the summary with links:
- [net-next,v3] virtio_net: add support for Byte Queue Limits
https://git.kernel.org/netdev/net-next/c/c8bd1f7f3e61
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-06-19 10:09 ` Jiri Pirko
@ 2024-06-20 7:43 ` Michael S. Tsirkin
0 siblings, 0 replies; 19+ messages in thread
From: Michael S. Tsirkin @ 2024-06-20 7:43 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, edumazet, kuba, pabeni, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
On Wed, Jun 19, 2024 at 12:09:45PM +0200, Jiri Pirko wrote:
> Wed, Jun 19, 2024 at 10:23:25AM CEST, mst@redhat.com wrote:
> >On Wed, Jun 19, 2024 at 10:05:41AM +0200, Jiri Pirko wrote:
> >> >Oh. Right of course. Worth a comment maybe? Just to make sure
> >> >we remember not to call __free_old_xmit twice in a row
> >> >without reinitializing stats.
> >> >Or move the initialization into __free_old_xmit to make it
> >> >self-contained ..
> >>
> >> Well, the initialization happens in the caller by {0}, Wouldn't
> >> memset in __free_old_xmit() add an extra overhead? IDK.
> >
> >
> >Well if I did the below the binary is a bit smaller.
> >
> >If you have to respin you can include it.
> >If not I can submit separately.
>
> Please send it reparately. It's should be a separate patch.
>
> Thanks!
>
> >
> >----
> >
> >
> >virtio-net: cleanup __free_old_xmit
> >
> >Two call sites of __free_old_xmit zero-initialize stats,
> >doing it inside __free_old_xmit seems to make compiler's
> >job a bit easier:
> >
> >$ size /tmp/orig/virtio_net.o
> > text data bss dec hex filename
> > 65857 3892 100 69849 110d9 /tmp/orig/virtio_net.o
> >$ size /tmp/new/virtio_net.o
> > text data bss dec hex filename
> > 65760 3892 100 69752 11078 /tmp/new/virtio_net.o
> >
> >Couldn't measure any performance impact, unsurprizingly.
> >
> >Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >
> >---
> >
> >diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >index 283b34d50296..c2ce8de340f7 100644
> >--- a/drivers/net/virtio_net.c
> >+++ b/drivers/net/virtio_net.c
> >@@ -383,6 +383,8 @@ static void __free_old_xmit(struct send_queue *sq, bool in_napi,
> > unsigned int len;
> > void *ptr;
> >
> >+ stats->bytes = stats->packets = 0;
>
> Memset perhaps?
Generates the same code and I find it less readable -
virtio generally opts for explicit initialization.
> >+
> > while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> > ++stats->packets;
> >
> >@@ -828,7 +830,7 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
> >
> > static void free_old_xmit(struct send_queue *sq, bool in_napi)
> > {
> >- struct virtnet_sq_free_stats stats = {0};
> >+ struct virtnet_sq_free_stats stats;
> >
> > __free_old_xmit(sq, in_napi, &stats);
> >
> >@@ -979,7 +981,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
> > int n, struct xdp_frame **frames, u32 flags)
> > {
> > struct virtnet_info *vi = netdev_priv(dev);
> >- struct virtnet_sq_free_stats stats = {0};
> >+ struct virtnet_sq_free_stats stats;
> > struct receive_queue *rq = vi->rq;
> > struct bpf_prog *xdp_prog;
> > struct send_queue *sq;
> >
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-06-18 14:44 ` [PATCH net-next v3] virtio_net: add support for Byte Queue Limits Jiri Pirko
2024-06-18 18:18 ` Michael S. Tsirkin
2024-06-20 0:40 ` patchwork-bot+netdevbpf
@ 2024-08-12 14:57 ` Marek Szyprowski
2024-08-12 16:47 ` Jiri Pirko
2 siblings, 1 reply; 19+ messages in thread
From: Marek Szyprowski @ 2024-08-12 14:57 UTC (permalink / raw)
To: Jiri Pirko, netdev
Cc: davem, edumazet, kuba, pabeni, mst, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
Hi,
On 18.06.2024 16:44, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@nvidia.com>
>
> Add support for Byte Queue Limits (BQL).
>
> Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
> Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
> running in background. Netperf TCP_RR results:
>
> NOBQL FQC 1q: 159.56 159.33 158.50 154.31 agv: 157.925
> NOBQL FQC 2q: 184.64 184.96 174.73 174.15 agv: 179.62
> NOBQL FQC 4q: 994.46 441.96 416.50 499.56 agv: 588.12
> NOBQL PFF 1q: 148.68 148.92 145.95 149.48 agv: 148.2575
> NOBQL PFF 2q: 171.86 171.20 170.42 169.42 agv: 170.725
> NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 agv: 2159.7875
> BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 agv: 1282.4375
> BQL FQC 2q: 768.30 817.72 864.43 974.40 agv: 856.2125
> BQL FQC 4q: 945.66 942.68 878.51 822.82 agv: 897.4175
> BQL PFF 1q: 149.69 151.49 149.40 147.47 agv: 149.5125
> BQL PFF 2q: 2059.32 798.74 1844.12 381.80 agv: 1270.995
> BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 agv: 6119.1875
>
> Signed-off-by: Jiri Pirko <jiri@nvidia.com>
> ---
> v2->v3:
> - fixed the switch from/to orphan mode while skbs are yet to be
> completed by using the second least significant bit in virtqueue
> token pointer to indicate skb is orphan. Don't account orphan
> skbs in completion.
> - reorganized parallel skb/xdp free stats accounting to napi/others.
> - fixed kick condition check in orphan mode
> v1->v2:
> - moved netdev_tx_completed_queue() call into __free_old_xmit(),
> propagate use_napi flag to __free_old_xmit() and only call
> netdev_tx_completed_queue() in case it is true
> - added forgotten call to netdev_tx_reset_queue()
> - fixed stats for xdp packets
> - fixed bql accounting when __free_old_xmit() is called from xdp path
> - handle the !use_napi case in start_xmit() kick section
> ---
> drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++------------
> 1 file changed, 57 insertions(+), 24 deletions(-)
I've recently found an issue with the virtio-net driver and system
suspend/resume. Bisecting pointed to the c8bd1f7f3e61 ("virtio_net: add
support for Byte Queue Limits") commit and this patch. Once it got
merged into linux-next and then Linus' tree, the driver occasionally
crashes with the following log (captured on QEMU's ARM 32-bit 'virt'
machine):
root@target:~# time rtcwake -s10 -mmem
rtcwake: wakeup from "mem" using /dev/rtc0 at Sat Aug 10 12:40:26 2024
PM: suspend entry (s2idle)
Filesystems sync: 0.000 seconds
Freezing user space processes
Freezing user space processes completed (elapsed 0.006 seconds)
OOM killer disabled.
Freezing remaining freezable tasks
Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
------------[ cut here ]------------
kernel BUG at lib/dynamic_queue_limits.c:99!
Internal error: Oops - BUG: 0 [#1] SMP ARM
Modules linked in: bluetooth ecdh_generic ecc libaes
CPU: 1 PID: 1282 Comm: rtcwake Not tainted
6.10.0-rc3-00732-gc8bd1f7f3e61 #15240
Hardware name: Generic DT based system
PC is at dql_completed+0x270/0x2cc
LR is at __free_old_xmit+0x120/0x198
pc : [<c07ffa54>] lr : [<c0c42bf4>] psr: 80000013
...
Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
Control: 10c5387d Table: 43a4406a DAC: 00000051
...
Process rtcwake (pid: 1282, stack limit = 0xfbc21278)
Stack: (0xe0805e80 to 0xe0806000)
...
Call trace:
dql_completed from __free_old_xmit+0x120/0x198
__free_old_xmit from free_old_xmit+0x44/0xe4
free_old_xmit from virtnet_poll_tx+0x88/0x1b4
virtnet_poll_tx from __napi_poll+0x2c/0x1d4
__napi_poll from net_rx_action+0x140/0x2b4
net_rx_action from handle_softirqs+0x11c/0x350
handle_softirqs from call_with_stack+0x18/0x20
call_with_stack from do_softirq+0x48/0x50
do_softirq from __local_bh_enable_ip+0xa0/0xa4
__local_bh_enable_ip from virtnet_open+0xd4/0x21c
virtnet_open from virtnet_restore+0x94/0x120
virtnet_restore from virtio_device_restore+0x110/0x1f4
virtio_device_restore from dpm_run_callback+0x3c/0x100
dpm_run_callback from device_resume+0x12c/0x2a8
device_resume from dpm_resume+0x12c/0x1e0
dpm_resume from dpm_resume_end+0xc/0x18
dpm_resume_end from suspend_devices_and_enter+0x1f0/0x72c
suspend_devices_and_enter from pm_suspend+0x270/0x2a0
pm_suspend from state_store+0x68/0xc8
state_store from kernfs_fop_write_iter+0x10c/0x1cc
kernfs_fop_write_iter from vfs_write+0x2b0/0x3dc
vfs_write from ksys_write+0x5c/0xd4
ksys_write from ret_fast_syscall+0x0/0x54
Exception stack(0xe8bf1fa8 to 0xe8bf1ff0)
...
---[ end trace 0000000000000000 ]---
Kernel panic - not syncing: Fatal exception in interrupt
---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
I have a fully reproducible setup for this issue. Reverting it together
with f8321fa75102 ("virtio_net: Fix napi_skb_cache_put warning") (due to
some code dependencies) fixes this issue on top of Linux v6.11-rc1 and
recent linux-next releases. Let me know if I can help debug this
issue further and help fix it.
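For context, the BUG is hit inside dql_completed(), i.e. the consistency
check already quoted earlier in this thread; in simplified form (paraphrased,
not verbatim kernel code):

	void dql_completed(struct dql *dql, unsigned int count)
	{
		unsigned int num_queued = READ_ONCE(dql->num_queued);

		/* completing more than is outstanding means the BQL
		 * accounting on this tx queue went out of sync
		 */
		BUG_ON(count > num_queued - dql->num_completed);
		/* ... limit adaptation elided ... */
	}

So the trace suggests that across the suspend/resume cycle the queue
completed bytes that BQL never saw queued, or that its counters were not
reset along with the ring; that is only a guess at the direction, though,
not a confirmed root cause.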
> ...
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-08-12 14:57 ` Marek Szyprowski
@ 2024-08-12 16:47 ` Jiri Pirko
2024-08-12 16:55 ` Marek Szyprowski
0 siblings, 1 reply; 19+ messages in thread
From: Jiri Pirko @ 2024-08-12 16:47 UTC (permalink / raw)
To: Marek Szyprowski
Cc: netdev, davem, edumazet, kuba, pabeni, mst, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
Mon, Aug 12, 2024 at 04:57:24PM CEST, m.szyprowski@samsung.com wrote:
>Hi,
>
>On 18.06.2024 16:44, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@nvidia.com>
>>
>> Add support for Byte Queue Limits (BQL).
>>
>> Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
>> Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
>> running in background. Netperf TCP_RR results:
>>
>> NOBQL FQC 1q: 159.56 159.33 158.50 154.31 agv: 157.925
>> NOBQL FQC 2q: 184.64 184.96 174.73 174.15 agv: 179.62
>> NOBQL FQC 4q: 994.46 441.96 416.50 499.56 agv: 588.12
>> NOBQL PFF 1q: 148.68 148.92 145.95 149.48 agv: 148.2575
>> NOBQL PFF 2q: 171.86 171.20 170.42 169.42 agv: 170.725
>> NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 agv: 2159.7875
>> BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 agv: 1282.4375
>> BQL FQC 2q: 768.30 817.72 864.43 974.40 agv: 856.2125
>> BQL FQC 4q: 945.66 942.68 878.51 822.82 agv: 897.4175
>> BQL PFF 1q: 149.69 151.49 149.40 147.47 agv: 149.5125
>> BQL PFF 2q: 2059.32 798.74 1844.12 381.80 agv: 1270.995
>> BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 agv: 6119.1875
>>
>> Signed-off-by: Jiri Pirko <jiri@nvidia.com>
>> ---
>> v2->v3:
>> - fixed the switch from/to orphan mode while skbs are yet to be
>> completed by using the second least significant bit in virtqueue
>> token pointer to indicate skb is orphan. Don't account orphan
>> skbs in completion.
>> - reorganized parallel skb/xdp free stats accounting to napi/others.
>> - fixed kick condition check in orphan mode
>> v1->v2:
>> - moved netdev_tx_completed_queue() call into __free_old_xmit(),
>> propagate use_napi flag to __free_old_xmit() and only call
>> netdev_tx_completed_queue() in case it is true
>> - added forgotten call to netdev_tx_reset_queue()
>> - fixed stats for xdp packets
>> - fixed bql accounting when __free_old_xmit() is called from xdp path
>> - handle the !use_napi case in start_xmit() kick section
>> ---
>> drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++------------
>> 1 file changed, 57 insertions(+), 24 deletions(-)
>
>I've recently found an issue with virtio-net driver and system
>suspend/resume. Bisecting pointed to the c8bd1f7f3e61 ("virtio_net: add
>support for Byte Queue Limits") commit and this patch. Once it got
>merged to linux-next and then Linus trees, the driver occasionally
>crashes with the following log (captured on QEMU's ARM 32bit 'virt'
>machine):
>
>root@target:~# time rtcwake -s10 -mmem
>rtcwake: wakeup from "mem" using /dev/rtc0 at Sat Aug 10 12:40:26 2024
>PM: suspend entry (s2idle)
>Filesystems sync: 0.000 seconds
>Freezing user space processes
>Freezing user space processes completed (elapsed 0.006 seconds)
>OOM killer disabled.
>Freezing remaining freezable tasks
>Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
>------------[ cut here ]------------
>kernel BUG at lib/dynamic_queue_limits.c:99!
>Internal error: Oops - BUG: 0 [#1] SMP ARM
>Modules linked in: bluetooth ecdh_generic ecc libaes
>CPU: 1 PID: 1282 Comm: rtcwake Not tainted
>6.10.0-rc3-00732-gc8bd1f7f3e61 #15240
>Hardware name: Generic DT based system
>PC is at dql_completed+0x270/0x2cc
>LR is at __free_old_xmit+0x120/0x198
>pc : [<c07ffa54>] lr : [<c0c42bf4>] psr: 80000013
>...
>Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
>Control: 10c5387d Table: 43a4406a DAC: 00000051
>...
>Process rtcwake (pid: 1282, stack limit = 0xfbc21278)
>Stack: (0xe0805e80 to 0xe0806000)
>...
>Call trace:
> dql_completed from __free_old_xmit+0x120/0x198
> __free_old_xmit from free_old_xmit+0x44/0xe4
> free_old_xmit from virtnet_poll_tx+0x88/0x1b4
> virtnet_poll_tx from __napi_poll+0x2c/0x1d4
> __napi_poll from net_rx_action+0x140/0x2b4
> net_rx_action from handle_softirqs+0x11c/0x350
> handle_softirqs from call_with_stack+0x18/0x20
> call_with_stack from do_softirq+0x48/0x50
> do_softirq from __local_bh_enable_ip+0xa0/0xa4
> __local_bh_enable_ip from virtnet_open+0xd4/0x21c
> virtnet_open from virtnet_restore+0x94/0x120
> virtnet_restore from virtio_device_restore+0x110/0x1f4
> virtio_device_restore from dpm_run_callback+0x3c/0x100
> dpm_run_callback from device_resume+0x12c/0x2a8
> device_resume from dpm_resume+0x12c/0x1e0
> dpm_resume from dpm_resume_end+0xc/0x18
> dpm_resume_end from suspend_devices_and_enter+0x1f0/0x72c
> suspend_devices_and_enter from pm_suspend+0x270/0x2a0
> pm_suspend from state_store+0x68/0xc8
> state_store from kernfs_fop_write_iter+0x10c/0x1cc
> kernfs_fop_write_iter from vfs_write+0x2b0/0x3dc
> vfs_write from ksys_write+0x5c/0xd4
> ksys_write from ret_fast_syscall+0x0/0x54
>Exception stack(0xe8bf1fa8 to 0xe8bf1ff0)
>...
>---[ end trace 0000000000000000 ]---
>Kernel panic - not syncing: Fatal exception in interrupt
>---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
>
>I have fully reproducible setup for this issue. Reverting it together
>with f8321fa75102 ("virtio_net: Fix napi_skb_cache_put warning") (due to
>some code dependencies) fixes this issue on top of Linux v6.11-rc1 and
>recent linux-next releases. Let me know if I can help debugging this
>issue further and help fixing.
Will fix this tomorrow. In the meantime, could you provide full
reproduce steps?
Thanks!
>
> > ...
>
>Best regards
>--
>Marek Szyprowski, PhD
>Samsung R&D Institute Poland
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-08-12 16:47 ` Jiri Pirko
@ 2024-08-12 16:55 ` Marek Szyprowski
2024-08-14 7:49 ` Jiri Pirko
0 siblings, 1 reply; 19+ messages in thread
From: Marek Szyprowski @ 2024-08-12 16:55 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, edumazet, kuba, pabeni, mst, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
On 12.08.2024 18:47, Jiri Pirko wrote:
> Mon, Aug 12, 2024 at 04:57:24PM CEST, m.szyprowski@samsung.com wrote:
>> On 18.06.2024 16:44, Jiri Pirko wrote:
>>> From: Jiri Pirko <jiri@nvidia.com>
>>>
>>> Add support for Byte Queue Limits (BQL).
>>>
>>> Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
>>> Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
>>> running in background. Netperf TCP_RR results:
>>>
>>> NOBQL FQC 1q: 159.56 159.33 158.50 154.31 agv: 157.925
>>> NOBQL FQC 2q: 184.64 184.96 174.73 174.15 agv: 179.62
>>> NOBQL FQC 4q: 994.46 441.96 416.50 499.56 agv: 588.12
>>> NOBQL PFF 1q: 148.68 148.92 145.95 149.48 agv: 148.2575
>>> NOBQL PFF 2q: 171.86 171.20 170.42 169.42 agv: 170.725
>>> NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 agv: 2159.7875
>>> BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 agv: 1282.4375
>>> BQL FQC 2q: 768.30 817.72 864.43 974.40 agv: 856.2125
>>> BQL FQC 4q: 945.66 942.68 878.51 822.82 agv: 897.4175
>>> BQL PFF 1q: 149.69 151.49 149.40 147.47 agv: 149.5125
>>> BQL PFF 2q: 2059.32 798.74 1844.12 381.80 agv: 1270.995
>>> BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 agv: 6119.1875
>>>
>>> Signed-off-by: Jiri Pirko <jiri@nvidia.com>
>>> ---
>>> v2->v3:
>>> - fixed the switch from/to orphan mode while skbs are yet to be
>>> completed by using the second least significant bit in virtqueue
>>> token pointer to indicate skb is orphan. Don't account orphan
>>> skbs in completion.
>>> - reorganized parallel skb/xdp free stats accounting to napi/others.
>>> - fixed kick condition check in orphan mode
>>> v1->v2:
>>> - moved netdev_tx_completed_queue() call into __free_old_xmit(),
>>> propagate use_napi flag to __free_old_xmit() and only call
>>> netdev_tx_completed_queue() in case it is true
>>> - added forgotten call to netdev_tx_reset_queue()
>>> - fixed stats for xdp packets
>>> - fixed bql accounting when __free_old_xmit() is called from xdp path
>>> - handle the !use_napi case in start_xmit() kick section
>>> ---
>>> drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++------------
>>> 1 file changed, 57 insertions(+), 24 deletions(-)
>> I've recently found an issue with virtio-net driver and system
>> suspend/resume. Bisecting pointed to the c8bd1f7f3e61 ("virtio_net: add
>> support for Byte Queue Limits") commit and this patch. Once it got
>> merged to linux-next and then Linus trees, the driver occasionally
>> crashes with the following log (captured on QEMU's ARM 32bit 'virt'
>> machine):
>>
>> root@target:~# time rtcwake -s10 -mmem
>> rtcwake: wakeup from "mem" using /dev/rtc0 at Sat Aug 10 12:40:26 2024
>> PM: suspend entry (s2idle)
>> Filesystems sync: 0.000 seconds
>> Freezing user space processes
>> Freezing user space processes completed (elapsed 0.006 seconds)
>> OOM killer disabled.
>> Freezing remaining freezable tasks
>> Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
>> ------------[ cut here ]------------
>> kernel BUG at lib/dynamic_queue_limits.c:99!
>> Internal error: Oops - BUG: 0 [#1] SMP ARM
>> Modules linked in: bluetooth ecdh_generic ecc libaes
>> CPU: 1 PID: 1282 Comm: rtcwake Not tainted
>> 6.10.0-rc3-00732-gc8bd1f7f3e61 #15240
>> Hardware name: Generic DT based system
>> PC is at dql_completed+0x270/0x2cc
>> LR is at __free_old_xmit+0x120/0x198
>> pc : [<c07ffa54>] lr : [<c0c42bf4>] psr: 80000013
>> ...
>> Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
>> Control: 10c5387d Table: 43a4406a DAC: 00000051
>> ...
>> Process rtcwake (pid: 1282, stack limit = 0xfbc21278)
>> Stack: (0xe0805e80 to 0xe0806000)
>> ...
>> Call trace:
>> dql_completed from __free_old_xmit+0x120/0x198
>> __free_old_xmit from free_old_xmit+0x44/0xe4
>> free_old_xmit from virtnet_poll_tx+0x88/0x1b4
>> virtnet_poll_tx from __napi_poll+0x2c/0x1d4
>> __napi_poll from net_rx_action+0x140/0x2b4
>> net_rx_action from handle_softirqs+0x11c/0x350
>> handle_softirqs from call_with_stack+0x18/0x20
>> call_with_stack from do_softirq+0x48/0x50
>> do_softirq from __local_bh_enable_ip+0xa0/0xa4
>> __local_bh_enable_ip from virtnet_open+0xd4/0x21c
>> virtnet_open from virtnet_restore+0x94/0x120
>> virtnet_restore from virtio_device_restore+0x110/0x1f4
>> virtio_device_restore from dpm_run_callback+0x3c/0x100
>> dpm_run_callback from device_resume+0x12c/0x2a8
>> device_resume from dpm_resume+0x12c/0x1e0
>> dpm_resume from dpm_resume_end+0xc/0x18
>> dpm_resume_end from suspend_devices_and_enter+0x1f0/0x72c
>> suspend_devices_and_enter from pm_suspend+0x270/0x2a0
>> pm_suspend from state_store+0x68/0xc8
>> state_store from kernfs_fop_write_iter+0x10c/0x1cc
>> kernfs_fop_write_iter from vfs_write+0x2b0/0x3dc
>> vfs_write from ksys_write+0x5c/0xd4
>> ksys_write from ret_fast_syscall+0x0/0x54
>> Exception stack(0xe8bf1fa8 to 0xe8bf1ff0)
>> ...
>> ---[ end trace 0000000000000000 ]---
>> Kernel panic - not syncing: Fatal exception in interrupt
>> ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
>>
>> I have fully reproducible setup for this issue. Reverting it together
>> with f8321fa75102 ("virtio_net: Fix napi_skb_cache_put warning") (due to
>> some code dependencies) fixes this issue on top of Linux v6.11-rc1 and
>> recent linux-next releases. Let me know if I can help debugging this
>> issue further and help fixing.
> Will fix this tomorrow. In the meantime, could you provide full
> reproduce steps?
Well, it is easy to reproduce it simply by calling
# time rtcwake -s10 -mmem
a few times and sooner or later it will cause a kernel panic.
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-08-12 16:55 ` Marek Szyprowski
@ 2024-08-14 7:49 ` Jiri Pirko
2024-08-14 8:17 ` Jiri Pirko
2024-08-14 9:17 ` Marek Szyprowski
0 siblings, 2 replies; 19+ messages in thread
From: Jiri Pirko @ 2024-08-14 7:49 UTC (permalink / raw)
To: Marek Szyprowski
Cc: netdev, davem, edumazet, kuba, pabeni, mst, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
Mon, Aug 12, 2024 at 06:55:26PM CEST, m.szyprowski@samsung.com wrote:
>On 12.08.2024 18:47, Jiri Pirko wrote:
>> Mon, Aug 12, 2024 at 04:57:24PM CEST, m.szyprowski@samsung.com wrote:
>>> On 18.06.2024 16:44, Jiri Pirko wrote:
>>>> From: Jiri Pirko <jiri@nvidia.com>
>>>>
>>>> Add support for Byte Queue Limits (BQL).
>>>>
>>>> Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
>>>> Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
>>>> running in background. Netperf TCP_RR results:
>>>>
>>>> NOBQL FQC 1q: 159.56 159.33 158.50 154.31 agv: 157.925
>>>> NOBQL FQC 2q: 184.64 184.96 174.73 174.15 agv: 179.62
>>>> NOBQL FQC 4q: 994.46 441.96 416.50 499.56 agv: 588.12
>>>> NOBQL PFF 1q: 148.68 148.92 145.95 149.48 agv: 148.2575
>>>> NOBQL PFF 2q: 171.86 171.20 170.42 169.42 agv: 170.725
>>>> NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 agv: 2159.7875
>>>> BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 agv: 1282.4375
>>>> BQL FQC 2q: 768.30 817.72 864.43 974.40 agv: 856.2125
>>>> BQL FQC 4q: 945.66 942.68 878.51 822.82 agv: 897.4175
>>>> BQL PFF 1q: 149.69 151.49 149.40 147.47 agv: 149.5125
>>>> BQL PFF 2q: 2059.32 798.74 1844.12 381.80 agv: 1270.995
>>>> BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 agv: 6119.1875
>>>>
>>>> Signed-off-by: Jiri Pirko <jiri@nvidia.com>
>>>> ---
>>>> v2->v3:
>>>> - fixed the switch from/to orphan mode while skbs are yet to be
>>>> completed by using the second least significant bit in virtqueue
>>>> token pointer to indicate skb is orphan. Don't account orphan
>>>> skbs in completion.
>>>> - reorganized parallel skb/xdp free stats accounting to napi/others.
>>>> - fixed kick condition check in orphan mode
>>>> v1->v2:
>>>> - moved netdev_tx_completed_queue() call into __free_old_xmit(),
>>>> propagate use_napi flag to __free_old_xmit() and only call
>>>> netdev_tx_completed_queue() in case it is true
>>>> - added forgotten call to netdev_tx_reset_queue()
>>>> - fixed stats for xdp packets
>>>> - fixed bql accounting when __free_old_xmit() is called from xdp path
>>>> - handle the !use_napi case in start_xmit() kick section
>>>> ---
>>>> drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++------------
>>>> 1 file changed, 57 insertions(+), 24 deletions(-)
>>> I've recently found an issue with virtio-net driver and system
>>> suspend/resume. Bisecting pointed to the c8bd1f7f3e61 ("virtio_net: add
>>> support for Byte Queue Limits") commit and this patch. Once it got
>>> merged to linux-next and then Linus trees, the driver occasionally
>>> crashes with the following log (captured on QEMU's ARM 32bit 'virt'
>>> machine):
>>>
>>> root@target:~# time rtcwake -s10 -mmem
>>> rtcwake: wakeup from "mem" using /dev/rtc0 at Sat Aug 10 12:40:26 2024
>>> PM: suspend entry (s2idle)
>>> Filesystems sync: 0.000 seconds
>>> Freezing user space processes
>>> Freezing user space processes completed (elapsed 0.006 seconds)
>>> OOM killer disabled.
>>> Freezing remaining freezable tasks
>>> Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
>>> ------------[ cut here ]------------
>>> kernel BUG at lib/dynamic_queue_limits.c:99!
>>> Internal error: Oops - BUG: 0 [#1] SMP ARM
>>> Modules linked in: bluetooth ecdh_generic ecc libaes
>>> CPU: 1 PID: 1282 Comm: rtcwake Not tainted
>>> 6.10.0-rc3-00732-gc8bd1f7f3e61 #15240
>>> Hardware name: Generic DT based system
>>> PC is at dql_completed+0x270/0x2cc
>>> LR is at __free_old_xmit+0x120/0x198
>>> pc : [<c07ffa54>] lr : [<c0c42bf4>] psr: 80000013
>>> ...
>>> Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
>>> Control: 10c5387d Table: 43a4406a DAC: 00000051
>>> ...
>>> Process rtcwake (pid: 1282, stack limit = 0xfbc21278)
>>> Stack: (0xe0805e80 to 0xe0806000)
>>> ...
>>> Call trace:
>>> dql_completed from __free_old_xmit+0x120/0x198
>>> __free_old_xmit from free_old_xmit+0x44/0xe4
>>> free_old_xmit from virtnet_poll_tx+0x88/0x1b4
>>> virtnet_poll_tx from __napi_poll+0x2c/0x1d4
>>> __napi_poll from net_rx_action+0x140/0x2b4
>>> net_rx_action from handle_softirqs+0x11c/0x350
>>> handle_softirqs from call_with_stack+0x18/0x20
>>> call_with_stack from do_softirq+0x48/0x50
>>> do_softirq from __local_bh_enable_ip+0xa0/0xa4
>>> __local_bh_enable_ip from virtnet_open+0xd4/0x21c
>>> virtnet_open from virtnet_restore+0x94/0x120
>>> virtnet_restore from virtio_device_restore+0x110/0x1f4
>>> virtio_device_restore from dpm_run_callback+0x3c/0x100
>>> dpm_run_callback from device_resume+0x12c/0x2a8
>>> device_resume from dpm_resume+0x12c/0x1e0
>>> dpm_resume from dpm_resume_end+0xc/0x18
>>> dpm_resume_end from suspend_devices_and_enter+0x1f0/0x72c
>>> suspend_devices_and_enter from pm_suspend+0x270/0x2a0
>>> pm_suspend from state_store+0x68/0xc8
>>> state_store from kernfs_fop_write_iter+0x10c/0x1cc
>>> kernfs_fop_write_iter from vfs_write+0x2b0/0x3dc
>>> vfs_write from ksys_write+0x5c/0xd4
>>> ksys_write from ret_fast_syscall+0x0/0x54
>>> Exception stack(0xe8bf1fa8 to 0xe8bf1ff0)
>>> ...
>>> ---[ end trace 0000000000000000 ]---
>>> Kernel panic - not syncing: Fatal exception in interrupt
>>> ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
>>>
>>> I have fully reproducible setup for this issue. Reverting it together
>>> with f8321fa75102 ("virtio_net: Fix napi_skb_cache_put warning") (due to
>>> some code dependencies) fixes this issue on top of Linux v6.11-rc1 and
>>> recent linux-next releases. Let me know if I can help debugging this
>>> issue further and help fixing.
>> Will fix this tomorrow. In the meantime, could you provide full
>> reproduce steps?
>
>Well, it is easy to reproduce it simply by calling
>
># time rtcwake -s10 -mmem
>
>a few times and sooner or later it will cause a kernel panic.
I found the problem. Following patch will help:
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3f10c72743e9..c6af18948092 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2867,8 +2867,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
if (err < 0)
goto err_xdp_reg_mem_model;
- virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
+ virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
return 0;
Will submit the patch in a jiff. Thanks!
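For context, the call whose position changes here, netdev_tx_reset_queue(),
is the helper that wipes the per-queue BQL state. A simplified sketch of it
(based on include/linux/netdevice.h, not a verbatim copy):

static inline void netdev_tx_reset_queue(struct netdev_queue *dev_queue)
{
#ifdef CONFIG_BQL
	/* Forget everything BQL has accounted for this queue so far:
	 * num_queued, num_completed and the computed limit go back to
	 * their initial values, so any skb queued before this point must
	 * not be completed against the fresh counters afterwards.
	 */
	dql_reset(&dev_queue->dql);
#endif
}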
>
>Best regards
>--
>Marek Szyprowski, PhD
>Samsung R&D Institute Poland
>
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-08-14 7:49 ` Jiri Pirko
@ 2024-08-14 8:17 ` Jiri Pirko
2024-08-14 9:43 ` Michael S. Tsirkin
2024-08-14 9:17 ` Marek Szyprowski
1 sibling, 1 reply; 19+ messages in thread
From: Jiri Pirko @ 2024-08-14 8:17 UTC (permalink / raw)
To: Marek Szyprowski
Cc: netdev, davem, edumazet, kuba, pabeni, mst, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
Wed, Aug 14, 2024 at 09:49:57AM CEST, jiri@resnulli.us wrote:
>Mon, Aug 12, 2024 at 06:55:26PM CEST, m.szyprowski@samsung.com wrote:
>>On 12.08.2024 18:47, Jiri Pirko wrote:
>>> Mon, Aug 12, 2024 at 04:57:24PM CEST, m.szyprowski@samsung.com wrote:
>>>> On 18.06.2024 16:44, Jiri Pirko wrote:
>>>>> From: Jiri Pirko <jiri@nvidia.com>
>>>>>
>>>>> Add support for Byte Queue Limits (BQL).
>>>>>
>>>>> Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
>>>>> Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
>>>>> running in background. Netperf TCP_RR results:
>>>>>
>>>>> NOBQL FQC 1q: 159.56 159.33 158.50 154.31 agv: 157.925
>>>>> NOBQL FQC 2q: 184.64 184.96 174.73 174.15 agv: 179.62
>>>>> NOBQL FQC 4q: 994.46 441.96 416.50 499.56 agv: 588.12
>>>>> NOBQL PFF 1q: 148.68 148.92 145.95 149.48 agv: 148.2575
>>>>> NOBQL PFF 2q: 171.86 171.20 170.42 169.42 agv: 170.725
>>>>> NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 agv: 2159.7875
>>>>> BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 agv: 1282.4375
>>>>> BQL FQC 2q: 768.30 817.72 864.43 974.40 agv: 856.2125
>>>>> BQL FQC 4q: 945.66 942.68 878.51 822.82 agv: 897.4175
>>>>> BQL PFF 1q: 149.69 151.49 149.40 147.47 agv: 149.5125
>>>>> BQL PFF 2q: 2059.32 798.74 1844.12 381.80 agv: 1270.995
>>>>> BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 agv: 6119.1875
>>>>>
>>>>> Signed-off-by: Jiri Pirko <jiri@nvidia.com>
>>>>> ---
>>>>> v2->v3:
>>>>> - fixed the switch from/to orphan mode while skbs are yet to be
>>>>> completed by using the second least significant bit in virtqueue
>>>>> token pointer to indicate skb is orphan. Don't account orphan
>>>>> skbs in completion.
>>>>> - reorganized parallel skb/xdp free stats accounting to napi/others.
>>>>> - fixed kick condition check in orphan mode
>>>>> v1->v2:
>>>>> - moved netdev_tx_completed_queue() call into __free_old_xmit(),
>>>>> propagate use_napi flag to __free_old_xmit() and only call
>>>>> netdev_tx_completed_queue() in case it is true
>>>>> - added forgotten call to netdev_tx_reset_queue()
>>>>> - fixed stats for xdp packets
>>>>> - fixed bql accounting when __free_old_xmit() is called from xdp path
>>>>> - handle the !use_napi case in start_xmit() kick section
>>>>> ---
>>>>> drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++------------
>>>>> 1 file changed, 57 insertions(+), 24 deletions(-)
>>>> I've recently found an issue with virtio-net driver and system
>>>> suspend/resume. Bisecting pointed to the c8bd1f7f3e61 ("virtio_net: add
>>>> support for Byte Queue Limits") commit and this patch. Once it got
>>>> merged to linux-next and then Linus trees, the driver occasionally
>>>> crashes with the following log (captured on QEMU's ARM 32bit 'virt'
>>>> machine):
>>>>
>>>> root@target:~# time rtcwake -s10 -mmem
>>>> rtcwake: wakeup from "mem" using /dev/rtc0 at Sat Aug 10 12:40:26 2024
>>>> PM: suspend entry (s2idle)
>>>> Filesystems sync: 0.000 seconds
>>>> Freezing user space processes
>>>> Freezing user space processes completed (elapsed 0.006 seconds)
>>>> OOM killer disabled.
>>>> Freezing remaining freezable tasks
>>>> Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
>>>> ------------[ cut here ]------------
>>>> kernel BUG at lib/dynamic_queue_limits.c:99!
>>>> Internal error: Oops - BUG: 0 [#1] SMP ARM
>>>> Modules linked in: bluetooth ecdh_generic ecc libaes
>>>> CPU: 1 PID: 1282 Comm: rtcwake Not tainted
>>>> 6.10.0-rc3-00732-gc8bd1f7f3e61 #15240
>>>> Hardware name: Generic DT based system
>>>> PC is at dql_completed+0x270/0x2cc
>>>> LR is at __free_old_xmit+0x120/0x198
>>>> pc : [<c07ffa54>] lr : [<c0c42bf4>] psr: 80000013
>>>> ...
>>>> Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
>>>> Control: 10c5387d Table: 43a4406a DAC: 00000051
>>>> ...
>>>> Process rtcwake (pid: 1282, stack limit = 0xfbc21278)
>>>> Stack: (0xe0805e80 to 0xe0806000)
>>>> ...
>>>> Call trace:
>>>> dql_completed from __free_old_xmit+0x120/0x198
>>>> __free_old_xmit from free_old_xmit+0x44/0xe4
>>>> free_old_xmit from virtnet_poll_tx+0x88/0x1b4
>>>> virtnet_poll_tx from __napi_poll+0x2c/0x1d4
>>>> __napi_poll from net_rx_action+0x140/0x2b4
>>>> net_rx_action from handle_softirqs+0x11c/0x350
>>>> handle_softirqs from call_with_stack+0x18/0x20
>>>> call_with_stack from do_softirq+0x48/0x50
>>>> do_softirq from __local_bh_enable_ip+0xa0/0xa4
>>>> __local_bh_enable_ip from virtnet_open+0xd4/0x21c
>>>> virtnet_open from virtnet_restore+0x94/0x120
>>>> virtnet_restore from virtio_device_restore+0x110/0x1f4
>>>> virtio_device_restore from dpm_run_callback+0x3c/0x100
>>>> dpm_run_callback from device_resume+0x12c/0x2a8
>>>> device_resume from dpm_resume+0x12c/0x1e0
>>>> dpm_resume from dpm_resume_end+0xc/0x18
>>>> dpm_resume_end from suspend_devices_and_enter+0x1f0/0x72c
>>>> suspend_devices_and_enter from pm_suspend+0x270/0x2a0
>>>> pm_suspend from state_store+0x68/0xc8
>>>> state_store from kernfs_fop_write_iter+0x10c/0x1cc
>>>> kernfs_fop_write_iter from vfs_write+0x2b0/0x3dc
>>>> vfs_write from ksys_write+0x5c/0xd4
>>>> ksys_write from ret_fast_syscall+0x0/0x54
>>>> Exception stack(0xe8bf1fa8 to 0xe8bf1ff0)
>>>> ...
>>>> ---[ end trace 0000000000000000 ]---
>>>> Kernel panic - not syncing: Fatal exception in interrupt
>>>> ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
>>>>
>>>> I have fully reproducible setup for this issue. Reverting it together
>>>> with f8321fa75102 ("virtio_net: Fix napi_skb_cache_put warning") (due to
>>>> some code dependencies) fixes this issue on top of Linux v6.11-rc1 and
>>>> recent linux-next releases. Let me know if I can help debugging this
>>>> issue further and help fixing.
>>> Will fix this tomorrow. In the meantime, could you provide full
>>> reproduce steps?
>>
>>Well, it is easy to reproduce it simply by calling
>>
>># time rtcwake -s10 -mmem
>>
>>a few times and sooner or later it will cause a kernel panic.
>
>I found the problem. Following patch will help:
>
>
>diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>index 3f10c72743e9..c6af18948092 100644
>--- a/drivers/net/virtio_net.c
>+++ b/drivers/net/virtio_net.c
>@@ -2867,8 +2867,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> if (err < 0)
> goto err_xdp_reg_mem_model;
>
>- virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
> netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
>+ virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
> virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
Hmm, I have to look at this a bit more. I think this might be an accidental
fix. The thing is, napi can be triggered even if it is disabled:
->__local_bh_enable_ip()
-> net_rx_action()
-> __napi_poll()
Here __napi_poll() calls napi_is_scheduled() and, if the napi is scheduled,
calls virtnet_poll_tx(). napi_is_scheduled() checks the NAPI_STATE_SCHED
bit in the napi state.
However, this bit was previously set by netif_napi_add_weight().
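A minimal sketch of the check being referenced, assuming the current
include/linux/netdevice.h semantics rather than quoting it verbatim:

static inline bool napi_is_scheduled(struct napi_struct *n)
{
	/* NAPI_STATE_SCHED is set by netif_napi_add_weight() and is also
	 * left set by napi_disable(); it is only cleared by napi_enable()
	 * or by a completed poll. A napi that has never been enabled
	 * therefore still looks "scheduled" here.
	 */
	return test_bit(NAPI_STATE_SCHED, &n->state);
}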
>
> > ...
>
>Best regards
>--
>Marek Szyprowski, PhD
>Samsung R&D Institute Poland
>
>
> return 0;
>
>
>Will submit the patch in a jiff. Thanks!
>
>
>
>>
>>Best regards
>>--
>>Marek Szyprowski, PhD
>>Samsung R&D Institute Poland
>>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-08-14 7:49 ` Jiri Pirko
2024-08-14 8:17 ` Jiri Pirko
@ 2024-08-14 9:17 ` Marek Szyprowski
2024-08-14 12:16 ` Jiri Pirko
1 sibling, 1 reply; 19+ messages in thread
From: Marek Szyprowski @ 2024-08-14 9:17 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, edumazet, kuba, pabeni, mst, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
On 14.08.2024 09:49, Jiri Pirko wrote:
> Mon, Aug 12, 2024 at 06:55:26PM CEST, m.szyprowski@samsung.com wrote:
>> On 12.08.2024 18:47, Jiri Pirko wrote:
>>> Mon, Aug 12, 2024 at 04:57:24PM CEST, m.szyprowski@samsung.com wrote:
>>>> On 18.06.2024 16:44, Jiri Pirko wrote:
>>>>> From: Jiri Pirko <jiri@nvidia.com>
>>>>>
>>>>> Add support for Byte Queue Limits (BQL).
>>>>>
>>>>> Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
>>>>> Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
>>>>> running in background. Netperf TCP_RR results:
>>>>>
>>>>> NOBQL FQC 1q: 159.56 159.33 158.50 154.31 agv: 157.925
>>>>> NOBQL FQC 2q: 184.64 184.96 174.73 174.15 agv: 179.62
>>>>> NOBQL FQC 4q: 994.46 441.96 416.50 499.56 agv: 588.12
>>>>> NOBQL PFF 1q: 148.68 148.92 145.95 149.48 agv: 148.2575
>>>>> NOBQL PFF 2q: 171.86 171.20 170.42 169.42 agv: 170.725
>>>>> NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 agv: 2159.7875
>>>>> BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 agv: 1282.4375
>>>>> BQL FQC 2q: 768.30 817.72 864.43 974.40 agv: 856.2125
>>>>> BQL FQC 4q: 945.66 942.68 878.51 822.82 agv: 897.4175
>>>>> BQL PFF 1q: 149.69 151.49 149.40 147.47 agv: 149.5125
>>>>> BQL PFF 2q: 2059.32 798.74 1844.12 381.80 agv: 1270.995
>>>>> BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 agv: 6119.1875
>>>>>
>>>>> Signed-off-by: Jiri Pirko <jiri@nvidia.com>
>>>>> ---
>>>>> v2->v3:
>>>>> - fixed the switch from/to orphan mode while skbs are yet to be
>>>>> completed by using the second least significant bit in virtqueue
>>>>> token pointer to indicate skb is orphan. Don't account orphan
>>>>> skbs in completion.
>>>>> - reorganized parallel skb/xdp free stats accounting to napi/others.
>>>>> - fixed kick condition check in orphan mode
>>>>> v1->v2:
>>>>> - moved netdev_tx_completed_queue() call into __free_old_xmit(),
>>>>> propagate use_napi flag to __free_old_xmit() and only call
>>>>> netdev_tx_completed_queue() in case it is true
>>>>> - added forgotten call to netdev_tx_reset_queue()
>>>>> - fixed stats for xdp packets
>>>>> - fixed bql accounting when __free_old_xmit() is called from xdp path
>>>>> - handle the !use_napi case in start_xmit() kick section
>>>>> ---
>>>>> drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++------------
>>>>> 1 file changed, 57 insertions(+), 24 deletions(-)
>>>> I've recently found an issue with virtio-net driver and system
>>>> suspend/resume. Bisecting pointed to the c8bd1f7f3e61 ("virtio_net: add
>>>> support for Byte Queue Limits") commit and this patch. Once it got
>>>> merged to linux-next and then Linus trees, the driver occasionally
>>>> crashes with the following log (captured on QEMU's ARM 32bit 'virt'
>>>> machine):
>>>>
>>>> root@target:~# time rtcwake -s10 -mmem
>>>> rtcwake: wakeup from "mem" using /dev/rtc0 at Sat Aug 10 12:40:26 2024
>>>> PM: suspend entry (s2idle)
>>>> Filesystems sync: 0.000 seconds
>>>> Freezing user space processes
>>>> Freezing user space processes completed (elapsed 0.006 seconds)
>>>> OOM killer disabled.
>>>> Freezing remaining freezable tasks
>>>> Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
>>>> ------------[ cut here ]------------
>>>> kernel BUG at lib/dynamic_queue_limits.c:99!
>>>> Internal error: Oops - BUG: 0 [#1] SMP ARM
>>>> Modules linked in: bluetooth ecdh_generic ecc libaes
>>>> CPU: 1 PID: 1282 Comm: rtcwake Not tainted
>>>> 6.10.0-rc3-00732-gc8bd1f7f3e61 #15240
>>>> Hardware name: Generic DT based system
>>>> PC is at dql_completed+0x270/0x2cc
>>>> LR is at __free_old_xmit+0x120/0x198
>>>> pc : [<c07ffa54>] lr : [<c0c42bf4>] psr: 80000013
>>>> ...
>>>> Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
>>>> Control: 10c5387d Table: 43a4406a DAC: 00000051
>>>> ...
>>>> Process rtcwake (pid: 1282, stack limit = 0xfbc21278)
>>>> Stack: (0xe0805e80 to 0xe0806000)
>>>> ...
>>>> Call trace:
>>>> dql_completed from __free_old_xmit+0x120/0x198
>>>> __free_old_xmit from free_old_xmit+0x44/0xe4
>>>> free_old_xmit from virtnet_poll_tx+0x88/0x1b4
>>>> virtnet_poll_tx from __napi_poll+0x2c/0x1d4
>>>> __napi_poll from net_rx_action+0x140/0x2b4
>>>> net_rx_action from handle_softirqs+0x11c/0x350
>>>> handle_softirqs from call_with_stack+0x18/0x20
>>>> call_with_stack from do_softirq+0x48/0x50
>>>> do_softirq from __local_bh_enable_ip+0xa0/0xa4
>>>> __local_bh_enable_ip from virtnet_open+0xd4/0x21c
>>>> virtnet_open from virtnet_restore+0x94/0x120
>>>> virtnet_restore from virtio_device_restore+0x110/0x1f4
>>>> virtio_device_restore from dpm_run_callback+0x3c/0x100
>>>> dpm_run_callback from device_resume+0x12c/0x2a8
>>>> device_resume from dpm_resume+0x12c/0x1e0
>>>> dpm_resume from dpm_resume_end+0xc/0x18
>>>> dpm_resume_end from suspend_devices_and_enter+0x1f0/0x72c
>>>> suspend_devices_and_enter from pm_suspend+0x270/0x2a0
>>>> pm_suspend from state_store+0x68/0xc8
>>>> state_store from kernfs_fop_write_iter+0x10c/0x1cc
>>>> kernfs_fop_write_iter from vfs_write+0x2b0/0x3dc
>>>> vfs_write from ksys_write+0x5c/0xd4
>>>> ksys_write from ret_fast_syscall+0x0/0x54
>>>> Exception stack(0xe8bf1fa8 to 0xe8bf1ff0)
>>>> ...
>>>> ---[ end trace 0000000000000000 ]---
>>>> Kernel panic - not syncing: Fatal exception in interrupt
>>>> ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
>>>>
>>>> I have fully reproducible setup for this issue. Reverting it together
>>>> with f8321fa75102 ("virtio_net: Fix napi_skb_cache_put warning") (due to
>>>> some code dependencies) fixes this issue on top of Linux v6.11-rc1 and
>>>> recent linux-next releases. Let me know if I can help debugging this
>>>> issue further and help fixing.
>>> Will fix this tomorrow. In the meantime, could you provide full
>>> reproduce steps?
>> Well, it is easy to reproduce it simply by calling
>>
>> # time rtcwake -s10 -mmem
>>
>> a few times and sooner or later it will cause a kernel panic.
> I found the problem. Following patch will help:
>
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 3f10c72743e9..c6af18948092 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2867,8 +2867,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> if (err < 0)
> goto err_xdp_reg_mem_model;
>
> - virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
> netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
> + virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
> virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
>
> return 0;
>
>
> Will submit the patch in a jiff. Thanks!
Confirmed. The above change fixed this issue in my tests.
Feel free to add:
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-08-14 8:17 ` Jiri Pirko
@ 2024-08-14 9:43 ` Michael S. Tsirkin
2024-08-14 12:16 ` Jiri Pirko
0 siblings, 1 reply; 19+ messages in thread
From: Michael S. Tsirkin @ 2024-08-14 9:43 UTC (permalink / raw)
To: Jiri Pirko
Cc: Marek Szyprowski, netdev, davem, edumazet, kuba, pabeni, jasowang,
xuanzhuo, virtualization, ast, daniel, hawk, john.fastabend,
dave.taht, kerneljasonxing, hengqi
On Wed, Aug 14, 2024 at 10:17:15AM +0200, Jiri Pirko wrote:
> >diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >index 3f10c72743e9..c6af18948092 100644
> >--- a/drivers/net/virtio_net.c
> >+++ b/drivers/net/virtio_net.c
> >@@ -2867,8 +2867,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> > if (err < 0)
> > goto err_xdp_reg_mem_model;
> >
> >- virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
> > netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
> >+ virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
> > virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
>
> Hmm, I have to look at this a bit more. I think this might be accidental
> fix. The thing is, napi can be triggered even if it is disabled:
>
> ->__local_bh_enable_ip()
> -> net_rx_action()
> -> __napi_poll()
>
> Here __napi_poll() calls napi_is_scheduled() and calls virtnet_poll_tx()
> in case napi is scheduled. napi_is_scheduled() checks NAPI_STATE_SCHED
> bit in napi state.
>
> However, this bit is set previously by netif_napi_add_weight().
It's actually set in napi_disable too, isn't it?
>
> >
> > > ...
> >
> >Best regards
> >--
> >Marek Szyprowski, PhD
> >Samsung R&D Institute Poland
> >
>
>
> >
> > return 0;
> >
> >
> >Will submit the patch in a jiff. Thanks!
> >
> >
> >
> >>
> >>Best regards
> >>--
> >>Marek Szyprowski, PhD
> >>Samsung R&D Institute Poland
> >>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-08-14 9:43 ` Michael S. Tsirkin
@ 2024-08-14 12:16 ` Jiri Pirko
0 siblings, 0 replies; 19+ messages in thread
From: Jiri Pirko @ 2024-08-14 12:16 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Marek Szyprowski, netdev, davem, edumazet, kuba, pabeni, jasowang,
xuanzhuo, virtualization, ast, daniel, hawk, john.fastabend,
dave.taht, kerneljasonxing, hengqi
Wed, Aug 14, 2024 at 11:43:51AM CEST, mst@redhat.com wrote:
>On Wed, Aug 14, 2024 at 10:17:15AM +0200, Jiri Pirko wrote:
>> >diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> >index 3f10c72743e9..c6af18948092 100644
>> >--- a/drivers/net/virtio_net.c
>> >+++ b/drivers/net/virtio_net.c
>> >@@ -2867,8 +2867,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
>> > if (err < 0)
>> > goto err_xdp_reg_mem_model;
>> >
>> >- virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
>> > netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
>> >+ virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
>> > virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
>>
>> Hmm, I have to look at this a bit more. I think this might be accidental
>> fix. The thing is, napi can be triggered even if it is disabled:
>>
>> ->__local_bh_enable_ip()
>> -> net_rx_action()
>> -> __napi_poll()
>>
>> Here __napi_poll() calls napi_is_scheduled() and calls virtnet_poll_tx()
>> in case napi is scheduled. napi_is_scheduled() checks NAPI_STATE_SCHED
>> bit in napi state.
>>
>> However, this bit is set previously by netif_napi_add_weight().
>
>It's actually set in napi_disable too, isn't it?
Yes, in both.
I actually found exactly what the issue is.
After virtnet_napi_enable() is called, the following path is hit:
__napi_poll()
-> virtnet_poll()
-> virtnet_poll_cleantx()
-> netif_tx_wake_queue()
That wakes the TX queue and allows skbs to be submitted and accounted by
the BQL counters.
Then netdev_tx_reset_queue() is called, which resets the BQL counters and
eventually leads to the BUG in dql_completed().
That's why moving netdev_tx_reset_queue() before virtnet_napi_enable()
helped. Will submit.
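Spelled out against the BQL helpers, the broken ordering looks roughly like
the sequence below. This is only an illustration of the accounting
invariant, not the driver flow line for line; txq and skb stand for
whatever the driver happens to be handling at that point:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Illustration only: ordering that breaks the per-queue BQL invariant. */
static void bql_bad_ordering_example(struct netdev_queue *txq,
				     struct sk_buff *skb)
{
	/* 1) virtnet_napi_enable() lets virtnet_poll() ->
	 *    virtnet_poll_cleantx() wake the TX queue, so the stack can
	 *    already transmit and account the skb:
	 */
	netdev_tx_sent_queue(txq, skb->len);	/* dql.num_queued += len */

	/* 2) virtnet_enable_queue_pair() then resets the queue: */
	netdev_tx_reset_queue(txq);		/* dql counters back to zero */

	/* 3) When the device completes the skb from step 1,
	 *    __free_old_xmit() reports it against the reset counters:
	 */
	netdev_tx_completed_queue(txq, 1, skb->len);
	/* dql_completed() now sees more completed than queued and trips
	 * its consistency check, roughly
	 * BUG_ON(count > num_queued - num_completed), which is the
	 * lib/dynamic_queue_limits.c:99 BUG in the trace above.
	 */
}

With the reset moved in front of virtnet_napi_enable(), step 2 can no
longer happen after step 1, so the counters stay consistent.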
>
>>
>> >
>> > > ...
>> >
>> >Best regards
>> >--
>> >Marek Szyprowski, PhD
>> >Samsung R&D Institute Poland
>> >
>>
>>
>> >
>> > return 0;
>> >
>> >
>> >Will submit the patch in a jiff. Thanks!
>> >
>> >
>> >
>> >>
>> >>Best regards
>> >>--
>> >>Marek Szyprowski, PhD
>> >>Samsung R&D Institute Poland
>> >>
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH net-next v3] virtio_net: add support for Byte Queue Limits
2024-08-14 9:17 ` Marek Szyprowski
@ 2024-08-14 12:16 ` Jiri Pirko
0 siblings, 0 replies; 19+ messages in thread
From: Jiri Pirko @ 2024-08-14 12:16 UTC (permalink / raw)
To: Marek Szyprowski
Cc: netdev, davem, edumazet, kuba, pabeni, mst, jasowang, xuanzhuo,
virtualization, ast, daniel, hawk, john.fastabend, dave.taht,
kerneljasonxing, hengqi
Wed, Aug 14, 2024 at 11:17:35AM CEST, m.szyprowski@samsung.com wrote:
>On 14.08.2024 09:49, Jiri Pirko wrote:
>> Mon, Aug 12, 2024 at 06:55:26PM CEST, m.szyprowski@samsung.com wrote:
>>> On 12.08.2024 18:47, Jiri Pirko wrote:
>>>> Mon, Aug 12, 2024 at 04:57:24PM CEST, m.szyprowski@samsung.com wrote:
>>>>> On 18.06.2024 16:44, Jiri Pirko wrote:
>>>>>> From: Jiri Pirko <jiri@nvidia.com>
>>>>>>
>>>>>> Add support for Byte Queue Limits (BQL).
>>>>>>
>>>>>> Tested on qemu emulated virtio_net device with 1, 2 and 4 queues.
>>>>>> Tested with fq_codel and pfifo_fast. Super netperf with 50 threads is
>>>>>> running in background. Netperf TCP_RR results:
>>>>>>
>>>>>> NOBQL FQC 1q: 159.56 159.33 158.50 154.31 agv: 157.925
>>>>>> NOBQL FQC 2q: 184.64 184.96 174.73 174.15 agv: 179.62
>>>>>> NOBQL FQC 4q: 994.46 441.96 416.50 499.56 agv: 588.12
>>>>>> NOBQL PFF 1q: 148.68 148.92 145.95 149.48 agv: 148.2575
>>>>>> NOBQL PFF 2q: 171.86 171.20 170.42 169.42 agv: 170.725
>>>>>> NOBQL PFF 4q: 1505.23 1137.23 2488.70 3507.99 agv: 2159.7875
>>>>>> BQL FQC 1q: 1332.80 1297.97 1351.41 1147.57 agv: 1282.4375
>>>>>> BQL FQC 2q: 768.30 817.72 864.43 974.40 agv: 856.2125
>>>>>> BQL FQC 4q: 945.66 942.68 878.51 822.82 agv: 897.4175
>>>>>> BQL PFF 1q: 149.69 151.49 149.40 147.47 agv: 149.5125
>>>>>> BQL PFF 2q: 2059.32 798.74 1844.12 381.80 agv: 1270.995
>>>>>> BQL PFF 4q: 1871.98 4420.02 4916.59 13268.16 agv: 6119.1875
>>>>>>
>>>>>> Signed-off-by: Jiri Pirko <jiri@nvidia.com>
>>>>>> ---
>>>>>> v2->v3:
>>>>>> - fixed the switch from/to orphan mode while skbs are yet to be
>>>>>> completed by using the second least significant bit in virtqueue
>>>>>> token pointer to indicate skb is orphan. Don't account orphan
>>>>>> skbs in completion.
>>>>>> - reorganized parallel skb/xdp free stats accounting to napi/others.
>>>>>> - fixed kick condition check in orphan mode
>>>>>> v1->v2:
>>>>>> - moved netdev_tx_completed_queue() call into __free_old_xmit(),
>>>>>> propagate use_napi flag to __free_old_xmit() and only call
>>>>>> netdev_tx_completed_queue() in case it is true
>>>>>> - added forgotten call to netdev_tx_reset_queue()
>>>>>> - fixed stats for xdp packets
>>>>>> - fixed bql accounting when __free_old_xmit() is called from xdp path
>>>>>> - handle the !use_napi case in start_xmit() kick section
>>>>>> ---
>>>>>> drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++------------
>>>>>> 1 file changed, 57 insertions(+), 24 deletions(-)
>>>>> I've recently found an issue with virtio-net driver and system
>>>>> suspend/resume. Bisecting pointed to the c8bd1f7f3e61 ("virtio_net: add
>>>>> support for Byte Queue Limits") commit and this patch. Once it got
>>>>> merged to linux-next and then Linus trees, the driver occasionally
>>>>> crashes with the following log (captured on QEMU's ARM 32bit 'virt'
>>>>> machine):
>>>>>
>>>>> root@target:~# time rtcwake -s10 -mmem
>>>>> rtcwake: wakeup from "mem" using /dev/rtc0 at Sat Aug 10 12:40:26 2024
>>>>> PM: suspend entry (s2idle)
>>>>> Filesystems sync: 0.000 seconds
>>>>> Freezing user space processes
>>>>> Freezing user space processes completed (elapsed 0.006 seconds)
>>>>> OOM killer disabled.
>>>>> Freezing remaining freezable tasks
>>>>> Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
>>>>> ------------[ cut here ]------------
>>>>> kernel BUG at lib/dynamic_queue_limits.c:99!
>>>>> Internal error: Oops - BUG: 0 [#1] SMP ARM
>>>>> Modules linked in: bluetooth ecdh_generic ecc libaes
>>>>> CPU: 1 PID: 1282 Comm: rtcwake Not tainted
>>>>> 6.10.0-rc3-00732-gc8bd1f7f3e61 #15240
>>>>> Hardware name: Generic DT based system
>>>>> PC is at dql_completed+0x270/0x2cc
>>>>> LR is at __free_old_xmit+0x120/0x198
>>>>> pc : [<c07ffa54>] lr : [<c0c42bf4>] psr: 80000013
>>>>> ...
>>>>> Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
>>>>> Control: 10c5387d Table: 43a4406a DAC: 00000051
>>>>> ...
>>>>> Process rtcwake (pid: 1282, stack limit = 0xfbc21278)
>>>>> Stack: (0xe0805e80 to 0xe0806000)
>>>>> ...
>>>>> Call trace:
>>>>> dql_completed from __free_old_xmit+0x120/0x198
>>>>> __free_old_xmit from free_old_xmit+0x44/0xe4
>>>>> free_old_xmit from virtnet_poll_tx+0x88/0x1b4
>>>>> virtnet_poll_tx from __napi_poll+0x2c/0x1d4
>>>>> __napi_poll from net_rx_action+0x140/0x2b4
>>>>> net_rx_action from handle_softirqs+0x11c/0x350
>>>>> handle_softirqs from call_with_stack+0x18/0x20
>>>>> call_with_stack from do_softirq+0x48/0x50
>>>>> do_softirq from __local_bh_enable_ip+0xa0/0xa4
>>>>> __local_bh_enable_ip from virtnet_open+0xd4/0x21c
>>>>> virtnet_open from virtnet_restore+0x94/0x120
>>>>> virtnet_restore from virtio_device_restore+0x110/0x1f4
>>>>> virtio_device_restore from dpm_run_callback+0x3c/0x100
>>>>> dpm_run_callback from device_resume+0x12c/0x2a8
>>>>> device_resume from dpm_resume+0x12c/0x1e0
>>>>> dpm_resume from dpm_resume_end+0xc/0x18
>>>>> dpm_resume_end from suspend_devices_and_enter+0x1f0/0x72c
>>>>> suspend_devices_and_enter from pm_suspend+0x270/0x2a0
>>>>> pm_suspend from state_store+0x68/0xc8
>>>>> state_store from kernfs_fop_write_iter+0x10c/0x1cc
>>>>> kernfs_fop_write_iter from vfs_write+0x2b0/0x3dc
>>>>> vfs_write from ksys_write+0x5c/0xd4
>>>>> ksys_write from ret_fast_syscall+0x0/0x54
>>>>> Exception stack(0xe8bf1fa8 to 0xe8bf1ff0)
>>>>> ...
>>>>> ---[ end trace 0000000000000000 ]---
>>>>> Kernel panic - not syncing: Fatal exception in interrupt
>>>>> ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
>>>>>
>>>>> I have fully reproducible setup for this issue. Reverting it together
>>>>> with f8321fa75102 ("virtio_net: Fix napi_skb_cache_put warning") (due to
>>>>> some code dependencies) fixes this issue on top of Linux v6.11-rc1 and
>>>>> recent linux-next releases. Let me know if I can help debugging this
>>>>> issue further and help fixing.
>>>> Will fix this tomorrow. In the meantime, could you provide full
>>>> reproduce steps?
>>> Well, it is easy to reproduce it simply by calling
>>>
>>> # time rtcwake -s10 -mmem
>>>
>>> a few times and sooner or later it will cause a kernel panic.
>> I found the problem. Following patch will help:
>>
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 3f10c72743e9..c6af18948092 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -2867,8 +2867,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
>> if (err < 0)
>> goto err_xdp_reg_mem_model;
>>
>> - virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
>> netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
>> + virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
>> virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
>>
>> return 0;
>>
>>
>> Will submit the patch in a jiff. Thanks!
>
>Confirmed. The above change fixed this issue in my tests.
>
>Feel free to add:
>
>Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
>Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Thanks!
>
>Best regards
>--
>Marek Szyprowski, PhD
>Samsung R&D Institute Poland
>
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2024-08-14 12:16 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <CGME20240812145727eucas1p22360b410908e41aeafa7c9f09d52ca14@eucas1p2.samsung.com>
2024-06-18 14:44 ` [PATCH net-next v3] virtio_net: add support for Byte Queue Limits Jiri Pirko
2024-06-18 18:18 ` Michael S. Tsirkin
2024-06-19 5:45 ` Jiri Pirko
2024-06-19 7:26 ` Michael S. Tsirkin
2024-06-19 8:05 ` Jiri Pirko
2024-06-19 8:17 ` Michael S. Tsirkin
2024-06-19 8:23 ` Michael S. Tsirkin
2024-06-19 10:09 ` Jiri Pirko
2024-06-20 7:43 ` Michael S. Tsirkin
2024-06-20 0:40 ` patchwork-bot+netdevbpf
2024-08-12 14:57 ` Marek Szyprowski
2024-08-12 16:47 ` Jiri Pirko
2024-08-12 16:55 ` Marek Szyprowski
2024-08-14 7:49 ` Jiri Pirko
2024-08-14 8:17 ` Jiri Pirko
2024-08-14 9:43 ` Michael S. Tsirkin
2024-08-14 12:16 ` Jiri Pirko
2024-08-14 9:17 ` Marek Szyprowski
2024-08-14 12:16 ` Jiri Pirko