From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
To: Simon Schippers <simon.schippers@tu-dortmund.de>,
willemdebruijn.kernel@gmail.com, jasowang@redhat.com,
mst@redhat.com, eperezma@redhat.com,
stephen@networkplumber.org, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, virtualization@lists.linux.dev,
kvm@vger.kernel.org
Cc: Simon Schippers <simon.schippers@tu-dortmund.de>,
Tim Gebauer <tim.gebauer@tu-dortmund.de>
Subject: Re: [PATCH 4/4] netdev queue flow control for vhost_net
Date: Tue, 02 Sep 2025 17:31:04 -0400 [thread overview]
Message-ID: <willemdebruijn.kernel.251eacee11eca@gmail.com> (raw)
In-Reply-To: <20250902080957.47265-5-simon.schippers@tu-dortmund.de>
Simon Schippers wrote:
> Stopping the queue is done in tun_net_xmit.
>
> Waking the queue is done by calling one of the helpers,
> tun_wake_netdev_queue and tap_wake_netdev_queue. For that, in
> get_wake_netdev_queue, the correct method is determined and saved in the
> function pointer wake_netdev_queue of the vhost_net_virtqueue. Then, each
> time after consuming a batch in vhost_net_buf_produce, wake_netdev_queue
> is called.
>
> Co-developed-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
> Signed-off-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
> Signed-off-by: Simon Schippers <simon.schippers@tu-dortmund.de>
> ---
> drivers/net/tap.c | 6 ++++++
> drivers/net/tun.c | 6 ++++++
> drivers/vhost/net.c | 34 ++++++++++++++++++++++++++++------
> include/linux/if_tap.h | 2 ++
> include/linux/if_tun.h | 3 +++
> 5 files changed, 45 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
> index 4d874672bcd7..0bad9e3d59af 100644
> --- a/drivers/net/tap.c
> +++ b/drivers/net/tap.c
> @@ -1198,6 +1198,12 @@ struct socket *tap_get_socket(struct file *file)
> }
> EXPORT_SYMBOL_GPL(tap_get_socket);
>
> +void tap_wake_netdev_queue(struct file *file)
> +{
> + wake_netdev_queue(file->private_data);
> +}
> +EXPORT_SYMBOL_GPL(tap_wake_netdev_queue);
> +
> struct ptr_ring *tap_get_ptr_ring(struct file *file)
> {
> struct tap_queue *q;
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 735498e221d8..e85589b596ac 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -3739,6 +3739,12 @@ struct socket *tun_get_socket(struct file *file)
> }
> EXPORT_SYMBOL_GPL(tun_get_socket);
>
> +void tun_wake_netdev_queue(struct file *file)
> +{
> + wake_netdev_queue(file->private_data);
> +}
> +EXPORT_SYMBOL_GPL(tun_wake_netdev_queue);
Having multiple functions with the same name is tad annoying from a
cscape PoV, better to call the internal functions
__tun_wake_netdev_queue, etc.
> +
> struct ptr_ring *tun_get_tx_ring(struct file *file)
> {
> struct tun_file *tfile;
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 6edac0c1ba9b..e837d3a334f1 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -130,6 +130,7 @@ struct vhost_net_virtqueue {
> struct vhost_net_buf rxq;
> /* Batched XDP buffs */
> struct xdp_buff *xdp;
> + void (*wake_netdev_queue)(struct file *f);
Indirect function calls are expensive post spectre. Probably
preferable to just have a branch.
A branch in `file->f_op != &tun_fops` would be expensive still as it
may touch a cold cacheline.
How about adding a bit in struct ptr_ring itself. Pahole shows plenty
of holes. Jason, WDYT?
> };
>
> struct vhost_net {
> @@ -175,13 +176,16 @@ static void *vhost_net_buf_consume(struct vhost_net_buf *rxq)
> return ret;
> }
>
> -static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
> +static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq,
> + struct sock *sk)
> {
> + struct file *file = sk->sk_socket->file;
> struct vhost_net_buf *rxq = &nvq->rxq;
>
> rxq->head = 0;
> rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
> VHOST_NET_BATCH);
> + nvq->wake_netdev_queue(file);
> return rxq->tail;
> }
>
> @@ -208,14 +212,15 @@ static int vhost_net_buf_peek_len(void *ptr)
> return __skb_array_len_with_tag(ptr);
> }
>
> -static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
> +static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq,
> + struct sock *sk)
odd indentation
> {
> struct vhost_net_buf *rxq = &nvq->rxq;
>
> if (!vhost_net_buf_is_empty(rxq))
> goto out;
>
> - if (!vhost_net_buf_produce(nvq))
> + if (!vhost_net_buf_produce(nvq, sk))
> return 0;
>
> out:
> @@ -994,7 +999,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
> unsigned long flags;
>
> if (rvq->rx_ring)
> - return vhost_net_buf_peek(rvq);
> + return vhost_net_buf_peek(rvq, sk);
>
> spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
> head = skb_peek(&sk->sk_receive_queue);
> @@ -1499,6 +1504,19 @@ static struct socket *get_tap_socket(int fd)
> return sock;
> }
>
> +static void (*get_wake_netdev_queue(struct file *file))(struct file *file)
> +{
> + struct ptr_ring *ring;
> +
> + ring = tun_get_tx_ring(file);
> + if (!IS_ERR(ring))
> + return tun_wake_netdev_queue;
> + ring = tap_get_ptr_ring(file);
> + if (!IS_ERR(ring))
> + return tap_wake_netdev_queue;
> + return NULL;
> +}
> +
> static struct socket *get_socket(int fd)
> {
> struct socket *sock;
> @@ -1570,10 +1588,14 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
> if (r)
> goto err_used;
> if (index == VHOST_NET_VQ_RX) {
> - if (sock)
> + if (sock) {
> nvq->rx_ring = get_tap_ptr_ring(sock->file);
> - else
> + nvq->wake_netdev_queue =
> + get_wake_netdev_queue(sock->file);
> + } else {
> nvq->rx_ring = NULL;
> + nvq->wake_netdev_queue = NULL;
> + }
> }
>
> oldubufs = nvq->ubufs;
> diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
> index 553552fa635c..02b2809784b5 100644
> --- a/include/linux/if_tap.h
> +++ b/include/linux/if_tap.h
> @@ -10,6 +10,7 @@ struct socket;
>
> #if IS_ENABLED(CONFIG_TAP)
> struct socket *tap_get_socket(struct file *);
> +void tap_wake_netdev_queue(struct file *file);
> struct ptr_ring *tap_get_ptr_ring(struct file *file);
> #else
> #include <linux/err.h>
> @@ -18,6 +19,7 @@ static inline struct socket *tap_get_socket(struct file *f)
> {
> return ERR_PTR(-EINVAL);
> }
> +static inline void tap_wake_netdev_queue(struct file *f) {}
> static inline struct ptr_ring *tap_get_ptr_ring(struct file *f)
> {
> return ERR_PTR(-EINVAL);
> diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
> index 80166eb62f41..04c504bb1954 100644
> --- a/include/linux/if_tun.h
> +++ b/include/linux/if_tun.h
> @@ -21,6 +21,7 @@ struct tun_msg_ctl {
>
> #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
> struct socket *tun_get_socket(struct file *);
> +void tun_wake_netdev_queue(struct file *file);
> struct ptr_ring *tun_get_tx_ring(struct file *file);
>
> static inline bool tun_is_xdp_frame(void *ptr)
> @@ -50,6 +51,8 @@ static inline struct socket *tun_get_socket(struct file *f)
> return ERR_PTR(-EINVAL);
> }
>
> +static inline void tun_wake_netdev_queue(struct file *f) {}
> +
> static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
> {
> return ERR_PTR(-EINVAL);
> --
> 2.43.0
>
next prev parent reply other threads:[~2025-09-02 21:31 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-02 8:09 [PATCH net-next v4 0/4] TUN/TAP & vhost_net: netdev queue flow control to avoid ptr_ring tail drop Simon Schippers
2025-09-02 8:09 ` [PATCH 1/4] ptr_ring_spare: Helper to check if spare capacity of size cnt is available Simon Schippers
2025-09-02 21:13 ` Willem de Bruijn
2025-09-03 3:13 ` Jason Wang
2025-09-03 13:05 ` Michael S. Tsirkin
2025-09-03 18:29 ` Simon Schippers
2025-09-02 8:09 ` [PATCH 2/4] netdev queue flow control for TUN Simon Schippers
2025-09-02 21:20 ` Willem de Bruijn
2025-09-03 18:35 ` Simon Schippers
2025-09-02 21:31 ` Willem de Bruijn
2025-09-03 3:27 ` Jason Wang
2025-09-03 18:41 ` Simon Schippers
2025-09-03 13:10 ` Michael S. Tsirkin
2025-09-03 18:45 ` Simon Schippers
2025-09-03 13:13 ` Michael S. Tsirkin
2025-09-03 18:49 ` Simon Schippers
2025-09-02 8:09 ` [PATCH 3/4] netdev queue flow control for TAP Simon Schippers
2025-09-02 8:09 ` [PATCH 4/4] netdev queue flow control for vhost_net Simon Schippers
2025-09-02 21:31 ` Willem de Bruijn [this message]
2025-09-03 3:42 ` Jason Wang
2025-09-03 13:51 ` Willem de Bruijn
2025-09-04 2:47 ` Jason Wang
2025-09-10 20:22 ` Simon Schippers
2025-09-03 4:00 ` [PATCH net-next v4 0/4] TUN/TAP & vhost_net: netdev queue flow control to avoid ptr_ring tail drop Jason Wang
2025-09-03 18:55 ` Simon Schippers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=willemdebruijn.kernel.251eacee11eca@gmail.com \
--to=willemdebruijn.kernel@gmail.com \
--cc=eperezma@redhat.com \
--cc=jasowang@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=simon.schippers@tu-dortmund.de \
--cc=stephen@networkplumber.org \
--cc=tim.gebauer@tu-dortmund.de \
--cc=virtualization@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.