From: Jon Kohler <jon@nutanix.com>
To: ast@kernel.org, daniel@iogearbox.net, davem@davemloft.net,
kuba@kernel.org, hawk@kernel.org, john.fastabend@gmail.com,
netdev@vger.kernel.org, bpf@vger.kernel.org, jon@nutanix.com,
aleksander.lobakin@intel.com,
Willem de Bruijn <willemdebruijn.kernel@gmail.com>,
Jason Wang <jasowang@redhat.com>,
Andrew Lunn <andrew+netdev@lunn.ch>,
Eric Dumazet <edumazet@google.com>,
Paolo Abeni <pabeni@redhat.com>,
linux-kernel@vger.kernel.org (open list)
Subject: [PATCH net-next 2/4] tun: optimize skb allocation in tun_xdp_one
Date: Tue, 6 May 2025 07:55:27 -0700 [thread overview]
Message-ID: <20250506145530.2877229-3-jon@nutanix.com> (raw)
In-Reply-To: <20250506145530.2877229-1-jon@nutanix.com>
Enhance TUN_MSG_PTR batch processing by leveraging bulk allocation from
the per-CPU NAPI cache via napi_skb_cache_get_bulk. This improves
efficiency by reducing allocation overhead, and is especially useful
when IFF_NAPI is enabled and GRO is able to feed SKBs back into the
per-CPU cache.
Handle scenarios where full preallocation of SKBs is not possible by
gracefully dropping only the uncovered portion of the batch payload.
Cc: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Jon Kohler <jon@nutanix.com>
---
drivers/net/tun.c | 39 +++++++++++++++++++++++++++------------
1 file changed, 27 insertions(+), 12 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 87fc51916fce..f7f7490e78dc 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2354,12 +2354,12 @@ static int tun_xdp_one(struct tun_struct *tun,
struct tun_file *tfile,
struct xdp_buff *xdp, int *flush,
struct tun_page *tpage,
- struct bpf_prog *xdp_prog)
+ struct bpf_prog *xdp_prog,
+ struct sk_buff *skb)
{
unsigned int datasize = xdp->data_end - xdp->data;
struct tun_xdp_hdr *hdr = xdp->data_hard_start;
struct virtio_net_hdr *gso = &hdr->gso;
- struct sk_buff *skb = NULL;
struct sk_buff_head *queue;
u32 rxhash = 0, act;
int buflen = hdr->buflen;
@@ -2381,16 +2381,15 @@ static int tun_xdp_one(struct tun_struct *tun,
act = bpf_prog_run_xdp(xdp_prog, xdp);
ret = tun_xdp_act(tun, xdp_prog, xdp, act);
- if (ret < 0) {
- put_page(virt_to_head_page(xdp->data));
+ if (ret < 0)
return ret;
- }
switch (ret) {
case XDP_REDIRECT:
*flush = true;
fallthrough;
case XDP_TX:
+ napi_consume_skb(skb, 1);
return 0;
case XDP_PASS:
break;
@@ -2403,13 +2402,14 @@ static int tun_xdp_one(struct tun_struct *tun,
tpage->page = page;
tpage->count = 1;
}
+ napi_consume_skb(skb, 1);
return 0;
}
}
build:
- skb = build_skb(xdp->data_hard_start, buflen);
- if (!skb) {
+ skb = build_skb_around(skb, xdp->data_hard_start, buflen);
+ if (unlikely(!skb)) {
ret = -ENOMEM;
goto out;
}
@@ -2427,7 +2427,6 @@ static int tun_xdp_one(struct tun_struct *tun,
if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) {
atomic_long_inc(&tun->rx_frame_errors);
- kfree_skb(skb);
ret = -EINVAL;
goto out;
}
@@ -2455,7 +2454,6 @@ static int tun_xdp_one(struct tun_struct *tun,
if (unlikely(tfile->detached)) {
spin_unlock(&queue->lock);
- kfree_skb(skb);
return -EBUSY;
}
@@ -2496,7 +2494,9 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
struct bpf_prog *xdp_prog;
struct tun_page tpage;
int n = ctl->num;
- int flush = 0, queued = 0;
+ int flush = 0, queued = 0, num_skbs = 0;
+ /* Max size of VHOST_NET_BATCH */
+ void *skbs[64];
memset(&tpage, 0, sizeof(tpage));
@@ -2505,12 +2505,27 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
xdp_prog = rcu_dereference(tun->xdp_prog);
- for (i = 0; i < n; i++) {
+ num_skbs = napi_skb_cache_get_bulk(skbs, n);
+
+ for (i = 0; i < num_skbs; i++) {
+ struct sk_buff *skb = skbs[i];
xdp = &((struct xdp_buff *)ctl->ptr)[i];
ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage,
- xdp_prog);
+ xdp_prog, skb);
if (ret > 0)
queued += ret;
+ else if (ret < 0) {
+ dev_core_stats_rx_dropped_inc(tun->dev);
+ napi_consume_skb(skb, 1);
+ put_page(virt_to_head_page(xdp->data));
+ }
+ }
+
+ /* Handle remaining xdp_buff entries if num_skbs < ctl->num */
+ for (i = num_skbs; i < ctl->num; i++) {
+ xdp = &((struct xdp_buff *)ctl->ptr)[i];
+ dev_core_stats_rx_dropped_inc(tun->dev);
+ put_page(virt_to_head_page(xdp->data));
}
if (flush)
--
2.43.0
next prev parent reply other threads:[~2025-05-06 14:25 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-06 14:55 [PATCH net-next 0/4] tun: optimize SKB allocation with NAPI cache Jon Kohler
2025-05-06 14:54 ` Willem de Bruijn
2025-05-06 19:11 ` Jon Kohler
2025-05-07 13:25 ` Willem de Bruijn
2025-05-06 14:55 ` [PATCH net-next 1/4] tun: rcu_deference xdp_prog only once per batch Jon Kohler
2025-05-07 20:43 ` Willem de Bruijn
2025-05-08 3:13 ` Jon Kohler
2025-05-08 13:31 ` Willem de Bruijn
2025-05-08 13:40 ` Jon Kohler
2025-05-06 14:55 ` Jon Kohler [this message]
2025-05-07 20:50 ` [PATCH net-next 2/4] tun: optimize skb allocation in tun_xdp_one Willem de Bruijn
2025-05-08 3:02 ` Jon Kohler
2025-05-06 14:55 ` [PATCH net-next 3/4] tun: use napi_build_skb in __tun_build_skb Jon Kohler
2025-05-07 20:50 ` Willem de Bruijn
2025-05-08 3:08 ` Jon Kohler
2025-05-06 14:55 ` [PATCH net-next 4/4] tun: use napi_consume_skb in tun_do_read Jon Kohler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250506145530.2877229-3-jon@nutanix.com \
--to=jon@nutanix.com \
--cc=aleksander.lobakin@intel.com \
--cc=andrew+netdev@lunn.ch \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hawk@kernel.org \
--cc=jasowang@redhat.com \
--cc=john.fastabend@gmail.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=willemdebruijn.kernel@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).