netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jon Kohler <jon@nutanix.com>
To: netdev@vger.kernel.org,
	Willem de Bruijn <willemdebruijn.kernel@gmail.com>,
	Jason Wang <jasowang@redhat.com>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Jesper Dangaard Brouer <hawk@kernel.org>,
	John Fastabend <john.fastabend@gmail.com>,
	Stanislav Fomichev <sdf@fomichev.me>,
	linux-kernel@vger.kernel.org (open list),
	bpf@vger.kernel.org (open list:XDP (eXpress Data
	Path):Keyword:(?:\b|_)xdp(?:\b|_))
Cc: Jon Kohler <jon@nutanix.com>
Subject: [PATCH net-next v2 5/9] tun: use bulk NAPI cache allocation in tun_xdp_one
Date: Tue, 25 Nov 2025 13:00:32 -0700	[thread overview]
Message-ID: <20251125200041.1565663-6-jon@nutanix.com> (raw)
In-Reply-To: <20251125200041.1565663-1-jon@nutanix.com>

Optimize TUN_MSG_PTR batch processing by allocating sk_buff structures
in bulk from the per-CPU NAPI cache using napi_skb_cache_get_bulk.
This reduces allocation overhead and improves efficiency, especially
when IFF_NAPI is enabled and GRO is feeding entries back to the cache.

If bulk allocation cannot fully satisfy the batch, gracefully drop only
the uncovered portion and allow the rest of the batch to proceed. This
mirrors the existing behavior, where a failed build_skb() call drops
that single entry and returns -ENOMEM.

Signed-off-by: Jon Kohler <jon@nutanix.com>
---
 drivers/net/tun.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 97f130bc5fed..64f944cce517 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2420,13 +2420,13 @@ static void tun_put_page(struct tun_page *tpage)
 static int tun_xdp_one(struct tun_struct *tun,
 		       struct tun_file *tfile,
 		       struct xdp_buff *xdp, int *flush,
-		       struct tun_page *tpage)
+		       struct tun_page *tpage,
+		       struct sk_buff *skb)
 {
 	unsigned int datasize = xdp->data_end - xdp->data;
 	struct virtio_net_hdr *gso = xdp->data_hard_start;
 	struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr;
 	struct bpf_prog *xdp_prog;
-	struct sk_buff *skb = NULL;
 	struct sk_buff_head *queue;
 	netdev_features_t features;
 	u32 rxhash = 0, act;
@@ -2437,6 +2437,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 	struct page *page;
 
 	if (unlikely(datasize < ETH_HLEN)) {
+		kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_SMALL);
 		dev_core_stats_rx_dropped_inc(tun->dev);
 		return -EINVAL;
 	}
@@ -2454,6 +2455,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 		ret = tun_xdp_act(tun, xdp_prog, xdp, act);
 		if (ret < 0) {
 			/* tun_xdp_act already handles drop statistics */
+			kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
 			put_page(virt_to_head_page(xdp->data));
 			return ret;
 		}
@@ -2463,6 +2465,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 			*flush = true;
 			fallthrough;
 		case XDP_TX:
+			napi_consume_skb(skb, 1);
 			return 0;
 		case XDP_PASS:
 			break;
@@ -2475,13 +2478,15 @@ static int tun_xdp_one(struct tun_struct *tun,
 				tpage->page = page;
 				tpage->count = 1;
 			}
+			napi_consume_skb(skb, 1);
 			return 0;
 		}
 	}
 
 build:
-	skb = build_skb(xdp->data_hard_start, buflen);
+	skb = build_skb_around(skb, xdp->data_hard_start, buflen);
 	if (!skb) {
+		kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
 		dev_core_stats_rx_dropped_inc(tun->dev);
 		return -ENOMEM;
 	}
@@ -2566,9 +2571,11 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 	if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
 	    ctl && ctl->type == TUN_MSG_PTR) {
 		struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+		int flush = 0, queued = 0, num_skbs = 0;
 		struct tun_page tpage;
 		int n = ctl->num;
-		int flush = 0, queued = 0;
+		/* Max size of VHOST_NET_BATCH */
+		void *skbs[64];
 
 		memset(&tpage, 0, sizeof(tpage));
 
@@ -2576,13 +2583,24 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 		rcu_read_lock();
 		bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
 
-		for (i = 0; i < n; i++) {
+		num_skbs = napi_skb_cache_get_bulk(skbs, n);
+
+		for (i = 0; i < num_skbs; i++) {
+			struct sk_buff *skb = skbs[i];
 			xdp = &((struct xdp_buff *)ctl->ptr)[i];
-			ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
+			ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage,
+					  skb);
 			if (ret > 0)
 				queued += ret;
 		}
 
+		/* Handle remaining xdp_buff entries if num_skbs < ctl->num */
+		for (i = num_skbs; i < ctl->num; i++) {
+			xdp = &((struct xdp_buff *)ctl->ptr)[i];
+			dev_core_stats_rx_dropped_inc(tun->dev);
+			put_page(virt_to_head_page(xdp->data));
+		}
+
 		if (flush)
 			xdp_do_flush();
 
-- 
2.43.0


  parent reply	other threads:[~2025-11-25 19:18 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-25 20:00 [PATCH net-next v2 0/9] tun: optimize SKB allocation with NAPI cache Jon Kohler
2025-11-25 20:00 ` [PATCH net-next v2 1/9] tun: cleanup out label in tun_xdp_one Jon Kohler
2025-11-25 20:00 ` [PATCH net-next v2 2/9] tun: correct drop statistics " Jon Kohler
2025-11-25 20:00 ` [PATCH net-next v2 3/9] tun: correct drop statistics in tun_put_user Jon Kohler
2025-11-29  3:07   ` Willem de Bruijn
2025-12-02 16:40     ` Jon Kohler
2025-12-02 21:34       ` Willem de Bruijn
2025-12-02 21:36         ` Jon Kohler
2025-11-25 20:00 ` [PATCH net-next v2 4/9] tun: correct drop statistics in tun_get_user Jon Kohler
2025-11-25 20:00 ` Jon Kohler [this message]
2025-11-28  3:02   ` [PATCH net-next v2 5/9] tun: use bulk NAPI cache allocation in tun_xdp_one Jason Wang
2025-12-02 16:49     ` Jon Kohler
2025-12-02 17:32       ` Jesper Dangaard Brouer
2025-12-02 17:45         ` Jon Kohler
2025-12-03  4:10           ` Jason Wang
2025-12-03  4:34             ` Jon Kohler
2025-12-03  6:40               ` Jason Wang
2025-12-03  8:47         ` Sebastian Andrzej Siewior
2025-12-03 15:35           ` Jon Kohler
2025-12-05  7:58             ` Sebastian Andrzej Siewior
2025-12-05 13:21               ` Jesper Dangaard Brouer
2025-12-05 16:56                 ` Jon Kohler
2025-12-08 11:04                 ` Sebastian Andrzej Siewior
2025-11-25 20:00 ` [PATCH net-next v2 6/9] tun: use napi_build_skb in __tun_build_skb Jon Kohler
2025-11-25 20:00 ` [PATCH net-next v2 7/9] tun: use napi_consume_skb() in tun_put_user Jon Kohler
2025-11-25 20:00 ` [PATCH net-next v2 8/9] net: core: export skb_defer_free_flush Jon Kohler
2025-11-25 20:00 ` [PATCH net-next v2 9/9] tun: flush deferred skb free list before bulk NAPI cache get Jon Kohler
2025-11-29  3:08 ` [PATCH net-next v2 0/9] tun: optimize SKB allocation with NAPI cache Willem de Bruijn
2025-12-02 16:38   ` Jon Kohler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251125200041.1565663-6-jon@nutanix.com \
    --to=jon@nutanix.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hawk@kernel.org \
    --cc=jasowang@redhat.com \
    --cc=john.fastabend@gmail.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    --cc=willemdebruijn.kernel@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).