Netdev List
 help / color / mirror / Atom feed
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, magnus.karlsson@intel.com,
	stfomichev@gmail.com, kuba@kernel.org, pabeni@redhat.com,
	horms@kernel.org, bjorn@kernel.org, lorenzo@kernel.org,
	hawk@kernel.org, toke@redhat.com,
	Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Subject: [PATCH RFC net-next 4/4] veth: use generic skb XDP handling
Date: Sat,  9 May 2026 10:48:58 +0200	[thread overview]
Message-ID: <20260509084858.773921-5-maciej.fijalkowski@intel.com> (raw)
In-Reply-To: <20260509084858.773921-1-maciej.fijalkowski@intel.com>

veth currently runs XDP for skb-backed packets by constructing an
xdp_buff and then, for XDP_TX and XDP_REDIRECT, converting that
skb-backed xdp_buff into an xdp_frame. The backing memory is still
skb-owned, so veth has to pin the skb data and frags manually before
consuming the skb.

Use the generic skb XDP helper for skb-backed packets instead. This
keeps skb-backed packets on the skb generic XDP path: XDP_REDIRECT uses
xdp_do_generic_redirect() and XDP_TX uses generic_xdp_tx(). Packets that
already arrive as xdp_frames keep using the existing veth xdp_frame path.

veth still provides its own page_pool and xdp_rxq_info through struct
xdp_generic_ctx. It also keeps using struct veth_xdp_buff storage so
metadata kfuncs that need the skb continue to work after a possible skb
COW.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
 drivers/net/veth.c | 179 +++++++++------------------------------------
 1 file changed, 36 insertions(+), 143 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index e35df717e65e..4120a69f7e02 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -49,6 +49,10 @@ struct veth_stats {
 	u64	xdp_drops;
 	u64	xdp_tx;
 	u64	xdp_tx_err;
+	/* skb-backed XDP_TX is handled immediately by generic_xdp_tx().
+	 * xdp_frame XDP_TX is accounted from veth_xdp_flush_bq().
+	 */
+	u64 xdp_skb_tx;
 	u64	peer_tq_xdp_xmit;
 	u64	peer_tq_xdp_xmit_err;
 };
@@ -63,7 +67,6 @@ struct veth_rq {
 	struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */
 	struct net_device	*dev;
 	struct bpf_prog __rcu	*xdp_prog;
-	struct xdp_mem_info	xdp_mem;
 	struct veth_rq_stats	stats;
 	bool			rx_notify_masked;
 	struct ptr_ring		xdp_ring;
@@ -648,10 +651,15 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
 	if (likely(xdp_prog)) {
 		struct veth_xdp_buff vxbuf;
 		struct xdp_buff *xdp = &vxbuf.xdp;
+		struct xdp_rxq_info rxq;
 		u32 act;
 
 		xdp_convert_frame_to_buff(frame, xdp);
-		xdp->rxq = &rq->xdp_rxq;
+
+		rxq = rq->xdp_rxq;
+		rxq.mem.type = frame->mem_type;
+		xdp->rxq = &rxq;
+
 		vxbuf.skb = NULL;
 
 		act = bpf_prog_run_xdp(xdp_prog, xdp);
@@ -663,19 +671,16 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
 			break;
 		case XDP_TX:
 			orig_frame = *frame;
-			xdp->rxq->mem.type = frame->mem_type;
 			if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
 				trace_xdp_exception(rq->dev, xdp_prog, act);
 				frame = &orig_frame;
 				stats->rx_drops++;
 				goto err_xdp;
 			}
-			stats->xdp_tx++;
 			rcu_read_unlock();
 			goto xdp_xmit;
 		case XDP_REDIRECT:
 			orig_frame = *frame;
-			xdp->rxq->mem.type = frame->mem_type;
 			if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
 				frame = &orig_frame;
 				stats->rx_drops++;
@@ -735,69 +740,20 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
 	}
 }
 
-static void veth_xdp_get(struct xdp_buff *xdp)
-{
-	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
-	int i;
-
-	get_page(virt_to_page(xdp->data));
-	if (likely(!xdp_buff_has_frags(xdp)))
-		return;
-
-	for (i = 0; i < sinfo->nr_frags; i++)
-		__skb_frag_ref(&sinfo->frags[i]);
-}
-
-static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
-					struct xdp_buff *xdp,
-					struct sk_buff **pskb)
-{
-	struct sk_buff *skb = *pskb;
-	u32 frame_sz;
-
-	if (skb_shared(skb) || skb_head_is_locked(skb) ||
-	    skb_shinfo(skb)->nr_frags ||
-	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
-		if (skb_pp_cow_data(rq->page_pool, pskb, XDP_PACKET_HEADROOM))
-			goto drop;
-
-		skb = *pskb;
-	}
-
-	/* SKB "head" area always have tailroom for skb_shared_info */
-	frame_sz = skb_end_pointer(skb) - skb->head;
-	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
-	xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
-			 skb_headlen(skb), true);
-
-	if (skb_is_nonlinear(skb)) {
-		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
-		xdp_buff_set_frags_flag(xdp);
-	} else {
-		xdp_buff_clear_frags_flag(xdp);
-	}
-	*pskb = skb;
-
-	return 0;
-drop:
-	consume_skb(skb);
-	*pskb = NULL;
-
-	return -ENOMEM;
-}
-
 static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 					struct sk_buff *skb,
-					struct veth_xdp_tx_bq *bq,
 					struct veth_stats *stats)
 {
-	void *orig_data, *orig_data_end;
 	struct bpf_prog *xdp_prog;
 	struct veth_xdp_buff vxbuf;
-	struct xdp_buff *xdp = &vxbuf.xdp;
-	u32 act, metalen;
-	int off;
+	struct xdp_generic_ctx ctx = {
+		.skb_cow_check = skb_needs_xdp_cow,
+		.page_pool = rq->page_pool,
+		.xdp_rxq = &rq->xdp_rxq,
+		.xdp_skb = &vxbuf.skb,
+		.xdp = &vxbuf.xdp,
+	};
+	int ret;
 
 	skb_prepare_for_gro(skb);
 
@@ -808,94 +764,33 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 		goto out;
 	}
 
-	__skb_push(skb, skb->data - skb_mac_header(skb));
-	if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
-		goto drop;
-	vxbuf.skb = skb;
-
-	orig_data = xdp->data;
-	orig_data_end = xdp->data_end;
-
-	act = bpf_prog_run_xdp(xdp_prog, xdp);
-
-	switch (act) {
+	ret = __do_xdp_generic(xdp_prog, &skb, &ctx);
+	switch (ctx.act) {
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		veth_xdp_get(xdp);
-		consume_skb(skb);
-		xdp->rxq->mem = rq->xdp_mem;
-		if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
-			trace_xdp_exception(rq->dev, xdp_prog, act);
-			stats->rx_drops++;
-			goto err_xdp;
-		}
-		stats->xdp_tx++;
-		rcu_read_unlock();
-		goto xdp_xmit;
+		if (!ctx.err)
+			stats->xdp_skb_tx++;
+		break;
 	case XDP_REDIRECT:
-		veth_xdp_get(xdp);
-		consume_skb(skb);
-		xdp->rxq->mem = rq->xdp_mem;
-		if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
+		if (ctx.err)
 			stats->rx_drops++;
-			goto err_xdp;
-		}
-		stats->xdp_redirect++;
-		rcu_read_unlock();
-		goto xdp_xmit;
+		else
+			stats->xdp_redirect++;
+		break;
 	default:
-		bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act);
-		fallthrough;
 	case XDP_ABORTED:
-		trace_xdp_exception(rq->dev, xdp_prog, act);
-		fallthrough;
 	case XDP_DROP:
 		stats->xdp_drops++;
-		goto xdp_drop;
+		break;
 	}
 	rcu_read_unlock();
 
-	/* check if bpf_xdp_adjust_head was used */
-	off = orig_data - xdp->data;
-	if (off > 0)
-		__skb_push(skb, off);
-	else if (off < 0)
-		__skb_pull(skb, -off);
-
-	skb_reset_mac_header(skb);
-
-	/* check if bpf_xdp_adjust_tail was used */
-	off = xdp->data_end - orig_data_end;
-	if (off != 0)
-		__skb_put(skb, off); /* positive on grow, negative on shrink */
-
-	/* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
-	 * (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
-	 */
-	if (xdp_buff_has_frags(xdp))
-		skb->data_len = skb_shinfo(skb)->xdp_frags_size;
-	else
-		skb->data_len = 0;
-
-	skb->protocol = eth_type_trans(skb, rq->dev);
-
-	metalen = xdp->data - xdp->data_meta;
-	if (metalen)
-		skb_metadata_set(skb, metalen);
+	if (ret == XDP_PASS)
+		return skb;
+	return NULL;
 out:
 	return skb;
-drop:
-	stats->rx_drops++;
-xdp_drop:
-	rcu_read_unlock();
-	kfree_skb(skb);
-	return NULL;
-err_xdp:
-	rcu_read_unlock();
-	xdp_return_buff(xdp);
-xdp_xmit:
-	return NULL;
 }
 
 static int veth_xdp_rcv(struct veth_rq *rq, int budget,
@@ -931,7 +826,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
 			struct sk_buff *skb = ptr;
 
 			stats->xdp_bytes += skb->len;
-			skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
+			skb = veth_xdp_rcv_skb(rq, skb, stats);
 			if (skb) {
 				if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC))
 					netif_receive_skb(skb);
@@ -947,7 +842,9 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
 
 	u64_stats_update_begin(&rq->stats.syncp);
 	rq->stats.vs.xdp_redirect += stats->xdp_redirect;
+	rq->stats.vs.xdp_tx += stats->xdp_skb_tx;
 	rq->stats.vs.xdp_bytes += stats->xdp_bytes;
+	rq->stats.vs.xdp_tx_err += stats->xdp_tx_err;
 	rq->stats.vs.xdp_drops += stats->xdp_drops;
 	rq->stats.vs.rx_drops += stats->rx_drops;
 	rq->stats.vs.xdp_packets += done;
@@ -979,7 +876,7 @@ static int veth_poll(struct napi_struct *napi, int budget)
 
 	if (stats.xdp_redirect > 0)
 		xdp_do_flush();
-	if (stats.xdp_tx > 0)
+	if (bq.count > 0)
 		veth_xdp_flush(rq, &bq);
 	xdp_clear_return_frame_no_direct();
 
@@ -1123,13 +1020,10 @@ static int veth_enable_xdp_range(struct net_device *dev, int start, int end,
 			goto err_rxq_reg;
 
 		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
-						 MEM_TYPE_PAGE_SHARED,
+						 MEM_TYPE_PAGE_POOL_OR_SHARED,
 						 NULL);
 		if (err < 0)
 			goto err_reg_mem;
-
-		/* Save original mem info as it can be overwritten */
-		rq->xdp_mem = rq->xdp_rxq.mem;
 	}
 	return 0;
 
@@ -1156,7 +1050,6 @@ static void veth_disable_xdp_range(struct net_device *dev, int start, int end,
 	for (i = start; i < end; i++) {
 		struct veth_rq *rq = &priv->rq[i];
 
-		rq->xdp_rxq.mem = rq->xdp_mem;
 		xdp_rxq_info_unreg(&rq->xdp_rxq);
 
 		if (delete_napi)
-- 
2.43.0


  parent reply	other threads:[~2026-05-09  8:49 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-09  8:48 [PATCH RFC net-next 0/4] xdp: reuse generic skb XDP handling for veth Maciej Fijalkowski
2026-05-09  8:48 ` [PATCH RFC net-next 1/4] xdp: add mixed page_pool/page_shared memory type Maciej Fijalkowski
2026-05-09  8:48 ` [PATCH RFC net-next 2/4] xdp: return status from generic_xdp_tx() Maciej Fijalkowski
2026-05-12 12:57   ` Björn Töpel
2026-05-12 17:13     ` Maciej Fijalkowski
2026-05-09  8:48 ` [PATCH RFC net-next 3/4] xdp: split generic XDP skb handling Maciej Fijalkowski
2026-05-09  8:48 ` Maciej Fijalkowski [this message]
2026-05-12 14:32   ` [PATCH RFC net-next 4/4] veth: use generic skb XDP handling Björn Töpel
2026-05-12 17:06     ` Maciej Fijalkowski
2026-05-13 11:31       ` Björn Töpel
2026-05-12 12:55 ` [PATCH RFC net-next 0/4] xdp: reuse generic skb XDP handling for veth Björn Töpel
2026-05-12 17:12   ` Maciej Fijalkowski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260509084858.773921-5-maciej.fijalkowski@intel.com \
    --to=maciej.fijalkowski@intel.com \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=hawk@kernel.org \
    --cc=horms@kernel.org \
    --cc=kuba@kernel.org \
    --cc=lorenzo@kernel.org \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=stfomichev@gmail.com \
    --cc=toke@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.