[PATCH RFC net-next 1/4] xdp: add mixed page_pool/page_shared memory type
From: Maciej Fijalkowski @ 2026-05-09 8:48 UTC
To: netdev
Cc: bpf, magnus.karlsson, stfomichev, kuba, pabeni, horms, bjorn,
lorenzo, hawk, toke, Maciej Fijalkowski
Generic XDP runs on skb-backed data. In that mode the skb head remains
owned by the skb, but XDP helpers may still release frags, for example
when a program trims a non-linear packet.
With the generic page_pool CoW path, the frags visible to XDP may be
backed by the generic system page_pool. In the fallback path, or for
other skb-backed memory, the same generic XDP rxq may still describe
page-frag-based memory. Selecting MEM_TYPE_PAGE_POOL or
MEM_TYPE_PAGE_SHARED purely from the rxq therefore either lies about
page_pool ownership or misses recycling opportunities.
Add MEM_TYPE_PAGE_POOL_OR_SHARED for skb-backed generic XDP users. The
return path inspects the actual netmem: page_pool-backed netmems are
returned through their page_pool, and everything else falls back to
page_frag_free(). Transition netdev_rx_queue's xdp_rxq_info from
MEM_TYPE_PAGE_SHARED to MEM_TYPE_PAGE_POOL_OR_SHARED.
This keeps the rxq identity stable for users that inspect xdp->rxq->dev
and xdp->rxq->queue_index, while avoiding per-packet rxq->mem mutation.
Respect the new mem_type in __xdp_build_skb_from_frame(), as veth could
redirect an xdp_frame onto a cpumap.
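Condensed, the per-netmem dispatch that the diff below adds to
__xdp_return() reads as follows (a sketch; the CONFIG_PAGE_POOL=n
fallback in xdp_netmem_is_pp() is covered in the full hunk):

	case MEM_TYPE_PAGE_POOL_OR_SHARED:
		head = netmem_compound_head(netmem);
		if (xdp_netmem_is_pp(head))
			/* page_pool-owned netmem: recycle via its pool */
			__xdp_return_page_pool(head, napi_direct);
		else
			/* anything else is plain page-frag memory */
			page_frag_free(__netmem_address(netmem));
		break;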
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
include/net/xdp.h | 1 +
net/core/dev.c | 7 ++++++
net/core/xdp.c | 54 ++++++++++++++++++++++++++++++++++++++++++-----
3 files changed, 57 insertions(+), 5 deletions(-)
diff --git a/include/net/xdp.h b/include/net/xdp.h
index aa742f413c35..d60b8857e4eb 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -45,6 +45,7 @@ enum xdp_mem_type {
MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */
MEM_TYPE_PAGE_POOL,
MEM_TYPE_XSK_BUFF_POOL,
+ MEM_TYPE_PAGE_POOL_OR_SHARED,
MEM_TYPE_MAX,
};
diff --git a/net/core/dev.c b/net/core/dev.c
index e59f6025067c..6cc2a5bed20f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11207,6 +11207,13 @@ static int netif_alloc_rx_queues(struct net_device *dev)
err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i, 0);
if (err < 0)
goto err_rxq_info;
+ err = xdp_rxq_info_reg_mem_model(&rx[i].xdp_rxq,
+ MEM_TYPE_PAGE_POOL_OR_SHARED,
+ NULL);
+ if (err < 0) {
+ xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+ goto err_rxq_info;
+ }
}
return 0;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9890a30584ba..c57a82620520 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -22,6 +22,7 @@
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
#include <net/xdp_sock_drv.h>
+#include "netmem_priv.h"
#define REG_STATE_NEW 0x0
#define REG_STATE_REGISTERED 0x1
@@ -280,6 +281,12 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
if (!__is_supported_mem_type(type))
return ERR_PTR(-EOPNOTSUPP);
+ /* MEM_TYPE_PAGE_POOL_OR_SHARED resolves the page_pool allocator per
+ * netmem at return time, so registering an allocator here is invalid.
+ */
+ if (type == MEM_TYPE_PAGE_POOL_OR_SHARED && allocator)
+ return ERR_PTR(-EINVAL);
+
mem->type = type;
if (!allocator) {
@@ -424,6 +431,23 @@ void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq,
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool);
+static bool xdp_netmem_is_pp(netmem_ref netmem)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ return netmem_is_pp(netmem);
+#else
+ return false;
+#endif
+}
+
+static void __xdp_return_page_pool(netmem_ref netmem, bool napi_direct)
+{
+ if (napi_direct && xdp_return_frame_no_direct())
+ napi_direct = false;
+
+ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, napi_direct);
+}
+
/* XDP RX runs under NAPI protection, and in different delivery error
* scenarios (e.g. queue full), it is possible to return the xdp_frame
* while still leveraging this protection. The @napi_direct boolean
@@ -433,20 +457,26 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool);
void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
bool napi_direct, struct xdp_buff *xdp)
{
+ netmem_ref head;
+
switch (mem_type) {
case MEM_TYPE_PAGE_POOL:
netmem = netmem_compound_head(netmem);
- if (napi_direct && xdp_return_frame_no_direct())
- napi_direct = false;
/* No need to check netmem_is_pp() as mem->type knows this a
* page_pool page
*/
- page_pool_put_full_netmem(netmem_get_pp(netmem), netmem,
- napi_direct);
+ __xdp_return_page_pool(netmem, napi_direct);
break;
case MEM_TYPE_PAGE_SHARED:
page_frag_free(__netmem_address(netmem));
break;
+ case MEM_TYPE_PAGE_POOL_OR_SHARED:
+ head = netmem_compound_head(netmem);
+ if (xdp_netmem_is_pp(head))
+ __xdp_return_page_pool(head, napi_direct);
+ else
+ page_frag_free(__netmem_address(netmem));
+ break;
case MEM_TYPE_PAGE_ORDER0:
put_page(__netmem_to_page(netmem));
break;
@@ -791,6 +821,19 @@ struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
}
EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc);
+static bool xdp_mem_is_page_pool_backed(enum xdp_mem_type mem_type,
+ netmem_ref netmem)
+{
+ switch (mem_type) {
+ case MEM_TYPE_PAGE_POOL:
+ return true;
+ case MEM_TYPE_PAGE_POOL_OR_SHARED:
+ return xdp_netmem_is_pp(netmem_compound_head(netmem));
+ default:
+ return false;
+ }
+}
+
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct sk_buff *skb,
struct net_device *dev)
@@ -836,7 +879,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
* - RX ring dev queue index (skb_record_rx_queue)
*/
- if (xdpf->mem_type == MEM_TYPE_PAGE_POOL)
+ if (xdp_mem_is_page_pool_backed(xdpf->mem_type,
+ virt_to_netmem(xdpf->data)))
skb_mark_for_recycle(skb);
/* Allow SKB to reuse area used by xdp_frame */
--
2.43.0

[PATCH RFC net-next 2/4] xdp: return status from generic_xdp_tx()
From: Maciej Fijalkowski @ 2026-05-09 8:48 UTC
To: netdev
Cc: bpf, magnus.karlsson, stfomichev, kuba, pabeni, horms, bjorn,
lorenzo, hawk, toke, Maciej Fijalkowski
Once veth starts to use __do_xdp_generic(), this will be needed for
incrementing stats correctly - otherwise we would have no knowledge of
whether XDP_TX succeeded. With this in place, the xdp_tx stat is only
bumped when a transmission actually happened.
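For illustration, a caller can then account XDP_TX along these lines
(a sketch; veth's actual accounting arrives in patch 4 as the
xdp_skb_tx counter):

	err = generic_xdp_tx(skb, xdp_prog);
	if (!err)
		stats->xdp_tx++;
	/* on failure the skb has already been freed and the device's
	 * tx_dropped counter bumped inside generic_xdp_tx()
	 */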
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
include/linux/netdevice.h | 2 +-
kernel/bpf/devmap.c | 2 +-
net/core/dev.c | 11 +++++++++--
net/core/filter.c | 2 +-
4 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 47417b2d48a4..473b18b0bb63 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4255,7 +4255,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
const struct bpf_prog *xdp_prog);
-void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog);
+int generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog);
int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb);
int netif_rx(struct sk_buff *skb);
int __netif_rx(struct sk_buff *skb);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 3d619d01088e..b7a4a0266345 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -714,7 +714,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
return 0;
skb->dev = dst->dev;
- generic_xdp_tx(skb, xdp_prog);
+ (void)generic_xdp_tx(skb, xdp_prog);
return 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 6cc2a5bed20f..09012cdea376 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5628,11 +5628,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
* and DDOS attacks will be more effective. In-driver-XDP use dedicated TX
* queues, so they do not have this starvation issue.
*/
-void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog)
+int generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog)
{
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
bool free_skb = true;
+ int err = -ENETDOWN;
int cpu, rc;
txq = netdev_core_pick_tx(dev, skb, NULL);
@@ -5640,8 +5641,12 @@ void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog)
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_xmit_frozen_or_drv_stopped(txq)) {
rc = netdev_start_xmit(skb, dev, txq, 0);
- if (dev_xmit_complete(rc))
+ if (dev_xmit_complete(rc)) {
free_skb = false;
+ err = 0;
+ } else {
+ err = -EBUSY;
+ }
}
HARD_TX_UNLOCK(dev, txq);
if (free_skb) {
@@ -5649,6 +5654,8 @@ void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog)
dev_core_stats_tx_dropped_inc(dev);
kfree_skb(skb);
}
+
+ return err;
}
static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
diff --git a/net/core/filter.c b/net/core/filter.c
index bf9c37b27646..d6d14e0a1e35 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4619,7 +4619,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
skb->dev = fwd;
_trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
- generic_xdp_tx(skb, xdp_prog);
+ (void)generic_xdp_tx(skb, xdp_prog);
return 0;
}
--
2.43.0

[PATCH RFC net-next 3/4] xdp: split generic XDP skb handling
From: Maciej Fijalkowski @ 2026-05-09 8:48 UTC
To: netdev
Cc: bpf, magnus.karlsson, stfomichev, kuba, pabeni, horms, bjorn,
lorenzo, hawk, toke, Maciej Fijalkowski
veth has its own page_pool and xdp_rxq_info and also embeds struct
xdp_buff into a larger context used by its metadata kfuncs. At the same
time, the skb-backed veth XDP path currently open-codes most of what
generic XDP already does and then converts skb-backed xdp_buffs into
xdp_frames for XDP_TX and XDP_REDIRECT.
Add a lower-level generic XDP helper, __do_xdp_generic(), that lets
callers provide a small context object. The context carries the
caller-provided xdp_buff storage, optional page_pool and optional
xdp_rxq_info, and returns the actual XDP action and redirect error to
the caller. A NULL page_pool keeps the existing behaviour and uses the
per-CPU system page_pool. A NULL xdp_rxq_info keeps deriving the rxq
from the skb device/rx queue.
This lets drivers such as veth preserve stats and redirect flush
decisions while using the generic skb XDP action handling.
Also adjust the existing bpf_prog_run_generic_xdp() callsites
({cpu,dev}map) so they keep using the netdev's xdp_rxq_info.
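A minimal driver-side sketch of the new entry point, mirroring the
veth conversion in patch 4 (vxbuf and rq are veth-local structures):

	struct xdp_generic_ctx ctx = {
		.xdp = &vxbuf.xdp,		/* caller-owned storage */
		.page_pool = rq->page_pool,	/* NULL: system page_pool */
		.xdp_rxq = &rq->xdp_rxq,	/* NULL: derive from skb */
		.xdp_skb = &vxbuf.skb,		/* skb as seen after COW */
		.skb_cow_check = skb_needs_xdp_cow,
	};
	int ret;

	ret = __do_xdp_generic(xdp_prog, &skb, &ctx);
	/* ctx.act holds the program verdict, ctx.err the XDP_TX or
	 * redirect status; ret == XDP_PASS means the skb is still valid.
	 */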
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
include/linux/netdevice.h | 31 +++++++++++
net/core/dev.c | 106 ++++++++++++++++++++++++++++----------
2 files changed, 111 insertions(+), 26 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 473b18b0bb63..7d7c88a33328 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4253,9 +4253,40 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
dev_kfree_skb_any_reason(skb, SKB_CONSUMED);
}
+struct page_pool;
+struct xdp_rxq_info;
+
+/**
+ * struct xdp_generic_ctx - caller context for skb-backed generic XDP
+ * @xdp: caller-provided xdp_buff storage
+ * @page_pool: optional page_pool used when skb COW is needed
+ * @xdp_rxq: optional rxq used to initialise @xdp
+ * @xdp_skb: optional pointer updated with the skb used for the XDP run
+ * @skb_cow_check: caller-selected skb COW predicate, required
+ * @act: actual XDP action returned by the program
+ * @err: redirect error, valid when @act is XDP_REDIRECT
+ *
+ * If @page_pool is NULL, the generic path uses the per-CPU system
+ * page_pool. If @xdp_rxq is NULL, the generic path derives the rxq
+ * from the skb device/rx-queue, preserving existing do_xdp_generic()
+ * behaviour.
+ */
+struct xdp_generic_ctx {
+ struct xdp_buff *xdp;
+ struct page_pool *page_pool;
+ struct xdp_rxq_info *xdp_rxq;
+ struct sk_buff **xdp_skb;
+ bool (*skb_cow_check)(const struct sk_buff *skb);
+ u32 act;
+ int err;
+};
+
+bool skb_needs_xdp_cow(const struct sk_buff *skb);
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
const struct bpf_prog *xdp_prog);
int generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog);
+int __do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb,
+ struct xdp_generic_ctx *ctx);
int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb);
int netif_rx(struct sk_buff *skb);
int __netif_rx(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 09012cdea376..f6770ca6f1bd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5445,11 +5445,11 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
return rxqueue;
}
-u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
- const struct bpf_prog *xdp_prog)
+static u32 __bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+ const struct bpf_prog *xdp_prog,
+ struct xdp_rxq_info *xdp_rxq)
{
void *orig_data, *orig_data_end, *hard_start;
- struct netdev_rx_queue *rxqueue;
bool orig_bcast, orig_host;
u32 mac_len, frame_sz;
__be16 orig_eth_type;
@@ -5467,8 +5467,13 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
frame_sz = (void *)skb_end_pointer(skb) - hard_start;
frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- rxqueue = netif_get_rxqueue(skb);
- xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
+ if (!xdp_rxq) {
+ struct netdev_rx_queue *rxqueue;
+
+ rxqueue = netif_get_rxqueue(skb);
+ xdp_rxq = &rxqueue->xdp_rxq;
+ }
+ xdp_init_buff(xdp, frame_sz, xdp_rxq);
xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
skb_headlen(skb) + mac_len, true);
if (skb_is_nonlinear(skb)) {
@@ -5547,15 +5552,27 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
return act;
}
+u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+ const struct bpf_prog *xdp_prog)
+{
+ return __bpf_prog_run_generic_xdp(skb, xdp, xdp_prog, NULL);
+}
+
static int
-netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog)
+netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog,
+ struct page_pool *page_pool)
{
struct sk_buff *skb = *pskb;
int err, hroom, troom;
- local_lock_nested_bh(&system_page_pool.bh_lock);
- err = skb_cow_data_for_xdp(this_cpu_read(system_page_pool.pool), pskb, prog);
- local_unlock_nested_bh(&system_page_pool.bh_lock);
+ if (page_pool) {
+ err = skb_cow_data_for_xdp(page_pool, pskb, prog);
+ } else {
+ local_lock_nested_bh(&system_page_pool.bh_lock);
+ err = skb_cow_data_for_xdp(this_cpu_read(system_page_pool.pool),
+ pskb, prog);
+ local_unlock_nested_bh(&system_page_pool.bh_lock);
+ }
if (!err)
return 0;
@@ -5573,9 +5590,29 @@ netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog)
return skb_linearize(skb);
}
+bool skb_needs_xdp_cow(const struct sk_buff *skb)
+{
+ /* Keep this predicate aligned with the old veth skb->xdp_buff
+ * conversion rules. A page_pool-backed COW is needed when the skb head
+ * cannot be reused as-is, when frags need to be made page_pool backed,
+ * or when the XDP headroom contract is not met.
+ */
+ return skb_shared(skb) || skb_head_is_locked(skb) ||
+ skb_shinfo(skb)->nr_frags ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM;
+}
+EXPORT_SYMBOL_GPL(skb_needs_xdp_cow);
+
+static bool generic_skb_needs_xdp_cow(const struct sk_buff *skb)
+{
+ return skb_cloned(skb) || skb_is_nonlinear(skb) ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM;
+}
+
static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
struct xdp_buff *xdp,
- const struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog,
+ struct xdp_generic_ctx *ctx)
{
struct sk_buff *skb = *pskb;
u32 mac_len, act = XDP_DROP;
@@ -5593,15 +5630,20 @@ static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
mac_len = skb->data - skb_mac_header(skb);
__skb_push(skb, mac_len);
- if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
- skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- if (netif_skb_check_for_xdp(pskb, xdp_prog))
+ if (INDIRECT_CALL_2(ctx->skb_cow_check,
+ generic_skb_needs_xdp_cow,
+ skb_needs_xdp_cow,
+ skb)) {
+ if (netif_skb_check_for_xdp(pskb, xdp_prog, ctx->page_pool))
goto do_drop;
}
__skb_pull(*pskb, mac_len);
- act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog);
+ if (ctx->xdp_skb)
+ *ctx->xdp_skb = *pskb;
+
+ act = __bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog, ctx->xdp_rxq);
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
@@ -5660,27 +5702,27 @@ int generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog)
static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
-int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb)
+int __do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb,
+ struct xdp_generic_ctx *ctx)
{
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
- if (xdp_prog) {
- struct xdp_buff xdp;
- u32 act;
- int err;
+ ctx->act = XDP_PASS;
+ ctx->err = 0;
+ if (xdp_prog) {
bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
- act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
- if (act != XDP_PASS) {
- switch (act) {
+ ctx->act = netif_receive_generic_xdp(pskb, ctx->xdp, xdp_prog, ctx);
+ if (ctx->act != XDP_PASS) {
+ switch (ctx->act) {
case XDP_REDIRECT:
- err = xdp_do_generic_redirect((*pskb)->dev, *pskb,
- &xdp, xdp_prog);
- if (err)
+ ctx->err = xdp_do_generic_redirect((*pskb)->dev, *pskb,
+ ctx->xdp, xdp_prog);
+ if (ctx->err)
goto out_redir;
break;
case XDP_TX:
- generic_xdp_tx(*pskb, xdp_prog);
+ ctx->err = generic_xdp_tx(*pskb, xdp_prog);
break;
}
bpf_net_ctx_clear(bpf_net_ctx);
@@ -5694,6 +5736,18 @@ int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb)
kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
+EXPORT_SYMBOL_GPL(__do_xdp_generic);
+
+int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb)
+{
+ struct xdp_generic_ctx ctx = {};
+ struct xdp_buff xdp;
+
+ ctx.xdp = &xdp;
+ ctx.skb_cow_check = generic_skb_needs_xdp_cow;
+
+ return __do_xdp_generic(xdp_prog, pskb, &ctx);
+}
EXPORT_SYMBOL_GPL(do_xdp_generic);
static int netif_rx_internal(struct sk_buff *skb)
--
2.43.0

[PATCH RFC net-next 4/4] veth: use generic skb XDP handling
From: Maciej Fijalkowski @ 2026-05-09 8:48 UTC
To: netdev
Cc: bpf, magnus.karlsson, stfomichev, kuba, pabeni, horms, bjorn,
lorenzo, hawk, toke, Maciej Fijalkowski
veth currently runs XDP for skb-backed packets by constructing an
xdp_buff and then, for XDP_TX and XDP_REDIRECT, converting that
skb-backed xdp_buff into an xdp_frame. The backing memory is still
skb-owned, so veth has to pin the skb data and frags manually before
consuming the skb.
Use the generic skb XDP helper instead, which keeps skb-backed packets
on the generic XDP path: XDP_REDIRECT uses xdp_do_generic_redirect()
and XDP_TX uses generic_xdp_tx(). Packets that already arrive as
xdp_frames keep using the existing veth xdp_frame path.
veth still provides its own page_pool and xdp_rxq_info through struct
xdp_generic_ctx. It also keeps using struct veth_xdp_buff storage so
metadata kfuncs that need the skb continue to work after a possible skb
COW.
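With the context above, the skb-path verdict handling collapses to a
small switch on ctx.act (condensed from the diff below):

	ret = __do_xdp_generic(xdp_prog, &skb, &ctx);
	switch (ctx.act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		if (!ctx.err)
			stats->xdp_skb_tx++;
		break;
	case XDP_REDIRECT:
		if (ctx.err)
			stats->rx_drops++;
		else
			stats->xdp_redirect++;
		break;
	default:
	case XDP_ABORTED:
	case XDP_DROP:
		stats->xdp_drops++;
		break;
	}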
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
drivers/net/veth.c | 179 +++++++++------------------------------------
1 file changed, 36 insertions(+), 143 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index e35df717e65e..4120a69f7e02 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -49,6 +49,10 @@ struct veth_stats {
u64 xdp_drops;
u64 xdp_tx;
u64 xdp_tx_err;
+ /* skb-backed XDP_TX is handled immediately by generic_xdp_tx().
+ * xdp_frame XDP_TX is accounted from veth_xdp_flush_bq().
+ */
+ u64 xdp_skb_tx;
u64 peer_tq_xdp_xmit;
u64 peer_tq_xdp_xmit_err;
};
@@ -63,7 +67,6 @@ struct veth_rq {
struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */
struct net_device *dev;
struct bpf_prog __rcu *xdp_prog;
- struct xdp_mem_info xdp_mem;
struct veth_rq_stats stats;
bool rx_notify_masked;
struct ptr_ring xdp_ring;
@@ -648,10 +651,15 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
if (likely(xdp_prog)) {
struct veth_xdp_buff vxbuf;
struct xdp_buff *xdp = &vxbuf.xdp;
+ struct xdp_rxq_info rxq;
u32 act;
xdp_convert_frame_to_buff(frame, xdp);
- xdp->rxq = &rq->xdp_rxq;
+
+ rxq = rq->xdp_rxq;
+ rxq.mem.type = frame->mem_type;
+ xdp->rxq = &rxq;
+
vxbuf.skb = NULL;
act = bpf_prog_run_xdp(xdp_prog, xdp);
@@ -663,19 +671,16 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
break;
case XDP_TX:
orig_frame = *frame;
- xdp->rxq->mem.type = frame->mem_type;
if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
trace_xdp_exception(rq->dev, xdp_prog, act);
frame = &orig_frame;
stats->rx_drops++;
goto err_xdp;
}
- stats->xdp_tx++;
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
orig_frame = *frame;
- xdp->rxq->mem.type = frame->mem_type;
if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
frame = &orig_frame;
stats->rx_drops++;
@@ -735,69 +740,20 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
}
}
-static void veth_xdp_get(struct xdp_buff *xdp)
-{
- struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
- int i;
-
- get_page(virt_to_page(xdp->data));
- if (likely(!xdp_buff_has_frags(xdp)))
- return;
-
- for (i = 0; i < sinfo->nr_frags; i++)
- __skb_frag_ref(&sinfo->frags[i]);
-}
-
-static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
- struct xdp_buff *xdp,
- struct sk_buff **pskb)
-{
- struct sk_buff *skb = *pskb;
- u32 frame_sz;
-
- if (skb_shared(skb) || skb_head_is_locked(skb) ||
- skb_shinfo(skb)->nr_frags ||
- skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- if (skb_pp_cow_data(rq->page_pool, pskb, XDP_PACKET_HEADROOM))
- goto drop;
-
- skb = *pskb;
- }
-
- /* SKB "head" area always have tailroom for skb_shared_info */
- frame_sz = skb_end_pointer(skb) - skb->head;
- frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
- xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
- skb_headlen(skb), true);
-
- if (skb_is_nonlinear(skb)) {
- skb_shinfo(skb)->xdp_frags_size = skb->data_len;
- xdp_buff_set_frags_flag(xdp);
- } else {
- xdp_buff_clear_frags_flag(xdp);
- }
- *pskb = skb;
-
- return 0;
-drop:
- consume_skb(skb);
- *pskb = NULL;
-
- return -ENOMEM;
-}
-
static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
struct sk_buff *skb,
- struct veth_xdp_tx_bq *bq,
struct veth_stats *stats)
{
- void *orig_data, *orig_data_end;
struct bpf_prog *xdp_prog;
struct veth_xdp_buff vxbuf;
- struct xdp_buff *xdp = &vxbuf.xdp;
- u32 act, metalen;
- int off;
+ struct xdp_generic_ctx ctx = {
+ .skb_cow_check = skb_needs_xdp_cow,
+ .page_pool = rq->page_pool,
+ .xdp_rxq = &rq->xdp_rxq,
+ .xdp_skb = &vxbuf.skb,
+ .xdp = &vxbuf.xdp,
+ };
+ int ret;
skb_prepare_for_gro(skb);
@@ -808,94 +764,33 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
goto out;
}
- __skb_push(skb, skb->data - skb_mac_header(skb));
- if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
- goto drop;
- vxbuf.skb = skb;
-
- orig_data = xdp->data;
- orig_data_end = xdp->data_end;
-
- act = bpf_prog_run_xdp(xdp_prog, xdp);
-
- switch (act) {
+ ret = __do_xdp_generic(xdp_prog, &skb, &ctx);
+ switch (ctx.act) {
case XDP_PASS:
break;
case XDP_TX:
- veth_xdp_get(xdp);
- consume_skb(skb);
- xdp->rxq->mem = rq->xdp_mem;
- if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
- trace_xdp_exception(rq->dev, xdp_prog, act);
- stats->rx_drops++;
- goto err_xdp;
- }
- stats->xdp_tx++;
- rcu_read_unlock();
- goto xdp_xmit;
+ if (!ctx.err)
+ stats->xdp_skb_tx++;
+ break;
case XDP_REDIRECT:
- veth_xdp_get(xdp);
- consume_skb(skb);
- xdp->rxq->mem = rq->xdp_mem;
- if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
+ if (ctx.err)
stats->rx_drops++;
- goto err_xdp;
- }
- stats->xdp_redirect++;
- rcu_read_unlock();
- goto xdp_xmit;
+ else
+ stats->xdp_redirect++;
+ break;
default:
- bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act);
- fallthrough;
case XDP_ABORTED:
- trace_xdp_exception(rq->dev, xdp_prog, act);
- fallthrough;
case XDP_DROP:
stats->xdp_drops++;
- goto xdp_drop;
+ break;
}
rcu_read_unlock();
- /* check if bpf_xdp_adjust_head was used */
- off = orig_data - xdp->data;
- if (off > 0)
- __skb_push(skb, off);
- else if (off < 0)
- __skb_pull(skb, -off);
-
- skb_reset_mac_header(skb);
-
- /* check if bpf_xdp_adjust_tail was used */
- off = xdp->data_end - orig_data_end;
- if (off != 0)
- __skb_put(skb, off); /* positive on grow, negative on shrink */
-
- /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
- * (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
- */
- if (xdp_buff_has_frags(xdp))
- skb->data_len = skb_shinfo(skb)->xdp_frags_size;
- else
- skb->data_len = 0;
-
- skb->protocol = eth_type_trans(skb, rq->dev);
-
- metalen = xdp->data - xdp->data_meta;
- if (metalen)
- skb_metadata_set(skb, metalen);
+ if (ret == XDP_PASS)
+ return skb;
+ return NULL;
out:
return skb;
-drop:
- stats->rx_drops++;
-xdp_drop:
- rcu_read_unlock();
- kfree_skb(skb);
- return NULL;
-err_xdp:
- rcu_read_unlock();
- xdp_return_buff(xdp);
-xdp_xmit:
- return NULL;
}
static int veth_xdp_rcv(struct veth_rq *rq, int budget,
@@ -931,7 +826,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
struct sk_buff *skb = ptr;
stats->xdp_bytes += skb->len;
- skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
+ skb = veth_xdp_rcv_skb(rq, skb, stats);
if (skb) {
if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC))
netif_receive_skb(skb);
@@ -947,7 +842,9 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
u64_stats_update_begin(&rq->stats.syncp);
rq->stats.vs.xdp_redirect += stats->xdp_redirect;
+ rq->stats.vs.xdp_tx += stats->xdp_skb_tx;
rq->stats.vs.xdp_bytes += stats->xdp_bytes;
+ rq->stats.vs.xdp_tx_err += stats->xdp_tx_err;
rq->stats.vs.xdp_drops += stats->xdp_drops;
rq->stats.vs.rx_drops += stats->rx_drops;
rq->stats.vs.xdp_packets += done;
@@ -979,7 +876,7 @@ static int veth_poll(struct napi_struct *napi, int budget)
if (stats.xdp_redirect > 0)
xdp_do_flush();
- if (stats.xdp_tx > 0)
+ if (bq.count > 0)
veth_xdp_flush(rq, &bq);
xdp_clear_return_frame_no_direct();
@@ -1123,13 +1020,10 @@ static int veth_enable_xdp_range(struct net_device *dev, int start, int end,
goto err_rxq_reg;
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
- MEM_TYPE_PAGE_SHARED,
+ MEM_TYPE_PAGE_POOL_OR_SHARED,
NULL);
if (err < 0)
goto err_reg_mem;
-
- /* Save original mem info as it can be overwritten */
- rq->xdp_mem = rq->xdp_rxq.mem;
}
return 0;
@@ -1156,7 +1050,6 @@ static void veth_disable_xdp_range(struct net_device *dev, int start, int end,
for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
- rq->xdp_rxq.mem = rq->xdp_mem;
xdp_rxq_info_unreg(&rq->xdp_rxq);
if (delete_napi)
--
2.43.0