From: Jason Xing <kerneljasonxing@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, bjorn@kernel.org, magnus.karlsson@intel.com,
maciej.fijalkowski@intel.com, jonathan.lemon@gmail.com,
sdf@fomichev.me, ast@kernel.org, daniel@iogearbox.net,
hawk@kernel.org, john.fastabend@gmail.com, joe@dama.to,
willemdebruijn.kernel@gmail.com, fmancera@suse.de, csmate@nop.hu
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
Jason Xing <kernelxing@tencent.com>
Subject: [PATCH RFC net-next 1/2] Revert "xsk: Fix immature cq descriptor production"
Date: Fri, 31 Oct 2025 17:32:29 +0800
Message-ID: <20251031093230.82386-2-kerneljasonxing@gmail.com>
In-Reply-To: <20251031093230.82386-1-kerneljasonxing@gmail.com>
From: Jason Xing <kernelxing@tencent.com>
Manually revert the previous fix so that the new logic in the follow-up
patch can be implemented more easily on top of the earlier code base.
Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
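Note for reviewers (not part of the commit message): a minimal sketch of
the scheme this revert restores, mirroring the '+' lines below. The
per-skb descriptor count lives directly in skb_shinfo(skb)->destructor_arg
and the whole count is submitted to the completion queue when the skb is
destructed, so the generic xmit path no longer needs a per-address list or
a dedicated kmem_cache:

/* Sketch of the restored counting scheme (matches the '+' lines below). */
static u32 xsk_get_num_desc(struct sk_buff *skb)
{
	/* destructor_arg is reused as a plain per-skb descriptor counter */
	return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
}

static void xsk_set_destructor_arg(struct sk_buff *skb)
{
	/* bump the counter for the descriptor just attached to this skb */
	long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;

	skb_shinfo(skb)->destructor_arg = (void *)num;
}

static void xsk_destruct_skb(struct sk_buff *skb)
{
	/* submit all of this skb's descriptors to the completion queue */
	xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
	sock_wfree(skb);
}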
net/xdp/xsk.c | 118 ++++++++------------------------------------
net/xdp/xsk_queue.h | 12 -----
2 files changed, 20 insertions(+), 110 deletions(-)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 7b0c68a70888..05010c1bbfbd 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -36,20 +36,6 @@
#define TX_BATCH_SIZE 32
#define MAX_PER_SOCKET_BUDGET 32
-struct xsk_addr_node {
- u64 addr;
- struct list_head addr_node;
-};
-
-struct xsk_addr_head {
- u32 num_descs;
- struct list_head addrs_list;
-};
-
-static struct kmem_cache *xsk_tx_generic_cache;
-
-#define XSKCB(skb) ((struct xsk_addr_head *)((skb)->cb))
-
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -546,43 +532,24 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool)
+static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&pool->cq_lock, flags);
- ret = xskq_prod_reserve(pool->cq);
+ ret = xskq_prod_reserve_addr(pool->cq, addr);
spin_unlock_irqrestore(&pool->cq_lock, flags);
return ret;
}
-static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
- struct sk_buff *skb)
+static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n)
{
- struct xsk_addr_node *pos, *tmp;
- u32 descs_processed = 0;
unsigned long flags;
- u32 idx;
spin_lock_irqsave(&pool->cq_lock, flags);
- idx = xskq_get_prod(pool->cq);
-
- xskq_prod_write_addr(pool->cq, idx,
- (u64)(uintptr_t)skb_shinfo(skb)->destructor_arg);
- descs_processed++;
-
- if (unlikely(XSKCB(skb)->num_descs > 1)) {
- list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {
- xskq_prod_write_addr(pool->cq, idx + descs_processed,
- pos->addr);
- descs_processed++;
- list_del(&pos->addr_node);
- kmem_cache_free(xsk_tx_generic_cache, pos);
- }
- }
- xskq_prod_submit_n(pool->cq, descs_processed);
+ xskq_prod_submit_n(pool->cq, n);
spin_unlock_irqrestore(&pool->cq_lock, flags);
}
@@ -595,14 +562,9 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
spin_unlock_irqrestore(&pool->cq_lock, flags);
}
-static void xsk_inc_num_desc(struct sk_buff *skb)
-{
- XSKCB(skb)->num_descs++;
-}
-
static u32 xsk_get_num_desc(struct sk_buff *skb)
{
- return XSKCB(skb)->num_descs;
+ return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
}
static void xsk_destruct_skb(struct sk_buff *skb)
@@ -614,38 +576,31 @@ static void xsk_destruct_skb(struct sk_buff *skb)
*compl->tx_timestamp = ktime_get_tai_fast_ns();
}
- xsk_cq_submit_addr_locked(xdp_sk(skb->sk)->pool, skb);
+ xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
sock_wfree(skb);
}
-static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs,
- u64 addr)
+static void xsk_set_destructor_arg(struct sk_buff *skb)
+{
+ long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
+
+ skb_shinfo(skb)->destructor_arg = (void *)num;
+}
+
+static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs)
{
- BUILD_BUG_ON(sizeof(struct xsk_addr_head) > sizeof(skb->cb));
- INIT_LIST_HEAD(&XSKCB(skb)->addrs_list);
skb->dev = xs->dev;
skb->priority = READ_ONCE(xs->sk.sk_priority);
skb->mark = READ_ONCE(xs->sk.sk_mark);
- XSKCB(skb)->num_descs = 0;
skb->destructor = xsk_destruct_skb;
- skb_shinfo(skb)->destructor_arg = (void *)(uintptr_t)addr;
}
static void xsk_consume_skb(struct sk_buff *skb)
{
struct xdp_sock *xs = xdp_sk(skb->sk);
- u32 num_descs = xsk_get_num_desc(skb);
- struct xsk_addr_node *pos, *tmp;
-
- if (unlikely(num_descs > 1)) {
- list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {
- list_del(&pos->addr_node);
- kmem_cache_free(xsk_tx_generic_cache, pos);
- }
- }
skb->destructor = sock_wfree;
- xsk_cq_cancel_locked(xs->pool, num_descs);
+ xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb));
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
xs->skb = NULL;
@@ -701,7 +656,6 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
{
struct xsk_buff_pool *pool = xs->pool;
u32 hr, len, ts, offset, copy, copied;
- struct xsk_addr_node *xsk_addr;
struct sk_buff *skb = xs->skb;
struct page *page;
void *buffer;
@@ -720,23 +674,12 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
skb_reserve(skb, hr);
- xsk_skb_init_misc(skb, xs, desc->addr);
+ xsk_skb_init_misc(skb, xs);
if (desc->options & XDP_TX_METADATA) {
err = xsk_skb_metadata(skb, buffer, desc, pool, hr);
if (unlikely(err))
return ERR_PTR(err);
}
- } else {
- xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
- if (!xsk_addr)
- return ERR_PTR(-ENOMEM);
-
- /* in case of -EOVERFLOW that could happen below,
- * xsk_consume_skb() will release this node as whole skb
- * would be dropped, which implies freeing all list elements
- */
- xsk_addr->addr = desc->addr;
- list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
}
len = desc->len;
@@ -804,7 +747,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
if (unlikely(err))
goto free_err;
- xsk_skb_init_misc(skb, xs, desc->addr);
+ xsk_skb_init_misc(skb, xs);
if (desc->options & XDP_TX_METADATA) {
err = xsk_skb_metadata(skb, buffer, desc,
xs->pool, hr);
@@ -813,7 +756,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
}
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
- struct xsk_addr_node *xsk_addr;
struct page *page;
u8 *vaddr;
@@ -828,26 +770,16 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
goto free_err;
}
- xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
- if (!xsk_addr) {
- __free_page(page);
- err = -ENOMEM;
- goto free_err;
- }
-
vaddr = kmap_local_page(page);
memcpy(vaddr, buffer, len);
kunmap_local(vaddr);
skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
-
- xsk_addr->addr = desc->addr;
- list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
}
}
- xsk_inc_num_desc(skb);
+ xsk_set_destructor_arg(skb);
return skb;
@@ -857,7 +789,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
if (err == -EOVERFLOW) {
/* Drop the packet */
- xsk_inc_num_desc(xs->skb);
+ xsk_set_destructor_arg(xs->skb);
xsk_drop_skb(xs->skb);
xskq_cons_release(xs->tx);
} else {
@@ -900,7 +832,7 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- err = xsk_cq_reserve_locked(xs->pool);
+ err = xsk_cq_reserve_addr_locked(xs->pool, desc.addr);
if (err) {
err = -EAGAIN;
goto out;
@@ -1903,18 +1835,8 @@ static int __init xsk_init(void)
if (err)
goto out_pernet;
- xsk_tx_generic_cache = kmem_cache_create("xsk_generic_xmit_cache",
- sizeof(struct xsk_addr_node),
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!xsk_tx_generic_cache) {
- err = -ENOMEM;
- goto out_unreg_notif;
- }
-
return 0;
-out_unreg_notif:
- unregister_netdevice_notifier(&xsk_netdev_notifier);
out_pernet:
unregister_pernet_subsys(&xsk_net_ops);
out_sk:
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 1eb8d9f8b104..23c02066caaf 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -369,11 +369,6 @@ static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
/* Functions for producers */
-static inline u32 xskq_get_prod(struct xsk_queue *q)
-{
- return READ_ONCE(q->ring->producer);
-}
-
static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max)
{
u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);
@@ -420,13 +415,6 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
return 0;
}
-static inline void xskq_prod_write_addr(struct xsk_queue *q, u32 idx, u64 addr)
-{
- struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-
- ring->desc[idx & q->ring_mask] = addr;
-}
-
static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
u32 nb_entries)
{
--
2.41.3
Thread overview: 12+ messages
2025-10-31 9:32 [PATCH RFC net-next 0/2] xsk: fix immature cq descriptor production (II) Jason Xing
2025-10-31 9:32 ` Jason Xing [this message]
2025-10-31 9:32 ` [PATCH RFC net-next 2/2] xsk: introduce a cached cq to temporarily store descriptor addrs Jason Xing
2025-10-31 14:02 ` Maciej Fijalkowski
2025-10-31 23:59 ` Jason Xing
2025-11-03 15:00 ` Maciej Fijalkowski
2025-11-03 23:29 ` Jason Xing
2025-11-11 13:03 ` Jason Xing
2025-11-11 13:44 ` Magnus Karlsson
2025-11-11 14:02 ` Jason Xing
2025-11-14 15:52 ` Maciej Fijalkowski
2025-11-14 23:46 ` Jason Xing