* [PATCH v2 bpf] xsk: fix immature cq descriptor production
@ 2025-07-05 13:55 Maciej Fijalkowski
2025-07-07 13:59 ` Alexander Lobakin
2025-07-14 19:04 ` Dan Carpenter
0 siblings, 2 replies; 10+ messages in thread
From: Maciej Fijalkowski @ 2025-07-05 13:55 UTC (permalink / raw)
To: bpf, ast, daniel, andrii
Cc: netdev, magnus.karlsson, stfomichev, Maciej Fijalkowski,
Eryk Kubanski
Eryk reported an issue, linked in the Closes: tag, related to umem addrs
being prematurely produced onto the pool's completion queue. Let us make
the skb's destructor responsible for producing all addrs that a given
skb used.

The commit in the Fixes: tag introduced the buggy behavior; it was not
broken from day 1, but rather when xsk multi-buffer support got
introduced.

Introduce a struct that carries the descriptor count along with an array
of addresses taken from the processed descriptors, and pass it via
skb_shared_info::destructor_arg. This way we can refer to it within
xsk_destruct_skb().
To summarize, behavior is changed from:
- produce addr to cq, increase cq's cached_prod
- increment descriptor count and store it on
  skb_shared_info::destructor_arg
- (xmit and rest of path...)
- use destructor_arg on skb destructor to update global state of cq
  producer
to the following:
- increment cq's cached_prod
- increment descriptor count, save xdp_desc::addr in custom array and
store this custom array on skb_shared_info::destructor_arg
- (xmit and rest of path...)
- use destructor_arg on skb destructor to walk the array of addrs and
write them to cq and finally update global state of cq producer
Fixes: b7f72a30e9ac ("xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path")
Reported-by: Eryk Kubanski <e.kubanski@partner.samsung.com>
Closes: https://lore.kernel.org/netdev/20250530103456.53564-1-e.kubanski@partner.samsung.com/
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
v1:
https://lore.kernel.org/bpf/20250702101648.1942562-1-maciej.fijalkowski@intel.com/
v1->v2:
* store addrs in array carried via destructor_arg instead of having them
stored in skb headroom; cleaner and less hacky approach;
---
net/xdp/xsk.c | 79 ++++++++++++++++++++++++++++++++++-----------
net/xdp/xsk_queue.h | 12 +++++++
2 files changed, 73 insertions(+), 18 deletions(-)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 72c000c0ae5f..9f0ce87d440f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -36,6 +36,11 @@
#define TX_BATCH_SIZE 32
#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
+struct xsk_addrs {
+ u32 num_descs;
+ u64 addrs[MAX_SKB_FRAGS + 1];
+};
+
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -528,25 +533,38 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr)
+static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&pool->cq_lock, flags);
- ret = xskq_prod_reserve_addr(pool->cq, addr);
+ ret = xskq_prod_reserve(pool->cq);
spin_unlock_irqrestore(&pool->cq_lock, flags);
return ret;
}
-static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n)
+static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
+ struct sk_buff *skb)
{
+ struct xsk_addrs *xsk_addrs;
unsigned long flags;
+ u32 num_desc, i;
+ u32 idx;
+
+ xsk_addrs = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+ num_desc = xsk_addrs->num_descs;
spin_lock_irqsave(&pool->cq_lock, flags);
- xskq_prod_submit_n(pool->cq, n);
+ idx = xskq_get_prod(pool->cq);
+
+ for (i = 0; i < num_desc; i++, idx++)
+ xskq_prod_write_addr(pool->cq, idx, xsk_addrs->addrs[i]);
+ xskq_prod_submit_n(pool->cq, num_desc);
+
spin_unlock_irqrestore(&pool->cq_lock, flags);
+ kfree(xsk_addrs);
}
static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
@@ -558,29 +576,37 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
spin_unlock_irqrestore(&pool->cq_lock, flags);
}
-static u32 xsk_get_num_desc(struct sk_buff *skb)
-{
- return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
-}
-
static void xsk_destruct_skb(struct sk_buff *skb)
{
struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta;
- if (compl->tx_timestamp) {
+ if (compl->tx_timestamp)
/* sw completion timestamp, not a real one */
*compl->tx_timestamp = ktime_get_tai_fast_ns();
- }
- xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
+ xsk_cq_submit_addr_locked(xdp_sk(skb->sk)->pool, skb);
sock_wfree(skb);
}
-static void xsk_set_destructor_arg(struct sk_buff *skb)
+static u32 xsk_get_num_desc(struct sk_buff *skb)
{
- long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
+ struct xsk_addrs *addrs;
- skb_shinfo(skb)->destructor_arg = (void *)num;
+ addrs = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+ return addrs->num_descs;
+}
+
+static void xsk_set_destructor_arg(struct sk_buff *skb, struct xsk_addrs *addrs)
+{
+ skb_shinfo(skb)->destructor_arg = (void *)addrs;
+}
+
+static void xsk_inc_skb_descs(struct sk_buff *skb)
+{
+ struct xsk_addrs *addrs;
+
+ addrs = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+ addrs->num_descs++;
}
static void xsk_consume_skb(struct sk_buff *skb)
@@ -605,6 +631,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
{
struct xsk_buff_pool *pool = xs->pool;
u32 hr, len, ts, offset, copy, copied;
+ struct xsk_addrs *addrs = NULL;
struct sk_buff *skb = xs->skb;
struct page *page;
void *buffer;
@@ -619,6 +646,12 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
return ERR_PTR(err);
skb_reserve(skb, hr);
+
+ addrs = kzalloc(sizeof(*addrs), GFP_KERNEL);
+ if (!addrs)
+ return ERR_PTR(-ENOMEM);
+
+ xsk_set_destructor_arg(skb, addrs);
}
addr = desc->addr;
@@ -658,6 +691,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
{
struct xsk_tx_metadata *meta = NULL;
struct net_device *dev = xs->dev;
+ struct xsk_addrs *addrs = NULL;
struct sk_buff *skb = xs->skb;
bool first_frag = false;
int err;
@@ -690,6 +724,13 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err))
goto free_err;
+
+ addrs = kzalloc(sizeof(*addrs), GFP_KERNEL);
+ if (!addrs)
+ goto free_err;
+
+ xsk_set_destructor_arg(skb, addrs);
+
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
@@ -755,7 +796,9 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb;
xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta);
- xsk_set_destructor_arg(skb);
+
+ addrs = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+ addrs->addrs[addrs->num_descs++] = desc->addr;
return skb;
@@ -765,7 +808,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
if (err == -EOVERFLOW) {
/* Drop the packet */
- xsk_set_destructor_arg(xs->skb);
+ xsk_inc_skb_descs(xs->skb);
xsk_drop_skb(xs->skb);
xskq_cons_release(xs->tx);
} else {
@@ -807,7 +850,7 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- err = xsk_cq_reserve_addr_locked(xs->pool, desc.addr);
+ err = xsk_cq_reserve_locked(xs->pool);
if (err) {
err = -EAGAIN;
goto out;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 46d87e961ad6..f16f390370dc 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -344,6 +344,11 @@ static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
/* Functions for producers */
+static inline u32 xskq_get_prod(struct xsk_queue *q)
+{
+ return READ_ONCE(q->ring->producer);
+}
+
static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max)
{
u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);
@@ -390,6 +395,13 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
return 0;
}
+static inline void xskq_prod_write_addr(struct xsk_queue *q, u32 idx, u64 addr)
+{
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+ ring->desc[idx & q->ring_mask] = addr;
+}
+
static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
u32 nb_entries)
{
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v2 bpf] xsk: fix immature cq descriptor production
2025-07-05 13:55 [PATCH v2 bpf] xsk: fix immature cq descriptor production Maciej Fijalkowski
@ 2025-07-07 13:59 ` Alexander Lobakin
2025-07-07 15:06 ` Stanislav Fomichev
2025-07-14 19:04 ` Dan Carpenter
1 sibling, 1 reply; 10+ messages in thread
From: Alexander Lobakin @ 2025-07-07 13:59 UTC (permalink / raw)
To: Maciej Fijalkowski
Cc: bpf, ast, daniel, andrii, netdev, magnus.karlsson, stfomichev,
Eryk Kubanski
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Sat, 5 Jul 2025 15:55:12 +0200
> Eryk reported an issue that I have put under Closes: tag, related to
> umem addrs being prematurely produced onto pool's completion queue.
> Let us make the skb's destructor responsible for producing all addrs
> that given skb used.
>
> Commit from fixes tag introduced the buggy behavior, it was not broken
> from day 1, but rather when xsk multi-buffer got introduced.
>
> Introduce a struct which will carry descriptor count with array of
> addresses taken from processed descriptors that will be carried via
> skb_shared_info::destructor_arg. This way we can refer to it within
> xsk_destruct_skb().
>
> To summarize, behavior is changed from:
> - produce addr to cq, increase cq's cached_prod
> - increment descriptor count and store it on
> - (xmit and rest of path...)
> skb_shared_info::destructor_arg
> - use destructor_arg on skb destructor to update global state of cq
> producer
>
> to the following:
> - increment cq's cached_prod
> - increment descriptor count, save xdp_desc::addr in custom array and
> store this custom array on skb_shared_info::destructor_arg
> - (xmit and rest of path...)
> - use destructor_arg on skb destructor to walk the array of addrs and
> write them to cq and finally update global state of cq producer
>
> Fixes: b7f72a30e9ac ("xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path")
> Reported-by: Eryk Kubanski <e.kubanski@partner.samsung.com>
> Closes: https://lore.kernel.org/netdev/20250530103456.53564-1-e.kubanski@partner.samsung.com/
> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> ---
> v1:
> https://lore.kernel.org/bpf/20250702101648.1942562-1-maciej.fijalkowski@intel.com/
>
> v1->v2:
> * store addrs in array carried via destructor_arg instead having them
> stored in skb headroom; cleaner and less hacky approach;
Might look cleaner, but what about the performance given that you're
adding a memory allocation?
(I realize that's only for the skb mode, still)
Yeah we anyway allocate an skb and may even copy the whole frame, just
curious.
I could recommend using skb->cb for that, but its 48 bytes would cover
only 6 addresses =\
Headroom is not that hacky; we even place &xdp_frame there :D
> ---
> net/xdp/xsk.c | 79 ++++++++++++++++++++++++++++++++++-----------
> net/xdp/xsk_queue.h | 12 +++++++
> 2 files changed, 73 insertions(+), 18 deletions(-)
[...]
> @@ -619,6 +646,12 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
> return ERR_PTR(err);
>
> skb_reserve(skb, hr);
> +
> + addrs = kzalloc(sizeof(*addrs), GFP_KERNEL);
Are you sure you can use sleeping GFP_KERNEL here, not GFP_ATOMIC?
> + if (!addrs)
> + return ERR_PTR(-ENOMEM);
> +
> + xsk_set_destructor_arg(skb, addrs);
> }
>
> addr = desc->addr;
Thanks,
Olek
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 bpf] xsk: fix immature cq descriptor production
2025-07-07 13:59 ` Alexander Lobakin
@ 2025-07-07 15:06 ` Stanislav Fomichev
2025-07-07 15:47 ` Alexander Lobakin
0 siblings, 1 reply; 10+ messages in thread
From: Stanislav Fomichev @ 2025-07-07 15:06 UTC (permalink / raw)
To: Alexander Lobakin
Cc: Maciej Fijalkowski, bpf, ast, daniel, andrii, netdev,
magnus.karlsson, Eryk Kubanski
On 07/07, Alexander Lobakin wrote:
> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Date: Sat, 5 Jul 2025 15:55:12 +0200
>
> > Eryk reported an issue that I have put under Closes: tag, related to
> > umem addrs being prematurely produced onto pool's completion queue.
> > Let us make the skb's destructor responsible for producing all addrs
> > that given skb used.
> >
> > Commit from fixes tag introduced the buggy behavior, it was not broken
> > from day 1, but rather when xsk multi-buffer got introduced.
> >
> > Introduce a struct which will carry descriptor count with array of
> > addresses taken from processed descriptors that will be carried via
> > skb_shared_info::destructor_arg. This way we can refer to it within
> > xsk_destruct_skb().
> >
> > To summarize, behavior is changed from:
> > - produce addr to cq, increase cq's cached_prod
> > - increment descriptor count and store it on
> > - (xmit and rest of path...)
> > skb_shared_info::destructor_arg
> > - use destructor_arg on skb destructor to update global state of cq
> > producer
> >
> > to the following:
> > - increment cq's cached_prod
> > - increment descriptor count, save xdp_desc::addr in custom array and
> > store this custom array on skb_shared_info::destructor_arg
> > - (xmit and rest of path...)
> > - use destructor_arg on skb destructor to walk the array of addrs and
> > write them to cq and finally update global state of cq producer
> >
> > Fixes: b7f72a30e9ac ("xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path")
> > Reported-by: Eryk Kubanski <e.kubanski@partner.samsung.com>
> > Closes: https://lore.kernel.org/netdev/20250530103456.53564-1-e.kubanski@partner.samsung.com/
> > Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > ---
> > v1:
> > https://lore.kernel.org/bpf/20250702101648.1942562-1-maciej.fijalkowski@intel.com/
> >
> > v1->v2:
> > * store addrs in array carried via destructor_arg instead having them
> > stored in skb headroom; cleaner and less hacky approach;
>
> Might look cleaner, but what about the performance given that you're
> adding a memory allocation?
>
> (I realize that's only for the skb mode, still)
>
> Yeah we anyway allocate an skb and may even copy the whole frame, just
> curious.
> I could recommend using skb->cb for that, but its 48 bytes would cover
> only 6 addresses =\
Can we pre-allocate an array of xsk_addrs during xsk_bind (the number of
xsk_addrs is bound by the tx ring size)? Then we can remove the alloc on tx
and replace it with some code to manage that pool of xsk_addrs..
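Something along these lines is what I have in mind (a rough sketch only,
with made-up names, and ignoring the locking that would be needed between
the Tx path and the skb destructor):

struct xsk_addrs_pool {
	struct xsk_addrs *entries;	/* backing array, tx ring size entries */
	struct xsk_addrs **free;	/* simple free stack over @entries */
	u32 free_cnt;
};

static struct xsk_addrs *xsk_addrs_get(struct xsk_addrs_pool *p)
{
	/* hand out a pre-allocated entry on Tx, NULL if all are in flight */
	return p->free_cnt ? p->free[--p->free_cnt] : NULL;
}

static void xsk_addrs_put(struct xsk_addrs_pool *p, struct xsk_addrs *a)
{
	/* return the entry from the skb destructor */
	p->free[p->free_cnt++] = a;
}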
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 bpf] xsk: fix immature cq descriptor production
2025-07-07 15:06 ` Stanislav Fomichev
@ 2025-07-07 15:47 ` Alexander Lobakin
2025-07-07 18:40 ` Stanislav Fomichev
0 siblings, 1 reply; 10+ messages in thread
From: Alexander Lobakin @ 2025-07-07 15:47 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: Maciej Fijalkowski, bpf, ast, daniel, andrii, netdev,
magnus.karlsson, Eryk Kubanski
From: Stanislav Fomichev <stfomichev@gmail.com>
Date: Mon, 7 Jul 2025 08:06:21 -0700
> On 07/07, Alexander Lobakin wrote:
>> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
>> Date: Sat, 5 Jul 2025 15:55:12 +0200
>>
>>> Eryk reported an issue that I have put under Closes: tag, related to
>>> umem addrs being prematurely produced onto pool's completion queue.
>>> Let us make the skb's destructor responsible for producing all addrs
>>> that given skb used.
>>>
>>> Commit from fixes tag introduced the buggy behavior, it was not broken
>>> from day 1, but rather when xsk multi-buffer got introduced.
>>>
>>> Introduce a struct which will carry descriptor count with array of
>>> addresses taken from processed descriptors that will be carried via
>>> skb_shared_info::destructor_arg. This way we can refer to it within
>>> xsk_destruct_skb().
>>>
>>> To summarize, behavior is changed from:
>>> - produce addr to cq, increase cq's cached_prod
>>> - increment descriptor count and store it on
>>> - (xmit and rest of path...)
>>> skb_shared_info::destructor_arg
>>> - use destructor_arg on skb destructor to update global state of cq
>>> producer
>>>
>>> to the following:
>>> - increment cq's cached_prod
>>> - increment descriptor count, save xdp_desc::addr in custom array and
>>> store this custom array on skb_shared_info::destructor_arg
>>> - (xmit and rest of path...)
>>> - use destructor_arg on skb destructor to walk the array of addrs and
>>> write them to cq and finally update global state of cq producer
>>>
>>> Fixes: b7f72a30e9ac ("xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path")
>>> Reported-by: Eryk Kubanski <e.kubanski@partner.samsung.com>
>>> Closes: https://lore.kernel.org/netdev/20250530103456.53564-1-e.kubanski@partner.samsung.com/
>>> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
>>> ---
>>> v1:
>>> https://lore.kernel.org/bpf/20250702101648.1942562-1-maciej.fijalkowski@intel.com/
>>>
>>> v1->v2:
>>> * store addrs in array carried via destructor_arg instead having them
>>> stored in skb headroom; cleaner and less hacky approach;
>>
>> Might look cleaner, but what about the performance given that you're
>> adding a memory allocation?
>>
>> (I realize that's only for the skb mode, still)
>>
>> Yeah we anyway allocate an skb and may even copy the whole frame, just
>> curious.
>> I could recommend using skb->cb for that, but its 48 bytes would cover
>> only 6 addresses =\
BTW wouldn't num_descs from that new structure be the same as
shinfo->nr_frags + 1 (or just nr_frags for xsk_build_skb_zerocopy())?
>
> Can we pre-allocate an array of xsk_addrs during xsk_bind (the number of
> xsk_addrs is bound by the tx ring size)? Then we can remove the alloc on tx
> and replace it with some code to manage that pool of xsk_addrs..
Nice idea BTW.
We could even use system per-cpu Page Pools to allocate these structs*
:D It wouldn't waste 1 page per one struct as PP is frag-aware and has
API for allocating only a small frag.
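Roughly like this (just a sketch, not part of the patch; assumes a
frag-capable struct page_pool *pp is at hand, e.g. the per-cpu system
pool):

static struct xsk_addrs *xsk_addrs_alloc_pp(struct page_pool *pp)
{
	unsigned int offset;
	struct page *page;

	/* carve only sizeof(struct xsk_addrs) bytes out of a pooled page */
	page = page_pool_dev_alloc_frag(pp, &offset, sizeof(struct xsk_addrs));
	if (!page)
		return NULL;

	return page_address(page) + offset;
}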
Headroom stuff was also ok to me: we either way allocate a new skb, so
we could allocate it with a bit bigger headroom and put that table there
being sure that nobody will overwrite it (some drivers insert special
headers or descriptors in front of the actual skb->data).
[*] Offtop: we could also use system PP to allocate skbs in
xsk_build_skb() just like it's done in xdp_build_skb_from_zc() +
xdp_copy_frags_from_zc() -- no way to avoid memcpy(), but the payload
buffers would be recycled then.
Thanks,
Olek
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 bpf] xsk: fix immature cq descriptor production
2025-07-07 15:47 ` Alexander Lobakin
@ 2025-07-07 18:40 ` Stanislav Fomichev
2025-07-08 14:14 ` Maciej Fijalkowski
0 siblings, 1 reply; 10+ messages in thread
From: Stanislav Fomichev @ 2025-07-07 18:40 UTC (permalink / raw)
To: Alexander Lobakin
Cc: Maciej Fijalkowski, bpf, ast, daniel, andrii, netdev,
magnus.karlsson, Eryk Kubanski
On 07/07, Alexander Lobakin wrote:
> From: Stanislav Fomichev <stfomichev@gmail.com>
> Date: Mon, 7 Jul 2025 08:06:21 -0700
>
> > On 07/07, Alexander Lobakin wrote:
> >> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> >> Date: Sat, 5 Jul 2025 15:55:12 +0200
> >>
> >>> Eryk reported an issue that I have put under Closes: tag, related to
> >>> umem addrs being prematurely produced onto pool's completion queue.
> >>> Let us make the skb's destructor responsible for producing all addrs
> >>> that given skb used.
> >>>
> >>> Commit from fixes tag introduced the buggy behavior, it was not broken
> >>> from day 1, but rather when xsk multi-buffer got introduced.
> >>>
> >>> Introduce a struct which will carry descriptor count with array of
> >>> addresses taken from processed descriptors that will be carried via
> >>> skb_shared_info::destructor_arg. This way we can refer to it within
> >>> xsk_destruct_skb().
> >>>
> >>> To summarize, behavior is changed from:
> >>> - produce addr to cq, increase cq's cached_prod
> >>> - increment descriptor count and store it on
> >>> - (xmit and rest of path...)
> >>> skb_shared_info::destructor_arg
> >>> - use destructor_arg on skb destructor to update global state of cq
> >>> producer
> >>>
> >>> to the following:
> >>> - increment cq's cached_prod
> >>> - increment descriptor count, save xdp_desc::addr in custom array and
> >>> store this custom array on skb_shared_info::destructor_arg
> >>> - (xmit and rest of path...)
> >>> - use destructor_arg on skb destructor to walk the array of addrs and
> >>> write them to cq and finally update global state of cq producer
> >>>
> >>> Fixes: b7f72a30e9ac ("xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path")
> >>> Reported-by: Eryk Kubanski <e.kubanski@partner.samsung.com>
> >>> Closes: https://lore.kernel.org/netdev/20250530103456.53564-1-e.kubanski@partner.samsung.com/
> >>> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> >>> ---
> >>> v1:
> >>> https://lore.kernel.org/bpf/20250702101648.1942562-1-maciej.fijalkowski@intel.com/
> >>>
> >>> v1->v2:
> >>> * store addrs in array carried via destructor_arg instead having them
> >>> stored in skb headroom; cleaner and less hacky approach;
> >>
> >> Might look cleaner, but what about the performance given that you're
> >> adding a memory allocation?
> >>
> >> (I realize that's only for the skb mode, still)
> >>
> >> Yeah we anyway allocate an skb and may even copy the whole frame, just
> >> curious.
> >> I could recommend using skb->cb for that, but its 48 bytes would cover
> >> only 6 addresses =\
>
> BTW isn't num_descs from that new structure would be the same as
> shinfo->nr_frags + 1 (or just nr_frags for xsk_build_skb_zerocopy())?
So you're saying we don't need to store it? Agreed. But storing the rest
in cb still might be problematic with kconfig-configurable MAX_SKB_FRAGS?
> > Can we pre-allocate an array of xsk_addrs during xsk_bind (the number of
> > xsk_addrs is bound by the tx ring size)? Then we can remove the alloc on tx
> > and replace it with some code to manage that pool of xsk_addrs..
>
> Nice idea BTW.
>
> We could even use system per-cpu Page Pools to allocate these structs*
> :D It wouldn't waste 1 page per one struct as PP is frag-aware and has
> API for allocating only a small frag.
>
> Headroom stuff was also ok to me: we either way allocate a new skb, so
> we could allocate it with a bit bigger headroom and put that table there
> being sure that nobody will overwrite it (some drivers insert special
> headers or descriptors in front of the actual skb->data).
>
> [*] Offtop: we could also use system PP to allocate skbs in
> xsk_build_skb() just like it's done in xdp_build_skb_from_zc() +
> xdp_copy_frags_from_zc() -- no way to avoid memcpy(), but the payload
> buffers would be recycled then.
Or maybe kmem_cache_alloc_node with a custom cache is good enough?
Headroom also feels ok if we store the whole xsk_addrs struct in it.
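E.g. something like this, as a rough sketch with illustrative names (not
part of the patch):

/* created once, e.g. at module init or bind time */
static struct kmem_cache *xsk_addrs_cache;

static int xsk_addrs_cache_init(void)
{
	xsk_addrs_cache = kmem_cache_create("xsk_addrs",
					    sizeof(struct xsk_addrs), 0,
					    SLAB_HWCACHE_ALIGN, NULL);
	return xsk_addrs_cache ? 0 : -ENOMEM;
}

/* Tx path, instead of kzalloc(): */
	addrs = kmem_cache_alloc_node(xsk_addrs_cache,
				      GFP_KERNEL | __GFP_ZERO,
				      numa_node_id());

/* destructor, instead of kfree(): */
	kmem_cache_free(xsk_addrs_cache, addrs);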
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 bpf] xsk: fix immature cq descriptor production
2025-07-07 18:40 ` Stanislav Fomichev
@ 2025-07-08 14:14 ` Maciej Fijalkowski
2025-07-08 15:54 ` Alexander Lobakin
0 siblings, 1 reply; 10+ messages in thread
From: Maciej Fijalkowski @ 2025-07-08 14:14 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: Alexander Lobakin, bpf, ast, daniel, andrii, netdev,
magnus.karlsson, Eryk Kubanski
On Mon, Jul 07, 2025 at 11:40:48AM -0700, Stanislav Fomichev wrote:
> On 07/07, Alexander Lobakin wrote:
> > From: Stanislav Fomichev <stfomichev@gmail.com>
> > Date: Mon, 7 Jul 2025 08:06:21 -0700
> >
> > > On 07/07, Alexander Lobakin wrote:
> > >> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > >> Date: Sat, 5 Jul 2025 15:55:12 +0200
> > >>
> > >>> Eryk reported an issue that I have put under Closes: tag, related to
> > >>> umem addrs being prematurely produced onto pool's completion queue.
> > >>> Let us make the skb's destructor responsible for producing all addrs
> > >>> that given skb used.
> > >>>
> > >>> Commit from fixes tag introduced the buggy behavior, it was not broken
> > >>> from day 1, but rather when xsk multi-buffer got introduced.
> > >>>
> > >>> Introduce a struct which will carry descriptor count with array of
> > >>> addresses taken from processed descriptors that will be carried via
> > >>> skb_shared_info::destructor_arg. This way we can refer to it within
> > >>> xsk_destruct_skb().
> > >>>
> > >>> To summarize, behavior is changed from:
> > >>> - produce addr to cq, increase cq's cached_prod
> > >>> - increment descriptor count and store it on
> > >>> - (xmit and rest of path...)
> > >>> skb_shared_info::destructor_arg
> > >>> - use destructor_arg on skb destructor to update global state of cq
> > >>> producer
> > >>>
> > >>> to the following:
> > >>> - increment cq's cached_prod
> > >>> - increment descriptor count, save xdp_desc::addr in custom array and
> > >>> store this custom array on skb_shared_info::destructor_arg
> > >>> - (xmit and rest of path...)
> > >>> - use destructor_arg on skb destructor to walk the array of addrs and
> > >>> write them to cq and finally update global state of cq producer
> > >>>
> > >>> Fixes: b7f72a30e9ac ("xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path")
> > >>> Reported-by: Eryk Kubanski <e.kubanski@partner.samsung.com>
> > >>> Closes: https://lore.kernel.org/netdev/20250530103456.53564-1-e.kubanski@partner.samsung.com/
> > >>> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > >>> ---
> > >>> v1:
> > >>> https://lore.kernel.org/bpf/20250702101648.1942562-1-maciej.fijalkowski@intel.com/
> > >>>
> > >>> v1->v2:
> > >>> * store addrs in array carried via destructor_arg instead having them
> > >>> stored in skb headroom; cleaner and less hacky approach;
> > >>
> > >> Might look cleaner, but what about the performance given that you're
> > >> adding a memory allocation?
> > >>
> > >> (I realize that's only for the skb mode, still)
> > >>
> > >> Yeah we anyway allocate an skb and may even copy the whole frame, just
> > >> curious.
> > >> I could recommend using skb->cb for that, but its 48 bytes would cover
> > >> only 6 addresses =\
> >
> > BTW isn't num_descs from that new structure would be the same as
> > shinfo->nr_frags + 1 (or just nr_frags for xsk_build_skb_zerocopy())?
>
> So you're saying we don't need to store it? Agreed. But storing the rest
> in cb still might be problematic with kconfig-configurable MAX_SKB_FRAGS?
Hi Stan & Olek,
no, as said in v1 drivers might linearize the skb and all frags will be
lost. This storage is needed unfortunately.
>
> > > Can we pre-allocate an array of xsk_addrs during xsk_bind (the number of
> > > xsk_addrs is bound by the tx ring size)? Then we can remove the alloc on tx
> > > and replace it with some code to manage that pool of xsk_addrs..
That would be pool-bound which makes it a shared resource so I believe
that we would repeat the problem being fixed here ;)
> >
> > Nice idea BTW.
> >
> > We could even use system per-cpu Page Pools to allocate these structs*
> > :D It wouldn't waste 1 page per one struct as PP is frag-aware and has
> > API for allocating only a small frag.
> >
> > Headroom stuff was also ok to me: we either way allocate a new skb, so
> > we could allocate it with a bit bigger headroom and put that table there
> > being sure that nobody will overwrite it (some drivers insert special
> > headers or descriptors in front of the actual skb->data).
the headroom approach was causing one of the bpf selftests to fail, but I
didn't check the reason in depth. I also didn't really like the v1 check
in the destructor for whether the addr array got corrupted, so I came up
with v2, which seems to me a cleaner fix.
> >
> > [*] Offtop: we could also use system PP to allocate skbs in
> > xsk_build_skb() just like it's done in xdp_build_skb_from_zc() +
> > xdp_copy_frags_from_zc() -- no way to avoid memcpy(), but the payload
> > buffers would be recycled then.
>
> Or maybe kmem_cache_alloc_node with a custom cache is good enough?
> Headroom also feels ok if we store the whole xsk_addrs struct in it.
Yep, both of these approaches were something I considered, but keep in
mind it's a bugfix, so I didn't want to go with anything flashy. I have
not observed a big performance impact, but I checked only with
MAX_SKB_FRAGS set to its standard value.
Would you guys be ok if I do a follow-up with possible optimizations
after my vacation, which would be a -next candidate?
Thanks,
MF
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 bpf] xsk: fix immature cq descriptor production
2025-07-08 14:14 ` Maciej Fijalkowski
@ 2025-07-08 15:54 ` Alexander Lobakin
2025-07-08 17:49 ` Stanislav Fomichev
0 siblings, 1 reply; 10+ messages in thread
From: Alexander Lobakin @ 2025-07-08 15:54 UTC (permalink / raw)
To: Maciej Fijalkowski
Cc: Stanislav Fomichev, bpf, ast, daniel, andrii, netdev,
magnus.karlsson, Eryk Kubanski
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 8 Jul 2025 16:14:39 +0200
> On Mon, Jul 07, 2025 at 11:40:48AM -0700, Stanislav Fomichev wrote:
>> On 07/07, Alexander Lobakin wrote:
[...]
>>> BTW isn't num_descs from that new structure would be the same as
>>> shinfo->nr_frags + 1 (or just nr_frags for xsk_build_skb_zerocopy())?
>>
>> So you're saying we don't need to store it? Agreed. But storing the rest
>> in cb still might be problematic with kconfig-configurable MAX_SKB_FRAGS?
For sure skb->cb is too small for 17+ u64s.
>
> Hi Stan & Olek,
>
> no, as said in v1 drivers might linearize the skb and all frags will be
> lost. This storage is needed unfortunately.
Aaah sorry. In this case yeah, you need this separate frag count.
>
>>
>>>> Can we pre-allocate an array of xsk_addrs during xsk_bind (the number of
>>>> xsk_addrs is bound by the tx ring size)? Then we can remove the alloc on tx
>>>> and replace it with some code to manage that pool of xsk_addrs..
>
> That would be pool-bound which makes it a shared resource so I believe
> that we would repeat the problem being fixed here ;)
Except the system Page Pool idea right below maybe :>
>
>>>
>>> Nice idea BTW.
>>>
>>> We could even use system per-cpu Page Pools to allocate these structs*
>>> :D It wouldn't waste 1 page per one struct as PP is frag-aware and has
>>> API for allocating only a small frag.
>>>
>>> Headroom stuff was also ok to me: we either way allocate a new skb, so
>>> we could allocate it with a bit bigger headroom and put that table there
>>> being sure that nobody will overwrite it (some drivers insert special
>>> headers or descriptors in front of the actual skb->data).
>
> headroom approach was causing one of bpf selftests to fail, but I didn't
> check in-depth the reason. I didn't really like the check in destructor if
> addr array was corrupted in v1 and I came up with v2 which seems to me a
> cleaner fix.
>
>>>
>>> [*] Offtop: we could also use system PP to allocate skbs in
>>> xsk_build_skb() just like it's done in xdp_build_skb_from_zc() +
>>> xdp_copy_frags_from_zc() -- no way to avoid memcpy(), but the payload
>>> buffers would be recycled then.
>>
>> Or maybe kmem_cache_alloc_node with a custom cache is good enough?
>> Headroom also feels ok if we store the whole xsk_addrs struct in it.
>
> Yep both of these approaches was something I considered, but keep in mind
> it's a bugfix so I didn't want to go with something flashy. I have not
> observed big performance impact but I checked only MAX_SKB_FRAGS being set
> to standard value.
>
> Would you guys be ok if I do the follow-up with possible optimization
> after my vacation which would be a -next candidate?
As a fix, it's totally fine for me to go in the current form, sure.
>
> Thanks,
> MF
Thanks,
Olek
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 bpf] xsk: fix immature cq descriptor production
2025-07-08 15:54 ` Alexander Lobakin
@ 2025-07-08 17:49 ` Stanislav Fomichev
2025-07-09 10:00 ` Maciej Fijalkowski
0 siblings, 1 reply; 10+ messages in thread
From: Stanislav Fomichev @ 2025-07-08 17:49 UTC (permalink / raw)
To: Alexander Lobakin
Cc: Maciej Fijalkowski, bpf, ast, daniel, andrii, netdev,
magnus.karlsson, Eryk Kubanski
On 07/08, Alexander Lobakin wrote:
> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Date: Tue, 8 Jul 2025 16:14:39 +0200
>
> > On Mon, Jul 07, 2025 at 11:40:48AM -0700, Stanislav Fomichev wrote:
> >> On 07/07, Alexander Lobakin wrote:
>
> [...]
>
> >>> BTW isn't num_descs from that new structure would be the same as
> >>> shinfo->nr_frags + 1 (or just nr_frags for xsk_build_skb_zerocopy())?
> >>
> >> So you're saying we don't need to store it? Agreed. But storing the rest
> >> in cb still might be problematic with kconfig-configurable MAX_SKB_FRAGS?
>
> For sure skb->cb is too small for 17+ u64s.
>
> >
> > Hi Stan & Olek,
> >
> > no, as said in v1 drivers might linearize the skb and all frags will be
> > lost. This storage is needed unfortunately.
>
> Aaah sorry. In this case yeah, you need this separate frag count.
>
> >
> >>
> >>>> Can we pre-allocate an array of xsk_addrs during xsk_bind (the number of
> >>>> xsk_addrs is bound by the tx ring size)? Then we can remove the alloc on tx
> >>>> and replace it with some code to manage that pool of xsk_addrs..
> >
> > That would be pool-bound which makes it a shared resource so I believe
> > that we would repeat the problem being fixed here ;)
>
> Except the system Page Pool idea right below maybe :>
It doesn't have to be a shared resource, the pool (in whatever form) can be
per xsk. (unless I'm missing something)
> >>> Nice idea BTW.
> >>>
> >>> We could even use system per-cpu Page Pools to allocate these structs*
> >>> :D It wouldn't waste 1 page per one struct as PP is frag-aware and has
> >>> API for allocating only a small frag.
> >>>
> >>> Headroom stuff was also ok to me: we either way allocate a new skb, so
> >>> we could allocate it with a bit bigger headroom and put that table there
> >>> being sure that nobody will overwrite it (some drivers insert special
> >>> headers or descriptors in front of the actual skb->data).
> >
> > headroom approach was causing one of bpf selftests to fail, but I didn't
> > check in-depth the reason. I didn't really like the check in destructor if
> > addr array was corrupted in v1 and I came up with v2 which seems to me a
> > cleaner fix.
> >
> >>>
> >>> [*] Offtop: we could also use system PP to allocate skbs in
> >>> xsk_build_skb() just like it's done in xdp_build_skb_from_zc() +
> >>> xdp_copy_frags_from_zc() -- no way to avoid memcpy(), but the payload
> >>> buffers would be recycled then.
> >>
> >> Or maybe kmem_cache_alloc_node with a custom cache is good enough?
> >> Headroom also feels ok if we store the whole xsk_addrs struct in it.
> >
> > Yep both of these approaches was something I considered, but keep in mind
> > it's a bugfix so I didn't want to go with something flashy. I have not
> > observed big performance impact but I checked only MAX_SKB_FRAGS being set
> > to standard value.
> >
> > Would you guys be ok if I do the follow-up with possible optimization
> > after my vacation which would be a -next candidate?
>
> As a fix, it's totally fine for me to go in the current form, sure.
+1
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 bpf] xsk: fix immature cq descriptor production
2025-07-08 17:49 ` Stanislav Fomichev
@ 2025-07-09 10:00 ` Maciej Fijalkowski
0 siblings, 0 replies; 10+ messages in thread
From: Maciej Fijalkowski @ 2025-07-09 10:00 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: Alexander Lobakin, bpf, ast, daniel, andrii, netdev,
magnus.karlsson, Eryk Kubanski
On Tue, Jul 08, 2025 at 10:49:36AM -0700, Stanislav Fomichev wrote:
> On 07/08, Alexander Lobakin wrote:
> > From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > Date: Tue, 8 Jul 2025 16:14:39 +0200
> >
> > > On Mon, Jul 07, 2025 at 11:40:48AM -0700, Stanislav Fomichev wrote:
> > >> On 07/07, Alexander Lobakin wrote:
> >
> > [...]
> >
> > >>> BTW isn't num_descs from that new structure would be the same as
> > >>> shinfo->nr_frags + 1 (or just nr_frags for xsk_build_skb_zerocopy())?
> > >>
> > >> So you're saying we don't need to store it? Agreed. But storing the rest
> > >> in cb still might be problematic with kconfig-configurable MAX_SKB_FRAGS?
> >
> > For sure skb->cb is too small for 17+ u64s.
> >
> > >
> > > Hi Stan & Olek,
> > >
> > > no, as said in v1 drivers might linearize the skb and all frags will be
> > > lost. This storage is needed unfortunately.
> >
> > Aaah sorry. In this case yeah, you need this separate frag count.
> >
> > >
> > >>
> > >>>> Can we pre-allocate an array of xsk_addrs during xsk_bind (the number of
> > >>>> xsk_addrs is bound by the tx ring size)? Then we can remove the alloc on tx
> > >>>> and replace it with some code to manage that pool of xsk_addrs..
> > >
> > > That would be pool-bound which makes it a shared resource so I believe
> > > that we would repeat the problem being fixed here ;)
> >
> > Except the system Page Pool idea right below maybe :>
>
> It doesn't have to be a shared resource, the pool (in whatever form) can be
> per xsk. (unless I'm missing something)
It cannot. When you attach multiple xsks to the same {dev,qid} tuple, the
pool is shared.
https://elixir.bootlin.com/linux/v6.16-rc5/source/net/xdp/xsk.c#L1249
Not sure right now if we could instead store the pointer to xsk_addrs in
xdp_sock. Let me get back to this after my time off.
BTW I didn't realize you added yourself as xsk reviewer. It would have
been nice to CC Magnus or me when doing so :P
>
> > >>> Nice idea BTW.
> > >>>
> > >>> We could even use system per-cpu Page Pools to allocate these structs*
> > >>> :D It wouldn't waste 1 page per one struct as PP is frag-aware and has
> > >>> API for allocating only a small frag.
> > >>>
> > >>> Headroom stuff was also ok to me: we either way allocate a new skb, so
> > >>> we could allocate it with a bit bigger headroom and put that table there
> > >>> being sure that nobody will overwrite it (some drivers insert special
> > >>> headers or descriptors in front of the actual skb->data).
> > >
> > > headroom approach was causing one of bpf selftests to fail, but I didn't
> > > check in-depth the reason. I didn't really like the check in destructor if
> > > addr array was corrupted in v1 and I came up with v2 which seems to me a
> > > cleaner fix.
> > >
> > >>>
> > >>> [*] Offtop: we could also use system PP to allocate skbs in
> > >>> xsk_build_skb() just like it's done in xdp_build_skb_from_zc() +
> > >>> xdp_copy_frags_from_zc() -- no way to avoid memcpy(), but the payload
> > >>> buffers would be recycled then.
> > >>
> > >> Or maybe kmem_cache_alloc_node with a custom cache is good enough?
> > >> Headroom also feels ok if we store the whole xsk_addrs struct in it.
> > >
> > > Yep both of these approaches was something I considered, but keep in mind
> > > it's a bugfix so I didn't want to go with something flashy. I have not
> > > observed big performance impact but I checked only MAX_SKB_FRAGS being set
> > > to standard value.
> > >
> > > Would you guys be ok if I do the follow-up with possible optimization
> > > after my vacation which would be a -next candidate?
> >
> > As a fix, it's totally fine for me to go in the current form, sure.
>
> +1
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 bpf] xsk: fix immature cq descriptor production
2025-07-05 13:55 [PATCH v2 bpf] xsk: fix immature cq descriptor production Maciej Fijalkowski
2025-07-07 13:59 ` Alexander Lobakin
@ 2025-07-14 19:04 ` Dan Carpenter
1 sibling, 0 replies; 10+ messages in thread
From: Dan Carpenter @ 2025-07-14 19:04 UTC (permalink / raw)
To: oe-kbuild, Maciej Fijalkowski, bpf, ast, daniel, andrii
Cc: lkp, oe-kbuild-all, netdev, magnus.karlsson, stfomichev,
Maciej Fijalkowski, Eryk Kubanski
Hi Maciej,
kernel test robot noticed the following build warnings:
url: https://github.com/intel-lab-lkp/linux/commits/Maciej-Fijalkowski/xsk-fix-immature-cq-descriptor-production/20250705-215714
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git master
patch link: https://lore.kernel.org/r/20250705135512.1963216-1-maciej.fijalkowski%40intel.com
patch subject: [PATCH v2 bpf] xsk: fix immature cq descriptor production
config: x86_64-randconfig-r071-20250706 (https://download.01.org/0day-ci/archive/20250706/202507061447.DwFSGum1-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202507061447.DwFSGum1-lkp@intel.com/
smatch warnings:
net/xdp/xsk.c:819 xsk_build_skb() warn: passing zero to 'ERR_PTR'
vim +/ERR_PTR +819 net/xdp/xsk.c
9c8f21e6f8856a Xuan Zhuo 2021-02-18 689 static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
9c8f21e6f8856a Xuan Zhuo 2021-02-18 690 struct xdp_desc *desc)
9c8f21e6f8856a Xuan Zhuo 2021-02-18 691 {
48eb03dd26304c Stanislav Fomichev 2023-11-27 692 struct xsk_tx_metadata *meta = NULL;
9c8f21e6f8856a Xuan Zhuo 2021-02-18 693 struct net_device *dev = xs->dev;
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 694 struct xsk_addrs *addrs = NULL;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 695 struct sk_buff *skb = xs->skb;
48eb03dd26304c Stanislav Fomichev 2023-11-27 696 bool first_frag = false;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 697 int err;
9c8f21e6f8856a Xuan Zhuo 2021-02-18 698
9c8f21e6f8856a Xuan Zhuo 2021-02-18 699 if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
9c8f21e6f8856a Xuan Zhuo 2021-02-18 700 skb = xsk_build_skb_zerocopy(xs, desc);
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 701 if (IS_ERR(skb)) {
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 702 err = PTR_ERR(skb);
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 703 goto free_err;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 704 }
9c8f21e6f8856a Xuan Zhuo 2021-02-18 705 } else {
9c8f21e6f8856a Xuan Zhuo 2021-02-18 706 u32 hr, tr, len;
9c8f21e6f8856a Xuan Zhuo 2021-02-18 707 void *buffer;
9c8f21e6f8856a Xuan Zhuo 2021-02-18 708
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 709 buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
9c8f21e6f8856a Xuan Zhuo 2021-02-18 710 len = desc->len;
9c8f21e6f8856a Xuan Zhuo 2021-02-18 711
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 712 if (!skb) {
0c0d0f42ffa6ac Felix Maurer 2024-11-14 713 first_frag = true;
0c0d0f42ffa6ac Felix Maurer 2024-11-14 714
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 715 hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 716 tr = dev->needed_tailroom;
9c8f21e6f8856a Xuan Zhuo 2021-02-18 717 skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
9c8f21e6f8856a Xuan Zhuo 2021-02-18 718 if (unlikely(!skb))
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 719 goto free_err;
9c8f21e6f8856a Xuan Zhuo 2021-02-18 720
9c8f21e6f8856a Xuan Zhuo 2021-02-18 721 skb_reserve(skb, hr);
9c8f21e6f8856a Xuan Zhuo 2021-02-18 722 skb_put(skb, len);
9c8f21e6f8856a Xuan Zhuo 2021-02-18 723
9c8f21e6f8856a Xuan Zhuo 2021-02-18 724 err = skb_store_bits(skb, 0, buffer, len);
0c0d0f42ffa6ac Felix Maurer 2024-11-14 725 if (unlikely(err))
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 726 goto free_err;
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 727
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 728 addrs = kzalloc(sizeof(*addrs), GFP_KERNEL);
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 729 if (!addrs)
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 730 goto free_err;
err is not set on this path.
regards,
dan carpenter
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 731
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 732 xsk_set_destructor_arg(skb, addrs);
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 733
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 734 } else {
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 735 int nr_frags = skb_shinfo(skb)->nr_frags;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 736 struct page *page;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 737 u8 *vaddr;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 738
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 739 if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
9d0a67b9d42c63 Tirthendu Sarkar 2023-08-23 740 err = -EOVERFLOW;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 741 goto free_err;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 742 }
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 743
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 744 page = alloc_page(xs->sk.sk_allocation);
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 745 if (unlikely(!page)) {
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 746 err = -EAGAIN;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 747 goto free_err;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 748 }
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 749
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 750 vaddr = kmap_local_page(page);
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 751 memcpy(vaddr, buffer, len);
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 752 kunmap_local(vaddr);
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 753
2127c604383666 Sebastian Andrzej Siewior 2024-02-02 754 skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
2127c604383666 Sebastian Andrzej Siewior 2024-02-02 755 refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
9c8f21e6f8856a Xuan Zhuo 2021-02-18 756 }
48eb03dd26304c Stanislav Fomichev 2023-11-27 757
48eb03dd26304c Stanislav Fomichev 2023-11-27 758 if (first_frag && desc->options & XDP_TX_METADATA) {
48eb03dd26304c Stanislav Fomichev 2023-11-27 759 if (unlikely(xs->pool->tx_metadata_len == 0)) {
48eb03dd26304c Stanislav Fomichev 2023-11-27 760 err = -EINVAL;
48eb03dd26304c Stanislav Fomichev 2023-11-27 761 goto free_err;
48eb03dd26304c Stanislav Fomichev 2023-11-27 762 }
48eb03dd26304c Stanislav Fomichev 2023-11-27 763
48eb03dd26304c Stanislav Fomichev 2023-11-27 764 meta = buffer - xs->pool->tx_metadata_len;
ce59f9686e0eca Stanislav Fomichev 2023-11-27 765 if (unlikely(!xsk_buff_valid_tx_metadata(meta))) {
ce59f9686e0eca Stanislav Fomichev 2023-11-27 766 err = -EINVAL;
ce59f9686e0eca Stanislav Fomichev 2023-11-27 767 goto free_err;
ce59f9686e0eca Stanislav Fomichev 2023-11-27 768 }
48eb03dd26304c Stanislav Fomichev 2023-11-27 769
48eb03dd26304c Stanislav Fomichev 2023-11-27 770 if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) {
48eb03dd26304c Stanislav Fomichev 2023-11-27 771 if (unlikely(meta->request.csum_start +
48eb03dd26304c Stanislav Fomichev 2023-11-27 772 meta->request.csum_offset +
48eb03dd26304c Stanislav Fomichev 2023-11-27 773 sizeof(__sum16) > len)) {
48eb03dd26304c Stanislav Fomichev 2023-11-27 774 err = -EINVAL;
48eb03dd26304c Stanislav Fomichev 2023-11-27 775 goto free_err;
48eb03dd26304c Stanislav Fomichev 2023-11-27 776 }
48eb03dd26304c Stanislav Fomichev 2023-11-27 777
48eb03dd26304c Stanislav Fomichev 2023-11-27 778 skb->csum_start = hr + meta->request.csum_start;
48eb03dd26304c Stanislav Fomichev 2023-11-27 779 skb->csum_offset = meta->request.csum_offset;
48eb03dd26304c Stanislav Fomichev 2023-11-27 780 skb->ip_summed = CHECKSUM_PARTIAL;
11614723af26e7 Stanislav Fomichev 2023-11-27 781
11614723af26e7 Stanislav Fomichev 2023-11-27 782 if (unlikely(xs->pool->tx_sw_csum)) {
11614723af26e7 Stanislav Fomichev 2023-11-27 783 err = skb_checksum_help(skb);
11614723af26e7 Stanislav Fomichev 2023-11-27 784 if (err)
11614723af26e7 Stanislav Fomichev 2023-11-27 785 goto free_err;
11614723af26e7 Stanislav Fomichev 2023-11-27 786 }
48eb03dd26304c Stanislav Fomichev 2023-11-27 787 }
ca4419f15abd19 Song Yoong Siang 2025-02-16 788
ca4419f15abd19 Song Yoong Siang 2025-02-16 789 if (meta->flags & XDP_TXMD_FLAGS_LAUNCH_TIME)
ca4419f15abd19 Song Yoong Siang 2025-02-16 790 skb->skb_mstamp_ns = meta->request.launch_time;
48eb03dd26304c Stanislav Fomichev 2023-11-27 791 }
9c8f21e6f8856a Xuan Zhuo 2021-02-18 792 }
9c8f21e6f8856a Xuan Zhuo 2021-02-18 793
9c8f21e6f8856a Xuan Zhuo 2021-02-18 794 skb->dev = dev;
10bbf1652c1cca Eric Dumazet 2023-09-21 795 skb->priority = READ_ONCE(xs->sk.sk_priority);
3c5b4d69c358a9 Eric Dumazet 2023-07-28 796 skb->mark = READ_ONCE(xs->sk.sk_mark);
9c8f21e6f8856a Xuan Zhuo 2021-02-18 797 skb->destructor = xsk_destruct_skb;
48eb03dd26304c Stanislav Fomichev 2023-11-27 798 xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta);
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 799
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 800 addrs = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 801 addrs->addrs[addrs->num_descs++] = desc->addr;
9c8f21e6f8856a Xuan Zhuo 2021-02-18 802
9c8f21e6f8856a Xuan Zhuo 2021-02-18 803 return skb;
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 804
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 805 free_err:
0c0d0f42ffa6ac Felix Maurer 2024-11-14 806 if (first_frag && skb)
0c0d0f42ffa6ac Felix Maurer 2024-11-14 807 kfree_skb(skb);
0c0d0f42ffa6ac Felix Maurer 2024-11-14 808
9d0a67b9d42c63 Tirthendu Sarkar 2023-08-23 809 if (err == -EOVERFLOW) {
9d0a67b9d42c63 Tirthendu Sarkar 2023-08-23 810 /* Drop the packet */
67a37dcecabbf0 Maciej Fijalkowski 2025-07-05 811 xsk_inc_skb_descs(xs->skb);
9d0a67b9d42c63 Tirthendu Sarkar 2023-08-23 812 xsk_drop_skb(xs->skb);
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 813 xskq_cons_release(xs->tx);
9d0a67b9d42c63 Tirthendu Sarkar 2023-08-23 814 } else {
9d0a67b9d42c63 Tirthendu Sarkar 2023-08-23 815 /* Let application retry */
e6c4047f512280 Maciej Fijalkowski 2024-10-07 816 xsk_cq_cancel_locked(xs->pool, 1);
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 817 }
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 818
cf24f5a5feeaae Tirthendu Sarkar 2023-07-19 @819 return ERR_PTR(err);
9c8f21e6f8856a Xuan Zhuo 2021-02-18 820 }
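A minimal sketch of one way to address that report, assuming -ENOMEM is
the intended error on the kzalloc() failure path:

		addrs = kzalloc(sizeof(*addrs), GFP_KERNEL);
		if (!addrs) {
			err = -ENOMEM;
			goto free_err;
		}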
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2025-07-14 19:05 UTC | newest]
Thread overview: 10+ messages
2025-07-05 13:55 [PATCH v2 bpf] xsk: fix immature cq descriptor production Maciej Fijalkowski
2025-07-07 13:59 ` Alexander Lobakin
2025-07-07 15:06 ` Stanislav Fomichev
2025-07-07 15:47 ` Alexander Lobakin
2025-07-07 18:40 ` Stanislav Fomichev
2025-07-08 14:14 ` Maciej Fijalkowski
2025-07-08 15:54 ` Alexander Lobakin
2025-07-08 17:49 ` Stanislav Fomichev
2025-07-09 10:00 ` Maciej Fijalkowski
2025-07-14 19:04 ` Dan Carpenter