From: Alexander Lobakin <aleksander.lobakin@intel.com>
To: "David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: "Alexander Lobakin" <aleksander.lobakin@intel.com>,
"Toke Høiland-Jørgensen" <toke@redhat.com>,
"Alexei Starovoitov" <ast@kernel.org>,
"Daniel Borkmann" <daniel@iogearbox.net>,
"John Fastabend" <john.fastabend@gmail.com>,
"Andrii Nakryiko" <andrii@kernel.org>,
"Stanislav Fomichev" <sdf@fomichev.me>,
"Magnus Karlsson" <magnus.karlsson@intel.com>,
nex.sw.ncis.osdt.itp.upstreaming@intel.com, bpf@vger.kernel.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH net-next v2 15/18] xsk: add generic XSk &xdp_buff -> skb conversion
Date: Tue, 15 Oct 2024 16:53:47 +0200
Message-ID: <20241015145350.4077765-16-aleksander.lobakin@intel.com>
In-Reply-To: <20241015145350.4077765-1-aleksander.lobakin@intel.com>
Same as with converting an &xdp_buff to an skb on Rx, the code which
allocates a new skb and copies the XSk frame there is identical across
the drivers, so make it generic. This includes copying all the frags if
they are present in the original buff.
System percpu Page Pools help here a lot: when available, allocate pages
from there instead of the MM layer. This greatly improves XDP_PASS
performance on XSk: instead of page_alloc() + page_free(), the net core
recycles the same pages, so the only overhead left is memcpy()s.
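For illustration, a condensed sketch of that allocation pattern as used
by the new helper below (CONFIG_PAGE_POOL=y case only, error handling
trimmed):

	struct page_pool *pp = this_cpu_read(system_page_pool);
	u32 truesize = xdp->frame_sz;
	void *data;

	data = page_pool_dev_alloc_va(pp, &truesize);
	if (data) {
		struct sk_buff *skb = napi_build_skb(data, truesize);

		/* Recycled back to @pp on skb free, not to the MM layer */
		if (skb)
			skb_mark_for_recycle(skb);
	}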
Note that the passed buff gets freed if the conversion succeeds without
any error, on the assumption that you don't need the buffer once it has
been converted to an skb. On error, it is left untouched and the caller
must free it.
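A minimal example of how a driver's ZC Rx path might consume the helper
on XDP_PASS (the mydrv_* naming and NAPI plumbing are hypothetical):

	static void mydrv_xsk_rx_pass(struct napi_struct *napi,
				      struct xdp_buff *xdp)
	{
		struct sk_buff *skb = xdp_build_skb_from_zc(xdp);

		if (unlikely(!skb)) {
			/* On error, @xdp is left untouched: free it here */
			xsk_buff_free(xdp);
			return;
		}

		/* On success, @xdp was already returned to the XSk pool
		 * and skb->protocol was set via eth_type_trans().
		 */
		napi_gro_receive(napi, skb);
	}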
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
---
include/net/xdp.h | 1 +
net/core/xdp.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 139 insertions(+)
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 83e3f4648caa..69728b2d75d5 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -331,6 +331,7 @@ void xdp_warn(const char *msg, const char *func, const int line);
#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp);
+struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp);
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct sk_buff *skb,
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 371c26c203b2..116153b88d26 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -22,6 +22,8 @@
#include <trace/events/xdp.h>
#include <net/xdp_sock_drv.h>
+#include "dev.h"
+
#define REG_STATE_NEW 0x0
#define REG_STATE_REGISTERED 0x1
#define REG_STATE_UNREGISTERED 0x2
@@ -682,6 +684,142 @@ struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp)
}
EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
+/**
+ * xdp_copy_frags_from_zc - copy the frags from an XSk buff to an skb
+ * @skb: skb to copy frags to
+ * @xdp: XSk &xdp_buff from which the frags will be copied
+ * @pp: &page_pool backing page allocation, if available
+ *
+ * Copy all frags from an XSk &xdp_buff to an skb to pass it up the stack.
+ * Allocate a new page / page frag for each frag, copy it and attach to
+ * the skb.
+ *
+ * Return: true on success, false on page allocation failure.
+ */
+static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
+ const struct xdp_buff *xdp,
+ struct page_pool *pp)
+{
+ const struct skb_shared_info *xinfo;
+ struct skb_shared_info *sinfo;
+ u32 nr_frags, ts;
+
+ xinfo = xdp_get_shared_info_from_buff(xdp);
+ nr_frags = xinfo->nr_frags;
+ sinfo = skb_shinfo(skb);
+
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ ts = 0;
+#else
+ ts = xinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz;
+#endif
+
+ for (u32 i = 0; i < nr_frags; i++) {
+ u32 len = skb_frag_size(&xinfo->frags[i]);
+ void *data;
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ u32 truesize = len;
+
+ data = page_pool_dev_alloc_va(pp, &truesize);
+ ts += truesize;
+#else
+ data = napi_alloc_frag(len);
+#endif
+ if (unlikely(!data))
+ return false;
+
+ memcpy(data, skb_frag_address(&xinfo->frags[i]),
+ LARGEST_ALIGN(len));
+ __skb_fill_page_desc(skb, sinfo->nr_frags++,
+ virt_to_page(data),
+ offset_in_page(data), len);
+ }
+
+ xdp_update_skb_shared_info(skb, nr_frags, xinfo->xdp_frags_size,
+ ts, false);
+
+ return true;
+}
+
+/**
+ * xdp_build_skb_from_zc - create an skb from an XSk &xdp_buff
+ * @xdp: source XSk buff
+ *
+ * Similar to xdp_build_skb_from_buff(), but for XSk frames. Allocate an skb
+ * head, new page for the head, copy the data and initialize the skb fields.
+ * If there are frags, allocate new pages for them and copy.
+ * If Page Pool is available, the function allocates memory from the system
+ * percpu pools to try recycling the pages, otherwise it uses the NAPI page
+ * frag caches.
+ * If the new skb was built successfully, @xdp is returned to the XSk pool's
+ * freelist. On error, it remains untouched and the caller must free it.
+ *
+ * Return: new &sk_buff on success, %NULL on error.
+ */
+struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
+{
+ const struct xdp_rxq_info *rxq = xdp->rxq;
+ u32 len = xdp->data_end - xdp->data_meta;
+ struct page_pool *pp;
+ struct sk_buff *skb;
+ int metalen;
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ u32 truesize;
+ void *data;
+
+ pp = this_cpu_read(system_page_pool);
+ truesize = xdp->frame_sz;
+
+ data = page_pool_dev_alloc_va(pp, &truesize);
+ if (unlikely(!data))
+ return NULL;
+
+ skb = napi_build_skb(data, truesize);
+ if (unlikely(!skb)) {
+ page_pool_free_va(pp, data, true);
+ return NULL;
+ }
+
+ skb_mark_for_recycle(skb);
+ skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
+#else /* !CONFIG_PAGE_POOL */
+ struct napi_struct *napi;
+
+ pp = NULL;
+ napi = napi_by_id(rxq->napi_id);
+ if (likely(napi))
+ skb = napi_alloc_skb(napi, len);
+ else
+ skb = __netdev_alloc_skb_ip_align(rxq->dev, len,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ return NULL;
+#endif /* !CONFIG_PAGE_POOL */
+
+ memcpy(__skb_put(skb, len), xdp->data_meta, LARGEST_ALIGN(len));
+
+ metalen = xdp->data - xdp->data_meta;
+ if (metalen > 0) {
+ skb_metadata_set(skb, metalen);
+ __skb_pull(skb, metalen);
+ }
+
+ skb_record_rx_queue(skb, rxq->queue_index);
+
+ if (unlikely(xdp_buff_has_frags(xdp)) &&
+ unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) {
+ napi_consume_skb(skb, true);
+ return NULL;
+ }
+
+ xsk_buff_free(xdp);
+
+ skb->protocol = eth_type_trans(skb, rxq->dev);
+
+ return skb;
+}
+EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc);
+
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct sk_buff *skb,
struct net_device *dev)
--
2.46.2
Thread overview: 42+ messages
2024-10-15 14:53 [PATCH net-next v2 00/18] idpf: XDP chapter III: core XDP changes (+libeth_xdp) Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 01/18] jump_label: export static_key_slow_{inc,dec}_cpuslocked() Alexander Lobakin
2024-10-17 11:06 ` Maciej Fijalkowski
2024-10-21 13:53 ` Alexander Lobakin
2024-10-22 12:52 ` Maciej Fijalkowski
2024-10-15 14:53 ` [PATCH net-next v2 02/18] skbuff: allow 2-4-argument skb_frag_dma_map() Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 03/18] unroll: add generic loop unroll helpers Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 04/18] bpf, xdp: constify some bpf_prog * function arguments Alexander Lobakin
2024-10-17 11:12 ` Maciej Fijalkowski
2024-10-21 13:56 ` Alexander Lobakin
2024-10-22 12:55 ` Maciej Fijalkowski
2024-10-15 14:53 ` [PATCH net-next v2 05/18] xdp, xsk: constify read-only arguments of some static inline helpers Alexander Lobakin
2024-10-17 11:14 ` Maciej Fijalkowski
2024-10-21 13:57 ` Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 06/18] xdp: allow attaching already registered memory model to xdp_rxq_info Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 07/18] net: Register system page pool as an XDP memory model Alexander Lobakin
2024-10-17 11:32 ` Maciej Fijalkowski
2024-10-21 14:00 ` Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 08/18] page_pool: make page_pool_put_page_bulk() actually handle array of pages Alexander Lobakin
2024-10-17 11:33 ` Maciej Fijalkowski
2024-10-21 14:03 ` Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 09/18] page_pool: allow mixing PPs within one bulk Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 10/18] xdp: get rid of xdp_frame::mem.id Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 11/18] xdp: add generic xdp_buff_add_frag() Alexander Lobakin
2024-10-17 12:26 ` Maciej Fijalkowski
2024-10-21 14:10 ` Alexander Lobakin
2024-10-22 13:00 ` Maciej Fijalkowski
2024-10-15 14:53 ` [PATCH net-next v2 12/18] xdp: add generic xdp_build_skb_from_buff() Alexander Lobakin
2024-10-17 12:34 ` Maciej Fijalkowski
2024-10-21 14:20 ` Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 13/18] xsk: allow attaching XSk pool via xdp_rxq_info_reg_mem_model() Alexander Lobakin
2024-10-17 12:49 ` Maciej Fijalkowski
2024-10-21 14:23 ` Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 14/18] xsk: make xsk_buff_add_frag really add a frag via __xdp_buff_add_frag() Alexander Lobakin
2024-10-17 13:04 ` Maciej Fijalkowski
2024-10-15 14:53 ` Alexander Lobakin [this message]
2024-10-18 12:48 ` [PATCH net-next v2 15/18] xsk: add generic XSk &xdp_buff -> skb conversion Maciej Fijalkowski
2024-10-15 14:53 ` [PATCH net-next v2 16/18] xsk: add helper to get &xdp_desc's DMA and meta pointer in one go Alexander Lobakin
2024-10-22 15:42 ` Maciej Fijalkowski
2024-10-23 14:50 ` Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 17/18] libeth: support native XDP and register memory model Alexander Lobakin
2024-10-15 14:53 ` [PATCH net-next v2 18/18] libeth: add a couple of XDP helpers (libeth_xdp) Alexander Lobakin