netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode
@ 2024-01-28 14:20 Lorenzo Bianconi
  2024-01-28 14:20 ` [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator Lorenzo Bianconi
                   ` (5 more replies)
  0 siblings, 6 replies; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-28 14:20 UTC (permalink / raw)
  To: netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

Introduce multi-buffer support for xdp running in generic mode, avoiding the
need to always linearize the skb in the netif_receive_generic_xdp routine.
Introduce page_pool in softnet_data structure

Changes since v5:
- move percpu page_pool pointer out of softnet_data in a dedicated variable
- make page_pool stats available just for global pools
- rely on netif_skb_segment_for_xdp utility routine in veth driver
Changes since v4:
- fix compilation error if page_pools are not enabled
Changes since v3:
- introduce page_pool in softnet_data structure
- rely on page_pools for xdp_generic code
Changes since v2:
- rely on napi_alloc_frag() and napi_build_skb() to build the new skb
Changes since v1:
- explicitly keep the skb segmented in netif_skb_check_for_generic_xdp() and
  do not rely on pskb_expand_head()

Lorenzo Bianconi (5):
  net: add generic per-cpu page_pool allocator
  xdp: rely on skb pointer reference in do_xdp_generic and
    netif_receive_generic_xdp
  xdp: add multi-buff support for xdp running in generic mode
  net: page_pool: make stats available just for global pools
  veth: rely on netif_skb_segment_for_xdp utility routine

 drivers/net/tun.c             |   4 +-
 drivers/net/veth.c            |  79 +------------
 include/linux/netdevice.h     |   6 +-
 include/net/page_pool/types.h |   3 +
 net/core/dev.c                | 208 +++++++++++++++++++++++++++++-----
 net/core/page_pool.c          |  59 +++++++---
 net/core/skbuff.c             |   5 +-
 7 files changed, 241 insertions(+), 123 deletions(-)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-28 14:20 [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Lorenzo Bianconi
@ 2024-01-28 14:20 ` Lorenzo Bianconi
  2024-01-29  6:37   ` kernel test robot
                     ` (3 more replies)
  2024-01-28 14:20 ` [PATCH v6 net-next 2/5] xdp: rely on skb pointer reference in do_xdp_generic and netif_receive_generic_xdp Lorenzo Bianconi
                   ` (4 subsequent siblings)
  5 siblings, 4 replies; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-28 14:20 UTC (permalink / raw)
  To: netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

Introduce generic percpu page_pools allocator.
Moreover add page_pool_create_percpu() and cpuid field in page_pool struct
in order to recycle the page in the page_pool "hot" cache if
napi_pp_put_page() is running on the same cpu.
This is a preliminary patch to add xdp multi-buff support for xdp running
in generic mode.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 include/net/page_pool/types.h |  3 +++
 net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
 net/core/page_pool.c          | 23 ++++++++++++++++----
 net/core/skbuff.c             |  5 +++--
 4 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 76481c465375..3828396ae60c 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -128,6 +128,7 @@ struct page_pool_stats {
 struct page_pool {
 	struct page_pool_params_fast p;
 
+	int cpuid;
 	bool has_init_callback;
 
 	long frag_users;
@@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
 struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
 				  unsigned int size, gfp_t gfp);
 struct page_pool *page_pool_create(const struct page_pool_params *params);
+struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
+					  int cpuid);
 
 struct xdp_mem_info;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index cb2dab0feee0..bf9ec740b09a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -153,6 +153,8 @@
 #include <linux/prandom.h>
 #include <linux/once_lite.h>
 #include <net/netdev_rx_queue.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
 
 #include "dev.h"
 #include "net-sysfs.h"
@@ -442,6 +444,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 EXPORT_PER_CPU_SYMBOL(softnet_data);
 
+DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
+
 #ifdef CONFIG_LOCKDEP
 /*
  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
@@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
  *
  */
 
+#define SD_PAGE_POOL_RING_SIZE	256
+static int net_page_pool_alloc(int cpuid)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+	struct page_pool_params page_pool_params = {
+		.pool_size = SD_PAGE_POOL_RING_SIZE,
+		.nid = NUMA_NO_NODE,
+	};
+	struct page_pool *pp_ptr;
+
+	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
+	if (IS_ERR(pp_ptr)) {
+		pp_ptr = NULL;
+		return -ENOMEM;
+	}
+
+	per_cpu(page_pool, cpuid) = pp_ptr;
+#endif
+	return 0;
+}
+
 /*
  *       This is called single threaded during boot, so no need
  *       to take the rtnl semaphore.
@@ -11738,6 +11763,9 @@ static int __init net_dev_init(void)
 		init_gro_hash(&sd->backlog);
 		sd->backlog.poll = process_backlog;
 		sd->backlog.weight = weight_p;
+
+		if (net_page_pool_alloc(i))
+			goto out;
 	}
 
 	dev_boot_phase = 0;
@@ -11765,6 +11793,18 @@ static int __init net_dev_init(void)
 	WARN_ON(rc < 0);
 	rc = 0;
 out:
+	if (rc < 0) {
+		for_each_possible_cpu(i) {
+		struct page_pool *pp_ptr = per_cpu(page_pool, i);
+
+			if (!pp_ptr)
+				continue;
+
+			page_pool_destroy(pp_ptr);
+			per_cpu(page_pool, i) = NULL;
+		}
+	}
+
 	return rc;
 }
 
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 4933762e5a6b..89c835fcf094 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -171,13 +171,16 @@ static void page_pool_producer_unlock(struct page_pool *pool,
 }
 
 static int page_pool_init(struct page_pool *pool,
-			  const struct page_pool_params *params)
+			  const struct page_pool_params *params,
+			  int cpuid)
 {
 	unsigned int ring_qsize = 1024; /* Default */
 
 	memcpy(&pool->p, &params->fast, sizeof(pool->p));
 	memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
 
+	pool->cpuid = cpuid;
+
 	/* Validate only known flags were used */
 	if (pool->p.flags & ~(PP_FLAG_ALL))
 		return -EINVAL;
@@ -253,10 +256,12 @@ static void page_pool_uninit(struct page_pool *pool)
 }
 
 /**
- * page_pool_create() - create a page pool.
+ * page_pool_create_percpu() - create a page pool for a given cpu.
  * @params: parameters, see struct page_pool_params
+ * @cpuid: cpu identifier
  */
-struct page_pool *page_pool_create(const struct page_pool_params *params)
+struct page_pool *
+page_pool_create_percpu(const struct page_pool_params *params, int cpuid)
 {
 	struct page_pool *pool;
 	int err;
@@ -265,7 +270,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
 	if (!pool)
 		return ERR_PTR(-ENOMEM);
 
-	err = page_pool_init(pool, params);
+	err = page_pool_init(pool, params, cpuid);
 	if (err < 0)
 		goto err_free;
 
@@ -282,6 +287,16 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
 	kfree(pool);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL(page_pool_create_percpu);
+
+/**
+ * page_pool_create() - create a page pool
+ * @params: parameters, see struct page_pool_params
+ */
+struct page_pool *page_pool_create(const struct page_pool_params *params)
+{
+	return page_pool_create_percpu(params, -1);
+}
 EXPORT_SYMBOL(page_pool_create);
 
 static void page_pool_return_page(struct page_pool *pool, struct page *page);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index edbbef563d4d..9e5eb47b4025 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -923,9 +923,10 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
 	 */
 	if (napi_safe || in_softirq()) {
 		const struct napi_struct *napi = READ_ONCE(pp->p.napi);
+		unsigned int cpuid = smp_processor_id();
 
-		allow_direct = napi &&
-			READ_ONCE(napi->list_owner) == smp_processor_id();
+		allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
+		allow_direct |= (pp->cpuid == cpuid);
 	}
 
 	/* Driver set this to memory recycling info. Reset it on recycle.
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH v6 net-next 2/5] xdp: rely on skb pointer reference in do_xdp_generic and netif_receive_generic_xdp
  2024-01-28 14:20 [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Lorenzo Bianconi
  2024-01-28 14:20 ` [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator Lorenzo Bianconi
@ 2024-01-28 14:20 ` Lorenzo Bianconi
  2024-01-29  8:05   ` Ilias Apalodimas
  2024-01-28 14:20 ` [PATCH v6 net-next 3/5] xdp: add multi-buff support for xdp running in generic mode Lorenzo Bianconi
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-28 14:20 UTC (permalink / raw)
  To: netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

Rely on skb pointer reference instead of the skb pointer in do_xdp_generic and
netif_receive_generic_xdp routine signatures. This is a preliminary patch to add
multi-buff support for xdp running in generic mode.

Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/tun.c         |  4 ++--
 include/linux/netdevice.h |  2 +-
 net/core/dev.c            | 16 +++++++++-------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 4a4f8c8e79fa..5bd98bdaddf2 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1927,7 +1927,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		rcu_read_lock();
 		xdp_prog = rcu_dereference(tun->xdp_prog);
 		if (xdp_prog) {
-			ret = do_xdp_generic(xdp_prog, skb);
+			ret = do_xdp_generic(xdp_prog, &skb);
 			if (ret != XDP_PASS) {
 				rcu_read_unlock();
 				local_bh_enable();
@@ -2517,7 +2517,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 	skb_record_rx_queue(skb, tfile->queue_index);
 
 	if (skb_xdp) {
-		ret = do_xdp_generic(xdp_prog, skb);
+		ret = do_xdp_generic(xdp_prog, &skb);
 		if (ret != XDP_PASS) {
 			ret = 0;
 			goto out;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 118c40258d07..7eee99a58200 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3958,7 +3958,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
 u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
 			     struct bpf_prog *xdp_prog);
 void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb);
 int netif_rx(struct sk_buff *skb);
 int __netif_rx(struct sk_buff *skb);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index bf9ec740b09a..960f39ac5e33 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4924,10 +4924,11 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
 	return act;
 }
 
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
 				     struct xdp_buff *xdp,
 				     struct bpf_prog *xdp_prog)
 {
+	struct sk_buff *skb = *pskb;
 	u32 act = XDP_DROP;
 
 	/* Reinjected packets coming from act_mirred or similar should
@@ -5008,24 +5009,24 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
 
 static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
 
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
 {
 	if (xdp_prog) {
 		struct xdp_buff xdp;
 		u32 act;
 		int err;
 
-		act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
+		act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
 		if (act != XDP_PASS) {
 			switch (act) {
 			case XDP_REDIRECT:
-				err = xdp_do_generic_redirect(skb->dev, skb,
+				err = xdp_do_generic_redirect((*pskb)->dev, *pskb,
 							      &xdp, xdp_prog);
 				if (err)
 					goto out_redir;
 				break;
 			case XDP_TX:
-				generic_xdp_tx(skb, xdp_prog);
+				generic_xdp_tx(*pskb, xdp_prog);
 				break;
 			}
 			return XDP_DROP;
@@ -5033,7 +5034,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
 	}
 	return XDP_PASS;
 out_redir:
-	kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
+	kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
 	return XDP_DROP;
 }
 EXPORT_SYMBOL_GPL(do_xdp_generic);
@@ -5356,7 +5357,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
 		int ret2;
 
 		migrate_disable();
-		ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+		ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
+				      &skb);
 		migrate_enable();
 
 		if (ret2 != XDP_PASS) {
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH v6 net-next 3/5] xdp: add multi-buff support for xdp running in generic mode
  2024-01-28 14:20 [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Lorenzo Bianconi
  2024-01-28 14:20 ` [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator Lorenzo Bianconi
  2024-01-28 14:20 ` [PATCH v6 net-next 2/5] xdp: rely on skb pointer reference in do_xdp_generic and netif_receive_generic_xdp Lorenzo Bianconi
@ 2024-01-28 14:20 ` Lorenzo Bianconi
  2024-01-31 23:47   ` Jakub Kicinski
  2024-01-28 14:20 ` [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools Lorenzo Bianconi
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-28 14:20 UTC (permalink / raw)
  To: netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

Similar to native xdp, do not always linearize the skb in
netif_receive_generic_xdp routine but create a non-linear xdp_buff to be
processed by the eBPF program. This allows adding multi-buffer support
for xdp running in generic mode.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 net/core/dev.c | 152 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 133 insertions(+), 19 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 960f39ac5e33..19f92ba90e49 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4862,6 +4862,12 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
 	xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
 	xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
 			 skb_headlen(skb) + mac_len, true);
+	if (skb_is_nonlinear(skb)) {
+		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+		xdp_buff_set_frags_flag(xdp);
+	} else {
+		xdp_buff_clear_frags_flag(xdp);
+	}
 
 	orig_data_end = xdp->data_end;
 	orig_data = xdp->data;
@@ -4891,6 +4897,14 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
 		skb->len += off; /* positive on grow, negative on shrink */
 	}
 
+	/* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
+	 * (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
+	 */
+	if (xdp_buff_has_frags(xdp))
+		skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+	else
+		skb->data_len = 0;
+
 	/* check if XDP changed eth hdr such SKB needs update */
 	eth = (struct ethhdr *)xdp->data;
 	if ((orig_eth_type != eth->h_proto) ||
@@ -4924,12 +4938,117 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
 	return act;
 }
 
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+static int
+netif_skb_segment_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
+			  struct bpf_prog *prog)
+{
+	u32 size, truesize, len, max_head_size, off;
+	struct sk_buff *skb = *pskb, *nskb;
+	int err, i, head_off;
+	void *data;
+
+	/* XDP does not support fraglist so we need to linearize
+	 * the skb.
+	 */
+	if (skb_has_frag_list(skb) || !prog->aux->xdp_has_frags)
+		return -EOPNOTSUPP;
+
+	max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - XDP_PACKET_HEADROOM);
+	if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE)
+		return -ENOMEM;
+
+	size = min_t(u32, skb->len, max_head_size);
+	truesize = SKB_HEAD_ALIGN(size) + XDP_PACKET_HEADROOM;
+	data = page_pool_dev_alloc_va(pool, &truesize);
+	if (!data)
+		return -ENOMEM;
+
+	nskb = napi_build_skb(data, truesize);
+	if (!nskb) {
+		page_pool_free_va(pool, data, true);
+		return -ENOMEM;
+	}
+
+	skb_reserve(nskb, XDP_PACKET_HEADROOM);
+	skb_copy_header(nskb, skb);
+	skb_mark_for_recycle(nskb);
+
+	err = skb_copy_bits(skb, 0, nskb->data, size);
+	if (err) {
+		consume_skb(nskb);
+		return err;
+	}
+	skb_put(nskb, size);
+
+	head_off = skb_headroom(nskb) - skb_headroom(skb);
+	skb_headers_offset_update(nskb, head_off);
+
+	off = size;
+	len = skb->len - off;
+	for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
+		struct page *page;
+		u32 page_off;
+
+		size = min_t(u32, len, PAGE_SIZE);
+		truesize = size;
+
+		page = page_pool_dev_alloc(pool, &page_off, &truesize);
+		if (!page) {
+			consume_skb(nskb);
+			return -ENOMEM;
+		}
+
+		skb_add_rx_frag(nskb, i, page, page_off, size, truesize);
+		err = skb_copy_bits(skb, off, page_address(page) + page_off,
+				    size);
+		if (err) {
+			consume_skb(nskb);
+			return err;
+		}
+
+		len -= size;
+		off += size;
+	}
+
+	consume_skb(skb);
+	*pskb = nskb;
+
+	return 0;
+}
+#endif
+
+static int
+netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog)
+{
+	struct sk_buff *skb = *pskb;
+	int err, hroom, troom;
+
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+	if (!netif_skb_segment_for_xdp(this_cpu_read(page_pool), pskb, prog))
+		return 0;
+#endif
+
+	/* In case we have to go down the path and also linearize,
+	 * then lets do the pskb_expand_head() work just once here.
+	 */
+	hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+	troom = skb->tail + skb->data_len - skb->end;
+	err = pskb_expand_head(skb,
+			       hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+			       troom > 0 ? troom + 128 : 0, GFP_ATOMIC);
+	if (err)
+		return err;
+
+	return skb_linearize(skb);
+}
+
 static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
 				     struct xdp_buff *xdp,
 				     struct bpf_prog *xdp_prog)
 {
 	struct sk_buff *skb = *pskb;
-	u32 act = XDP_DROP;
+	u32 mac_len, act = XDP_DROP;
 
 	/* Reinjected packets coming from act_mirred or similar should
 	 * not get XDP generic processing.
@@ -4937,41 +5056,36 @@ static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
 	if (skb_is_redirected(skb))
 		return XDP_PASS;
 
-	/* XDP packets must be linear and must have sufficient headroom
-	 * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
-	 * native XDP provides, thus we need to do it here as well.
+	/* XDP packets must have sufficient headroom of XDP_PACKET_HEADROOM
+	 * bytes. This is the guarantee that also native XDP provides,
+	 * thus we need to do it here as well.
 	 */
+	mac_len = skb->data - skb_mac_header(skb);
+	__skb_push(skb, mac_len);
+
 	if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
 	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
-		int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
-		int troom = skb->tail + skb->data_len - skb->end;
-
-		/* In case we have to go down the path and also linearize,
-		 * then lets do the pskb_expand_head() work just once here.
-		 */
-		if (pskb_expand_head(skb,
-				     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
-				     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
-			goto do_drop;
-		if (skb_linearize(skb))
+		if (netif_skb_check_for_xdp(pskb, xdp_prog))
 			goto do_drop;
 	}
 
-	act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+	__skb_pull(*pskb, mac_len);
+
+	act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog);
 	switch (act) {
 	case XDP_REDIRECT:
 	case XDP_TX:
 	case XDP_PASS:
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act);
+		bpf_warn_invalid_xdp_action((*pskb)->dev, xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
-		trace_xdp_exception(skb->dev, xdp_prog, act);
+		trace_xdp_exception((*pskb)->dev, xdp_prog, act);
 		fallthrough;
 	case XDP_DROP:
 	do_drop:
-		kfree_skb(skb);
+		kfree_skb(*pskb);
 		break;
 	}
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools
  2024-01-28 14:20 [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Lorenzo Bianconi
                   ` (2 preceding siblings ...)
  2024-01-28 14:20 ` [PATCH v6 net-next 3/5] xdp: add multi-buff support for xdp running in generic mode Lorenzo Bianconi
@ 2024-01-28 14:20 ` Lorenzo Bianconi
  2024-01-29 12:06   ` Yunsheng Lin
  2024-01-28 14:20 ` [PATCH v6 net-next 5/5] veth: rely on netif_skb_segment_for_xdp utility routine Lorenzo Bianconi
  2024-01-29 12:43 ` [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Toke Høiland-Jørgensen
  5 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-28 14:20 UTC (permalink / raw)
  To: netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

Move page_pool stats allocation into the page_pool_create routine and get rid
of it for percpu page_pools.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 net/core/page_pool.c | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 89c835fcf094..5278ffef6442 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -37,13 +37,15 @@
 #define recycle_stat_inc(pool, __stat)							\
 	do {										\
 		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats;	\
-		this_cpu_inc(s->__stat);						\
+		if (s)									\
+			this_cpu_inc(s->__stat);					\
 	} while (0)
 
 #define recycle_stat_add(pool, __stat, val)						\
 	do {										\
 		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats;	\
-		this_cpu_add(s->__stat, val);						\
+		if (s)									\
+			this_cpu_add(s->__stat, val);					\
 	} while (0)
 
 static const char pp_stats[][ETH_GSTRING_LEN] = {
@@ -79,6 +81,9 @@ bool page_pool_get_stats(const struct page_pool *pool,
 	if (!stats)
 		return false;
 
+	if (!pool->recycle_stats)
+		return false;
+
 	/* The caller is responsible to initialize stats. */
 	stats->alloc_stats.fast += pool->alloc_stats.fast;
 	stats->alloc_stats.slow += pool->alloc_stats.slow;
@@ -218,19 +223,8 @@ static int page_pool_init(struct page_pool *pool,
 	}
 
 	pool->has_init_callback = !!pool->slow.init_callback;
-
-#ifdef CONFIG_PAGE_POOL_STATS
-	pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
-	if (!pool->recycle_stats)
+	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
 		return -ENOMEM;
-#endif
-
-	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
-#ifdef CONFIG_PAGE_POOL_STATS
-		free_percpu(pool->recycle_stats);
-#endif
-		return -ENOMEM;
-	}
 
 	atomic_set(&pool->pages_state_release_cnt, 0);
 
@@ -295,7 +289,21 @@ EXPORT_SYMBOL(page_pool_create_percpu);
  */
 struct page_pool *page_pool_create(const struct page_pool_params *params)
 {
-	return page_pool_create_percpu(params, -1);
+	struct page_pool *pool;
+
+	pool = page_pool_create_percpu(params, -1);
+	if (IS_ERR(pool))
+		return pool;
+
+#ifdef CONFIG_PAGE_POOL_STATS
+	pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
+	if (!pool->recycle_stats) {
+		page_pool_uninit(pool);
+		kfree(pool);
+		pool = ERR_PTR(-ENOMEM);
+	}
+#endif
+	return pool;
 }
 EXPORT_SYMBOL(page_pool_create);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH v6 net-next 5/5] veth: rely on netif_skb_segment_for_xdp utility routine
  2024-01-28 14:20 [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Lorenzo Bianconi
                   ` (3 preceding siblings ...)
  2024-01-28 14:20 ` [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools Lorenzo Bianconi
@ 2024-01-28 14:20 ` Lorenzo Bianconi
  2024-01-29 12:43 ` [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Toke Høiland-Jørgensen
  5 siblings, 0 replies; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-28 14:20 UTC (permalink / raw)
  To: netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

Rely on netif_skb_segment_for_xdp utility routine and remove duplicated
code.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/veth.c        | 79 +++------------------------------------
 include/linux/netdevice.h |  4 ++
 net/core/dev.c            |  6 +--
 3 files changed, 12 insertions(+), 77 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 578e36ea1589..ddb163f134ea 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -721,7 +721,8 @@ static void veth_xdp_get(struct xdp_buff *xdp)
 
 static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 					struct xdp_buff *xdp,
-					struct sk_buff **pskb)
+					struct sk_buff **pskb,
+					struct bpf_prog *prog)
 {
 	struct sk_buff *skb = *pskb;
 	u32 frame_sz;
@@ -729,80 +730,10 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 	if (skb_shared(skb) || skb_head_is_locked(skb) ||
 	    skb_shinfo(skb)->nr_frags ||
 	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
-		u32 size, len, max_head_size, off, truesize, page_offset;
-		struct sk_buff *nskb;
-		struct page *page;
-		int i, head_off;
-		void *va;
-
-		/* We need a private copy of the skb and data buffers since
-		 * the ebpf program can modify it. We segment the original skb
-		 * into order-0 pages without linearize it.
-		 *
-		 * Make sure we have enough space for linear and paged area
-		 */
-		max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
-						  VETH_XDP_HEADROOM);
-		if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
-			goto drop;
-
-		size = min_t(u32, skb->len, max_head_size);
-		truesize = SKB_HEAD_ALIGN(size) + VETH_XDP_HEADROOM;
-
-		/* Allocate skb head */
-		va = page_pool_dev_alloc_va(rq->page_pool, &truesize);
-		if (!va)
-			goto drop;
-
-		nskb = napi_build_skb(va, truesize);
-		if (!nskb) {
-			page_pool_free_va(rq->page_pool, va, true);
+		if (netif_skb_segment_for_xdp(rq->page_pool, pskb, prog))
 			goto drop;
-		}
-
-		skb_reserve(nskb, VETH_XDP_HEADROOM);
-		skb_copy_header(nskb, skb);
-		skb_mark_for_recycle(nskb);
-
-		if (skb_copy_bits(skb, 0, nskb->data, size)) {
-			consume_skb(nskb);
-			goto drop;
-		}
-		skb_put(nskb, size);
 
-		head_off = skb_headroom(nskb) - skb_headroom(skb);
-		skb_headers_offset_update(nskb, head_off);
-
-		/* Allocate paged area of new skb */
-		off = size;
-		len = skb->len - off;
-
-		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
-			size = min_t(u32, len, PAGE_SIZE);
-			truesize = size;
-
-			page = page_pool_dev_alloc(rq->page_pool, &page_offset,
-						   &truesize);
-			if (!page) {
-				consume_skb(nskb);
-				goto drop;
-			}
-
-			skb_add_rx_frag(nskb, i, page, page_offset, size,
-					truesize);
-			if (skb_copy_bits(skb, off,
-					  page_address(page) + page_offset,
-					  size)) {
-				consume_skb(nskb);
-				goto drop;
-			}
-
-			len -= size;
-			off += size;
-		}
-
-		consume_skb(skb);
-		skb = nskb;
+		skb = *pskb;
 	}
 
 	/* SKB "head" area always have tailroom for skb_shared_info */
@@ -850,7 +781,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	}
 
 	__skb_push(skb, skb->data - skb_mac_header(skb));
-	if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
+	if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb, xdp_prog))
 		goto drop;
 	vxbuf.skb = skb;
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7eee99a58200..8c1f6954de47 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3955,6 +3955,10 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
 	dev_kfree_skb_any_reason(skb, SKB_CONSUMED);
 }
 
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+int netif_skb_segment_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
+			      struct bpf_prog *prog);
+#endif
 u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
 			     struct bpf_prog *xdp_prog);
 void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
diff --git a/net/core/dev.c b/net/core/dev.c
index 19f92ba90e49..b2fc8f0683dd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4939,9 +4939,8 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
 }
 
 #if IS_ENABLED(CONFIG_PAGE_POOL)
-static int
-netif_skb_segment_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
-			  struct bpf_prog *prog)
+int netif_skb_segment_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
+			      struct bpf_prog *prog)
 {
 	u32 size, truesize, len, max_head_size, off;
 	struct sk_buff *skb = *pskb, *nskb;
@@ -5016,6 +5015,7 @@ netif_skb_segment_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(netif_skb_segment_for_xdp);
 #endif
 
 static int
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-28 14:20 ` [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator Lorenzo Bianconi
@ 2024-01-29  6:37   ` kernel test robot
  2024-01-29 12:05   ` Yunsheng Lin
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 36+ messages in thread
From: kernel test robot @ 2024-01-29  6:37 UTC (permalink / raw)
  To: Lorenzo Bianconi, netdev
  Cc: oe-kbuild-all, lorenzo.bianconi, davem, kuba, edumazet, pabeni,
	bpf, toke, willemdebruijn.kernel, jasowang, sdf, hawk,
	ilias.apalodimas

Hi Lorenzo,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Lorenzo-Bianconi/net-add-generic-per-cpu-page_pool-allocator/20240128-222506
base:   net-next/main
patch link:    https://lore.kernel.org/r/5b0222d3df382c22fe0fa96154ae7b27189f7ecd.1706451150.git.lorenzo%40kernel.org
patch subject: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
config: sparc-randconfig-r123-20240129 (https://download.01.org/0day-ci/archive/20240129/202401291436.jz59b9EZ-lkp@intel.com/config)
compiler: sparc-linux-gcc (GCC) 13.2.0
reproduce: (https://download.01.org/0day-ci/archive/20240129/202401291436.jz59b9EZ-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401291436.jz59b9EZ-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> net/core/dev.c:447:1: sparse: sparse: symbol '__pcpu_scope_page_pool' was not declared. Should it be static?
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 1 (different base types) @@     expected unsigned int [usertype] val @@     got restricted __wsum @@
   net/core/dev.c:3352:23: sparse:     expected unsigned int [usertype] val
   net/core/dev.c:3352:23: sparse:     got restricted __wsum
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: cast from restricted __wsum
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: cast from restricted __wsum
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: cast from restricted __wsum
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: cast from restricted __wsum
   net/core/dev.c: note: in included file (through include/linux/mmzone.h, include/linux/gfp.h, include/linux/xarray.h, ...):
   include/linux/page-flags.h:242:46: sparse: sparse: self-comparison always evaluates to false
   net/core/dev.c:205:9: sparse: sparse: context imbalance in 'unlist_netdevice' - different lock contexts for basic block
   net/core/dev.c:3792:17: sparse: sparse: context imbalance in '__dev_queue_xmit' - different lock contexts for basic block
   net/core/dev.c:5172:17: sparse: sparse: context imbalance in 'net_tx_action' - different lock contexts for basic block
   net/core/dev.c:8833:38: sparse: sparse: self-comparison always evaluates to false

vim +/__pcpu_scope_page_pool +447 net/core/dev.c

   446	
 > 447	DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
   448	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 2/5] xdp: rely on skb pointer reference in do_xdp_generic and netif_receive_generic_xdp
  2024-01-28 14:20 ` [PATCH v6 net-next 2/5] xdp: rely on skb pointer reference in do_xdp_generic and netif_receive_generic_xdp Lorenzo Bianconi
@ 2024-01-29  8:05   ` Ilias Apalodimas
  2024-01-29  9:58     ` Lorenzo Bianconi
  0 siblings, 1 reply; 36+ messages in thread
From: Ilias Apalodimas @ 2024-01-29  8:05 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: netdev, lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf,
	toke, willemdebruijn.kernel, jasowang, sdf, hawk

Hi Lorenzo,

On Sun, 28 Jan 2024 at 16:22, Lorenzo Bianconi <lorenzo@kernel.org> wrote:
>
> Rely on skb pointer reference instead of the skb pointer in do_xdp_generic and
> netif_receive_generic_xdp routine signatures. This is a preliminary patch to add
> multi-buff support for xdp running in generic mode.

The patch looks fine, but can we tweak the commit message explaining
in more detail why this is needed?

Thanks
/Ilias
>
> Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  drivers/net/tun.c         |  4 ++--
>  include/linux/netdevice.h |  2 +-
>  net/core/dev.c            | 16 +++++++++-------
>  3 files changed, 12 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 4a4f8c8e79fa..5bd98bdaddf2 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1927,7 +1927,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>                 rcu_read_lock();
>                 xdp_prog = rcu_dereference(tun->xdp_prog);
>                 if (xdp_prog) {
> -                       ret = do_xdp_generic(xdp_prog, skb);
> +                       ret = do_xdp_generic(xdp_prog, &skb);
>                         if (ret != XDP_PASS) {
>                                 rcu_read_unlock();
>                                 local_bh_enable();
> @@ -2517,7 +2517,7 @@ static int tun_xdp_one(struct tun_struct *tun,
>         skb_record_rx_queue(skb, tfile->queue_index);
>
>         if (skb_xdp) {
> -               ret = do_xdp_generic(xdp_prog, skb);
> +               ret = do_xdp_generic(xdp_prog, &skb);
>                 if (ret != XDP_PASS) {
>                         ret = 0;
>                         goto out;
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 118c40258d07..7eee99a58200 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -3958,7 +3958,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
>  u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
>                              struct bpf_prog *xdp_prog);
>  void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
> -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
> +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb);
>  int netif_rx(struct sk_buff *skb);
>  int __netif_rx(struct sk_buff *skb);
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index bf9ec740b09a..960f39ac5e33 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4924,10 +4924,11 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
>         return act;
>  }
>
> -static u32 netif_receive_generic_xdp(struct sk_buff *skb,
> +static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
>                                      struct xdp_buff *xdp,
>                                      struct bpf_prog *xdp_prog)
>  {
> +       struct sk_buff *skb = *pskb;
>         u32 act = XDP_DROP;
>
>         /* Reinjected packets coming from act_mirred or similar should
> @@ -5008,24 +5009,24 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
>
>  static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
>
> -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
> +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
>  {
>         if (xdp_prog) {
>                 struct xdp_buff xdp;
>                 u32 act;
>                 int err;
>
> -               act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
> +               act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
>                 if (act != XDP_PASS) {
>                         switch (act) {
>                         case XDP_REDIRECT:
> -                               err = xdp_do_generic_redirect(skb->dev, skb,
> +                               err = xdp_do_generic_redirect((*pskb)->dev, *pskb,
>                                                               &xdp, xdp_prog);
>                                 if (err)
>                                         goto out_redir;
>                                 break;
>                         case XDP_TX:
> -                               generic_xdp_tx(skb, xdp_prog);
> +                               generic_xdp_tx(*pskb, xdp_prog);
>                                 break;
>                         }
>                         return XDP_DROP;
> @@ -5033,7 +5034,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
>         }
>         return XDP_PASS;
>  out_redir:
> -       kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
> +       kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
>         return XDP_DROP;
>  }
>  EXPORT_SYMBOL_GPL(do_xdp_generic);
> @@ -5356,7 +5357,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
>                 int ret2;
>
>                 migrate_disable();
> -               ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
> +               ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
> +                                     &skb);
>                 migrate_enable();
>
>                 if (ret2 != XDP_PASS) {
> --
> 2.43.0
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 2/5] xdp: rely on skb pointer reference in do_xdp_generic and netif_receive_generic_xdp
  2024-01-29  8:05   ` Ilias Apalodimas
@ 2024-01-29  9:58     ` Lorenzo Bianconi
  0 siblings, 0 replies; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-29  9:58 UTC (permalink / raw)
  To: Ilias Apalodimas
  Cc: netdev, lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf,
	toke, willemdebruijn.kernel, jasowang, sdf, hawk

[-- Attachment #1: Type: text/plain, Size: 5734 bytes --]

> Hi Lorenzo,
> 
> On Sun, 28 Jan 2024 at 16:22, Lorenzo Bianconi <lorenzo@kernel.org> wrote:
> >
> > Rely on skb pointer reference instead of the skb pointer in do_xdp_generic and
> > netif_receive_generic_xdp routine signatures. This is a preliminary patch to add
> > multi-buff support for xdp running in generic mode.
> 
> The patch looks fine, but can we tweak the commit message explaining
> in more detail  why this is needed?

ack, I will update commit log in the next iteration.

Regards,
Lorenzo

> 
> Thanks
> /Ilias
> >
> > Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >  drivers/net/tun.c         |  4 ++--
> >  include/linux/netdevice.h |  2 +-
> >  net/core/dev.c            | 16 +++++++++-------
> >  3 files changed, 12 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > index 4a4f8c8e79fa..5bd98bdaddf2 100644
> > --- a/drivers/net/tun.c
> > +++ b/drivers/net/tun.c
> > @@ -1927,7 +1927,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
> >                 rcu_read_lock();
> >                 xdp_prog = rcu_dereference(tun->xdp_prog);
> >                 if (xdp_prog) {
> > -                       ret = do_xdp_generic(xdp_prog, skb);
> > +                       ret = do_xdp_generic(xdp_prog, &skb);
> >                         if (ret != XDP_PASS) {
> >                                 rcu_read_unlock();
> >                                 local_bh_enable();
> > @@ -2517,7 +2517,7 @@ static int tun_xdp_one(struct tun_struct *tun,
> >         skb_record_rx_queue(skb, tfile->queue_index);
> >
> >         if (skb_xdp) {
> > -               ret = do_xdp_generic(xdp_prog, skb);
> > +               ret = do_xdp_generic(xdp_prog, &skb);
> >                 if (ret != XDP_PASS) {
> >                         ret = 0;
> >                         goto out;
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index 118c40258d07..7eee99a58200 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -3958,7 +3958,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
> >  u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
> >                              struct bpf_prog *xdp_prog);
> >  void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
> > -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
> > +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb);
> >  int netif_rx(struct sk_buff *skb);
> >  int __netif_rx(struct sk_buff *skb);
> >
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index bf9ec740b09a..960f39ac5e33 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -4924,10 +4924,11 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
> >         return act;
> >  }
> >
> > -static u32 netif_receive_generic_xdp(struct sk_buff *skb,
> > +static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
> >                                      struct xdp_buff *xdp,
> >                                      struct bpf_prog *xdp_prog)
> >  {
> > +       struct sk_buff *skb = *pskb;
> >         u32 act = XDP_DROP;
> >
> >         /* Reinjected packets coming from act_mirred or similar should
> > @@ -5008,24 +5009,24 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
> >
> >  static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
> >
> > -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
> > +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
> >  {
> >         if (xdp_prog) {
> >                 struct xdp_buff xdp;
> >                 u32 act;
> >                 int err;
> >
> > -               act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
> > +               act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
> >                 if (act != XDP_PASS) {
> >                         switch (act) {
> >                         case XDP_REDIRECT:
> > -                               err = xdp_do_generic_redirect(skb->dev, skb,
> > +                               err = xdp_do_generic_redirect((*pskb)->dev, *pskb,
> >                                                               &xdp, xdp_prog);
> >                                 if (err)
> >                                         goto out_redir;
> >                                 break;
> >                         case XDP_TX:
> > -                               generic_xdp_tx(skb, xdp_prog);
> > +                               generic_xdp_tx(*pskb, xdp_prog);
> >                                 break;
> >                         }
> >                         return XDP_DROP;
> > @@ -5033,7 +5034,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
> >         }
> >         return XDP_PASS;
> >  out_redir:
> > -       kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
> > +       kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
> >         return XDP_DROP;
> >  }
> >  EXPORT_SYMBOL_GPL(do_xdp_generic);
> > @@ -5356,7 +5357,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
> >                 int ret2;
> >
> >                 migrate_disable();
> > -               ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
> > +               ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
> > +                                     &skb);
> >                 migrate_enable();
> >
> >                 if (ret2 != XDP_PASS) {
> > --
> > 2.43.0
> >

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-28 14:20 ` [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator Lorenzo Bianconi
  2024-01-29  6:37   ` kernel test robot
@ 2024-01-29 12:05   ` Yunsheng Lin
  2024-01-29 13:04     ` Lorenzo Bianconi
  2024-01-29 12:45   ` Toke Høiland-Jørgensen
  2024-01-31 12:28   ` Jesper Dangaard Brouer
  3 siblings, 1 reply; 36+ messages in thread
From: Yunsheng Lin @ 2024-01-29 12:05 UTC (permalink / raw)
  To: Lorenzo Bianconi, netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

On 2024/1/28 22:20, Lorenzo Bianconi wrote:

>  #ifdef CONFIG_LOCKDEP
>  /*
>   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
>   *
>   */
>  
> +#define SD_PAGE_POOL_RING_SIZE	256

I might have missed it if there is a reason we chose 256 here, but do we
need to use a different value for different page sizes? For a 64K page size,
it means we might need to reserve 16MB of memory for each CPU.

> +static int net_page_pool_alloc(int cpuid)
> +{
> +#if IS_ENABLED(CONFIG_PAGE_POOL)
> +	struct page_pool_params page_pool_params = {
> +		.pool_size = SD_PAGE_POOL_RING_SIZE,
> +		.nid = NUMA_NO_NODE,
> +	};
> +	struct page_pool *pp_ptr;
> +
> +	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
> +	if (IS_ERR(pp_ptr)) {
> +		pp_ptr = NULL;

unnecessary NULL setting?

> +		return -ENOMEM;
> +	}
> +
> +	per_cpu(page_pool, cpuid) = pp_ptr;
> +#endif
> +	return 0;
> +}
> +
>  /*
>   *       This is called single threaded during boot, so no need
>   *       to take the rtnl semaphore.
> @@ -11738,6 +11763,9 @@ static int __init net_dev_init(void)
>  		init_gro_hash(&sd->backlog);
>  		sd->backlog.poll = process_backlog;
>  		sd->backlog.weight = weight_p;
> +
> +		if (net_page_pool_alloc(i))
> +			goto out;
>  	}
>  
>  	dev_boot_phase = 0;
> @@ -11765,6 +11793,18 @@ static int __init net_dev_init(void)
>  	WARN_ON(rc < 0);
>  	rc = 0;
>  out:
> +	if (rc < 0) {
> +		for_each_possible_cpu(i) {
> +			struct page_pool *pp_ptr = this_cpu_read(page_pool);

this_cpu_read() -> per_cpu_ptr()?

> +
> +			if (!pp_ptr)
> +				continue;
> +
> +			page_pool_destroy(pp_ptr);
> +			per_cpu(page_pool, i) = NULL;
> +		}
> +	}
> +
>  	return rc;
>  }


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools
  2024-01-28 14:20 ` [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools Lorenzo Bianconi
@ 2024-01-29 12:06   ` Yunsheng Lin
  2024-01-29 13:07     ` Lorenzo Bianconi
  0 siblings, 1 reply; 36+ messages in thread
From: Yunsheng Lin @ 2024-01-29 12:06 UTC (permalink / raw)
  To: Lorenzo Bianconi, netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

On 2024/1/28 22:20, Lorenzo Bianconi wrote:
> Move page_pool stats allocation in page_pool_create routine and get rid
> of it for percpu page_pools.

Is there any reason why we do not need those kinds of stats for the per-cpu
page_pool?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode
  2024-01-28 14:20 [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Lorenzo Bianconi
                   ` (4 preceding siblings ...)
  2024-01-28 14:20 ` [PATCH v6 net-next 5/5] veth: rely on netif_skb_segment_for_xdp utility routine Lorenzo Bianconi
@ 2024-01-29 12:43 ` Toke Høiland-Jørgensen
  2024-01-29 13:05   ` Lorenzo Bianconi
  5 siblings, 1 reply; 36+ messages in thread
From: Toke Høiland-Jørgensen @ 2024-01-29 12:43 UTC (permalink / raw)
  To: Lorenzo Bianconi, netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

Lorenzo Bianconi <lorenzo@kernel.org> writes:

> Introduce multi-buffer support for xdp running in generic mode not always
> linearizing the skb in netif_receive_generic_xdp routine.
> Introduce page_pool in softnet_data structure

This last line is not accurate anymore... :)

-Toke


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-28 14:20 ` [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator Lorenzo Bianconi
  2024-01-29  6:37   ` kernel test robot
  2024-01-29 12:05   ` Yunsheng Lin
@ 2024-01-29 12:45   ` Toke Høiland-Jørgensen
  2024-01-29 12:52     ` Lorenzo Bianconi
  2024-01-31 12:28   ` Jesper Dangaard Brouer
  3 siblings, 1 reply; 36+ messages in thread
From: Toke Høiland-Jørgensen @ 2024-01-29 12:45 UTC (permalink / raw)
  To: Lorenzo Bianconi, netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

Lorenzo Bianconi <lorenzo@kernel.org> writes:

> Introduce generic percpu page_pools allocator.
> > Moreover add page_pool_create_percpu() and cpuid field in page_pool struct
> in order to recycle the page in the page_pool "hot" cache if
> napi_pp_put_page() is running on the same cpu.
> This is a preliminary patch to add xdp multi-buff support for xdp running
> in generic mode.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  include/net/page_pool/types.h |  3 +++
>  net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
>  net/core/page_pool.c          | 23 ++++++++++++++++----
>  net/core/skbuff.c             |  5 +++--
>  4 files changed, 65 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index 76481c465375..3828396ae60c 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -128,6 +128,7 @@ struct page_pool_stats {
>  struct page_pool {
>  	struct page_pool_params_fast p;
>  
> +	int cpuid;
>  	bool has_init_callback;
>  
>  	long frag_users;
> @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
>  struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
>  				  unsigned int size, gfp_t gfp);
>  struct page_pool *page_pool_create(const struct page_pool_params *params);
> +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
> +					  int cpuid);
>  
>  struct xdp_mem_info;
>  
> diff --git a/net/core/dev.c b/net/core/dev.c
> index cb2dab0feee0..bf9ec740b09a 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -153,6 +153,8 @@
>  #include <linux/prandom.h>
>  #include <linux/once_lite.h>
>  #include <net/netdev_rx_queue.h>
> +#include <net/page_pool/types.h>
> +#include <net/page_pool/helpers.h>
>  
>  #include "dev.h"
>  #include "net-sysfs.h"
> @@ -442,6 +444,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
>  DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
>  EXPORT_PER_CPU_SYMBOL(softnet_data);
>  
> +DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);

I think we should come up with a better name than just "page_pool" for
this global var. In the code below it looks like it's a local variable
that's being referenced. Maybe "global_page_pool" or "system_page_pool"
or something along those lines?

-Toke


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-29 12:45   ` Toke Høiland-Jørgensen
@ 2024-01-29 12:52     ` Lorenzo Bianconi
  2024-01-29 15:44       ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-29 12:52 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 2657 bytes --]

> Lorenzo Bianconi <lorenzo@kernel.org> writes:
> 
> > Introduce generic percpu page_pools allocator.
> > > Moreover add page_pool_create_percpu() and cpuid field in page_pool struct
> > in order to recycle the page in the page_pool "hot" cache if
> > napi_pp_put_page() is running on the same cpu.
> > This is a preliminary patch to add xdp multi-buff support for xdp running
> > in generic mode.
> >
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >  include/net/page_pool/types.h |  3 +++
> >  net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
> >  net/core/page_pool.c          | 23 ++++++++++++++++----
> >  net/core/skbuff.c             |  5 +++--
> >  4 files changed, 65 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> > index 76481c465375..3828396ae60c 100644
> > --- a/include/net/page_pool/types.h
> > +++ b/include/net/page_pool/types.h
> > @@ -128,6 +128,7 @@ struct page_pool_stats {
> >  struct page_pool {
> >  	struct page_pool_params_fast p;
> >  
> > +	int cpuid;
> >  	bool has_init_callback;
> >  
> >  	long frag_users;
> > @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
> >  struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
> >  				  unsigned int size, gfp_t gfp);
> >  struct page_pool *page_pool_create(const struct page_pool_params *params);
> > +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
> > +					  int cpuid);
> >  
> >  struct xdp_mem_info;
> >  
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index cb2dab0feee0..bf9ec740b09a 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -153,6 +153,8 @@
> >  #include <linux/prandom.h>
> >  #include <linux/once_lite.h>
> >  #include <net/netdev_rx_queue.h>
> > +#include <net/page_pool/types.h>
> > +#include <net/page_pool/helpers.h>
> >  
> >  #include "dev.h"
> >  #include "net-sysfs.h"
> > @@ -442,6 +444,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
> >  DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
> >  EXPORT_PER_CPU_SYMBOL(softnet_data);
> >  
> > +DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
> 
> I think we should come up with a better name than just "page_pool" for
> this global var. In the code below it looks like it's a local variable
> that's being referenced. Maybe "global_page_pool" or "system_page_pool"
> or something along those lines?

ack, I will fix it. system_page_pool seems better, agree?

Regards,
Lorenzo

> 
> -Toke
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-29 12:05   ` Yunsheng Lin
@ 2024-01-29 13:04     ` Lorenzo Bianconi
  2024-01-30 11:02       ` Yunsheng Lin
  0 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-29 13:04 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	toke, willemdebruijn.kernel, jasowang, sdf, hawk,
	ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 2712 bytes --]

> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
> 
> >  #ifdef CONFIG_LOCKDEP
> >  /*
> >   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
> > @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
> >   *
> >   */
> >  
> > +#define SD_PAGE_POOL_RING_SIZE	256
> 
> I might have missed it if there is a reason we chose 256 here, but do we
> need to use a different value for different page sizes? For a 64K page size,
> it means we might need to reserve 16MB of memory for each CPU.

honestly I have not spent time on it, most of the current page_pool users set
pool_size to 256. Anyway, do you mean something like:

diff --git a/net/core/dev.c b/net/core/dev.c
index f70fb6cad2b2..3934a3fc5c45 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11806,12 +11806,11 @@ static void __init net_dev_struct_check(void)
  *
  */
 
-#define SD_PAGE_POOL_RING_SIZE	256
 static int net_page_pool_alloc(int cpuid)
 {
 #if IS_ENABLED(CONFIG_PAGE_POOL)
 	struct page_pool_params page_pool_params = {
-		.pool_size = SD_PAGE_POOL_RING_SIZE,
+		.pool_size = PAGE_SIZE < SZ_64K ? 256 : 16,
 		.nid = NUMA_NO_NODE,
 	};
 	struct page_pool *pp_ptr;

> 
> > +static int net_page_pool_alloc(int cpuid)
> > +{
> > +#if IS_ENABLED(CONFIG_PAGE_POOL)
> > +	struct page_pool_params page_pool_params = {
> > +		.pool_size = SD_PAGE_POOL_RING_SIZE,
> > +		.nid = NUMA_NO_NODE,
> > +	};
> > +	struct page_pool *pp_ptr;
> > +
> > +	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
> > +	if (IS_ERR(pp_ptr)) {
> > +		pp_ptr = NULL;
> 
> unnecessary NULL setting?

ack, I will get rid of it.

> 
> > +		return -ENOMEM;
> > +	}
> > +
> > +	per_cpu(page_pool, cpuid) = pp_ptr;
> > +#endif
> > +	return 0;
> > +}
> > +
> >  /*
> >   *       This is called single threaded during boot, so no need
> >   *       to take the rtnl semaphore.
> > @@ -11738,6 +11763,9 @@ static int __init net_dev_init(void)
> >  		init_gro_hash(&sd->backlog);
> >  		sd->backlog.poll = process_backlog;
> >  		sd->backlog.weight = weight_p;
> > +
> > +		if (net_page_pool_alloc(i))
> > +			goto out;
> >  	}
> >  
> >  	dev_boot_phase = 0;
> > @@ -11765,6 +11793,18 @@ static int __init net_dev_init(void)
> >  	WARN_ON(rc < 0);
> >  	rc = 0;
> >  out:
> > +	if (rc < 0) {
> > +		for_each_possible_cpu(i) {
> > +			struct page_pool *pp_ptr = this_cpu_read(page_pool);
> 
> this_cpu_read() -> per_cpu_ptr()?

ack, I will fix it.

Regards,
Lorenzo

> 
> > +
> > +			if (!pp_ptr)
> > +				continue;
> > +
> > +			page_pool_destroy(pp_ptr);
> > +			per_cpu(page_pool, i) = NULL;
> > +		}
> > +	}
> > +
> >  	return rc;
> >  }
> 
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode
  2024-01-29 12:43 ` [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Toke Høiland-Jørgensen
@ 2024-01-29 13:05   ` Lorenzo Bianconi
  0 siblings, 0 replies; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-29 13:05 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 387 bytes --]

> Lorenzo Bianconi <lorenzo@kernel.org> writes:
> 
> > Introduce multi-buffer support for xdp running in generic mode not always
> > linearizing the skb in netif_receive_generic_xdp routine.
> > Introduce page_pool in softnet_data structure
> 
> This last line is not accurate anymore... :)

ack, I just copied it from v5. I will fix it.

Regards,
Lorenzo

> 
> -Toke
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools
  2024-01-29 12:06   ` Yunsheng Lin
@ 2024-01-29 13:07     ` Lorenzo Bianconi
  2024-01-30 11:23       ` Yunsheng Lin
  0 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-29 13:07 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	toke, willemdebruijn.kernel, jasowang, sdf, hawk,
	ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 400 bytes --]

> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
> > Move page_pool stats allocation in page_pool_create routine and get rid
> > of it for percpu page_pools.
> 
> Is there any reason why we do not need those kind stats for per cpu
> page_pool?
> 

IIRC discussing with Jakub, we decided to not support them since the pool is not
associated to any net_device in this case.

Regards,
Lorenzo

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-29 12:52     ` Lorenzo Bianconi
@ 2024-01-29 15:44       ` Toke Høiland-Jørgensen
  2024-01-31 12:51         ` Jesper Dangaard Brouer
  0 siblings, 1 reply; 36+ messages in thread
From: Toke Høiland-Jørgensen @ 2024-01-29 15:44 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

Lorenzo Bianconi <lorenzo.bianconi@redhat.com> writes:

>> Lorenzo Bianconi <lorenzo@kernel.org> writes:
>> 
>> > Introduce generic percpu page_pools allocator.
>> > Moreover add page_pool_create_percpu() and cpuid filed in page_pool struct
>> > in order to recycle the page in the page_pool "hot" cache if
>> > napi_pp_put_page() is running on the same cpu.
>> > This is a preliminary patch to add xdp multi-buff support for xdp running
>> > in generic mode.
>> >
>> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
>> > ---
>> >  include/net/page_pool/types.h |  3 +++
>> >  net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
>> >  net/core/page_pool.c          | 23 ++++++++++++++++----
>> >  net/core/skbuff.c             |  5 +++--
>> >  4 files changed, 65 insertions(+), 6 deletions(-)
>> >
>> > diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
>> > index 76481c465375..3828396ae60c 100644
>> > --- a/include/net/page_pool/types.h
>> > +++ b/include/net/page_pool/types.h
>> > @@ -128,6 +128,7 @@ struct page_pool_stats {
>> >  struct page_pool {
>> >  	struct page_pool_params_fast p;
>> >  
>> > +	int cpuid;
>> >  	bool has_init_callback;
>> >  
>> >  	long frag_users;
>> > @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
>> >  struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
>> >  				  unsigned int size, gfp_t gfp);
>> >  struct page_pool *page_pool_create(const struct page_pool_params *params);
>> > +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
>> > +					  int cpuid);
>> >  
>> >  struct xdp_mem_info;
>> >  
>> > diff --git a/net/core/dev.c b/net/core/dev.c
>> > index cb2dab0feee0..bf9ec740b09a 100644
>> > --- a/net/core/dev.c
>> > +++ b/net/core/dev.c
>> > @@ -153,6 +153,8 @@
>> >  #include <linux/prandom.h>
>> >  #include <linux/once_lite.h>
>> >  #include <net/netdev_rx_queue.h>
>> > +#include <net/page_pool/types.h>
>> > +#include <net/page_pool/helpers.h>
>> >  
>> >  #include "dev.h"
>> >  #include "net-sysfs.h"
>> > @@ -442,6 +444,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
>> >  DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
>> >  EXPORT_PER_CPU_SYMBOL(softnet_data);
>> >  
>> > +DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
>> 
>> I think we should come up with a better name than just "page_pool" for
>> this global var. In the code below it looks like it's a local variable
>> that's being referenced. Maybe "global_page_pool" or "system_page_pool"
>> or something along those lines?
>
> ack, I will fix it. system_page_pool seems better, agree?

Yeah, agreed :)

-Toke


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-29 13:04     ` Lorenzo Bianconi
@ 2024-01-30 11:02       ` Yunsheng Lin
  2024-01-30 14:26         ` Lorenzo Bianconi
  0 siblings, 1 reply; 36+ messages in thread
From: Yunsheng Lin @ 2024-01-30 11:02 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	toke, willemdebruijn.kernel, jasowang, sdf, hawk,
	ilias.apalodimas

On 2024/1/29 21:04, Lorenzo Bianconi wrote:
>> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
>>
>>>  #ifdef CONFIG_LOCKDEP
>>>  /*
>>>   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
>>> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
>>>   *
>>>   */
>>>  
>>> +#define SD_PAGE_POOL_RING_SIZE	256
>>
>> I might missed that if there is a reason we choose 256 here, do we
>> need to use different value for differe page size, for 64K page size,
>> it means we might need to reserve 16MB memory for each CPU.
> 
> honestly I have not spent time on it, most of the current page_pool users set
> pool_size to 256. Anyway, do you mean something like:
> 
> diff --git a/net/core/dev.c b/net/core/dev.c
> index f70fb6cad2b2..3934a3fc5c45 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -11806,12 +11806,11 @@ static void __init net_dev_struct_check(void)
>   *
>   */
>  
> -#define SD_PAGE_POOL_RING_SIZE	256
>  static int net_page_pool_alloc(int cpuid)
>  {
>  #if IS_ENABLED(CONFIG_PAGE_POOL)

Isn't it better to have a config option like CONFIG_PER_CPU_PAGE_POOL to enable
this feature? And this config could be selected by whoever needs this
feature?

>  	struct page_pool_params page_pool_params = {
> -		.pool_size = SD_PAGE_POOL_RING_SIZE,
> +		.pool_size = PAGE_SIZE < SZ_64K ? 256 : 16,

What about other page size? like 16KB?
How about something like below:
PAGE_SIZE << get_order(PER_CPU_PAGE_POOL_MAX_SIZE)

>  		.nid = NUMA_NO_NODE,
>  	};
>  	struct page_pool *pp_ptr;

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools
  2024-01-29 13:07     ` Lorenzo Bianconi
@ 2024-01-30 11:23       ` Yunsheng Lin
  2024-01-30 13:52         ` Lorenzo Bianconi
  0 siblings, 1 reply; 36+ messages in thread
From: Yunsheng Lin @ 2024-01-30 11:23 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	toke, willemdebruijn.kernel, jasowang, sdf, hawk,
	ilias.apalodimas

On 2024/1/29 21:07, Lorenzo Bianconi wrote:
>> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
>>> Move page_pool stats allocation in page_pool_create routine and get rid
>>> of it for percpu page_pools.
>>
>> Is there any reason why we do not need those kind stats for per cpu
>> page_pool?
>>
> 
> IIRC discussing with Jakub, we decided to not support them since the pool is not
> associated to any net_device in this case.

It seems what Jakub suggested is to 'extend netlink to dump unbound page pools'?

> 
> Regards,
> Lorenzo
> 

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools
  2024-01-30 11:23       ` Yunsheng Lin
@ 2024-01-30 13:52         ` Lorenzo Bianconi
  2024-01-30 15:11           ` Jesper Dangaard Brouer
  0 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-30 13:52 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	toke, willemdebruijn.kernel, jasowang, sdf, hawk,
	ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 1031 bytes --]

> On 2024/1/29 21:07, Lorenzo Bianconi wrote:
> >> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
> >>> Move page_pool stats allocation in page_pool_create routine and get rid
> >>> of it for percpu page_pools.
> >>
> >> Is there any reason why we do not need those kind stats for per cpu
> >> page_pool?
> >>
> > 
> > IIRC discussing with Jakub, we decided to not support them since the pool is not
> > associated to any net_device in this case.
> 
> It seems what jakub suggested is to 'extend netlink to dump unbound page pools'?

I do not have a strong opinion about it (since we do not have any use-case for
it at the moment).
In the case we want to support stats for per-cpu page_pools, I think we should
not create a per-cpu recycle_stats pointer and add a page_pool_recycle_stats field
in the page_pool struct, since otherwise we will end up with ncpu^2 copies, right?
Do we want to support it now?

@Jakub, Jesper: what do you guys think?

Regards,
Lorenzo

> 
> > 
> > Regards,
> > Lorenzo
> > 
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-30 11:02       ` Yunsheng Lin
@ 2024-01-30 14:26         ` Lorenzo Bianconi
  2024-02-01 11:49           ` Lorenzo Bianconi
  0 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-30 14:26 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	toke, willemdebruijn.kernel, jasowang, sdf, hawk,
	ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 2021 bytes --]

> On 2024/1/29 21:04, Lorenzo Bianconi wrote:
> >> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
> >>
> >>>  #ifdef CONFIG_LOCKDEP
> >>>  /*
> >>>   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
> >>> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
> >>>   *
> >>>   */
> >>>  
> >>> +#define SD_PAGE_POOL_RING_SIZE	256
> >>
> >> I might missed that if there is a reason we choose 256 here, do we
> >> need to use different value for differe page size, for 64K page size,
> >> it means we might need to reserve 16MB memory for each CPU.
> > 
> > honestly I have not spent time on it, most of the current page_pool users set
> > pool_size to 256. Anyway, do you mean something like:
> > 
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index f70fb6cad2b2..3934a3fc5c45 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -11806,12 +11806,11 @@ static void __init net_dev_struct_check(void)
> >   *
> >   */
> >  
> > -#define SD_PAGE_POOL_RING_SIZE	256
> >  static int net_page_pool_alloc(int cpuid)
> >  {
> >  #if IS_ENABLED(CONFIG_PAGE_POOL)
> 
> Isn't better to have a config like CONFIG_PER_CPU_PAGE_POOL to enable
> this feature? and this config can be selected by whoever needs this
> feature?

since it will be used for generic xdp (at least) I think this will be 99%
enabled when we have bpf enabled, right?

> 
> >  	struct page_pool_params page_pool_params = {
> > -		.pool_size = SD_PAGE_POOL_RING_SIZE,
> > +		.pool_size = PAGE_SIZE < SZ_64K ? 256 : 16,
> 
> What about other page size? like 16KB?
> How about something like below:
> PAGE_SIZE << get_order(PER_CPU_PAGE_POOL_MAX_SIZE)

since pool_size is the number of elements in the ptr_ring associated to the pool,
assuming we want to consume PER_CPU_PAGE_POOL_MAX_SIZE for each cpu, something
like:

PER_CPU_PAGE_POOL_MAX_SIZE / PAGE_SIZE

Regards,
Lorenzo

> 
> >  		.nid = NUMA_NO_NODE,
> >  	};
> >  	struct page_pool *pp_ptr;
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools
  2024-01-30 13:52         ` Lorenzo Bianconi
@ 2024-01-30 15:11           ` Jesper Dangaard Brouer
  2024-01-30 16:01             ` Lorenzo Bianconi
  0 siblings, 1 reply; 36+ messages in thread
From: Jesper Dangaard Brouer @ 2024-01-30 15:11 UTC (permalink / raw)
  To: Lorenzo Bianconi, Yunsheng Lin
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	toke, willemdebruijn.kernel, jasowang, sdf, ilias.apalodimas



On 30/01/2024 14.52, Lorenzo Bianconi wrote:
>> On 2024/1/29 21:07, Lorenzo Bianconi wrote:
>>>> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
>>>>> Move page_pool stats allocation in page_pool_create routine and get rid
>>>>> of it for percpu page_pools.
>>>>
>>>> Is there any reason why we do not need those kind stats for per cpu
>>>> page_pool?
>>>>
>>>
>>> IIRC discussing with Jakub, we decided to not support them since the pool is not
>>> associated to any net_device in this case.
>>
>> It seems what jakub suggested is to 'extend netlink to dump unbound page pools'?
> 
> I do not have a strong opinion about it (since we do not have any use-case for
> it at the moment).
> In the case we want to support stats for per-cpu page_pools, I think we should
> not create a per-cpu recycle_stats pointer and add a page_pool_recycle_stats field
> in page_pool struct since otherwise we will endup with ncpu^2 copies, right?
> Do we want to support it now?
> 
> @Jakub, Jesper: what do you guys think?
> 


I do see a need for being able to access page_pool stats for all 
page_pools in the system.
And I do like Jakub's netlink based stats.

--Jesper
(p.s. I'm debugging some production issues with page_pool and broadcom 
bnxt_en driver).

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools
  2024-01-30 15:11           ` Jesper Dangaard Brouer
@ 2024-01-30 16:01             ` Lorenzo Bianconi
  2024-01-31 15:32               ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-30 16:01 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Yunsheng Lin, Lorenzo Bianconi, netdev, davem, kuba, edumazet,
	pabeni, bpf, toke, willemdebruijn.kernel, jasowang, sdf,
	ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 1879 bytes --]

> 
> 
> On 30/01/2024 14.52, Lorenzo Bianconi wrote:
> > > On 2024/1/29 21:07, Lorenzo Bianconi wrote:
> > > > > On 2024/1/28 22:20, Lorenzo Bianconi wrote:
> > > > > > Move page_pool stats allocation in page_pool_create routine and get rid
> > > > > > of it for percpu page_pools.
> > > > > 
> > > > > Is there any reason why we do not need those kind stats for per cpu
> > > > > page_pool?
> > > > > 
> > > > 
> > > > IIRC discussing with Jakub, we decided to not support them since the pool is not
> > > > associated to any net_device in this case.
> > > 
> > > It seems what jakub suggested is to 'extend netlink to dump unbound page pools'?
> > 
> > I do not have a strong opinion about it (since we do not have any use-case for
> > it at the moment).
> > In the case we want to support stats for per-cpu page_pools, I think we should
> > not create a per-cpu recycle_stats pointer and add a page_pool_recycle_stats field
> > in page_pool struct since otherwise we will endup with ncpu^2 copies, right?
> > Do we want to support it now?
> > 
> > @Jakub, Jesper: what do you guys think?
> > 
> 
> 
> I do see an need for being able to access page_pool stats for all
> page_pool's in the system.
> And I do like Jakub's netlink based stats.

ack from my side if you have some use-cases in mind.
Some questions below:
- can we assume ethtool will be used to report stats just for 'global'
  page_pool (not per-cpu page_pool)?
- can we assume netlink/yaml will be used to report per-cpu page_pool stats?

I think in the current series we can fix the accounting part (in particular
avoiding memory wasting) and then we will figure out how to report percpu
page_pool stats through netlink/yaml. Agree?

Regards,
Lorenzo

> 
> --Jesper
> (p.s. I'm debugging some production issues with page_pool and broadcom
> bnxt_en driver).
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-28 14:20 ` [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator Lorenzo Bianconi
                     ` (2 preceding siblings ...)
  2024-01-29 12:45   ` Toke Høiland-Jørgensen
@ 2024-01-31 12:28   ` Jesper Dangaard Brouer
  2024-01-31 13:36     ` Lorenzo Bianconi
  3 siblings, 1 reply; 36+ messages in thread
From: Jesper Dangaard Brouer @ 2024-01-31 12:28 UTC (permalink / raw)
  To: Lorenzo Bianconi, netdev
  Cc: lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, ilias.apalodimas



On 28/01/2024 15.20, Lorenzo Bianconi wrote:
> Introduce generic percpu page_pools allocator.
> Moreover add page_pool_create_percpu() and cpuid filed in page_pool struct
> in order to recycle the page in the page_pool "hot" cache if
> napi_pp_put_page() is running on the same cpu.
> This is a preliminary patch to add xdp multi-buff support for xdp running
> in generic mode.
> 
> Signed-off-by: Lorenzo Bianconi<lorenzo@kernel.org>
> ---
>   include/net/page_pool/types.h |  3 +++
>   net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
>   net/core/page_pool.c          | 23 ++++++++++++++++----
>   net/core/skbuff.c             |  5 +++--
>   4 files changed, 65 insertions(+), 6 deletions(-)
> 
[...]
> diff --git a/net/core/dev.c b/net/core/dev.c
> index cb2dab0feee0..bf9ec740b09a 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
[...]
> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
>    *
>    */
>   
> +#define SD_PAGE_POOL_RING_SIZE	256
> +static int net_page_pool_alloc(int cpuid)

I don't like the name net_page_pool_alloc().
It uses the page_pool_create APIs.

Let us renamed to net_page_pool_create() ?


> +{
> +#if IS_ENABLED(CONFIG_PAGE_POOL)
> +	struct page_pool_params page_pool_params = {
> +		.pool_size = SD_PAGE_POOL_RING_SIZE,
> +		.nid = NUMA_NO_NODE,
> +	};
> +	struct page_pool *pp_ptr;
> +
> +	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
> +	if (IS_ERR(pp_ptr)) {
> +		pp_ptr = NULL;
> +		return -ENOMEM;
> +	}
> +
> +	per_cpu(page_pool, cpuid) = pp_ptr;
> +#endif
> +	return 0;
> +}
> +
>   /*
>    *       This is called single threaded during boot, so no need
>    *       to take the rtnl semaphore.
> @@ -11738,6 +11763,9 @@ static int __init net_dev_init(void)
>   		init_gro_hash(&sd->backlog);
>   		sd->backlog.poll = process_backlog;
>   		sd->backlog.weight = weight_p;
> +
> +		if (net_page_pool_alloc(i))
> +			goto out;
>   	}
>   
>   	dev_boot_phase = 0;
> @@ -11765,6 +11793,18 @@ static int __init net_dev_init(void)
>   	WARN_ON(rc < 0);
>   	rc = 0;
>   out:
> +	if (rc < 0) {
> +		for_each_possible_cpu(i) {
> +			struct page_pool *pp_ptr = this_cpu_read(page_pool);
> +
> +			if (!pp_ptr)
> +				continue;
> +
> +			page_pool_destroy(pp_ptr);
> +			per_cpu(page_pool, i) = NULL;
> +		}
> +	}
> +
>   	return rc;
>   }
>   

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-29 15:44       ` Toke Høiland-Jørgensen
@ 2024-01-31 12:51         ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 36+ messages in thread
From: Jesper Dangaard Brouer @ 2024-01-31 12:51 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, Lorenzo Bianconi
  Cc: Lorenzo Bianconi, netdev, davem, kuba, edumazet, pabeni, bpf,
	willemdebruijn.kernel, jasowang, sdf, ilias.apalodimas,
	Sebastian Andrzej Siewior



On 29/01/2024 16.44, Toke Høiland-Jørgensen wrote:
> Lorenzo Bianconi <lorenzo.bianconi@redhat.com> writes:
> 
>>> Lorenzo Bianconi <lorenzo@kernel.org> writes:
>>>
>>>> Introduce generic percpu page_pools allocator.
>>>> Moreover add page_pool_create_percpu() and cpuid filed in page_pool struct
>>>> in order to recycle the page in the page_pool "hot" cache if
>>>> napi_pp_put_page() is running on the same cpu.
>>>> This is a preliminary patch to add xdp multi-buff support for xdp running
>>>> in generic mode.
>>>>
>>>> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
>>>> ---
>>>>   include/net/page_pool/types.h |  3 +++
>>>>   net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
>>>>   net/core/page_pool.c          | 23 ++++++++++++++++----
>>>>   net/core/skbuff.c             |  5 +++--
>>>>   4 files changed, 65 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
>>>> index 76481c465375..3828396ae60c 100644
>>>> --- a/include/net/page_pool/types.h
>>>> +++ b/include/net/page_pool/types.h
>>>> @@ -128,6 +128,7 @@ struct page_pool_stats {
>>>>   struct page_pool {
>>>>   	struct page_pool_params_fast p;
>>>>   
>>>> +	int cpuid;
>>>>   	bool has_init_callback;
>>>>   
>>>>   	long frag_users;
>>>> @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
>>>>   struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
>>>>   				  unsigned int size, gfp_t gfp);
>>>>   struct page_pool *page_pool_create(const struct page_pool_params *params);
>>>> +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
>>>> +					  int cpuid);
>>>>   
>>>>   struct xdp_mem_info;
>>>>   
>>>> diff --git a/net/core/dev.c b/net/core/dev.c
>>>> index cb2dab0feee0..bf9ec740b09a 100644
>>>> --- a/net/core/dev.c
>>>> +++ b/net/core/dev.c
>>>> @@ -153,6 +153,8 @@
>>>>   #include <linux/prandom.h>
>>>>   #include <linux/once_lite.h>
>>>>   #include <net/netdev_rx_queue.h>
>>>> +#include <net/page_pool/types.h>
>>>> +#include <net/page_pool/helpers.h>
>>>>   
>>>>   #include "dev.h"
>>>>   #include "net-sysfs.h"
>>>> @@ -442,6 +444,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
>>>>   DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
>>>>   EXPORT_PER_CPU_SYMBOL(softnet_data);
>>>>   
>>>> +DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
>>>
>>> I think we should come up with a better name than just "page_pool" for
>>> this global var. In the code below it looks like it's a local variable
>>> that's being referenced. Maybe "global_page_pool" or "system_page_pool"
>>> or something along those lines?
>>
>> ack, I will fix it. system_page_pool seems better, agree?
> 
> Yeah, agreed :)

Naming it "system_page_pool" is good by me.

Should we add some comments about concurrency expectations when using this?
Or is this implied by "PER_CPU" define?

PP alloc side has a lockless array/stack, and the per_cpu stuff does
already imply only one CPU is accessing this, and implicitly (also) we
cannot handle reentrance caused by preemption.  So, the caller has the
responsibility to call this from an appropriate context.

--Jesper

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-31 12:28   ` Jesper Dangaard Brouer
@ 2024-01-31 13:36     ` Lorenzo Bianconi
  0 siblings, 0 replies; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-01-31 13:36 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, lorenzo.bianconi, davem, kuba, edumazet, pabeni, bpf,
	toke, willemdebruijn.kernel, jasowang, sdf, ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 2629 bytes --]

> 
> 
> On 28/01/2024 15.20, Lorenzo Bianconi wrote:
> > Introduce generic percpu page_pools allocator.
> > Moreover add page_pool_create_percpu() and cpuid filed in page_pool struct
> > in order to recycle the page in the page_pool "hot" cache if
> > napi_pp_put_page() is running on the same cpu.
> > This is a preliminary patch to add xdp multi-buff support for xdp running
> > in generic mode.
> > 
> > Signed-off-by: Lorenzo Bianconi<lorenzo@kernel.org>
> > ---
> >   include/net/page_pool/types.h |  3 +++
> >   net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
> >   net/core/page_pool.c          | 23 ++++++++++++++++----
> >   net/core/skbuff.c             |  5 +++--
> >   4 files changed, 65 insertions(+), 6 deletions(-)
> > 
> [...]
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index cb2dab0feee0..bf9ec740b09a 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> [...]
> > @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
> >    *
> >    */
> > +#define SD_PAGE_POOL_RING_SIZE	256
> > +static int net_page_pool_alloc(int cpuid)
> 
> I don't like the name net_page_pool_alloc().
> It uses the page_pool_create APIs.
> 
> Let us renamed to net_page_pool_create() ?

ack, I will fix it.

Regards,
Lorenzo

> 
> 
> > +{
> > +#if IS_ENABLED(CONFIG_PAGE_POOL)
> > +	struct page_pool_params page_pool_params = {
> > +		.pool_size = SD_PAGE_POOL_RING_SIZE,
> > +		.nid = NUMA_NO_NODE,
> > +	};
> > +	struct page_pool *pp_ptr;
> > +
> > +	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
> > +	if (IS_ERR(pp_ptr)) {
> > +		pp_ptr = NULL;
> > +		return -ENOMEM;
> > +	}
> > +
> > +	per_cpu(page_pool, cpuid) = pp_ptr;
> > +#endif
> > +	return 0;
> > +}
> > +
> >   /*
> >    *       This is called single threaded during boot, so no need
> >    *       to take the rtnl semaphore.
> > @@ -11738,6 +11763,9 @@ static int __init net_dev_init(void)
> >   		init_gro_hash(&sd->backlog);
> >   		sd->backlog.poll = process_backlog;
> >   		sd->backlog.weight = weight_p;
> > +
> > +		if (net_page_pool_alloc(i))
> > +			goto out;
> >   	}
> >   	dev_boot_phase = 0;
> > @@ -11765,6 +11793,18 @@ static int __init net_dev_init(void)
> >   	WARN_ON(rc < 0);
> >   	rc = 0;
> >   out:
> > +	if (rc < 0) {
> > +		for_each_possible_cpu(i) {
> > +			struct page_pool *pp_ptr = this_cpu_read(page_pool);
> > +
> > +			if (!pp_ptr)
> > +				continue;
> > +
> > +			page_pool_destroy(pp_ptr);
> > +			per_cpu(page_pool, i) = NULL;
> > +		}
> > +	}
> > +
> >   	return rc;
> >   }

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools
  2024-01-30 16:01             ` Lorenzo Bianconi
@ 2024-01-31 15:32               ` Toke Høiland-Jørgensen
  2024-01-31 23:52                 ` Jakub Kicinski
  0 siblings, 1 reply; 36+ messages in thread
From: Toke Høiland-Jørgensen @ 2024-01-31 15:32 UTC (permalink / raw)
  To: Lorenzo Bianconi, Jesper Dangaard Brouer
  Cc: Yunsheng Lin, Lorenzo Bianconi, netdev, davem, kuba, edumazet,
	pabeni, bpf, willemdebruijn.kernel, jasowang, sdf,
	ilias.apalodimas

Lorenzo Bianconi <lorenzo.bianconi@redhat.com> writes:

>> 
>> 
>> On 30/01/2024 14.52, Lorenzo Bianconi wrote:
>> > > On 2024/1/29 21:07, Lorenzo Bianconi wrote:
>> > > > > On 2024/1/28 22:20, Lorenzo Bianconi wrote:
>> > > > > > Move page_pool stats allocation in page_pool_create routine and get rid
>> > > > > > of it for percpu page_pools.
>> > > > > 
>> > > > > Is there any reason why we do not need those kind stats for per cpu
>> > > > > page_pool?
>> > > > > 
>> > > > 
>> > > > IIRC discussing with Jakub, we decided to not support them since the pool is not
>> > > > associated to any net_device in this case.
>> > > 
>> > > It seems what jakub suggested is to 'extend netlink to dump unbound page pools'?
>> > 
>> > I do not have a strong opinion about it (since we do not have any use-case for
>> > it at the moment).
>> > In the case we want to support stats for per-cpu page_pools, I think we should
>> > not create a per-cpu recycle_stats pointer and add a page_pool_recycle_stats field
>> > in page_pool struct since otherwise we will endup with ncpu^2 copies, right?
>> > Do we want to support it now?
>> > 
>> > @Jakub, Jesper: what do you guys think?
>> > 
>> 
>> 
>> I do see an need for being able to access page_pool stats for all
>> page_pool's in the system.
>> And I do like Jakub's netlink based stats.
>
> ack from my side if you have some use-cases in mind.
> Some questions below:
> - can we assume ethtool will be used to report stats just for 'global'
>   page_pool (not per-cpu page_pool)?
> - can we assume netlink/yaml will be used to report per-cpu page_pool stats?
>
> I think in the current series we can fix the accounting part (in particular
> avoiding memory wasting) and then we will figure out how to report percpu
> page_pool stats through netlink/yaml. Agree?

Deferring the export API to a separate series after this is merged is
fine with me. In which case the *gathering* of statistics could also be
deferred (it's not really useful if it can't be exported).

-Toke


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 3/5] xdp: add multi-buff support for xdp running in generic mode
  2024-01-28 14:20 ` [PATCH v6 net-next 3/5] xdp: add multi-buff support for xdp running in generic mode Lorenzo Bianconi
@ 2024-01-31 23:47   ` Jakub Kicinski
  2024-02-01 11:34     ` Lorenzo Bianconi
  0 siblings, 1 reply; 36+ messages in thread
From: Jakub Kicinski @ 2024-01-31 23:47 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: netdev, lorenzo.bianconi, davem, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

On Sun, 28 Jan 2024 15:20:39 +0100 Lorenzo Bianconi wrote:
> +#if IS_ENABLED(CONFIG_PAGE_POOL)
> +static int
> +netif_skb_segment_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
> +			  struct bpf_prog *prog)

nit: doesn't look all that related to a netif, I'd put it in skbuff.c

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools
  2024-01-31 15:32               ` Toke Høiland-Jørgensen
@ 2024-01-31 23:52                 ` Jakub Kicinski
  2024-02-01 10:54                   ` Lorenzo Bianconi
  0 siblings, 1 reply; 36+ messages in thread
From: Jakub Kicinski @ 2024-01-31 23:52 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Lorenzo Bianconi, Jesper Dangaard Brouer, Yunsheng Lin,
	Lorenzo Bianconi, netdev, davem, edumazet, pabeni, bpf,
	willemdebruijn.kernel, jasowang, sdf, ilias.apalodimas

On Wed, 31 Jan 2024 16:32:00 +0100 Toke Høiland-Jørgensen wrote:
> > ack from my side if you have some use-cases in mind.
> > Some questions below:
> > - can we assume ethtool will be used to report stats just for 'global'
> >   page_pool (not per-cpu page_pool)?
> > - can we assume netlink/yaml will be used to report per-cpu page_pool stats?
> >
> > I think in the current series we can fix the accounting part (in particular
> > avoiding memory wasting) and then we will figure out how to report percpu
> > page_pool stats through netlink/yaml. Agree?  
> 
> Deferring the export API to a separate series after this is merged is
> fine with me.

+1

> In which case the *gathering* of statistics could also be
> deferred (it's not really useful if it can't be exported).

What do you mean by "gather" here? If we plan to expose them later on 
I reckon there's no point having this patch which actively optimizes
them away, no? IOW we should just drop this patch from v7?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools
  2024-01-31 23:52                 ` Jakub Kicinski
@ 2024-02-01 10:54                   ` Lorenzo Bianconi
  0 siblings, 0 replies; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-02-01 10:54 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Toke Høiland-Jørgensen, Lorenzo Bianconi,
	Jesper Dangaard Brouer, Yunsheng Lin, netdev, davem, edumazet,
	pabeni, bpf, willemdebruijn.kernel, jasowang, sdf,
	ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 1098 bytes --]

> On Wed, 31 Jan 2024 16:32:00 +0100 Toke Høiland-Jørgensen wrote:
> > > ack from my side if you have some use-cases in mind.
> > > Some questions below:
> > > - can we assume ethtool will be used to report stats just for 'global'
> > >   page_pool (not per-cpu page_pool)?
> > > - can we assume netlink/yaml will be used to report per-cpu page_pool stats?
> > >
> > > I think in the current series we can fix the accounting part (in particular
> > > avoiding memory wasting) and then we will figure out how to report percpu
> > > page_pool stats through netlink/yaml. Agree?  
> > 
> > Deferring the export API to a separate series after this is merged is
> > fine with me.
> 
> +1
> 
> > In which case the *gathering* of statistics could also be
> > deferred (it's not really useful if it can't be exported).
> 
> What do you mean by "gather" here? If we plan to expose them later on 
> I reckon there's no point having this patch which actively optimizes
> them away, no? IOW we should just drop this patch from v7?

ack, I will get rid of it in v7.

Regards,
Lorenzo

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 3/5] xdp: add multi-buff support for xdp running in generic mode
  2024-01-31 23:47   ` Jakub Kicinski
@ 2024-02-01 11:34     ` Lorenzo Bianconi
  2024-02-01 15:15       ` Jakub Kicinski
  0 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-02-01 11:34 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev, lorenzo.bianconi, davem, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 379 bytes --]

> On Sun, 28 Jan 2024 15:20:39 +0100 Lorenzo Bianconi wrote:
> > +#if IS_ENABLED(CONFIG_PAGE_POOL)
> > +static int
> > +netif_skb_segment_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
> > +			  struct bpf_prog *prog)
> 
> nit: doesn't look all that related to a netif, I'd put it in skbuff.c

ack, fine. skb_segment_for_xdp() in this case?

Regards,
Lorenzo

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-01-30 14:26         ` Lorenzo Bianconi
@ 2024-02-01 11:49           ` Lorenzo Bianconi
  2024-02-01 12:14             ` Yunsheng Lin
  0 siblings, 1 reply; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-02-01 11:49 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Yunsheng Lin, netdev, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 2390 bytes --]

> > On 2024/1/29 21:04, Lorenzo Bianconi wrote:
> > >> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
> > >>
> > >>>  #ifdef CONFIG_LOCKDEP
> > >>>  /*
> > >>>   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
> > >>> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
> > >>>   *
> > >>>   */
> > >>>  
> > >>> +#define SD_PAGE_POOL_RING_SIZE	256
> > >>
> > >> I might have missed it — is there a reason we chose 256 here? Do we
> > >> need to use a different value for different page sizes? For 64K page size,
> > >> it means we might need to reserve 16MB memory for each CPU.
> > > 
> > > honestly I have not spent time on it, most of the current page_pool users set
> > > pool_size to 256. Anyway, do you mean something like:
> > > 
> > > diff --git a/net/core/dev.c b/net/core/dev.c
> > > index f70fb6cad2b2..3934a3fc5c45 100644
> > > --- a/net/core/dev.c
> > > +++ b/net/core/dev.c
> > > @@ -11806,12 +11806,11 @@ static void __init net_dev_struct_check(void)
> > >   *
> > >   */
> > >  
> > > -#define SD_PAGE_POOL_RING_SIZE	256
> > >  static int net_page_pool_alloc(int cpuid)
> > >  {
> > >  #if IS_ENABLED(CONFIG_PAGE_POOL)
> > 
> > Isn't better to have a config like CONFIG_PER_CPU_PAGE_POOL to enable
> > this feature? and this config can be selected by whoever needs this
> > feature?
> 
> since it will be used for generic xdp (at least) I think this will be 99%
> enabled when we have bpf enabled, right?
> 
> > 
> > >  	struct page_pool_params page_pool_params = {
> > > -		.pool_size = SD_PAGE_POOL_RING_SIZE,
> > > +		.pool_size = PAGE_SIZE < SZ_64K ? 256 : 16,
> > 
> > What about other page size? like 16KB?
> > How about something like below:
> > PAGE_SIZE << get_order(PER_CPU_PAGE_POOL_MAX_SIZE)
> 
> since pool_size is the number of elements in the ptr_ring associated to the pool,
> assuming we want to consume PER_CPU_PAGE_POOL_MAX_SIZE for each cpu, something
> like:
> 
> PER_CPU_PAGE_POOL_MAX_SIZE / PAGE_SIZE
> 
> Regards,
> Lorenzo

Discussing with Jesper and Toke, we agreed page_pool infrastructure will need
a way to release memory when the system is under memory pressure, so we can
defer this item to a subsequent series, what do you think?

Regards,
Lorenzo

> 
> > 
> > >  		.nid = NUMA_NO_NODE,
> > >  	};
> > >  	struct page_pool *pp_ptr;
> > 



[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
  2024-02-01 11:49           ` Lorenzo Bianconi
@ 2024-02-01 12:14             ` Yunsheng Lin
  0 siblings, 0 replies; 36+ messages in thread
From: Yunsheng Lin @ 2024-02-01 12:14 UTC (permalink / raw)
  To: Lorenzo Bianconi, Lorenzo Bianconi
  Cc: netdev, davem, kuba, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

On 2024/2/1 19:49, Lorenzo Bianconi wrote:
>>> On 2024/1/29 21:04, Lorenzo Bianconi wrote:
>>>>> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
>>>>>
>>>>>>  #ifdef CONFIG_LOCKDEP
>>>>>>  /*
>>>>>>   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
>>>>>> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
>>>>>>   *
>>>>>>   */
>>>>>>  
>>>>>> +#define SD_PAGE_POOL_RING_SIZE	256
>>>>>
>>>>> I might have missed it — is there a reason we chose 256 here? Do we
>>>>> need to use a different value for different page sizes? For 64K page size,
>>>>> it means we might need to reserve 16MB memory for each CPU.
>>>>
>>>> honestly I have not spent time on it, most of the current page_pool users set
>>>> pool_size to 256. Anyway, do you mean something like:
>>>>
>>>> diff --git a/net/core/dev.c b/net/core/dev.c
>>>> index f70fb6cad2b2..3934a3fc5c45 100644
>>>> --- a/net/core/dev.c
>>>> +++ b/net/core/dev.c
>>>> @@ -11806,12 +11806,11 @@ static void __init net_dev_struct_check(void)
>>>>   *
>>>>   */
>>>>  
>>>> -#define SD_PAGE_POOL_RING_SIZE	256
>>>>  static int net_page_pool_alloc(int cpuid)
>>>>  {
>>>>  #if IS_ENABLED(CONFIG_PAGE_POOL)
>>>
>>> Isn't better to have a config like CONFIG_PER_CPU_PAGE_POOL to enable
>>> this feature? and this config can be selected by whoever needs this
>>> feature?
>>
>> since it will be used for generic xdp (at least) I think this will be 99%
>> enabled when we have bpf enabled, right?
>>
>>>
>>>>  	struct page_pool_params page_pool_params = {
>>>> -		.pool_size = SD_PAGE_POOL_RING_SIZE,
>>>> +		.pool_size = PAGE_SIZE < SZ_64K ? 256 : 16,
>>>
>>> What about other page size? like 16KB?
>>> How about something like below:
>>> PAGE_SIZE << get_order(PER_CPU_PAGE_POOL_MAX_SIZE)
>>
>> since pool_size is the number of elements in the ptr_ring associated to the pool,
>> assuming we want to consume PER_CPU_PAGE_POOL_MAX_SIZE for each cpu, something
>> like:
>>
>> PER_CPU_PAGE_POOL_MAX_SIZE / PAGE_SIZE

Using something like the above makes sense to me, thanks.

>>
>> Regards,
>> Lorenzo
> 
> Discussing with Jesper and Toke, we agreed page_pool infrastructure will need
> a way to release memory when the system is under memory pressure, so we can
> defer this item to a subsequent series, what do you think?
> 

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 3/5] xdp: add multi-buff support for xdp running in generic mode
  2024-02-01 11:34     ` Lorenzo Bianconi
@ 2024-02-01 15:15       ` Jakub Kicinski
  2024-02-01 16:41         ` Lorenzo Bianconi
  0 siblings, 1 reply; 36+ messages in thread
From: Jakub Kicinski @ 2024-02-01 15:15 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: netdev, lorenzo.bianconi, davem, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

On Thu, 1 Feb 2024 12:34:26 +0100 Lorenzo Bianconi wrote:
> > nit: doesn't look all that related to a netif, I'd put it in skbuff.c  
> 
> ack, fine. skb_segment_for_xdp() in this case?

I think the closest thing we have now is skb_cow_data(),
so how about skb_cow_data_pp() or skb_cow_fragged() or
skb_cow_something? :)

I'm on the fence whether we should split the XDP-ness out.
I mean the only two xdp-related things are the headroom and
check for xdp_has_frags, so we could also:

skb_cow_data_pp(struct page_pool *pool, struct sk_buff **pskb,
		unsigned int headroom)
{
	...
}

skb_cow_data_xdp(struct page_pool *pool, struct sk_buff **pskb,
		 struct bpf_prog *prog)
{
	if (!prog->aux->xdp_has_frags)
		return -EINVAL;

	return skb_cow_data_pp(pool, pskb, XDP_PACKET_HEADROOM);
}


I think it'd increase the chances of reuse. But that's speculative 
so I'll let you decide if you prefer that or to keep it simple.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v6 net-next 3/5] xdp: add multi-buff support for xdp running in generic mode
  2024-02-01 15:15       ` Jakub Kicinski
@ 2024-02-01 16:41         ` Lorenzo Bianconi
  0 siblings, 0 replies; 36+ messages in thread
From: Lorenzo Bianconi @ 2024-02-01 16:41 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev, lorenzo.bianconi, davem, edumazet, pabeni, bpf, toke,
	willemdebruijn.kernel, jasowang, sdf, hawk, ilias.apalodimas

[-- Attachment #1: Type: text/plain, Size: 1105 bytes --]

> On Thu, 1 Feb 2024 12:34:26 +0100 Lorenzo Bianconi wrote:
> > > nit: doesn't look all that related to a netif, I'd put it in skbuff.c  
> > 
> > ack, fine. skb_segment_for_xdp() in this case?
> 
> I think the closest thing we have now is skb_cow_data(),
> so how about skb_cow_data_pp() or skb_cow_fragged() or
> skb_cow_something? :)

I like skb_cow_something :)

> 
> I'm on the fence whether we should split the XDP-ness out.
> I mean the only two xdp-related things are the headroom and
> check for xdp_has_frags, so we could also:
> 
> skb_cow_data_pp(struct page_pool *pool, struct sk_buff **pskb,
> 		unsigned int headroom)
> {
> 	...
> }
> 
> skb_cow_data_xdp(struct page_pool *pool, struct sk_buff **pskb,
> 		 struct bpf_prog *prog)
> {
> 	if (!prog->aux->xdp_has_frags)
> 		return -EINVAL;
> 
> 	return skb_cow_data_pp(pool, pskb, XDP_PACKET_HEADROOM);
> }
> 
> 
> I think it'd increase the chances of reuse. But that's speculative 
> so I'll let you decide if you prefer that or to keep it simple.

ack, I agree. I will fix it in v7.

Regards,
Lorenzo

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2024-02-01 16:41 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-01-28 14:20 [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Lorenzo Bianconi
2024-01-28 14:20 ` [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator Lorenzo Bianconi
2024-01-29  6:37   ` kernel test robot
2024-01-29 12:05   ` Yunsheng Lin
2024-01-29 13:04     ` Lorenzo Bianconi
2024-01-30 11:02       ` Yunsheng Lin
2024-01-30 14:26         ` Lorenzo Bianconi
2024-02-01 11:49           ` Lorenzo Bianconi
2024-02-01 12:14             ` Yunsheng Lin
2024-01-29 12:45   ` Toke Høiland-Jørgensen
2024-01-29 12:52     ` Lorenzo Bianconi
2024-01-29 15:44       ` Toke Høiland-Jørgensen
2024-01-31 12:51         ` Jesper Dangaard Brouer
2024-01-31 12:28   ` Jesper Dangaard Brouer
2024-01-31 13:36     ` Lorenzo Bianconi
2024-01-28 14:20 ` [PATCH v6 net-next 2/5] xdp: rely on skb pointer reference in do_xdp_generic and netif_receive_generic_xdp Lorenzo Bianconi
2024-01-29  8:05   ` Ilias Apalodimas
2024-01-29  9:58     ` Lorenzo Bianconi
2024-01-28 14:20 ` [PATCH v6 net-next 3/5] xdp: add multi-buff support for xdp running in generic mode Lorenzo Bianconi
2024-01-31 23:47   ` Jakub Kicinski
2024-02-01 11:34     ` Lorenzo Bianconi
2024-02-01 15:15       ` Jakub Kicinski
2024-02-01 16:41         ` Lorenzo Bianconi
2024-01-28 14:20 ` [PATCH v6 net-next 4/5] net: page_pool: make stats available just for global pools Lorenzo Bianconi
2024-01-29 12:06   ` Yunsheng Lin
2024-01-29 13:07     ` Lorenzo Bianconi
2024-01-30 11:23       ` Yunsheng Lin
2024-01-30 13:52         ` Lorenzo Bianconi
2024-01-30 15:11           ` Jesper Dangaard Brouer
2024-01-30 16:01             ` Lorenzo Bianconi
2024-01-31 15:32               ` Toke Høiland-Jørgensen
2024-01-31 23:52                 ` Jakub Kicinski
2024-02-01 10:54                   ` Lorenzo Bianconi
2024-01-28 14:20 ` [PATCH v6 net-next 5/5] veth: rely on netif_skb_segment_for_xdp utility routine Lorenzo Bianconi
2024-01-29 12:43 ` [PATCH v6 net-next 0/5] add multi-buff support for xdp running in generic mode Toke Høiland-Jørgensen
2024-01-29 13:05   ` Lorenzo Bianconi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).