* [PATCH bpf-next 1/3] net: veth: introduce bulking for XDP_PASS
2021-01-26 18:41 [PATCH bpf-next 0/3] veth: add skb bulking allocation for XDP_PASS Lorenzo Bianconi
@ 2021-01-26 18:41 ` Lorenzo Bianconi
2021-01-28 14:06 ` Jesper Dangaard Brouer
2021-01-28 15:17 ` Toshiaki Makita
2021-01-26 18:42 ` [PATCH bpf-next 2/3] net: xdp: move XDP_BATCH_SIZE in common header Lorenzo Bianconi
2021-01-26 18:42 ` [PATCH bpf-next 3/3] net: veth: alloc skb in bulk for ndo_xdp_xmit Lorenzo Bianconi
2 siblings, 2 replies; 7+ messages in thread
From: Lorenzo Bianconi @ 2021-01-26 18:41 UTC
To: bpf
Cc: netdev, davem, kuba, ast, daniel, toshiaki.makita1,
lorenzo.bianconi, brouer, toke
Introduce bulking support for the XDP_PASS verdict, forwarding skbs to
the networking stack in batches.
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
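For reference, a minimal sketch of the batched receive pattern this patch
moves veth_xdp_rcv() to: drain up to VETH_XDP_BATCH pointers from the
ptr_ring per iteration, then hand the resulting skbs to GRO. This is an
illustration, not the driver code itself; ptr_to_skb() is a hypothetical
stand-in for the veth_xdp_rcv_one()/veth_xdp_rcv_skb() handling done in
the real function.

#include <linux/ptr_ring.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

#define VETH_XDP_BATCH	8

/* Hypothetical helper standing in for veth_xdp_rcv_one()/veth_xdp_rcv_skb() */
static struct sk_buff *ptr_to_skb(void *ptr);

static int rcv_batched_sketch(struct ptr_ring *ring, struct napi_struct *napi,
			      int budget)
{
	int done = 0;

	while (done < budget) {
		void *frames[VETH_XDP_BATCH];
		int i, n;

		/* pull up to VETH_XDP_BATCH entries out of the ring at once */
		n = __ptr_ring_consume_batched(ring, frames, VETH_XDP_BATCH);
		if (!n)
			break;

		for (i = 0; i < n; i++) {
			struct sk_buff *skb = ptr_to_skb(frames[i]);

			if (skb)
				napi_gro_receive(napi, skb);
		}
		done += n;
	}

	return done;
}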
drivers/net/veth.c | 43 ++++++++++++++++++++++++++-----------------
1 file changed, 26 insertions(+), 17 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 6e03b619c93c..23137d9966da 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -35,6 +35,7 @@
#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
#define VETH_XDP_TX_BULK_SIZE 16
+#define VETH_XDP_BATCH 8
struct veth_stats {
u64 rx_drops;
@@ -787,27 +788,35 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
int i, done = 0;
for (i = 0; i < budget; i++) {
- void *ptr = __ptr_ring_consume(&rq->xdp_ring);
- struct sk_buff *skb;
+ void *frames[VETH_XDP_BATCH];
+ void *skbs[VETH_XDP_BATCH];
+ int i, n_frame, n_skb = 0;
- if (!ptr)
+ n_frame = __ptr_ring_consume_batched(&rq->xdp_ring, frames,
+ VETH_XDP_BATCH);
+ if (!n_frame)
break;
- if (veth_is_xdp_frame(ptr)) {
- struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
+ for (i = 0; i < n_frame; i++) {
+ void *f = frames[i];
+ struct sk_buff *skb;
- stats->xdp_bytes += frame->len;
- skb = veth_xdp_rcv_one(rq, frame, bq, stats);
- } else {
- skb = ptr;
- stats->xdp_bytes += skb->len;
- skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
- }
-
- if (skb)
- napi_gro_receive(&rq->xdp_napi, skb);
+ if (veth_is_xdp_frame(f)) {
+ struct xdp_frame *frame = veth_ptr_to_xdp(f);
- done++;
+ stats->xdp_bytes += frame->len;
+ skb = veth_xdp_rcv_one(rq, frame, bq, stats);
+ } else {
+ skb = f;
+ stats->xdp_bytes += skb->len;
+ skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
+ }
+ if (skb)
+ skbs[n_skb++] = skb;
+ }
+ for (i = 0; i < n_skb; i++)
+ napi_gro_receive(&rq->xdp_napi, skbs[i]);
+ done += n_frame;
}
u64_stats_update_begin(&rq->stats.syncp);
@@ -818,7 +827,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
rq->stats.vs.xdp_packets += done;
u64_stats_update_end(&rq->stats.syncp);
- return done;
+ return i;
}
static int veth_poll(struct napi_struct *napi, int budget)
--
2.29.2
* [PATCH bpf-next 2/3] net: xdp: move XDP_BATCH_SIZE in common header
2021-01-26 18:41 [PATCH bpf-next 0/3] veth: add skb bulking allocation for XDP_PASS Lorenzo Bianconi
2021-01-26 18:41 ` [PATCH bpf-next 1/3] net: veth: introduce bulking " Lorenzo Bianconi
@ 2021-01-26 18:42 ` Lorenzo Bianconi
2021-01-26 18:42 ` [PATCH bpf-next 3/3] net: veth: alloc skb in bulk for ndo_xdp_xmit Lorenzo Bianconi
2 siblings, 0 replies; 7+ messages in thread
From: Lorenzo Bianconi @ 2021-01-26 18:42 UTC
To: bpf
Cc: netdev, davem, kuba, ast, daniel, toshiaki.makita1,
lorenzo.bianconi, brouer, toke
Move the CPUMAP_BATCH macro into the common xdp header and rename it to
XDP_BATCH_SIZE so that it can be reused by the veth driver.
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
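A note on the "8 == one cacheline on 64-bit archs" comment: the batch is
sized so that an on-stack array of XDP_BATCH_SIZE pointers fills exactly
one 64-byte cacheline on typical 64-bit configurations. A compile-time
check along these lines (illustrative only, not part of the patch) makes
that assumption explicit:

#include <linux/build_bug.h>

#define XDP_BATCH_SIZE	8

static inline void xdp_batch_size_check(void)
{
#ifdef CONFIG_64BIT
	/* 8 pointers * 8 bytes per pointer == 64 bytes, i.e. one
	 * cacheline on the common 64-bit configurations the comment
	 * refers to.
	 */
	BUILD_BUG_ON(XDP_BATCH_SIZE * sizeof(void *) != 64);
#endif
}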
drivers/net/veth.c | 2 +-
include/net/xdp.h | 1 +
kernel/bpf/cpumap.c | 13 +++++--------
3 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 23137d9966da..ff77b541e5fc 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -793,7 +793,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
int i, n_frame, n_skb = 0;
n_frame = __ptr_ring_consume_batched(&rq->xdp_ring, frames,
- VETH_XDP_BATCH);
+ XDP_BATCH_SIZE);
if (!n_frame)
break;
diff --git a/include/net/xdp.h b/include/net/xdp.h
index c4bfdc9a8b79..c0e15bcb3a22 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -124,6 +124,7 @@ struct xdp_frame {
struct net_device *dev_rx; /* used by cpumap */
};
+#define XDP_BATCH_SIZE 8 /* 8 == one cacheline on 64-bit archs */
#define XDP_BULK_QUEUE_SIZE 16
struct xdp_frame_bulk {
int count;
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 5d1469de6921..ecda8eadd837 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -37,12 +37,11 @@
* which queue in bpf_cpu_map_entry contains packets.
*/
-#define CPU_MAP_BULK_SIZE 8 /* 8 == one cacheline on 64-bit archs */
struct bpf_cpu_map_entry;
struct bpf_cpu_map;
struct xdp_bulk_queue {
- void *q[CPU_MAP_BULK_SIZE];
+ void *q[XDP_BATCH_SIZE];
struct list_head flush_node;
struct bpf_cpu_map_entry *obj;
unsigned int count;
@@ -237,8 +236,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
return nframes;
}
-#define CPUMAP_BATCH 8
-
static int cpu_map_kthread_run(void *data)
{
struct bpf_cpu_map_entry *rcpu = data;
@@ -254,8 +251,8 @@ static int cpu_map_kthread_run(void *data)
struct xdp_cpumap_stats stats = {}; /* zero stats */
gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
unsigned int drops = 0, sched = 0;
- void *frames[CPUMAP_BATCH];
- void *skbs[CPUMAP_BATCH];
+ void *frames[XDP_BATCH_SIZE];
+ void *skbs[XDP_BATCH_SIZE];
int i, n, m, nframes;
/* Release CPU reschedule checks */
@@ -278,7 +275,7 @@ static int cpu_map_kthread_run(void *data)
* consume side valid as no-resize allowed of queue.
*/
n = __ptr_ring_consume_batched(rcpu->queue, frames,
- CPUMAP_BATCH);
+ XDP_BATCH_SIZE);
for (i = 0; i < n; i++) {
void *f = frames[i];
struct page *page = virt_to_page(f);
@@ -656,7 +653,7 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
- if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
+ if (unlikely(bq->count == XDP_BATCH_SIZE))
bq_flush_to_queue(bq);
/* Notice, xdp_buff/page MUST be queued here, long enough for
--
2.29.2
* [PATCH bpf-next 3/3] net: veth: alloc skb in bulk for ndo_xdp_xmit
2021-01-26 18:41 [PATCH bpf-next 0/3] veth: add skb bulking allocation for XDP_PASS Lorenzo Bianconi
2021-01-26 18:41 ` [PATCH bpf-next 1/3] net: veth: introduce bulking " Lorenzo Bianconi
2021-01-26 18:42 ` [PATCH bpf-next 2/3] net: xdp: move XDP_BATCH_SIZE in common header Lorenzo Bianconi
@ 2021-01-26 18:42 ` Lorenzo Bianconi
2 siblings, 0 replies; 7+ messages in thread
From: Lorenzo Bianconi @ 2021-01-26 18:42 UTC
To: bpf
Cc: netdev, davem, kuba, ast, daniel, toshiaki.makita1,
lorenzo.bianconi, brouer, toke
Split the ndo_xdp_xmit and ndo_start_xmit use cases in the veth_xdp_rcv
routine in order to allocate skbs in bulk for the XDP_PASS verdict.
Introduce the xdp_alloc_skb_bulk utility routine to allocate a bulk list of skbs.
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
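To show how the new helper is meant to be used, here is a simplified
sketch of a driver consuming xdp_alloc_skb_bulk() together with
__xdp_build_skb_from_frame(): allocate all skb heads for a batch in one
call, then build and GRO each skb. It mirrors the veth_xdp_rcv_batch()
flow below with stats and the XDP_TX bulk queue left out; rx_frames_bulk()
is an illustrative name, and XDP_BATCH_SIZE is assumed to come from
net/xdp.h as introduced in patch 2/3.

#include <net/xdp.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static void rx_frames_bulk(struct napi_struct *napi, struct net_device *dev,
			   struct xdp_frame **xdpf, int n)
{
	void *skbs[XDP_BATCH_SIZE];
	int i;

	/* n is expected to be <= XDP_BATCH_SIZE */
	if (xdp_alloc_skb_bulk(skbs, n, GFP_ATOMIC) < 0) {
		/* bulk allocation failed: drop the whole batch */
		for (i = 0; i < n; i++)
			xdp_return_frame(xdpf[i]);
		return;
	}

	for (i = 0; i < n; i++) {
		struct sk_buff *skb = skbs[i];

		/* kmem_cache_alloc_bulk() heads are not zeroed here */
		memset(skb, 0, offsetof(struct sk_buff, tail));
		skb = __xdp_build_skb_from_frame(xdpf[i], skb, dev);
		if (!skb) {
			xdp_return_frame(xdpf[i]);
			continue;
		}
		napi_gro_receive(napi, skb);
	}
}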
drivers/net/veth.c | 96 ++++++++++++++++++++++++++++++++--------------
include/net/xdp.h | 1 +
net/core/xdp.c | 11 ++++++
3 files changed, 79 insertions(+), 29 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index ff77b541e5fc..3464f4c7844b 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -563,14 +563,13 @@ static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp,
return 0;
}
-static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
- struct xdp_frame *frame,
- struct veth_xdp_tx_bq *bq,
- struct veth_stats *stats)
+static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
+ struct xdp_frame *frame,
+ struct veth_xdp_tx_bq *bq,
+ struct veth_stats *stats)
{
struct xdp_frame orig_frame;
struct bpf_prog *xdp_prog;
- struct sk_buff *skb;
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -624,13 +623,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
}
rcu_read_unlock();
- skb = xdp_build_skb_from_frame(frame, rq->dev);
- if (!skb) {
- xdp_return_frame(frame);
- stats->rx_drops++;
- }
-
- return skb;
+ return frame;
err_xdp:
rcu_read_unlock();
xdp_return_frame(frame);
@@ -638,6 +631,48 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
return NULL;
}
+static void veth_xdp_rcv_batch(struct veth_rq *rq, void **frames,
+ int n_xdpf, struct veth_xdp_tx_bq *bq,
+ struct veth_stats *stats)
+{
+ void *skbs[XDP_BATCH_SIZE];
+ int i, n_skb = 0;
+
+ for (i = 0; i < n_xdpf; i++) {
+ struct xdp_frame *frame = frames[i];
+
+ stats->xdp_bytes += frame->len;
+ frame = veth_xdp_rcv_one(rq, frame, bq, stats);
+ if (frame)
+ frames[n_skb++] = frame;
+ }
+
+ if (!n_skb)
+ return;
+
+ if (xdp_alloc_skb_bulk(skbs, n_skb, GFP_ATOMIC) < 0) {
+ for (i = 0; i < n_skb; i++) {
+ xdp_return_frame(frames[i]);
+ stats->rx_drops++;
+ }
+ return;
+ }
+
+ for (i = 0; i < n_skb; i++) {
+ struct sk_buff *skb = skbs[i];
+
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+ skb = __xdp_build_skb_from_frame(frames[i], skb,
+ rq->dev);
+ if (!skb) {
+ xdp_return_frame(frames[i]);
+ stats->rx_drops++;
+ continue;
+ }
+ napi_gro_receive(&rq->xdp_napi, skb);
+ }
+}
+
static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
struct sk_buff *skb,
struct veth_xdp_tx_bq *bq,
@@ -788,9 +823,10 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
int i, done = 0;
for (i = 0; i < budget; i++) {
+ int i, n_frame, n_xdpf = 0, n_skb = 0;
void *frames[VETH_XDP_BATCH];
void *skbs[VETH_XDP_BATCH];
- int i, n_frame, n_skb = 0;
+ void *xdpf[VETH_XDP_BATCH];
n_frame = __ptr_ring_consume_batched(&rq->xdp_ring, frames,
XDP_BATCH_SIZE);
@@ -798,24 +834,26 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
break;
for (i = 0; i < n_frame; i++) {
- void *f = frames[i];
- struct sk_buff *skb;
-
- if (veth_is_xdp_frame(f)) {
- struct xdp_frame *frame = veth_ptr_to_xdp(f);
-
- stats->xdp_bytes += frame->len;
- skb = veth_xdp_rcv_one(rq, frame, bq, stats);
- } else {
- skb = f;
- stats->xdp_bytes += skb->len;
- skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
- }
+ if (veth_is_xdp_frame(frames[i]))
+ xdpf[n_xdpf++] = veth_ptr_to_xdp(frames[i]);
+ else
+ skbs[n_skb++] = frames[i];
+ }
+
+ /* ndo_xdp_xmit */
+ if (n_xdpf)
+ veth_xdp_rcv_batch(rq, xdpf, n_xdpf, bq, stats);
+
+ /* ndo_start_xmit */
+ for (i = 0; i < n_skb; i++) {
+ struct sk_buff *skb = skbs[i];
+
+ stats->xdp_bytes += skb->len;
+ skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
if (skb)
- skbs[n_skb++] = skb;
+ napi_gro_receive(&rq->xdp_napi, skb);
}
- for (i = 0; i < n_skb; i++)
- napi_gro_receive(&rq->xdp_napi, skbs[i]);
+
done += n_frame;
}
diff --git a/include/net/xdp.h b/include/net/xdp.h
index c0e15bcb3a22..e8db521f5323 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -170,6 +170,7 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct net_device *dev);
struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct net_device *dev);
+int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp);
static inline
void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 0d2630a35c3e..05354976c1fc 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -514,6 +514,17 @@ void xdp_warn(const char *msg, const char *func, const int line)
};
EXPORT_SYMBOL_GPL(xdp_warn);
+int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
+{
+ n_skb = kmem_cache_alloc_bulk(skbuff_head_cache, gfp,
+ n_skb, skbs);
+ if (unlikely(!n_skb))
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_alloc_skb_bulk);
+
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct sk_buff *skb,
struct net_device *dev)
--
2.29.2