From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: linux-kernel@vger.kernel.org, netdev@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>,
"Daniel Bristot de Oliveira" <bristot@kernel.org>,
"Boqun Feng" <boqun.feng@gmail.com>,
"Daniel Borkmann" <daniel@iogearbox.net>,
"Eric Dumazet" <edumazet@google.com>,
"Frederic Weisbecker" <frederic@kernel.org>,
"Ingo Molnar" <mingo@redhat.com>,
"Jakub Kicinski" <kuba@kernel.org>,
"Paolo Abeni" <pabeni@redhat.com>,
"Peter Zijlstra" <peterz@infradead.org>,
"Thomas Gleixner" <tglx@linutronix.de>,
"Waiman Long" <longman@redhat.com>,
"Will Deacon" <will@kernel.org>,
"Sebastian Andrzej Siewior" <bigeasy@linutronix.de>,
"Björn Töpel" <bjorn@kernel.org>,
"Alexei Starovoitov" <ast@kernel.org>,
"Andrii Nakryiko" <andrii@kernel.org>,
"Eduard Zingerman" <eddyz87@gmail.com>,
"Hao Luo" <haoluo@google.com>,
"Jesper Dangaard Brouer" <hawk@kernel.org>,
"Jiri Olsa" <jolsa@kernel.org>,
"John Fastabend" <john.fastabend@gmail.com>,
"Jonathan Lemon" <jonathan.lemon@gmail.com>,
"KP Singh" <kpsingh@kernel.org>,
"Maciej Fijalkowski" <maciej.fijalkowski@intel.com>,
"Magnus Karlsson" <magnus.karlsson@intel.com>,
"Martin KaFai Lau" <martin.lau@linux.dev>,
"Song Liu" <song@kernel.org>,
"Stanislav Fomichev" <sdf@google.com>,
"Toke Høiland-Jørgensen" <toke@redhat.com>,
"Yonghong Song" <yonghong.song@linux.dev>,
bpf@vger.kernel.org
Subject: [PATCH v9 net-next 15/15] net: Move per-CPU flush-lists to bpf_net_context on PREEMPT_RT.
Date: Thu, 20 Jun 2024 15:22:05 +0200 [thread overview]
Message-ID: <20240620132727.660738-16-bigeasy@linutronix.de> (raw)
In-Reply-To: <20240620132727.660738-1-bigeasy@linutronix.de>
The per-CPU flush lists, which are accessed from within the NAPI callback
(xdp_do_flush() for instance), are per-CPU. There are subject to the
same problem as struct bpf_redirect_info.
Add the per-CPU lists cpu_map_flush_list, dev_map_flush_list and
xskmap_map_flush_list to struct bpf_net_context. Add wrappers for the
access. The lists initialized on first usage (similar to
bpf_net_ctx_get_ri()).
Cc: "Björn Töpel" <bjorn@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Jesper Dangaard Brouer <hawk@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@intel.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Toke Høiland-Jørgensen <toke@redhat.com>
Cc: Yonghong Song <yonghong.song@linux.dev>
Cc: bpf@vger.kernel.org
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/filter.h | 42 ++++++++++++++++++++++++++++++++++++++++++
kernel/bpf/cpumap.c | 19 +++----------------
kernel/bpf/devmap.c | 11 +++--------
net/xdp/xsk.c | 12 ++++--------
4 files changed, 52 insertions(+), 32 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0a7f6e4a00b60..c0349522de8fb 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -736,6 +736,9 @@ struct bpf_nh_params {
/* flags for bpf_redirect_info kern_flags */
#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */
#define BPF_RI_F_RI_INIT BIT(1)
+#define BPF_RI_F_CPU_MAP_INIT BIT(2)
+#define BPF_RI_F_DEV_MAP_INIT BIT(3)
+#define BPF_RI_F_XSK_MAP_INIT BIT(4)
struct bpf_redirect_info {
u64 tgt_index;
@@ -750,6 +753,9 @@ struct bpf_redirect_info {
struct bpf_net_context {
struct bpf_redirect_info ri;
+ struct list_head cpu_map_flush_list;
+ struct list_head dev_map_flush_list;
+ struct list_head xskmap_map_flush_list;
};
static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx)
@@ -787,6 +793,42 @@ static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
return &bpf_net_ctx->ri;
}
+static inline struct list_head *bpf_net_ctx_get_cpu_map_flush_list(void)
+{
+ struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+ if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_CPU_MAP_INIT)) {
+ INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list);
+ bpf_net_ctx->ri.kern_flags |= BPF_RI_F_CPU_MAP_INIT;
+ }
+
+ return &bpf_net_ctx->cpu_map_flush_list;
+}
+
+static inline struct list_head *bpf_net_ctx_get_dev_flush_list(void)
+{
+ struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+ if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_DEV_MAP_INIT)) {
+ INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list);
+ bpf_net_ctx->ri.kern_flags |= BPF_RI_F_DEV_MAP_INIT;
+ }
+
+ return &bpf_net_ctx->dev_map_flush_list;
+}
+
+static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void)
+{
+ struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+ if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_XSK_MAP_INIT)) {
+ INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list);
+ bpf_net_ctx->ri.kern_flags |= BPF_RI_F_XSK_MAP_INIT;
+ }
+
+ return &bpf_net_ctx->xskmap_map_flush_list;
+}
+
/* Compute the linear packet data range [data, data_end) which
* will be accessed by various program types (cls_bpf, act_bpf,
* lwt, ...). Subsystems allowing direct data access must (!)
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 66974bd027109..068e994ed781a 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -79,8 +79,6 @@ struct bpf_cpu_map {
struct bpf_cpu_map_entry __rcu **cpu_map;
};
-static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
-
static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
{
u32 value_size = attr->value_size;
@@ -709,7 +707,7 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
*/
static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
{
- struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
@@ -761,7 +759,7 @@ int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
void __cpu_map_flush(void)
{
- struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
struct xdp_bulk_queue *bq, *tmp;
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -775,20 +773,9 @@ void __cpu_map_flush(void)
#ifdef CONFIG_DEBUG_NET
bool cpu_map_check_flush(void)
{
- if (list_empty(this_cpu_ptr(&cpu_map_flush_list)))
+ if (list_empty(bpf_net_ctx_get_cpu_map_flush_list()))
return false;
__cpu_map_flush();
return true;
}
#endif
-
-static int __init cpu_map_init(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu));
- return 0;
-}
-
-subsys_initcall(cpu_map_init);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index fbfdfb60db8d7..317ac2d66ebd1 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -83,7 +83,6 @@ struct bpf_dtab {
u32 n_buckets;
};
-static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);
@@ -415,7 +414,7 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
*/
void __dev_flush(void)
{
- struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
struct xdp_dev_bulk_queue *bq, *tmp;
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -429,7 +428,7 @@ void __dev_flush(void)
#ifdef CONFIG_DEBUG_NET
bool dev_check_flush(void)
{
- if (list_empty(this_cpu_ptr(&dev_flush_list)))
+ if (list_empty(bpf_net_ctx_get_dev_flush_list()))
return false;
__dev_flush();
return true;
@@ -460,7 +459,7 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx, struct bpf_prog *xdp_prog)
{
- struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
@@ -1160,15 +1159,11 @@ static struct notifier_block dev_map_notifier = {
static int __init dev_map_init(void)
{
- int cpu;
-
/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
offsetof(struct _bpf_dtab_netdev, dev));
register_netdevice_notifier(&dev_map_notifier);
- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
return 0;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 7d1c0986f9bb3..ed062e0383896 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -35,8 +35,6 @@
#define TX_BATCH_SIZE 32
#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
-static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
-
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -372,7 +370,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
int err;
err = xsk_rcv(xs, xdp);
@@ -387,7 +385,7 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
void __xsk_map_flush(void)
{
- struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
struct xdp_sock *xs, *tmp;
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@@ -399,7 +397,7 @@ void __xsk_map_flush(void)
#ifdef CONFIG_DEBUG_NET
bool xsk_map_check_flush(void)
{
- if (list_empty(this_cpu_ptr(&xskmap_flush_list)))
+ if (list_empty(bpf_net_ctx_get_xskmap_flush_list()))
return false;
__xsk_map_flush();
return true;
@@ -1772,7 +1770,7 @@ static struct pernet_operations xsk_net_ops = {
static int __init xsk_init(void)
{
- int err, cpu;
+ int err;
err = proto_register(&xsk_proto, 0 /* no slab */);
if (err)
@@ -1790,8 +1788,6 @@ static int __init xsk_init(void)
if (err)
goto out_pernet;
- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
return 0;
out_pernet:
--
2.45.2
next prev parent reply other threads:[~2024-06-20 13:27 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-20 13:21 [PATCH v9 net-next 00/15] locking: Introduce nested-BH locking Sebastian Andrzej Siewior
2024-06-20 13:21 ` [PATCH v9 net-next 01/15] locking/local_lock: Introduce guard definition for local_lock Sebastian Andrzej Siewior
2024-06-20 13:21 ` [PATCH v9 net-next 02/15] locking/local_lock: Add local nested BH locking infrastructure Sebastian Andrzej Siewior
2024-06-20 13:21 ` [PATCH v9 net-next 03/15] net: Use __napi_alloc_frag_align() instead of open coding it Sebastian Andrzej Siewior
2024-06-20 13:21 ` [PATCH v9 net-next 04/15] net: Use nested-BH locking for napi_alloc_cache Sebastian Andrzej Siewior
2024-06-20 13:21 ` [PATCH v9 net-next 05/15] net/tcp_sigpool: Use nested-BH locking for sigpool_scratch Sebastian Andrzej Siewior
2024-06-20 13:21 ` [PATCH v9 net-next 06/15] net/ipv4: Use nested-BH locking for ipv4_tcp_sk Sebastian Andrzej Siewior
2024-06-20 13:21 ` [PATCH v9 net-next 07/15] netfilter: br_netfilter: Use nested-BH locking for brnf_frag_data_storage Sebastian Andrzej Siewior
2024-06-20 13:21 ` [PATCH v9 net-next 08/15] net: softnet_data: Make xmit per task Sebastian Andrzej Siewior
2024-06-22 2:12 ` Jakub Kicinski
2024-06-24 10:20 ` Sebastian Andrzej Siewior
2024-06-24 23:36 ` Jakub Kicinski
2024-06-20 13:21 ` [PATCH v9 net-next 09/15] dev: Remove PREEMPT_RT ifdefs from backlog_lock.*() Sebastian Andrzej Siewior
2024-06-20 13:22 ` [PATCH v9 net-next 10/15] dev: Use nested-BH locking for softnet_data.process_queue Sebastian Andrzej Siewior
2024-06-20 13:22 ` [PATCH v9 net-next 11/15] lwt: Don't disable migration prio invoking BPF Sebastian Andrzej Siewior
2024-06-20 13:22 ` [PATCH v9 net-next 12/15] seg6: Use nested-BH locking for seg6_bpf_srh_states Sebastian Andrzej Siewior
2024-06-20 13:22 ` [PATCH v9 net-next 13/15] net: Use nested-BH locking for bpf_scratchpad Sebastian Andrzej Siewior
2024-06-20 13:22 ` [PATCH v9 net-next 14/15] net: Reference bpf_redirect_info via task_struct on PREEMPT_RT Sebastian Andrzej Siewior
2024-06-22 2:08 ` Jakub Kicinski
2024-06-24 7:26 ` Sebastian Andrzej Siewior
2024-06-24 23:37 ` Jakub Kicinski
2024-06-20 13:22 ` Sebastian Andrzej Siewior [this message]
2024-06-22 2:05 ` [PATCH v9 net-next 15/15] net: Move per-CPU flush-lists to bpf_net_context " Jakub Kicinski
2024-06-24 7:24 ` Sebastian Andrzej Siewior
2024-06-25 0:30 ` [PATCH v9 net-next 00/15] locking: Introduce nested-BH locking patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240620132727.660738-16-bigeasy@linutronix.de \
--to=bigeasy@linutronix.de \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=boqun.feng@gmail.com \
--cc=bpf@vger.kernel.org \
--cc=bristot@kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=eddyz87@gmail.com \
--cc=edumazet@google.com \
--cc=frederic@kernel.org \
--cc=haoluo@google.com \
--cc=hawk@kernel.org \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=jonathan.lemon@gmail.com \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=longman@redhat.com \
--cc=maciej.fijalkowski@intel.com \
--cc=magnus.karlsson@intel.com \
--cc=martin.lau@linux.dev \
--cc=mingo@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=peterz@infradead.org \
--cc=sdf@google.com \
--cc=song@kernel.org \
--cc=tglx@linutronix.de \
--cc=toke@redhat.com \
--cc=will@kernel.org \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).