[PATCH v2 net-next 2/4] net: no longer assume RTNL is held in flush_all_backlogs()

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Eric Dumazet <edumazet@google.com>
To: "David S . Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	 Paolo Abeni <pabeni@redhat.com>
Cc: netdev@vger.kernel.org, Simon Horman <horms@kernel.org>,
	eric.dumazet@gmail.com,  Eric Dumazet <edumazet@google.com>
Subject: [PATCH v2 net-next 2/4] net: no longer assume RTNL is held in flush_all_backlogs()
Date: Wed,  8 Jan 2025 16:22:53 +0000	[thread overview]
Message-ID: <20250108162255.1306392-3-edumazet@google.com> (raw)
In-Reply-To: <20250108162255.1306392-1-edumazet@google.com>

flush_all_backlogs() uses per-cpu and static data to hold its
temporary data, on the assumption it is called under RTNL
protection.

The following patch in the series will break this assumption.

Instead, use a dynamically allocated piece of memory.

In the unlikely case the allocation fails, fall back to a buffer
allocated at boot time, whose use is serialized by a mutex.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/core/dev.c | 53 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 76ad68b129eed0407686e8696102aeed9a8b30ec..8ff288cf25dceb5856496388f83f409fcb6f8e5d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5978,8 +5978,6 @@ void netif_receive_skb_list(struct list_head *head)
 }
 EXPORT_SYMBOL(netif_receive_skb_list);
 
-static DEFINE_PER_CPU(struct work_struct, flush_works);
-
 /* Network device is going away, flush any packets still pending */
 static void flush_backlog(struct work_struct *work)
 {
@@ -6036,36 +6034,54 @@ static bool flush_required(int cpu)
 	return true;
 }
 
+struct flush_backlogs {
+	cpumask_t		flush_cpus;
+	struct work_struct	w[];
+};
+
+static struct flush_backlogs *flush_backlogs_alloc(void)
+{
+	return kmalloc(struct_size_t(struct flush_backlogs, w, nr_cpu_ids),
+		       GFP_KERNEL);
+}
+
+static struct flush_backlogs *flush_backlogs_fallback;
+static DEFINE_MUTEX(flush_backlogs_mutex);
+
 static void flush_all_backlogs(void)
 {
-	static cpumask_t flush_cpus;
+	struct flush_backlogs *ptr = flush_backlogs_alloc();
 	unsigned int cpu;
 
-	/* since we are under rtnl lock protection we can use static data
-	 * for the cpumask and avoid allocating on stack the possibly
-	 * large mask
-	 */
-	ASSERT_RTNL();
+	if (!ptr) {
+		mutex_lock(&flush_backlogs_mutex);
+		ptr = flush_backlogs_fallback;
+	}
+	cpumask_clear(&ptr->flush_cpus);
 
 	cpus_read_lock();
 
-	cpumask_clear(&flush_cpus);
 	for_each_online_cpu(cpu) {
 		if (flush_required(cpu)) {
-			queue_work_on(cpu, system_highpri_wq,
-				      per_cpu_ptr(&flush_works, cpu));
-			cpumask_set_cpu(cpu, &flush_cpus);
+			INIT_WORK(&ptr->w[cpu], flush_backlog);
+			queue_work_on(cpu, system_highpri_wq, &ptr->w[cpu]);
+			__cpumask_set_cpu(cpu, &ptr->flush_cpus);
 		}
 	}
 
 	/* we can have in flight packet[s] on the cpus we are not flushing,
 	 * synchronize_net() in unregister_netdevice_many() will take care of
-	 * them
+	 * them.
 	 */
-	for_each_cpu(cpu, &flush_cpus)
-		flush_work(per_cpu_ptr(&flush_works, cpu));
+	for_each_cpu(cpu, &ptr->flush_cpus)
+		flush_work(&ptr->w[cpu]);
 
 	cpus_read_unlock();
+
+	if (ptr != flush_backlogs_fallback)
+		kfree(ptr);
+	else
+		mutex_unlock(&flush_backlogs_mutex);
 }
 
 static void net_rps_send_ipi(struct softnet_data *remsd)
@@ -12259,12 +12275,13 @@ static int __init net_dev_init(void)
 	 *	Initialise the packet receive queues.
 	 */
 
+	flush_backlogs_fallback = flush_backlogs_alloc();
+	if (!flush_backlogs_fallback)
+		goto out;
+
 	for_each_possible_cpu(i) {
-		struct work_struct *flush = per_cpu_ptr(&flush_works, i);
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-		INIT_WORK(flush, flush_backlog);
-
 		skb_queue_head_init(&sd->input_pkt_queue);
 		skb_queue_head_init(&sd->process_queue);
 #ifdef CONFIG_XFRM_OFFLOAD
-- 
2.47.1.613.gc27f4b7a9f-goog

next prev parent reply	other threads:[~2025-01-08 16:23 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-08 16:22 [PATCH v2 net-next 0/4] net: reduce RTNL pressure in unregister_netdevice() Eric Dumazet
2025-01-08 16:22 ` [PATCH v2 net-next 1/4] net: expedite synchronize_net() for cleanup_net() Eric Dumazet
2025-01-09 14:35   ` kernel test robot
2025-01-09 15:10   ` kernel test robot
2025-01-08 16:22 ` Eric Dumazet [this message]
2025-01-08 16:22 ` [PATCH v2 net-next 3/4] net: no longer hold RTNL while calling flush_all_backlogs() Eric Dumazet
2025-01-08 16:22 ` [PATCH v2 net-next 4/4] net: reduce RTNL hold duration in unregister_netdevice_many_notify() Eric Dumazet
2025-01-08 16:54 ` [PATCH v2 net-next 0/4] net: reduce RTNL pressure in unregister_netdevice() Eric Dumazet

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:76ad68b129eed0407686e8696102aeed9a8b30e
dfblob:8ff288cf25dceb5856496388f83f409fcb6f8e5 )
 OR (
bs:"[PATCH v2 net-next 2/4] net: no longer assume RTNL is held in flush_all_backlogs()" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250108162255.1306392-3-edumazet@google.com \
    --to=edumazet@google.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=horms@kernel.org \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.