[PATCH v2 net-next 2/4] net: no longer assume RTNL is held in flush_all_backlogs()

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Eric Dumazet <edumazet@google.com>
To: "David S . Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	 Paolo Abeni <pabeni@redhat.com>
Cc: netdev@vger.kernel.org, Simon Horman <horms@kernel.org>,
	eric.dumazet@gmail.com,  Eric Dumazet <edumazet@google.com>
Subject: [PATCH v2 net-next 2/4] net: no longer assume RTNL is held in flush_all_backlogs()
Date: Wed,  8 Jan 2025 16:22:53 +0000	[thread overview]
Message-ID: <20250108162255.1306392-3-edumazet@google.com> (raw)
In-Reply-To: <20250108162255.1306392-1-edumazet@google.com>

flush_all_backlogs() uses per-cpu and static data to hold its
temporary data, on the assumption it is called under RTNL
protection.

Following patch in the series will break this assumption.

Use instead a dynamically allocated piece of memory.

In the unlikely case the allocation fails,
use a boot-time allocated memory.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/core/dev.c | 53 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 76ad68b129eed0407686e8696102aeed9a8b30ec..8ff288cf25dceb5856496388f83f409fcb6f8e5d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5978,8 +5978,6 @@ void netif_receive_skb_list(struct list_head *head)
 }
 EXPORT_SYMBOL(netif_receive_skb_list);
 
-static DEFINE_PER_CPU(struct work_struct, flush_works);
-
 /* Network device is going away, flush any packets still pending */
 static void flush_backlog(struct work_struct *work)
 {
@@ -6036,36 +6034,54 @@ static bool flush_required(int cpu)
 	return true;
 }
 
+struct flush_backlogs {
+	cpumask_t		flush_cpus;
+	struct work_struct	w[];
+};
+
+static struct flush_backlogs *flush_backlogs_alloc(void)
+{
+	return kmalloc(struct_size_t(struct flush_backlogs, w, nr_cpu_ids),
+		       GFP_KERNEL);
+}
+
+static struct flush_backlogs *flush_backlogs_fallback;
+static DEFINE_MUTEX(flush_backlogs_mutex);
+
 static void flush_all_backlogs(void)
 {
-	static cpumask_t flush_cpus;
+	struct flush_backlogs *ptr = flush_backlogs_alloc();
 	unsigned int cpu;
 
-	/* since we are under rtnl lock protection we can use static data
-	 * for the cpumask and avoid allocating on stack the possibly
-	 * large mask
-	 */
-	ASSERT_RTNL();
+	if (!ptr) {
+		mutex_lock(&flush_backlogs_mutex);
+		ptr = flush_backlogs_fallback;
+	}
+	cpumask_clear(&ptr->flush_cpus);
 
 	cpus_read_lock();
 
-	cpumask_clear(&flush_cpus);
 	for_each_online_cpu(cpu) {
 		if (flush_required(cpu)) {
-			queue_work_on(cpu, system_highpri_wq,
-				      per_cpu_ptr(&flush_works, cpu));
-			cpumask_set_cpu(cpu, &flush_cpus);
+			INIT_WORK(&ptr->w[cpu], flush_backlog);
+			queue_work_on(cpu, system_highpri_wq, &ptr->w[cpu]);
+			__cpumask_set_cpu(cpu, &ptr->flush_cpus);
 		}
 	}
 
 	/* we can have in flight packet[s] on the cpus we are not flushing,
 	 * synchronize_net() in unregister_netdevice_many() will take care of
-	 * them
+	 * them.
 	 */
-	for_each_cpu(cpu, &flush_cpus)
-		flush_work(per_cpu_ptr(&flush_works, cpu));
+	for_each_cpu(cpu, &ptr->flush_cpus)
+		flush_work(&ptr->w[cpu]);
 
 	cpus_read_unlock();
+
+	if (ptr != flush_backlogs_fallback)
+		kfree(ptr);
+	else
+		mutex_unlock(&flush_backlogs_mutex);
 }
 
 static void net_rps_send_ipi(struct softnet_data *remsd)
@@ -12259,12 +12275,13 @@ static int __init net_dev_init(void)
 	 *	Initialise the packet receive queues.
 	 */
 
+	flush_backlogs_fallback = flush_backlogs_alloc();
+	if (!flush_backlogs_fallback)
+		goto out;
+
 	for_each_possible_cpu(i) {
-		struct work_struct *flush = per_cpu_ptr(&flush_works, i);
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-		INIT_WORK(flush, flush_backlog);
-
 		skb_queue_head_init(&sd->input_pkt_queue);
 		skb_queue_head_init(&sd->process_queue);
 #ifdef CONFIG_XFRM_OFFLOAD
-- 
2.47.1.613.gc27f4b7a9f-goog

next prev parent reply	other threads:[~2025-01-08 16:23 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-08 16:22 [PATCH v2 net-next 0/4] net: reduce RTNL pressure in unregister_netdevice() Eric Dumazet
2025-01-08 16:22 ` [PATCH v2 net-next 1/4] net: expedite synchronize_net() for cleanup_net() Eric Dumazet
2025-01-09 14:35   ` kernel test robot
2025-01-09 15:10   ` kernel test robot
2025-01-08 16:22 ` Eric Dumazet [this message]
2025-01-08 16:22 ` [PATCH v2 net-next 3/4] net: no longer hold RTNL while calling flush_all_backlogs() Eric Dumazet
2025-01-08 16:22 ` [PATCH v2 net-next 4/4] net: reduce RTNL hold duration in unregister_netdevice_many_notify() Eric Dumazet
2025-01-08 16:54 ` [PATCH v2 net-next 0/4] net: reduce RTNL pressure in unregister_netdevice() Eric Dumazet

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:76ad68b129eed0407686e8696102aeed9a8b30e
dfblob:8ff288cf25dceb5856496388f83f409fcb6f8e5 )
 OR (
bs:"[PATCH v2 net-next 2/4] net: no longer assume RTNL is held in flush_all_backlogs()" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250108162255.1306392-3-edumazet@google.com \
    --to=edumazet@google.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=horms@kernel.org \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).