netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically
@ 2025-06-13 12:36 Sebastian Andrzej Siewior
  2025-06-16 22:09 ` Jakub Kicinski
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-06-13 12:36 UTC (permalink / raw)
  To: netdev, dev
  Cc: David S. Miller, Aaron Conole, Eelco Chaudron, Eric Dumazet,
	Gal Pressman, Ilya Maximets, Jakub Kicinski, Paolo Abeni,
	Simon Horman

PERCPU_MODULE_RESERVE defines the maximum size that can by used for the
per-CPU data size used by modules. This is 8KiB.

Commit 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into
one") restructured the per-CPU memory allocation for the module and
moved the separate alloc_percpu() invocations at module init time to a
static per-CPU variable which is allocated by the module loader.

The size of the per-CPU data section for openvswitch is 6488 bytes which
is ~80% of the available per-CPU memory. Together with a few other
modules it is easy to exhaust the available 8KiB of memory.

Allocate ovs_pcpu_storage dynamically at module init time.

Reported-by: Gal Pressman <gal@nvidia.com>
Closes: https://lore.kernel.org/all/c401e017-f8db-4f57-a1cd-89beb979a277@nvidia.com
Fixes: 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into one")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---

Gal, would you please be so kind and check if this works for you?

 net/openvswitch/actions.c  | 23 +++++++++------------
 net/openvswitch/datapath.c | 42 +++++++++++++++++++++++++++++++-------
 net/openvswitch/datapath.h |  3 ++-
 3 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e7269a3eec79e..3add108340bfd 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -39,16 +39,14 @@
 #include "flow_netlink.h"
 #include "openvswitch_trace.h"
 
-DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = {
-	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
-};
+struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
 
 /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
  * space. Return NULL if out of key spaces.
  */
 static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
 {
-	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
 	struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
 	int level = ovs_pcpu->exec_level;
 	struct sw_flow_key *key = NULL;
@@ -94,7 +92,7 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
 				    const struct nlattr *actions,
 				    const int actions_len)
 {
-	struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
+	struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
 	struct deferred_action *da;
 
 	da = action_fifo_put(fifo);
@@ -755,7 +753,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
 static int ovs_vport_output(struct net *net, struct sock *sk,
 			    struct sk_buff *skb)
 {
-	struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
+	struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
 	struct vport *vport = data->vport;
 
 	if (skb_cow_head(skb, data->l2_len) < 0) {
@@ -807,7 +805,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
 	unsigned int hlen = skb_network_offset(skb);
 	struct ovs_frag_data *data;
 
-	data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
+	data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
 	data->dst = skb->_skb_refdst;
 	data->vport = vport;
 	data->cb = *OVS_CB(skb);
@@ -1566,16 +1564,15 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
 	clone = clone_flow_key ? clone_key(key) : key;
 	if (clone) {
 		int err = 0;
-
 		if (actions) { /* Sample action */
 			if (clone_flow_key)
-				__this_cpu_inc(ovs_pcpu_storage.exec_level);
+				__this_cpu_inc(ovs_pcpu_storage->exec_level);
 
 			err = do_execute_actions(dp, skb, clone,
 						 actions, len);
 
 			if (clone_flow_key)
-				__this_cpu_dec(ovs_pcpu_storage.exec_level);
+				__this_cpu_dec(ovs_pcpu_storage->exec_level);
 		} else { /* Recirc action */
 			clone->recirc_id = recirc_id;
 			ovs_dp_process_packet(skb, clone);
@@ -1611,7 +1608,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
 
 static void process_deferred_actions(struct datapath *dp)
 {
-	struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
+	struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
 
 	/* Do not touch the FIFO in case there is no deferred actions. */
 	if (action_fifo_is_empty(fifo))
@@ -1642,7 +1639,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 {
 	int err, level;
 
-	level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level);
+	level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level);
 	if (unlikely(level > OVS_RECURSION_LIMIT)) {
 		net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
 				     ovs_dp_name(dp));
@@ -1659,6 +1656,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 		process_deferred_actions(dp);
 
 out:
-	__this_cpu_dec(ovs_pcpu_storage.exec_level);
+	__this_cpu_dec(ovs_pcpu_storage->exec_level);
 	return err;
 }
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6a304ae2d959c..b990dc83504f4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -244,7 +244,7 @@ void ovs_dp_detach_port(struct vport *p)
 /* Must be called with rcu_read_lock. */
 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
 	const struct vport *p = OVS_CB(skb)->input_vport;
 	struct datapath *dp = p->dp;
 	struct sw_flow *flow;
@@ -299,7 +299,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 	 * avoided.
 	 */
 	if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
-		local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+		local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
 		ovs_pcpu->owner = current;
 		ovs_pcpu_locked = true;
 	}
@@ -310,7 +310,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 				    ovs_dp_name(dp), error);
 	if (ovs_pcpu_locked) {
 		ovs_pcpu->owner = NULL;
-		local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+		local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
 	}
 
 	stats_counter = &stats->n_hit;
@@ -689,13 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	sf_acts = rcu_dereference(flow->sf_acts);
 
 	local_bh_disable();
-	local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+	local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		this_cpu_write(ovs_pcpu_storage.owner, current);
+		this_cpu_write(ovs_pcpu_storage->owner, current);
 	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		this_cpu_write(ovs_pcpu_storage.owner, NULL);
-	local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+		this_cpu_write(ovs_pcpu_storage->owner, NULL);
+	local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
 	local_bh_enable();
 	rcu_read_unlock();
 
@@ -2744,6 +2744,28 @@ static struct drop_reason_list drop_reason_list_ovs = {
 	.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
 };
 
+static int __init ovs_alloc_percpu_storage(void)
+{
+	unsigned int cpu;
+
+	ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
+	if (!ovs_pcpu_storage)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		struct ovs_pcpu_storage *ovs_pcpu;
+
+		ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
+		local_lock_init(&ovs_pcpu->bh_lock);
+	}
+	return 0;
+}
+
+static void ovs_free_percpu_storage(void)
+{
+	free_percpu(ovs_pcpu_storage);
+}
+
 static int __init dp_init(void)
 {
 	int err;
@@ -2753,6 +2775,10 @@ static int __init dp_init(void)
 
 	pr_info("Open vSwitch switching datapath\n");
 
+	err = ovs_alloc_percpu_storage();
+	if (err)
+		goto error;
+
 	err = ovs_internal_dev_rtnl_link_register();
 	if (err)
 		goto error;
@@ -2799,6 +2825,7 @@ static int __init dp_init(void)
 error_unreg_rtnl_link:
 	ovs_internal_dev_rtnl_link_unregister();
 error:
+	ovs_free_percpu_storage();
 	return err;
 }
 
@@ -2813,6 +2840,7 @@ static void dp_cleanup(void)
 	ovs_vport_exit();
 	ovs_flow_exit();
 	ovs_internal_dev_rtnl_link_unregister();
+	ovs_free_percpu_storage();
 }
 
 module_init(dp_init);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 1b5348b0f5594..cfeb817a18894 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -220,7 +220,8 @@ struct ovs_pcpu_storage {
 	struct task_struct *owner;
 	local_lock_t bh_lock;
 };
-DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage);
+
+extern struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
 
 /**
  * enum ovs_pkt_hash_types - hash info to include with a packet
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically
  2025-06-13 12:36 [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically Sebastian Andrzej Siewior
@ 2025-06-16 22:09 ` Jakub Kicinski
  2025-06-17  7:58   ` Sebastian Andrzej Siewior
  2025-06-17  8:00   ` Gal Pressman
  2025-06-17 11:31 ` [ovs-dev] " Aaron Conole
  2025-06-17 13:00 ` patchwork-bot+netdevbpf
  2 siblings, 2 replies; 6+ messages in thread
From: Jakub Kicinski @ 2025-06-16 22:09 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Gal Pressman
  Cc: netdev, dev, David S. Miller, Aaron Conole, Eelco Chaudron,
	Eric Dumazet, Ilya Maximets, Paolo Abeni, Simon Horman

On Fri, 13 Jun 2025 14:36:29 +0200 Sebastian Andrzej Siewior wrote:
> PERCPU_MODULE_RESERVE defines the maximum size that can by used for the
> per-CPU data size used by modules. This is 8KiB.
> 
> Commit 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into
> one") restructured the per-CPU memory allocation for the module and
> moved the separate alloc_percpu() invocations at module init time to a
> static per-CPU variable which is allocated by the module loader.

IIUC you're saying that the module loader only gets 8kB but dynamic
allocations from the code don't have this restriction?
Maybe just me but TBH the commit message reads like the inverse :S

> The size of the per-CPU data section for openvswitch is 6488 bytes which
> is ~80% of the available per-CPU memory. Together with a few other
> modules it is easy to exhaust the available 8KiB of memory.
> 
> Allocate ovs_pcpu_storage dynamically at module init time.

Gal, should we wait for your testing or apply?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically
  2025-06-16 22:09 ` Jakub Kicinski
@ 2025-06-17  7:58   ` Sebastian Andrzej Siewior
  2025-06-17  8:00   ` Gal Pressman
  1 sibling, 0 replies; 6+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-06-17  7:58 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Gal Pressman, netdev, dev, David S. Miller, Aaron Conole,
	Eelco Chaudron, Eric Dumazet, Ilya Maximets, Paolo Abeni,
	Simon Horman

On 2025-06-16 15:09:02 [-0700], Jakub Kicinski wrote:
> On Fri, 13 Jun 2025 14:36:29 +0200 Sebastian Andrzej Siewior wrote:
> > PERCPU_MODULE_RESERVE defines the maximum size that can by used for the
> > per-CPU data size used by modules. This is 8KiB.
> > 
> > Commit 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into
> > one") restructured the per-CPU memory allocation for the module and
> > moved the separate alloc_percpu() invocations at module init time to a
> > static per-CPU variable which is allocated by the module loader.
> 
> IIUC you're saying that the module loader only gets 8kB but dynamic
> allocations from the code don't have this restriction?
8KiB is for build time variables such as those defined by
DEFINE_PER_CPU(). The module loader uses __alloc_reserved_percpu() to
access this PERCPU_MODULE_RESERVE) storage.
Regular alloc_percpu() (at module init time or later ) does not have
this restriction.

> Maybe just me but TBH the commit message reads like the inverse :S

I can improve the message but I'm not sure how.

Sebastian

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically
  2025-06-16 22:09 ` Jakub Kicinski
  2025-06-17  7:58   ` Sebastian Andrzej Siewior
@ 2025-06-17  8:00   ` Gal Pressman
  1 sibling, 0 replies; 6+ messages in thread
From: Gal Pressman @ 2025-06-17  8:00 UTC (permalink / raw)
  To: Jakub Kicinski, Sebastian Andrzej Siewior
  Cc: netdev, dev, David S. Miller, Aaron Conole, Eelco Chaudron,
	Eric Dumazet, Ilya Maximets, Paolo Abeni, Simon Horman

On 17/06/2025 1:09, Jakub Kicinski wrote:
> Gal, should we wait for your testing or apply?

Unfortunately, I don't have a way to manually test this at the moment
due to limited availability these days, I will only be able to report
back once it's applied, sorry :\.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [ovs-dev] [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically
  2025-06-13 12:36 [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically Sebastian Andrzej Siewior
  2025-06-16 22:09 ` Jakub Kicinski
@ 2025-06-17 11:31 ` Aaron Conole
  2025-06-17 13:00 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 6+ messages in thread
From: Aaron Conole @ 2025-06-17 11:31 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: netdev, dev, Ilya Maximets, David S. Miller, Eric Dumazet,
	Simon Horman, Jakub Kicinski, Paolo Abeni, Gal Pressman

Sebastian Andrzej Siewior <bigeasy@linutronix.de> writes:

> PERCPU_MODULE_RESERVE defines the maximum size that can by used for the
> per-CPU data size used by modules. This is 8KiB.
>
> Commit 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into
> one") restructured the per-CPU memory allocation for the module and
> moved the separate alloc_percpu() invocations at module init time to a
> static per-CPU variable which is allocated by the module loader.
>
> The size of the per-CPU data section for openvswitch is 6488 bytes which
> is ~80% of the available per-CPU memory. Together with a few other
> modules it is easy to exhaust the available 8KiB of memory.
>
> Allocate ovs_pcpu_storage dynamically at module init time.
>
> Reported-by: Gal Pressman <gal@nvidia.com>
> Closes:
> https://lore.kernel.org/all/c401e017-f8db-4f57-a1cd-89beb979a277@nvidia.com
> Fixes: 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into one")
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>
> Gal, would you please be so kind and check if this works for you?
>

I was hoping Gal would have gotten back by now.

Reviewed-by: Aaron Conole <aconole@redhat.com>


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically
  2025-06-13 12:36 [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically Sebastian Andrzej Siewior
  2025-06-16 22:09 ` Jakub Kicinski
  2025-06-17 11:31 ` [ovs-dev] " Aaron Conole
@ 2025-06-17 13:00 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 6+ messages in thread
From: patchwork-bot+netdevbpf @ 2025-06-17 13:00 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: netdev, dev, davem, aconole, echaudro, edumazet, gal, i.maximets,
	kuba, pabeni, horms

Hello:

This patch was applied to netdev/net.git (main)
by Paolo Abeni <pabeni@redhat.com>:

On Fri, 13 Jun 2025 14:36:29 +0200 you wrote:
> PERCPU_MODULE_RESERVE defines the maximum size that can by used for the
> per-CPU data size used by modules. This is 8KiB.
> 
> Commit 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into
> one") restructured the per-CPU memory allocation for the module and
> moved the separate alloc_percpu() invocations at module init time to a
> static per-CPU variable which is allocated by the module loader.
> 
> [...]

Here is the summary with links:
  - [net] openvswitch: Allocate struct ovs_pcpu_storage dynamically
    https://git.kernel.org/netdev/net/c/7b4ac12cc929

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-06-17 13:00 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-13 12:36 [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically Sebastian Andrzej Siewior
2025-06-16 22:09 ` Jakub Kicinski
2025-06-17  7:58   ` Sebastian Andrzej Siewior
2025-06-17  8:00   ` Gal Pressman
2025-06-17 11:31 ` [ovs-dev] " Aaron Conole
2025-06-17 13:00 ` patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).