[PATCH 1/7] net: openvswitch: generalise the per-cpu flow key allocation stack

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Nicholas Piggin <npiggin@gmail.com>
To: netdev@vger.kernel.org
Cc: Nicholas Piggin <npiggin@gmail.com>,
	dev@openvswitch.org, Pravin B Shelar <pshelar@ovn.org>,
	Aaron Conole <aconole@redhat.com>,
	"Eelco Chaudron" <echaudro@redhat.com>,
	"Ilya Maximets" <imaximet@redhat.com>,
	"Flavio Leitner" <fbl@redhat.com>
Subject: [PATCH 1/7] net: openvswitch: generalise the per-cpu flow key allocation stack
Date: Wed, 11 Oct 2023 13:43:38 +1000	[thread overview]
Message-ID: <20231011034344.104398-2-npiggin@gmail.com> (raw)
In-Reply-To: <20231011034344.104398-1-npiggin@gmail.com>

Rather than an implicit key allocation index based on the recursion
level, make this a standalone LIFO (stack) allocator. This makes it usable
in other places without modifying the recursion accounting.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 net/openvswitch/actions.c | 104 ++++++++++++++++++++++++++------------
 1 file changed, 72 insertions(+), 32 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index fd66014d8a76..bc7a8c2fff91 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -59,9 +59,10 @@ struct ovs_frag_data {
 
 static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
 
-#define DEFERRED_ACTION_FIFO_SIZE 10
 #define OVS_RECURSION_LIMIT 5
-#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
+#define NR_FLOW_KEYS 5
+#define DEFERRED_ACTION_FIFO_SIZE 10
+
 struct action_fifo {
 	int head;
 	int tail;
@@ -69,27 +70,64 @@ struct action_fifo {
 	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
 };
 
-struct action_flow_keys {
-	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
+struct flow_key_stack {
+	struct sw_flow_key key[NR_FLOW_KEYS];
 };
 
-static struct action_fifo __percpu *action_fifos;
-static struct action_flow_keys __percpu *flow_keys;
 static DEFINE_PER_CPU(int, exec_actions_level);
 
+static struct flow_key_stack __percpu *flow_key_stack;
+static DEFINE_PER_CPU(int, flow_keys_allocated);
+
+static struct action_fifo __percpu *action_fifos;
+
+/*
+ * ovs_flow_key_alloc provides a per-CPU sw_flow_key allocator. Keys must be
+ * freed in the reverse order that they were allocated in (i.e., a stack).
+ */
+static struct sw_flow_key *ovs_flow_key_alloc(void)
+{
+	struct flow_key_stack *keys = this_cpu_ptr(flow_key_stack);
+	int level = this_cpu_read(flow_keys_allocated);
+
+	if (unlikely(level >= NR_FLOW_KEYS))
+		return NULL;
+
+	__this_cpu_inc(flow_keys_allocated);
+
+	return &keys->key[level];
+}
+
+static void ovs_flow_key_free(struct sw_flow_key *key)
+{
+	struct flow_key_stack *keys = this_cpu_ptr(flow_key_stack);
+	int level = this_cpu_read(flow_keys_allocated);
+
+	/*
+	 * If these debug checks fire then keys will cease being freed
+	 * and the allocator will become exhausted and stop working. This
+	 * gives a graceful failure mode for programming errors.
+	 */
+
+	if (WARN_ON_ONCE(level == 0))
+		return; /* Underflow */
+
+	if (WARN_ON_ONCE(key != &keys->key[level - 1]))
+		return; /* Mismatched alloc/free order */
+
+	__this_cpu_dec(flow_keys_allocated);
+}
+
 /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
  * space. Return NULL if out of key spaces.
  */
 static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
 {
-	struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
-	int level = this_cpu_read(exec_actions_level);
-	struct sw_flow_key *key = NULL;
+	struct sw_flow_key *key;
 
-	if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
-		key = &keys->key[level - 1];
+	key = ovs_flow_key_alloc();
+	if (likely(key))
 		*key = *key_;
-	}
 
 	return key;
 }
@@ -1522,9 +1560,10 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
 {
 	struct deferred_action *da;
 	struct sw_flow_key *clone;
+	int err = 0;
 
 	skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
-	if (!skb) {
+	if (unlikely(!skb)) {
 		/* Out of memory, skip this action.
 		 */
 		return 0;
@@ -1536,26 +1575,27 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
 	 * 'flow_keys'. If clone is successful, execute the actions
 	 * without deferring.
 	 */
-	clone = clone_flow_key ? clone_key(key) : key;
-	if (clone) {
-		int err = 0;
+	if (clone_flow_key) {
+		clone = clone_key(key);
+		if (unlikely(!clone))
+			goto defer;
+	} else {
+		clone = key;
+	}
 
-		if (actions) { /* Sample action */
-			if (clone_flow_key)
-				__this_cpu_inc(exec_actions_level);
+	if (actions) { /* Sample action */
+		err = do_execute_actions(dp, skb, clone, actions, len);
+	} else { /* Recirc action */
+		clone->recirc_id = recirc_id;
+		ovs_dp_process_packet(skb, clone);
+	}
 
-			err = do_execute_actions(dp, skb, clone,
-						 actions, len);
+	if (clone_flow_key)
+		ovs_flow_key_free(clone);
 
-			if (clone_flow_key)
-				__this_cpu_dec(exec_actions_level);
-		} else { /* Recirc action */
-			clone->recirc_id = recirc_id;
-			ovs_dp_process_packet(skb, clone);
-		}
-		return err;
-	}
+	return err;
 
+defer:
 	/* Out of 'flow_keys' space. Defer actions */
 	da = add_deferred_actions(skb, key, actions, len);
 	if (da) {
@@ -1642,8 +1682,8 @@ int action_fifos_init(void)
 	if (!action_fifos)
 		return -ENOMEM;
 
-	flow_keys = alloc_percpu(struct action_flow_keys);
-	if (!flow_keys) {
+	flow_key_stack = alloc_percpu(struct flow_key_stack);
+	if (!flow_key_stack) {
 		free_percpu(action_fifos);
 		return -ENOMEM;
 	}
@@ -1654,5 +1694,5 @@ int action_fifos_init(void)
 void action_fifos_exit(void)
 {
 	free_percpu(action_fifos);
-	free_percpu(flow_keys);
+	free_percpu(flow_key_stack);
 }
-- 
2.42.0

next prev parent reply	other threads:[~2023-10-11  3:44 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-11  3:43 [PATCH 0/7] net: openvswitch: Reduce stack usage Nicholas Piggin
2023-10-11  3:43 ` Nicholas Piggin [this message]
2023-10-11  3:43 ` [PATCH 2/7] net: openvswitch: Use flow key allocator in ovs_vport_receive Nicholas Piggin
2023-10-11  3:43 ` [PATCH 3/7] openvswitch: reduce stack usage in do_execute_actions Nicholas Piggin
2023-10-11  3:43 ` [PATCH 4/7] net: openvswitch: Reduce push_nsh stack usage Nicholas Piggin
2023-10-11  3:43 ` [PATCH 5/7] net: openvswitch: uninline action execution Nicholas Piggin
2023-10-11  3:43 ` [PATCH 6/7] net: openvswitch: uninline ovs_fragment to control stack usage Nicholas Piggin
2023-10-11  3:43 ` [PATCH 7/7] net: openvswitch: Reduce stack usage in ovs_dp_process_packet Nicholas Piggin
2023-10-11 12:22 ` [PATCH 0/7] net: openvswitch: Reduce stack usage Ilya Maximets
2023-10-12  0:08   ` Nicholas Piggin
2023-10-11 13:23 ` Aaron Conole
2023-10-12  1:19   ` Nicholas Piggin
2023-10-13  8:27     ` David Laight
2023-10-20 17:04     ` Aaron Conole
2023-10-25  4:06       ` Nicholas Piggin

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:fd66014d8a7 dfblob:bc7a8c2fff9 )
 OR (
bs:"[PATCH 1/7] net: openvswitch: generalise the per-cpu flow key allocation stack" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231011034344.104398-2-npiggin@gmail.com \
    --to=npiggin@gmail.com \
    --cc=aconole@redhat.com \
    --cc=dev@openvswitch.org \
    --cc=echaudro@redhat.com \
    --cc=fbl@redhat.com \
    --cc=imaximet@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=pshelar@ovn.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).