netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
	Gal Pressman <gal@nvidia.com>,
	Leon Romanovsky <leonro@nvidia.com>,
	Mark Bloch <mbloch@nvidia.com>
Subject: [net-next 08/15] net/mlx5: fs, add support for no append at software level
Date: Wed, 11 Sep 2024 13:17:50 -0700	[thread overview]
Message-ID: <20240911201757.1505453-9-saeed@kernel.org> (raw)
In-Reply-To: <20240911201757.1505453-1-saeed@kernel.org>

From: Mark Bloch <mbloch@nvidia.com>

Native capability for some steering engines lacks support for adding an
additional match with the same value to the same flow group. To accommodate
the NO APPEND flag in these scenarios, we include the new rule in the
existing flow table entry (fte) without immediate hardware commitment. When
a request is made to delete the corresponding hardware rule, we then commit
the pending rule to hardware.

Only one pending rule is supported because NO_APPEND is primarily used
during replacement operations. In this scenario, a rule is initially added.
When it needs replacement, the new rule is added with NO_APPEND set. Only
after the insertion of the new rule is the original rule deleted.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  |   2 +-
 .../net/ethernet/mellanox/mlx5/core/fs_core.c | 245 +++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/fs_core.h |   7 +
 .../mellanox/mlx5/core/steering/fs_dr.c       |   4 +-
 4 files changed, 242 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 1a14f8dd878f..676005854dad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -1071,7 +1071,7 @@ static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns,
 static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns,
 				     enum fs_flow_table_type ft_type)
 {
-	return 0;
+	return MLX5_FLOW_STEERING_CAP_DUPLICATE_MATCH;
 }
 
 static const struct mlx5_flow_cmds mlx5_flow_cmds = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 57df2a6bcb42..8505d5e241e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -613,6 +613,31 @@ static void modify_fte(struct fs_fte *fte)
 	fte->act_dests.modify_mask = 0;
 }
 
+static void del_sw_hw_dup_rule(struct fs_node *node)
+{
+	struct mlx5_flow_rule *rule;
+	struct fs_fte *fte;
+
+	fs_get_obj(rule, node);
+	fs_get_obj(fte, rule->node.parent);
+	trace_mlx5_fs_del_rule(rule);
+
+	if (is_fwd_next_action(rule->sw_action)) {
+		mutex_lock(&rule->dest_attr.ft->lock);
+		list_del(&rule->next_ft);
+		mutex_unlock(&rule->dest_attr.ft->lock);
+	}
+
+	/* If a pending rule is being deleted it means
+	 * this is a NO APPEND rule, so there are no partial deletions,
+	 * all the rules of the mlx5_flow_handle are going to be deleted
+	 * and the rules aren't shared with any other mlx5_flow_handle instance
+	 * so no need to do any bookkeeping like in del_sw_hw_rule().
+	 */
+
+	kfree(rule);
+}
+
 static void del_sw_hw_rule(struct fs_node *node)
 {
 	struct mlx5_flow_rule *rule;
@@ -658,12 +683,33 @@ static void del_sw_hw_rule(struct fs_node *node)
 	kfree(rule);
 }
 
+static void switch_to_pending_act_dests(struct fs_fte *fte)
+{
+	struct fs_node *iter;
+
+	memcpy(&fte->act_dests, &fte->dup->act_dests, sizeof(fte->act_dests));
+
+	list_bulk_move_tail(&fte->node.children,
+			    fte->dup->children.next,
+			    fte->dup->children.prev);
+
+	list_for_each_entry(iter, &fte->node.children, list)
+		iter->del_sw_func = del_sw_hw_rule;
+
+	/* Make sure the fte isn't deleted
+	 * as mlx5_del_flow_rules() decreases the refcount
+	 * of the fte to trigger deletion.
+	 */
+	tree_get_node(&fte->node);
+}
+
 static void del_hw_fte(struct fs_node *node)
 {
 	struct mlx5_flow_root_namespace *root;
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
 	struct mlx5_core_dev *dev;
+	bool pending_used = false;
 	struct fs_fte *fte;
 	int err;
 
@@ -675,16 +721,33 @@ static void del_hw_fte(struct fs_node *node)
 	WARN_ON(fte->act_dests.dests_size);
 	dev = get_dev(&ft->node);
 	root = find_root(&ft->node);
+
+	if (fte->dup && !list_empty(&fte->dup->children)) {
+		switch_to_pending_act_dests(fte);
+		pending_used = true;
+	} else {
+		/* Avoid double call to del_hw_fte */
+		node->del_hw_func = NULL;
+	}
+
 	if (node->active) {
-		err = root->cmds->delete_fte(root, ft, fte);
-		if (err)
-			mlx5_core_warn(dev,
-				       "flow steering can't delete fte in index %d of flow group id %d\n",
-				       fte->index, fg->id);
-		node->active = false;
+		if (pending_used) {
+			err = root->cmds->update_fte(root, ft, fg,
+						     fte->act_dests.modify_mask, fte);
+			if (err)
+				mlx5_core_warn(dev,
+					       "flow steering can't update to pending rule in index %d of flow group id %d\n",
+					       fte->index, fg->id);
+			fte->act_dests.modify_mask = 0;
+		} else {
+			err = root->cmds->delete_fte(root, ft, fte);
+			if (err)
+				mlx5_core_warn(dev,
+					       "flow steering can't delete fte in index %d of flow group id %d\n",
+					       fte->index, fg->id);
+			node->active = false;
+		}
 	}
-	/* Avoid double call to del_hw_fte */
-	fte->node.del_hw_func = NULL;
 }
 
 static void del_sw_fte(struct fs_node *node)
@@ -702,6 +765,7 @@ static void del_sw_fte(struct fs_node *node)
 				     rhash_fte);
 	WARN_ON(err);
 	ida_free(&fg->fte_allocator, fte->index - fg->start_index);
+	kvfree(fte->dup);
 	kmem_cache_free(steering->ftes_cache, fte);
 }
 
@@ -1105,18 +1169,45 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 	return err;
 }
 
+static bool rule_is_pending(struct fs_fte *fte, struct mlx5_flow_rule *rule)
+{
+	struct mlx5_flow_rule *tmp_rule;
+	struct fs_node *iter;
+
+	if (!fte->dup || list_empty(&fte->dup->children))
+		return false;
+
+	list_for_each_entry(iter, &fte->dup->children, list) {
+		tmp_rule = container_of(iter, struct mlx5_flow_rule, node);
+
+		if (tmp_rule == rule)
+			return true;
+	}
+
+	return false;
+}
+
 static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 					 struct mlx5_flow_destination *dest)
 {
 	struct mlx5_flow_root_namespace *root;
+	struct fs_fte_action *act_dests;
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
+	bool pending = false;
 	struct fs_fte *fte;
 	int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
 	int err = 0;
 
 	fs_get_obj(fte, rule->node.parent);
-	if (!(fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+
+	pending = rule_is_pending(fte, rule);
+	if (pending)
+		act_dests = &fte->dup->act_dests;
+	else
+		act_dests = &fte->act_dests;
+
+	if (!(act_dests->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
 		return -EINVAL;
 	down_write_ref_node(&fte->node, false);
 	fs_get_obj(fg, fte->node.parent);
@@ -1124,8 +1215,9 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 
 	memcpy(&rule->dest_attr, dest, sizeof(*dest));
 	root = find_root(&ft->node);
-	err = root->cmds->update_fte(root, ft, fg,
-				     modify_mask, fte);
+	if (!pending)
+		err = root->cmds->update_fte(root, ft, fg,
+					     modify_mask, fte);
 	up_write_ref_node(&fte->node, false);
 
 	return err;
@@ -1455,6 +1547,16 @@ static struct mlx5_flow_handle *alloc_handle(int num_rules)
 	return handle;
 }
 
+static void destroy_flow_handle_dup(struct mlx5_flow_handle *handle,
+				    int i)
+{
+	for (; --i >= 0;) {
+		list_del(&handle->rule[i]->node.list);
+		kfree(handle->rule[i]);
+	}
+	kfree(handle);
+}
+
 static void destroy_flow_handle(struct fs_fte *fte,
 				struct mlx5_flow_handle *handle,
 				struct mlx5_flow_destination *dest,
@@ -1470,6 +1572,61 @@ static void destroy_flow_handle(struct fs_fte *fte,
 	kfree(handle);
 }
 
+static struct mlx5_flow_handle *
+create_flow_handle_dup(struct list_head *children,
+		       struct mlx5_flow_destination *dest,
+		       int dest_num,
+		       struct fs_fte_action *act_dests)
+{
+	static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+	static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+	struct mlx5_flow_rule *rule = NULL;
+	struct mlx5_flow_handle *handle;
+	int i = 0;
+	int type;
+
+	handle = alloc_handle((dest_num) ? dest_num : 1);
+	if (!handle)
+		return NULL;
+
+	do {
+		rule = alloc_rule(dest + i);
+		if (!rule)
+			goto free_rules;
+
+		/* Add dest to dests list- we need flow tables to be in the
+		 * end of the list for forward to next prio rules.
+		 */
+		tree_init_node(&rule->node, NULL, del_sw_hw_dup_rule);
+		if (dest &&
+		    dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+			list_add(&rule->node.list, children);
+		else
+			list_add_tail(&rule->node.list, children);
+
+		if (dest) {
+			act_dests->dests_size++;
+
+			if (is_fwd_dest_type(dest[i].type))
+				act_dests->fwd_dests++;
+
+			type = dest[i].type ==
+				MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+			act_dests->modify_mask |= type ? count : dst;
+		}
+		handle->rule[i] = rule;
+	} while (++i < dest_num);
+
+	return handle;
+
+free_rules:
+	destroy_flow_handle_dup(handle, i);
+	act_dests->dests_size = 0;
+	act_dests->fwd_dests = 0;
+
+	return NULL;
+}
+
 static struct mlx5_flow_handle *
 create_flow_handle(struct fs_fte *fte,
 		   struct mlx5_flow_destination *dest,
@@ -1963,6 +2120,62 @@ lookup_fte_locked(struct mlx5_flow_group *g,
 	return fte_tmp;
 }
 
+/* Native capability lacks support for adding an additional match with the same value
+ * to the same flow group. To accommodate the NO APPEND flag in these scenarios,
+ * we include the new rule in the existing flow table entry (fte) without immediate
+ * hardware commitment. When a request is made to delete the corresponding hardware rule,
+ * we then commit the pending rule to hardware.
+ */
+static struct mlx5_flow_handle *
+add_rule_dup_match_fte(struct fs_fte *fte,
+		       const struct mlx5_flow_spec *spec,
+		       struct mlx5_flow_act *flow_act,
+		       struct mlx5_flow_destination *dest,
+		       int dest_num)
+{
+	struct mlx5_flow_handle *handle;
+	struct fs_fte_dup *dup;
+	int i = 0;
+
+	if (!fte->dup) {
+		dup = kvzalloc(sizeof(*dup), GFP_KERNEL);
+		if (!dup)
+			return ERR_PTR(-ENOMEM);
+		/* dup will be freed when the fte is freed
+		 * this way we don't allocate / free dup on every rule deletion
+		 * or creation
+		 */
+		INIT_LIST_HEAD(&dup->children);
+		fte->dup = dup;
+	}
+
+	if (!list_empty(&fte->dup->children)) {
+		mlx5_core_warn(get_dev(&fte->node),
+			       "Can have only a single duplicate rule\n");
+
+		return ERR_PTR(-EEXIST);
+	}
+
+	fte->dup->act_dests.action = *flow_act;
+	fte->dup->act_dests.flow_context = spec->flow_context;
+	fte->dup->act_dests.dests_size = 0;
+	fte->dup->act_dests.fwd_dests = 0;
+	fte->dup->act_dests.modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+
+	handle = create_flow_handle_dup(&fte->dup->children,
+					dest, dest_num,
+					&fte->dup->act_dests);
+	if (!handle)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < handle->num_rules; i++) {
+		tree_add_node(&handle->rule[i]->node, &fte->node);
+		trace_mlx5_fs_add_rule(handle->rule[i]);
+	}
+
+	return handle;
+}
+
 static struct mlx5_flow_handle *
 try_add_to_existing_fg(struct mlx5_flow_table *ft,
 		       struct list_head *match_head,
@@ -1973,6 +2186,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
 		       int ft_version)
 {
 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
+	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
 	struct mlx5_flow_group *g;
 	struct mlx5_flow_handle *rule;
 	struct match_list *iter;
@@ -1986,7 +2200,9 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
 		return  ERR_PTR(-ENOMEM);
 
 search_again_locked:
-	if (flow_act->flags & FLOW_ACT_NO_APPEND)
+	if (flow_act->flags & FLOW_ACT_NO_APPEND &&
+	    (root->cmds->get_capabilities(root, root->table_type) &
+	     MLX5_FLOW_STEERING_CAP_DUPLICATE_MATCH))
 		goto skip_search;
 	version = matched_fgs_get_version(match_head);
 	/* Try to find an fte with identical match value and attempt update its
@@ -1999,7 +2215,10 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
 		fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
 		if (!fte_tmp)
 			continue;
-		rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
+		if (flow_act->flags & FLOW_ACT_NO_APPEND)
+			rule = add_rule_dup_match_fte(fte_tmp, spec, flow_act, dest, dest_num);
+		else
+			rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
 		/* No error check needed here, because insert_fte() is not called */
 		up_write_ref_node(&fte_tmp->node, false);
 		tree_put_node(&fte_tmp->node, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 19ba63063d09..964937f17cf5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -133,6 +133,7 @@ enum mlx5_flow_steering_capabilty {
 	MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0,
 	MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1,
 	MLX5_FLOW_STEERING_CAP_MATCH_RANGES = 1UL << 2,
+	MLX5_FLOW_STEERING_CAP_DUPLICATE_MATCH = 1UL << 3,
 };
 
 struct mlx5_flow_steering {
@@ -238,12 +239,18 @@ struct fs_fte_action {
 	struct mlx5_flow_act		action;
 };
 
+struct fs_fte_dup {
+	struct list_head children;
+	struct fs_fte_action act_dests;
+};
+
 /* Type of children is mlx5_flow_rule */
 struct fs_fte {
 	struct fs_node			node;
 	struct mlx5_fs_dr_rule		fs_dr_rule;
 	u32				val[MLX5_ST_SZ_DW_MATCH_PARAM];
 	struct fs_fte_action		act_dests;
+	struct fs_fte_dup		*dup;
 	u32				index;
 	enum fs_fte_status		status;
 	struct rhash_head		hash;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 15ef118eade0..833cb68c744f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -813,11 +813,11 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns)
 static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns,
 					enum fs_flow_table_type ft_type)
 {
-	u32 steering_caps = 0;
+	u32 steering_caps = MLX5_FLOW_STEERING_CAP_DUPLICATE_MATCH;
 
 	if (ft_type != FS_FT_FDB ||
 	    MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5)
-		return 0;
+		return steering_caps;
 
 	steering_caps |= MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX;
 	steering_caps |= MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX;
-- 
2.46.0


  parent reply	other threads:[~2024-09-11 20:18 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-11 20:17 [pull request][net-next 00/15] mlx5 updates 2024-09-11 Saeed Mahameed
2024-09-11 20:17 ` [net-next 01/15] net/mlx5: HWS, updated API functions comments to kernel doc Saeed Mahameed
2024-09-12 18:52   ` Jacob Keller
2024-09-13  4:00   ` patchwork-bot+netdevbpf
2024-09-11 20:17 ` [net-next 02/15] net/mlx5: HWS, fixed error flow return values of some functions Saeed Mahameed
2024-09-12 18:52   ` Jacob Keller
2024-09-11 20:17 ` [net-next 03/15] net/mlx5: fs, move steering common function to fs_cmd.h Saeed Mahameed
2024-09-12 18:53   ` Jacob Keller
2024-09-11 20:17 ` [net-next 04/15] net/mlx5: fs, make get_root_namespace API function Saeed Mahameed
2024-09-12 18:54   ` Jacob Keller
2024-09-13  3:23   ` Kalesh Anakkur Purayil
2024-09-11 20:17 ` [net-next 05/15] net/mlx5: fs, move hardware fte deletion function reset Saeed Mahameed
2024-09-12 19:01   ` Jacob Keller
2024-09-11 20:17 ` [net-next 06/15] net/mlx5: fs, remove unused member Saeed Mahameed
2024-09-12 19:06   ` Jacob Keller
2024-09-11 20:17 ` [net-next 07/15] net/mlx5: fs, separate action and destination into distinct struct Saeed Mahameed
2024-09-12 19:08   ` Jacob Keller
2024-09-11 20:17 ` Saeed Mahameed [this message]
2024-09-12 19:10   ` [net-next 08/15] net/mlx5: fs, add support for no append at software level Jacob Keller
2024-09-11 20:17 ` [net-next 09/15] net/mlx5: Add device cap for supporting hot reset in sync reset flow Saeed Mahameed
2024-09-12 19:11   ` Jacob Keller
2024-09-11 20:17 ` [net-next 10/15] net/mlx5: Add support for sync reset using hot reset Saeed Mahameed
2024-09-12 19:12   ` Jacob Keller
2024-09-11 20:17 ` [net-next 11/15] net/mlx5: Skip HotPlug check on " Saeed Mahameed
2024-09-12 19:12   ` Jacob Keller
2024-09-11 20:17 ` [net-next 12/15] net/mlx5: Allow users to configure affinity for SFs Saeed Mahameed
2024-09-12 19:13   ` Jacob Keller
2024-09-13  3:19   ` Kalesh Anakkur Purayil
2024-09-11 20:17 ` [net-next 13/15] net/mlx5: Add NOT_READY command return status Saeed Mahameed
2024-09-12 19:14   ` Jacob Keller
2024-09-13  3:20   ` Kalesh Anakkur Purayil
2024-09-11 20:17 ` [net-next 14/15] net/mlx5e: SHAMPO, Add no-split ethtool counters for header/data split Saeed Mahameed
2024-09-12 19:15   ` Jacob Keller
2024-09-11 20:17 ` [net-next 15/15] net/mlx5e: Match cleanup order in mlx5e_free_rq in reverse of mlx5e_alloc_rq Saeed Mahameed
2024-09-12 19:15   ` Jacob Keller
2024-09-13  3:53 ` [pull request][net-next 00/15] mlx5 updates 2024-09-11 Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240911201757.1505453-9-saeed@kernel.org \
    --to=saeed@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gal@nvidia.com \
    --cc=kuba@kernel.org \
    --cc=leonro@nvidia.com \
    --cc=mbloch@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=tariqt@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).