Netdev List
 help / color / mirror / Atom feed
* [PATCH xfrm-next 04/26] net/mlx5e: Advertise IPsec full offload support
From: Leon Romanovsky @ 2022-08-16 10:37 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Add needed capabilities check to determine if device supports IPsec
full offload mode.

Reviewed-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h   |  1 +
 .../mellanox/mlx5/core/en_accel/ipsec_offload.c        | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 16bcceec16c4..feea909d76c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -88,6 +88,7 @@ struct mlx5_accel_esp_xfrm_attrs {
 enum mlx5_ipsec_cap {
 	MLX5_IPSEC_CAP_CRYPTO		= 1 << 0,
 	MLX5_IPSEC_CAP_ESN		= 1 << 1,
+	MLX5_IPSEC_CAP_FULL_OFFLOAD	= 1 << 2,
 };
 
 struct mlx5e_priv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index 792724ce7336..e93775eb40b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -1,12 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */
 
+#include <linux/mlx5/eswitch.h>
 #include "mlx5_core.h"
 #include "ipsec.h"
 #include "lib/mlx5.h"
 
 u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
 {
+	bool esw_encap;
 	u32 caps = 0;
 
 	if (!MLX5_CAP_GEN(mdev, ipsec_offload))
@@ -31,6 +33,14 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
 	    MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp))
 		caps |= MLX5_IPSEC_CAP_CRYPTO;
 
+	esw_encap = mlx5_eswitch_get_encap_mode(mdev) !=
+		    DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+	if (!esw_encap && MLX5_CAP_IPSEC(mdev, ipsec_full_offload) &&
+	    MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_esp_trasport) &&
+	    MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_del_esp_trasport) &&
+	    MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap))
+		caps |= MLX5_IPSEC_CAP_FULL_OFFLOAD;
+
 	if (!caps)
 		return 0;
 
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 07/26] net/mlx5e: Create symmetric IPsec RX and TX flow steering structs
From: Leon Romanovsky @ 2022-08-16 10:37 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Remove AF family obfuscation by creating symmetric structs for RX and
TX IPsec flow steering chains. This simplifies to us low level IPsec
FS creation logic without need to dig into multiple levels of structs.

Reviewed-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.h       |   7 +-
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 280 ++++++++----------
 2 files changed, 132 insertions(+), 155 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 7cc091115b5d..02c3e6334cdd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -94,7 +94,7 @@ struct mlx5e_ipsec_sw_stats {
 	atomic64_t ipsec_tx_drop_trailer;
 };
 
-struct mlx5e_accel_fs_esp;
+struct mlx5e_ipsec_rx;
 struct mlx5e_ipsec_tx;
 
 struct mlx5e_ipsec {
@@ -103,8 +103,9 @@ struct mlx5e_ipsec {
 	spinlock_t sadb_rx_lock; /* Protects sadb_rx */
 	struct mlx5e_ipsec_sw_stats sw_stats;
 	struct workqueue_struct *wq;
-	struct mlx5e_accel_fs_esp *rx_fs;
-	struct mlx5e_ipsec_tx *tx_fs;
+	struct mlx5e_ipsec_rx *rx_ipv4;
+	struct mlx5e_ipsec_rx *rx_ipv6;
+	struct mlx5e_ipsec_tx *tx;
 };
 
 struct mlx5e_ipsec_esn_state {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 45501764a9bb..fdc4517fa104 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -9,49 +9,40 @@
 
 #define NUM_IPSEC_FTE BIT(15)
 
-enum accel_fs_esp_type {
-	ACCEL_FS_ESP4,
-	ACCEL_FS_ESP6,
-	ACCEL_FS_ESP_NUM_TYPES,
-};
-
 struct mlx5e_ipsec_rx_err {
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_handle *rule;
 	struct mlx5_modify_hdr *copy_modify_hdr;
 };
 
-struct mlx5e_accel_fs_esp_prot {
-	struct mlx5_flow_table *ft;
+struct mlx5e_ipsec_ft {
+	struct mutex mutex; /* Protect changes to this struct */
+	struct mlx5_flow_table *sa;
+	u32 refcnt;
+};
+
+struct mlx5e_ipsec_rx {
+	struct mlx5e_ipsec_ft ft;
 	struct mlx5_flow_group *miss_group;
 	struct mlx5_flow_handle *miss_rule;
 	struct mlx5_flow_destination default_dest;
 	struct mlx5e_ipsec_rx_err rx_err;
-	u32 refcnt;
-	struct mutex prot_mutex; /* protect ESP4/ESP6 protocol */
-};
-
-struct mlx5e_accel_fs_esp {
-	struct mlx5e_accel_fs_esp_prot fs_prot[ACCEL_FS_ESP_NUM_TYPES];
 };
 
 struct mlx5e_ipsec_tx {
+	struct mlx5e_ipsec_ft ft;
 	struct mlx5_flow_namespace *ns;
-	struct mlx5_flow_table *ft;
-	struct mutex mutex; /* Protect IPsec TX steering */
-	u32 refcnt;
 };
 
 /* IPsec RX flow steering */
-static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
+static enum mlx5_traffic_types family2tt(u32 family)
 {
-	if (i == ACCEL_FS_ESP4)
+	if (family == AF_INET)
 		return MLX5_TT_IPV4_IPSEC_ESP;
 	return MLX5_TT_IPV6_IPSEC_ESP;
 }
 
-static int rx_err_add_rule(struct mlx5e_priv *priv,
-			   struct mlx5e_accel_fs_esp_prot *fs_prot,
+static int rx_err_add_rule(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx,
 			   struct mlx5e_ipsec_rx_err *rx_err)
 {
 	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
@@ -89,7 +80,7 @@ static int rx_err_add_rule(struct mlx5e_priv *priv,
 			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	flow_act.modify_hdr = modify_hdr;
 	fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act,
-				  &fs_prot->default_dest, 1);
+				  &rx->default_dest, 1);
 	if (IS_ERR(fte)) {
 		err = PTR_ERR(fte);
 		netdev_err(priv->netdev, "fail to add ipsec rx err copy rule err=%d\n", err);
@@ -108,11 +99,10 @@ static int rx_err_add_rule(struct mlx5e_priv *priv,
 	return err;
 }
 
-static int rx_fs_create(struct mlx5e_priv *priv,
-			struct mlx5e_accel_fs_esp_prot *fs_prot)
+static int rx_fs_create(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	struct mlx5_flow_table *ft = fs_prot->ft;
+	struct mlx5_flow_table *ft = rx->ft.sa;
 	struct mlx5_flow_group *miss_group;
 	struct mlx5_flow_handle *miss_rule;
 	MLX5_DECLARE_FLOW_ACT(flow_act);
@@ -136,55 +126,44 @@ static int rx_fs_create(struct mlx5e_priv *priv,
 		netdev_err(priv->netdev, "fail to create ipsec rx miss_group err=%d\n", err);
 		goto out;
 	}
-	fs_prot->miss_group = miss_group;
+	rx->miss_group = miss_group;
 
 	/* Create miss rule */
-	miss_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &fs_prot->default_dest, 1);
+	miss_rule =
+		mlx5_add_flow_rules(ft, spec, &flow_act, &rx->default_dest, 1);
 	if (IS_ERR(miss_rule)) {
-		mlx5_destroy_flow_group(fs_prot->miss_group);
+		mlx5_destroy_flow_group(rx->miss_group);
 		err = PTR_ERR(miss_rule);
 		netdev_err(priv->netdev, "fail to create ipsec rx miss_rule err=%d\n", err);
 		goto out;
 	}
-	fs_prot->miss_rule = miss_rule;
+	rx->miss_rule = miss_rule;
 out:
 	kvfree(flow_group_in);
 	kvfree(spec);
 	return err;
 }
 
-static void rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+static void rx_destroy(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx)
 {
-	struct mlx5e_accel_fs_esp_prot *fs_prot;
-	struct mlx5e_accel_fs_esp *accel_esp;
-
-	accel_esp = priv->ipsec->rx_fs;
+	mlx5_del_flow_rules(rx->miss_rule);
+	mlx5_destroy_flow_group(rx->miss_group);
+	mlx5_destroy_flow_table(rx->ft.sa);
 
-	/* The netdev unreg already happened, so all offloaded rule are already removed */
-	fs_prot = &accel_esp->fs_prot[type];
-
-	mlx5_del_flow_rules(fs_prot->miss_rule);
-	mlx5_destroy_flow_group(fs_prot->miss_group);
-	mlx5_destroy_flow_table(fs_prot->ft);
-
-	mlx5_del_flow_rules(fs_prot->rx_err.rule);
-	mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr);
-	mlx5_destroy_flow_table(fs_prot->rx_err.ft);
+	mlx5_del_flow_rules(rx->rx_err.rule);
+	mlx5_modify_header_dealloc(priv->mdev, rx->rx_err.copy_modify_hdr);
+	mlx5_destroy_flow_table(rx->rx_err.ft);
 }
 
-static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+static int rx_create(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx,
+		     u32 family)
 {
 	struct mlx5_flow_table_attr ft_attr = {};
-	struct mlx5e_accel_fs_esp_prot *fs_prot;
-	struct mlx5e_accel_fs_esp *accel_esp;
 	struct mlx5_flow_table *ft;
 	int err;
 
-	accel_esp = priv->ipsec->rx_fs;
-	fs_prot = &accel_esp->fs_prot[type];
-
-	fs_prot->default_dest =
-		mlx5_ttc_get_default_dest(priv->fs->ttc, fs_esp2tt(type));
+	rx->default_dest =
+		mlx5_ttc_get_default_dest(priv->fs->ttc, family2tt(family));
 
 	ft_attr.max_fte = 1;
 	ft_attr.autogroup.max_num_groups = 1;
@@ -194,8 +173,8 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
 	if (IS_ERR(ft))
 		return PTR_ERR(ft);
 
-	fs_prot->rx_err.ft = ft;
-	err = rx_err_add_rule(priv, fs_prot, &fs_prot->rx_err);
+	rx->rx_err.ft = ft;
+	err = rx_err_add_rule(priv, rx, &rx->rx_err);
 	if (err)
 		goto err_add;
 
@@ -210,74 +189,80 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
 		err = PTR_ERR(ft);
 		goto err_fs_ft;
 	}
-	fs_prot->ft = ft;
+	rx->ft.sa = ft;
 
-	err = rx_fs_create(priv, fs_prot);
+	err = rx_fs_create(priv, rx);
 	if (err)
 		goto err_fs;
 
 	return 0;
 
 err_fs:
-	mlx5_destroy_flow_table(fs_prot->ft);
+	mlx5_destroy_flow_table(rx->ft.sa);
 err_fs_ft:
-	mlx5_del_flow_rules(fs_prot->rx_err.rule);
-	mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr);
+	mlx5_del_flow_rules(rx->rx_err.rule);
+	mlx5_modify_header_dealloc(priv->mdev, rx->rx_err.copy_modify_hdr);
 err_add:
-	mlx5_destroy_flow_table(fs_prot->rx_err.ft);
+	mlx5_destroy_flow_table(rx->rx_err.ft);
 	return err;
 }
 
-static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5e_priv *priv, u32 family)
 {
-	struct mlx5e_accel_fs_esp_prot *fs_prot;
 	struct mlx5_flow_destination dest = {};
-	struct mlx5e_accel_fs_esp *accel_esp;
+	struct mlx5e_ipsec_rx *rx;
 	int err = 0;
 
-	accel_esp = priv->ipsec->rx_fs;
-	fs_prot = &accel_esp->fs_prot[type];
-	mutex_lock(&fs_prot->prot_mutex);
-	if (fs_prot->refcnt)
+	if (family == AF_INET)
+		rx = priv->ipsec->rx_ipv4;
+	else
+		rx = priv->ipsec->rx_ipv6;
+
+	mutex_lock(&rx->ft.mutex);
+	if (rx->ft.refcnt)
 		goto skip;
 
 	/* create FT */
-	err = rx_create(priv, type);
+	err = rx_create(priv, rx, family);
 	if (err)
 		goto out;
 
 	/* connect */
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-	dest.ft = fs_prot->ft;
-	mlx5_ttc_fwd_dest(priv->fs->ttc, fs_esp2tt(type), &dest);
+	dest.ft = rx->ft.sa;
+	mlx5_ttc_fwd_dest(priv->fs->ttc, family2tt(family), &dest);
 
 skip:
-	fs_prot->refcnt++;
+	rx->ft.refcnt++;
 out:
-	mutex_unlock(&fs_prot->prot_mutex);
-	return err;
+	mutex_unlock(&rx->ft.mutex);
+	if (err)
+		return ERR_PTR(err);
+	return rx;
 }
 
-static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
+static void rx_ft_put(struct mlx5e_priv *priv, u32 family)
 {
-	struct mlx5e_accel_fs_esp_prot *fs_prot;
-	struct mlx5e_accel_fs_esp *accel_esp;
-
-	accel_esp = priv->ipsec->rx_fs;
-	fs_prot = &accel_esp->fs_prot[type];
-	mutex_lock(&fs_prot->prot_mutex);
-	fs_prot->refcnt--;
-	if (fs_prot->refcnt)
+	struct mlx5e_ipsec_rx *rx;
+
+	if (family == AF_INET)
+		rx = priv->ipsec->rx_ipv4;
+	else
+		rx = priv->ipsec->rx_ipv6;
+
+	mutex_lock(&rx->ft.mutex);
+	rx->ft.refcnt--;
+	if (rx->ft.refcnt)
 		goto out;
 
 	/* disconnect */
-	mlx5_ttc_fwd_default_dest(priv->fs->ttc, fs_esp2tt(type));
+	mlx5_ttc_fwd_default_dest(priv->fs->ttc, family2tt(family));
 
 	/* remove FT */
-	rx_destroy(priv, type);
+	rx_destroy(priv, rx);
 
 out:
-	mutex_unlock(&fs_prot->prot_mutex);
+	mutex_unlock(&rx->ft.mutex);
 }
 
 /* IPsec TX flow steering */
@@ -290,47 +275,49 @@ static int tx_create(struct mlx5e_priv *priv)
 
 	ft_attr.max_fte = NUM_IPSEC_FTE;
 	ft_attr.autogroup.max_num_groups = 1;
-	ft = mlx5_create_auto_grouped_flow_table(ipsec->tx_fs->ns, &ft_attr);
+	ft = mlx5_create_auto_grouped_flow_table(ipsec->tx->ns, &ft_attr);
 	if (IS_ERR(ft)) {
 		err = PTR_ERR(ft);
 		netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err);
 		return err;
 	}
-	ipsec->tx_fs->ft = ft;
+	ipsec->tx->ft.sa = ft;
 	return 0;
 }
 
-static int tx_ft_get(struct mlx5e_priv *priv)
+static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5e_priv *priv)
 {
-	struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
+	struct mlx5e_ipsec_tx *tx = priv->ipsec->tx;
 	int err = 0;
 
-	mutex_lock(&tx_fs->mutex);
-	if (tx_fs->refcnt)
+	mutex_lock(&tx->ft.mutex);
+	if (tx->ft.refcnt)
 		goto skip;
 
 	err = tx_create(priv);
 	if (err)
 		goto out;
 skip:
-	tx_fs->refcnt++;
+	tx->ft.refcnt++;
 out:
-	mutex_unlock(&tx_fs->mutex);
-	return err;
+	mutex_unlock(&tx->ft.mutex);
+	if (err)
+		return ERR_PTR(err);
+	return tx;
 }
 
 static void tx_ft_put(struct mlx5e_priv *priv)
 {
-	struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
+	struct mlx5e_ipsec_tx *tx = priv->ipsec->tx;
 
-	mutex_lock(&tx_fs->mutex);
-	tx_fs->refcnt--;
-	if (tx_fs->refcnt)
+	mutex_lock(&tx->ft.mutex);
+	tx->ft.refcnt--;
+	if (tx->ft.refcnt)
 		goto out;
 
-	mlx5_destroy_flow_table(tx_fs->ft);
+	mlx5_destroy_flow_table(tx->ft.sa);
 out:
-	mutex_unlock(&tx_fs->mutex);
+	mutex_unlock(&tx->ft.mutex);
 }
 
 static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs,
@@ -397,22 +384,16 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
 	u32 ipsec_obj_id = sa_entry->ipsec_obj_id;
 	struct mlx5_modify_hdr *modify_hdr = NULL;
-	struct mlx5e_accel_fs_esp_prot *fs_prot;
 	struct mlx5_flow_destination dest = {};
-	struct mlx5e_accel_fs_esp *accel_esp;
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_flow_handle *rule;
-	enum accel_fs_esp_type type;
 	struct mlx5_flow_spec *spec;
+	struct mlx5e_ipsec_rx *rx;
 	int err = 0;
 
-	accel_esp = priv->ipsec->rx_fs;
-	type = (attrs->family == AF_INET) ? ACCEL_FS_ESP4 : ACCEL_FS_ESP6;
-	fs_prot = &accel_esp->fs_prot[type];
-
-	err = rx_ft_get(priv, type);
-	if (err)
-		return err;
+	rx = rx_ft_get(priv, attrs->family);
+	if (IS_ERR(rx))
+		return PTR_ERR(rx);
 
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
@@ -445,8 +426,8 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 	flow_act.modify_hdr = modify_hdr;
-	dest.ft = fs_prot->rx_err.ft;
-	rule = mlx5_add_flow_rules(fs_prot->ft, spec, &flow_act, &dest, 1);
+	dest.ft = rx->rx_err.ft;
+	rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, &dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "fail to add RX ipsec rule err=%d\n",
@@ -461,7 +442,7 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 out_err:
 	if (modify_hdr)
 		mlx5_modify_header_dealloc(priv->mdev, modify_hdr);
-	rx_ft_put(priv, type);
+	rx_ft_put(priv, attrs->family);
 
 out:
 	kvfree(spec);
@@ -474,11 +455,12 @@ static int tx_add_rule(struct mlx5e_priv *priv,
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_spec *spec;
+	struct mlx5e_ipsec_tx *tx;
 	int err = 0;
 
-	err = tx_ft_get(priv);
-	if (err)
-		return err;
+	tx = tx_ft_get(priv);
+	if (IS_ERR(tx))
+		return PTR_ERR(tx);
 
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
@@ -498,7 +480,7 @@ static int tx_add_rule(struct mlx5e_priv *priv,
 
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW |
 			  MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT;
-	rule = mlx5_add_flow_rules(priv->ipsec->tx_fs->ft, spec, &flow_act, NULL, 0);
+	rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, NULL, 0);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "fail to add TX ipsec rule err=%d\n",
@@ -529,7 +511,6 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
 {
 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
-	enum accel_fs_esp_type type;
 
 	mlx5_del_flow_rules(ipsec_rule->rule);
 
@@ -539,38 +520,30 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
 	}
 
 	mlx5_modify_header_dealloc(mdev, ipsec_rule->set_modify_hdr);
-	type = (sa_entry->attrs.family == AF_INET) ? ACCEL_FS_ESP4 : ACCEL_FS_ESP6;
-	rx_ft_put(priv, type);
+	rx_ft_put(priv, sa_entry->attrs.family);
 }
 
 void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
 {
-	struct mlx5e_accel_fs_esp_prot *fs_prot;
-	struct mlx5e_accel_fs_esp *accel_esp;
-	enum accel_fs_esp_type i;
-
-	if (!ipsec->rx_fs)
+	if (!ipsec->tx)
 		return;
 
-	mutex_destroy(&ipsec->tx_fs->mutex);
-	WARN_ON(ipsec->tx_fs->refcnt);
-	kfree(ipsec->tx_fs);
+	mutex_destroy(&ipsec->tx->ft.mutex);
+	WARN_ON(ipsec->tx->ft.refcnt);
+	kfree(ipsec->tx);
 
-	accel_esp = ipsec->rx_fs;
-	for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) {
-		fs_prot = &accel_esp->fs_prot[i];
-		mutex_destroy(&fs_prot->prot_mutex);
-		WARN_ON(fs_prot->refcnt);
-	}
-	kfree(ipsec->rx_fs);
+	mutex_destroy(&ipsec->rx_ipv4->ft.mutex);
+	WARN_ON(ipsec->rx_ipv4->ft.refcnt);
+	kfree(ipsec->rx_ipv4);
+
+	mutex_destroy(&ipsec->rx_ipv6->ft.mutex);
+	WARN_ON(ipsec->rx_ipv6->ft.refcnt);
+	kfree(ipsec->rx_ipv6);
 }
 
 int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
 {
-	struct mlx5e_accel_fs_esp_prot *fs_prot;
-	struct mlx5e_accel_fs_esp *accel_esp;
 	struct mlx5_flow_namespace *ns;
-	enum accel_fs_esp_type i;
 	int err = -ENOMEM;
 
 	ns = mlx5_get_flow_namespace(ipsec->mdev,
@@ -578,26 +551,29 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
 	if (!ns)
 		return -EOPNOTSUPP;
 
-	ipsec->tx_fs = kzalloc(sizeof(*ipsec->tx_fs), GFP_KERNEL);
-	if (!ipsec->tx_fs)
+	ipsec->tx = kzalloc(sizeof(*ipsec->tx), GFP_KERNEL);
+	if (!ipsec->tx)
 		return -ENOMEM;
 
-	ipsec->rx_fs = kzalloc(sizeof(*ipsec->rx_fs), GFP_KERNEL);
-	if (!ipsec->rx_fs)
-		goto err_rx;
+	ipsec->rx_ipv4 = kzalloc(sizeof(*ipsec->rx_ipv4), GFP_KERNEL);
+	if (!ipsec->rx_ipv4)
+		goto err_rx_ipv4;
 
-	mutex_init(&ipsec->tx_fs->mutex);
-	ipsec->tx_fs->ns = ns;
+	ipsec->rx_ipv6 = kzalloc(sizeof(*ipsec->rx_ipv6), GFP_KERNEL);
+	if (!ipsec->rx_ipv6)
+		goto err_rx_ipv6;
 
-	accel_esp = ipsec->rx_fs;
-	for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) {
-		fs_prot = &accel_esp->fs_prot[i];
-		mutex_init(&fs_prot->prot_mutex);
-	}
+	mutex_init(&ipsec->tx->ft.mutex);
+	mutex_init(&ipsec->rx_ipv4->ft.mutex);
+	mutex_init(&ipsec->rx_ipv6->ft.mutex);
+	ipsec->tx->ns = ns;
 
 	return 0;
 
-err_rx:
-	kfree(ipsec->tx_fs);
+err_rx_ipv6:
+	kfree(ipsec->rx_ipv4);
+err_rx_ipv4:
+	kfree(ipsec->tx);
+	ipsec->tx = NULL;
 	return err;
 }
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 10/26] net/mlx5e: Validate that IPsec full offload can handle packets
From: Leon Romanovsky @ 2022-08-16 10:37 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Add specific to IPsec full offload check to drop packets that are
larger than MTU and such are going to be fragmented.

Reviewed-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c    | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 56d70bbb4b5c..f8ba2d7581e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -409,6 +409,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
 
 static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
 {
+	struct net_device *netdev = x->xso.real_dev;
+
 	if (x->props.family == AF_INET) {
 		/* Offload with IPv4 options is not supported yet */
 		if (ip_hdr(skb)->ihl > 5)
@@ -419,6 +421,17 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
 			return false;
 	}
 
+	if (x->xso.type == XFRM_DEV_OFFLOAD_FULL) {
+		/* Fragmented packets */
+		if (skb_is_gso(skb)) {
+			if (!skb_gso_validate_network_len(skb, netdev->mtu))
+				return false;
+		} else {
+			if (skb->len > netdev->mtu)
+				return false;
+		}
+	}
+
 	return true;
 }
 
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 08/26] net/mlx5e: Use mlx5 print routines for low level IPsec code
From: Leon Romanovsky @ 2022-08-16 10:37 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Low level mlx5 code needs to use mlx5_core print routines and
not netdev ones, as the failures are relevant to the HW itself
and not to its netdev.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index fdc4517fa104..c9b93ea8eec5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -70,8 +70,8 @@ static int rx_err_add_rule(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx,
 
 	if (IS_ERR(modify_hdr)) {
 		err = PTR_ERR(modify_hdr);
-		netdev_err(priv->netdev,
-			   "fail to alloc ipsec copy modify_header_id err=%d\n", err);
+		mlx5_core_err(mdev,
+			      "fail to alloc ipsec copy modify_header_id err=%d\n", err);
 		goto out_spec;
 	}
 
@@ -83,7 +83,7 @@ static int rx_err_add_rule(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx,
 				  &rx->default_dest, 1);
 	if (IS_ERR(fte)) {
 		err = PTR_ERR(fte);
-		netdev_err(priv->netdev, "fail to add ipsec rx err copy rule err=%d\n", err);
+		mlx5_core_err(mdev, "fail to add ipsec rx err copy rule err=%d\n", err);
 		goto out;
 	}
 
@@ -103,6 +103,7 @@ static int rx_fs_create(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	struct mlx5_flow_table *ft = rx->ft.sa;
+	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5_flow_group *miss_group;
 	struct mlx5_flow_handle *miss_rule;
 	MLX5_DECLARE_FLOW_ACT(flow_act);
@@ -123,7 +124,7 @@ static int rx_fs_create(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx)
 	miss_group = mlx5_create_flow_group(ft, flow_group_in);
 	if (IS_ERR(miss_group)) {
 		err = PTR_ERR(miss_group);
-		netdev_err(priv->netdev, "fail to create ipsec rx miss_group err=%d\n", err);
+		mlx5_core_err(mdev, "fail to create ipsec rx miss_group err=%d\n", err);
 		goto out;
 	}
 	rx->miss_group = miss_group;
@@ -134,7 +135,7 @@ static int rx_fs_create(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx)
 	if (IS_ERR(miss_rule)) {
 		mlx5_destroy_flow_group(rx->miss_group);
 		err = PTR_ERR(miss_rule);
-		netdev_err(priv->netdev, "fail to create ipsec rx miss_rule err=%d\n", err);
+		mlx5_core_err(mdev, "fail to create ipsec rx miss_rule err=%d\n", err);
 		goto out;
 	}
 	rx->miss_rule = miss_rule;
@@ -270,6 +271,7 @@ static int tx_create(struct mlx5e_priv *priv)
 {
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5e_ipsec *ipsec = priv->ipsec;
+	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5_flow_table *ft;
 	int err;
 
@@ -278,7 +280,7 @@ static int tx_create(struct mlx5e_priv *priv)
 	ft = mlx5_create_auto_grouped_flow_table(ipsec->tx->ns, &ft_attr);
 	if (IS_ERR(ft)) {
 		err = PTR_ERR(ft);
-		netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err);
+		mlx5_core_err(mdev, "fail to create ipsec tx ft err=%d\n", err);
 		return err;
 	}
 	ipsec->tx->ft.sa = ft;
@@ -382,6 +384,7 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
 	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
 	u32 ipsec_obj_id = sa_entry->ipsec_obj_id;
 	struct mlx5_modify_hdr *modify_hdr = NULL;
 	struct mlx5_flow_destination dest = {};
@@ -415,8 +418,8 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 					      1, action);
 	if (IS_ERR(modify_hdr)) {
 		err = PTR_ERR(modify_hdr);
-		netdev_err(priv->netdev,
-			   "fail to alloc ipsec set modify_header_id err=%d\n", err);
+		mlx5_core_err(mdev,
+			      "fail to alloc ipsec set modify_header_id err=%d\n", err);
 		modify_hdr = NULL;
 		goto out_err;
 	}
@@ -430,8 +433,7 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 	rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, &dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
-		netdev_err(priv->netdev, "fail to add RX ipsec rule err=%d\n",
-			   err);
+		mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err);
 		goto out_err;
 	}
 
@@ -452,6 +454,7 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 static int tx_add_rule(struct mlx5e_priv *priv,
 		       struct mlx5e_ipsec_sa_entry *sa_entry)
 {
+	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_spec *spec;
@@ -483,8 +486,7 @@ static int tx_add_rule(struct mlx5e_priv *priv,
 	rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, NULL, 0);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
-		netdev_err(priv->netdev, "fail to add TX ipsec rule err=%d\n",
-			   err);
+		mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err);
 		goto out;
 	}
 
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 09/26] net/mlx5e: Remove accesses to priv for low level IPsec FS code
From: Leon Romanovsky @ 2022-08-16 10:37 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

mlx5 priv structure is driver main structure that holds high level data.
That information is not needed for IPsec flow steering logic and the
pointer to mlx5e_priv was not supposed to be passed in the first place.

This change "cleans" the logic to rely on internal to IPsec structures
without touching global mlx5e_priv.

Reviewed-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.c       |   8 +-
 .../mellanox/mlx5/core/en_accel/ipsec.h       |   7 +-
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 102 +++++++++---------
 3 files changed, 57 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index e4fe0249c5be..56d70bbb4b5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -309,7 +309,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 	if (err)
 		goto err_xfrm;
 
-	err = mlx5e_accel_ipsec_fs_add_rule(priv, sa_entry);
+	err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
 	if (err)
 		goto err_hw_ctx;
 
@@ -327,7 +327,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 	goto out;
 
 err_add_rule:
-	mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry);
+	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
 err_hw_ctx:
 	mlx5_ipsec_free_sa_ctx(sa_entry);
 err_xfrm:
@@ -347,10 +347,9 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
 {
 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
-	struct mlx5e_priv *priv = netdev_priv(x->xso.dev);
 
 	cancel_work_sync(&sa_entry->modify_work.work);
-	mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry);
+	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
 	mlx5_ipsec_free_sa_ctx(sa_entry);
 	kfree(sa_entry);
 }
@@ -383,6 +382,7 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 	if (ret)
 		goto err_fs_init;
 
+	ipsec->fs = priv->fs;
 	priv->ipsec = ipsec;
 	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 02c3e6334cdd..3bba62f54604 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -103,6 +103,7 @@ struct mlx5e_ipsec {
 	spinlock_t sadb_rx_lock; /* Protects sadb_rx */
 	struct mlx5e_ipsec_sw_stats sw_stats;
 	struct workqueue_struct *wq;
+	struct mlx5e_flow_steering *fs;
 	struct mlx5e_ipsec_rx *rx_ipv4;
 	struct mlx5e_ipsec_rx *rx_ipv6;
 	struct mlx5e_ipsec_tx *tx;
@@ -148,10 +149,8 @@ struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
 
 void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec);
 int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec);
-int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
-				  struct mlx5e_ipsec_sa_entry *sa_entry);
-void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
-				   struct mlx5e_ipsec_sa_entry *sa_entry);
+int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
+void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
 
 int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
 void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index c9b93ea8eec5..e732ce19e039 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -42,11 +42,11 @@ static enum mlx5_traffic_types family2tt(u32 family)
 	return MLX5_TT_IPV6_IPSEC_ESP;
 }
 
-static int rx_err_add_rule(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx,
+static int rx_err_add_rule(struct mlx5_core_dev *mdev,
+			   struct mlx5e_ipsec_rx *rx,
 			   struct mlx5e_ipsec_rx_err *rx_err)
 {
 	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
-	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_modify_hdr *modify_hdr;
 	struct mlx5_flow_handle *fte;
@@ -99,11 +99,10 @@ static int rx_err_add_rule(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx,
 	return err;
 }
 
-static int rx_fs_create(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx)
+static int rx_fs_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	struct mlx5_flow_table *ft = rx->ft.sa;
-	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5_flow_group *miss_group;
 	struct mlx5_flow_handle *miss_rule;
 	MLX5_DECLARE_FLOW_ACT(flow_act);
@@ -145,37 +144,37 @@ static int rx_fs_create(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx)
 	return err;
 }
 
-static void rx_destroy(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx)
+static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 {
 	mlx5_del_flow_rules(rx->miss_rule);
 	mlx5_destroy_flow_group(rx->miss_group);
 	mlx5_destroy_flow_table(rx->ft.sa);
 
 	mlx5_del_flow_rules(rx->rx_err.rule);
-	mlx5_modify_header_dealloc(priv->mdev, rx->rx_err.copy_modify_hdr);
+	mlx5_modify_header_dealloc(mdev, rx->rx_err.copy_modify_hdr);
 	mlx5_destroy_flow_table(rx->rx_err.ft);
 }
 
-static int rx_create(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx,
-		     u32 family)
+static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
+		     struct mlx5e_ipsec_rx *rx, u32 family)
 {
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_flow_table *ft;
 	int err;
 
 	rx->default_dest =
-		mlx5_ttc_get_default_dest(priv->fs->ttc, family2tt(family));
+		mlx5_ttc_get_default_dest(ipsec->fs->ttc, family2tt(family));
 
 	ft_attr.max_fte = 1;
 	ft_attr.autogroup.max_num_groups = 1;
 	ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL;
 	ft_attr.prio = MLX5E_NIC_PRIO;
-	ft = mlx5_create_auto_grouped_flow_table(priv->fs->ns, &ft_attr);
+	ft = mlx5_create_auto_grouped_flow_table(ipsec->fs->ns, &ft_attr);
 	if (IS_ERR(ft))
 		return PTR_ERR(ft);
 
 	rx->rx_err.ft = ft;
-	err = rx_err_add_rule(priv, rx, &rx->rx_err);
+	err = rx_err_add_rule(mdev, rx, &rx->rx_err);
 	if (err)
 		goto err_add;
 
@@ -185,14 +184,14 @@ static int rx_create(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx,
 	ft_attr.prio = MLX5E_NIC_PRIO;
 	ft_attr.autogroup.num_reserved_entries = 1;
 	ft_attr.autogroup.max_num_groups = 1;
-	ft = mlx5_create_auto_grouped_flow_table(priv->fs->ns, &ft_attr);
+	ft = mlx5_create_auto_grouped_flow_table(ipsec->fs->ns, &ft_attr);
 	if (IS_ERR(ft)) {
 		err = PTR_ERR(ft);
 		goto err_fs_ft;
 	}
 	rx->ft.sa = ft;
 
-	err = rx_fs_create(priv, rx);
+	err = rx_fs_create(mdev, rx);
 	if (err)
 		goto err_fs;
 
@@ -202,36 +201,37 @@ static int rx_create(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx *rx,
 	mlx5_destroy_flow_table(rx->ft.sa);
 err_fs_ft:
 	mlx5_del_flow_rules(rx->rx_err.rule);
-	mlx5_modify_header_dealloc(priv->mdev, rx->rx_err.copy_modify_hdr);
+	mlx5_modify_header_dealloc(mdev, rx->rx_err.copy_modify_hdr);
 err_add:
 	mlx5_destroy_flow_table(rx->rx_err.ft);
 	return err;
 }
 
-static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5e_priv *priv, u32 family)
+static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5_core_dev *mdev,
+					struct mlx5e_ipsec *ipsec, u32 family)
 {
 	struct mlx5_flow_destination dest = {};
 	struct mlx5e_ipsec_rx *rx;
 	int err = 0;
 
 	if (family == AF_INET)
-		rx = priv->ipsec->rx_ipv4;
+		rx = ipsec->rx_ipv4;
 	else
-		rx = priv->ipsec->rx_ipv6;
+		rx = ipsec->rx_ipv6;
 
 	mutex_lock(&rx->ft.mutex);
 	if (rx->ft.refcnt)
 		goto skip;
 
 	/* create FT */
-	err = rx_create(priv, rx, family);
+	err = rx_create(mdev, ipsec, rx, family);
 	if (err)
 		goto out;
 
 	/* connect */
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 	dest.ft = rx->ft.sa;
-	mlx5_ttc_fwd_dest(priv->fs->ttc, family2tt(family), &dest);
+	mlx5_ttc_fwd_dest(ipsec->fs->ttc, family2tt(family), &dest);
 
 skip:
 	rx->ft.refcnt++;
@@ -242,14 +242,15 @@ static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5e_priv *priv, u32 family)
 	return rx;
 }
 
-static void rx_ft_put(struct mlx5e_priv *priv, u32 family)
+static void rx_ft_put(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
+		      u32 family)
 {
 	struct mlx5e_ipsec_rx *rx;
 
 	if (family == AF_INET)
-		rx = priv->ipsec->rx_ipv4;
+		rx = ipsec->rx_ipv4;
 	else
-		rx = priv->ipsec->rx_ipv6;
+		rx = ipsec->rx_ipv6;
 
 	mutex_lock(&rx->ft.mutex);
 	rx->ft.refcnt--;
@@ -257,46 +258,45 @@ static void rx_ft_put(struct mlx5e_priv *priv, u32 family)
 		goto out;
 
 	/* disconnect */
-	mlx5_ttc_fwd_default_dest(priv->fs->ttc, family2tt(family));
+	mlx5_ttc_fwd_default_dest(ipsec->fs->ttc, family2tt(family));
 
 	/* remove FT */
-	rx_destroy(priv, rx);
+	rx_destroy(mdev, rx);
 
 out:
 	mutex_unlock(&rx->ft.mutex);
 }
 
 /* IPsec TX flow steering */
-static int tx_create(struct mlx5e_priv *priv)
+static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx)
 {
 	struct mlx5_flow_table_attr ft_attr = {};
-	struct mlx5e_ipsec *ipsec = priv->ipsec;
-	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5_flow_table *ft;
 	int err;
 
 	ft_attr.max_fte = NUM_IPSEC_FTE;
 	ft_attr.autogroup.max_num_groups = 1;
-	ft = mlx5_create_auto_grouped_flow_table(ipsec->tx->ns, &ft_attr);
+	ft = mlx5_create_auto_grouped_flow_table(tx->ns, &ft_attr);
 	if (IS_ERR(ft)) {
 		err = PTR_ERR(ft);
 		mlx5_core_err(mdev, "fail to create ipsec tx ft err=%d\n", err);
 		return err;
 	}
-	ipsec->tx->ft.sa = ft;
+	tx->ft.sa = ft;
 	return 0;
 }
 
-static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5e_priv *priv)
+static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev,
+					struct mlx5e_ipsec *ipsec)
 {
-	struct mlx5e_ipsec_tx *tx = priv->ipsec->tx;
+	struct mlx5e_ipsec_tx *tx = ipsec->tx;
 	int err = 0;
 
 	mutex_lock(&tx->ft.mutex);
 	if (tx->ft.refcnt)
 		goto skip;
 
-	err = tx_create(priv);
+	err = tx_create(mdev, tx);
 	if (err)
 		goto out;
 skip:
@@ -308,9 +308,9 @@ static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5e_priv *priv)
 	return tx;
 }
 
-static void tx_ft_put(struct mlx5e_priv *priv)
+static void tx_ft_put(struct mlx5e_ipsec *ipsec)
 {
-	struct mlx5e_ipsec_tx *tx = priv->ipsec->tx;
+	struct mlx5e_ipsec_tx *tx = ipsec->tx;
 
 	mutex_lock(&tx->ft.mutex);
 	tx->ft.refcnt--;
@@ -378,13 +378,13 @@ static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs,
 	flow_act->flags |= FLOW_ACT_NO_APPEND;
 }
 
-static int rx_add_rule(struct mlx5e_priv *priv,
-		       struct mlx5e_ipsec_sa_entry *sa_entry)
+static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
 	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
 	u32 ipsec_obj_id = sa_entry->ipsec_obj_id;
 	struct mlx5_modify_hdr *modify_hdr = NULL;
 	struct mlx5_flow_destination dest = {};
@@ -394,7 +394,7 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 	struct mlx5e_ipsec_rx *rx;
 	int err = 0;
 
-	rx = rx_ft_get(priv, attrs->family);
+	rx = rx_ft_get(mdev, ipsec, attrs->family);
 	if (IS_ERR(rx))
 		return PTR_ERR(rx);
 
@@ -414,7 +414,7 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 	MLX5_SET(set_action_in, action, offset, 0);
 	MLX5_SET(set_action_in, action, length, 32);
 
-	modify_hdr = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL,
+	modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL,
 					      1, action);
 	if (IS_ERR(modify_hdr)) {
 		err = PTR_ERR(modify_hdr);
@@ -443,25 +443,25 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 
 out_err:
 	if (modify_hdr)
-		mlx5_modify_header_dealloc(priv->mdev, modify_hdr);
-	rx_ft_put(priv, attrs->family);
+		mlx5_modify_header_dealloc(mdev, modify_hdr);
+	rx_ft_put(mdev, ipsec, attrs->family);
 
 out:
 	kvfree(spec);
 	return err;
 }
 
-static int tx_add_rule(struct mlx5e_priv *priv,
-		       struct mlx5e_ipsec_sa_entry *sa_entry)
+static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_spec *spec;
 	struct mlx5e_ipsec_tx *tx;
 	int err = 0;
 
-	tx = tx_ft_get(priv);
+	tx = tx_ft_get(mdev, ipsec);
 	if (IS_ERR(tx))
 		return PTR_ERR(tx);
 
@@ -495,21 +495,19 @@ static int tx_add_rule(struct mlx5e_priv *priv,
 out:
 	kvfree(spec);
 	if (err)
-		tx_ft_put(priv);
+		tx_ft_put(ipsec);
 	return err;
 }
 
-int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
-				  struct mlx5e_ipsec_sa_entry *sa_entry)
+int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT)
-		return tx_add_rule(priv, sa_entry);
+		return tx_add_rule(sa_entry);
 
-	return rx_add_rule(priv, sa_entry);
+	return rx_add_rule(sa_entry);
 }
 
-void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
-				   struct mlx5e_ipsec_sa_entry *sa_entry)
+void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
@@ -517,12 +515,12 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
 	mlx5_del_flow_rules(ipsec_rule->rule);
 
 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) {
-		tx_ft_put(priv);
+		tx_ft_put(sa_entry->ipsec);
 		return;
 	}
 
 	mlx5_modify_header_dealloc(mdev, ipsec_rule->set_modify_hdr);
-	rx_ft_put(priv, sa_entry->attrs.family);
+	rx_ft_put(mdev, sa_entry->ipsec, sa_entry->attrs.family);
 }
 
 void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 12/26] net/mlx5e: Create hardware IPsec full offload objects
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Create initial hardware IPsec full offload object and connect it
to advanced steering operation (ASO) context and queue, so the data
path can communicate with the stack.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.c       |  1 +
 .../mellanox/mlx5/core/en_accel/ipsec.h       |  1 +
 .../mlx5/core/en_accel/ipsec_offload.c        | 31 ++++++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index f65305281ac4..9e936e9cc673 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -179,6 +179,7 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
 	memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr));
 	memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr));
 	attrs->family = x->props.family;
+	attrs->type = x->xso.type;
 }
 
 static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 2be7fb7db456..9acb3e98c823 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -73,6 +73,7 @@ struct mlx5_accel_esp_xfrm_attrs {
 	u8 dir : 2;
 	u8 esn_overlap : 1;
 	u8 esn_trigger : 1;
+	u8 type : 2;
 	u8 family;
 	u32 replay_window;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index 7ebdfe560398..4fc472722859 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -63,10 +63,12 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
 	struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
 	u32 in[MLX5_ST_SZ_DW(create_ipsec_obj_in)] = {};
-	void *obj, *salt_p, *salt_iv_p;
+	void *obj, *salt_p, *salt_iv_p, *aso_ctx;
+	u32 pdn = sa_entry->ipsec->pdn;
 	int err;
 
 	obj = MLX5_ADDR_OF(create_ipsec_obj_in, in, ipsec_object);
+	aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso);
 
 	/* salt and seq_iv */
 	salt_p = MLX5_ADDR_OF(ipsec_obj, obj, salt);
@@ -80,6 +82,17 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
 		MLX5_SET(ipsec_obj, obj, esn_en, 1);
 		MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn);
 		MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->esn_overlap);
+
+		if (attrs->type == XFRM_DEV_OFFLOAD_FULL) {
+			MLX5_SET(ipsec_aso, aso_ctx, esn_event_arm, 1);
+
+			if (attrs->dir == XFRM_DEV_OFFLOAD_IN) {
+				MLX5_SET(ipsec_aso, aso_ctx, window_sz,
+					 attrs->replay_window / 64);
+				MLX5_SET(ipsec_aso, aso_ctx, mode,
+					 MLX5_IPSEC_ASO_REPLAY_PROTECTION);
+			}
+		}
 	}
 
 	MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id);
@@ -90,6 +103,22 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
 	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
 		 MLX5_GENERAL_OBJECT_TYPES_IPSEC);
 
+	/* ASO context */
+	if (attrs->type == XFRM_DEV_OFFLOAD_FULL) {
+		MLX5_SET(ipsec_obj, obj, ipsec_aso_access_pd, pdn);
+		MLX5_SET(ipsec_obj, obj, full_offload, 1);
+		MLX5_SET(ipsec_aso, aso_ctx, valid, 1);
+		/* MLX5_IPSEC_ASO_REG_C_4_5 is type C register that is used
+		 * in flow steering to perform matching against. Please be
+		 * aware that this register was chosen arbitrary and can't
+		 * be used in other places as long as IPsec full offload
+		 * active.
+		 */
+		MLX5_SET(ipsec_obj, obj, aso_return_reg, MLX5_IPSEC_ASO_REG_C_4_5);
+		if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
+			MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN);
+	}
+
 	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 	if (!err)
 		sa_entry->ipsec_obj_id =
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 13/26] net/mlx5e: Move IPsec flow table creation to separate function
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Even now, to support IPsec crypto, the RX and TX paths use same
logic to create flow tables. In the following patches, we will
add more tables to support IPsec full offload. So reuse existing
code and rewrite it to support IPsec full offload from the beginning.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 53 +++++++++++--------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index e732ce19e039..291533d55e2d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -42,6 +42,28 @@ static enum mlx5_traffic_types family2tt(u32 family)
 	return MLX5_TT_IPV6_IPSEC_ESP;
 }
 
+static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_core_dev *mdev,
+					       struct mlx5_flow_namespace *ns,
+					       int level, int prio,
+					       int max_num_groups, u8 dir)
+{
+	struct mlx5_flow_table_attr ft_attr = {};
+
+	ft_attr.autogroup.num_reserved_entries = 1;
+	ft_attr.autogroup.max_num_groups = max_num_groups;
+	ft_attr.max_fte = NUM_IPSEC_FTE;
+	ft_attr.level = level;
+	ft_attr.prio = prio;
+	if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_FULL_OFFLOAD) {
+		ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+
+		if (dir == XFRM_DEV_OFFLOAD_IN)
+			ft_attr.flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+	}
+
+	return mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+}
+
 static int rx_err_add_rule(struct mlx5_core_dev *mdev,
 			   struct mlx5e_ipsec_rx *rx,
 			   struct mlx5e_ipsec_rx_err *rx_err)
@@ -158,18 +180,15 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 		     struct mlx5e_ipsec_rx *rx, u32 family)
 {
-	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_flow_table *ft;
 	int err;
 
 	rx->default_dest =
 		mlx5_ttc_get_default_dest(ipsec->fs->ttc, family2tt(family));
 
-	ft_attr.max_fte = 1;
-	ft_attr.autogroup.max_num_groups = 1;
-	ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL;
-	ft_attr.prio = MLX5E_NIC_PRIO;
-	ft = mlx5_create_auto_grouped_flow_table(ipsec->fs->ns, &ft_attr);
+	ft = ipsec_ft_create(mdev, ipsec->fs->ns,
+			     MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, MLX5E_NIC_PRIO, 1,
+			     XFRM_DEV_OFFLOAD_IN);
 	if (IS_ERR(ft))
 		return PTR_ERR(ft);
 
@@ -179,12 +198,8 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 		goto err_add;
 
 	/* Create FT */
-	ft_attr.max_fte = NUM_IPSEC_FTE;
-	ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_LEVEL;
-	ft_attr.prio = MLX5E_NIC_PRIO;
-	ft_attr.autogroup.num_reserved_entries = 1;
-	ft_attr.autogroup.max_num_groups = 1;
-	ft = mlx5_create_auto_grouped_flow_table(ipsec->fs->ns, &ft_attr);
+	ft = ipsec_ft_create(mdev, ipsec->fs->ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL,
+			     MLX5E_NIC_PRIO, 1, XFRM_DEV_OFFLOAD_IN);
 	if (IS_ERR(ft)) {
 		err = PTR_ERR(ft);
 		goto err_fs_ft;
@@ -270,18 +285,12 @@ static void rx_ft_put(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 /* IPsec TX flow steering */
 static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx)
 {
-	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_flow_table *ft;
-	int err;
 
-	ft_attr.max_fte = NUM_IPSEC_FTE;
-	ft_attr.autogroup.max_num_groups = 1;
-	ft = mlx5_create_auto_grouped_flow_table(tx->ns, &ft_attr);
-	if (IS_ERR(ft)) {
-		err = PTR_ERR(ft);
-		mlx5_core_err(mdev, "fail to create ipsec tx ft err=%d\n", err);
-		return err;
-	}
+	ft = ipsec_ft_create(mdev, tx->ns, 0, 0, 1, XFRM_DEV_OFFLOAD_OUT);
+	if (IS_ERR(ft))
+		return PTR_ERR(ft);
+
 	tx->ft.sa = ft;
 	return 0;
 }
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 11/26] net/mlx5e: Create Advanced Steering Operation object for IPsec
From: Leon Romanovsky @ 2022-08-16 10:37 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Setup the ASO (Advanced Steering Operation) object that is needed
for IPsec to interact with SW stack about various fast changing
events: replay window, lifetime limits,  e.t.c

Reviewed-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.c       | 11 +++++++
 .../mellanox/mlx5/core/en_accel/ipsec.h       |  6 ++++
 .../mlx5/core/en_accel/ipsec_offload.c        | 30 +++++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index f8ba2d7581e4..f65305281ac4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -378,6 +378,12 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 		goto err_wq;
 	}
 
+	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_FULL_OFFLOAD) {
+		ret = mlx5e_ipsec_aso_init(ipsec);
+		if (ret)
+			goto err_aso;
+	}
+
 	ret = mlx5e_accel_ipsec_fs_init(ipsec);
 	if (ret)
 		goto err_fs_init;
@@ -388,6 +394,9 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 	return 0;
 
 err_fs_init:
+	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_FULL_OFFLOAD)
+		mlx5e_ipsec_aso_cleanup(ipsec);
+err_aso:
 	destroy_workqueue(ipsec->wq);
 err_wq:
 	kfree(ipsec);
@@ -402,6 +411,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
 		return;
 
 	mlx5e_accel_ipsec_fs_cleanup(ipsec);
+	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_FULL_OFFLOAD)
+		mlx5e_ipsec_aso_cleanup(ipsec);
 	destroy_workqueue(ipsec->wq);
 	kfree(ipsec);
 	priv->ipsec = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 3bba62f54604..2be7fb7db456 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -39,6 +39,7 @@
 #include <linux/mlx5/device.h>
 #include <net/xfrm.h>
 #include <linux/idr.h>
+#include "lib/aso.h"
 
 #define MLX5E_IPSEC_SADB_RX_BITS 10
 #define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
@@ -107,6 +108,8 @@ struct mlx5e_ipsec {
 	struct mlx5e_ipsec_rx *rx_ipv4;
 	struct mlx5e_ipsec_rx *rx_ipv6;
 	struct mlx5e_ipsec_tx *tx;
+	struct mlx5_aso *aso;
+	u32 pdn;
 };
 
 struct mlx5e_ipsec_esn_state {
@@ -160,6 +163,9 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev);
 void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
 				const struct mlx5_accel_esp_xfrm_attrs *attrs);
 
+int mlx5e_ipsec_aso_init(struct mlx5e_ipsec *ipsec);
+void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec);
+
 static inline struct mlx5_core_dev *
 mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index 1e586db009be..7ebdfe560398 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -211,3 +211,33 @@ void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
 
 	memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs));
 }
+
+int mlx5e_ipsec_aso_init(struct mlx5e_ipsec *ipsec)
+{
+	struct mlx5_core_dev *mdev = ipsec->mdev;
+	int err;
+
+	err = mlx5_core_alloc_pd(mdev, &ipsec->pdn);
+	if (err)
+		return err;
+
+	ipsec->aso = mlx5_aso_create(mdev, ipsec->pdn);
+	if (IS_ERR(ipsec->aso)) {
+		err = PTR_ERR(ipsec->aso);
+		goto err_aso_create;
+	}
+
+	return 0;
+
+err_aso_create:
+	mlx5_core_dealloc_pd(mdev, ipsec->pdn);
+	return err;
+}
+
+void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec)
+{
+	struct mlx5_core_dev *mdev = ipsec->mdev;
+
+	mlx5_aso_destroy(ipsec->aso);
+	mlx5_core_dealloc_pd(mdev, ipsec->pdn);
+}
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 15/26] net/mlx5e: Flatten the IPsec RX add rule path
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Rewrote the IPsec RX add rule path to be less convoluted
and don't rely on pre-initialized variables. The code now has clean
linear flow with clean separation between error and success paths.

Reviewed-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.h       |  2 +-
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 88 +++++++++++--------
 2 files changed, 53 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 9acb3e98c823..67f38cb16d34 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -121,7 +121,7 @@ struct mlx5e_ipsec_esn_state {
 
 struct mlx5e_ipsec_rule {
 	struct mlx5_flow_handle *rule;
-	struct mlx5_modify_hdr *set_modify_hdr;
+	struct mlx5_modify_hdr *modify_hdr;
 };
 
 struct mlx5e_ipsec_modify_state_work {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 19e22780f846..03b0ce7b09df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -405,20 +405,52 @@ static void setup_fte_reg_a(struct mlx5_flow_spec *spec)
 		 misc_parameters_2.metadata_reg_a, MLX5_ETH_WQE_FT_META_IPSEC);
 }
 
-static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
+static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
+			       struct mlx5_flow_act *flow_act)
 {
 	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+	enum mlx5_flow_namespace_type ns_type;
+	struct mlx5_modify_hdr *modify_hdr;
+
+	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+	switch (dir) {
+	case XFRM_DEV_OFFLOAD_IN:
+		MLX5_SET(set_action_in, action, field,
+			 MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	MLX5_SET(set_action_in, action, data, val);
+	MLX5_SET(set_action_in, action, offset, 0);
+	MLX5_SET(set_action_in, action, length, 32);
+
+	modify_hdr = mlx5_modify_header_alloc(mdev, ns_type, 1, action);
+	if (IS_ERR(modify_hdr)) {
+		mlx5_core_err(mdev, "Failed to allocate modify_header %ld\n",
+			      PTR_ERR(modify_hdr));
+		return PTR_ERR(modify_hdr);
+	}
+
+	flow_act->modify_hdr = modify_hdr;
+	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+	return 0;
+}
+
+static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
 	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
-	struct mlx5_modify_hdr *modify_hdr = NULL;
 	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_spec *spec;
 	struct mlx5e_ipsec_rx *rx;
-	int err = 0;
+	int err;
 
 	rx = rx_ft_get(mdev, ipsec, attrs->family);
 	if (IS_ERR(rx))
@@ -427,7 +459,7 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
 		err = -ENOMEM;
-		goto out_err;
+		goto err_alloc;
 	}
 
 	if (attrs->family == AF_INET)
@@ -439,51 +471,35 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	setup_fte_esp(spec);
 	setup_fte_no_frags(spec);
 
-	/* Set bit[31] ipsec marker */
-	/* Set bit[23-0] ipsec_obj_id */
-	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
-	MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
-	MLX5_SET(set_action_in, action, data,
-		 (sa_entry->ipsec_obj_id | BIT(31)));
-	MLX5_SET(set_action_in, action, offset, 0);
-	MLX5_SET(set_action_in, action, length, 32);
-
-	modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL,
-					      1, action);
-	if (IS_ERR(modify_hdr)) {
-		err = PTR_ERR(modify_hdr);
-		mlx5_core_err(mdev,
-			      "fail to alloc ipsec set modify_header_id err=%d\n", err);
-		modify_hdr = NULL;
-		goto out_err;
-	}
+	err = setup_modify_header(mdev, sa_entry->ipsec_obj_id | BIT(31),
+				  XFRM_DEV_OFFLOAD_IN, &flow_act);
+	if (err)
+		goto err_mod_header;
 
 	flow_act.ipsec_obj_id = sa_entry->ipsec_obj_id;
 	flow_act.flags |= FLOW_ACT_NO_APPEND;
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
-			  MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT |
-			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+			   MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT;
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-	flow_act.modify_hdr = modify_hdr;
 	dest.ft = rx->rx_err.ft;
 	rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, &dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err);
-		goto out_err;
+		goto err_add_flow;
 	}
+	kvfree(spec);
 
 	ipsec_rule->rule = rule;
-	ipsec_rule->set_modify_hdr = modify_hdr;
-	goto out;
-
-out_err:
-	if (modify_hdr)
-		mlx5_modify_header_dealloc(mdev, modify_hdr);
-	rx_ft_put(mdev, ipsec, attrs->family);
+	ipsec_rule->modify_hdr = flow_act.modify_hdr;
+	return 0;
 
-out:
+err_add_flow:
+	mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr);
+err_mod_header:
 	kvfree(spec);
+err_alloc:
+	rx_ft_put(mdev, ipsec, attrs->family);
 	return err;
 }
 
@@ -558,7 +574,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 		return;
 	}
 
-	mlx5_modify_header_dealloc(mdev, ipsec_rule->set_modify_hdr);
+	mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr);
 	rx_ft_put(mdev, sa_entry->ipsec, sa_entry->attrs.family);
 }
 
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 14/26] net/mlx5e: Refactor FTE setup code to be more clear
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

The policy offload logic needs to set flow steering rule that match
on saddr and daddr too, so factor out this code to separate functions,
together with code alignment to netdev coding pattern of relying on
family type.

As part of this change, let's separate more logic from setup_fte_common
to make sure that the function names describe that is done in the
function better than general *common* name.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 136 +++++++++++-------
 1 file changed, 83 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 291533d55e2d..19e22780f846 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -331,60 +331,78 @@ static void tx_ft_put(struct mlx5e_ipsec *ipsec)
 	mutex_unlock(&tx->ft.mutex);
 }
 
-static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs,
-			     u32 ipsec_obj_id,
-			     struct mlx5_flow_spec *spec,
-			     struct mlx5_flow_act *flow_act)
+static void setup_fte_addr4(struct mlx5_flow_spec *spec, __be32 *saddr,
+			    __be32 *daddr)
 {
-	u8 ip_version = (attrs->family == AF_INET) ? 4 : 6;
+	spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
 
-	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS;
-
-	/* ip_version */
 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
-	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ip_version);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 4);
+
+	memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+			    outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), saddr, 4);
+	memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+			    outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), daddr, 4);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+			 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+			 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+}
 
-	/* Non fragmented */
-	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.frag);
-	MLX5_SET(fte_match_param, spec->match_value, outer_headers.frag, 0);
+static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr,
+			    __be32 *daddr)
+{
+	spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 6);
+
+	memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+			    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), saddr, 16);
+	memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+			    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), daddr, 16);
+	memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+			    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16);
+	memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+			    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16);
+}
 
+static void setup_fte_esp(struct mlx5_flow_spec *spec)
+{
 	/* ESP header */
+	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP);
+}
 
+static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi)
+{
 	/* SPI number */
+	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi);
-	MLX5_SET(fte_match_param, spec->match_value,
-		 misc_parameters.outer_esp_spi, attrs->spi);
-
-	if (ip_version == 4) {
-		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
-				    outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
-		       &attrs->saddr.a4, 4);
-		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
-				    outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
-		       &attrs->daddr.a4, 4);
-		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
-				 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
-		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
-				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-	} else {
-		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
-				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &attrs->saddr.a6, 16);
-		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
-				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &attrs->daddr.a6, 16);
-		memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       0xff, 16);
-		memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       0xff, 16);
-	}
+	MLX5_SET(fte_match_param, spec->match_value, misc_parameters.outer_esp_spi, spi);
+}
 
-	flow_act->ipsec_obj_id = ipsec_obj_id;
-	flow_act->flags |= FLOW_ACT_NO_APPEND;
+static void setup_fte_no_frags(struct mlx5_flow_spec *spec)
+{
+	/* Non fragmented */
+	spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.frag);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.frag, 0);
+}
+
+static void setup_fte_reg_a(struct mlx5_flow_spec *spec)
+{
+	/* Add IPsec indicator in metadata_reg_a */
+	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+	MLX5_SET(fte_match_param, spec->match_criteria,
+		 misc_parameters_2.metadata_reg_a, MLX5_ETH_WQE_FT_META_IPSEC);
+	MLX5_SET(fte_match_param, spec->match_value,
+		 misc_parameters_2.metadata_reg_a, MLX5_ETH_WQE_FT_META_IPSEC);
 }
 
 static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
@@ -394,7 +412,6 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
-	u32 ipsec_obj_id = sa_entry->ipsec_obj_id;
 	struct mlx5_modify_hdr *modify_hdr = NULL;
 	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_act flow_act = {};
@@ -413,13 +430,21 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 		goto out_err;
 	}
 
-	setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act);
+	if (attrs->family == AF_INET)
+		setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
+	else
+		setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
+
+	setup_fte_spi(spec, attrs->spi);
+	setup_fte_esp(spec);
+	setup_fte_no_frags(spec);
 
 	/* Set bit[31] ipsec marker */
 	/* Set bit[23-0] ipsec_obj_id */
 	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
 	MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
-	MLX5_SET(set_action_in, action, data, (ipsec_obj_id | BIT(31)));
+	MLX5_SET(set_action_in, action, data,
+		 (sa_entry->ipsec_obj_id | BIT(31)));
 	MLX5_SET(set_action_in, action, offset, 0);
 	MLX5_SET(set_action_in, action, length, 32);
 
@@ -433,6 +458,8 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 		goto out_err;
 	}
 
+	flow_act.ipsec_obj_id = sa_entry->ipsec_obj_id;
+	flow_act.flags |= FLOW_ACT_NO_APPEND;
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
 			  MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT |
 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
@@ -462,6 +489,7 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 
 static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
+	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
 	struct mlx5_flow_act flow_act = {};
@@ -480,16 +508,18 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 		goto out;
 	}
 
-	setup_fte_common(&sa_entry->attrs, sa_entry->ipsec_obj_id, spec,
-			 &flow_act);
+	if (attrs->family == AF_INET)
+		setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
+	else
+		setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
 
-	/* Add IPsec indicator in metadata_reg_a */
-	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
-	MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a,
-		 MLX5_ETH_WQE_FT_META_IPSEC);
-	MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a,
-		 MLX5_ETH_WQE_FT_META_IPSEC);
+	setup_fte_spi(spec, attrs->spi);
+	setup_fte_esp(spec);
+	setup_fte_no_frags(spec);
+	setup_fte_reg_a(spec);
 
+	flow_act.ipsec_obj_id = sa_entry->ipsec_obj_id;
+	flow_act.flags |= FLOW_ACT_NO_APPEND;
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW |
 			  MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT;
 	rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, NULL, 0);
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 16/26] net/mlx5e: Make clear what IPsec rx_err does
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Reuse existing struct what holds all information about modify
header pointer and rule. This helps to reduce ambiguity from the
name _err_ that doesn't describe the real purpose of that flow
table, rule and function - to copy status result from HW to
the stack.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 38 ++++++++-----------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 03b0ce7b09df..7d1b587a0a04 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -9,15 +9,10 @@
 
 #define NUM_IPSEC_FTE BIT(15)
 
-struct mlx5e_ipsec_rx_err {
-	struct mlx5_flow_table *ft;
-	struct mlx5_flow_handle *rule;
-	struct mlx5_modify_hdr *copy_modify_hdr;
-};
-
 struct mlx5e_ipsec_ft {
 	struct mutex mutex; /* Protect changes to this struct */
 	struct mlx5_flow_table *sa;
+	struct mlx5_flow_table *status;
 	u32 refcnt;
 };
 
@@ -26,7 +21,7 @@ struct mlx5e_ipsec_rx {
 	struct mlx5_flow_group *miss_group;
 	struct mlx5_flow_handle *miss_rule;
 	struct mlx5_flow_destination default_dest;
-	struct mlx5e_ipsec_rx_err rx_err;
+	struct mlx5e_ipsec_rule status;
 };
 
 struct mlx5e_ipsec_tx {
@@ -64,9 +59,8 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_core_dev *mdev,
 	return mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
 }
 
-static int rx_err_add_rule(struct mlx5_core_dev *mdev,
-			   struct mlx5e_ipsec_rx *rx,
-			   struct mlx5e_ipsec_rx_err *rx_err)
+static int ipsec_status_rule(struct mlx5_core_dev *mdev,
+			     struct mlx5e_ipsec_rx *rx)
 {
 	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
 	struct mlx5_flow_act flow_act = {};
@@ -101,7 +95,7 @@ static int rx_err_add_rule(struct mlx5_core_dev *mdev,
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
 			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	flow_act.modify_hdr = modify_hdr;
-	fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act,
+	fte = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act,
 				  &rx->default_dest, 1);
 	if (IS_ERR(fte)) {
 		err = PTR_ERR(fte);
@@ -110,8 +104,8 @@ static int rx_err_add_rule(struct mlx5_core_dev *mdev,
 	}
 
 	kvfree(spec);
-	rx_err->rule = fte;
-	rx_err->copy_modify_hdr = modify_hdr;
+	rx->status.rule = fte;
+	rx->status.modify_hdr = modify_hdr;
 	return 0;
 
 out:
@@ -172,9 +166,9 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 	mlx5_destroy_flow_group(rx->miss_group);
 	mlx5_destroy_flow_table(rx->ft.sa);
 
-	mlx5_del_flow_rules(rx->rx_err.rule);
-	mlx5_modify_header_dealloc(mdev, rx->rx_err.copy_modify_hdr);
-	mlx5_destroy_flow_table(rx->rx_err.ft);
+	mlx5_del_flow_rules(rx->status.rule);
+	mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
+	mlx5_destroy_flow_table(rx->ft.status);
 }
 
 static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
@@ -192,8 +186,8 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 	if (IS_ERR(ft))
 		return PTR_ERR(ft);
 
-	rx->rx_err.ft = ft;
-	err = rx_err_add_rule(mdev, rx, &rx->rx_err);
+	rx->ft.status = ft;
+	err = ipsec_status_rule(mdev, rx);
 	if (err)
 		goto err_add;
 
@@ -215,10 +209,10 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 err_fs:
 	mlx5_destroy_flow_table(rx->ft.sa);
 err_fs_ft:
-	mlx5_del_flow_rules(rx->rx_err.rule);
-	mlx5_modify_header_dealloc(mdev, rx->rx_err.copy_modify_hdr);
+	mlx5_del_flow_rules(rx->status.rule);
+	mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
 err_add:
-	mlx5_destroy_flow_table(rx->rx_err.ft);
+	mlx5_destroy_flow_table(rx->ft.status);
 	return err;
 }
 
@@ -481,7 +475,7 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
 			   MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT;
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-	dest.ft = rx->rx_err.ft;
+	dest.ft = rx->ft.status;
 	rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, &dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 17/26] net/mlx5e: Group IPsec miss handles into separate struct
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Move miss handles into dedicated struct, so we can reuse it in next
patch when creating IPsec policy flow table.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c     | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 7d1b587a0a04..3443638453a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -16,10 +16,14 @@ struct mlx5e_ipsec_ft {
 	u32 refcnt;
 };
 
+struct mlx5e_ipsec_miss {
+	struct mlx5_flow_group *group;
+	struct mlx5_flow_handle *rule;
+};
+
 struct mlx5e_ipsec_rx {
 	struct mlx5e_ipsec_ft ft;
-	struct mlx5_flow_group *miss_group;
-	struct mlx5_flow_handle *miss_rule;
+	struct mlx5e_ipsec_miss sa;
 	struct mlx5_flow_destination default_dest;
 	struct mlx5e_ipsec_rule status;
 };
@@ -142,18 +146,18 @@ static int rx_fs_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 		mlx5_core_err(mdev, "fail to create ipsec rx miss_group err=%d\n", err);
 		goto out;
 	}
-	rx->miss_group = miss_group;
+	rx->sa.group = miss_group;
 
 	/* Create miss rule */
 	miss_rule =
 		mlx5_add_flow_rules(ft, spec, &flow_act, &rx->default_dest, 1);
 	if (IS_ERR(miss_rule)) {
-		mlx5_destroy_flow_group(rx->miss_group);
+		mlx5_destroy_flow_group(rx->sa.group);
 		err = PTR_ERR(miss_rule);
 		mlx5_core_err(mdev, "fail to create ipsec rx miss_rule err=%d\n", err);
 		goto out;
 	}
-	rx->miss_rule = miss_rule;
+	rx->sa.rule = miss_rule;
 out:
 	kvfree(flow_group_in);
 	kvfree(spec);
@@ -162,8 +166,8 @@ static int rx_fs_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 
 static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 {
-	mlx5_del_flow_rules(rx->miss_rule);
-	mlx5_destroy_flow_group(rx->miss_group);
+	mlx5_del_flow_rules(rx->sa.rule);
+	mlx5_destroy_flow_group(rx->sa.group);
 	mlx5_destroy_flow_table(rx->ft.sa);
 
 	mlx5_del_flow_rules(rx->status.rule);
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 19/26] net/mlx5e: Create IPsec policy offload tables
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Add empty table to be used for IPsec policy offload.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/fs.h   |  3 +-
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 53 ++++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/fs_core.c |  6 +--
 3 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 9b8cdf2e68ad..6e155ed64d94 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -78,7 +78,8 @@ enum {
 	MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
 #endif
 #ifdef CONFIG_MLX5_EN_IPSEC
-	MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+	MLX5E_ACCEL_FS_POL_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+	MLX5E_ACCEL_FS_ESP_FT_LEVEL,
 	MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
 #endif
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index b3827e024a1d..1878bca0121a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -11,6 +11,7 @@
 
 struct mlx5e_ipsec_ft {
 	struct mutex mutex; /* Protect changes to this struct */
+	struct mlx5_flow_table *pol;
 	struct mlx5_flow_table *sa;
 	struct mlx5_flow_table *status;
 	u32 refcnt;
@@ -23,12 +24,14 @@ struct mlx5e_ipsec_miss {
 
 struct mlx5e_ipsec_rx {
 	struct mlx5e_ipsec_ft ft;
+	struct mlx5e_ipsec_miss pol;
 	struct mlx5e_ipsec_miss sa;
 	struct mlx5e_ipsec_rule status;
 };
 
 struct mlx5e_ipsec_tx {
 	struct mlx5e_ipsec_ft ft;
+	struct mlx5e_ipsec_miss pol;
 	struct mlx5_flow_namespace *ns;
 };
 
@@ -164,6 +167,10 @@ static int ipsec_miss_create(struct mlx5_core_dev *mdev,
 
 static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 {
+	mlx5_del_flow_rules(rx->pol.rule);
+	mlx5_destroy_flow_group(rx->pol.group);
+	mlx5_destroy_flow_table(rx->ft.pol);
+
 	mlx5_del_flow_rules(rx->sa.rule);
 	mlx5_destroy_flow_group(rx->sa.group);
 	mlx5_destroy_flow_table(rx->ft.sa);
@@ -206,8 +213,27 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 	if (err)
 		goto err_fs;
 
+	ft = ipsec_ft_create(mdev, ipsec->fs->ns, MLX5E_ACCEL_FS_POL_FT_LEVEL,
+			     MLX5E_NIC_PRIO, 1, XFRM_DEV_OFFLOAD_IN);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		goto err_pol_ft;
+	}
+	rx->ft.pol = ft;
+	memset(&dest, 0x00, sizeof(dest));
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = rx->ft.sa;
+	err = ipsec_miss_create(mdev, rx->ft.pol, &rx->pol, &dest);
+	if (err)
+		goto err_pol_miss;
+
 	return 0;
 
+err_pol_miss:
+	mlx5_destroy_flow_table(rx->ft.pol);
+err_pol_ft:
+	mlx5_del_flow_rules(rx->sa.rule);
+	mlx5_destroy_flow_group(rx->sa.group);
 err_fs:
 	mlx5_destroy_flow_table(rx->ft.sa);
 err_fs_ft:
@@ -241,7 +267,7 @@ static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5_core_dev *mdev,
 
 	/* connect */
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-	dest.ft = rx->ft.sa;
+	dest.ft = rx->ft.pol;
 	mlx5_ttc_fwd_dest(ipsec->fs->ttc, family2tt(family), &dest);
 
 skip:
@@ -281,14 +307,34 @@ static void rx_ft_put(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 /* IPsec TX flow steering */
 static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx)
 {
+	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_table *ft;
+	int err;
 
-	ft = ipsec_ft_create(mdev, tx->ns, 0, 0, 1, XFRM_DEV_OFFLOAD_OUT);
+	ft = ipsec_ft_create(mdev, tx->ns, 1, 0, 1, XFRM_DEV_OFFLOAD_OUT);
 	if (IS_ERR(ft))
 		return PTR_ERR(ft);
 
 	tx->ft.sa = ft;
+
+	ft = ipsec_ft_create(mdev, tx->ns, 0, 0, 1, XFRM_DEV_OFFLOAD_OUT);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		goto err_pol_ft;
+	}
+	tx->ft.pol = ft;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = tx->ft.sa;
+	err = ipsec_miss_create(mdev, tx->ft.pol, &tx->pol, &dest);
+	if (err)
+		goto err_pol_miss;
 	return 0;
+
+err_pol_miss:
+	mlx5_destroy_flow_table(tx->ft.pol);
+err_pol_ft:
+	mlx5_destroy_flow_table(tx->ft.sa);
+	return err;
 }
 
 static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev,
@@ -322,6 +368,9 @@ static void tx_ft_put(struct mlx5e_ipsec *ipsec)
 	if (tx->ft.refcnt)
 		goto out;
 
+	mlx5_del_flow_rules(tx->pol.rule);
+	mlx5_destroy_flow_group(tx->pol.group);
+	mlx5_destroy_flow_table(tx->ft.pol);
 	mlx5_destroy_flow_table(tx->ft.sa);
 out:
 	mutex_unlock(&tx->ft.mutex);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e3960cdf5131..bb7e8c6f299c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -107,8 +107,8 @@
 #define ETHTOOL_PRIO_NUM_LEVELS 1
 #define ETHTOOL_NUM_PRIOS 11
 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
-/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}} */
-#define KERNEL_NIC_PRIO_NUM_LEVELS 7
+/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy */
+#define KERNEL_NIC_PRIO_NUM_LEVELS 8
 #define KERNEL_NIC_NUM_PRIOS 1
 /* One more level for tc */
 #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
@@ -129,7 +129,7 @@
 #define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
 
 #define KERNEL_TX_IPSEC_NUM_PRIOS  1
-#define KERNEL_TX_IPSEC_NUM_LEVELS 1
+#define KERNEL_TX_IPSEC_NUM_LEVELS 2
 #define KERNEL_TX_MIN_LEVEL        (KERNEL_TX_IPSEC_NUM_LEVELS)
 
 struct node_caps {
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 22/26] net/mlx5e: Configure IPsec full offload flow steering
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

In full offload mode, the HW is responsible to handle ESP headers,
SPI numbers and trailers (ICV) together with different logic for
RX and TX paths.

In order to support full offload mode, special logic is added
to flow steering rules.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.c       |  2 +
 .../mellanox/mlx5/core/en_accel/ipsec.h       |  2 +
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 95 +++++++++++++++++--
 3 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index fef1b1918419..7f93ec8ed3dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -165,6 +165,8 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
 	memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
 	       sizeof(aes_gcm->salt));
 
+	attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */
+
 	/* iv len */
 	aes_gcm->icv_len = x->aead->alg_icv_len;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index fead96097332..e5e3b2d1acc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -76,6 +76,7 @@ struct mlx5_accel_esp_xfrm_attrs {
 	u8 type : 2;
 	u8 family;
 	u32 replay_window;
+	u32 authsize;
 };
 
 enum mlx5_ipsec_cap {
@@ -122,6 +123,7 @@ struct mlx5e_ipsec_esn_state {
 struct mlx5e_ipsec_rule {
 	struct mlx5_flow_handle *rule;
 	struct mlx5_modify_hdr *modify_hdr;
+	struct mlx5_pkt_reformat *pkt_reformat;
 };
 
 struct mlx5e_ipsec_modify_state_work {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 6911f0b962ce..a047b1d6a016 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -484,6 +484,49 @@ static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
 	return 0;
 }
 
+static int setup_pkt_reformat(struct mlx5_core_dev *mdev,
+			      struct mlx5_accel_esp_xfrm_attrs *attrs,
+			      struct mlx5_flow_act *flow_act)
+{
+	struct mlx5_pkt_reformat_params reformat_params = {};
+	enum mlx5_flow_namespace_type ns_type =
+		MLX5_FLOW_NAMESPACE_EGRESS_KERNEL;
+	struct mlx5_pkt_reformat *pkt_reformat;
+	u8 reformatbf[16] = {};
+	__be32 spi;
+
+	if (attrs->dir == XFRM_DEV_OFFLOAD_IN) {
+		reformat_params.type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
+		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+		goto cmd;
+	}
+
+	if (attrs->family == AF_INET)
+		reformat_params.type =
+			MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
+	else
+		reformat_params.type =
+			MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
+
+	/* convert to network format */
+	spi = htonl(attrs->spi);
+	memcpy(reformatbf, &spi, 4);
+
+	reformat_params.param_0 = attrs->authsize;
+	reformat_params.size = sizeof(reformatbf);
+	reformat_params.data = &reformatbf;
+
+cmd:
+	pkt_reformat =
+		mlx5_packet_reformat_alloc(mdev, &reformat_params, ns_type);
+	if (IS_ERR(pkt_reformat))
+		return PTR_ERR(pkt_reformat);
+
+	flow_act->pkt_reformat = pkt_reformat;
+	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+	return 0;
+}
+
 static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
@@ -520,6 +563,16 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	if (err)
 		goto err_mod_header;
 
+	switch (attrs->type) {
+	case XFRM_DEV_OFFLOAD_FULL:
+		err = setup_pkt_reformat(mdev, attrs, &flow_act);
+		if (err)
+			goto err_pkt_reformat;
+		break;
+	default:
+		break;
+	}
+
 	flow_act.ipsec_obj_id = sa_entry->ipsec_obj_id;
 	flow_act.flags |= FLOW_ACT_NO_APPEND;
 	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
@@ -536,9 +589,13 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 
 	sa_entry->ipsec_rule.rule = rule;
 	sa_entry->ipsec_rule.modify_hdr = flow_act.modify_hdr;
+	sa_entry->ipsec_rule.pkt_reformat = flow_act.pkt_reformat;
 	return 0;
 
 err_add_flow:
+	if (flow_act.pkt_reformat)
+		mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat);
+err_pkt_reformat:
 	mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr);
 err_mod_header:
 	kvfree(spec);
@@ -565,7 +622,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
 		err = -ENOMEM;
-		goto out;
+		goto err_alloc;
 	}
 
 	if (attrs->family == AF_INET)
@@ -573,28 +630,46 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	else
 		setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
 
-	setup_fte_spi(spec, attrs->spi);
-	setup_fte_esp(spec);
 	setup_fte_no_frags(spec);
-	setup_fte_reg_a(spec);
+
+	switch (attrs->type) {
+	case XFRM_DEV_OFFLOAD_CRYPTO:
+		setup_fte_spi(spec, attrs->spi);
+		setup_fte_esp(spec);
+		setup_fte_reg_a(spec);
+		break;
+	case XFRM_DEV_OFFLOAD_FULL:
+		err = setup_pkt_reformat(mdev, attrs, &flow_act);
+		if (err)
+			goto err_pkt_reformat;
+		break;
+	default:
+		break;
+	}
 
 	flow_act.ipsec_obj_id = sa_entry->ipsec_obj_id;
 	flow_act.flags |= FLOW_ACT_NO_APPEND;
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW |
-			  MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT;
+	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW |
+			   MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT;
 	rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, NULL, 0);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err);
-		goto out;
+		goto err_add_flow;
 	}
 
+	kvfree(spec);
 	sa_entry->ipsec_rule.rule = rule;
+	sa_entry->ipsec_rule.pkt_reformat = flow_act.pkt_reformat;
+	return 0;
 
-out:
+err_add_flow:
+	if (flow_act.pkt_reformat)
+		mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat);
+err_pkt_reformat:
 	kvfree(spec);
-	if (err)
-		tx_ft_put(ipsec);
+err_alloc:
+	tx_ft_put(ipsec);
 	return err;
 }
 
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 20/26] net/mlx5e: Add XFRM policy offload logic
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Implement mlx5 flow steering logic and mlx5 IPsec code support
XFRM policy offload.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.c       | 103 ++++++++++++
 .../mellanox/mlx5/core/en_accel/ipsec.h       |  32 ++++
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 146 ++++++++++++++++++
 3 files changed, 281 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 9e936e9cc673..fef1b1918419 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -45,6 +45,11 @@ static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
 	return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
 }
 
+static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
+{
+	return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
+}
+
 struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
 					      unsigned int handle)
 {
@@ -462,12 +467,110 @@ static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
 	queue_work(sa_entry->ipsec->wq, &modify_work->work);
 }
 
+static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x)
+{
+	struct net_device *netdev = x->xdo.real_dev;
+
+	if (x->type != XFRM_POLICY_TYPE_MAIN) {
+		netdev_info(netdev, "Cannot offload non-main policy types\n");
+		return -EINVAL;
+	}
+
+	/* Please pay attention that we support only one template */
+	if (x->xfrm_nr > 1) {
+		netdev_info(netdev, "Cannot offload more than one template\n");
+		return -EINVAL;
+	}
+
+	if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN &&
+	    x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) {
+		netdev_info(netdev, "Cannot offload forward policy\n");
+		return -EINVAL;
+	}
+
+	if (!x->xfrm_vec[0].reqid) {
+		netdev_info(netdev, "Cannot offload policy without reqid\n");
+		return -EINVAL;
+	}
+
+	if (x->xdo.type != XFRM_DEV_OFFLOAD_FULL) {
+		netdev_info(netdev, "Unsupported xfrm offload type\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void
+mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
+				  struct mlx5_accel_pol_xfrm_attrs *attrs)
+{
+	struct xfrm_policy *x = pol_entry->x;
+	struct xfrm_selector *sel;
+
+	sel = &x->selector;
+	memset(attrs, 0, sizeof(*attrs));
+
+	memcpy(&attrs->saddr, sel->saddr.a6, sizeof(attrs->saddr));
+	memcpy(&attrs->daddr, sel->daddr.a6, sizeof(attrs->daddr));
+	attrs->family = sel->family;
+	attrs->dir = x->xdo.dir;
+	attrs->action = x->action;
+	attrs->type = XFRM_DEV_OFFLOAD_FULL;
+}
+
+static int mlx5e_xfrm_add_policy(struct xfrm_policy *x)
+{
+	struct net_device *netdev = x->xdo.real_dev;
+	struct mlx5e_ipsec_pol_entry *pol_entry;
+	struct mlx5e_priv *priv;
+	int err;
+
+	priv = netdev_priv(netdev);
+	if (!priv->ipsec)
+		return -EOPNOTSUPP;
+
+	err = mlx5e_xfrm_validate_policy(x);
+	if (err)
+		return err;
+
+	pol_entry = kzalloc(sizeof(*pol_entry), GFP_KERNEL);
+	if (!pol_entry)
+		return -ENOMEM;
+
+	pol_entry->x = x;
+	pol_entry->ipsec = priv->ipsec;
+
+	mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs);
+	err = mlx5e_accel_ipsec_fs_add_pol(pol_entry);
+	if (err)
+		goto err_fs;
+
+	x->xdo.offload_handle = (unsigned long)pol_entry;
+	return 0;
+
+err_fs:
+	kfree(pol_entry);
+	return err;
+}
+
+static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
+{
+	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
+
+	mlx5e_accel_ipsec_fs_del_pol(pol_entry);
+	kfree(pol_entry);
+}
+
 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
 	.xdo_dev_state_add	= mlx5e_xfrm_add_state,
 	.xdo_dev_state_delete	= mlx5e_xfrm_del_state,
 	.xdo_dev_state_free	= mlx5e_xfrm_free_state,
 	.xdo_dev_offload_ok	= mlx5e_ipsec_offload_ok,
 	.xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
+
+	.xdo_dev_policy_add = mlx5e_xfrm_add_policy,
+	.xdo_dev_policy_free = mlx5e_xfrm_free_policy,
 };
 
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 67f38cb16d34..fead96097332 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -144,6 +144,30 @@ struct mlx5e_ipsec_sa_entry {
 	struct mlx5e_ipsec_modify_state_work modify_work;
 };
 
+struct mlx5_accel_pol_xfrm_attrs {
+	union {
+		__be32 a4;
+		__be32 a6[4];
+	} saddr;
+
+	union {
+		__be32 a4;
+		__be32 a6[4];
+	} daddr;
+
+	u8 family;
+	u8 action;
+	u8 type : 2;
+	u8 dir : 2;
+};
+
+struct mlx5e_ipsec_pol_entry {
+	struct xfrm_policy *x;
+	struct mlx5e_ipsec *ipsec;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_accel_pol_xfrm_attrs attrs;
+};
+
 int mlx5e_ipsec_init(struct mlx5e_priv *priv);
 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
@@ -155,6 +179,8 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec);
 int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec);
 int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
 void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
+int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
+void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
 
 int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
 void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
@@ -172,6 +198,12 @@ mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	return sa_entry->ipsec->mdev;
 }
+
+static inline struct mlx5_core_dev *
+mlx5e_ipsec_pol2dev(struct mlx5e_ipsec_pol_entry *pol_entry)
+{
+	return pol_entry->ipsec->mdev;
+}
 #else
 static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 1878bca0121a..eb08175f41b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -599,6 +599,130 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	return err;
 }
 
+static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
+{
+	struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs;
+	struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry);
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	struct mlx5e_ipsec_tx *tx;
+	int err;
+
+	tx = tx_ft_get(mdev, pol_entry->ipsec);
+	if (IS_ERR(tx))
+		return PTR_ERR(tx);
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec) {
+		err = -ENOMEM;
+		goto err_alloc;
+	}
+
+	if (attrs->family == AF_INET)
+		setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
+	else
+		setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
+
+	setup_fte_no_frags(spec);
+
+	switch (attrs->action) {
+	case XFRM_POLICY_ALLOW:
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+		break;
+	case XFRM_POLICY_BLOCK:
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+		break;
+	default:
+		WARN_ON(true);
+		err = -EINVAL;
+		goto err_action;
+	}
+
+	flow_act.flags |= FLOW_ACT_NO_APPEND;
+	dest.ft = tx->ft.sa;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	rule = mlx5_add_flow_rules(tx->ft.pol, spec, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err);
+		goto err_action;
+	}
+
+	kvfree(spec);
+	pol_entry->rule = rule;
+	return 0;
+
+err_action:
+	kvfree(spec);
+err_alloc:
+	tx_ft_put(pol_entry->ipsec);
+	return err;
+}
+
+static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
+{
+	struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs;
+	struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry);
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	struct mlx5e_ipsec_rx *rx;
+	int err;
+
+	rx = rx_ft_get(mdev, pol_entry->ipsec, attrs->family);
+	if (IS_ERR(rx))
+		return PTR_ERR(rx);
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec) {
+		err = -ENOMEM;
+		goto err_alloc;
+	}
+
+	if (attrs->family == AF_INET)
+		setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
+	else
+		setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
+
+	setup_fte_no_frags(spec);
+
+	switch (attrs->action) {
+	case XFRM_POLICY_ALLOW:
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+		break;
+	case XFRM_POLICY_BLOCK:
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+		break;
+	default:
+		WARN_ON(true);
+		err = -EINVAL;
+		goto err_action;
+	}
+
+	flow_act.flags |= FLOW_ACT_NO_APPEND;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = rx->ft.sa;
+	rule = mlx5_add_flow_rules(rx->ft.pol, spec, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Fail to add RX IPsec policy rule err=%d\n", err);
+		goto err_action;
+	}
+
+	kvfree(spec);
+	pol_entry->rule = rule;
+	return 0;
+
+err_action:
+	kvfree(spec);
+err_alloc:
+	rx_ft_put(mdev, pol_entry->ipsec, attrs->family);
+	return err;
+}
+
 int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT)
@@ -623,6 +747,28 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	rx_ft_put(mdev, sa_entry->ipsec, sa_entry->attrs.family);
 }
 
+int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry)
+{
+	if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT)
+		return tx_add_policy(pol_entry);
+
+	return rx_add_policy(pol_entry);
+}
+
+void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry)
+{
+	struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry);
+
+	mlx5_del_flow_rules(pol_entry->rule);
+
+	if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) {
+		tx_ft_put(pol_entry->ipsec);
+		return;
+	}
+
+	rx_ft_put(mdev, pol_entry->ipsec, pol_entry->attrs.family);
+}
+
 void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
 {
 	if (!ipsec->tx)
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 18/26] net/mlx5e: Generalize creation of default IPsec miss group and rule
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Create general function that sets miss group and rule to forward all
not-matched traffic to the next table.

Reviewed-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 48 +++++++++----------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 3443638453a9..b3827e024a1d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -24,7 +24,6 @@ struct mlx5e_ipsec_miss {
 struct mlx5e_ipsec_rx {
 	struct mlx5e_ipsec_ft ft;
 	struct mlx5e_ipsec_miss sa;
-	struct mlx5_flow_destination default_dest;
 	struct mlx5e_ipsec_rule status;
 };
 
@@ -64,7 +63,8 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_core_dev *mdev,
 }
 
 static int ipsec_status_rule(struct mlx5_core_dev *mdev,
-			     struct mlx5e_ipsec_rx *rx)
+			     struct mlx5e_ipsec_rx *rx,
+			     struct mlx5_flow_destination *dest)
 {
 	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
 	struct mlx5_flow_act flow_act = {};
@@ -99,8 +99,7 @@ static int ipsec_status_rule(struct mlx5_core_dev *mdev,
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
 			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	flow_act.modify_hdr = modify_hdr;
-	fte = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act,
-				  &rx->default_dest, 1);
+	fte = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 1);
 	if (IS_ERR(fte)) {
 		err = PTR_ERR(fte);
 		mlx5_core_err(mdev, "fail to add ipsec rx err copy rule err=%d\n", err);
@@ -119,12 +118,12 @@ static int ipsec_status_rule(struct mlx5_core_dev *mdev,
 	return err;
 }
 
-static int rx_fs_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
+static int ipsec_miss_create(struct mlx5_core_dev *mdev,
+			     struct mlx5_flow_table *ft,
+			     struct mlx5e_ipsec_miss *miss,
+			     struct mlx5_flow_destination *dest)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	struct mlx5_flow_table *ft = rx->ft.sa;
-	struct mlx5_flow_group *miss_group;
-	struct mlx5_flow_handle *miss_rule;
 	MLX5_DECLARE_FLOW_ACT(flow_act);
 	struct mlx5_flow_spec *spec;
 	u32 *flow_group_in;
@@ -140,24 +139,23 @@ static int rx_fs_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 	/* Create miss_group */
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1);
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1);
-	miss_group = mlx5_create_flow_group(ft, flow_group_in);
-	if (IS_ERR(miss_group)) {
-		err = PTR_ERR(miss_group);
-		mlx5_core_err(mdev, "fail to create ipsec rx miss_group err=%d\n", err);
+	miss->group = mlx5_create_flow_group(ft, flow_group_in);
+	if (IS_ERR(miss->group)) {
+		err = PTR_ERR(miss->group);
+		mlx5_core_err(mdev, "fail to create IPsec miss_group err=%d\n",
+			      err);
 		goto out;
 	}
-	rx->sa.group = miss_group;
 
 	/* Create miss rule */
-	miss_rule =
-		mlx5_add_flow_rules(ft, spec, &flow_act, &rx->default_dest, 1);
-	if (IS_ERR(miss_rule)) {
-		mlx5_destroy_flow_group(rx->sa.group);
-		err = PTR_ERR(miss_rule);
-		mlx5_core_err(mdev, "fail to create ipsec rx miss_rule err=%d\n", err);
+	miss->rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+	if (IS_ERR(miss->rule)) {
+		mlx5_destroy_flow_group(miss->group);
+		err = PTR_ERR(miss->rule);
+		mlx5_core_err(mdev, "fail to create IPsec miss_rule err=%d\n",
+			      err);
 		goto out;
 	}
-	rx->sa.rule = miss_rule;
 out:
 	kvfree(flow_group_in);
 	kvfree(spec);
@@ -178,12 +176,10 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 		     struct mlx5e_ipsec_rx *rx, u32 family)
 {
+	struct mlx5_flow_destination dest;
 	struct mlx5_flow_table *ft;
 	int err;
 
-	rx->default_dest =
-		mlx5_ttc_get_default_dest(ipsec->fs->ttc, family2tt(family));
-
 	ft = ipsec_ft_create(mdev, ipsec->fs->ns,
 			     MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, MLX5E_NIC_PRIO, 1,
 			     XFRM_DEV_OFFLOAD_IN);
@@ -191,7 +187,9 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 		return PTR_ERR(ft);
 
 	rx->ft.status = ft;
-	err = ipsec_status_rule(mdev, rx);
+
+	dest = mlx5_ttc_get_default_dest(ipsec->fs->ttc, family2tt(family));
+	err = ipsec_status_rule(mdev, rx, &dest);
 	if (err)
 		goto err_add;
 
@@ -204,7 +202,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 	}
 	rx->ft.sa = ft;
 
-	err = rx_fs_create(mdev, rx);
+	err = ipsec_miss_create(mdev, rx->ft.sa, &rx->sa, &dest);
 	if (err)
 		goto err_fs;
 
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 23/26] net/mlx5e: Improve IPsec flow steering autogroup
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Flow steering API separates newly created rules based on their
match criteria. Right now, all IPsec tables are created with one
group and suffers from not-optimal FS performance.

Count number of different match criteria for relevant tables, and
set proper value at the table creation.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c   | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index a047b1d6a016..c6e6652f5890 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -202,7 +202,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 
 	/* Create FT */
 	ft = ipsec_ft_create(mdev, ipsec->fs->ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL,
-			     MLX5E_NIC_PRIO, 1, XFRM_DEV_OFFLOAD_IN);
+			     MLX5E_NIC_PRIO, 2, XFRM_DEV_OFFLOAD_IN);
 	if (IS_ERR(ft)) {
 		err = PTR_ERR(ft);
 		goto err_fs_ft;
@@ -214,7 +214,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 		goto err_fs;
 
 	ft = ipsec_ft_create(mdev, ipsec->fs->ns, MLX5E_ACCEL_FS_POL_FT_LEVEL,
-			     MLX5E_NIC_PRIO, 1, XFRM_DEV_OFFLOAD_IN);
+			     MLX5E_NIC_PRIO, 2, XFRM_DEV_OFFLOAD_IN);
 	if (IS_ERR(ft)) {
 		err = PTR_ERR(ft);
 		goto err_pol_ft;
@@ -311,13 +311,13 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx)
 	struct mlx5_flow_table *ft;
 	int err;
 
-	ft = ipsec_ft_create(mdev, tx->ns, 1, 0, 1, XFRM_DEV_OFFLOAD_OUT);
+	ft = ipsec_ft_create(mdev, tx->ns, 1, 0, 4, XFRM_DEV_OFFLOAD_OUT);
 	if (IS_ERR(ft))
 		return PTR_ERR(ft);
 
 	tx->ft.sa = ft;
 
-	ft = ipsec_ft_create(mdev, tx->ns, 0, 0, 1, XFRM_DEV_OFFLOAD_OUT);
+	ft = ipsec_ft_create(mdev, tx->ns, 0, 0, 2, XFRM_DEV_OFFLOAD_OUT);
 	if (IS_ERR(ft)) {
 		err = PTR_ERR(ft);
 		goto err_pol_ft;
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 21/26] net/mlx5e: Use same coding pattern for Rx and Tx flows
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Remove intermediate variable in favour of having similar coding style
for Rx and Tx add rule functions.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index eb08175f41b8..6911f0b962ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -486,7 +486,6 @@ static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
 
 static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
-	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
 	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
@@ -535,8 +534,8 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	}
 	kvfree(spec);
 
-	ipsec_rule->rule = rule;
-	ipsec_rule->modify_hdr = flow_act.modify_hdr;
+	sa_entry->ipsec_rule.rule = rule;
+	sa_entry->ipsec_rule.modify_hdr = flow_act.modify_hdr;
 	return 0;
 
 err_add_flow:
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 25/26] net/mlx5e: Skip IPsec encryption for TX path without matching policy
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Software implementation of IPsec skips encryption of packets in TX
path if no matching policy is found. So align HW implementation to
this behavior, by requiring matching reqid for offloaded policy and
SA.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.c       |  2 +
 .../mellanox/mlx5/core/en_accel/ipsec.h       |  4 +-
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 39 ++++++++++++++++---
 3 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 7f93ec8ed3dc..6017aaabaabd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -187,6 +187,7 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
 	memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr));
 	attrs->family = x->props.family;
 	attrs->type = x->xso.type;
+	attrs->reqid = x->props.reqid;
 }
 
 static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
@@ -519,6 +520,7 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
 	attrs->dir = x->xdo.dir;
 	attrs->action = x->action;
 	attrs->type = XFRM_DEV_OFFLOAD_FULL;
+	attrs->reqid = x->xfrm_vec[0].reqid;
 }
 
 static int mlx5e_xfrm_add_policy(struct xfrm_policy *x)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 0fcab9ad9949..5c3ca03d21da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -77,6 +77,7 @@ struct mlx5_accel_esp_xfrm_attrs {
 	u8 family;
 	u32 replay_window;
 	u32 authsize;
+	u32 reqid;
 };
 
 enum mlx5_ipsec_cap {
@@ -173,12 +174,13 @@ struct mlx5_accel_pol_xfrm_attrs {
 	u8 action;
 	u8 type : 2;
 	u8 dir : 2;
+	u32 reqid;
 };
 
 struct mlx5e_ipsec_pol_entry {
 	struct xfrm_policy *x;
 	struct mlx5e_ipsec *ipsec;
-	struct mlx5_flow_handle *rule;
+	struct mlx5e_ipsec_rule ipsec_rule;
 	struct mlx5_accel_pol_xfrm_attrs attrs;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index e6b5c9526e1a..7b0071837f46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -460,6 +460,17 @@ static void setup_fte_reg_a(struct mlx5_flow_spec *spec)
 		 misc_parameters_2.metadata_reg_a, MLX5_ETH_WQE_FT_META_IPSEC);
 }
 
+static void setup_fte_reg_c0(struct mlx5_flow_spec *spec, u32 reqid)
+{
+	/* Pass policy check before choosing this SA */
+	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+	MLX5_SET(fte_match_param, spec->match_criteria,
+		 misc_parameters_2.metadata_reg_c_0, reqid);
+	MLX5_SET(fte_match_param, spec->match_value,
+		 misc_parameters_2.metadata_reg_c_0, reqid);
+}
+
 static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
 			       struct mlx5_flow_act *flow_act)
 {
@@ -474,6 +485,11 @@ static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
 			 MLX5_ACTION_IN_FIELD_METADATA_REG_B);
 		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
 		break;
+	case XFRM_DEV_OFFLOAD_OUT:
+		MLX5_SET(set_action_in, action, field,
+			 MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+		ns_type = MLX5_FLOW_NAMESPACE_EGRESS_KERNEL;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -650,6 +666,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 		setup_fte_reg_a(spec);
 		break;
 	case XFRM_DEV_OFFLOAD_FULL:
+		setup_fte_reg_c0(spec, attrs->reqid);
 		err = setup_pkt_reformat(mdev, attrs, &flow_act);
 		if (err)
 			goto err_pkt_reformat;
@@ -715,6 +732,11 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 
 	setup_fte_no_frags(spec);
 
+	err = setup_modify_header(mdev, attrs->reqid, XFRM_DEV_OFFLOAD_OUT,
+				  &flow_act);
+	if (err)
+		goto err_mod_header;
+
 	switch (attrs->action) {
 	case XFRM_POLICY_ALLOW:
 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
@@ -744,10 +766,13 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 	}
 
 	kvfree(spec);
-	pol_entry->rule = rule;
+	pol_entry->ipsec_rule.rule = rule;
+	pol_entry->ipsec_rule.modify_hdr = flow_act.modify_hdr;
 	return 0;
 
 err_action:
+	mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr);
+err_mod_header:
 	kvfree(spec);
 err_alloc:
 	tx_ft_put(pol_entry->ipsec);
@@ -810,7 +835,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 	}
 
 	kvfree(spec);
-	pol_entry->rule = rule;
+	pol_entry->ipsec_rule.rule = rule;
 	return 0;
 
 err_action:
@@ -964,16 +989,18 @@ int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry)
 
 void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry)
 {
+	struct mlx5e_ipsec_rule *ipsec_rule = &pol_entry->ipsec_rule;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry);
 
-	mlx5_del_flow_rules(pol_entry->rule);
+	mlx5_del_flow_rules(ipsec_rule->rule);
 
-	if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) {
-		tx_ft_put(pol_entry->ipsec);
+	if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
+		rx_ft_put(mdev, pol_entry->ipsec, pol_entry->attrs.family);
 		return;
 	}
 
-	rx_ft_put(mdev, pol_entry->ipsec, pol_entry->attrs.family);
+	mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr);
+	tx_ft_put(pol_entry->ipsec);
 }
 
 void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 26/26] net/mlx5e: Open mlx5 driver to accept IPsec full offload
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Leon Romanovsky, Eric Dumazet, netdev, Paolo Abeni, Raed Salem,
	ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Enable configuration of IPsec full offload through XFRM state add
interface together with moving specific to IPsec full mode limitations
to specific switch-case section.

Reviewed-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.c       | 46 +++++++++++++++----
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 6017aaabaabd..b517d105cb55 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -219,11 +219,6 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
 		netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n");
 		return -EINVAL;
 	}
-	if (x->props.mode != XFRM_MODE_TRANSPORT &&
-	    x->props.mode != XFRM_MODE_TUNNEL) {
-		dev_info(&netdev->dev, "Only transport and tunnel xfrm states may be offloaded\n");
-		return -EINVAL;
-	}
 	if (x->id.proto != IPPROTO_ESP) {
 		netdev_info(netdev, "Only ESP xfrm state may be offloaded\n");
 		return -EINVAL;
@@ -257,11 +252,32 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
 		netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n");
 		return -EINVAL;
 	}
-	if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO) {
-		netdev_info(netdev, "Unsupported xfrm offload type\n");
-		return -EINVAL;
-	}
-	if (x->xso.type == XFRM_DEV_OFFLOAD_FULL) {
+	switch (x->xso.type) {
+	case XFRM_DEV_OFFLOAD_CRYPTO:
+		if (!(mlx5_ipsec_device_caps(priv->mdev) &
+		      MLX5_IPSEC_CAP_CRYPTO)) {
+			netdev_info(netdev, "Crypto offload is not supported\n");
+			return -EINVAL;
+		}
+
+		if (x->props.mode != XFRM_MODE_TRANSPORT &&
+		    x->props.mode != XFRM_MODE_TUNNEL) {
+			netdev_info(netdev, "Only transport and tunnel xfrm states may be offloaded\n");
+			return -EINVAL;
+		}
+		break;
+	case XFRM_DEV_OFFLOAD_FULL:
+		if (!(mlx5_ipsec_device_caps(priv->mdev) &
+		      MLX5_IPSEC_CAP_FULL_OFFLOAD)) {
+			netdev_info(netdev, "Full offload is not supported\n");
+			return -EINVAL;
+		}
+
+		if (x->props.mode != XFRM_MODE_TRANSPORT) {
+			netdev_info(netdev, "Only transport xfrm states may be offloaded in full mode\n");
+			return -EINVAL;
+		}
+
 		if (x->replay_esn && x->replay_esn->replay_window != 32 &&
 		    x->replay_esn->replay_window != 64 &&
 		    x->replay_esn->replay_window != 128 &&
@@ -271,6 +287,16 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
 				    x->replay_esn->replay_window);
 			return -EINVAL;
 		}
+
+		if (!x->props.reqid) {
+			netdev_info(netdev, "Cannot ofload without reqid\n");
+			return -EINVAL;
+		}
+		break;
+	default:
+		netdev_info(netdev, "Unsupported xfrm offload type %d\n",
+			    x->xso.type);
+		return -EINVAL;
 	}
 	return 0;
 }
-- 
2.37.2


^ permalink raw reply related

* [PATCH xfrm-next 24/26] net/mlx5e: Add statistics for Rx/Tx IPsec offloaded flows
From: Leon Romanovsky @ 2022-08-16 10:38 UTC (permalink / raw)
  To: Steffen Klassert, David S . Miller, Jakub Kicinski,
	Saeed Mahameed
  Cc: Raed Salem, Eric Dumazet, netdev, Paolo Abeni, ipsec-devel
In-Reply-To: <cover.1660641154.git.leonro@nvidia.com>

From: Raed Salem <raeds@nvidia.com>

Add the following statistics:
RX successfully IPsec flows:
ipsec_rx_pkts  : Number of packets passed Rx IPsec flow
ipsec_rx_bytes : Number of bytes passed Rx IPsec flow

Rx dropped IPsec policy packets:
ipsec_rx_drop_pkts: Number of packets dropped in Rx datapath due to IPsec drop policy
ipsec_rx_drop_bytes: Number of bytes dropped in Rx datapath due to IPsec drop policy

TX successfully encrypted and encapsulated IPsec packets:
ipsec_tx_pkts  : Number of packets encrypted and encapsulated successfully
ipsec_tx_bytes : Number of bytes encrypted and encapsulated successfully

Tx dropped IPsec policy packets:
ipsec_tx_drop_pkts: Number of packets dropped in Tx datapath due to IPsec drop policy
ipsec_tx_drop_bytes: Number of bytes dropped in Tx datapath due to IPsec drop policy

The above can be seen using:
ethtool -S <ifc> |grep ipsec

Signed-off-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.h       |  14 ++
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 188 +++++++++++++++---
 .../mellanox/mlx5/core/en_accel/ipsec_stats.c |  52 +++++
 .../ethernet/mellanox/mlx5/core/en_stats.c    |   1 +
 .../ethernet/mellanox/mlx5/core/en_stats.h    |   1 +
 5 files changed, 232 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index e5e3b2d1acc3..0fcab9ad9949 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -87,6 +87,17 @@ enum mlx5_ipsec_cap {
 
 struct mlx5e_priv;
 
+struct mlx5e_ipsec_hw_stats {
+	u64 ipsec_rx_pkts;
+	u64 ipsec_rx_bytes;
+	u64 ipsec_rx_drop_pkts;
+	u64 ipsec_rx_drop_bytes;
+	u64 ipsec_tx_pkts;
+	u64 ipsec_tx_bytes;
+	u64 ipsec_tx_drop_pkts;
+	u64 ipsec_tx_drop_bytes;
+};
+
 struct mlx5e_ipsec_sw_stats {
 	atomic64_t ipsec_rx_drop_sp_alloc;
 	atomic64_t ipsec_rx_drop_sadb_miss;
@@ -105,6 +116,7 @@ struct mlx5e_ipsec {
 	DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
 	spinlock_t sadb_rx_lock; /* Protects sadb_rx */
 	struct mlx5e_ipsec_sw_stats sw_stats;
+	struct mlx5e_ipsec_hw_stats hw_stats;
 	struct workqueue_struct *wq;
 	struct mlx5e_flow_steering *fs;
 	struct mlx5e_ipsec_rx *rx_ipv4;
@@ -195,6 +207,8 @@ void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry,
 int mlx5e_ipsec_aso_init(struct mlx5e_ipsec *ipsec);
 void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec);
 
+void mlx5e_accel_ipsec_fs_read_full_stats(struct mlx5e_priv *priv, void *ipsec_stats);
+
 static inline struct mlx5_core_dev *
 mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index c6e6652f5890..e6b5c9526e1a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -9,6 +9,11 @@
 
 #define NUM_IPSEC_FTE BIT(15)
 
+struct mlx5e_ipsec_fc {
+	struct mlx5_fc *cnt;
+	struct mlx5_fc *drop;
+};
+
 struct mlx5e_ipsec_ft {
 	struct mutex mutex; /* Protect changes to this struct */
 	struct mlx5_flow_table *pol;
@@ -27,12 +32,14 @@ struct mlx5e_ipsec_rx {
 	struct mlx5e_ipsec_miss pol;
 	struct mlx5e_ipsec_miss sa;
 	struct mlx5e_ipsec_rule status;
+	struct mlx5e_ipsec_fc *fc;
 };
 
 struct mlx5e_ipsec_tx {
 	struct mlx5e_ipsec_ft ft;
 	struct mlx5e_ipsec_miss pol;
 	struct mlx5_flow_namespace *ns;
+	struct mlx5e_ipsec_fc *fc;
 };
 
 /* IPsec RX flow steering */
@@ -100,9 +107,10 @@ static int ipsec_status_rule(struct mlx5_core_dev *mdev,
 
 	/* create fte */
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
-			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+			  MLX5_FLOW_CONTEXT_ACTION_COUNT;
 	flow_act.modify_hdr = modify_hdr;
-	fte = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 1);
+	fte = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2);
 	if (IS_ERR(fte)) {
 		err = PTR_ERR(fte);
 		mlx5_core_err(mdev, "fail to add ipsec rx err copy rule err=%d\n", err);
@@ -183,7 +191,7 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 		     struct mlx5e_ipsec_rx *rx, u32 family)
 {
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest[2];
 	struct mlx5_flow_table *ft;
 	int err;
 
@@ -195,8 +203,10 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 
 	rx->ft.status = ft;
 
-	dest = mlx5_ttc_get_default_dest(ipsec->fs->ttc, family2tt(family));
-	err = ipsec_status_rule(mdev, rx, &dest);
+	dest[0] = mlx5_ttc_get_default_dest(ipsec->fs->ttc, family2tt(family));
+	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	dest[1].counter_id = mlx5_fc_id(rx->fc->cnt);
+	err = ipsec_status_rule(mdev, rx, dest);
 	if (err)
 		goto err_add;
 
@@ -209,7 +219,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 	}
 	rx->ft.sa = ft;
 
-	err = ipsec_miss_create(mdev, rx->ft.sa, &rx->sa, &dest);
+	err = ipsec_miss_create(mdev, rx->ft.sa, &rx->sa, dest);
 	if (err)
 		goto err_fs;
 
@@ -220,10 +230,10 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 		goto err_pol_ft;
 	}
 	rx->ft.pol = ft;
-	memset(&dest, 0x00, sizeof(dest));
-	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-	dest.ft = rx->ft.sa;
-	err = ipsec_miss_create(mdev, rx->ft.pol, &rx->pol, &dest);
+	memset(dest, 0x00, 2 * sizeof(*dest));
+	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest[0].ft = rx->ft.sa;
+	err = ipsec_miss_create(mdev, rx->ft.pol, &rx->pol, dest);
 	if (err)
 		goto err_pol_miss;
 
@@ -609,6 +619,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_spec *spec;
@@ -650,8 +661,11 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 	flow_act.ipsec_obj_id = sa_entry->ipsec_obj_id;
 	flow_act.flags |= FLOW_ACT_NO_APPEND;
 	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW |
-			   MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT;
-	rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, NULL, 0);
+			   MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT |
+			   MLX5_FLOW_CONTEXT_ACTION_COUNT;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	dest.counter_id = mlx5_fc_id(tx->fc->cnt);
+	rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, &dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err);
@@ -677,12 +691,12 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 {
 	struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry);
-	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_destination dest[2] = {};
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_spec *spec;
 	struct mlx5e_ipsec_tx *tx;
-	int err;
+	int err, dstn = 0;
 
 	tx = tx_ft_get(mdev, pol_entry->ipsec);
 	if (IS_ERR(tx))
@@ -706,7 +720,11 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 		break;
 	case XFRM_POLICY_BLOCK:
-		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+				   MLX5_FLOW_CONTEXT_ACTION_COUNT;
+		dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		dest[dstn].counter_id = mlx5_fc_id(tx->fc->drop);
+		dstn++;
 		break;
 	default:
 		WARN_ON(true);
@@ -715,9 +733,10 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 	}
 
 	flow_act.flags |= FLOW_ACT_NO_APPEND;
-	dest.ft = tx->ft.sa;
-	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-	rule = mlx5_add_flow_rules(tx->ft.pol, spec, &flow_act, &dest, 1);
+	dest[dstn].ft = tx->ft.sa;
+	dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dstn++;
+	rule = mlx5_add_flow_rules(tx->ft.pol, spec, &flow_act, dest, dstn);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err);
@@ -739,12 +758,12 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 {
 	struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs;
 	struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry);
-	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_destination dest[2];
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_spec *spec;
 	struct mlx5e_ipsec_rx *rx;
-	int err;
+	int err, dstn = 0;
 
 	rx = rx_ft_get(mdev, pol_entry->ipsec, attrs->family);
 	if (IS_ERR(rx))
@@ -768,7 +787,10 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 		break;
 	case XFRM_POLICY_BLOCK:
-		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+		dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		dest[dstn].counter_id = mlx5_fc_id(rx->fc->drop);
+		dstn++;
 		break;
 	default:
 		WARN_ON(true);
@@ -777,9 +799,10 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 	}
 
 	flow_act.flags |= FLOW_ACT_NO_APPEND;
-	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-	dest.ft = rx->ft.sa;
-	rule = mlx5_add_flow_rules(rx->ft.pol, spec, &flow_act, &dest, 1);
+	dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest[dstn].ft = rx->ft.sa;
+	dstn++;
+	rule = mlx5_add_flow_rules(rx->ft.pol, spec, &flow_act, dest, dstn);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		mlx5_core_err(mdev, "Fail to add RX IPsec policy rule err=%d\n", err);
@@ -797,6 +820,116 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 	return err;
 }
 
+static void ipsec_fs_destroy_counters(struct mlx5e_ipsec *ipsec)
+{
+	struct mlx5e_ipsec_rx *rx_ipv4 = ipsec->rx_ipv4;
+	struct mlx5_core_dev *mdev = ipsec->mdev;
+	struct mlx5e_ipsec_tx *tx = ipsec->tx;
+
+	mlx5_fc_destroy(mdev, tx->fc->drop);
+	mlx5_fc_destroy(mdev, tx->fc->cnt);
+	kfree(tx->fc);
+	mlx5_fc_destroy(mdev, rx_ipv4->fc->drop);
+	mlx5_fc_destroy(mdev, rx_ipv4->fc->cnt);
+	kfree(rx_ipv4->fc);
+}
+
+static int ipsec_fs_init_counters(struct mlx5e_ipsec *ipsec)
+{
+	struct mlx5e_ipsec_rx *rx_ipv4 = ipsec->rx_ipv4;
+	struct mlx5e_ipsec_rx *rx_ipv6 = ipsec->rx_ipv6;
+	struct mlx5_core_dev *mdev = ipsec->mdev;
+	struct mlx5e_ipsec_tx *tx = ipsec->tx;
+	struct mlx5e_ipsec_fc *fc;
+	struct mlx5_fc *counter;
+	int err;
+
+	fc = kzalloc(sizeof(*rx_ipv4->fc), GFP_KERNEL);
+	if (!fc)
+		return -ENOMEM;
+
+	/* Both IPv4 and IPv6 point to same flow counters struct. */
+	rx_ipv4->fc = fc;
+	rx_ipv6->fc = fc;
+	counter = mlx5_fc_create(mdev, false);
+	if (IS_ERR(counter)) {
+		err = PTR_ERR(counter);
+		goto err_rx_cnt;
+	}
+
+	fc->cnt = counter;
+	counter = mlx5_fc_create(mdev, false);
+	if (IS_ERR(counter)) {
+		err = PTR_ERR(counter);
+		goto err_rx_drop;
+	}
+
+	fc->drop = counter;
+	fc = kzalloc(sizeof(*tx->fc), GFP_KERNEL);
+	if (!fc) {
+		err = -ENOMEM;
+		goto err_tx_fc;
+	}
+
+	tx->fc = fc;
+	counter = mlx5_fc_create(mdev, false);
+	if (IS_ERR(counter)) {
+		err = PTR_ERR(counter);
+		goto err_tx_cnt;
+	}
+
+	fc->cnt = counter;
+	counter = mlx5_fc_create(mdev, false);
+	if (IS_ERR(counter)) {
+		err = PTR_ERR(counter);
+		goto err_tx_drop;
+	}
+
+	fc->drop = counter;
+	return 0;
+
+err_tx_drop:
+	mlx5_fc_destroy(mdev, tx->fc->cnt);
+err_tx_cnt:
+	kfree(tx->fc);
+err_tx_fc:
+	mlx5_fc_destroy(mdev, rx_ipv4->fc->drop);
+err_rx_drop:
+	mlx5_fc_destroy(mdev, rx_ipv4->fc->cnt);
+err_rx_cnt:
+	kfree(rx_ipv4->fc);
+	return err;
+}
+
+void mlx5e_accel_ipsec_fs_read_full_stats(struct mlx5e_priv *priv, void *ipsec_stats)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_ipsec *ipsec = priv->ipsec;
+	struct mlx5e_ipsec_hw_stats *stats;
+	struct mlx5e_ipsec_fc *fc;
+
+	stats = (struct mlx5e_ipsec_hw_stats *)ipsec_stats;
+
+	stats->ipsec_rx_pkts = 0;
+	stats->ipsec_rx_bytes = 0;
+	stats->ipsec_rx_drop_pkts = 0;
+	stats->ipsec_rx_drop_bytes = 0;
+	stats->ipsec_tx_pkts = 0;
+	stats->ipsec_tx_bytes = 0;
+	stats->ipsec_tx_drop_pkts = 0;
+	stats->ipsec_tx_drop_bytes = 0;
+
+	fc = ipsec->rx_ipv4->fc;
+	mlx5_fc_query(mdev, fc->cnt, &stats->ipsec_rx_pkts, &stats->ipsec_rx_bytes);
+	mlx5_fc_query(mdev, fc->drop, &stats->ipsec_rx_drop_pkts,
+		      &stats->ipsec_rx_drop_bytes);
+
+	fc = ipsec->tx->fc;
+	mlx5_fc_query(mdev, fc->cnt, &stats->ipsec_tx_pkts, &stats->ipsec_tx_bytes);
+	mlx5_fc_query(mdev, fc->drop, &stats->ipsec_tx_drop_pkts,
+		      &stats->ipsec_tx_drop_bytes);
+}
+
 int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT)
@@ -848,6 +981,7 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
 	if (!ipsec->tx)
 		return;
 
+	ipsec_fs_destroy_counters(ipsec);
 	mutex_destroy(&ipsec->tx->ft.mutex);
 	WARN_ON(ipsec->tx->ft.refcnt);
 	kfree(ipsec->tx);
@@ -883,6 +1017,10 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
 	if (!ipsec->rx_ipv6)
 		goto err_rx_ipv6;
 
+	err = ipsec_fs_init_counters(ipsec);
+	if (err)
+		goto err_counters;
+
 	mutex_init(&ipsec->tx->ft.mutex);
 	mutex_init(&ipsec->rx_ipv4->ft.mutex);
 	mutex_init(&ipsec->rx_ipv6->ft.mutex);
@@ -890,6 +1028,8 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
 
 	return 0;
 
+err_counters:
+	kfree(ipsec->rx_ipv6);
 err_rx_ipv6:
 	kfree(ipsec->rx_ipv4);
 err_rx_ipv4:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
index 9de84821dafb..1e6b564fbb92 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -37,6 +37,17 @@
 #include "en.h"
 #include "ipsec.h"
 
+static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_pkts) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_drop_pkts) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_drop_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_pkts) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_drop_pkts) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_drop_bytes) },
+};
+
 static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) },
@@ -50,8 +61,48 @@ static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
 #define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
 	atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
 
+#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc)
 #define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
 
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_hw)
+{
+	if (!priv->ipsec)
+		return 0;
+
+	return NUM_IPSEC_HW_COUNTERS;
+}
+
+static inline MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw) {}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_hw)
+{
+	unsigned int i;
+
+	if (!priv->ipsec)
+		return idx;
+
+	for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       mlx5e_ipsec_hw_stats_desc[i].format);
+
+	return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_hw)
+{
+	int i;
+
+	if (!priv->ipsec)
+		return idx;
+
+	mlx5e_accel_ipsec_fs_read_full_stats(priv, &priv->ipsec->hw_stats);
+	for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR_ATOMIC64(
+			&priv->ipsec->hw_stats, mlx5e_ipsec_hw_stats_desc, i);
+
+	return idx;
+}
+
 static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw)
 {
 	return priv->ipsec ? NUM_IPSEC_SW_COUNTERS : 0;
@@ -81,4 +132,5 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw)
 	return idx;
 }
 
+MLX5E_DEFINE_STATS_GRP(ipsec_hw, 0);
 MLX5E_DEFINE_STATS_GRP(ipsec_sw, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 7409829d1201..a3decb43c287 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -2444,6 +2444,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
 	&MLX5E_STATS_GRP(per_prio),
 	&MLX5E_STATS_GRP(pme),
 #ifdef CONFIG_MLX5_EN_IPSEC
+	&MLX5E_STATS_GRP(ipsec_hw),
 	&MLX5E_STATS_GRP(ipsec_sw),
 #endif
 	&MLX5E_STATS_GRP(tls),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index ed4fc940e4ef..9cd7c807e508 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -484,6 +484,7 @@ extern MLX5E_DECLARE_STATS_GRP(per_prio);
 extern MLX5E_DECLARE_STATS_GRP(pme);
 extern MLX5E_DECLARE_STATS_GRP(channels);
 extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest);
+extern MLX5E_DECLARE_STATS_GRP(ipsec_hw);
 extern MLX5E_DECLARE_STATS_GRP(ipsec_sw);
 extern MLX5E_DECLARE_STATS_GRP(ptp);
 
-- 
2.37.2


^ permalink raw reply related

* Re: [PATCH bpf-next 2/3] bpf: Add support for writing to nf_conn:mark
From: Toke Høiland-Jørgensen @ 2022-08-16 10:43 UTC (permalink / raw)
  To: Alexei Starovoitov, Florian Westphal
  Cc: Daniel Xu, bpf, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Kumar Kartikeya Dwivedi, Pablo Neira Ayuso,
	netfilter-devel, Network Development, LKML
In-Reply-To: <CAADnVQLB1SQoYAYEzU_VuJ=q3azeyhBiK-NkU5OZC7rrumi0xQ@mail.gmail.com>

Alexei Starovoitov <alexei.starovoitov@gmail.com> writes:

> On Mon, Aug 15, 2022 at 3:40 PM Florian Westphal <fw@strlen.de> wrote:
>>
>> Toke Høiland-Jørgensen <toke@kernel.org> wrote:
>> > > Support direct writes to nf_conn:mark from TC and XDP prog types. This
>> > > is useful when applications want to store per-connection metadata. This
>> > > is also particularly useful for applications that run both bpf and
>> > > iptables/nftables because the latter can trivially access this metadata.
>> > >
>> > > One example use case would be if a bpf prog is responsible for advanced
>> > > packet classification and iptables/nftables is later used for routing
>> > > due to pre-existing/legacy code.
>> > >
>> > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
>> >
>> > Didn't we agree the last time around that all field access should be
>> > using helper kfuncs instead of allowing direct writes to struct nf_conn?
>>
>> I don't see why ct->mark needs special handling.
>>
>> It might be possible we need to change accesses on nf/tc side to use
>> READ/WRITE_ONCE though.
>
> +1
> I don't think we need to have a hard rule.
> If fields is safe to access directly than it's faster
> to let bpf prog read/write it.
> There are no backward compat concerns. If conntrack side decides
> to make that field special we can disallow direct writes in
> the same kernel version.

Right, I was under the impression we wanted all fields to be wrapper by
helpers so that the struct owner could change their semantics without
affecting users (and solve the performance issue by figuring out a
generic way to inline those helpers). I guess there could also be an API
consistency argument for doing this.

However, I don't have a strong opinion on this, so if y'all prefer
keeping these as direct field writes, that's OK with me.

> These accesses, just like kfuncs, are unstable.

Well, it will be interesting to see how that plays out the first time
an application relying on one of these breaks on a kernel upgrade :)

-Toke

^ permalink raw reply

* [patch net v3] net: dsa: microchip: ksz9477: fix fdb_dump last invalid entry
From: Arun Ramadoss @ 2022-08-16 10:55 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: Woojung Huh, UNGLinuxDriver, Andrew Lunn, Vivien Didelot,
	Florian Fainelli, Vladimir Oltean, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Russell King, Tristram Ha

In the ksz9477_fdb_dump function it reads the ALU control register and
exit from the timeout loop if there is valid entry or search is
complete. After exiting the loop, it reads the alu entry and report to
the user space irrespective of entry is valid. It works till the valid
entry. If the loop exited when search is complete, it reads the alu
table. The table returns all ones and it is reported to user space. So
bridge fdb show gives ff:ff:ff:ff:ff:ff as last entry for every port.
To fix it, after exiting the loop the entry is reported only if it is
valid one.

Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477")
Signed-off-by: Arun Ramadoss <arun.ramadoss@microchip.com>
---
changes in v3
- changed the subject from net-next to net

changes in v2
- changed the fixes commit id
- reduced the indentation level by using ! and continue statement

 drivers/net/dsa/microchip/ksz9477.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 4b14d80d27ed..e4f446db0ca1 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -613,6 +613,9 @@ int ksz9477_fdb_dump(struct ksz_device *dev, int port,
 			goto exit;
 		}
 
+		if (!(ksz_data & ALU_VALID))
+			continue;
+
 		/* read ALU table */
 		ksz9477_read_table(dev, alu_table);
 

base-commit: 7ebfc85e2cd7b08f518b526173e9a33b56b3913b
-- 
2.36.1


^ permalink raw reply related

* Re: [PATCH 1/2] dt-bindings: vertexcom-mse102x: Update email address
From: Stefan Wahren @ 2022-08-16 11:14 UTC (permalink / raw)
  To: Krzysztof Kozlowski, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Rob Herring, Krzysztof Kozlowski
  Cc: netdev, devicetree, stefan.wahren
In-Reply-To: <dc0c6c80-7af0-ee7c-2019-cdb6eb96c3f5@linaro.org>

Hi Krzystof,

Am 16.08.22 um 12:24 schrieb Krzysztof Kozlowski:
> On 15/08/2022 11:06, Stefan Wahren wrote:
>> in-tech smart charging is now chargebyte. So update the email address
>> accordingly.
>>
>> Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
> Yet you used third email address... which is fine, just a bit confusing.
> Since in-tech.com still works and you might be (or not) different
> Stefan, it's difficult to judge...

sorry about the confusion. I'm that person. Unfortuntely the other 
accounts are using a M$ Exchange server, so i switched to the i2se 
account for this setup before sending broken patches ...

in-tech was the parent company, but the accounts will unavailable in the 
future.

Best regards

>
> Best regards,
> Krzysztof

^ permalink raw reply

* [PATCH v4.3] net: phy: Add driver for Motorcomm yt8521 gigabit ethernet phy
From: Frank @ 2022-08-16 11:17 UTC (permalink / raw)
  To: Peter Geis, Andrew Lunn, Heiner Kallweit, Russell King,
	David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: yinghong.zhang, fei.zhang, hua.sun, netdev, linux-kernel, Frank

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=y, Size: 38468 bytes --]

 Add a driver for the motorcomm yt8521 gigabit ethernet phy. We have verified
 the driver on StarFive VisionFive development board, which is developed by
 Shanghai StarFive Technology Co., Ltd.. On the board, yt8521 gigabit ethernet
 phy works in utp mode, RGMII interface, supports 1000M/100M/10M speeds, and
 wol(magic package).

Signed-off-by: Frank <Frank.Sae@motor-comm.com>
---

patch v4.3:
 Hi Jakub
 Kernel 6.0-rc1 is cut on 2022-08-14, we update new patch for Kernel 6.0-rc1.

patch v4.2:
 fix "=0D" and "=20" which in patch v4.1.
 
patch v4.1:
 Hi Andrew
 Thanks very much and based on your comments we modified the patch v4.1 as below.

> You have interrupt bits defined, you enable the WoL interrupt, but you
> don't have an interrupt handler. PHY interrupts are generally level
> interrupts, so on WoL, don't you end up with an interrupt storm,
> because you don't have anything clearing the WoL interrupt?

 in the driver, only the interrupt for WOL is used according to uplayer WOL
 configuration via .set_wol. If WOL is enalbed, then the WOL interrupt is
 enabled accordingly; you know, for a system support WOL, the WOL interrupt
 will be handled by user's WOL circuit, and it will not cause interrupt storm
 to CPU core system.

 for WOL interrupt, by default, we configure to pulse trigger mode, that is a
 pulse from High to low change, and the low level will hold for 672ms.
 user's WOL circuit will detected such volt level change as a WOL event trigger.

patch v4:
 Hi Andrew,Jakub
 Thanks very much and based on your comments we modified the patch v4 as below.

 We evaluated the Marvell 10G driver and at803x you suggested. The 2 drivers
 implement SFP module attach/detach functions and we think these functions do
 not help yt8521 to do UTP/Fiber register space arbitration which you may
 concern in previous patch.

 Yt8521 can detect utp/fiber media link status automatically. For the case of
 both media are connected, driver arbitrates the priority of the media (by
 default, driver takes fiber as higher priority) and report the media status
 to up layer(MAC).

patch v3:
 Hi Andrew
 Thanks and based on your comments we modified the patch as below.

> It is generally not that simple. Fibre, you probably want 1000BaseX,
> unless the fibre module is actually copper, and then you want
> SGMII. So you need something to talk to the fibre module and ask it
> what it is. That something is phylink. Phylink does not support both
> copper and fibre at the same time for one MAC.

 yes, you said it and for MAC, it does not support copper and Fiber at same time.
 but from Physical Layer, you know, sometimes both Copper and Fiber cables are
 connected. in this case, Phy driver should do arbitration and report to MAC
 which media should be used and Link-up.
 This is the reason that the driver has a "polling mode", and by default, also
 this driver takes fiber as first priority which matches phy chip default behavior.

patch v2:
 Hi Andrew, Russell King, Peter,
 Thanks and based on your comments we modified the patch as below.
 
> So there's only two possible pages that can be used in the extended
>register space?
 
 Yes,there is only two register space (utp and fiber).
 
> > +/* Extended Register's Data Register */
> > +#define YTPHY_PAGE_DATA                                0x1F
>
> These are defined exactly the same way as below. Please reuse code
> where possible.
 
 Yes, code will be reuse, but "YT8511_PAGE" need to be rename like
 "YTPHY_PAGE_DATA",as it is common register for yt phys.

patch v1:
 Add a driver for the motorcomm yt8521 gigabit ethernet phy. We have verified
 the driver on StarFive VisionFive development board, which is developed by
 Shanghai StarFive Technology Co., Ltd.. On the board, yt8521 gigabit ethernet
 phy works in utp mode, RGMII interface, supports 1000M/100M/10M speeds, and
 wol(magic package).

Thanks and BR,
Frank

 MAINTAINERS                 |    1 +
 drivers/net/phy/Kconfig     |    2 +-
 drivers/net/phy/motorcomm.c | 1162 ++++++++++++++++++++++++++++++++++-
 3 files changed, 1162 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8a5012b..994919b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13780,6 +13780,7 @@ F:	include/uapi/linux/meye.h
 
 MOTORCOMM PHY DRIVER
 M:	Peter Geis <pgwipeout@gmail.com>
+M:	Frank <Frank.Sae@motor-comm.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/phy/motorcomm.c
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index c57a026..040c8bf 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -260,7 +260,7 @@ config MOTORCOMM_PHY
 	tristate "Motorcomm PHYs"
 	help
 	  Enables support for Motorcomm network PHYs.
-	  Currently supports the YT8511 gigabit PHY.
+	  Currently supports the YT8511, YT8521 Gigabit Ethernet PHYs.
 
 config NATIONAL_PHY
 	tristate "National Semiconductor PHYs"
diff --git a/drivers/net/phy/motorcomm.c b/drivers/net/phy/motorcomm.c
index 7e6ac2c..bae56f5 100644
--- a/drivers/net/phy/motorcomm.c
+++ b/drivers/net/phy/motorcomm.c
@@ -1,15 +1,102 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * Driver for Motorcomm PHYs
+ * Motorcomm 8511/8521 PHY driver.
  *
  * Author: Peter Geis <pgwipeout@gmail.com>
+ * Author: Frank <Frank.Sae@motor-comm.com>
  */
 
+#include <linux/etherdevice.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/phy.h>
 
 #define PHY_ID_YT8511		0x0000010a
+#define PHY_ID_YT8521				0x0000011A
+
+/* YT8521 Register Overview
+ *	UTP Register space	|	FIBER Register space
+ *  ------------------------------------------------------------
+ * |	UTP MII			|	FIBER MII		|
+ * |	UTP MMD			|				|
+ * |	UTP Extended		|	FIBER Extended		|
+ *  ------------------------------------------------------------
+ * |			Common Extended				|
+ *  ------------------------------------------------------------
+ */
+
+/* 0x10 ~ 0x15 , 0x1E and 0x1F are common MII registers of yt phy */
+
+/* Specific Function Control Register */
+#define YTPHY_SPECIFIC_FUNCTION_CONTROL_REG	0x10
+
+/* 2b00 Manual MDI configuration
+ * 2b01 Manual MDIX configuration
+ * 2b10 Reserved
+ * 2b11 Enable automatic crossover for all modes  *default*
+ */
+#define YTPHY_SFCR_MDI_CROSSOVER_MODE_MASK	(BIT(6) | BIT(5))
+#define YTPHY_SFCR_CROSSOVER_EN			BIT(3)
+#define YTPHY_SFCR_SQE_TEST_EN			BIT(2)
+#define YTPHY_SFCR_POLARITY_REVERSAL_EN		BIT(1)
+#define YTPHY_SFCR_JABBER_DIS			BIT(0)
+
+/* Specific Status Register */
+#define YTPHY_SPECIFIC_STATUS_REG		0x11
+#define YTPHY_SSR_SPEED_MODE_OFFSET		14
+
+#define YTPHY_SSR_SPEED_MODE_MASK		(BIT(15) | BIT(14))
+#define YTPHY_SSR_SPEED_10M			0x0
+#define YTPHY_SSR_SPEED_100M			0x1
+#define YTPHY_SSR_SPEED_1000M			0x2
+#define YTPHY_SSR_DUPLEX_OFFSET			13
+#define YTPHY_SSR_DUPLEX			BIT(13)
+#define YTPHY_SSR_PAGE_RECEIVED			BIT(12)
+#define YTPHY_SSR_SPEED_DUPLEX_RESOLVED		BIT(11)
+#define YTPHY_SSR_LINK				BIT(10)
+#define YTPHY_SSR_MDIX_CROSSOVER		BIT(6)
+#define YTPHY_SSR_DOWNGRADE			BIT(5)
+#define YTPHY_SSR_TRANSMIT_PAUSE		BIT(3)
+#define YTPHY_SSR_RECEIVE_PAUSE			BIT(2)
+#define YTPHY_SSR_POLARITY			BIT(1)
+#define YTPHY_SSR_JABBER			BIT(0)
+
+/* Interrupt enable Register */
+#define YTPHY_INTERRUPT_ENABLE_REG		0x12
+#define YTPHY_IER_WOL				BIT(6)
+
+/* Interrupt Status Register */
+#define YTPHY_INTERRUPT_STATUS_REG		0x13
+#define YTPHY_ISR_AUTONEG_ERR			BIT(15)
+#define YTPHY_ISR_SPEED_CHANGED			BIT(14)
+#define YTPHY_ISR_DUPLEX_CHANGED		BIT(13)
+#define YTPHY_ISR_PAGE_RECEIVED			BIT(12)
+#define YTPHY_ISR_LINK_FAILED			BIT(11)
+#define YTPHY_ISR_LINK_SUCCESSED		BIT(10)
+#define YTPHY_ISR_WOL				BIT(6)
+#define YTPHY_ISR_WIRESPEED_DOWNGRADE		BIT(5)
+#define YTPHY_ISR_SERDES_LINK_FAILED		BIT(3)
+#define YTPHY_ISR_SERDES_LINK_SUCCESSED		BIT(2)
+#define YTPHY_ISR_POLARITY_CHANGED		BIT(1)
+#define YTPHY_ISR_JABBER_HAPPENED		BIT(0)
+
+/* Speed Auto Downgrade Control Register */
+#define YTPHY_SPEED_AUTO_DOWNGRADE_CONTROL_REG	0x14
+#define YTPHY_SADCR_SPEED_DOWNGRADE_EN		BIT(5)
+
+/* If these bits are set to 3, the PHY attempts five times ( 3(set value) +
+ * additional 2) before downgrading, default 0x3
+ */
+#define YTPHY_SADCR_SPEED_RETRY_LIMIT		(0x3 << 2)
+
+/* Rx Error Counter Register */
+#define YTPHY_RX_ERROR_COUNTER_REG		0x15
+
+/* Extended Register's Address Offset Register */
+#define YTPHY_PAGE_SELECT			0x1E
+
+/* Extended Register's Data Register */
+#define YTPHY_PAGE_DATA				0x1F
 
 #define YT8511_PAGE_SELECT	0x1e
 #define YT8511_PAGE		0x1f
@@ -38,6 +125,347 @@
 #define YT8511_DELAY_FE_TX_EN	(0xf << 12)
 #define YT8511_DELAY_FE_TX_DIS	(0x2 << 12)
 
+/* Extended register is different from MMD Register and MII Register.
+ * We can use ytphy_read_ext/ytphy_write_ext/ytphy_modify_ext function to
+ * operate extended register.
+ * Extended Register  start
+ */
+
+/* Phy gmii clock gating Register */
+#define YT8521_CLOCK_GATING_REG			0xC
+#define YT8521_CGR_RX_CLK_EN			BIT(12)
+
+#define YT8521_EXTREG_SLEEP_CONTROL1_REG	0x27
+#define YT8521_ESC1R_SLEEP_SW			BIT(15)
+#define YT8521_ESC1R_PLLON_SLP			BIT(14)
+
+/* Phy fiber Link timer cfg2 Register */
+#define YT8521_LINK_TIMER_CFG2_REG		0xA5
+#define YT8521_LTCR_EN_AUTOSEN			BIT(15)
+
+/* 0xA000, 0xA001, 0xA003 ,and 0xA006 ~ 0xA00A  are common ext registers
+ * of yt8521 phy. There is no need to switch reg space when operating these
+ * registers.
+ */
+
+#define YT8521_REG_SPACE_SELECT_REG		0xA000
+#define YT8521_RSSR_SPACE_MASK			BIT(1)
+#define YT8521_RSSR_FIBER_SPACE			(0x1 << 1)
+#define YT8521_RSSR_UTP_SPACE			(0x0 << 1)
+#define YT8521_RSSR_TO_BE_ARBITRATED		(0xFF)
+
+#define YT8521_CHIP_CONFIG_REG			0xA001
+#define YT8521_CCR_SW_RST			BIT(15)
+
+#define YT8521_CCR_MODE_SEL_MASK		(BIT(2) | BIT(1) | BIT(0))
+#define YT8521_CCR_MODE_UTP_TO_RGMII		0
+#define YT8521_CCR_MODE_FIBER_TO_RGMII		1
+#define YT8521_CCR_MODE_UTP_FIBER_TO_RGMII	2
+#define YT8521_CCR_MODE_UTP_TO_SGMII		3
+#define YT8521_CCR_MODE_SGPHY_TO_RGMAC		4
+#define YT8521_CCR_MODE_SGMAC_TO_RGPHY		5
+#define YT8521_CCR_MODE_UTP_TO_FIBER_AUTO	6
+#define YT8521_CCR_MODE_UTP_TO_FIBER_FORCE	7
+
+/* 3 phy polling modes,poll mode combines utp and fiber mode*/
+#define YT8521_MODE_FIBER			0x1
+#define YT8521_MODE_UTP				0x2
+#define YT8521_MODE_POLL			0x3
+
+#define YT8521_RGMII_CONFIG1_REG		0xA003
+
+/* TX Gig-E Delay is bits 3:0, default 0x1
+ * TX Fast-E Delay is bits 7:4, default 0xf
+ * RX Delay is bits 13:10, default 0x0
+ * Delay = 150ps * N
+ * On = 2250ps, off = 0ps
+ */
+#define YT8521_RC1R_RX_DELAY_MASK		(0xF << 10)
+#define YT8521_RC1R_RX_DELAY_EN			(0xF << 10)
+#define YT8521_RC1R_RX_DELAY_DIS		(0x0 << 10)
+#define YT8521_RC1R_FE_TX_DELAY_MASK		(0xF << 4)
+#define YT8521_RC1R_FE_TX_DELAY_EN		(0xF << 4)
+#define YT8521_RC1R_FE_TX_DELAY_DIS		(0x0 << 4)
+#define YT8521_RC1R_GE_TX_DELAY_MASK		(0xF << 0)
+#define YT8521_RC1R_GE_TX_DELAY_EN		(0xF << 0)
+#define YT8521_RC1R_GE_TX_DELAY_DIS		(0x0 << 0)
+
+#define YTPHY_MISC_CONFIG_REG			0xA006
+#define YTPHY_MCR_FIBER_SPEED_MASK		BIT(0)
+#define YTPHY_MCR_FIBER_1000BX			(0x1 << 0)
+#define YTPHY_MCR_FIBER_100FX			(0x0 << 0)
+
+/* WOL MAC ADDR: MACADDR2(highest), MACADDR1(middle), MACADDR0(lowest) */
+#define YTPHY_WOL_MACADDR2_REG			0xA007
+#define YTPHY_WOL_MACADDR1_REG			0xA008
+#define YTPHY_WOL_MACADDR0_REG			0xA009
+
+#define YTPHY_WOL_CONFIG_REG			0xA00A
+#define YTPHY_WCR_INTR_SEL			BIT(6)
+#define YTPHY_WCR_ENABLE			BIT(3)
+
+/* 2b00 84ms
+ * 2b01 168ms  *default*
+ * 2b10 336ms
+ * 2b11 672ms
+ */
+#define YTPHY_WCR_PULSE_WIDTH_MASK		(BIT(2) | BIT(1))
+#define YTPHY_WCR_PULSE_WIDTH_672MS		(BIT(2) | BIT(1))
+
+/* 1b0 Interrupt and WOL events is level triggered and active LOW  *default*
+ * 1b1 Interrupt and WOL events is pulse triggered and active LOW
+ */
+#define YTPHY_WCR_TYPE_PULSE			BIT(0)
+
+/* Extended Register  end */
+
+struct yt8521_priv {
+	/* YT8521_RSSR_UTP_SPACE / YT8521_RSSR_FIBER_SPACE / YT8521_RSSR_TO_BE_ARBITRATED */
+	u8 reg_page;
+	/* YT8521_MODE_FIBER / YT8521_MODE_UTP / YT8521_MODE_POLL*/
+	u8 polling_mode;
+	u8 strap_mode; /* 8 working modes  */
+};
+
+/**
+ * ytphy_read_ext() - read a PHY's extended register
+ * @phydev: a pointer to a &struct phy_device
+ * @regnum: register number to read
+ *
+ * NOTE:The caller must have taken the MDIO bus lock.
+ *
+ * returns the value of regnum reg or negative error code
+ */
+static int ytphy_read_ext(struct phy_device *phydev, u16 regnum)
+{
+	int ret;
+
+	ret = __phy_write(phydev, YTPHY_PAGE_SELECT, regnum);
+	if (ret < 0)
+		return ret;
+
+	return __phy_read(phydev, YTPHY_PAGE_DATA);
+}
+
+/**
+ * ytphy_read_ext_with_lock() - read a PHY's extended register
+ * @phydev: a pointer to a &struct phy_device
+ * @regnum: register number to read
+ *
+ * returns the value of regnum reg or negative error code
+ */
+static int ytphy_read_ext_with_lock(struct phy_device *phydev, u16 regnum)
+{
+	int ret;
+
+	phy_lock_mdio_bus(phydev);
+	ret = ytphy_read_ext(phydev, regnum);
+	phy_unlock_mdio_bus(phydev);
+
+	return ret;
+}
+
+/**
+ * ytphy_write_ext() - write a PHY's extended register
+ * @phydev: a pointer to a &struct phy_device
+ * @regnum: register number to write
+ * @val: value to write to @regnum
+ *
+ * NOTE:The caller must have taken the MDIO bus lock.
+ *
+ * returns 0 or negative error code
+ */
+static int ytphy_write_ext(struct phy_device *phydev, u16 regnum, u16 val)
+{
+	int ret;
+
+	ret = __phy_write(phydev, YTPHY_PAGE_SELECT, regnum);
+	if (ret < 0)
+		return ret;
+
+	return __phy_write(phydev, YTPHY_PAGE_DATA, val);
+}
+
+/**
+ * ytphy_write_ext_with_lock() - write a PHY's extended register
+ * @phydev: a pointer to a &struct phy_device
+ * @regnum: register number to write
+ * @val: value to write to @regnum
+ *
+ * returns 0 or negative error code
+ */
+static int ytphy_write_ext_with_lock(struct phy_device *phydev, u16 regnum,
+				     u16 val)
+{
+	int ret;
+
+	phy_lock_mdio_bus(phydev);
+	ret = ytphy_write_ext(phydev, regnum, val);
+	phy_unlock_mdio_bus(phydev);
+
+	return ret;
+}
+
+/**
+ * ytphy_modify_ext() - bits modify a PHY's extended register
+ * @phydev: a pointer to a &struct phy_device
+ * @regnum: register number to write
+ * @mask: bit mask of bits to clear
+ * @set: bit mask of bits to set
+ *
+ * NOTE: Convenience function which allows a PHY‘s extended register to be
+ * modified as new register value = (old register value & ~mask) | set.
+ * The caller must have taken the MDIO bus lock.
+ *
+ * returns 0 or negative error code
+ */
+static int ytphy_modify_ext(struct phy_device *phydev, u16 regnum, u16 mask,
+			    u16 set)
+{
+	u16 val;
+	int ret;
+
+	ret = ytphy_read_ext(phydev, regnum);
+	if (ret < 0)
+		return ret;
+
+	val = ret & 0xffff;
+	val &= ~mask;
+	val |= set;
+
+	return ytphy_write_ext(phydev, regnum, val);
+}
+
+/**
+ * ytphy_modify_ext_with_lock() - bits modify a PHY's extended register
+ * @phydev: a pointer to a &struct phy_device
+ * @regnum: register number to write
+ * @mask: bit mask of bits to clear
+ * @set: bit mask of bits to set
+ *
+ * NOTE: Convenience function which allows a PHY‘s extended register to be
+ * modified as new register value = (old register value & ~mask) | set.
+ *
+ * returns 0 or negative error code
+ */
+static int ytphy_modify_ext_with_lock(struct phy_device *phydev, u16 regnum,
+				      u16 mask, u16 set)
+{
+	int ret;
+
+	phy_lock_mdio_bus(phydev);
+	ret = ytphy_modify_ext(phydev, regnum, mask, set);
+	phy_unlock_mdio_bus(phydev);
+
+	return ret;
+}
+
+/**
+ * ytphy_get_wol() - report whether wake-on-lan is enabled
+ * @phydev: a pointer to a &struct phy_device
+ * @wol: a pointer to a &struct ethtool_wolinfo
+ *
+ * NOTE: YTPHY_WOL_CONFIG_REG is common ext reg.
+ */
+static void ytphy_get_wol(struct phy_device *phydev,
+			  struct ethtool_wolinfo *wol)
+{
+	int wol_config;
+
+	wol->supported = WAKE_MAGIC;
+	wol->wolopts = 0;
+
+	wol_config = ytphy_read_ext_with_lock(phydev, YTPHY_WOL_CONFIG_REG);
+	if (wol_config < 0)
+		return;
+
+	if (wol_config & YTPHY_WCR_ENABLE)
+		wol->wolopts |= WAKE_MAGIC;
+}
+
+/**
+ * ytphy_set_wol() - turn wake-on-lan on or off
+ * @phydev: a pointer to a &struct phy_device
+ * @wol: a pointer to a &struct ethtool_wolinfo
+ *
+ * NOTE: YTPHY_WOL_CONFIG_REG, YTPHY_WOL_MACADDR2_REG, YTPHY_WOL_MACADDR1_REG
+ * and YTPHY_WOL_MACADDR0_REG are common ext reg. the YTPHY_INTERRUPT_ENABLE_REG
+ * of UTP is special, fiber also use this register.
+ *
+ * returns 0 or negative errno code
+ */
+static int ytphy_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol)
+{
+	struct net_device *p_attached_dev;
+	const u16 mac_addr_reg[] = {
+		YTPHY_WOL_MACADDR2_REG,
+		YTPHY_WOL_MACADDR1_REG,
+		YTPHY_WOL_MACADDR0_REG,
+	};
+	const u8 *mac_addr;
+	int old_page;
+	int ret = 0;
+	u16 mask;
+	u16 val;
+	u8 i;
+
+	if (wol->wolopts & WAKE_MAGIC) {
+		p_attached_dev = phydev->attached_dev;
+		if (!p_attached_dev)
+			return -ENODEV;
+
+		mac_addr = (const u8 *)p_attached_dev->dev_addr;
+		if (!is_valid_ether_addr(mac_addr))
+			return -EINVAL;
+
+		/* lock mdio bus then switch to utp reg space */
+		old_page = phy_select_page(phydev, YT8521_RSSR_UTP_SPACE);
+		if (old_page < 0)
+			goto err_restore_page;
+
+		/* Store the device address for the magic packet */
+		for (i = 0; i < 3; i++) {
+			ret = ytphy_write_ext(phydev, mac_addr_reg[i],
+					      ((mac_addr[i * 2] << 8)) |
+						      (mac_addr[i * 2 + 1]));
+			if (ret < 0)
+				goto err_restore_page;
+		}
+
+		/* Enable WOL feature */
+		mask = YTPHY_WCR_PULSE_WIDTH_MASK | YTPHY_WCR_INTR_SEL;
+		val = YTPHY_WCR_ENABLE | YTPHY_WCR_INTR_SEL;
+		val |= YTPHY_WCR_TYPE_PULSE | YTPHY_WCR_PULSE_WIDTH_672MS;
+		ret = ytphy_modify_ext(phydev, YTPHY_WOL_CONFIG_REG, mask, val);
+		if (ret < 0)
+			goto err_restore_page;
+
+		/* Enable WOL interrupt */
+		ret = __phy_modify(phydev, YTPHY_INTERRUPT_ENABLE_REG, 0,
+				   YTPHY_IER_WOL);
+		if (ret < 0)
+			goto err_restore_page;
+
+	} else {
+		old_page = phy_select_page(phydev, YT8521_RSSR_UTP_SPACE);
+		if (old_page < 0)
+			goto err_restore_page;
+
+		/* Disable WOL feature */
+		mask = YTPHY_WCR_ENABLE | YTPHY_WCR_INTR_SEL;
+		ret = ytphy_modify_ext(phydev, YTPHY_WOL_CONFIG_REG, mask, 0);
+
+		/* Disable WOL interrupt */
+		ret = __phy_modify(phydev, YTPHY_INTERRUPT_ENABLE_REG,
+				   YTPHY_IER_WOL, 0);
+		if (ret < 0)
+			goto err_restore_page;
+	}
+
+err_restore_page:
+	return phy_restore_page(phydev, old_page, ret);
+}
+
 static int yt8511_read_page(struct phy_device *phydev)
 {
 	return __phy_read(phydev, YT8511_PAGE_SELECT);
@@ -111,6 +539,717 @@ static int yt8511_config_init(struct phy_device *phydev)
 	return phy_restore_page(phydev, oldpage, ret);
 }
 
+/**
+ * yt8521_read_page() - read reg page
+ * @phydev: a pointer to a &struct phy_device
+ *
+ * returns current reg space of yt8521 (YT8521_RSSR_FIBER_SPACE/
+ * YT8521_RSSR_UTP_SPACE) or negative errno code
+ */
+static int yt8521_read_page(struct phy_device *phydev)
+{
+	int old_page;
+
+	old_page = ytphy_read_ext(phydev, YT8521_REG_SPACE_SELECT_REG);
+	if (old_page < 0)
+		return old_page;
+
+	if ((old_page & YT8521_RSSR_SPACE_MASK) == YT8521_RSSR_FIBER_SPACE)
+		return YT8521_RSSR_FIBER_SPACE;
+
+	return YT8521_RSSR_UTP_SPACE;
+};
+
+/**
+ * yt8521_write_page() - write reg page
+ * @phydev: a pointer to a &struct phy_device
+ * @page: The reg page(YT8521_RSSR_FIBER_SPACE/YT8521_RSSR_UTP_SPACE) to write.
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_write_page(struct phy_device *phydev, int page)
+{
+	int mask = YT8521_RSSR_SPACE_MASK;
+	int set;
+
+	if ((page & YT8521_RSSR_SPACE_MASK) == YT8521_RSSR_FIBER_SPACE)
+		set = YT8521_RSSR_FIBER_SPACE;
+	else
+		set = YT8521_RSSR_UTP_SPACE;
+
+	return ytphy_modify_ext(phydev, YT8521_REG_SPACE_SELECT_REG, mask, set);
+};
+
+/**
+ * yt8521_probe() - read chip config then set suitable polling_mode
+ * @phydev: a pointer to a &struct phy_device
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_probe(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->mdio.dev;
+	struct yt8521_priv *priv;
+	int chip_config;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	phydev->priv = priv;
+
+	chip_config = ytphy_read_ext_with_lock(phydev, YT8521_CHIP_CONFIG_REG);
+	if (chip_config < 0)
+		return chip_config;
+
+	priv->strap_mode = chip_config & YT8521_CCR_MODE_SEL_MASK;
+	switch (priv->strap_mode) {
+	case YT8521_CCR_MODE_FIBER_TO_RGMII:
+	case YT8521_CCR_MODE_SGPHY_TO_RGMAC:
+	case YT8521_CCR_MODE_SGMAC_TO_RGPHY:
+		priv->polling_mode = YT8521_MODE_FIBER;
+		priv->reg_page = YT8521_RSSR_FIBER_SPACE;
+		break;
+	case YT8521_CCR_MODE_UTP_FIBER_TO_RGMII:
+	case YT8521_CCR_MODE_UTP_TO_FIBER_AUTO:
+	case YT8521_CCR_MODE_UTP_TO_FIBER_FORCE:
+		priv->polling_mode = YT8521_MODE_POLL;
+		priv->reg_page = YT8521_RSSR_TO_BE_ARBITRATED;
+		break;
+	case YT8521_CCR_MODE_UTP_TO_SGMII:
+	case YT8521_CCR_MODE_UTP_TO_RGMII:
+		priv->polling_mode = YT8521_MODE_UTP;
+		priv->reg_page = YT8521_RSSR_UTP_SPACE;
+		break;
+	}
+	/* set default reg space */
+	if (priv->reg_page != YT8521_RSSR_TO_BE_ARBITRATED) {
+		ret = ytphy_write_ext_with_lock(phydev,
+						YT8521_REG_SPACE_SELECT_REG,
+						priv->reg_page);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * yt8521_adjust_status() - update speed and duplex to phydev. when in fiber
+ * mode, adjust speed and duplex.
+ *
+ * @phydev: a pointer to a &struct phy_device
+ * @status: yt8521 status read from YTPHY_SPECIFIC_STATUS_REG
+ * @is_utp: false(yt8521 work in fiber mode) or true(yt8521 work in utp mode)
+ *
+ * NOTE: there is no 10M speed mode in fiber mode, so need adjust.
+ *
+ * returns 0
+ */
+static int yt8521_adjust_status(struct phy_device *phydev, int status,
+				bool is_utp)
+{
+	int speed_mode, duplex;
+	int speed;
+
+	if (is_utp)
+		duplex = (status & YTPHY_SSR_DUPLEX) >> YTPHY_SSR_DUPLEX_OFFSET;
+	else
+		duplex = DUPLEX_FULL;
+
+	speed_mode = (status & YTPHY_SSR_SPEED_MODE_MASK) >>
+		     YTPHY_SSR_SPEED_MODE_OFFSET;
+
+	switch (speed_mode) {
+	case YTPHY_SSR_SPEED_10M:
+		if (is_utp)
+			speed = SPEED_10;
+		else
+			speed = SPEED_UNKNOWN;
+		break;
+	case YTPHY_SSR_SPEED_100M:
+		speed = SPEED_100;
+		break;
+	case YTPHY_SSR_SPEED_1000M:
+		speed = SPEED_1000;
+		break;
+	default:
+		speed = SPEED_UNKNOWN;
+		break;
+	}
+
+	phydev->speed = speed;
+	phydev->duplex = duplex;
+
+	return 0;
+}
+
+/**
+ * yt8521_read_status_paged() -  determines the speed and duplex of one page
+ * @phydev: a pointer to a &struct phy_device
+ * @page: The reg page(YT8521_RSSR_FIBER_SPACE/YT8521_RSSR_UTP_SPACE) to operate
+ *
+ * returns 1 (utp or fiber link),0 (no link) or negative errno code
+ */
+static int yt8521_read_status_paged(struct phy_device *phydev, int page)
+{
+	int fiber_latch_val;
+	int fiber_curr_val;
+	int old_page;
+	int ret = 0;
+	int status;
+	int link;
+
+	/* YT8521 has two reg space (utp/fiber) for linkup with utp/fiber
+	 * respectively. but for utp/fiber combo mode, reg space should be
+	 * arbitrated based on media priority. by default, fiber takes priority.
+	 * reg space should be properly set before read YTPHY_SPECIFIC_STATUS_REG.
+	 */
+
+	page &= YT8521_RSSR_SPACE_MASK;
+	old_page = phy_select_page(phydev, page);
+	if (old_page < 0)
+		goto err_restore_page;
+
+	/* Read YTPHY_SPECIFIC_STATUS_REG, which indicates the speed and duplex
+	 * of the PHY is actually using.
+	 */
+	ret = __phy_read(phydev, YTPHY_SPECIFIC_STATUS_REG);
+	if (ret < 0)
+		goto err_restore_page;
+
+	status = ret;
+	link = status & YTPHY_SSR_LINK;
+
+	/* When PHY is in fiber mode, speed transferred from 1000Mbps to 100Mbps,
+	 * there is not link down from YTPHY_SPECIFIC_STATUS_REG, so we need
+	 * check MII_BMSR to identify such case.
+	 */
+	if (page == YT8521_RSSR_FIBER_SPACE) {
+		ret = __phy_read(phydev, MII_BMSR);
+		if (ret < 0)
+			goto err_restore_page;
+
+		fiber_latch_val = ret;
+		ret = __phy_read(phydev, MII_BMSR);
+		if (ret < 0)
+			goto err_restore_page;
+
+		fiber_curr_val = ret;
+		if (link && fiber_latch_val != fiber_curr_val) {
+			link = 0;
+			phydev_info(phydev,
+				    "%s, phy addr: %d, fiber link down detect, latch = %04x, curr = %04x\n",
+				    __func__, phydev->mdio.addr, fiber_latch_val,
+				    fiber_curr_val);
+		}
+	}
+
+	if (link) {
+		ret = 1;
+		if (page == YT8521_RSSR_UTP_SPACE)
+			yt8521_adjust_status(phydev, status, true);
+		else
+			yt8521_adjust_status(phydev, status, false);
+
+	} else {
+		ret = 0;
+	}
+
+err_restore_page:
+	return phy_restore_page(phydev, old_page, ret);
+}
+
+/**
+ * yt8521_read_status() -  determines the negotiated speed and duplex
+ * @phydev: a pointer to a &struct phy_device
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_read_status(struct phy_device *phydev)
+{
+	struct yt8521_priv *priv = phydev->priv;
+	int link_utp = 0;
+	int link_fiber;
+	int link;
+	int ret;
+
+	if (priv->reg_page != YT8521_RSSR_TO_BE_ARBITRATED) {
+		link = yt8521_read_status_paged(phydev, priv->reg_page);
+		if (link < 0)
+			return link;
+	} else {
+		/* when page is YT8521_RSSR_TO_BE_ARBITRATED, arbitration is
+		 * needed. by default, fiber is of higher priority.
+		 */
+
+		link_fiber = yt8521_read_status_paged(phydev,
+						      YT8521_RSSR_FIBER_SPACE);
+		if (link_fiber < 0)
+			return link_fiber;
+
+		if (!link_fiber) {
+			link_utp = yt8521_read_status_paged(phydev,
+							    YT8521_RSSR_UTP_SPACE);
+			if (link_utp < 0)
+				return link_utp;
+		}
+
+		link = link_utp || link_fiber;
+	}
+
+	if (link) {
+		if (phydev->link == 0) {
+			/* arbitrate reg space based on linkup media type. */
+			if (priv->polling_mode == YT8521_MODE_POLL) {
+				if (link_fiber)
+					priv->reg_page =
+						YT8521_RSSR_FIBER_SPACE;
+				else
+					priv->reg_page = YT8521_RSSR_UTP_SPACE;
+
+				ret = ytphy_write_ext_with_lock(phydev,
+								YT8521_REG_SPACE_SELECT_REG,
+								priv->reg_page);
+				if (ret < 0)
+					return ret;
+			}
+
+			phydev_info(phydev,
+				    "%s, phy addr: %d, link up, media: %s\n",
+				    __func__, phydev->mdio.addr,
+				    (priv->reg_page == YT8521_RSSR_UTP_SPACE) ?
+					    "UTP" :
+						  "Fiber");
+		}
+		phydev->link = 1;
+	} else {
+		if (phydev->link == 1) {
+			/* When in YT8521_MODE_POLL mode, need prepare for next
+			 * arbitration.
+			 */
+			if (priv->polling_mode == YT8521_MODE_POLL)
+				priv->reg_page = YT8521_RSSR_TO_BE_ARBITRATED;
+
+			phydev_info(phydev, "%s, phy addr: %d, link down\n",
+				    __func__, phydev->mdio.addr);
+		}
+
+		phydev->link = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * yt8521_modify_bmcr_paged - bits modify a PHY's BMCR register of one page
+ * @phydev: the phy_device struct
+ * @page: The reg page(YT8521_RSSR_FIBER_SPACE/YT8521_RSSR_UTP_SPACE) to operate
+ * @mask: bit mask of bits to clear
+ * @set: bit mask of bits to set
+ *
+ * NOTE: Convenience function which allows a PHY‘s BMCR register to be
+ * modified as new register value = (old register value & ~mask) | set.
+ * YT8521 has two space (utp/fiber) and three mode (utp/fiber/poll), each space
+ * has MII_BMCR. poll mode combines utp and faber,so need do both.
+ * If it is reset, it will wait for completion.
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_modify_bmcr_paged(struct phy_device *phydev, int page,
+				    u16 mask, u16 set)
+{
+	int max_cnt = 500; /* the max wait time of reset ~ 500 ms */
+	int old_page;
+	int ret = 0;
+
+	old_page = phy_select_page(phydev, page & YT8521_RSSR_SPACE_MASK);
+	if (old_page < 0)
+		goto err_restore_page;
+
+	ret = __phy_modify(phydev, MII_BMCR, mask, set);
+	if (ret < 0)
+		goto err_restore_page;
+
+	/* If it is reset, need to wait for the reset to complete */
+	if (set == BMCR_RESET) {
+		while (max_cnt--) {
+			/* unlock mdio bus during sleep */
+			phy_unlock_mdio_bus(phydev);
+			usleep_range(1000, 1100);
+			phy_lock_mdio_bus(phydev);
+
+			ret = __phy_read(phydev, MII_BMCR);
+			if (ret < 0)
+				goto err_restore_page;
+
+			if (!(ret & BMCR_RESET))
+				return phy_restore_page(phydev, old_page, 0);
+		}
+		if (max_cnt <= 0)
+			ret = -ETIMEDOUT;
+	}
+
+err_restore_page:
+	return phy_restore_page(phydev, old_page, ret);
+}
+
+/**
+ * yt8521_modify_utp_fiber_bmcr - bits modify a PHY's BMCR register
+ * @phydev: the phy_device struct
+ * @mask: bit mask of bits to clear
+ * @set: bit mask of bits to set
+ *
+ * NOTE: Convenience function which allows a PHY‘s BMCR register to be
+ * modified as new register value = (old register value & ~mask) | set.
+ * YT8521 has two space (utp/fiber) and three mode (utp/fiber/poll), each space
+ * has MII_BMCR. poll mode combines utp and faber,so need do both.
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_modify_utp_fiber_bmcr(struct phy_device *phydev, u16 mask,
+					u16 set)
+{
+	struct yt8521_priv *priv = phydev->priv;
+	int ret;
+
+	if (priv->reg_page != YT8521_RSSR_TO_BE_ARBITRATED) {
+		ret = yt8521_modify_bmcr_paged(phydev, priv->reg_page, mask,
+					       set);
+		if (ret < 0)
+			return ret;
+	} else {
+		ret = yt8521_modify_bmcr_paged(phydev, YT8521_RSSR_FIBER_SPACE,
+					       mask, set);
+		if (ret < 0)
+			return ret;
+
+		ret = yt8521_modify_bmcr_paged(phydev, YT8521_RSSR_UTP_SPACE,
+					       mask, set);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+/**
+ * yt8521_soft_reset() - called to issue a PHY software reset
+ * @phydev: a pointer to a &struct phy_device
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_soft_reset(struct phy_device *phydev)
+{
+	return yt8521_modify_utp_fiber_bmcr(phydev, 0, BMCR_RESET);
+}
+
+/**
+ * yt8521_suspend() - suspend the hardware
+ * @phydev: a pointer to a &struct phy_device
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_suspend(struct phy_device *phydev)
+{
+	int wol_config;
+
+	/* YTPHY_WOL_CONFIG_REG is common ext reg */
+	wol_config = ytphy_read_ext_with_lock(phydev, YTPHY_WOL_CONFIG_REG);
+	if (wol_config < 0)
+		return wol_config;
+
+	/* if wol enable, do nothing */
+	if (wol_config & YTPHY_WCR_ENABLE)
+		return 0;
+
+	return yt8521_modify_utp_fiber_bmcr(phydev, 0, BMCR_PDOWN);
+}
+
+/**
+ * yt8521_resume() - resume the hardware
+ * @phydev: a pointer to a &struct phy_device
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_resume(struct phy_device *phydev)
+{
+	int ret;
+	int wol_config;
+
+	/* disable auto sleep */
+	ret = ytphy_modify_ext_with_lock(phydev,
+					 YT8521_EXTREG_SLEEP_CONTROL1_REG,
+					 YT8521_ESC1R_SLEEP_SW, 0);
+	if (ret < 0)
+		return ret;
+
+	wol_config = ytphy_read_ext_with_lock(phydev, YTPHY_WOL_CONFIG_REG);
+	if (wol_config < 0)
+		return wol_config;
+
+	/* if wol enable, do nothing */
+	if (wol_config & YTPHY_WCR_ENABLE)
+		return 0;
+
+	return yt8521_modify_utp_fiber_bmcr(phydev, BMCR_PDOWN, 0);
+}
+
+/**
+ * yt8521_config_init() - called to initialize the PHY
+ * @phydev: a pointer to a &struct phy_device
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_config_init(struct phy_device *phydev)
+{
+	int old_page;
+	int ret = 0;
+
+	old_page = phy_select_page(phydev, YT8521_RSSR_UTP_SPACE);
+	if (old_page < 0)
+		goto err_restore_page;
+
+	/* disable auto sleep */
+	ret = ytphy_modify_ext(phydev, YT8521_EXTREG_SLEEP_CONTROL1_REG,
+			       YT8521_ESC1R_SLEEP_SW, 0);
+	if (ret < 0)
+		goto err_restore_page;
+
+	/* enable RXC clock when no wire plug */
+	ret = ytphy_modify_ext(phydev, YT8521_CLOCK_GATING_REG,
+			       YT8521_CGR_RX_CLK_EN, 0);
+	if (ret < 0)
+		goto err_restore_page;
+
+err_restore_page:
+	return phy_restore_page(phydev, old_page, ret);
+}
+
+/**
+ * yt8521_fiber_setup_forced - configures/forces speed from @phydev
+ * @phydev: target phy_device struct
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_fiber_setup_forced(struct phy_device *phydev)
+{
+	int max_cnt = 500; /* the max wait time of reset ~ 500 ms */
+	u16 val;
+	int ret;
+
+	if (phydev->speed == SPEED_1000)
+		val = YTPHY_MCR_FIBER_1000BX;
+	else if (phydev->speed == SPEED_100)
+		val = YTPHY_MCR_FIBER_100FX;
+	else
+		return -EINVAL;
+
+	ret =  phy_modify(phydev,  MII_BMCR, BMCR_ANENABLE, 0);
+	if (ret < 0)
+		return ret;
+
+	/* disable Fiber auto sensing */
+	ret =  ytphy_modify_ext_with_lock(phydev, YT8521_LINK_TIMER_CFG2_REG,
+					  YT8521_LTCR_EN_AUTOSEN, 0);
+	if (ret < 0)
+		return ret;
+
+	phydev->pause = 0;
+	phydev->asym_pause = 0;
+
+	ret =  ytphy_modify_ext_with_lock(phydev, YTPHY_MISC_CONFIG_REG,
+					  YTPHY_MCR_FIBER_SPEED_MASK, val);
+	if (ret < 0)
+		return ret;
+
+	ret =  ytphy_modify_ext_with_lock(phydev, YT8521_CHIP_CONFIG_REG,
+					  YT8521_CCR_SW_RST, 0);
+	if (ret < 0)
+		return ret;
+
+	while (max_cnt--) {
+		usleep_range(1000, 1100);
+		ret = ytphy_read_ext_with_lock(phydev, YT8521_CHIP_CONFIG_REG);
+		if (ret < 0)
+			return ret;
+
+		if (!(ret & YT8521_CCR_SW_RST))
+			return 0;
+	}
+
+	if (max_cnt <= 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/**
+ * yt8521_fiber_config_aneg - restart auto-negotiation or write YTPHY_MISC_CONFIG_REG
+ * @phydev: target phy_device struct
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_fiber_config_aneg(struct phy_device *phydev)
+{
+	int err, changed;
+	u16 adv;
+
+	if (phydev->autoneg != AUTONEG_ENABLE)
+		return yt8521_fiber_setup_forced(phydev);
+
+	err =  ytphy_modify_ext_with_lock(phydev, YTPHY_MISC_CONFIG_REG,
+					  YTPHY_MCR_FIBER_SPEED_MASK, YTPHY_MCR_FIBER_1000BX);
+	if (err < 0)
+		return err;
+
+	/* enable Fiber auto sensing */
+	err =  ytphy_modify_ext_with_lock(phydev, YT8521_LINK_TIMER_CFG2_REG,
+					  0, YT8521_LTCR_EN_AUTOSEN);
+	if (err < 0)
+		return err;
+
+	/* Setup fiber advertisement */
+	adv = ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM;
+	err = phy_modify_changed(phydev, MII_ADVERTISE,
+				 ADVERTISE_1000XHALF | ADVERTISE_1000XFULL |
+				 ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM,
+				 adv);
+	if (err < 0)
+		return err;
+
+	if (err > 0)
+		changed = 1;
+
+	return genphy_check_and_restart_aneg(phydev, changed);
+}
+
+/**
+ * yt8521_config_aneg_paged() - switch reg space then call genphy_config_aneg
+ * of one page
+ * @phydev: a pointer to a &struct phy_device
+ * @page: The reg page(YT8521_RSSR_FIBER_SPACE/YT8521_RSSR_UTP_SPACE) to operate
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_config_aneg_paged(struct phy_device *phydev, int page)
+{
+	int old_page;
+	int ret = 0;
+
+	page &= YT8521_RSSR_SPACE_MASK;
+
+	old_page = phy_select_page(phydev, page);
+	if (old_page < 0)
+		goto err_restore_page;
+
+	/* unlock mdio bus during genphy_config_aneg/yt8521_fiber_config_aneg,
+	 * because it will operate this lock.
+	 */
+	phy_unlock_mdio_bus(phydev);
+	if (page == YT8521_RSSR_FIBER_SPACE)
+		ret = yt8521_fiber_config_aneg(phydev);
+	else
+		ret = genphy_config_aneg(phydev);
+
+	phy_lock_mdio_bus(phydev);
+
+err_restore_page:
+	return phy_restore_page(phydev, old_page, ret);
+}
+
+/**
+ * yt8521_config_aneg() - change reg space then call genphy_config_aneg
+ * @phydev: a pointer to a &struct phy_device
+ *
+ * returns 0 or negative errno code
+ */
+static int yt8521_config_aneg(struct phy_device *phydev)
+{
+	struct yt8521_priv *priv = phydev->priv;
+	int ret;
+
+	if (priv->reg_page != YT8521_RSSR_TO_BE_ARBITRATED) {
+		ret = yt8521_config_aneg_paged(phydev, priv->reg_page);
+		if (ret < 0)
+			return ret;
+	} else {
+		ret = yt8521_config_aneg_paged(phydev, YT8521_RSSR_FIBER_SPACE);
+		if (ret < 0)
+			return ret;
+
+		ret = yt8521_config_aneg_paged(phydev, YT8521_RSSR_UTP_SPACE);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * yt8521_aneg_done_paged() - determines the auto negotiation result of one page
+ * @phydev: a pointer to a &struct phy_device
+ * @page: The reg page(YT8521_RSSR_FIBER_SPACE/YT8521_RSSR_UTP_SPACE) to operate
+ *
+ * returns 0(no link)or 1(fiber or utp link) or negative errno code
+ */
+static int yt8521_aneg_done_paged(struct phy_device *phydev, int page)
+{
+	int old_page;
+	int ret = 0;
+	int link;
+
+	old_page = phy_select_page(phydev, page & YT8521_RSSR_SPACE_MASK);
+	if (old_page < 0)
+		goto err_restore_page;
+
+	ret = __phy_read(phydev, YTPHY_SPECIFIC_STATUS_REG);
+	if (ret < 0)
+		goto err_restore_page;
+
+	link = !!(ret & YTPHY_SSR_LINK);
+	ret = link;
+
+err_restore_page:
+	return phy_restore_page(phydev, old_page, ret);
+}
+
+/**
+ * yt8521_aneg_done() - determines the auto negotiation result
+ * @phydev: a pointer to a &struct phy_device
+ *
+ * returns 0(no link)or 1(fiber or utp link) or negative errno code
+ */
+static int yt8521_aneg_done(struct phy_device *phydev)
+{
+	struct yt8521_priv *priv = phydev->priv;
+	int link_utp = 0;
+	int link_fiber;
+	int link;
+
+	if (priv->reg_page != YT8521_RSSR_TO_BE_ARBITRATED) {
+		link = yt8521_aneg_done_paged(phydev, priv->reg_page);
+	} else {
+		link_fiber =
+			yt8521_aneg_done_paged(phydev, YT8521_RSSR_FIBER_SPACE);
+		if (link_fiber < 0)
+			return link_fiber;
+
+		if (!link_fiber) {
+			link_utp = yt8521_aneg_done_paged(phydev,
+							  YT8521_RSSR_UTP_SPACE);
+			if (link_utp < 0)
+				return link_utp;
+		}
+		link = link_fiber || link_utp;
+		phydev_info(phydev,
+			    "%s, phy addr: %d, link_fiber: %d, link_utp: %d\n",
+			    __func__, phydev->mdio.addr, link_fiber, link_utp);
+	}
+
+	return link;
+}
+
 static struct phy_driver motorcomm_phy_drvs[] = {
 	{
 		PHY_ID_MATCH_EXACT(PHY_ID_YT8511),
@@ -121,16 +1260,35 @@ static struct phy_driver motorcomm_phy_drvs[] = {
 		.read_page	= yt8511_read_page,
 		.write_page	= yt8511_write_page,
 	},
+	{
+		PHY_ID_MATCH_EXACT(PHY_ID_YT8521),
+		.name		= "YT8521 Gigabit Ethernet",
+		.features	= PHY_GBIT_FEATURES,
+		.probe		= yt8521_probe,
+		.read_page	= yt8521_read_page,
+		.write_page	= yt8521_write_page,
+		.get_wol	= ytphy_get_wol,
+		.set_wol	= ytphy_set_wol,
+		.config_aneg	= yt8521_config_aneg,
+		.aneg_done	= yt8521_aneg_done,
+		.config_init	= yt8521_config_init,
+		.read_status	= yt8521_read_status,
+		.soft_reset	= yt8521_soft_reset,
+		.suspend	= yt8521_suspend,
+		.resume		= yt8521_resume,
+	},
 };
 
 module_phy_driver(motorcomm_phy_drvs);
 
-MODULE_DESCRIPTION("Motorcomm PHY driver");
+MODULE_DESCRIPTION("Motorcomm 8511/8521 PHY driver");
 MODULE_AUTHOR("Peter Geis");
+MODULE_AUTHOR("Frank");
 MODULE_LICENSE("GPL");
 
 static const struct mdio_device_id __maybe_unused motorcomm_tbl[] = {
 	{ PHY_ID_MATCH_EXACT(PHY_ID_YT8511) },
+	{ PHY_ID_MATCH_EXACT(PHY_ID_YT8521) },
 	{ /* sentinal */ }
 };
 
-- 
2.31.0.windows.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox