* [RFC 1/2] net/mlx5: discover IPv6 traffic class support in RDMA core
  2024-01-10  7:12 [RFC 0/2] use traffic class PRM field for IPv6 modification Gavin Li
@ 2024-01-10  7:12 ` Gavin Li
  2024-01-10  7:12 ` [RFC 2/2] net/mlx5: use traffic class PRM field for IPv6 modification Gavin Li
  1 sibling, 0 replies; 3+ messages in thread
From: Gavin Li @ 2024-01-10  7:12 UTC (permalink / raw)
  To: dev, dsosnowski, viacheslavo, orika, suanmingm, matan; +Cc: jiaweiw, rasland
Previously, rdma core and firmware used the same ids for IPv6 traffic class
as for IPv4 DSCP and ECN. New FW supports a new IPv6 traffic class id, which
is the recommended one, although the old way still works.
FW exposes a new cap bit to indicate support for the new id, while
RDMA core does not have such a mechanism.
To fix the backward compatibility issue when RDMA core and FW of different
versions are combined, a new function and a new flag are introduced to
check whether the new IPv6 traffic class id is supported by RDMA core.
Signed-off-by: Gavin Li <gavinl@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c |  4 +++
 drivers/net/mlx5/mlx5.h          |  1 +
 drivers/net/mlx5/mlx5_flow.c     | 42 ++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow.h     |  1 +
 4 files changed, 48 insertions(+)
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index ae82e1e5d8..5ae31c88f4 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1602,6 +1602,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			goto error;
 	}
 	rte_rwlock_init(&priv->ind_tbls_lock);
+	if (sh->config.dv_flow_en == 1 &&
+	    !priv->sh->ipv6_tc_fallback &&
+	    mlx5_flow_discover_ipv6_tc_support(eth_dev))
+		priv->sh->ipv6_tc_fallback = 1;
 	if (priv->sh->config.dv_flow_en == 2) {
 #ifdef HAVE_MLX5_HWS_SUPPORT
 		if (priv->sh->config.dv_esw_en) {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 263ebead7f..779805bcd8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1444,6 +1444,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t lag_rx_port_affinity_en:1;
 	/* lag_rx_port_affinity is supported. */
 	uint32_t hws_max_log_bulk_sz:5;
+	uint32_t ipv6_tc_fallback:1;
 	/* Log of minimal HWS counters created hard coded. */
 	uint32_t hws_max_nb_counters; /* Maximal number for HWS counters. */
 	uint32_t max_port; /* Maximal IB device port index. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 85e8c77c81..90b72b7b0a 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -12476,3 +12476,45 @@ mlx5_flow_pick_transfer_proxy(struct rte_eth_dev *dev,
 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				  NULL, "unable to find a proxy port");
 }
+
+/**
+ * Probe IPv6 traffic class id support by creating a test SET_IPV6_DSCP flow.
+ *
+ * @param dev
+ *   Pointer to the Ethernet device used to create the probe flow.
+ *
+ * @return
+ *   0 if the probe flow was created (it is destroyed before returning).
+ *   -EOPNOTSUPP if the probe flow could not be created.
+ */
+int
+mlx5_flow_discover_ipv6_tc_support(struct rte_eth_dev *dev)
+{
+	struct rte_flow_action_set_dscp set_dscp;
+	struct rte_flow_attr attr;
+	struct rte_flow_action actions[2];
+	struct rte_flow_item items[3];
+	struct rte_flow_error error;
+	uint32_t flow_idx;
+
+	memset(&attr, 0, sizeof(attr));
+	memset(actions, 0, sizeof(actions));
+	memset(items, 0, sizeof(items));
+	attr.group = 1;
+	attr.egress = 1;
+	items[0].type = RTE_FLOW_ITEM_TYPE_ETH;
+	items[1].type = RTE_FLOW_ITEM_TYPE_IPV6;
+	items[2].type = RTE_FLOW_ITEM_TYPE_END;
+	/* Arbitrary DSCP value; only whether the flow can be created matters. */
+	set_dscp.dscp = 9;
+	actions[0].type = RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP;
+	actions[0].conf = &set_dscp;
+	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
+
+	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr, items, actions, true, &error);
+	if (!flow_idx)
+		return -EOPNOTSUPP;
+
+	flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
+	return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 120609c595..33d4a28077 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -2638,6 +2638,7 @@ void mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
 		struct mlx5_flow_meter_policy *mtr_policy);
 int mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev);
 int mlx5_flow_discover_dr_action_support(struct rte_eth_dev *dev);
+int mlx5_flow_discover_ipv6_tc_support(struct rte_eth_dev *dev);
 int mlx5_action_handle_attach(struct rte_eth_dev *dev);
 int mlx5_action_handle_detach(struct rte_eth_dev *dev);
 int mlx5_action_handle_flush(struct rte_eth_dev *dev);
-- 
2.39.1
^ permalink raw reply related	[flat|nested] 3+ messages in thread* [RFC 2/2] net/mlx5: use traffic class PRM field for IPv6 modification
  2024-01-10  7:12 [RFC 0/2] use traffic class PRM field for IPv6 modification Gavin Li
  2024-01-10  7:12 ` [RFC 1/2] net/mlx5: discover IPv6 traffic class support in RDMA core Gavin Li
@ 2024-01-10  7:12 ` Gavin Li
  1 sibling, 0 replies; 3+ messages in thread
From: Gavin Li @ 2024-01-10  7:12 UTC (permalink / raw)
  To: dev, dsosnowski, viacheslavo, orika, suanmingm, matan; +Cc: jiaweiw, rasland
The new PRM defines a new field, OUT_IPV6_TRAFFIC_CLASS, for IPv6, which is
used by both IPv6 ECN and DSCP. A new cap bit,
modify_outer_ipv6_traffic_class, is added. It can be used to check whether
the new field is supported by FW.
However, IPv6 ECN and DSCP start from different offsets in the same byte.
Update SWS and HWS to use the new field and introduce an extra offset for
IPv6 DSCP data and mask to solve the issue.
Signed-off-by: Gavin Li <gavinl@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c |  3 ++
 drivers/common/mlx5/mlx5_devx_cmds.h |  1 +
 drivers/common/mlx5/mlx5_prm.h       |  8 ++-
 drivers/net/mlx5/linux/mlx5_os.c     |  5 +-
 drivers/net/mlx5/mlx5_flow.h         |  3 ++
 drivers/net/mlx5/mlx5_flow_dv.c      | 78 ++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_flow_hw.c      |  7 +++
 7 files changed, 92 insertions(+), 13 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 4d8818924a..3a894f894a 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1229,6 +1229,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
 	attr->modify_outer_ip_ecn = MLX5_GET
 		(flow_table_nic_cap, hcattr,
 		 ft_header_modify_nic_receive.outer_ip_ecn);
+	attr->modify_outer_ipv6_traffic_class = MLX5_GET
+		(flow_table_nic_cap, hcattr,
+		 ft_header_modify_nic_receive.outer_ipv6_traffic_class);
 	attr->set_reg_c = 0xffff;
 	if (attr->nic_flow_table) {
 #define GET_RX_REG_X_BITS \
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 7f23e925a5..4a6008dc1a 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -304,6 +304,7 @@ struct mlx5_hca_attr {
 	uint32_t set_reg_c:16;
 	uint32_t nic_flow_table:1;
 	uint32_t modify_outer_ip_ecn:1;
+	uint32_t modify_outer_ipv6_traffic_class:1;
 	union {
 		uint32_t max_flow_counter;
 		struct {
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 0d46ba9c40..69404b5ed8 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -848,6 +848,7 @@ enum mlx5_modification_field {
 	MLX5_MODI_META_REG_C_13 = 0x94,
 	MLX5_MODI_META_REG_C_14 = 0x95,
 	MLX5_MODI_META_REG_C_15 = 0x96,
+	MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS = 0x11C,
 	MLX5_MODI_OUT_IPV4_TOTAL_LEN = 0x11D,
 	MLX5_MODI_OUT_IPV6_PAYLOAD_LEN = 0x11E,
 	MLX5_MODI_OUT_IPV4_IHL = 0x11F,
@@ -2202,7 +2203,9 @@ struct mlx5_ifc_ft_fields_support_bits {
 		u8 metadata_reg_c_x[0x8];
 	}; /* end of DW3 */
 	/* set_action_field_support_2 */
-	u8 reserved_at_80[0x80];
+	u8 reserved_at_80[0x37];
+	u8 outer_ipv6_traffic_class[0x1];
+	u8 reserved_at_B8[0x48];
 	/* add_action_field_support */
 	u8 reserved_at_100[0x80];
 	/* add_action_field_support_2 */
@@ -2240,7 +2243,8 @@ struct mlx5_ifc_ft_fields_support_2_bits {
 	u8 inner_l4_checksum_ok[0x1];
 	u8 outer_ipv4_checksum_ok[0x1];
 	u8 outer_l4_checksum_ok[0x1]; /* end of DW0 */
-	u8 reserved_at_20[0x18];
+	u8 reserved_at_20[0x17];
+	u8 outer_ipv6_traffic_class[0x1];
 	union {
 		struct {
 			u8 metadata_reg_c_15[0x1];
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 5ae31c88f4..6ea0296109 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1602,9 +1602,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			goto error;
 	}
 	rte_rwlock_init(&priv->ind_tbls_lock);
-	if (sh->config.dv_flow_en == 1 &&
+	if (!priv->sh->cdev->config.hca_attr.modify_outer_ipv6_traffic_class ||
+	    (sh->config.dv_flow_en == 1 &&
 	    !priv->sh->ipv6_tc_fallback &&
-	    mlx5_flow_discover_ipv6_tc_support(eth_dev))
+	    mlx5_flow_discover_ipv6_tc_support(eth_dev)))
 		priv->sh->ipv6_tc_fallback = 1;
 	if (priv->sh->config.dv_flow_en == 2) {
 #ifdef HAVE_MLX5_HWS_SUPPORT
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 33d4a28077..fe4f46724b 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -413,6 +413,9 @@ enum mlx5_feature_name {
 #define IPPROTO_MPLS 137
 #endif
 
+#define MLX5_IPV6_HDR_ECN_MASK 0x3
+#define MLX5_IPV6_HDR_DSCP_SHIFT 2
+
 /* UDP port number for MPLS */
 #define MLX5_UDP_PORT_MPLS 6635
 
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 97f55003c3..ecf86d861d 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -253,6 +253,11 @@ struct field_modify_info modify_ipv6[] = {
 	{0, 0, 0},
 };
 
+struct field_modify_info modify_ipv6_traffic_class[] = {
+	{1,  0, MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS},
+	{0, 0, 0},
+};
+
 struct field_modify_info modify_udp[] = {
 	{2, 0, MLX5_MODI_OUT_UDP_SPORT},
 	{2, 2, MLX5_MODI_OUT_UDP_DPORT},
@@ -1323,6 +1328,7 @@ static int
 flow_dv_convert_action_modify_ipv6_dscp
 			(struct mlx5_flow_dv_modify_hdr_resource *resource,
 			 const struct rte_flow_action *action,
+			 uint32_t ipv6_tc_off,
 			 struct rte_flow_error *error)
 {
 	const struct rte_flow_action_set_dscp *conf =
@@ -1330,6 +1336,7 @@ flow_dv_convert_action_modify_ipv6_dscp
 	struct rte_flow_item item = { .type = RTE_FLOW_ITEM_TYPE_IPV6 };
 	struct rte_flow_item_ipv6 ipv6;
 	struct rte_flow_item_ipv6 ipv6_mask;
+	struct field_modify_info *modify_info;
 
 	memset(&ipv6, 0, sizeof(ipv6));
 	memset(&ipv6_mask, 0, sizeof(ipv6_mask));
@@ -1338,12 +1345,19 @@ flow_dv_convert_action_modify_ipv6_dscp
 	 * rdma-core only accept the DSCP bits byte aligned start from
 	 * bit 0 to 5 as to be compatible with IPv4. No need to shift the
 	 * bits in IPv6 case as rdma-core requires byte aligned value.
+	 * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+	 * bits left. Shift the mask left for IPV6 DSCP. Do it here because
+	 * it's needed to distinguish DSCP from ECN in data field construct
 	 */
-	ipv6.hdr.vtc_flow = conf->dscp;
-	ipv6_mask.hdr.vtc_flow = RTE_IPV6_HDR_DSCP_MASK >> 22;
+	ipv6.hdr.vtc_flow = conf->dscp << ipv6_tc_off;
+	ipv6_mask.hdr.vtc_flow = RTE_IPV6_HDR_DSCP_MASK >> (22 - ipv6_tc_off);
 	item.spec = &ipv6;
 	item.mask = &ipv6_mask;
-	return flow_dv_convert_modify_action(&item, modify_ipv6, NULL, resource,
+	if (ipv6_tc_off)
+		modify_info = modify_ipv6_traffic_class;
+	else
+		modify_info = modify_ipv6;
+	return flow_dv_convert_modify_action(&item, modify_info, NULL, resource,
 					     MLX5_MODIFICATION_TYPE_SET, error);
 }
 
@@ -1576,6 +1590,12 @@ mlx5_modify_flex_item(const struct rte_eth_dev *dev,
 	}
 }
 
+static inline bool
+mlx5_dv_modify_ipv6_traffic_class_supported(struct mlx5_priv *priv)
+{
+	return !priv->sh->ipv6_tc_fallback;
+}
+
 void
 mlx5_flow_field_id_to_modify_info
 		(const struct rte_flow_action_modify_data *data,
@@ -1731,9 +1751,20 @@ mlx5_flow_field_id_to_modify_info
 		break;
 	case RTE_FLOW_FIELD_IPV6_DSCP:
 		MLX5_ASSERT(data->offset + width <= 6);
-		off_be = 6 - (data->offset + width);
-		info[idx] = (struct field_modify_info){1, 0,
-					MLX5_MODI_OUT_IP_DSCP};
+		/*
+		 * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+		 * bits left. Shift the mask left for IPV6 DSCP. Do it here because
+		 * it's needed to distinguish DSCP from ECN in data field construct
+		 */
+		if (mlx5_dv_modify_ipv6_traffic_class_supported(priv)) {
+			off_be = 6 - (data->offset + width) + MLX5_IPV6_HDR_DSCP_SHIFT;
+			info[idx] = (struct field_modify_info){1, 0,
+						MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS};
+		} else {
+			off_be = 6 - (data->offset + width);
+			info[idx] = (struct field_modify_info){1, 0,
+						MLX5_MODI_OUT_IP_DSCP};
+		}
 		if (mask)
 			mask[idx] = flow_modify_info_mask_8(width, off_be);
 		else
@@ -2029,7 +2060,6 @@ mlx5_flow_field_id_to_modify_info
 		}
 		break;
 	case RTE_FLOW_FIELD_IPV4_ECN:
-	case RTE_FLOW_FIELD_IPV6_ECN:
 		MLX5_ASSERT(data->offset + width <= 2);
 		off_be = 2 - (data->offset + width);
 		info[idx] = (struct field_modify_info){1, 0,
@@ -2039,6 +2069,20 @@ mlx5_flow_field_id_to_modify_info
 		else
 			info[idx].offset = off_be;
 		break;
+	case RTE_FLOW_FIELD_IPV6_ECN:
+		MLX5_ASSERT(data->offset + width <= 2);
+		off_be = 2 - (data->offset + width);
+		if (mlx5_dv_modify_ipv6_traffic_class_supported(priv))
+			info[idx] = (struct field_modify_info){1, 0,
+						MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS};
+		else
+			info[idx] = (struct field_modify_info){1, 0,
+						MLX5_MODI_OUT_IP_ECN};
+		if (mask)
+			mask[idx] = flow_modify_info_mask_8(width, off_be);
+		else
+			info[idx].offset = off_be;
+		break;
 	case RTE_FLOW_FIELD_GTP_PSC_QFI:
 		MLX5_ASSERT(data->offset + width <= 8);
 		off_be = data->offset + 8;
@@ -2161,7 +2205,7 @@ flow_dv_convert_action_modify_field
 	struct field_modify_info dcopy[MLX5_ACT_MAX_MOD_FIELDS] = {
 								{0, 0, 0} };
 	uint32_t mask[MLX5_ACT_MAX_MOD_FIELDS] = {0, 0, 0, 0, 0};
-	uint32_t type, meta = 0;
+	uint32_t type, meta = 0, dscp = 0;
 
 	if (conf->src.field == RTE_FLOW_FIELD_POINTER ||
 	    conf->src.field == RTE_FLOW_FIELD_VALUE) {
@@ -2181,6 +2225,17 @@ flow_dv_convert_action_modify_field
 			meta = rte_cpu_to_be_32(meta);
 			item.spec = &meta;
 		}
+		if (mlx5_dv_modify_ipv6_traffic_class_supported(dev->data->dev_private) &&
+		    conf->dst.field == RTE_FLOW_FIELD_IPV6_DSCP &&
+		    !(mask[0] & MLX5_IPV6_HDR_ECN_MASK)) {
+			dscp = *(const unaligned_uint32_t *)item.spec;
+			/*
+			 * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+			 * bits left. Shift the data left for IPV6 DSCP
+			 */
+			dscp <<= MLX5_IPV6_HDR_DSCP_SHIFT;
+			item.spec = &dscp;
+		}
 	} else {
 		type = MLX5_MODIFICATION_TYPE_COPY;
 		/** For COPY fill the destination field (dcopy) without mask. */
@@ -14385,6 +14440,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 	struct mlx5_flow_sub_actions_list *sample_act;
 	uint32_t sample_act_pos = UINT32_MAX;
 	uint32_t age_act_pos = UINT32_MAX;
+	uint32_t ipv6_tc_off = 0;
 	uint32_t num_of_dest = 0;
 	int tmp_actions_n = 0;
 	uint32_t table;
@@ -14941,8 +14997,12 @@ flow_dv_translate(struct rte_eth_dev *dev,
 			action_flags |= MLX5_FLOW_ACTION_SET_IPV4_DSCP;
 			break;
 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
+			if (mlx5_dv_modify_ipv6_traffic_class_supported(priv))
+				ipv6_tc_off = MLX5_IPV6_HDR_DSCP_SHIFT;
+			else
+				ipv6_tc_off = 0;
 			if (flow_dv_convert_action_modify_ipv6_dscp(mhdr_res,
-							      actions, error))
+							      actions, ipv6_tc_off, error))
 				return -rte_errno;
 			action_flags |= MLX5_FLOW_ACTION_SET_IPV6_DSCP;
 			break;
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index c4a90a3690..504a250e44 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -2862,6 +2862,13 @@ flow_hw_modify_field_construct(struct mlx5_hw_q_job *job,
 		}
 		off_b = rte_bsf32(mask);
 		data = flow_dv_fetch_field(values + field->offset, field->size);
+		/*
+		 * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+		 * bits left. Shift the data left for IPV6 DSCP
+		 */
+		if (field->id == MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS &&
+		    !(mask & MLX5_IPV6_HDR_ECN_MASK))
+			data <<= MLX5_IPV6_HDR_DSCP_SHIFT;
 		data = (data & mask) >> off_b;
 		job->mhdr_cmd[i++].data1 = rte_cpu_to_be_32(data);
 		++field;
-- 
2.39.1
^ permalink raw reply related	[flat|nested] 3+ messages in thread