Netdev List
 help / color / mirror / Atom feed
* [net 02/11] net/mlx5: Initialize flow steering during driver probe
From: Saeed Mahameed @ 2022-05-18  6:34 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Shay Drory, Mark Bloch, Saeed Mahameed
In-Reply-To: <20220518063427.123758-1-saeed@kernel.org>

From: Shay Drory <shayd@nvidia.com>

Currently, software objects of flow steering are created and destroyed
during reload flow. In case a device is unloaded, the following error
is printed during grace period:

 mlx5_core 0000:00:0b.0: mlx5_fw_fatal_reporter_err_work:690:(pid 95):
    Driver is in error state. Unloading

As a solution to fix use-after-free bugs, where we try to access
these objects, when reading the value of flow_steering_mode devlink
param[1], let's split flow steering creation and destruction into two
routines:
    * init and cleanup: memory, cache, and pools allocation/free.
    * create and destroy: namespaces initialization and cleanup.

While at it, re-order the cleanup function to mirror the init function.

[1]
Kasan trace:

[  385.119849 ] BUG: KASAN: use-after-free in mlx5_devlink_fs_mode_get+0x3b/0xa0
[  385.119849 ] Read of size 4 at addr ffff888104b79308 by task bash/291
[  385.119849 ]
[  385.119849 ] CPU: 1 PID: 291 Comm: bash Not tainted 5.17.0-rc1+ #2
[  385.119849 ] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014
[  385.119849 ] Call Trace:
[  385.119849 ]  <TASK>
[  385.119849 ]  dump_stack_lvl+0x6e/0x91
[  385.119849 ]  print_address_description.constprop.0+0x1f/0x160
[  385.119849 ]  ? mlx5_devlink_fs_mode_get+0x3b/0xa0
[  385.119849 ]  ? mlx5_devlink_fs_mode_get+0x3b/0xa0
[  385.119849 ]  kasan_report.cold+0x83/0xdf
[  385.119849 ]  ? devlink_param_notify+0x20/0x190
[  385.119849 ]  ? mlx5_devlink_fs_mode_get+0x3b/0xa0
[  385.119849 ]  mlx5_devlink_fs_mode_get+0x3b/0xa0
[  385.119849 ]  devlink_nl_param_fill+0x18a/0xa50
[  385.119849 ]  ? _raw_spin_lock_irqsave+0x8d/0xe0
[  385.119849 ]  ? devlink_flash_update_timeout_notify+0xf0/0xf0
[  385.119849 ]  ? __wake_up_common+0x4b/0x1e0
[  385.119849 ]  ? preempt_count_sub+0x14/0xc0
[  385.119849 ]  ? _raw_spin_unlock_irqrestore+0x28/0x40
[  385.119849 ]  ? __wake_up_common_lock+0xe3/0x140
[  385.119849 ]  ? __wake_up_common+0x1e0/0x1e0
[  385.119849 ]  ? __sanitizer_cov_trace_const_cmp8+0x27/0x80
[  385.119849 ]  ? __rcu_read_unlock+0x48/0x70
[  385.119849 ]  ? kasan_unpoison+0x23/0x50
[  385.119849 ]  ? __kasan_slab_alloc+0x2c/0x80
[  385.119849 ]  ? memset+0x20/0x40
[  385.119849 ]  ? __sanitizer_cov_trace_const_cmp4+0x25/0x80
[  385.119849 ]  devlink_param_notify+0xce/0x190
[  385.119849 ]  devlink_unregister+0x92/0x2b0
[  385.119849 ]  remove_one+0x41/0x140
[  385.119849 ]  pci_device_remove+0x68/0x140
[  385.119849 ]  ? pcibios_free_irq+0x10/0x10
[  385.119849 ]  __device_release_driver+0x294/0x3f0
[  385.119849 ]  device_driver_detach+0x82/0x130
[  385.119849 ]  unbind_store+0x193/0x1b0
[  385.119849 ]  ? subsys_interface_unregister+0x270/0x270
[  385.119849 ]  drv_attr_store+0x4e/0x70
[  385.119849 ]  ? drv_attr_show+0x60/0x60
[  385.119849 ]  sysfs_kf_write+0xa7/0xc0
[  385.119849 ]  kernfs_fop_write_iter+0x23a/0x2f0
[  385.119849 ]  ? sysfs_kf_bin_read+0x160/0x160
[  385.119849 ]  new_sync_write+0x311/0x430
[  385.119849 ]  ? new_sync_read+0x480/0x480
[  385.119849 ]  ? _raw_spin_lock+0x87/0xe0
[  385.119849 ]  ? __sanitizer_cov_trace_cmp4+0x25/0x80
[  385.119849 ]  ? security_file_permission+0x94/0xa0
[  385.119849 ]  vfs_write+0x4c7/0x590
[  385.119849 ]  ksys_write+0xf6/0x1e0
[  385.119849 ]  ? __x64_sys_read+0x50/0x50
[  385.119849 ]  ? fpregs_assert_state_consistent+0x99/0xa0
[  385.119849 ]  do_syscall_64+0x3d/0x90
[  385.119849 ]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  385.119849 ] RIP: 0033:0x7fc36ef38504
[  385.119849 ] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f
80 00 00 00 00 48 8d 05 f9 61 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f
05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53
[  385.119849 ] RSP: 002b:00007ffde0ff3d08 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  385.119849 ] RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007fc36ef38504
[  385.119849 ] RDX: 000000000000000c RSI: 00007fc370521040 RDI: 0000000000000001
[  385.119849 ] RBP: 00007fc370521040 R08: 00007fc36f00b8c0 R09: 00007fc36ee4b740
[  385.119849 ] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc36f00a760
[  385.119849 ] R13: 000000000000000c R14: 00007fc36f005760 R15: 000000000000000c
[  385.119849 ]  </TASK>
[  385.119849 ]
[  385.119849 ] Allocated by task 65:
[  385.119849 ]  kasan_save_stack+0x1e/0x40
[  385.119849 ]  __kasan_kmalloc+0x81/0xa0
[  385.119849 ]  mlx5_init_fs+0x11b/0x1160
[  385.119849 ]  mlx5_load+0x13c/0x220
[  385.119849 ]  mlx5_load_one+0xda/0x160
[  385.119849 ]  mlx5_recover_device+0xb8/0x100
[  385.119849 ]  mlx5_health_try_recover+0x2f9/0x3a1
[  385.119849 ]  devlink_health_reporter_recover+0x75/0x100
[  385.119849 ]  devlink_health_report+0x26c/0x4b0
[  385.275909 ]  mlx5_fw_fatal_reporter_err_work+0x11e/0x1b0
[  385.275909 ]  process_one_work+0x520/0x970
[  385.275909 ]  worker_thread+0x378/0x950
[  385.275909 ]  kthread+0x1bb/0x200
[  385.275909 ]  ret_from_fork+0x1f/0x30
[  385.275909 ]
[  385.275909 ] Freed by task 65:
[  385.275909 ]  kasan_save_stack+0x1e/0x40
[  385.275909 ]  kasan_set_track+0x21/0x30
[  385.275909 ]  kasan_set_free_info+0x20/0x30
[  385.275909 ]  __kasan_slab_free+0xfc/0x140
[  385.275909 ]  kfree+0xa5/0x3b0
[  385.275909 ]  mlx5_unload+0x2e/0xb0
[  385.275909 ]  mlx5_unload_one+0x86/0xb0
[  385.275909 ]  mlx5_fw_fatal_reporter_err_work.cold+0xca/0xcf
[  385.275909 ]  process_one_work+0x520/0x970
[  385.275909 ]  worker_thread+0x378/0x950
[  385.275909 ]  kthread+0x1bb/0x200
[  385.275909 ]  ret_from_fork+0x1f/0x30
[  385.275909 ]
[  385.275909 ] The buggy address belongs to the object at ffff888104b79300
[  385.275909 ]  which belongs to the cache kmalloc-128 of size 128
[  385.275909 ] The buggy address is located 8 bytes inside of
[  385.275909 ]  128-byte region [ffff888104b79300, ffff888104b79380)
[  385.275909 ] The buggy address belongs to the page:
[  385.275909 ] page:00000000de44dd39 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x104b78
[  385.275909 ] head:00000000de44dd39 order:1 compound_mapcount:0
[  385.275909 ] flags: 0x8000000000010200(slab|head|zone=2)
[  385.275909 ] raw: 8000000000010200 0000000000000000 dead000000000122 ffff8881000428c0
[  385.275909 ] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
[  385.275909 ] page dumped because: kasan: bad access detected
[  385.275909 ]
[  385.275909 ] Memory state around the buggy address:
[  385.275909 ]  ffff888104b79200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc
[  385.275909 ]  ffff888104b79280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  385.275909 ] >ffff888104b79300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  385.275909 ]                       ^
[  385.275909 ]  ffff888104b79380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  385.275909 ]  ffff888104b79400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  385.275909 ]]

Fixes: e890acd5ff18 ("net/mlx5: Add devlink flow_steering_mode parameter")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/fs_core.c | 131 ++++++++++--------
 .../net/ethernet/mellanox/mlx5/core/fs_core.h |   6 +-
 .../net/ethernet/mellanox/mlx5/core/main.c    |  15 +-
 3 files changed, 91 insertions(+), 61 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 816d991f7621..3ad67e6b5586 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2663,28 +2663,6 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
 	clean_tree(&root_ns->ns.node);
 }
 
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
-{
-	struct mlx5_flow_steering *steering = dev->priv.steering;
-
-	cleanup_root_ns(steering->root_ns);
-	cleanup_root_ns(steering->fdb_root_ns);
-	steering->fdb_root_ns = NULL;
-	kfree(steering->fdb_sub_ns);
-	steering->fdb_sub_ns = NULL;
-	cleanup_root_ns(steering->port_sel_root_ns);
-	cleanup_root_ns(steering->sniffer_rx_root_ns);
-	cleanup_root_ns(steering->sniffer_tx_root_ns);
-	cleanup_root_ns(steering->rdma_rx_root_ns);
-	cleanup_root_ns(steering->rdma_tx_root_ns);
-	cleanup_root_ns(steering->egress_root_ns);
-	mlx5_cleanup_fc_stats(dev);
-	kmem_cache_destroy(steering->ftes_cache);
-	kmem_cache_destroy(steering->fgs_cache);
-	mlx5_ft_pool_destroy(dev);
-	kfree(steering);
-}
-
 static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
 {
 	struct fs_prio *prio;
@@ -3086,42 +3064,27 @@ static int init_egress_root_ns(struct mlx5_flow_steering *steering)
 	return err;
 }
 
-int mlx5_init_fs(struct mlx5_core_dev *dev)
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
 {
-	struct mlx5_flow_steering *steering;
-	int err = 0;
-
-	err = mlx5_init_fc_stats(dev);
-	if (err)
-		return err;
-
-	err = mlx5_ft_pool_init(dev);
-	if (err)
-		return err;
-
-	steering = kzalloc(sizeof(*steering), GFP_KERNEL);
-	if (!steering) {
-		err = -ENOMEM;
-		goto err;
-	}
-
-	steering->dev = dev;
-	dev->priv.steering = steering;
+	struct mlx5_flow_steering *steering = dev->priv.steering;
 
-	if (mlx5_fs_dr_is_supported(dev))
-		steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
-	else
-		steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+	cleanup_root_ns(steering->root_ns);
+	cleanup_root_ns(steering->fdb_root_ns);
+	steering->fdb_root_ns = NULL;
+	kfree(steering->fdb_sub_ns);
+	steering->fdb_sub_ns = NULL;
+	cleanup_root_ns(steering->port_sel_root_ns);
+	cleanup_root_ns(steering->sniffer_rx_root_ns);
+	cleanup_root_ns(steering->sniffer_tx_root_ns);
+	cleanup_root_ns(steering->rdma_rx_root_ns);
+	cleanup_root_ns(steering->rdma_tx_root_ns);
+	cleanup_root_ns(steering->egress_root_ns);
+}
 
-	steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
-						sizeof(struct mlx5_flow_group), 0,
-						0, NULL);
-	steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
-						 0, NULL);
-	if (!steering->ftes_cache || !steering->fgs_cache) {
-		err = -ENOMEM;
-		goto err;
-	}
+int mlx5_fs_core_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+	int err = 0;
 
 	if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
 	      (MLX5_CAP_GEN(dev, nic_flow_table))) ||
@@ -3180,8 +3143,64 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 	}
 
 	return 0;
+
+err:
+	mlx5_fs_core_cleanup(dev);
+	return err;
+}
+
+void mlx5_fs_core_free(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+
+	kmem_cache_destroy(steering->ftes_cache);
+	kmem_cache_destroy(steering->fgs_cache);
+	kfree(steering);
+	mlx5_ft_pool_destroy(dev);
+	mlx5_cleanup_fc_stats(dev);
+}
+
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_steering *steering;
+	int err = 0;
+
+	err = mlx5_init_fc_stats(dev);
+	if (err)
+		return err;
+
+	err = mlx5_ft_pool_init(dev);
+	if (err)
+		goto err;
+
+	steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+	if (!steering) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	steering->dev = dev;
+	dev->priv.steering = steering;
+
+	if (mlx5_fs_dr_is_supported(dev))
+		steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
+	else
+		steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+
+	steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+						sizeof(struct mlx5_flow_group), 0,
+						0, NULL);
+	steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+						 0, NULL);
+	if (!steering->ftes_cache || !steering->fgs_cache) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	return 0;
+
 err:
-	mlx5_cleanup_fs(dev);
+	mlx5_fs_core_free(dev);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index c488a7c5b07e..3f20523e514f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -298,8 +298,10 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
 int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
 				 enum mlx5_flow_steering_mode mode);
 
-int mlx5_init_fs(struct mlx5_core_dev *dev);
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev);
+void mlx5_fs_core_free(struct mlx5_core_dev *dev);
+int mlx5_fs_core_init(struct mlx5_core_dev *dev);
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev);
 
 int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
 void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 2589e39eb9c7..c6737720bf3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -938,6 +938,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 		goto err_sf_table_cleanup;
 	}
 
+	err = mlx5_fs_core_alloc(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to alloc flow steering\n");
+		goto err_fs;
+	}
+
 	dev->dm = mlx5_dm_create(dev);
 	if (IS_ERR(dev->dm))
 		mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
@@ -948,6 +954,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 
 	return 0;
 
+err_fs:
+	mlx5_sf_table_cleanup(dev);
 err_sf_table_cleanup:
 	mlx5_sf_hw_table_cleanup(dev);
 err_sf_hw_table_cleanup:
@@ -985,6 +993,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 	mlx5_hv_vhca_destroy(dev->hv_vhca);
 	mlx5_fw_tracer_destroy(dev->tracer);
 	mlx5_dm_cleanup(dev);
+	mlx5_fs_core_free(dev);
 	mlx5_sf_table_cleanup(dev);
 	mlx5_sf_hw_table_cleanup(dev);
 	mlx5_vhca_event_cleanup(dev);
@@ -1191,7 +1200,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 		goto err_tls_start;
 	}
 
-	err = mlx5_init_fs(dev);
+	err = mlx5_fs_core_init(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to init flow steering\n");
 		goto err_fs;
@@ -1236,7 +1245,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 err_vhca:
 	mlx5_vhca_event_stop(dev);
 err_set_hca:
-	mlx5_cleanup_fs(dev);
+	mlx5_fs_core_cleanup(dev);
 err_fs:
 	mlx5_accel_tls_cleanup(dev);
 err_tls_start:
@@ -1265,7 +1274,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
 	mlx5_ec_cleanup(dev);
 	mlx5_sf_hw_table_destroy(dev);
 	mlx5_vhca_event_stop(dev);
-	mlx5_cleanup_fs(dev);
+	mlx5_fs_core_cleanup(dev);
 	mlx5_accel_ipsec_cleanup(dev);
 	mlx5_accel_tls_cleanup(dev);
 	mlx5_fpga_device_stop(dev);
-- 
2.36.1


^ permalink raw reply related

* [net 06/11] net/mlx5e: Properly block LRO when XDP is enabled
From: Saeed Mahameed @ 2022-05-18  6:34 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Maxim Mikityanskiy, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20220518063427.123758-1-saeed@kernel.org>

From: Maxim Mikityanskiy <maximmi@nvidia.com>

LRO is incompatible and mutually exclusive with XDP. However, the needed
checks are only made when enabling XDP. If LRO is enabled when XDP is
already active, the command will succeed, and XDP will be skipped in the
data path, although still enabled.

This commit fixes the bug by checking the XDP status in
mlx5e_fix_features and disabling LRO if XDP is enabled.

Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 999241961714..3969916cfafe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3900,6 +3900,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
 		}
 	}
 
+	if (params->xdp_prog) {
+		if (features & NETIF_F_LRO) {
+			netdev_warn(netdev, "LRO is incompatible with XDP\n");
+			features &= ~NETIF_F_LRO;
+		}
+	}
+
 	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
 		features &= ~NETIF_F_RXHASH;
 		if (netdev->features & NETIF_F_RXHASH)
-- 
2.36.1


^ permalink raw reply related

* [net 07/11] net/mlx5e: Properly block HW GRO when XDP is enabled
From: Saeed Mahameed @ 2022-05-18  6:34 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Maxim Mikityanskiy, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20220518063427.123758-1-saeed@kernel.org>

From: Maxim Mikityanskiy <maximmi@nvidia.com>

HW GRO is incompatible and mutually exclusive with XDP and XSK. However,
the needed checks are only made when enabling XDP. If HW GRO is enabled
when XDP is already active, the command will succeed, and XDP will be
skipped in the data path, although still enabled.

This commit fixes the bug by checking the XDP and XSK status in
mlx5e_fix_features and disabling HW GRO if XDP is enabled.

Fixes: 83439f3c37aa ("net/mlx5e: Add HW-GRO offload")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3969916cfafe..6afd07901a10 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3905,6 +3905,18 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
 			netdev_warn(netdev, "LRO is incompatible with XDP\n");
 			features &= ~NETIF_F_LRO;
 		}
+		if (features & NETIF_F_GRO_HW) {
+			netdev_warn(netdev, "HW GRO is incompatible with XDP\n");
+			features &= ~NETIF_F_GRO_HW;
+		}
+	}
+
+	if (priv->xsk.refcnt) {
+		if (features & NETIF_F_GRO_HW) {
+			netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n",
+				    priv->xsk.refcnt);
+			features &= ~NETIF_F_GRO_HW;
+		}
 	}
 
 	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
-- 
2.36.1


^ permalink raw reply related

* [net 11/11] net/mlx5: Drain fw_reset when removing device
From: Saeed Mahameed @ 2022-05-18  6:34 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Shay Drory, Moshe Shemesh, Saeed Mahameed
In-Reply-To: <20220518063427.123758-1-saeed@kernel.org>

From: Shay Drory <shayd@nvidia.com>

In case fw sync reset is called in parallel to device removal, device
might stuck in the following deadlock:
         CPU 0                        CPU 1
         -----                        -----
                                  remove_one
                                   uninit_one (locks intf_state_mutex)
mlx5_sync_reset_now_event()
work in fw_reset->wq.
 mlx5_enter_error_state()
  mutex_lock (intf_state_mutex)
                                   cleanup_once
                                    fw_reset_cleanup()
                                     destroy_workqueue(fw_reset->wq)

Drain the fw_reset WQ, and make sure no new work is being queued, before
entering uninit_one().
The Drain is done before devlink_unregister() since fw_reset, in some
flows, is using devlink API devlink_remote_reload_actions_performed().

Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/fw_reset.c    | 25 ++++++++++++++++---
 .../ethernet/mellanox/mlx5/core/fw_reset.h    |  1 +
 .../net/ethernet/mellanox/mlx5/core/main.c    |  4 +++
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index ca1aba845dd6..81eb67fb95b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -8,7 +8,8 @@
 enum {
 	MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
 	MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
-	MLX5_FW_RESET_FLAGS_PENDING_COMP
+	MLX5_FW_RESET_FLAGS_PENDING_COMP,
+	MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
 };
 
 struct mlx5_fw_reset {
@@ -208,7 +209,10 @@ static void poll_sync_reset(struct timer_list *t)
 
 	if (fatal_error) {
 		mlx5_core_warn(dev, "Got Device Reset\n");
-		queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+		if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+			queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+		else
+			mlx5_core_err(dev, "Device is being removed, Drop new reset work\n");
 		return;
 	}
 
@@ -433,9 +437,12 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti
 	struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
 	struct mlx5_eqe *eqe = data;
 
+	if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+		return NOTIFY_DONE;
+
 	switch (eqe->sub_type) {
 	case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT:
-			queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
+		queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
 		break;
 	case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT:
 		mlx5_sync_reset_events_handle(fw_reset, eqe);
@@ -479,6 +486,18 @@ void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev)
 	mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb);
 }
 
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
+{
+	struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+	set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags);
+	cancel_work_sync(&fw_reset->fw_live_patch_work);
+	cancel_work_sync(&fw_reset->reset_request_work);
+	cancel_work_sync(&fw_reset->reset_reload_work);
+	cancel_work_sync(&fw_reset->reset_now_work);
+	cancel_work_sync(&fw_reset->reset_abort_work);
+}
+
 int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
index 694fc7cb2684..dc141c7e641a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
@@ -16,6 +16,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
 void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
 void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev);
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev);
 int mlx5_fw_reset_init(struct mlx5_core_dev *dev);
 void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c6737720bf3a..ef196cb764e2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1627,6 +1627,10 @@ static void remove_one(struct pci_dev *pdev)
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
 	struct devlink *devlink = priv_to_devlink(dev);
 
+	/* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain
+	 * fw_reset before unregistering the devlink.
+	 */
+	mlx5_drain_fw_reset(dev);
 	devlink_unregister(devlink);
 	mlx5_sriov_disable(pdev);
 	mlx5_crdump_disable(dev);
-- 
2.36.1


^ permalink raw reply related

* [net 04/11] net/mlx5e: Wrap mlx5e_trap_napi_poll into rcu_read_lock
From: Saeed Mahameed @ 2022-05-18  6:34 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Maxim Mikityanskiy, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20220518063427.123758-1-saeed@kernel.org>

From: Maxim Mikityanskiy <maximmi@nvidia.com>

The body of mlx5e_napi_poll is wrapped into rcu_read_lock to be able to
read the XDP program pointer using rcu_dereference. However, the trap RQ
NAPI doesn't use rcu_read_lock, because the trap RQ works only in the
non-linear mode, and mlx5e_skb_from_cqe_nonlinear, until recently,
didn't support XDP and didn't call rcu_dereference.

Starting from the cited commit, mlx5e_skb_from_cqe_nonlinear supports
XDP and calls rcu_dereference, but mlx5e_trap_napi_poll doesn't wrap it
into rcu_read_lock. It leads to RCU-lockdep warnings like this:

    WARNING: suspicious RCU usage

This commit fixes the issue by adding an rcu_read_lock to
mlx5e_trap_napi_poll, similarly to mlx5e_napi_poll.

Fixes: ea5d49bdae8b ("net/mlx5e: Add XDP multi buffer support to the non-linear legacy RQ")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index a55b066746cb..857840ab1e91 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -14,19 +14,26 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
 	bool busy = false;
 	int work_done = 0;
 
+	rcu_read_lock();
+
 	ch_stats->poll++;
 
 	work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
 	busy |= work_done == budget;
 	busy |= rq->post_wqes(rq);
 
-	if (busy)
-		return budget;
+	if (busy) {
+		work_done = budget;
+		goto out;
+	}
 
 	if (unlikely(!napi_complete_done(napi, work_done)))
-		return work_done;
+		goto out;
 
 	mlx5e_cq_arm(&rq->cq);
+
+out:
+	rcu_read_unlock();
 	return work_done;
 }
 
-- 
2.36.1


^ permalink raw reply related

* [net 10/11] net/mlx5e: CT: Fix setting flow_source for smfs ct tuples
From: Saeed Mahameed @ 2022-05-18  6:34 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Paul Blakey, Oz Shlomo, Saeed Mahameed
In-Reply-To: <20220518063427.123758-1-saeed@kernel.org>

From: Paul Blakey <paulb@nvidia.com>

Cited patch sets flow_source to ANY overriding the provided spec
flow_source, avoiding the optimization done by commit c9c079b4deaa
("net/mlx5: CT: Set flow source hint from provided tuple device").

To fix the above, set the dr_rule flow_source from provided flow spec.

Fixes: 3ee61ebb0df1 ("net/mlx5: CT: Add software steering ct flow steering provider")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
index 271261bf1dc2..bec9ed0103a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
@@ -330,7 +330,7 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
 	}
 
 	rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions,
-				     MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT);
+				     spec->flow_context.flow_source);
 	if (!rule) {
 		err = -EINVAL;
 		goto err_create;
-- 
2.36.1


^ permalink raw reply related

* [net 09/11] net/mlx5e: CT: Fix support for GRE tuples
From: Saeed Mahameed @ 2022-05-18  6:34 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Paul Blakey, Oz Shlomo, Saeed Mahameed
In-Reply-To: <20220518063427.123758-1-saeed@kernel.org>

From: Paul Blakey <paulb@nvidia.com>

cited commit removed support for GRE tuples when software steering was enabled.

To bring back support for GRE tuples, add GRE ipv4/ipv6 matchers.

Fixes: 3ee61ebb0df1 ("net/mlx5: CT: Add software steering ct flow steering provider")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/en/tc/ct_fs_smfs.c     | 56 +++++++++++--------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
index 59988e24b704..271261bf1dc2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
@@ -23,7 +23,7 @@ struct mlx5_ct_fs_smfs_matcher {
 };
 
 struct mlx5_ct_fs_smfs_matchers {
-	struct mlx5_ct_fs_smfs_matcher smfs_matchers[4];
+	struct mlx5_ct_fs_smfs_matcher smfs_matchers[6];
 	struct list_head used;
 };
 
@@ -44,7 +44,8 @@ struct mlx5_ct_fs_smfs_rule {
 };
 
 static inline void
-mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp)
+mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp,
+			  bool gre)
 {
 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
 
@@ -77,7 +78,7 @@ mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bo
 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
 			 ntohs(MLX5_CT_TCP_FLAGS_MASK));
-	} else {
+	} else if (!gre) {
 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport);
 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport);
 	}
@@ -87,7 +88,7 @@ mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bo
 
 static struct mlx5dr_matcher *
 mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4,
-			       bool tcp, u32 priority)
+			       bool tcp, bool gre, u32 priority)
 {
 	struct mlx5dr_matcher *dr_matcher;
 	struct mlx5_flow_spec *spec;
@@ -96,7 +97,7 @@ mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl,
 	if (!spec)
 		return ERR_PTR(-ENOMEM);
 
-	mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp);
+	mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre);
 	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS;
 
 	dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec);
@@ -108,7 +109,7 @@ mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl,
 }
 
 static struct mlx5_ct_fs_smfs_matcher *
-mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp)
+mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre)
 {
 	struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
 	struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher;
@@ -119,7 +120,7 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp
 	int prio;
 
 	matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers;
-	smfs_matcher = &matchers->smfs_matchers[ipv4 * 2 + tcp];
+	smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre];
 
 	if (refcount_inc_not_zero(&smfs_matcher->ref))
 		return smfs_matcher;
@@ -145,11 +146,11 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp
 	}
 
 	tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl;
-	dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, prio);
+	dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio);
 	if (IS_ERR(dr_matcher)) {
 		netdev_warn(fs->netdev,
-			    "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d), err: %ld\n",
-			    nat, ipv4, tcp, PTR_ERR(dr_matcher));
+			    "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n",
+			    nat, ipv4, tcp, gre, PTR_ERR(dr_matcher));
 
 		smfs_matcher = ERR_CAST(dr_matcher);
 		goto out_unlock;
@@ -222,16 +223,17 @@ mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs)
 static inline bool
 mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys)
 {
-#define DISSECTOR_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
-	const u32 basic_keys = DISSECTOR_BIT(BASIC) | DISSECTOR_BIT(CONTROL) |
-			       DISSECTOR_BIT(PORTS) | DISSECTOR_BIT(META);
-	const u32 ipv4_tcp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS) | DISSECTOR_BIT(TCP);
-	const u32 ipv4_udp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS);
-	const u32 ipv6_tcp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS) | DISSECTOR_BIT(TCP);
-	const u32 ipv6_udp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS);
+#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
+	const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META);
+	const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+	const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+	const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS);
+	const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS);
+	const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS);
+	const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS);
 
 	return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp ||
-		used_keys == ipv6_udp);
+		used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre);
 }
 
 static bool
@@ -254,20 +256,24 @@ mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *f
 	flow_rule_match_control(flow_rule, &control);
 	flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs);
 	flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs);
-	flow_rule_match_ports(flow_rule, &ports);
-	flow_rule_match_tcp(flow_rule, &tcp);
+	if (basic.key->ip_proto != IPPROTO_GRE)
+		flow_rule_match_ports(flow_rule, &ports);
+	if (basic.key->ip_proto == IPPROTO_TCP)
+		flow_rule_match_tcp(flow_rule, &tcp);
 
 	if (basic.mask->n_proto != htons(0xFFFF) ||
 	    (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) ||
 	    basic.mask->ip_proto != 0xFF ||
-	    (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP)) {
+	    (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP &&
+	     basic.key->ip_proto != IPPROTO_GRE)) {
 		ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)",
 		       ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto),
 		       basic.key->ip_proto, basic.mask->ip_proto);
 		return false;
 	}
 
-	if (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF)) {
+	if (basic.key->ip_proto != IPPROTO_GRE &&
+	    (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) {
 		ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)",
 		       ports.mask->src, ports.mask->dst);
 		return false;
@@ -291,7 +297,7 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
 	struct mlx5dr_action *actions[5];
 	struct mlx5dr_rule *rule;
 	int num_actions = 0, err;
-	bool nat, tcp, ipv4;
+	bool nat, tcp, ipv4, gre;
 
 	if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule))
 		return ERR_PTR(-EOPNOTSUPP);
@@ -314,8 +320,10 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
 	ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4;
 	tcp = MLX5_GET(fte_match_param, spec->match_value,
 		       outer_headers.ip_protocol) == IPPROTO_TCP;
+	gre = MLX5_GET(fte_match_param, spec->match_value,
+		       outer_headers.ip_protocol) == IPPROTO_GRE;
 
-	smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp);
+	smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre);
 	if (IS_ERR(smfs_matcher)) {
 		err = PTR_ERR(smfs_matcher);
 		goto err_matcher;
-- 
2.36.1


^ permalink raw reply related

* [net 08/11] net/mlx5e: Remove HW-GRO from reported features
From: Saeed Mahameed @ 2022-05-18  6:34 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Gal Pressman, Saeed Mahameed
In-Reply-To: <20220518063427.123758-1-saeed@kernel.org>

From: Gal Pressman <gal@nvidia.com>

We got reports of certain HW-GRO flows causing kernel call traces, which
might be related to firmware. To be on the safe side, disable the
feature for now and re-enable it once a driver/firmware fix is found.

Fixes: 83439f3c37aa ("net/mlx5e: Add HW-GRO offload")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6afd07901a10..fa229998606c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4873,10 +4873,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
 	netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
 
-	if (!!MLX5_CAP_GEN(mdev, shampo) &&
-	    mlx5e_check_fragmented_striding_rq_cap(mdev))
-		netdev->hw_features    |= NETIF_F_GRO_HW;
-
 	if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
 		netdev->hw_enc_features |= NETIF_F_HW_CSUM;
 		netdev->hw_enc_features |= NETIF_F_TSO;
-- 
2.36.1


^ permalink raw reply related

* [net 05/11] net/mlx5e: Block rx-gro-hw feature in switchdev mode
From: Saeed Mahameed @ 2022-05-18  6:34 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Aya Levin, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20220518063427.123758-1-saeed@kernel.org>

From: Aya Levin <ayal@nvidia.com>

When the driver is in switchdev mode and rx-gro-hw is set, the RQ needs
special CQE handling. Till then, block setting of rx-gro-hw feature in
switchdev mode, to avoid failure while setting the feature due to
failure while opening the RQ.

Fixes: f97d5c2a453e ("net/mlx5e: Add handle SHAMPO cqe support")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2f1dedc721d1..999241961714 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3864,6 +3864,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev
 	if (netdev->features & NETIF_F_NTUPLE)
 		netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
 
+	features &= ~NETIF_F_GRO_HW;
+	if (netdev->features & NETIF_F_GRO_HW)
+		netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
+
 	return features;
 }
 
-- 
2.36.1


^ permalink raw reply related

* AW: [Patch] net: af_key: check encryption module availability consistency
From: Bartschies, Thomas @ 2022-05-18  6:36 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev@vger.kernel.org
In-Reply-To: <20220517174447.0e596e4f@kernel.org>

Hi,

thanks for the info. I've reposted it with the appropriate mail headers you've suggested. Although I saw my first post in
the LKML and netdev archives. It's my first own patch posting.

Best regards,
--
Thomas Bartschies
CVK IT Systeme

-----Ursprüngliche Nachricht-----
Von: Jakub Kicinski <kuba@kernel.org> 
Gesendet: Mittwoch, 18. Mai 2022 02:45
An: Bartschies, Thomas <Thomas.Bartschies@cvk.de>
Cc: netdev@vger.kernel.org
Betreff: Re: [Patch] net: af_key: check encryption module availability consistency

On Mon, 16 May 2022 14:57:30 +0200 (CEST) Thomas Bartschies wrote:
> Since the recent introduction supporting the SM3 and SM4 hash algos for IPsec, the kernel 
> produces invalid pfkey acquire messages, when these encryption modules are disabled. This 
> happens because the availability of the algos wasn't checked in all necessary functions. 
> This patch adds these checks.
> 
> Signed-off-by: Thomas Bartschies <thomas.bartschies@cvk.de>

This has not made it into patchwork.

Did you put the list on BCC or something? If so how would people 
on the list see replies to this patch?

Please repost it with appropriate To: and CC: lists.
To: davem@davemloft.net
CC: everyone from scripts/get_maintainer

^ permalink raw reply

* [pull request][net-next 00/16] mlx5 updates 2022-05-17
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni; +Cc: netdev, Saeed Mahameed

From: Saeed Mahameed <saeedm@nvidia.com>

This series adds misc updates and Multi "uplink" port eswitch support
For more information please see tag log below.

Please pull and let me know if there is any problem.

There will be a minor conflict, mostly contextual when net-next is merged with
the current mlx5 net PR [1], I will provide resolution details if necessary. 

[1] https://patchwork.kernel.org/project/netdevbpf/list/?series=642587

Thanks,
Saeed.


The following changes since commit 6e144b47f560edc25744498f360835b1042b73dd:

  octeontx2-pf: Add support for adaptive interrupt coalescing (2022-05-17 18:05:28 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-updates-2022-05-17

for you to fetch changes up to 94db3317781922ba52722c58061e0e8517d4d80d:

  net/mlx5: Support multiport eswitch mode (2022-05-17 23:41:51 -0700)

----------------------------------------------------------------
mlx5-updates-2022-05-17

MISC updates to mlx5 dirver

1) Aya Levin allows relaxed ordering over VFs

2) Gal Pressman Adds support XDP SQs for uplink representors in switchdev mode

3) Add debugfs TC stats and command failure syndrome for debuggability

4) Tariq uses variants of vzalloc where it could help

5) Multiport eswitch support from Elic Cohen:

Eli Cohen Says:
===============

The multiport eswitch feature allows to forward traffic from a
representor net device to the uplink port of an associated eswitch's
uplink port.

This feature requires creating a LAG object. Since LAG can be created
only once for a function, the feature is mutual exclusive with either
bonding or multipath.

Multipath eswitch mode is entered automatically these conditions are
met:
1. No other LAG related mode is active.
2. A rule that explicitly forwards to an uplink port is inserted.

The implementation maintains a reference count on such rules. When the
reference count reaches zero, the LAG is released and other modes may be
used.

When an explicit rule that explicitly forwards to an uplink port is
inserted while another LAG mode is active, that rule will not be
offloaded by the hardware since the hardware cannot guarantee that the
rule will actually be forwarded to that port.

Example rules that forwards to an uplink port is:

$ tc filter add dev rep0 root flower action mirred egress \
  redirect dev uplinkrep0

$ tc filter add dev rep0 root flower action mirred egress \
  redirect dev uplinkrep1

This feature is supported only if LAG_RESOURCE_ALLOCATION firmware
configuration parameter is set to true.

The series consists of three patches:
1. Lag state machine refactor
   This patch does not add new functionality but rather changes the way
   the state of the LAG is maintained.
2. Small fix to remove unused argument.
3. The actual implementation of the feature.
===============

----------------------------------------------------------------
Aya Levin (1):
      net/mlx5e: Allow relaxed ordering over VFs

Eli Cohen (3):
      net/mlx5: Lag, refactor lag state machine
      net/mlx5: Remove unused argument
      net/mlx5: Support multiport eswitch mode

Gal Pressman (3):
      net/mlx5e: IPoIB, Improve ethtool rxnfc callback structure in IPoIB
      net/mlx5e: Support partial GSO for tunnels over vlans
      net/mlx5e: Add XDP SQs to uplink representors steering tables

Moshe Shemesh (1):
      net/mlx5: Add last command failure syndrome to debugfs

Moshe Tal (1):
      net/mlx5e: Correct the calculation of max channels for rep

Saeed Mahameed (2):
      net/mlx5: sparse: error: context imbalance in 'mlx5_vf_get_core_dev'
      net/mlx5e: CT: Add ct driver counters

Tariq Toukan (5):
      net/mlx5: Inline db alloc API function
      net/mlx5: Allocate virtually contiguous memory in vport.c
      net/mlx5: Allocate virtually contiguous memory in pci_irq.c
      net/mlx5e: Allocate virtually contiguous memory for VLANs list
      net/mlx5e: Allocate virtually contiguous memory for reps structures

 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/alloc.c    |   6 -
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c      |   7 +-
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c  |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   1 +
 .../net/ethernet/mellanox/mlx5/core/en/params.c    |   3 +-
 .../ethernet/mellanox/mlx5/core/en/tc/act/mirred.c |  14 ++
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c |  52 +++++-
 .../net/ethernet/mellanox/mlx5/core/en_common.c    |   5 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c    |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  35 +++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  28 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h    |   7 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |   3 +
 .../ethernet/mellanox/mlx5/core/ipoib/ethtool.c    |  14 +-
 .../net/ethernet/mellanox/mlx5/core/lag/debugfs.c  |  21 +--
 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c  | 192 +++++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h  |  41 +++--
 drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c   |   4 +-
 .../net/ethernet/mellanox/mlx5/core/lag/mpesw.c    | 101 +++++++++++
 .../net/ethernet/mellanox/mlx5/core/lag/mpesw.h    |  26 +++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   2 -
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c  |   8 +-
 drivers/net/ethernet/mellanox/mlx5/core/vport.c    |  52 +++---
 include/linux/mlx5/driver.h                        |   9 +-
 include/linux/mlx5/mlx5_ifc.h                      |   5 +-
 27 files changed, 489 insertions(+), 166 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h

^ permalink raw reply

* [net-next 01/16] net/mlx5: sparse: error: context imbalance in 'mlx5_vf_get_core_dev'
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni; +Cc: netdev, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Saeed Mahameed <saeedm@nvidia.com>

Removing the annotation resolves the issue for some reason.

Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 84f75aa25214..87f1552b5d73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1886,7 +1886,6 @@ static struct pci_driver mlx5_core_driver = {
  * Return: Pointer to the associated mlx5_core_dev or NULL.
  */
 struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
-			__acquires(&mdev->intf_state_mutex)
 {
 	struct mlx5_core_dev *mdev;
 
@@ -1912,7 +1911,6 @@ EXPORT_SYMBOL(mlx5_vf_get_core_dev);
  * access the mdev any more.
  */
 void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev)
-			__releases(&mdev->intf_state_mutex)
 {
 	mutex_unlock(&mdev->intf_state_mutex);
 }
-- 
2.36.1


^ permalink raw reply related

* [net-next 02/16] net/mlx5: Add last command failure syndrome to debugfs
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Moshe Shemesh, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Moshe Shemesh <moshe@nvidia.com>

Add syndrome of last command failure per command type to debugfs to ease
debugging of such failure.
last_failed_syndrome - last command failed syndrome returned by FW.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c     | 7 +++++--
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 2 ++
 include/linux/mlx5/driver.h                       | 2 ++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 26ba94cb432e..0377392848d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1887,7 +1887,8 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 	return err;
 }
 
-static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int err)
+static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
+			   u32 syndrome, int err)
 {
 	struct mlx5_cmd_stats *stats;
 
@@ -1902,6 +1903,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int
 	if (err == -EREMOTEIO) {
 		stats->failed_mbox_status++;
 		stats->last_failed_mbox_status = status;
+		stats->last_failed_syndrome = syndrome;
 	}
 	spin_unlock_irq(&stats->lock);
 }
@@ -1909,6 +1911,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int
 /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
 static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
 {
+	u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
 	u8 status = MLX5_GET(mbox_out, out, status);
 
 	if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */
@@ -1917,7 +1920,7 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *
 	if (!err && status != MLX5_CMD_STAT_OK)
 		err = -EREMOTEIO;
 
-	cmd_status_log(dev, opcode, status, err);
+	cmd_status_log(dev, opcode, status, syndrome, err);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 3d3e55a5cb11..9caa1b52321b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -192,6 +192,8 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
 					   &stats->last_failed_errno);
 			debugfs_create_u8("last_failed_mbox_status", 0400, stats->root,
 					  &stats->last_failed_mbox_status);
+			debugfs_create_x32("last_failed_syndrome", 0400, stats->root,
+					   &stats->last_failed_syndrome);
 		}
 	}
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d6bac3976913..74c8cfb771a2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -272,6 +272,8 @@ struct mlx5_cmd_stats {
 	u32		last_failed_errno;
 	/* last bad status returned by FW */
 	u8		last_failed_mbox_status;
+	/* last command failed syndrome returned by FW */
+	u32		last_failed_syndrome;
 	struct dentry  *root;
 	/* protect command average calculations */
 	spinlock_t	lock;
-- 
2.36.1


^ permalink raw reply related

* [net-next 03/16] net/mlx5: Inline db alloc API function
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Tariq Toukan, Moshe Shemesh, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Tariq Toukan <tariqt@nvidia.com>

Take the wrapper version which picks default node into a header file.
This reduces the number of exported functions.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 6 ------
 include/linux/mlx5/driver.h                     | 7 ++++++-
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index e52b0bac09da..6aca004e88cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -213,12 +213,6 @@ int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node)
 }
 EXPORT_SYMBOL_GPL(mlx5_db_alloc_node);
 
-int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
-{
-	return mlx5_db_alloc_node(dev, db, dev->priv.numa_node);
-}
-EXPORT_SYMBOL_GPL(mlx5_db_alloc);
-
 void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
 {
 	u32 db_per_page = PAGE_SIZE / cache_line_size();
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 74c8cfb771a2..b064bc278f52 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1053,9 +1053,14 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 			 int size_in, void *data_out, int size_out,
 			 u16 reg_num, int arg, int write);
 
-int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
 int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db,
 		       int node);
+
+static inline int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
+{
+	return mlx5_db_alloc_node(dev, db, dev->priv.numa_node);
+}
+
 void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
 
 const char *mlx5_command_str(int command);
-- 
2.36.1


^ permalink raw reply related

* [net-next 04/16] net/mlx5: Allocate virtually contiguous memory in vport.c
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Tariq Toukan, Moshe Shemesh, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Tariq Toukan <tariqt@nvidia.com>

Physical continuity is not necessary, and requested allocation size might
be larger than PAGE_SIZE.
Hence, use v-alloc/free API.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/vport.c   | 52 +++++++++----------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 8846d30a380a..ac020cb78072 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -280,7 +280,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
 	out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_in) +
 			req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
 
-	out = kzalloc(out_sz, GFP_KERNEL);
+	out = kvzalloc(out_sz, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -307,7 +307,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
 		ether_addr_copy(addr_list[i], mac_addr);
 	}
 out:
-	kfree(out);
+	kvfree(out);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list);
@@ -335,7 +335,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
 	in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
 		list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
 
-	in = kzalloc(in_sz, GFP_KERNEL);
+	in = kvzalloc(in_sz, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -360,7 +360,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
 	}
 
 	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
-	kfree(in);
+	kvfree(in);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
@@ -386,7 +386,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
 		list_size * MLX5_ST_SZ_BYTES(vlan_layout);
 
 	memset(out, 0, sizeof(out));
-	in = kzalloc(in_sz, GFP_KERNEL);
+	in = kvzalloc(in_sz, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -411,7 +411,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
 	}
 
 	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
-	kfree(in);
+	kvfree(in);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
@@ -542,8 +542,8 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
 
 	out_sz += nout * sizeof(*gid);
 
-	in = kzalloc(in_sz, GFP_KERNEL);
-	out = kzalloc(out_sz, GFP_KERNEL);
+	in = kvzalloc(in_sz, GFP_KERNEL);
+	out = kvzalloc(out_sz, GFP_KERNEL);
 	if (!in || !out) {
 		err = -ENOMEM;
 		goto out;
@@ -573,8 +573,8 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
 	gid->global.interface_id = tmp->global.interface_id;
 
 out:
-	kfree(in);
-	kfree(out);
+	kvfree(in);
+	kvfree(out);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid);
@@ -607,8 +607,8 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
 
 	out_sz += nout * MLX5_ST_SZ_BYTES(pkey);
 
-	in = kzalloc(in_sz, GFP_KERNEL);
-	out = kzalloc(out_sz, GFP_KERNEL);
+	in = kvzalloc(in_sz, GFP_KERNEL);
+	out = kvzalloc(out_sz, GFP_KERNEL);
 	if (!in || !out) {
 		err = -ENOMEM;
 		goto out;
@@ -638,8 +638,8 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
 		*pkey = MLX5_GET_PR(pkey, pkarr, pkey);
 
 out:
-	kfree(in);
-	kfree(out);
+	kvfree(in);
+	kvfree(out);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey);
@@ -658,7 +658,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
 
 	is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
 
-	out = kzalloc(out_sz, GFP_KERNEL);
+	out = kvzalloc(out_sz, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -717,7 +717,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
 					    system_image_guid);
 
 ex:
-	kfree(out);
+	kvfree(out);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context);
@@ -728,7 +728,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
 	struct mlx5_hca_vport_context *rep;
 	int err;
 
-	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+	rep = kvzalloc(sizeof(*rep), GFP_KERNEL);
 	if (!rep)
 		return -ENOMEM;
 
@@ -736,7 +736,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
 	if (!err)
 		*sys_image_guid = rep->sys_image_guid;
 
-	kfree(rep);
+	kvfree(rep);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid);
@@ -747,7 +747,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
 	struct mlx5_hca_vport_context *rep;
 	int err;
 
-	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+	rep = kvzalloc(sizeof(*rep), GFP_KERNEL);
 	if (!rep)
 		return -ENOMEM;
 
@@ -755,7 +755,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
 	if (!err)
 		*node_guid = rep->node_guid;
 
-	kfree(rep);
+	kvfree(rep);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
@@ -770,7 +770,7 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
 	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
 	int err;
 
-	out = kzalloc(outlen, GFP_KERNEL);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -786,7 +786,7 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
 				nic_vport_context.promisc_all);
 
 out:
-	kfree(out);
+	kvfree(out);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc);
@@ -874,7 +874,7 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
 	int value;
 	int err;
 
-	out = kzalloc(outlen, GFP_KERNEL);
+	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
@@ -891,7 +891,7 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
 	*status = !value;
 
 out:
-	kfree(out);
+	kvfree(out);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb);
@@ -1033,7 +1033,7 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
 
 	mlx5_core_dbg(dev, "vf %d\n", vf);
 	is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
-	in = kzalloc(in_sz, GFP_KERNEL);
+	in = kvzalloc(in_sz, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -1065,7 +1065,7 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
 		 req->cap_mask1_perm);
 	err = mlx5_cmd_exec_in(dev, modify_hca_vport_context, in);
 ex:
-	kfree(in);
+	kvfree(in);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
-- 
2.36.1


^ permalink raw reply related

* [net-next 05/16] net/mlx5: Allocate virtually contiguous memory in pci_irq.c
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Tariq Toukan, Moshe Shemesh, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Tariq Toukan <tariqt@nvidia.com>

Physical continuity is not necessary, and requested allocation size might
be larger than PAGE_SIZE.
Hence, use v-alloc/free API.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index db77f1d2eeb4..662f1d55e30e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -94,8 +94,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
 	if (msix_vec_count > max_msix)
 		return -EOVERFLOW;
 
-	query_cap = kzalloc(query_sz, GFP_KERNEL);
-	hca_cap = kzalloc(set_sz, GFP_KERNEL);
+	query_cap = kvzalloc(query_sz, GFP_KERNEL);
+	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
 	if (!hca_cap || !query_cap) {
 		ret = -ENOMEM;
 		goto out;
@@ -118,8 +118,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
 		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
 	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
 out:
-	kfree(hca_cap);
-	kfree(query_cap);
+	kvfree(hca_cap);
+	kvfree(query_cap);
 	return ret;
 }
 
-- 
2.36.1


^ permalink raw reply related

* [net-next 07/16] net/mlx5e: Allocate virtually contiguous memory for reps structures
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Tariq Toukan, Moshe Shemesh, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Tariq Toukan <tariqt@nvidia.com>

Physical continuity is not necessary, and requested allocation size might
be larger than PAGE_SIZE.
Hence, use v-alloc/free API.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 47f7b4c034cc..ce3b9e65c808 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -412,8 +412,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
 		    MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS));
 	nch = priv->channels.num + ptp_sq;
 
-	sqs = kcalloc(nch * mlx5e_get_dcb_num_tc(&priv->channels.params), sizeof(*sqs),
-		      GFP_KERNEL);
+	sqs = kvcalloc(nch * mlx5e_get_dcb_num_tc(&priv->channels.params), sizeof(*sqs),
+		       GFP_KERNEL);
 	if (!sqs)
 		goto out;
 
@@ -430,7 +430,7 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
 	}
 
 	err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs);
-	kfree(sqs);
+	kvfree(sqs);
 
 out:
 	if (err)
@@ -1269,7 +1269,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	struct mlx5e_rep_priv *rpriv;
 	int err;
 
-	rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+	rpriv = kvzalloc(sizeof(*rpriv), GFP_KERNEL);
 	if (!rpriv)
 		return -ENOMEM;
 
@@ -1284,7 +1284,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 		err = mlx5e_vport_vf_rep_load(dev, rep);
 
 	if (err)
-		kfree(rpriv);
+		kvfree(rpriv);
 
 	return err;
 }
@@ -1312,7 +1312,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	priv->profile->cleanup(priv);
 	mlx5e_destroy_netdev(priv);
 free_ppriv:
-	kfree(ppriv); /* mlx5e_rep_priv */
+	kvfree(ppriv); /* mlx5e_rep_priv */
 }
 
 static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
-- 
2.36.1


^ permalink raw reply related

* [net-next 08/16] net/mlx5e: IPoIB, Improve ethtool rxnfc callback structure in IPoIB
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Gal Pressman, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Gal Pressman <gal@nvidia.com>

Followup commit
79ce39be1d63 ("net/mlx5e: Improve ethtool rxnfc callback structure")
and handle CONFIG_MLX5_EN_RXNFC enabled/disabled inside the fs layer so
the ethtool callbacks are always available. The fs layer will provide
stubs when CONFIG_MLX5_EN_RXNFC is compiled out.

Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/ipoib/ethtool.c    | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index f4f7eaf16446..8da73ef5680f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -221,7 +221,6 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev,
 	return 0;
 }
 
-#ifdef CONFIG_MLX5_EN_RXNFC
 static u32 mlx5i_flow_type_mask(u32 flow_type)
 {
 	return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
@@ -243,9 +242,18 @@ static int mlx5i_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 {
 	struct mlx5e_priv *priv = mlx5i_epriv(dev);
 
+	/* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part
+	 * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc,
+	 * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc
+	 * is compiled out via CONFIG_MLX5_EN_RXNFC=n.
+	 */
+	if (info->cmd == ETHTOOL_GRXRINGS) {
+		info->data = priv->channels.params.num_channels;
+		return 0;
+	}
+
 	return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs);
 }
-#endif
 
 const struct ethtool_ops mlx5i_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
@@ -263,10 +271,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = {
 	.get_coalesce       = mlx5i_get_coalesce,
 	.set_coalesce       = mlx5i_set_coalesce,
 	.get_ts_info        = mlx5i_get_ts_info,
-#ifdef CONFIG_MLX5_EN_RXNFC
 	.get_rxnfc          = mlx5i_get_rxnfc,
 	.set_rxnfc          = mlx5i_set_rxnfc,
-#endif
 	.get_link_ksettings = mlx5i_get_link_ksettings,
 	.get_link           = ethtool_op_get_link,
 };
-- 
2.36.1


^ permalink raw reply related

* [net-next 06/16] net/mlx5e: Allocate virtually contiguous memory for VLANs list
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Tariq Toukan, Moshe Shemesh, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Tariq Toukan <tariqt@nvidia.com>

Physical continuity is not necessary, and requested allocation size might
be larger than PAGE_SIZE.
Hence, use v-alloc/free API.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index aeff1d972a46..d2f0773f95c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -155,7 +155,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
 		list_size = max_list_size;
 	}
 
-	vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
+	vlans = kvcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
 	if (!vlans)
 		return -ENOMEM;
 
@@ -171,7 +171,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
 		netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n",
 			   err);
 
-	kfree(vlans);
+	kvfree(vlans);
 	return err;
 }
 
-- 
2.36.1


^ permalink raw reply related

* [net-next 09/16] net/mlx5e: Support partial GSO for tunnels over vlans
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Gal Pressman, Aya Levin, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Gal Pressman <gal@nvidia.com>

Offloading outer checksum on tunnels requires GSO partial, add it to
'vlan_features' to allow offloading tunnels over vlans.
For example, running GENEVE over vlan & ipv6 (mandatory UDP checksum)
now allows for hardware TSO instead of software segmentation in GSO
only.

Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bf3bca79e160..29e1e1f8f49e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4812,6 +4812,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	netdev->vlan_features    |= NETIF_F_TSO6;
 	netdev->vlan_features    |= NETIF_F_RXCSUM;
 	netdev->vlan_features    |= NETIF_F_RXHASH;
+	netdev->vlan_features    |= NETIF_F_GSO_PARTIAL;
 
 	netdev->mpls_features    |= NETIF_F_SG;
 	netdev->mpls_features    |= NETIF_F_HW_CSUM;
@@ -4877,7 +4878,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 						NETIF_F_GSO_IPXIP6;
 	}
 
-	netdev->hw_features	                 |= NETIF_F_GSO_PARTIAL;
 	netdev->gso_partial_features             |= NETIF_F_GSO_UDP_L4;
 	netdev->hw_features                      |= NETIF_F_GSO_UDP_L4;
 	netdev->features                         |= NETIF_F_GSO_UDP_L4;
-- 
2.36.1


^ permalink raw reply related

* [net-next 10/16] net/mlx5e: Allow relaxed ordering over VFs
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Aya Levin, Gal Pressman, Marina Varshaver, Gal Shalom,
	Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Aya Levin <ayal@nvidia.com>

By PCI spec, the config space of the VF always report relaxed ordering
not supported while it inherits this property from its PF. Hence using
pcie_relaxed_ordering_enable(), always disables the relaxed ordering on
all VFs. Remove this check and rely on the firmware which queries the
config space of the PF and set the capability bit accordingly.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Marina Varshaver <marinav@nvidia.com>
Reviewed-by: Gal Shalom <galshalom@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 3 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 5 ++---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 3c1edfa33aa7..68364484a435 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -565,8 +565,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
 static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 {
 	bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
-	bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
-		MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+	bool ro = MLX5_CAP_GEN(mdev, relaxed_ordering_write);
 
 	return ro && lro_en ?
 		MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index c0f409c195bf..43a536cb81db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -38,12 +38,11 @@
 
 void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
 {
-	bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev);
 	bool ro_write = MLX5_CAP_GEN(mdev, relaxed_ordering_write);
 	bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read);
 
-	MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_pci_enable && ro_read);
-	MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_pci_enable && ro_write);
+	MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_read);
+	MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_write);
 }
 
 static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
-- 
2.36.1


^ permalink raw reply related

* [net-next 13/16] net/mlx5e: Add XDP SQs to uplink representors steering tables
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Gal Pressman, Maor Dickman, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Gal Pressman <gal@nvidia.com>

This patch adds the XDP SQs to the uplink representors steering tables
in swichdev mode and enables XDP usage on them.

Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_rep.c    | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index aa32b1062f7a..eb90e79388f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -399,7 +399,9 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
 {
+	int sqs_per_channel = mlx5e_get_dcb_num_tc(&priv->channels.params);
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	bool is_uplink_rep = mlx5e_is_uplink_rep(priv);
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep = rpriv->rep;
 	int n, tc, nch, num_sqs = 0;
@@ -411,9 +413,13 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
 	ptp_sq = !!(priv->channels.ptp &&
 		    MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS));
 	nch = priv->channels.num + ptp_sq;
+	/* +2 for xdpsqs, they don't exist on the ptp channel but will not be
+	 * counted for by num_sqs.
+	 */
+	if (is_uplink_rep)
+		sqs_per_channel += 2;
 
-	sqs = kvcalloc(nch * mlx5e_get_dcb_num_tc(&priv->channels.params), sizeof(*sqs),
-		       GFP_KERNEL);
+	sqs = kvcalloc(nch * sqs_per_channel, sizeof(*sqs), GFP_KERNEL);
 	if (!sqs)
 		goto out;
 
@@ -421,6 +427,13 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
 		c = priv->channels.c[n];
 		for (tc = 0; tc < c->num_tc; tc++)
 			sqs[num_sqs++] = c->sq[tc].sqn;
+
+		if (is_uplink_rep) {
+			if (c->xdp)
+				sqs[num_sqs++] = c->rq_xdpsq.sqn;
+
+			sqs[num_sqs++] = c->xdpsq.sqn;
+		}
 	}
 	if (ptp_sq) {
 		struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
-- 
2.36.1


^ permalink raw reply related

* [net-next 11/16] net/mlx5e: CT: Add ct driver counters
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Saeed Mahameed, Paul Blakey, Oz Shlomo, Paul Blakey
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Saeed Mahameed <saeedm@nvidia.com>

Connection offload is translated to multiple rules over several
hardware flow tables. Unhandled end-cases may cause a hardware
resource leak causing multiple system symptoms such as a host
memory leak, decreased performance and other scale related issues.

Export the current number of firmware FTEs related to the CT table
as a debugfs counter. Also add a dropped packets counter to help
debug packets dropped on restore failure.

To show the offloaded count:
cat /sys/kernel/debug/mlx5/<PCI>/ct_nic/offloaded

To show the dropped count:
cat /sys/kernel/debug/mlx5/<PCI>/ct_nic/rx_dropped

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Roi Dayan <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/tc_ct.c    | 52 +++++++++++++++++--
 1 file changed, 48 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 228fbd2c20d4..bceea7a1589e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -15,6 +15,7 @@
 #include <linux/refcount.h>
 #include <linux/xarray.h>
 #include <linux/if_macvlan.h>
+#include <linux/debugfs.h>
 
 #include "lib/fs_chains.h"
 #include "en/tc_ct.h"
@@ -47,6 +48,15 @@
 #define ct_dbg(fmt, args...)\
 	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
 
+struct mlx5_tc_ct_debugfs {
+	struct {
+		atomic_t offloaded;
+		atomic_t rx_dropped;
+	} stats;
+
+	struct dentry *root;
+};
+
 struct mlx5_tc_ct_priv {
 	struct mlx5_core_dev *dev;
 	const struct net_device *netdev;
@@ -66,6 +76,8 @@ struct mlx5_tc_ct_priv {
 	struct mlx5_ct_fs *fs;
 	struct mlx5_ct_fs_ops *fs_ops;
 	spinlock_t ht_lock; /* protects ft entries */
+
+	struct mlx5_tc_ct_debugfs debugfs;
 };
 
 struct mlx5_ct_flow {
@@ -520,6 +532,8 @@ mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
 {
 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+
+	atomic_dec(&ct_priv->debugfs.stats.offloaded);
 }
 
 static struct flow_action_entry *
@@ -1040,6 +1054,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
 	if (err)
 		goto err_nat;
 
+	atomic_inc(&ct_priv->debugfs.stats.offloaded);
 	return 0;
 
 err_nat:
@@ -2064,6 +2079,29 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
 	return err;
 }
 
+static void
+mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+	bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB;
+	struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
+	char dirname[16] = {};
+
+	if (sscanf(dirname, "ct_%s", is_fdb ? "fdb" : "nic") < 0)
+		return;
+
+	ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev));
+	debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
+				&ct_dbgfs->stats.offloaded);
+	debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
+				&ct_dbgfs->stats.rx_dropped);
+}
+
+static void
+mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+	debugfs_remove_recursive(ct_priv->debugfs.root);
+}
+
 #define INIT_ERR_PREFIX "tc ct offload init failed"
 
 struct mlx5_tc_ct_priv *
@@ -2139,6 +2177,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
 	if (err)
 		goto err_init_fs;
 
+	mlx5_ct_tc_create_dbgfs(ct_priv);
 	return ct_priv;
 
 err_init_fs:
@@ -2171,6 +2210,7 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
 	if (!ct_priv)
 		return;
 
+	mlx5_ct_tc_remove_dbgfs(ct_priv);
 	chains = ct_priv->chains;
 
 	ct_priv->fs_ops->destroy(ct_priv->fs);
@@ -2200,22 +2240,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
 		return true;
 
 	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
-		return false;
+		goto out_inc_drop;
 
 	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
-		return false;
+		goto out_inc_drop;
 
 	spin_lock(&ct_priv->ht_lock);
 
 	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
 	if (!entry) {
 		spin_unlock(&ct_priv->ht_lock);
-		return false;
+		goto out_inc_drop;
 	}
 
 	if (IS_ERR(entry)) {
 		spin_unlock(&ct_priv->ht_lock);
-		return false;
+		goto out_inc_drop;
 	}
 	spin_unlock(&ct_priv->ht_lock);
 
@@ -2223,4 +2263,8 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
 	__mlx5_tc_ct_entry_put(entry);
 
 	return true;
+
+out_inc_drop:
+	atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
+	return false;
 }
-- 
2.36.1


^ permalink raw reply related

* [net-next 12/16] net/mlx5e: Correct the calculation of max channels for rep
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Moshe Tal, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Moshe Tal <moshet@nvidia.com>

Correct the calculation of maximum channels of rep to better utilize
the hardware resources and allow a larger scale of reps.

This will allow creation of all virtual ports configured.

Fixes: 473baf2e9e8c ("net/mlx5e: Allow profile-specific limitation on max num of channels")
Signed-off-by: Moshe Tal <moshet@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  9 +++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  | 10 ++++++++--
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index b90902db7819..65d3c4865abf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1220,6 +1220,7 @@ mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev)
 		MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe);
 }
 
+int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev);
 int mlx5e_priv_init(struct mlx5e_priv *priv,
 		    const struct mlx5e_profile *profile,
 		    struct net_device *netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 29e1e1f8f49e..6816a024db8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5221,6 +5221,15 @@ mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
 	return max_nch;
 }
 
+int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev)
+{
+	/* Indirect TIRS: 2 sets of TTCs (inner + outer steering)
+	 * and 1 set of direct TIRS
+	 */
+	return 2 * MLX5E_NUM_INDIR_TIRS
+		+ mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile);
+}
+
 /* mlx5e generic netdev management API (move to en_common.c) */
 int mlx5e_priv_init(struct mlx5e_priv *priv,
 		    const struct mlx5e_profile *profile,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index ce3b9e65c808..aa32b1062f7a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -604,10 +604,16 @@ bool mlx5e_eswitch_vf_rep(const struct net_device *netdev)
 	return netdev->netdev_ops == &mlx5e_netdev_ops_rep;
 }
 
+/* One indirect TIR set for outer. Inner not supported in reps. */
+#define REP_NUM_INDIR_TIRS MLX5E_NUM_INDIR_TIRS
+
 static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev)
 {
-	return (1 << MLX5_CAP_GEN(mdev, log_max_tir)) /
-		mlx5_eswitch_get_total_vports(mdev);
+	int max_tir_num = 1 << MLX5_CAP_GEN(mdev, log_max_tir);
+	int num_vports = mlx5_eswitch_get_total_vports(mdev);
+
+	return (max_tir_num - mlx5e_get_pf_num_tirs(mdev)
+		- (num_vports * REP_NUM_INDIR_TIRS)) / num_vports;
 }
 
 static void mlx5e_build_rep_params(struct net_device *netdev)
-- 
2.36.1


^ permalink raw reply related

* [net-next 14/16] net/mlx5: Lag, refactor lag state machine
From: Saeed Mahameed @ 2022-05-18  6:49 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Eli Cohen, Mark Bloch, Saeed Mahameed
In-Reply-To: <20220518064938.128220-1-saeed@kernel.org>

From: Eli Cohen <elic@nvidia.com>

LAG state machine is implemented using bit flags. However, all these bit
flags, except for MLX5_LAG_FLAG_HASH_BASED, are really mutual exclusive.

In addition, MLX5_LAG_FLAG_READY is used by bonding to mark if we have
our netdevices successfully added to lag and does not really belong in
the same flags variable as the other flags.

Rename MLX5_LAG_FLAG_READY to MLX5_LAG_FLAG_NDEVS_READY to better
reflect its purpose and put it in a new flags variable.

For the rest of the flags, we introduce a mode enum to hold the state
of the LAG.

Remove the shared fdb boolean flag from struct mlx5_lag and store this
configuration as a mode flag.

Change all flag related operations to use standard Linux APIs.

Signed-off-by: Eli Cohen <elic@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/lag/debugfs.c |  12 +-
 .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 112 +++++++++++-------
 .../net/ethernet/mellanox/mlx5/core/lag/lag.h |  33 +++---
 .../net/ethernet/mellanox/mlx5/core/lag/mp.c  |   4 +-
 4 files changed, 93 insertions(+), 68 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
index 443daf6e3d4b..6e7001c0cfd4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
@@ -5,11 +5,11 @@
 
 static char *get_str_mode_type(struct mlx5_lag *ldev)
 {
-	if (ldev->flags & MLX5_LAG_FLAG_ROCE)
+	if (ldev->mode == MLX5_LAG_MODE_ROCE)
 		return "roce";
-	if (ldev->flags & MLX5_LAG_FLAG_SRIOV)
+	if (ldev->mode == MLX5_LAG_MODE_SRIOV)
 		return "switchdev";
-	if (ldev->flags & MLX5_LAG_FLAG_MULTIPATH)
+	if (ldev->mode == MLX5_LAG_MODE_MULTIPATH)
 		return "multipath";
 
 	return NULL;
@@ -43,7 +43,7 @@ static int port_sel_mode_show(struct seq_file *file, void *priv)
 	ldev = dev->priv.lag;
 	mutex_lock(&ldev->lock);
 	if (__mlx5_lag_is_active(ldev))
-		mode = get_str_port_sel_mode(ldev->flags);
+		mode = get_str_port_sel_mode(ldev->mode_flags);
 	else
 		ret = -EINVAL;
 	mutex_unlock(&ldev->lock);
@@ -79,7 +79,7 @@ static int flags_show(struct seq_file *file, void *priv)
 	mutex_lock(&ldev->lock);
 	lag_active = __mlx5_lag_is_active(ldev);
 	if (lag_active)
-		shared_fdb = ldev->shared_fdb;
+		shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
 
 	mutex_unlock(&ldev->lock);
 	if (!lag_active)
@@ -103,7 +103,7 @@ static int mapping_show(struct seq_file *file, void *priv)
 	mutex_lock(&ldev->lock);
 	lag_active = __mlx5_lag_is_active(ldev);
 	if (lag_active) {
-		if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED) {
+		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
 			mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports,
 					      &num_ports);
 			hash = true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index b6dd9043061f..3eb195cba205 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -53,21 +53,30 @@ enum {
  */
 static DEFINE_SPINLOCK(lag_lock);
 
-static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, bool shared_fdb, u8 flags)
+static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
 {
+	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
+
+	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
+}
+
+static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
+			       unsigned long flags)
+{
+	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
+	int port_sel_mode = get_port_sel_mode(mode, flags);
 	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
-	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
+	void *lag_ctx;
 
+	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
 	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
-
 	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
-	if (!(flags & MLX5_LAG_FLAG_HASH_BASED)) {
+	if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) {
 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
-	} else {
-		MLX5_SET(lagc, lag_ctx, port_select_mode,
-			 MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT);
 	}
+	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
 
 	return mlx5_cmd_exec_in(dev, create_lag, in);
 }
@@ -139,7 +148,7 @@ void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
 				   struct mlx5_lag *ldev,
 				   struct lag_tracker *tracker,
-				   u8 flags)
+				   unsigned long flags)
 {
 	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
@@ -150,7 +159,7 @@ static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
 	int i;
 	int j;
 
-	if (flags & MLX5_LAG_FLAG_HASH_BASED) {
+	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
 		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
 				      &num_enabled);
 		for (i = 0; i < num_enabled; i++) {
@@ -227,6 +236,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
 		ldev->nb.notifier_call = NULL;
 		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
 	}
+	ldev->mode = MLX5_LAG_MODE_NONE;
 
 	err = mlx5_lag_mp_init(ldev);
 	if (err)
@@ -252,12 +262,12 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 
 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
 {
-	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
+	return ldev->mode == MLX5_LAG_MODE_ROCE;
 }
 
 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
 {
-	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
+	return ldev->mode == MLX5_LAG_MODE_SRIOV;
 }
 
 /* Create a mapping between steering slots and active ports.
@@ -372,7 +382,7 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
 {
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 
-	if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED)
+	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags))
 		return mlx5_lag_port_sel_modify(ldev, ports);
 	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
 }
@@ -404,19 +414,19 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
 			memcpy(ldev->v2p_map, ports, sizeof(ports));
 
 			mlx5_lag_print_mapping(dev0, ldev, tracker,
-					       ldev->flags);
+					       ldev->mode_flags);
 			break;
 		}
 	}
 
 	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
-	    !(ldev->flags & MLX5_LAG_FLAG_ROCE))
+	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
 		mlx5_lag_drop_rule_setup(ldev, tracker);
 }
 
 #define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4
 static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
-					   struct lag_tracker *tracker, u8 *flags)
+					   unsigned long *flags)
 {
 	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
 
@@ -424,7 +434,7 @@ static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
 		/* Four ports are support only in hash mode */
 		if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table))
 			return -EINVAL;
-		*flags |= MLX5_LAG_FLAG_HASH_BASED;
+		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
 		if (ldev->ports > 2)
 			ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
 	}
@@ -433,38 +443,46 @@ static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
 }
 
 static int mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
-					       struct lag_tracker *tracker, u8 *flags)
+					       struct lag_tracker *tracker, unsigned long *flags)
 {
 	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
 
 	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
 	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
-		*flags |= MLX5_LAG_FLAG_HASH_BASED;
+		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
 
 	return 0;
 }
 
-static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
-				      struct lag_tracker *tracker, u8 *flags)
+static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
+			      struct lag_tracker *tracker, bool shared_fdb,
+			      unsigned long *flags)
 {
-	bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE);
+	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
+
+	*flags = 0;
+	if (shared_fdb)
+		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
 
 	if (roce_lag)
-		return mlx5_lag_set_port_sel_mode_roce(ldev, tracker, flags);
+		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
+
 	return mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, flags);
 }
 
-char *get_str_port_sel_mode(u8 flags)
+char *get_str_port_sel_mode(unsigned long flags)
 {
-	if (flags &  MLX5_LAG_FLAG_HASH_BASED)
+	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
 		return "hash";
 	return "queue_affinity";
 }
 
 static int mlx5_create_lag(struct mlx5_lag *ldev,
 			   struct lag_tracker *tracker,
-			   bool shared_fdb, u8 flags)
+			   enum mlx5_lag_mode mode,
+			   unsigned long flags)
 {
+	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
@@ -474,7 +492,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
 	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
 		       shared_fdb, get_str_port_sel_mode(flags));
 
-	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, shared_fdb, flags);
+	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
 	if (err) {
 		mlx5_core_err(dev0,
 			      "Failed to create LAG (%d)\n",
@@ -503,20 +521,20 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
 
 int mlx5_activate_lag(struct mlx5_lag *ldev,
 		      struct lag_tracker *tracker,
-		      u8 flags,
+		      enum mlx5_lag_mode mode,
 		      bool shared_fdb)
 {
-	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
+	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	unsigned long flags;
 	int err;
 
-	err = mlx5_lag_set_port_sel_mode(ldev, tracker, &flags);
+	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
 	if (err)
 		return err;
 
 	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
-
-	if (flags & MLX5_LAG_FLAG_HASH_BASED) {
+	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
 		err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
 					       ldev->v2p_map);
 		if (err) {
@@ -527,9 +545,9 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 		}
 	}
 
-	err = mlx5_create_lag(ldev, tracker, shared_fdb, flags);
+	err = mlx5_create_lag(ldev, tracker, mode, flags);
 	if (err) {
-		if (flags & MLX5_LAG_FLAG_HASH_BASED)
+		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
 			mlx5_lag_port_sel_destroy(ldev);
 		if (roce_lag)
 			mlx5_core_err(dev0,
@@ -545,8 +563,8 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 	    !roce_lag)
 		mlx5_lag_drop_rule_setup(ldev, tracker);
 
-	ldev->flags |= flags;
-	ldev->shared_fdb = shared_fdb;
+	ldev->mode = mode;
+	ldev->mode_flags = flags;
 	return 0;
 }
 
@@ -556,16 +574,17 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
 	bool roce_lag = __mlx5_lag_is_roce(ldev);
-	u8 flags = ldev->flags;
+	unsigned long flags = ldev->mode_flags;
 	int err;
 
-	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
+	ldev->mode = MLX5_LAG_MODE_NONE;
+	ldev->mode_flags = 0;
 	mlx5_lag_mp_reset(ldev);
 
-	if (ldev->shared_fdb) {
+	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
 		mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
 							 dev1->priv.eswitch);
-		ldev->shared_fdb = false;
+		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
 	}
 
 	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
@@ -582,7 +601,7 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 		return err;
 	}
 
-	if (flags & MLX5_LAG_FLAG_HASH_BASED)
+	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
 		mlx5_lag_port_sel_destroy(ldev);
 	if (mlx5_lag_has_drop_rule(ldev))
 		mlx5_lag_drop_rule_cleanup(ldev);
@@ -658,9 +677,9 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
 
 static void mlx5_disable_lag(struct mlx5_lag *ldev)
 {
+	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
-	bool shared_fdb = ldev->shared_fdb;
 	bool roce_lag;
 	int err;
 	int i;
@@ -759,8 +778,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 			mlx5_lag_remove_devices(ldev);
 
 		err = mlx5_activate_lag(ldev, &tracker,
-					roce_lag ? MLX5_LAG_FLAG_ROCE :
-						   MLX5_LAG_FLAG_SRIOV,
+					roce_lag ? MLX5_LAG_MODE_ROCE :
+						   MLX5_LAG_MODE_SRIOV,
 					shared_fdb);
 		if (err) {
 			if (shared_fdb || roce_lag)
@@ -1156,7 +1175,7 @@ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
 
 	mutex_lock(&ldev->lock);
 	mlx5_ldev_remove_netdev(ldev, netdev);
-	ldev->flags &= ~MLX5_LAG_FLAG_READY;
+	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
 
 	lag_is_active = __mlx5_lag_is_active(ldev);
 	mutex_unlock(&ldev->lock);
@@ -1183,7 +1202,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
 			break;
 
 	if (i >= ldev->ports)
-		ldev->flags |= MLX5_LAG_FLAG_READY;
+		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
 	mutex_unlock(&ldev->lock);
 	mlx5_queue_bond_work(ldev, 0);
 }
@@ -1252,7 +1271,8 @@ bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
 
 	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev(dev);
-	res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
+	res = ldev && __mlx5_lag_is_sriov(ldev) &&
+	      test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
 	spin_unlock(&lag_lock);
 
 	return res;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 46683b84ff84..244b548e1420 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -17,16 +17,20 @@ enum {
 };
 
 enum {
-	MLX5_LAG_FLAG_ROCE   = 1 << 0,
-	MLX5_LAG_FLAG_SRIOV  = 1 << 1,
-	MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
-	MLX5_LAG_FLAG_READY = 1 << 3,
-	MLX5_LAG_FLAG_HASH_BASED = 1 << 4,
+	MLX5_LAG_FLAG_NDEVS_READY,
 };
 
-#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\
-			     MLX5_LAG_FLAG_MULTIPATH | \
-			     MLX5_LAG_FLAG_HASH_BASED)
+enum {
+	MLX5_LAG_MODE_FLAG_HASH_BASED,
+	MLX5_LAG_MODE_FLAG_SHARED_FDB,
+};
+
+enum mlx5_lag_mode {
+	MLX5_LAG_MODE_NONE,
+	MLX5_LAG_MODE_ROCE,
+	MLX5_LAG_MODE_SRIOV,
+	MLX5_LAG_MODE_MULTIPATH,
+};
 
 struct lag_func {
 	struct mlx5_core_dev *dev;
@@ -47,11 +51,12 @@ struct lag_tracker {
  * It serves both its phys functions.
  */
 struct mlx5_lag {
-	u8                        flags;
+	enum mlx5_lag_mode        mode;
+	unsigned long		  mode_flags;
+	unsigned long		  state_flags;
 	u8			  ports;
 	u8			  buckets;
 	int			  mode_changes_in_progress;
-	bool			  shared_fdb;
 	u8			  v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
 	struct kref               ref;
 	struct lag_func           pf[MLX5_MAX_PORTS];
@@ -74,25 +79,25 @@ mlx5_lag_dev(struct mlx5_core_dev *dev)
 static inline bool
 __mlx5_lag_is_active(struct mlx5_lag *ldev)
 {
-	return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
+	return ldev->mode != MLX5_LAG_MODE_NONE;
 }
 
 static inline bool
 mlx5_lag_is_ready(struct mlx5_lag *ldev)
 {
-	return ldev->flags & MLX5_LAG_FLAG_READY;
+	return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
 }
 
 void mlx5_modify_lag(struct mlx5_lag *ldev,
 		     struct lag_tracker *tracker);
 int mlx5_activate_lag(struct mlx5_lag *ldev,
 		      struct lag_tracker *tracker,
-		      u8 flags,
+		      enum mlx5_lag_mode mode,
 		      bool shared_fdb);
 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 				struct net_device *ndev);
 
-char *get_str_port_sel_mode(u8 flags);
+char *get_str_port_sel_mode(unsigned long flags);
 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
 			   u8 *ports, int *num_enabled);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
index d6c3e6dfd71f..0259a149a64c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -11,7 +11,7 @@
 
 static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
 {
-	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
+	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
 }
 
 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
@@ -179,7 +179,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
 		struct lag_tracker tracker;
 
 		tracker = ldev->tracker;
-		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
+		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
 	}
 
 	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
-- 
2.36.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox