From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev, Moshe Shemesh <moshe@nvidia.com>,
Saeed Mahameed <saeedm@nvidia.com>,
Jacob Keller <jacob.e.keller@intel.com>,
Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
Subject: [PATCH 6.6 38/75] net/mlx5: Add support for sync reset using hot reset
Date: Tue, 2 Sep 2025 15:20:50 +0200 [thread overview]
Message-ID: <20250902131936.615991393@linuxfoundation.org> (raw)
In-Reply-To: <20250902131935.107897242@linuxfoundation.org>
6.6-stable review patch. If anyone has any objections, please let me know.
------------------
From: Moshe Shemesh <moshe@nvidia.com>
[ Upstream commit 57502f62678ced0149d415324931bde37b42885a ]
On device that supports sync reset for firmware activate using hot
reset, the driver queries the required reset method while handling the
sync reset request. If the required reset method is hot reset, the
driver will use pci_reset_bus() to reset the PCI link instead of the
link toggle.
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://patch.msgid.link/20240911201757.1505453-11-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Stable-dep-of: 902a8bc23a24 ("net/mlx5: Fix lockdep assertion on sync reset unload event")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
.../ethernet/mellanox/mlx5/core/fw_reset.c | 82 +++++++++++++++----
.../net/ethernet/mellanox/mlx5/core/main.c | 3 +
2 files changed, 69 insertions(+), 16 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 6b17346aa4cef..a1c2dd7f5c2c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -26,6 +26,7 @@ struct mlx5_fw_reset {
struct work_struct reset_now_work;
struct work_struct reset_abort_work;
unsigned long reset_flags;
+ u8 reset_method;
struct timer_list timer;
struct completion done;
int ret;
@@ -94,7 +95,7 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
}
static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
- u8 *reset_type, u8 *reset_state)
+ u8 *reset_type, u8 *reset_state, u8 *reset_method)
{
u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
@@ -110,13 +111,26 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
*reset_type = MLX5_GET(mfrl_reg, out, reset_type);
if (reset_state)
*reset_state = MLX5_GET(mfrl_reg, out, reset_state);
+ if (reset_method)
+ *reset_method = MLX5_GET(mfrl_reg, out, pci_reset_req_method);
return 0;
}
int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
{
- return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL);
+ return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL);
+}
+
+static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev,
+ u8 *reset_method)
+{
+ if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method)) {
+ *reset_method = MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE;
+ return 0;
+ }
+
+ return mlx5_reg_mfrl_query(dev, NULL, NULL, NULL, reset_method);
}
static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
@@ -124,7 +138,7 @@ static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
{
u8 reset_state;
- if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
+ if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state, NULL))
goto out;
if (!reset_state)
@@ -402,7 +416,11 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
- if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
+ err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method);
+ if (err)
+ mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err);
+
+ if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
!mlx5_is_reset_now_capable(dev)) {
err = mlx5_fw_reset_set_reset_sync_nack(dev);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s",
@@ -419,21 +437,15 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
}
-static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
+static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id)
{
struct pci_bus *bridge_bus = dev->pdev->bus;
struct pci_dev *bridge = bridge_bus->self;
unsigned long timeout;
struct pci_dev *sdev;
- u16 reg16, dev_id;
int cap, err;
+ u16 reg16;
- err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
- if (err)
- return pcibios_err_to_errno(err);
- err = mlx5_check_dev_ids(dev, dev_id);
- if (err)
- return err;
cap = pci_find_capability(bridge, PCI_CAP_ID_EXP);
if (!cap)
return -EOPNOTSUPP;
@@ -503,6 +515,44 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
return err;
}
+static int mlx5_pci_reset_bus(struct mlx5_core_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method))
+ return -EOPNOTSUPP;
+
+ return pci_reset_bus(dev->pdev);
+}
+
+static int mlx5_sync_pci_reset(struct mlx5_core_dev *dev, u8 reset_method)
+{
+ u16 dev_id;
+ int err;
+
+ err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
+ if (err)
+ return pcibios_err_to_errno(err);
+ err = mlx5_check_dev_ids(dev, dev_id);
+ if (err)
+ return err;
+
+ switch (reset_method) {
+ case MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE:
+ err = mlx5_pci_link_toggle(dev, dev_id);
+ if (err)
+ mlx5_core_warn(dev, "mlx5_pci_link_toggle failed\n");
+ break;
+ case MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET:
+ err = mlx5_pci_reset_bus(dev);
+ if (err)
+ mlx5_core_warn(dev, "mlx5_pci_reset_bus failed\n");
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
static void mlx5_sync_reset_now_event(struct work_struct *work)
{
struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
@@ -521,9 +571,9 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
goto done;
}
- err = mlx5_pci_link_toggle(dev);
+ err = mlx5_sync_pci_reset(dev, fw_reset->reset_method);
if (err) {
- mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
+ mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err);
set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags);
}
@@ -585,9 +635,9 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work)
mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state);
if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) {
- err = mlx5_pci_link_toggle(dev);
+ err = mlx5_sync_pci_reset(dev, fw_reset->reset_method);
if (err) {
- mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, err %d\n", err);
+ mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", err);
fw_reset->ret = err;
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 62a85f09b52fd..8a11e410f7c13 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -627,6 +627,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload))
MLX5_SET(cmd_hca_cap, set_hca_cap,
pci_sync_for_fw_update_with_driver_unload, 1);
+ if (MLX5_CAP_GEN_MAX(dev, pcie_reset_using_hotreset_method))
+ MLX5_SET(cmd_hca_cap, set_hca_cap,
+ pcie_reset_using_hotreset_method, 1);
if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
MLX5_SET(cmd_hca_cap,
--
2.50.1
next prev parent reply other threads:[~2025-09-02 13:38 UTC|newest]
Thread overview: 83+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-02 13:20 [PATCH 6.6 00/75] 6.6.104-rc1 review Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 01/75] of: dynamic: Fix memleak when of_pci_add_properties() failed Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 02/75] pinctrl: STMFX: add missing HAS_IOMEM dependency Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 03/75] mips: dts: lantiq: danube: add missing burst length property Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 04/75] mips: lantiq: xway: sysctrl: rename the etop node Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 05/75] of: Add a helper to free property struct Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 06/75] of: dynamic: Fix use after free in of_changeset_add_prop_helper() Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 07/75] ftrace: Fix potential warning in trace_printk_seq during ftrace_dump Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 08/75] scsi: core: sysfs: Correct sysfs attributes access rights Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 09/75] smb: client: fix race with concurrent opens in unlink(2) Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 10/75] smb: client: fix race with concurrent opens in rename(2) Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 11/75] ASoC: codecs: tx-macro: correct tx_macro_component_drv name Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 12/75] erofs: fix atomic context detection when !CONFIG_DEBUG_LOCK_ALLOC Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 13/75] ACPI: EC: Add device to acpi_ec_no_wakeup[] qurik list Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 14/75] nfs: fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 15/75] NFS: Fix a race when updating an existing write Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 16/75] vhost/net: Protect ubufs with rcu read lock in vhost_net_ubuf_put() Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 17/75] net: ipv4: fix regression in local-broadcast routes Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 18/75] drm/msm: Defer fd_install in SUBMIT ioctl Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 19/75] powerpc/kvm: Fix ifdef to remove build warning Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 20/75] HID: input: rename hidinput_set_battery_charge_status() Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 21/75] HID: input: report battery status changes immediately Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 22/75] Bluetooth: hci_event: Treat UNKNOWN_CONN_ID on disconnect as success Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 23/75] Bluetooth: hci_event: Mark connection as closed during suspend disconnect Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 24/75] Bluetooth: hci_event: Detect if HCI_EV_NUM_COMP_PKTS is unbalanced Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 25/75] Bluetooth: hci_sync: fix set_local_name race condition Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 26/75] atm: atmtcp: Prevent arbitrary write in atmtcp_recv_control() Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 27/75] drm/nouveau: remove unused increment in gm200_flcn_pio_imem_wr Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 28/75] drm/nouveau: remove unused memory target test Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 29/75] ice: Introduce ice_xdp_buff Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 30/75] ice: gather page_count()s of each frag right before XDP prog call Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 31/75] ice: stop storing XDP verdict within ice_rx_buf Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 32/75] ice: fix incorrect counter for buffer allocation failures Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 33/75] dt-bindings: display/msm: qcom,mdp5: drop lut clock Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 34/75] net: dlink: fix multicast stats being counted incorrectly Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 35/75] phy: mscc: Fix when PTP clock is register and unregister Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 36/75] net/mlx5: Reload auxiliary drivers on fw_activate Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 37/75] net/mlx5: Add device cap for supporting hot reset in sync reset flow Greg Kroah-Hartman
2025-09-02 13:20 ` Greg Kroah-Hartman [this message]
2025-09-02 13:20 ` [PATCH 6.6 39/75] net/mlx5: Fix lockdep assertion on sync reset unload event Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 40/75] net/mlx5: Call mlx5_sf_id_erase() once in mlx5_sf_dealloc() Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 41/75] net/mlx5: Use devlink port pointer to get the pointer of container SF struct Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 42/75] net/mlx5: Convert SF port_indices xarray to function_ids xarray Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 43/75] net/mlx5: Nack sync reset when SFs are present Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 44/75] net/mlx5e: Update and set Xon/Xoff upon MTU set Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 45/75] net/mlx5e: Update and set Xon/Xoff upon port speed set Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 46/75] net/mlx5e: Set local Xoff after FW update Greg Kroah-Hartman
2025-09-02 13:20 ` [PATCH 6.6 47/75] net: stmmac: xgmac: Do not enable RX FIFO Overflow interrupts Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 48/75] net: stmmac: Rename phylink_get_caps() callback to update_caps() Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 49/75] net: stmmac: xgmac: Correct supported speed modes Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 50/75] net: stmmac: Set CIC bit only for TX queues with COE Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 51/75] net: rose: split remove and free operations in rose_remove_neigh() Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 52/75] net: rose: convert use field to refcount_t Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 53/75] net: rose: include node references in rose_neigh refcount Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 54/75] sctp: initialize more fields in sctp_v6_from_sk() Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 55/75] efivarfs: Fix slab-out-of-bounds in efivarfs_d_compare Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 56/75] KVM: x86: use array_index_nospec with indices that come from guest Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 57/75] x86/microcode/AMD: Handle the case of no BIOS microcode Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 58/75] HID: asus: fix UAF via HID_CLAIMED_INPUT validation Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 59/75] HID: multitouch: fix slab out-of-bounds access in mt_report_fixup() Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 60/75] HID: quirks: add support for Legion Go dual dinput modes Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 61/75] HID: logitech: Add ids for G PRO 2 LIGHTSPEED Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 62/75] HID: wacom: Add a new Art Pen 2 Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 63/75] HID: hid-ntrig: fix unable to handle page fault in ntrig_report_version() Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 64/75] Revert "drm/amdgpu: fix incorrect vm flags to map bo" Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 65/75] dma/pool: Ensure DMA_DIRECT_REMAP allocations are decrypted Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 66/75] fs/smb: Fix inconsistent refcnt update Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 67/75] net: usb: qmi_wwan: add Telit Cinterion LE910C4-WWX new compositions Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 68/75] smb3 client: fix return code mapping of remap_file_range Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 69/75] drm/nouveau/disp: Always accept linear modifier Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 70/75] net: rose: fix a typo in rose_clear_routes() Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 71/75] net/mlx5: SF, Fix add port error handling Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 72/75] HID: mcp2221: Dont set bus speed on every transfer Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 73/75] HID: mcp2221: Handle reads greater than 60 bytes Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 74/75] Revert "drm/dp: Change AUX DPCD probe address from DPCD_REV to LANE0_1_STATUS" Greg Kroah-Hartman
2025-09-02 13:21 ` [PATCH 6.6 75/75] xfs: do not propagate ENODATA disk errors into xattr code Greg Kroah-Hartman
2025-09-02 16:30 ` 6.6.104-rc1 review Brett A C Sheffield
2025-09-02 18:03 ` [PATCH 6.6 00/75] " Jon Hunter
2025-09-02 19:17 ` Florian Fainelli
2025-09-03 8:28 ` Naresh Kamboju
2025-09-03 9:02 ` Ron Economos
2025-09-03 10:47 ` Mark Brown
2025-09-03 11:51 ` Peter Schneider
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250902131936.615991393@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=jacob.e.keller@intel.com \
--cc=kuba@kernel.org \
--cc=moshe@nvidia.com \
--cc=patches@lists.linux.dev \
--cc=saeedm@nvidia.com \
--cc=sashal@kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.