From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>,
Jason Gunthorpe <jgg@nvidia.com>
Cc: Leon Romanovsky <leonro@nvidia.com>,
linux-rdma@vger.kernel.org, netdev@vger.kernel.org,
Moshe Shemesh <moshe@nvidia.com>,
Saeed Mahameed <saeedm@nvidia.com>
Subject: [for-next v2 17/17] net/mlx5: Add clarification on sync reset failure
Date: Wed, 23 Feb 2022 15:39:30 -0800 [thread overview]
Message-ID: <20220223233930.319301-18-saeed@kernel.org> (raw)
In-Reply-To: <20220223233930.319301-1-saeed@kernel.org>
From: Moshe Shemesh <moshe@nvidia.com>
In case devlink reload action fw_activate failed in sync reset stage,
use the new MFRL field reset_state to find why it failed and share this
clarification with the user.
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/devlink.c | 10 +---
.../ethernet/mellanox/mlx5/core/fw_reset.c | 57 ++++++++++++++++---
.../ethernet/mellanox/mlx5/core/fw_reset.h | 3 +-
.../net/ethernet/mellanox/mlx5/core/port.c | 20 +++++--
include/linux/mlx5/driver.h | 3 +
5 files changed, 74 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index d1093bb2d436..057dde6f4417 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -100,15 +100,11 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli
}
net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE);
- err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive);
+ err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive, extack);
if (err)
- goto out;
+ return err;
- err = mlx5_fw_reset_wait_reset_done(dev);
-out:
- if (err)
- NL_SET_ERR_MSG_MOD(extack, "FW activate command failed");
- return err;
+ return mlx5_fw_reset_wait_reset_done(dev);
}
static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 0b0234f9d694..d438d7a61500 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -57,7 +57,8 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1);
}
-static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
+static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
+ u8 *reset_type, u8 *reset_state)
{
u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
@@ -71,25 +72,67 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *r
*reset_level = MLX5_GET(mfrl_reg, out, reset_level);
if (reset_type)
*reset_type = MLX5_GET(mfrl_reg, out, reset_type);
+ if (reset_state)
+ *reset_state = MLX5_GET(mfrl_reg, out, reset_state);
return 0;
}
int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
{
- return mlx5_reg_mfrl_query(dev, reset_level, reset_type);
+ return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL);
}
-int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel)
+static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
+ struct netlink_ext_ack *extack)
+{
+ u8 reset_state;
+
+ if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
+ goto out;
+
+ switch (reset_state) {
+ case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION:
+ case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered");
+ return -EBUSY;
+ case MLX5_MFRL_REG_RESET_STATE_TIMEOUT:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout");
+ return -ETIMEDOUT;
+ case MLX5_MFRL_REG_RESET_STATE_NACK:
+ NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset");
+ return -EPERM;
+ }
+
+out:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset failed");
+ return -EIO;
+}
+
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
+ struct netlink_ext_ack *extack)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
int err;
set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
- err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true);
- if (err)
- clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
- return err;
+
+ MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3);
+ MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1);
+ err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MFRL, 0, 1, false);
+ if (!err)
+ return 0;
+
+ clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+ if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state))
+ return mlx5_fw_reset_get_reset_state_err(dev, extack);
+
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed");
+ return mlx5_cmd_check(dev, err, in, out);
}
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
index 7761ee5fc7d0..694fc7cb2684 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
@@ -9,7 +9,8 @@
void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable);
bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev);
int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type);
-int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel);
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
+ struct netlink_ext_ack *extack);
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 1ef2b6a848c1..d15b417d3e07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -33,9 +33,10 @@
#include <linux/mlx5/port.h>
#include "mlx5_core.h"
-int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
- int size_in, void *data_out, int size_out,
- u16 reg_id, int arg, int write)
+/* calling with verbose false will not print error to log */
+int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in,
+ void *data_out, int size_out, u16 reg_id, int arg,
+ int write, bool verbose)
{
int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
@@ -57,7 +58,9 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
MLX5_SET(access_register_in, in, argument, arg);
MLX5_SET(access_register_in, in, register_id, reg_id);
- err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ err = mlx5_cmd_do(dev, in, inlen, out, outlen);
+ if (verbose)
+ err = mlx5_cmd_check(dev, err, in, out);
if (err)
goto out;
@@ -69,6 +72,15 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
kvfree(in);
return err;
}
+EXPORT_SYMBOL_GPL(mlx5_access_reg);
+
+int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+ int size_in, void *data_out, int size_out,
+ u16 reg_id, int arg, int write)
+{
+ return mlx5_access_reg(dev, data_in, size_in, data_out, size_out,
+ reg_id, arg, write, true);
+}
EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 432151d7d0bf..d3b1a6a1f8d2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1031,6 +1031,9 @@ int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev);
void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in,
+ void *data_out, int size_out, u16 reg_id, int arg,
+ int write, bool verbose);
int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
int size_in, void *data_out, int size_out,
u16 reg_num, int arg, int write);
--
2.35.1
next prev parent reply other threads:[~2022-02-23 23:40 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-23 23:39 [pull request][for-next v2 00/17] mlx5-next 2022-22-02 Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 01/17] mlx5: remove unused static inlines Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 02/17] net/mlx5: Add ability to insert to specific flow group Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 03/17] net/mlx5: E-Switch, reserve and use same uplink metadata across ports Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 04/17] net/mlx5: E-switch, remove special uplink ingress ACL handling Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 05/17] net/mlx5: E-switch, add drop rule support to ingress ACL Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 06/17] net/mlx5: Lag, use local variable already defined to access E-Switch Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 07/17] net/mlx5: Lag, don't use magic numbers for ports Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 08/17] net/mlx5: Lag, record inactive state of bond device Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 09/17] net/mlx5: Lag, offload active-backup drops to hardware Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 10/17] net/mlx5: cmdif, Return value improvements Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 11/17] net/mlx5: cmdif, cmd_check refactoring Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 12/17] net/mlx5: cmdif, Add new api for command execution Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 13/17] net/mlx5: Use mlx5_cmd_do() in core create_{cq,dct} Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 14/17] net/mlx5: cmdif, Refactor error handling and reporting of async commands Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 15/17] RDMA/mlx5: Use new command interface API Saeed Mahameed
2022-02-23 23:39 ` [for-next v2 16/17] net/mlx5: Add reset_state field to MFRL register Saeed Mahameed
2022-02-23 23:39 ` Saeed Mahameed [this message]
2022-02-28 22:41 ` [pull request][for-next v2 00/17] mlx5-next 2022-22-02 Saeed Mahameed
2022-03-01 0:31 ` Jakub Kicinski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220223233930.319301-18-saeed@kernel.org \
--to=saeed@kernel.org \
--cc=davem@davemloft.net \
--cc=jgg@nvidia.com \
--cc=kuba@kernel.org \
--cc=leonro@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=moshe@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=saeedm@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).