From: Saeed Mahameed <saeedm@mellanox.com>
To: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org, Jiri Pirko <jiri@mellanox.com>,
Jakub Kicinski <jakub.kicinski@netronome.com>,
Alexander Duyck <alexander.duyck@gmail.com>,
Bjorn Helgaas <helgaas@kernel.org>,
Alex Vesker <valex@mellanox.com>,
Saeed Mahameed <saeedm@mellanox.com>
Subject: [net-next 09/10] net/mlx5: Add Crdump FW snapshot support
Date: Wed, 1 Aug 2018 14:52:54 -0700 [thread overview]
Message-ID: <20180801215255.6642-10-saeedm@mellanox.com> (raw)
In-Reply-To: <20180801215255.6642-1-saeedm@mellanox.com>
From: Alex Vesker <valex@mellanox.com>
Crdump allows the driver to create a snapshot of the FW PCI
crspace. This is useful in case of catastrophic issues which
require a FW reset; the snapshot can be used for later debugging.
The snapshot is exposed using devlink: the cr-space address
region is registered on init, and a snapshot is attached to it
each time the driver collects a new dump.
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
.../net/ethernet/mellanox/mlx5/core/Makefile | 3 +-
.../ethernet/mellanox/mlx5/core/diag/crdump.c | 201 ++++++++++++++++++
.../net/ethernet/mellanox/mlx5/core/health.c | 1 +
.../ethernet/mellanox/mlx5/core/lib/mlx5.h | 2 +
.../net/ethernet/mellanox/mlx5/core/main.c | 5 +
include/linux/mlx5/driver.h | 4 +
6 files changed, 215 insertions(+), 1 deletion(-)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 15f6916efe1b..6ea9e9462c77 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,7 +6,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o lib/clock.o \
- lib/pci_vsc.o diag/fs_tracepoint.o diag/fw_tracer.o devlink.o
+ lib/pci_vsc.o diag/fs_tracepoint.o diag/fw_tracer.o diag/crdump.o \
+ devlink.o
mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
new file mode 100644
index 000000000000..fe779e62fc70
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/proc_fs.h>
+#include <linux/mlx5/driver.h>
+#include <net/devlink.h>
+#include "mlx5_core.h"
+#include "lib/pci_vsc.h"
+
+#define BAD_ACCESS 0xBADACCE5
+#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7
+#define MAX_NUM_OF_DUMPS_TO_STORE (8)
+
+static const char *region_cr_space_str = "cr-space";
+
+/* Per-device crdump state; allocated by mlx5_crdump_init() and stored in priv->health.crdump. */
+struct mlx5_fw_crdump {
+ u32 size; /* cr-space size in bytes, as returned by mlx5_vsc_gw_set_space() */
+ struct devlink_region *region_crspace; /* devlink region that snapshots are attached to */
+};
+
+/* Report whether crdump state is attached to @dev (health.crdump is non-NULL). */
+bool mlx5_crdump_enbaled(struct mlx5_core_dev *dev)
+{
+	return dev->priv.health.crdump != NULL;
+}
+
+/*
+ * Read the whole cr-space through the VSC gateway and attach the data to the
+ * devlink cr-space region as a new snapshot.
+ *
+ * mlx5_crdump_collect() calls this with the gateway locked and the scan
+ * cr-space already selected.
+ *
+ * On success the buffer is not freed here: it is passed to
+ * devlink_region_snapshot_create() together with kvfree. On every failure
+ * path the buffer is freed before returning.
+ *
+ * Returns 0 on success, or a negative errno: -ENOMEM if the buffer cannot be
+ * allocated, -EIO if the gateway read returns no data, -EINVAL on a short
+ * read, or the error reported by the gateway read / devlink.
+ */
+static int mlx5_crdump_fill(struct mlx5_core_dev *dev)
+{
+	struct mlx5_fw_crdump *crdump = dev->priv.health.crdump;
+	struct devlink *devlink = priv_to_devlink(dev);
+	u32 *cr_data;
+	u32 i, id;
+	int ret;
+
+	cr_data = kvmalloc(crdump->size, GFP_KERNEL);
+	if (!cr_data)
+		return -ENOMEM;
+
+	/* Pre-fill with a marker; presumably so unread dwords are recognizable */
+	for (i = 0; i < crdump->size / 4; i++)
+		cr_data[i] = BAD_ACCESS;
+
+	ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump->size);
+	if (ret <= 0) {
+		/* A read of zero bytes must not be reported as success */
+		if (!ret)
+			ret = -EIO;
+		goto free_data;
+	}
+
+	if (crdump->size != ret) {
+		mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n",
+			       ret, crdump->size);
+		ret = -EINVAL;
+		goto free_data;
+	}
+
+	/* Get the available snapshot ID for the dumps */
+	id = devlink_region_shapshot_id_get(devlink);
+	ret = devlink_region_snapshot_create(crdump->region_crspace,
+					     crdump->size, (u8 *)cr_data,
+					     id, &kvfree);
+	if (ret) {
+		mlx5_core_warn(dev, "crdump: devlink create %s snapshot id %u err %d\n",
+			       region_cr_space_str, id, ret);
+		goto free_data;
+	}
+
+	mlx5_core_info(dev, "crdump: added snapshot %u to devlink region %s\n",
+		       id, region_cr_space_str);
+	return 0;
+
+free_data:
+	kvfree(cr_data);
+	return ret;
+}
+
+/*
+ * Collect one cr-space snapshot for @dev.
+ *
+ * Takes the VSC gateway lock, selects the scan cr-space and lets
+ * mlx5_crdump_fill() read the data and attach it to devlink. The gateway is
+ * unlocked again whether or not the collection succeeded.
+ *
+ * Returns 0 on success, -ENODEV when crdump was not set up for this device,
+ * or the error from the failing gateway / fill step.
+ */
+int mlx5_crdump_collect(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	if (!mlx5_crdump_enbaled(dev))
+		return -ENODEV;
+
+	err = mlx5_vsc_gw_lock(dev);
+	if (err)
+		return err;
+
+	err = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL);
+	if (!err)
+		err = mlx5_crdump_fill(dev);
+
+	mlx5_vsc_gw_unlock(dev);
+	return err;
+}
+
+/*
+ * Set up crdump for @dev: query the scan cr-space size through the VSC
+ * gateway and register a devlink region of that size.
+ *
+ * Returns 0 when crdump was set up, and also when it was skipped (device is
+ * not a PF, the VSC is not accessible, crdump is already set up, or the scan
+ * cr-space cannot be selected). Returns a negative errno when locking or
+ * unlocking the gateway fails, the reported size is zero, allocation fails,
+ * or the devlink region cannot be created.
+ */
+int mlx5_crdump_init(struct mlx5_core_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(dev);
+	struct devlink_region *region;
+	struct mlx5_fw_crdump *ctx;
+	u32 crspace_size;
+	int err;
+
+	if (!mlx5_core_is_pf(dev))
+		return 0;
+	if (!mlx5_vsc_accessible(dev))
+		return 0;
+	if (mlx5_crdump_enbaled(dev))
+		return 0;
+
+	err = mlx5_vsc_gw_lock(dev);
+	if (err)
+		return err;
+
+	/* Selecting the space both checks for support and yields its size */
+	if (mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE,
+				  &crspace_size)) {
+		/* Space is not supported: mask the error, crdump stays off */
+		mlx5_vsc_gw_unlock(dev);
+		return 0;
+	}
+
+	if (!crspace_size) {
+		mlx5_core_warn(dev, "Invalid Crspace size, zero\n");
+		mlx5_vsc_gw_unlock(dev);
+		return -EINVAL;
+	}
+
+	err = mlx5_vsc_gw_unlock(dev);
+	if (err)
+		return err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	/* Create cr-space region */
+	region = devlink_region_create(devlink, region_cr_space_str,
+				       MAX_NUM_OF_DUMPS_TO_STORE,
+				       crspace_size);
+	if (IS_ERR(region)) {
+		mlx5_core_warn(dev,
+			       "crdump: create devlink region %s err %ld\n",
+			       region_cr_space_str, PTR_ERR(region));
+		kfree(ctx);
+		return PTR_ERR(region);
+	}
+
+	ctx->size = crspace_size;
+	ctx->region_crspace = region;
+	dev->priv.health.crdump = ctx;
+	return 0;
+}
+
+/*
+ * Undo mlx5_crdump_init(): destroy the devlink cr-space region, free the
+ * crdump state and clear health.crdump. Does nothing when crdump was never
+ * set up for @dev.
+ */
+void mlx5_crdump_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_fw_crdump *crdump = dev->priv.health.crdump;
+
+	if (crdump) {
+		devlink_region_destroy(crdump->region_crspace);
+		kfree(crdump);
+		dev->priv.health.crdump = NULL;
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index db9e39fdc33e..10ac6a98ea96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -390,6 +390,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
INIT_WORK(&health->work, health_care);
INIT_DELAYED_WORK(&health->recover_work, health_recover);
mlx5_vsc_init(dev);
+ health->crdump = NULL;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 7550b1cc8c6a..fbdf332a9174 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -39,5 +39,7 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+int mlx5_crdump_init(struct mlx5_core_dev *dev);
+void mlx5_crdump_cleanup(struct mlx5_core_dev *dev);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 7a1ddf96f065..f2dd54accd0a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1450,6 +1450,10 @@ static int init_one(struct pci_dev *pdev,
if (err)
goto clean_load;
+ err = mlx5_crdump_init(dev);
+ if (err)
+ dev_err(&pdev->dev, "mlx5_crdump_init failed with error code %d\n", err);
+
pci_save_state(pdev);
return 0;
@@ -1476,6 +1480,7 @@ static void remove_one(struct pci_dev *pdev)
struct devlink *devlink = priv_to_devlink(dev);
struct mlx5_priv *priv = &dev->priv;
+ mlx5_crdump_cleanup(dev);
mlx5_devlink_unregister(devlink);
mlx5_unregister_device(dev);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6690875b368b..52744065924a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -52,6 +52,7 @@
#include <linux/mlx5/srq.h>
#include <linux/timecounter.h>
#include <linux/ptp_clock_kernel.h>
+#include <net/devlink.h>
enum {
MLX5_BOARD_ID_LEN = 64,
@@ -528,6 +529,8 @@ struct mlx5_sq_bfreg {
unsigned int offset;
};
+struct mlx5_fw_crdump;
+
struct mlx5_core_health {
struct health_buffer __iomem *health;
__be32 __iomem *health_counter;
@@ -542,6 +545,7 @@ struct mlx5_core_health {
struct work_struct work;
struct delayed_work recover_work;
u32 vsc_addr;
+ struct mlx5_fw_crdump *crdump;
};
struct mlx5_qp_table {
--
2.17.0
next prev parent reply other threads:[~2018-08-01 23:49 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-08-01 21:52 [pull request][net-next 00/10] Mellanox, mlx5 and devlink updates 2018-07-31 Saeed Mahameed
2018-08-01 21:52 ` [net-next 01/10] devlink: Fix param set handling for string type Saeed Mahameed
2018-08-01 22:33 ` Jakub Kicinski
2018-08-01 21:52 ` [net-next 02/10] devlink: Fix param cmode driverinit " Saeed Mahameed
2018-08-01 21:52 ` [net-next 03/10] devlink: Add helper function for safely copy string param Saeed Mahameed
2018-08-01 21:52 ` [net-next 04/10] devlink: Add extack messages support to param set Saeed Mahameed
2018-08-01 21:52 ` [net-next 05/10] net/mlx5: Move all devlink related functions calls to devlink.c Saeed Mahameed
2018-08-01 21:52 ` [net-next 06/10] net/mlx5: Add MPEGC register configuration functionality Saeed Mahameed
2018-08-01 21:52 ` [net-next 07/10] net/mlx5: Enable PCIe buffer congestion handling workaround via devlink Saeed Mahameed
2018-08-01 22:18 ` Alexander Duyck
2018-08-01 21:52 ` [net-next 08/10] net/mlx5: Add Vendor Specific Capability access gateway Saeed Mahameed
2018-08-01 21:52 ` Saeed Mahameed [this message]
2018-08-01 21:52 ` [net-next 10/10] net/mlx5: Use devlink region_snapshot parameter Saeed Mahameed
2018-08-01 22:34 ` [pull request][net-next 00/10] Mellanox, mlx5 and devlink updates 2018-07-31 Alexander Duyck
2018-08-01 23:13 ` Saeed Mahameed
2018-08-02 0:36 ` Alexander Duyck
[not found] ` <2d84340e-0703-0bc7-4917-3b18979b2aa5@mellanox.com>
2018-08-29 15:42 ` Alex Vesker
2018-08-29 17:04 ` Alexander Duyck
[not found] ` <5206dd74-432d-3342-2a48-3cdd1be8b5cb@mellanox.com>
2018-08-30 15:39 ` Alexander Duyck
2018-08-02 6:15 ` Jiri Pirko
2018-08-02 0:00 ` Jakub Kicinski
2018-08-02 1:40 ` David Miller
2018-08-02 8:29 ` Petr Machata
2018-08-02 17:11 ` Jakub Kicinski
2018-08-02 18:04 ` David Miller
2018-08-02 20:10 ` Petr Machata
2018-08-02 15:07 ` Eran Ben Elisha
2018-08-02 22:53 ` Jakub Kicinski
2018-08-03 16:41 ` Ido Schimmel
2018-08-04 4:59 ` Jakub Kicinski
2018-08-06 13:01 ` Eran Ben Elisha
2018-08-07 0:49 ` Jakub Kicinski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180801215255.6642-10-saeedm@mellanox.com \
--to=saeedm@mellanox.com \
--cc=alexander.duyck@gmail.com \
--cc=davem@davemloft.net \
--cc=helgaas@kernel.org \
--cc=jakub.kicinski@netronome.com \
--cc=jiri@mellanox.com \
--cc=netdev@vger.kernel.org \
--cc=valex@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox