* [PATCH net-next v2 2/3] devlink: Implicitly set auto recover flag when registering health reporter
2020-03-29 11:05 [PATCH net-next v2 0/3] Devlink health auto attributes refactor Eran Ben Elisha
2020-03-29 11:05 ` [PATCH net-next v2 1/3] netdevsim: Change dummy reporter auto recover default Eran Ben Elisha
@ 2020-03-29 11:05 ` Eran Ben Elisha
2020-03-29 11:05 ` [PATCH net-next v2 3/3] devlink: Add auto dump flag to " Eran Ben Elisha
2020-03-30 18:12 ` [PATCH net-next v2 0/3] Devlink health auto attributes refactor David Miller
3 siblings, 0 replies; 6+ messages in thread
From: Eran Ben Elisha @ 2020-03-29 11:05 UTC (permalink / raw)
To: netdev, Jakub Kicinski, Jiri Pirko, Michael Chan, David S. Miller,
Saeed Mahameed
Cc: Eran Ben Elisha
When health reporter is registered to devlink, devlink will implicitly set
auto recover if and only if the reporter has a recover method. No reason
to explicitly get the auto recover flag from the driver.
Remove this flag from all drivers that called
devlink_health_reporter_create.
All existing health reporters set auto recovery to true if they have a
recover method.
Yet, administrator can unset auto recover via netlink command as prior to
this patch.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
---
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 6 +++---
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 2 +-
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 2 +-
drivers/net/ethernet/mellanox/mlx5/core/health.c | 4 ++--
drivers/net/netdevsim/health.c | 4 ++--
include/net/devlink.h | 3 +--
net/core/devlink.c | 9 +++------
7 files changed, 13 insertions(+), 17 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 8e09a52a9c06..a812beb46325 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -150,7 +150,7 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp)
health->fw_reset_reporter =
devlink_health_reporter_create(bp->dl,
&bnxt_dl_fw_reset_reporter_ops,
- 0, true, bp);
+ 0, bp);
if (IS_ERR(health->fw_reset_reporter)) {
netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
PTR_ERR(health->fw_reset_reporter));
@@ -166,7 +166,7 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp)
health->fw_reporter =
devlink_health_reporter_create(bp->dl,
&bnxt_dl_fw_reporter_ops,
- 0, false, bp);
+ 0, bp);
if (IS_ERR(health->fw_reporter)) {
netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
PTR_ERR(health->fw_reporter));
@@ -182,7 +182,7 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp)
health->fw_fatal_reporter =
devlink_health_reporter_create(bp->dl,
&bnxt_dl_fw_fatal_reporter_ops,
- 0, true, bp);
+ 0, bp);
if (IS_ERR(health->fw_fatal_reporter)) {
netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
PTR_ERR(health->fw_fatal_reporter));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index af77c86c9aea..c209579fc213 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -571,7 +571,7 @@ int mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
reporter = devlink_health_reporter_create(devlink,
&mlx5_rx_reporter_ops,
MLX5E_REPORTER_RX_GRACEFUL_PERIOD,
- true, priv);
+ priv);
if (IS_ERR(reporter)) {
netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
PTR_ERR(reporter));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 2028ce9b151f..9805fc085512 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -416,7 +416,7 @@ int mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
reporter =
devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
MLX5_REPORTER_TX_GRACEFUL_PERIOD,
- true, priv);
+ priv);
if (IS_ERR(reporter)) {
netdev_warn(priv->netdev,
"Failed to create tx reporter, err = %ld\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d9f4e8c59c1f..fa1665caac46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -627,7 +627,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
health->fw_reporter =
devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
- 0, false, dev);
+ 0, dev);
if (IS_ERR(health->fw_reporter))
mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
PTR_ERR(health->fw_reporter));
@@ -636,7 +636,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
devlink_health_reporter_create(devlink,
&mlx5_fw_fatal_reporter_ops,
MLX5_REPORTER_FW_GRACEFUL_PERIOD,
- true, dev);
+ dev);
if (IS_ERR(health->fw_fatal_reporter))
mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
PTR_ERR(health->fw_fatal_reporter));
diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c
index 9ff345d5524b..62958b238d50 100644
--- a/drivers/net/netdevsim/health.c
+++ b/drivers/net/netdevsim/health.c
@@ -271,14 +271,14 @@ int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink)
health->empty_reporter =
devlink_health_reporter_create(devlink,
&nsim_dev_empty_reporter_ops,
- 0, false, health);
+ 0, health);
if (IS_ERR(health->empty_reporter))
return PTR_ERR(health->empty_reporter);
health->dummy_reporter =
devlink_health_reporter_create(devlink,
&nsim_dev_dummy_reporter_ops,
- 0, true, health);
+ 0, health);
if (IS_ERR(health->dummy_reporter)) {
err = PTR_ERR(health->dummy_reporter);
goto err_empty_reporter_destroy;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 3be50346c69b..3f5cf62e4de8 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1040,8 +1040,7 @@ int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
struct devlink_health_reporter *
devlink_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, bool auto_recover,
- void *priv);
+ u64 graceful_period, void *priv);
void
devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index d20efdc8cc73..0763b0494401 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -5124,14 +5124,12 @@ devlink_health_reporter_find_by_name(struct devlink *devlink,
* @devlink: devlink
* @ops: ops
* @graceful_period: to avoid recovery loops, in msecs
- * @auto_recover: auto recover when error occurs
* @priv: priv
*/
struct devlink_health_reporter *
devlink_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, bool auto_recover,
- void *priv)
+ u64 graceful_period, void *priv)
{
struct devlink_health_reporter *reporter;
@@ -5141,8 +5139,7 @@ devlink_health_reporter_create(struct devlink *devlink,
goto unlock;
}
- if (WARN_ON(auto_recover && !ops->recover) ||
- WARN_ON(graceful_period && !ops->recover)) {
+ if (WARN_ON(graceful_period && !ops->recover)) {
reporter = ERR_PTR(-EINVAL);
goto unlock;
}
@@ -5157,7 +5154,7 @@ devlink_health_reporter_create(struct devlink *devlink,
reporter->ops = ops;
reporter->devlink = devlink;
reporter->graceful_period = graceful_period;
- reporter->auto_recover = auto_recover;
+ reporter->auto_recover = !!ops->recover;
mutex_init(&reporter->dump_lock);
refcount_set(&reporter->refcount, 1);
list_add_tail(&reporter->list, &devlink->reporter_list);
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH net-next v2 3/3] devlink: Add auto dump flag to health reporter
2020-03-29 11:05 [PATCH net-next v2 0/3] Devlink health auto attributes refactor Eran Ben Elisha
2020-03-29 11:05 ` [PATCH net-next v2 1/3] netdevsim: Change dummy reporter auto recover default Eran Ben Elisha
2020-03-29 11:05 ` [PATCH net-next v2 2/3] devlink: Implicitly set auto recover flag when registering health reporter Eran Ben Elisha
@ 2020-03-29 11:05 ` Eran Ben Elisha
2020-03-30 18:12 ` [PATCH net-next v2 0/3] Devlink health auto attributes refactor David Miller
3 siblings, 0 replies; 6+ messages in thread
From: Eran Ben Elisha @ 2020-03-29 11:05 UTC (permalink / raw)
To: netdev, Jakub Kicinski, Jiri Pirko, Michael Chan, David S. Miller,
Saeed Mahameed
Cc: Eran Ben Elisha
On low memory system, run time dumps can consume too much memory. Add
administrator ability to disable auto dumps per reporter as part of the
error flow handle routine.
This attribute is not relevant while executing
DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET.
By default, auto dump is activated for any reporter that has a dump method,
as part of the reporter registration to devlink.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
---
include/uapi/linux/devlink.h | 2 ++
net/core/devlink.c | 26 ++++++++++++++++++++++----
2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index dfdffc42e87d..e7891d1d2ebd 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -429,6 +429,8 @@ enum devlink_attr {
DEVLINK_ATTR_NETNS_FD, /* u32 */
DEVLINK_ATTR_NETNS_PID, /* u32 */
DEVLINK_ATTR_NETNS_ID, /* u32 */
+
+ DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, /* u8 */
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 0763b0494401..1caa43a7fba4 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -5089,6 +5089,7 @@ struct devlink_health_reporter {
struct mutex dump_lock; /* lock parallel read/write from dump buffers */
u64 graceful_period;
bool auto_recover;
+ bool auto_dump;
u8 health_state;
u64 dump_ts;
u64 dump_real_ts;
@@ -5155,6 +5156,7 @@ devlink_health_reporter_create(struct devlink *devlink,
reporter->devlink = devlink;
reporter->graceful_period = graceful_period;
reporter->auto_recover = !!ops->recover;
+ reporter->auto_dump = !!ops->dump;
mutex_init(&reporter->dump_lock);
refcount_set(&reporter->refcount, 1);
list_add_tail(&reporter->list, &devlink->reporter_list);
@@ -5235,6 +5237,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
reporter->dump_real_ts, DEVLINK_ATTR_PAD))
goto reporter_nest_cancel;
+ if (reporter->ops->dump &&
+ nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
+ reporter->auto_dump))
+ goto reporter_nest_cancel;
nla_nest_end(msg, reporter_attr);
genlmsg_end(msg, hdr);
@@ -5381,10 +5387,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
- mutex_lock(&reporter->dump_lock);
- /* store current dump of current error, for later analysis */
- devlink_health_do_dump(reporter, priv_ctx, NULL);
- mutex_unlock(&reporter->dump_lock);
+ if (reporter->auto_dump) {
+ mutex_lock(&reporter->dump_lock);
+ /* store current dump of current error, for later analysis */
+ devlink_health_do_dump(reporter, priv_ctx, NULL);
+ mutex_unlock(&reporter->dump_lock);
+ }
if (reporter->auto_recover)
return devlink_health_reporter_recover(reporter,
@@ -5558,6 +5566,11 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
err = -EOPNOTSUPP;
goto out;
}
+ if (!reporter->ops->dump &&
+ info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
reporter->graceful_period =
@@ -5567,6 +5580,10 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
reporter->auto_recover =
nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
+ if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
+ reporter->auto_dump =
+ nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]);
+
devlink_health_reporter_put(reporter);
return 0;
out:
@@ -6313,6 +6330,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
[DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
[DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 },
};
static const struct genl_ops devlink_nl_ops[] = {
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread