netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jiri Pirko <jiri@resnulli.us>
To: Eran Ben Elisha <eranbe@mellanox.com>
Cc: netdev@vger.kernel.org, Jiri Pirko <jiri@mellanox.com>,
	"David S. Miller" <davem@davemloft.net>,
	Ariel Almog <ariela@mellanox.com>, Aya Levin <ayal@mellanox.com>,
	Moshe Shemesh <moshe@mellanox.com>
Subject: Re: [PATCH net-next v2 02/11] devlink: Add health reporter create/destroy functionality
Date: Sun, 20 Jan 2019 12:49:59 +0100	[thread overview]
Message-ID: <20190120114959.GL2730@nanopsycho> (raw)
In-Reply-To: <1547762360-7075-3-git-send-email-eranbe@mellanox.com>

Thu, Jan 17, 2019 at 10:59:11PM CET, eranbe@mellanox.com wrote:
>Devlink health reporter is an instance for reporting, diagnosing and
>recovering from run time errors discovered by the reporters.
>Define its data structure and supported operations.
>In addition, expose devlink API to create and destroy a reporter.
>Each devlink instance will hold its own reporters list.
>
>As part of the allocation, driver shall provide a set of callbacks which
>will be used by the devlink in order to handle health reports and user
>commands related to this reporter. In addition, driver is entitled to
>provide some priv pointer, which can be fetched from the reporter by
>devlink_health_reporter_priv function.
>
>For each reporter, devlink will hold a metadata of statistics,
>buffers and status.
>
>Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
>---
> include/net/devlink.h |  59 ++++++++++++++++++++
> net/core/devlink.c    | 127 ++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 186 insertions(+)
>
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index 77c77319290a..7fe30d67678a 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -30,6 +30,7 @@ struct devlink {
> 	struct list_head param_list;
> 	struct list_head region_list;
> 	u32 snapshot_id;
>+	struct list_head reporter_list;
> 	struct devlink_dpipe_headers *dpipe_headers;
> 	const struct devlink_ops *ops;
> 	struct device *dev;
>@@ -424,6 +425,34 @@ struct devlink_region;
> typedef void devlink_snapshot_data_dest_t(const void *data);
> 
> struct devlink_health_buffer;
>+struct devlink_health_reporter;
>+
>+/**
>+ * struct devlink_health_reporter_ops - Reporter operations
>+ * @name: reporter name
>+ * dump_size: dump buffer size allocated by the devlink
>+ * diagnose_size: diagnose buffer size allocated by the devlink
>+ * recover: callback to recover from reported error
>+ *          if priv_ctx is NULL, run a full recover
>+ * dump: callback to dump an object
>+ *       if priv_ctx is NULL, run a full dump
>+ * diagnose: callback to diagnose the current status
>+ */
>+
>+struct devlink_health_reporter_ops {
>+	char *name;
>+	unsigned int dump_size;
>+	unsigned int diagnose_size;
>+	int (*recover)(struct devlink_health_reporter *reporter,
>+		       void *priv_ctx);
>+	int (*dump)(struct devlink_health_reporter *reporter,
>+		    struct devlink_health_buffer **buffers_array,
>+		    unsigned int buffer_size, unsigned int num_buffers,
>+		    void *priv_ctx);
>+	int (*diagnose)(struct devlink_health_reporter *reporter,
>+			struct devlink_health_buffer **buffers_array,
>+			unsigned int buffer_size, unsigned int num_buffers);
>+};
> 
> struct devlink_ops {
> 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
>@@ -602,6 +631,16 @@ int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
> 					   char *name);
> int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
> 					 void *data, int len);
>+struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+			       const struct devlink_health_reporter_ops *ops,
>+			       u64 graceful_period, bool auto_recover,
>+			       void *priv);
>+void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
>+
>+void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
> #else
> 
> static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
>@@ -920,6 +959,26 @@ devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
> {
> 	return 0;
> }
>+
>+static inline struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+			       const struct devlink_health_reporter_ops *ops,
>+			       u64 graceful_period, bool auto_recover,
>+			       void *priv)
>+{
>+	return NULL;
>+}
>+
>+static inline void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
>+{
>+}
>+
>+static inline void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
>+{
>+	return NULL;
>+}
> #endif
> 
> #endif /* _NET_DEVLINK_H_ */
>diff --git a/net/core/devlink.c b/net/core/devlink.c
>index 8984501edade..fec169a28dba 100644
>--- a/net/core/devlink.c
>+++ b/net/core/devlink.c
>@@ -4098,6 +4098,132 @@ devlink_health_buffer_snd(struct genl_info *info,
> 	return err;
> }
> 
>+struct devlink_health_reporter {
>+	struct list_head list;
>+	struct devlink_health_buffer **dump_buffers_array;
>+	struct mutex dump_lock; /* lock parallel read/write from dump buffers */
>+	struct devlink_health_buffer **diagnose_buffers_array;
>+	struct mutex diagnose_lock; /* lock parallel read/write from diagnose buffers */
>+	void *priv;
>+	const struct devlink_health_reporter_ops *ops;
>+	struct devlink *devlink;
>+	u64 graceful_period;
>+	bool auto_recover;
>+	u8 health_state;
>+};
>+
>+void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
>+{
>+	return reporter->priv;
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
>+
>+static struct devlink_health_reporter *
>+devlink_health_reporter_find_by_name(struct devlink *devlink,
>+				     const char *reporter_name)
>+{
>+	struct devlink_health_reporter *reporter;
>+
>+	list_for_each_entry(reporter, &devlink->reporter_list, list)
>+		if (!strcmp(reporter->ops->name, reporter_name))
>+			return reporter;
>+	return NULL;
>+}
>+
>+/**
>+ *	devlink_health_reporter_create - create devlink health reporter
>+ *
>+ *	@devlink: devlink
>+ *	@ops: ops
>+ *	@graceful_period: to avoid recovery loops, in msecs
>+ *	@auto_recover: auto recover when error occurs
>+ *	@priv: priv
>+ */
>+struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+			       const struct devlink_health_reporter_ops *ops,
>+			       u64 graceful_period, bool auto_recover,
>+			       void *priv)
>+{
>+	struct devlink_health_reporter *reporter;
>+
>+	mutex_lock(&devlink->lock);
>+	if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
>+		reporter = ERR_PTR(-EEXIST);
>+		goto unlock;
>+	}
>+
>+	if (WARN_ON(ops->dump && !ops->dump_size) ||
>+	    WARN_ON(ops->diagnose && !ops->diagnose_size) ||
>+	    WARN_ON(auto_recover && !ops->recover) ||
>+	    WARN_ON(graceful_period && !ops->recover)) {
>+		reporter = ERR_PTR(-EINVAL);
>+		goto unlock;
>+	}
>+
>+	reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
>+	if (!reporter) {
>+		reporter = ERR_PTR(-ENOMEM);
>+		goto unlock;
>+	}
>+
>+	if (ops->dump) {
>+		reporter->dump_buffers_array =
>+			devlink_health_buffers_create(ops->dump_size);
>+		if (!reporter->dump_buffers_array) {
>+			kfree(reporter);
>+			reporter = ERR_PTR(-ENOMEM);
>+			goto unlock;
>+		}
>+	}
>+
>+	if (ops->diagnose) {
>+		reporter->diagnose_buffers_array =
>+			devlink_health_buffers_create(ops->diagnose_size);
>+		if (!reporter->diagnose_buffers_array) {
>+			devlink_health_buffers_destroy(reporter->dump_buffers_array,
>+						       DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size));

This is just ugly. :/

As I wrote in the other email, this should be converted to a simple
"msg_ctx = msg_ctx_create(), msg_ctx_destroy(msg_ctx)" scheme, with no
sizes and no buffers visible to the driver.


>+			kfree(reporter);
>+			reporter = ERR_PTR(-ENOMEM);
>+			goto unlock;
>+		}
>+	}
>+
>+	list_add_tail(&reporter->list, &devlink->reporter_list);
>+	mutex_init(&reporter->dump_lock);
>+	mutex_init(&reporter->diagnose_lock);
>+
>+	reporter->priv = priv;
>+	reporter->ops = ops;
>+	reporter->devlink = devlink;
>+	reporter->graceful_period = graceful_period;
>+	reporter->auto_recover = auto_recover;
>+unlock:
>+	mutex_unlock(&devlink->lock);
>+	return reporter;
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
>+
>+/**
>+ *	devlink_health_reporter_destroy - destroy devlink health reporter
>+ *
>+ *	@reporter: devlink health reporter to destroy
>+ */
>+void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
>+{
>+	mutex_lock(&reporter->devlink->lock);
>+	list_del(&reporter->list);
>+	devlink_health_buffers_destroy(reporter->dump_buffers_array,
>+				       DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
>+	devlink_health_buffers_destroy(reporter->diagnose_buffers_array,
>+				       DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size));
>+	kfree(reporter);
>+	mutex_unlock(&reporter->devlink->lock);
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
>+
> static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
> 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
> 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
>@@ -4383,6 +4509,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
> 	INIT_LIST_HEAD(&devlink->resource_list);
> 	INIT_LIST_HEAD(&devlink->param_list);
> 	INIT_LIST_HEAD(&devlink->region_list);
>+	INIT_LIST_HEAD(&devlink->reporter_list);
> 	mutex_init(&devlink->lock);
> 	return devlink;
> }
>-- 
>2.17.1
>

  reply	other threads:[~2019-01-20 12:01 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-17 21:59 [PATCH net-next v2 00/11] Devlink health reporting and recovery system Eran Ben Elisha
2019-01-17 21:59 ` [PATCH net-next v2 01/11] devlink: Add health buffer support Eran Ben Elisha
2019-01-20 10:03   ` Jiri Pirko
2019-01-20 11:06     ` Eran Ben Elisha
2019-01-20 11:08       ` Jiri Pirko
2019-01-20 18:45         ` David Miller
2019-01-21 11:07           ` Eran Ben Elisha
2019-01-21 12:08             ` Jiri Pirko
2019-01-20 11:20   ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 02/11] devlink: Add health reporter create/destroy functionality Eran Ben Elisha
2019-01-20 11:49   ` Jiri Pirko [this message]
2019-01-17 21:59 ` [PATCH net-next v2 03/11] devlink: Add health report functionality Eran Ben Elisha
2019-01-20 11:27   ` Jiri Pirko
2019-01-21 11:12     ` Eran Ben Elisha
2019-01-17 21:59 ` [PATCH net-next v2 04/11] devlink: Add health get command Eran Ben Elisha
2019-01-20 11:31   ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 05/11] devlink: Add health set command Eran Ben Elisha
2019-01-20 11:32   ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 06/11] devlink: Add health recover command Eran Ben Elisha
2019-01-20 11:33   ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 07/11] devlink: Add health diagnose command Eran Ben Elisha
2019-01-20 11:38   ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 08/11] devlink: Add health dump {get,clear} commands Eran Ben Elisha
2019-01-17 21:59 ` [PATCH net-next v2 09/11] net/mlx5e: Add TX reporter support Eran Ben Elisha
2019-01-20 11:06   ` Jiri Pirko
2019-01-21 11:32     ` Eran Ben Elisha
2019-01-21 12:11       ` Jiri Pirko
2019-01-21 13:06         ` Eran Ben Elisha
2019-01-21 13:45           ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 10/11] net/mlx5e: Add TX timeout support for mlx5e TX reporter Eran Ben Elisha
2019-01-17 21:59 ` [PATCH net-next v2 11/11] devlink: Add Documentation/networking/devlink-health.txt Eran Ben Elisha
2019-01-18 22:59 ` [PATCH net-next v2 00/11] Devlink health reporting and recovery system David Miller
2019-01-20  9:27 ` [PATCH iproute2-next v2] devlink: Add health command support Aya Levin
2019-01-23  3:37   ` David Ahern
2019-01-23 11:27     ` Aya Levin
2019-01-24  0:27       ` David Ahern

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190120114959.GL2730@nanopsycho \
    --to=jiri@resnulli.us \
    --cc=ariela@mellanox.com \
    --cc=ayal@mellanox.com \
    --cc=davem@davemloft.net \
    --cc=eranbe@mellanox.com \
    --cc=jiri@mellanox.com \
    --cc=moshe@mellanox.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).