All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jiri Pirko <jiri@resnulli.us>
To: Eran Ben Elisha <eranbe@mellanox.com>
Cc: netdev@vger.kernel.org, Jiri Pirko <jiri@mellanox.com>,
	"David S. Miller" <davem@davemloft.net>,
	Ariel Almog <ariela@mellanox.com>, Aya Levin <ayal@mellanox.com>,
	Moshe Shemesh <moshe@mellanox.com>
Subject: Re: [PATCH net-next v2 02/11] devlink: Add health reporter create/destroy functionality
Date: Sun, 20 Jan 2019 12:49:59 +0100	[thread overview]
Message-ID: <20190120114959.GL2730@nanopsycho> (raw)
In-Reply-To: <1547762360-7075-3-git-send-email-eranbe@mellanox.com>

Thu, Jan 17, 2019 at 10:59:11PM CET, eranbe@mellanox.com wrote:
>Devlink health reporter is an instance for reporting, diagnosing and
>recovering from run time errors discovered by the reporters.
>Define it's data structure and supported operations.
>In addition, expose devlink API to create and destroy a reporter.
>Each devlink instance will hold it's own reporters list.
>
>As part of the allocation, driver shall provide a set of callbacks which
>will be used the devlink in order to handle health reports and user
>commands related to this reporter. In addition, driver is entitled to
>provide some priv pointer, which can be fetched from the reporter by
>devlink_health_reporter_priv function.
>
>For each reporter, devlink will hold a metadata of statistics,
>buffers and status.
>
>Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
>---
> include/net/devlink.h |  59 ++++++++++++++++++++
> net/core/devlink.c    | 127 ++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 186 insertions(+)
>
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index 77c77319290a..7fe30d67678a 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -30,6 +30,7 @@ struct devlink {
> 	struct list_head param_list;
> 	struct list_head region_list;
> 	u32 snapshot_id;
>+	struct list_head reporter_list;
> 	struct devlink_dpipe_headers *dpipe_headers;
> 	const struct devlink_ops *ops;
> 	struct device *dev;
>@@ -424,6 +425,34 @@ struct devlink_region;
> typedef void devlink_snapshot_data_dest_t(const void *data);
> 
> struct devlink_health_buffer;
>+struct devlink_health_reporter;
>+
>+/**
>+ * struct devlink_health_reporter_ops - Reporter operations
>+ * @name: reporter name
>+ * dump_size: dump buffer size allocated by the devlink
>+ * diagnose_size: diagnose buffer size allocated by the devlink
>+ * recover: callback to recover from reported error
>+ *          if priv_ctx is NULL, run a full recover
>+ * dump: callback to dump an object
>+ *       if priv_ctx is NULL, run a full dump
>+ * diagnose: callback to diagnose the current status
>+ */
>+
>+struct devlink_health_reporter_ops {
>+	char *name;
>+	unsigned int dump_size;
>+	unsigned int diagnose_size;
>+	int (*recover)(struct devlink_health_reporter *reporter,
>+		       void *priv_ctx);
>+	int (*dump)(struct devlink_health_reporter *reporter,
>+		    struct devlink_health_buffer **buffers_array,
>+		    unsigned int buffer_size, unsigned int num_buffers,
>+		    void *priv_ctx);
>+	int (*diagnose)(struct devlink_health_reporter *reporter,
>+			struct devlink_health_buffer **buffers_array,
>+			unsigned int buffer_size, unsigned int num_buffers);
>+};
> 
> struct devlink_ops {
> 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
>@@ -602,6 +631,16 @@ int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
> 					   char *name);
> int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
> 					 void *data, int len);
>+struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+			       const struct devlink_health_reporter_ops *ops,
>+			       u64 graceful_period, bool auto_recover,
>+			       void *priv);
>+void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
>+
>+void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
> #else
> 
> static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
>@@ -920,6 +959,26 @@ devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
> {
> 	return 0;
> }
>+
>+static inline struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+			       const struct devlink_health_reporter_ops *ops,
>+			       u64 graceful_period, bool auto_recover,
>+			       void *priv)
>+{
>+	return NULL;
>+}
>+
>+static inline void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
>+{
>+}
>+
>+static inline void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
>+{
>+	return NULL;
>+}
> #endif
> 
> #endif /* _NET_DEVLINK_H_ */
>diff --git a/net/core/devlink.c b/net/core/devlink.c
>index 8984501edade..fec169a28dba 100644
>--- a/net/core/devlink.c
>+++ b/net/core/devlink.c
>@@ -4098,6 +4098,132 @@ devlink_health_buffer_snd(struct genl_info *info,
> 	return err;
> }
> 
>+struct devlink_health_reporter {
>+	struct list_head list;
>+	struct devlink_health_buffer **dump_buffers_array;
>+	struct mutex dump_lock; /* lock parallel read/write from dump buffers */
>+	struct devlink_health_buffer **diagnose_buffers_array;
>+	struct mutex diagnose_lock; /* lock parallel read/write from diagnose buffers */
>+	void *priv;
>+	const struct devlink_health_reporter_ops *ops;
>+	struct devlink *devlink;
>+	u64 graceful_period;
>+	bool auto_recover;
>+	u8 health_state;
>+};
>+
>+void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
>+{
>+	return reporter->priv;
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
>+
>+static struct devlink_health_reporter *
>+devlink_health_reporter_find_by_name(struct devlink *devlink,
>+				     const char *reporter_name)
>+{
>+	struct devlink_health_reporter *reporter;
>+
>+	list_for_each_entry(reporter, &devlink->reporter_list, list)
>+		if (!strcmp(reporter->ops->name, reporter_name))
>+			return reporter;
>+	return NULL;
>+}
>+
>+/**
>+ *	devlink_health_reporter_create - create devlink health reporter
>+ *
>+ *	@devlink: devlink
>+ *	@ops: ops
>+ *	@graceful_period: to avoid recovery loops, in msecs
>+ *	@auto_recover: auto recover when error occurs
>+ *	@priv: priv
>+ */
>+struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+			       const struct devlink_health_reporter_ops *ops,
>+			       u64 graceful_period, bool auto_recover,
>+			       void *priv)
>+{
>+	struct devlink_health_reporter *reporter;
>+
>+	mutex_lock(&devlink->lock);
>+	if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
>+		reporter = ERR_PTR(-EEXIST);
>+		goto unlock;
>+	}
>+
>+	if (WARN_ON(ops->dump && !ops->dump_size) ||
>+	    WARN_ON(ops->diagnose && !ops->diagnose_size) ||
>+	    WARN_ON(auto_recover && !ops->recover) ||
>+	    WARN_ON(graceful_period && !ops->recover)) {
>+		reporter = ERR_PTR(-EINVAL);
>+		goto unlock;
>+	}
>+
>+	reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
>+	if (!reporter) {
>+		reporter = ERR_PTR(-ENOMEM);
>+		goto unlock;
>+	}
>+
>+	if (ops->dump) {
>+		reporter->dump_buffers_array =
>+			devlink_health_buffers_create(ops->dump_size);
>+		if (!reporter->dump_buffers_array) {
>+			kfree(reporter);
>+			reporter = ERR_PTR(-ENOMEM);
>+			goto unlock;
>+		}
>+	}
>+
>+	if (ops->diagnose) {
>+		reporter->diagnose_buffers_array =
>+			devlink_health_buffers_create(ops->diagnose_size);
>+		if (!reporter->diagnose_buffers_array) {
>+			devlink_health_buffers_destroy(reporter->dump_buffers_array,
>+						       DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size));

This is just ugly. :/

As I wrote in the other email, should be converted to simple
"msg_ctx = msg_ctx_create(), msg_ctx_destroy(msg_ctx)", no sizes, no
buffers visible to the driver.


>+			kfree(reporter);
>+			reporter = ERR_PTR(-ENOMEM);
>+			goto unlock;
>+		}
>+	}
>+
>+	list_add_tail(&reporter->list, &devlink->reporter_list);
>+	mutex_init(&reporter->dump_lock);
>+	mutex_init(&reporter->diagnose_lock);
>+
>+	reporter->priv = priv;
>+	reporter->ops = ops;
>+	reporter->devlink = devlink;
>+	reporter->graceful_period = graceful_period;
>+	reporter->auto_recover = auto_recover;
>+unlock:
>+	mutex_unlock(&devlink->lock);
>+	return reporter;
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
>+
>+/**
>+ *	devlink_health_reporter_destroy - destroy devlink health reporter
>+ *
>+ *	@reporter: devlink health reporter to destroy
>+ */
>+void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
>+{
>+	mutex_lock(&reporter->devlink->lock);
>+	list_del(&reporter->list);
>+	devlink_health_buffers_destroy(reporter->dump_buffers_array,
>+				       DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
>+	devlink_health_buffers_destroy(reporter->diagnose_buffers_array,
>+				       DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size));
>+	kfree(reporter);
>+	mutex_unlock(&reporter->devlink->lock);
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
>+
> static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
> 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
> 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
>@@ -4383,6 +4509,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
> 	INIT_LIST_HEAD(&devlink->resource_list);
> 	INIT_LIST_HEAD(&devlink->param_list);
> 	INIT_LIST_HEAD(&devlink->region_list);
>+	INIT_LIST_HEAD(&devlink->reporter_list);
> 	mutex_init(&devlink->lock);
> 	return devlink;
> }
>-- 
>2.17.1
>

  reply	other threads:[~2019-01-20 12:01 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-17 21:59 [PATCH net-next v2 00/11] Devlink health reporting and recovery system Eran Ben Elisha
2019-01-17 21:59 ` [PATCH net-next v2 01/11] devlink: Add health buffer support Eran Ben Elisha
2019-01-20 10:03   ` Jiri Pirko
2019-01-20 11:06     ` Eran Ben Elisha
2019-01-20 11:08       ` Jiri Pirko
2019-01-20 18:45         ` David Miller
2019-01-21 11:07           ` Eran Ben Elisha
2019-01-21 12:08             ` Jiri Pirko
2019-01-20 11:20   ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 02/11] devlink: Add health reporter create/destroy functionality Eran Ben Elisha
2019-01-20 11:49   ` Jiri Pirko [this message]
2019-01-17 21:59 ` [PATCH net-next v2 03/11] devlink: Add health report functionality Eran Ben Elisha
2019-01-20 11:27   ` Jiri Pirko
2019-01-21 11:12     ` Eran Ben Elisha
2019-01-17 21:59 ` [PATCH net-next v2 04/11] devlink: Add health get command Eran Ben Elisha
2019-01-20 11:31   ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 05/11] devlink: Add health set command Eran Ben Elisha
2019-01-20 11:32   ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 06/11] devlink: Add health recover command Eran Ben Elisha
2019-01-20 11:33   ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 07/11] devlink: Add health diagnose command Eran Ben Elisha
2019-01-20 11:38   ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 08/11] devlink: Add health dump {get,clear} commands Eran Ben Elisha
2019-01-17 21:59 ` [PATCH net-next v2 09/11] net/mlx5e: Add TX reporter support Eran Ben Elisha
2019-01-20 11:06   ` Jiri Pirko
2019-01-21 11:32     ` Eran Ben Elisha
2019-01-21 12:11       ` Jiri Pirko
2019-01-21 13:06         ` Eran Ben Elisha
2019-01-21 13:45           ` Jiri Pirko
2019-01-17 21:59 ` [PATCH net-next v2 10/11] net/mlx5e: Add TX timeout support for mlx5e TX reporter Eran Ben Elisha
2019-01-17 21:59 ` [PATCH net-next v2 11/11] devlink: Add Documentation/networking/devlink-health.txt Eran Ben Elisha
2019-01-18 22:59 ` [PATCH net-next v2 00/11] Devlink health reporting and recovery system David Miller
2019-01-20  9:27 ` [PATCH iproute2-next v2] devlink: Add health command support Aya Levin
2019-01-23  3:37   ` David Ahern
2019-01-23 11:27     ` Aya Levin
2019-01-24  0:27       ` David Ahern

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190120114959.GL2730@nanopsycho \
    --to=jiri@resnulli.us \
    --cc=ariela@mellanox.com \
    --cc=ayal@mellanox.com \
    --cc=davem@davemloft.net \
    --cc=eranbe@mellanox.com \
    --cc=jiri@mellanox.com \
    --cc=moshe@mellanox.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.