From: Nilay Shroff <nilay@linux.ibm.com>
To: linux-nvme@lists.infradead.org
Cc: dwagner@suse.de, hare@suse.com, kbusch@kernel.org, hch@lst.de,
sagi@grimberg.me, axboe@kernel.dk, chaitanyak@nvidia.com,
venkat88@linux.ibm.com, gjoyce@linux.ibm.com,
wenxiong@linux.ibm.com, Nilay Shroff <nilay@linux.ibm.com>
Subject: [PATCHv4 8/8] nvme: export controller reconnect event count via sysfs
Date: Sun, 17 May 2026 00:06:55 +0530 [thread overview]
Message-ID: <20260516183709.269937-9-nilay@linux.ibm.com> (raw)
In-Reply-To: <20260516183709.269937-1-nilay@linux.ibm.com>
When an NVMe-oF link goes down, the driver attempts to recover the
connection by repeatedly reconnecting to the remote controller at
configured intervals. A maximum number of reconnect attempts is also
configured, after which recovery stops and the controller is removed
if the connection cannot be re-established.
The driver maintains a counter, nr_reconnects, which is incremented on
each reconnect attempt. However if in case the reconnect is successful
then this counter reset to zero. Moreover, currently, this counter is
only reported via kernel log messages and is not exposed to userspace.
Since dmesg is a circular buffer, this information may be lost over
time.
So introduce a new accumulator which accumulates nr_reconnect attempts
and also expose this accumulator per-fabric ctrl via a new sysfs
attribute reconnect_count, under diag attribute grroup to provide
persistent visibility into the number of reconnect attempts made by the
host. This information can help users diagnose unstable links or
connectivity issues. Furthermore, this sysfs attribute is also writable
so user may reset it to zero, if needed.
The reconnect_count can also be consumed by monitoring tools such as
nvme-top to improve controller-level observability.
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
drivers/nvme/host/fc.c | 3 +++
drivers/nvme/host/nvme.h | 2 ++
drivers/nvme/host/rdma.c | 2 ++
drivers/nvme/host/sysfs.c | 35 +++++++++++++++++++++++++++++++++++
drivers/nvme/host/tcp.c | 2 ++
5 files changed, 44 insertions(+)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e4f4528fe2a2..f04eb13dd5e9 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3148,6 +3148,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
goto out_term_aen_ops;
}
+ /* accumulate reconnect attempts before resetting it to zero */
+ atomic_long_add(ctrl->ctrl.nr_reconnects, &ctrl->ctrl.acc_reconnects);
ctrl->ctrl.nr_reconnects = 0;
nvme_start_ctrl(&ctrl->ctrl);
@@ -3470,6 +3472,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->ctrl.opts = opts;
ctrl->ctrl.nr_reconnects = 0;
+ atomic_long_set(&ctrl->ctrl.acc_reconnects, 0);
INIT_LIST_HEAD(&ctrl->ctrl_list);
ctrl->lport = lport;
ctrl->rport = rport;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e575bef99d4a..22535328fdd5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -456,6 +456,8 @@ struct nvme_ctrl {
u16 icdoff;
u16 maxcmd;
int nr_reconnects;
+ /* accumulate reconenct attempts, as nr_reconnects can reset to zero */
+ atomic_long_t acc_reconnects;
unsigned long flags;
struct nvmf_ctrl_options *opts;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f77c960f7632..de45fefdc15e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1110,6 +1110,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
ctrl->ctrl.nr_reconnects);
+ /* accumulate reconnect attempts before resetting it to zero */
+ atomic_long_add(ctrl->ctrl.nr_reconnects, &ctrl->ctrl.acc_reconnects);
ctrl->ctrl.nr_reconnects = 0;
return;
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 72300d6de880..9c15e7d869ed 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -1094,17 +1094,52 @@ static ssize_t reset_count_store(struct device *dev,
return count;
}
+static ssize_t reconnect_count_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%lu\n",
+ atomic_long_read(&ctrl->acc_reconnects) +
+ ctrl->nr_reconnects);
+}
+
+static ssize_t reconnect_count_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ int err;
+ unsigned long reconnect_cnt;
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ err = kstrtoul(buf, 0, &reconnect_cnt);
+ if (err)
+ return -EINVAL;
+
+ atomic_long_set(&ctrl->acc_reconnects, reconnect_cnt);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(reconnect_count);
+
static DEVICE_ATTR_RW(reset_count);
static struct attribute *nvme_dev_diag_attrs[] = {
&dev_attr_adm_errors.attr,
&dev_attr_reset_count.attr,
+ &dev_attr_reconnect_count.attr,
NULL,
};
static umode_t nvme_dev_diag_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (a == &dev_attr_reconnect_count.attr && !ctrl->opts)
+ return 0;
+
return a->mode;
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 15d36d6a728e..ab9d19497b3f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2475,6 +2475,8 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
dev_info(ctrl->device, "Successfully reconnected (attempt %d/%d)\n",
ctrl->nr_reconnects, ctrl->opts->max_reconnects);
+ /* accumulate reconnect attempts before resetting it to zero */
+ atomic_long_add(ctrl->nr_reconnects, &ctrl->acc_reconnects);
ctrl->nr_reconnects = 0;
return;
--
2.53.0
next prev parent reply other threads:[~2026-05-16 18:38 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-16 18:36 [PATCHv4 0/8] nvme: export additional diagnostic counters via sysfs Nilay Shroff
2026-05-16 18:36 ` [PATCHv4 1/8] nvme: add diag attribute group under sysfs Nilay Shroff
2026-05-16 18:36 ` [PATCHv4 2/8] nvme: export command retry count via sysfs Nilay Shroff
2026-05-16 18:36 ` [PATCHv4 3/8] nvme: export multipath failover " Nilay Shroff
2026-05-16 18:36 ` [PATCHv4 4/8] nvme: export command error counters " Nilay Shroff
2026-05-16 18:36 ` [PATCHv4 5/8] nvme: export I/O requeue count when no path is usable " Nilay Shroff
2026-05-16 18:36 ` [PATCHv4 6/8] nvme: export I/O failure count when no path is available " Nilay Shroff
2026-05-16 18:36 ` [PATCHv4 7/8] nvme: export controller reset event count " Nilay Shroff
2026-05-16 18:36 ` Nilay Shroff [this message]
2026-05-16 18:47 ` [PATCHv4 0/8] nvme: export additional diagnostic counters " Nilay Shroff
2026-05-25 9:12 ` Venkat Rao Bagalkote
2026-05-27 19:54 ` Keith Busch
2026-06-04 8:58 ` Keith Busch
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260516183709.269937-9-nilay@linux.ibm.com \
--to=nilay@linux.ibm.com \
--cc=axboe@kernel.dk \
--cc=chaitanyak@nvidia.com \
--cc=dwagner@suse.de \
--cc=gjoyce@linux.ibm.com \
--cc=hare@suse.com \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
--cc=venkat88@linux.ibm.com \
--cc=wenxiong@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.