From: Nilay Shroff <nilay@linux.ibm.com>
To: linux-nvme@lists.infradead.org
Cc: kbusch@kernel.org, axboe@kernel.dk, hch@lst.de, sagi@grimberg.me,
hare@suse.de, dwagner@suse.de, wenxiong@linux.ibm.com,
gjoyce@ibm.com, Nilay Shroff <nilay@linux.ibm.com>
Subject: [PATCHv3 7/7] nvme: export controller reconnect event count via sysfs
Date: Fri, 20 Feb 2026 23:18:52 +0530 [thread overview]
Message-ID: <20260220175024.292898-8-nilay@linux.ibm.com> (raw)
In-Reply-To: <20260220175024.292898-1-nilay@linux.ibm.com>
When an NVMe-oF link goes down, the driver attempts to recover the
connection by repeatedly reconnecting to the remote controller at
configured intervals. A maximum number of reconnect attempts is also
configured, after which recovery stops and the controller is removed
if the connection cannot be re-established.
The driver maintains a counter, nr_reconnects, which is incremented on
each reconnect attempt. However, once a reconnect succeeds, this counter
is reset to zero. Moreover, this counter is currently only reported via
kernel log messages and is not exposed to userspace.
Since dmesg is a circular buffer, this information may be lost over
time.
So introduce a new accumulator which accumulates the nr_reconnects
count and expose this accumulator via a new sysfs attribute
"reconnect_events" to provide persistent visibility into the number
of reconnect attempts made by the host. This information can help
users diagnose unstable links or connectivity issues. Furthermore,
this sysfs attribute is also writable so that the user can reset it
to zero, if needed.
The "reconnect_events" can also be consumed by monitoring tools such
as nvme-top to improve controller-level observability.
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
drivers/nvme/host/fc.c | 5 +++++
drivers/nvme/host/nvme.h | 2 ++
drivers/nvme/host/rdma.c | 4 ++++
drivers/nvme/host/sysfs.c | 30 ++++++++++++++++++++++++++++++
drivers/nvme/host/tcp.c | 3 +++
5 files changed, 44 insertions(+)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 6948de3f438a..a918217620d1 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3148,6 +3148,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
goto out_term_aen_ops;
}
+ /* accumulate reconnect attempts before resetting it to zero */
+ WRITE_ONCE(ctrl->ctrl.acc_reconnects,
+ READ_ONCE(ctrl->ctrl.acc_reconnects) +
+ ctrl->ctrl.nr_reconnects);
ctrl->ctrl.nr_reconnects = 0;
nvme_start_ctrl(&ctrl->ctrl);
@@ -3470,6 +3474,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->ctrl.opts = opts;
ctrl->ctrl.nr_reconnects = 0;
+ ctrl->ctrl.acc_reconnects = 0;
INIT_LIST_HEAD(&ctrl->ctrl_list);
ctrl->lport = lport;
ctrl->rport = rport;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5d90e5fa7298..9146d1b48606 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -401,6 +401,8 @@ struct nvme_ctrl {
u16 icdoff;
u16 maxcmd;
int nr_reconnects;
+ /* accumulate reconnect attempts, as nr_reconnects can reset to zero */
+ size_t acc_reconnects;
unsigned long flags;
struct nvmf_ctrl_options *opts;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 35c0822edb2d..bd5492ad3da6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1110,6 +1110,10 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
ctrl->ctrl.nr_reconnects);
+ /* accumulate reconnect attempts before resetting it to zero */
+ WRITE_ONCE(ctrl->ctrl.acc_reconnects,
+ READ_ONCE(ctrl->ctrl.acc_reconnects) +
+ ctrl->ctrl.nr_reconnects);
ctrl->ctrl.nr_reconnects = 0;
return;
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index f3e6c7208315..166e45b589ad 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -736,6 +736,33 @@ static ssize_t reset_events_store(struct device *dev,
static DEVICE_ATTR_RW(reset_events);
+/*
+ * Show the total number of reconnect attempts for this controller: the
+ * accumulated count plus the attempts currently tracked in nr_reconnects,
+ * which the transports fold into the accumulator only on a successful
+ * reconnect.
+ */
+static ssize_t reconnect_events_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	/* acc_reconnects is a size_t, so the sum must be printed with %zu */
+	return sysfs_emit(buf, "%zu\n",
+			READ_ONCE(ctrl->acc_reconnects) + ctrl->nr_reconnects);
+}
+
+/*
+ * Overwrite the accumulated reconnect count with a user supplied value
+ * (e.g. "0" to reset it). Only the accumulator is written here;
+ * nr_reconnects is left untouched.
+ */
+static ssize_t reconnect_events_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned long val;
+
+	/* base 0: kstrtoul() auto-detects the radix from the input prefix */
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	WRITE_ONCE(ctrl->acc_reconnects, val);
+	return count;
+}
+
+static DEVICE_ATTR_RW(reconnect_events);
+
#ifdef CONFIG_NVME_HOST_AUTH
static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -884,6 +911,7 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_adm_passthru_err_log_enabled.attr,
&dev_attr_adm_errors.attr,
&dev_attr_reset_events.attr,
+ &dev_attr_reconnect_events.attr,
NULL
};
@@ -913,6 +941,8 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts)
return 0;
#endif
+ if (a == &dev_attr_reconnect_events.attr && !ctrl->opts)
+ return 0;
return a->mode;
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 69cb04406b47..46398c826368 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2460,6 +2460,9 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
dev_info(ctrl->device, "Successfully reconnected (attempt %d/%d)\n",
ctrl->nr_reconnects, ctrl->opts->max_reconnects);
+ /* accumulate reconnect attempts before resetting it to zero */
+ WRITE_ONCE(ctrl->acc_reconnects,
+ READ_ONCE(ctrl->acc_reconnects) + ctrl->nr_reconnects);
ctrl->nr_reconnects = 0;
return;
--
2.52.0
next prev parent reply other threads:[~2026-02-20 17:51 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-20 17:48 [PATCHv3 0/7] nvme: export additional diagnostic counters via sysfs Nilay Shroff
2026-02-20 17:48 ` [PATCHv3 1/7] nvme: export command retry count " Nilay Shroff
2026-02-20 17:48 ` [PATCHv3 2/7] nvme: export multipath failover " Nilay Shroff
2026-02-20 17:48 ` [PATCHv3 3/7] nvme: export command error counters " Nilay Shroff
2026-02-20 17:48 ` [PATCHv3 4/7] nvme: export I/O requeue count when no path is available " Nilay Shroff
2026-02-20 17:48 ` [PATCHv3 5/7] nvme: export I/O failure " Nilay Shroff
2026-02-20 17:48 ` [PATCHv3 6/7] nvme: export controller reset event count " Nilay Shroff
2026-02-20 17:48 ` Nilay Shroff [this message]
2026-02-22 12:36 ` [PATCHv3 0/7] nvme: export additional diagnostic counters " Venkat
2026-02-22 14:10 ` Nilay Shroff
2026-02-22 15:06 ` Venkat Rao Bagalkote
2026-02-26 5:37 ` Chaitanya Kulkarni
2026-03-04 14:33 ` Nilay Shroff
2026-03-06 16:02 ` Keith Busch
2026-03-08 18:55 ` Nilay Shroff
2026-03-09 15:32 ` John Garry
2026-03-19 15:55 ` Nilay Shroff
2026-03-16 12:56 ` Nilay Shroff
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260220175024.292898-8-nilay@linux.ibm.com \
--to=nilay@linux.ibm.com \
--cc=axboe@kernel.dk \
--cc=dwagner@suse.de \
--cc=gjoyce@ibm.com \
--cc=hare@suse.de \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
--cc=wenxiong@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox