Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] nvme-pci: add NVMe controller statistics
@ 2025-05-29 17:29 Tokunori Ikegami
  2025-06-03  0:51 ` Chaitanya Kulkarni
  0 siblings, 1 reply; 3+ messages in thread
From: Tokunori Ikegami @ 2025-05-29 17:29 UTC (permalink / raw)
  To: linux-nvme; +Cc: Tokunori Ikegami

This is to count the controller warning events.

Signed-off-by: Tokunori Ikegami <ikegami.t@gmail.com>
---
Changes since v1:
- Split the sysfs stats attribute to create 4 new files.
- Create a stats subdirectory for the split attributes.
- Change the device attributes to read-write version.

 drivers/nvme/host/nvme.h |   9 +++
 drivers/nvme/host/pci.c  | 127 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ad0c1f834f09..5a6d0aebc9f8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -277,6 +277,13 @@ enum nvme_ctrl_flags {
 	NVME_CTRL_FROZEN		= 6,
 };
 
+struct nvme_stats {
+	unsigned long timeouts;
+	unsigned long aborts;
+	unsigned long resets;
+	unsigned long disables;
+};
+
 struct nvme_ctrl {
 	bool comp_seen;
 	bool identified;
@@ -411,6 +418,8 @@ struct nvme_ctrl {
 	enum nvme_ctrl_type cntrltype;
 	enum nvme_dctype dctype;
 	u16 awupf; /* 0's based value. */
+
+	struct nvme_stats stats;
 };
 
 static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e0bfe04a2bc2..632b222b51ff 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1467,6 +1467,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
 		dev_warn(dev->ctrl.device,
 			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
 			 csts, result);
+	dev->ctrl.stats.resets++;
 
 	if (csts != ~0)
 		return;
@@ -1528,6 +1529,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 		dev_warn(dev->ctrl.device,
 			 "I/O tag %d (%04x) QID %d timeout, completion polled\n",
 			 req->tag, nvme_cid(req), nvmeq->qid);
+		dev->ctrl.stats.timeouts++;
 		return BLK_EH_DONE;
 	}
 
@@ -1565,6 +1567,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 			 "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
 			 req->tag, nvme_cid(req), opcode,
 			 nvme_opcode_str(nvmeq->qid, opcode), nvmeq->qid);
+		dev->ctrl.stats.resets++;
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		goto disable;
 	}
@@ -1584,6 +1587,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 		 req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode),
 		 nvmeq->qid, blk_op_str(req_op(req)), req_op(req),
 		 blk_rq_bytes(req));
+	dev->ctrl.stats.aborts++;
 
 	abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
 					 BLK_MQ_REQ_NOWAIT);
@@ -2424,9 +2428,130 @@ static const struct attribute_group nvme_pci_dev_attrs_group = {
 	.is_visible	= nvme_pci_attrs_are_visible,
 };
 
+static ssize_t timeouts_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", ctrl->stats.timeouts);
+}
+
+static ssize_t timeouts_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned long timeouts;
+	int err;
+
+	err = kstrtoul(buf, 10, &timeouts);
+	if (err)
+		return -EINVAL;
+
+	ctrl->stats.timeouts = timeouts;
+
+	return count;
+}
+static DEVICE_ATTR_RW(timeouts);
+
+static ssize_t aborts_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", ctrl->stats.aborts);
+}
+
+static ssize_t aborts_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned long aborts;
+	int err;
+
+	err = kstrtoul(buf, 10, &aborts);
+	if (err)
+		return -EINVAL;
+
+	ctrl->stats.aborts = aborts;
+
+	return count;
+}
+static DEVICE_ATTR_RW(aborts);
+
+static ssize_t resets_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", ctrl->stats.resets);
+}
+
+static ssize_t resets_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned long resets;
+	int err;
+
+	err = kstrtoul(buf, 10, &resets);
+	if (err)
+		return -EINVAL;
+
+	ctrl->stats.resets = resets;
+
+	return count;
+}
+static DEVICE_ATTR_RW(resets);
+
+static ssize_t disables_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", ctrl->stats.disables);
+}
+
+static ssize_t disables_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned long disables;
+	int err;
+
+	err = kstrtoul(buf, 10, &disables);
+	if (err)
+		return -EINVAL;
+
+	ctrl->stats.disables = disables;
+
+	return count;
+}
+static DEVICE_ATTR_RW(disables);
+
+static umode_t nvme_stats_attrs_are_visible(struct kobject *kobj,
+					    struct attribute *a, int n)
+{
+	return a->mode;
+}
+
+static struct attribute *nvme_stats_attrs[] = {
+	&dev_attr_timeouts.attr,
+	&dev_attr_aborts.attr,
+	&dev_attr_resets.attr,
+	&dev_attr_disables.attr,
+	NULL,
+};
+
+static const struct attribute_group nvme_stats_attrs_group = {
+	.name		= "stats",
+	.attrs		= nvme_stats_attrs,
+	.is_visible	= nvme_stats_attrs_are_visible,
+};
+
 static const struct attribute_group *nvme_pci_dev_attr_groups[] = {
 	&nvme_dev_attrs_group,
 	&nvme_pci_dev_attrs_group,
+	&nvme_stats_attrs_group,
 	NULL,
 };
 
@@ -3057,6 +3182,7 @@ static void nvme_reset_work(struct work_struct *work)
 	 */
 	dev_warn(dev->ctrl.device, "Disabling device after reset failure: %d\n",
 		 result);
+	dev->ctrl.stats.disables++;
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 	nvme_dev_disable(dev, true);
 	nvme_sync_queues(&dev->ctrl);
@@ -3593,6 +3719,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
 	case pci_channel_io_frozen:
 		dev_warn(dev->ctrl.device,
 			"frozen state error detected, reset controller\n");
+		dev->ctrl.stats.resets++;
 		if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
 			nvme_dev_disable(dev, true);
 			return PCI_ERS_RESULT_DISCONNECT;
-- 
2.48.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] nvme-pci: add NVMe controller statistics
  2025-05-29 17:29 [PATCH v2] nvme-pci: add NVMe controller statistics Tokunori Ikegami
@ 2025-06-03  0:51 ` Chaitanya Kulkarni
  2025-06-03 14:10   ` Tokunori Ikegami
  0 siblings, 1 reply; 3+ messages in thread
From: Chaitanya Kulkarni @ 2025-06-03  0:51 UTC (permalink / raw)
  To: Tokunori Ikegami, linux-nvme@lists.infradead.org

On 5/29/25 10:29, Tokunori Ikegami wrote:
> This is to count the controller warning events.
>
> Signed-off-by: Tokunori Ikegami<ikegami.t@gmail.com>

Can you please update the commit log to explain why you want to
count these events in the first place, what kind of problem
this solves, and whether it has any impact on performance?

-ck



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] nvme-pci: add NVMe controller statistics
  2025-06-03  0:51 ` Chaitanya Kulkarni
@ 2025-06-03 14:10   ` Tokunori Ikegami
  0 siblings, 0 replies; 3+ messages in thread
From: Tokunori Ikegami @ 2025-06-03 14:10 UTC (permalink / raw)
  To: Chaitanya Kulkarni, linux-nvme@lists.infradead.org

On 2025/06/03 9:51, Chaitanya Kulkarni wrote:
> Can you please update the commit log to explain why you want to
> count these events in the first place, what kind of problem
> this solves, and whether it has any impact on performance?
Just done by the version 3 patch. Thank you.



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-06-03 15:28 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-05-29 17:29 [PATCH v2] nvme-pci: add NVMe controller statistics Tokunori Ikegami
2025-06-03  0:51 ` Chaitanya Kulkarni
2025-06-03 14:10   ` Tokunori Ikegami

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox