Linux SCSI subsystem development
 help / color / mirror / Atom feed
From: Ranjan Kumar <ranjan.kumar@broadcom.com>
To: linux-scsi@vger.kernel.org, martin.petersen@oracle.com
Cc: rajsekhar.chundru@broadcom.com, sathya.prakash@broadcom.com,
	chandrakanth.patil@broadcom.com, prayas.patel@broadcom.com,
	salomondush@google.com, Ranjan Kumar <ranjan.kumar@broadcom.com>
Subject: [PATCH v1 6/7] mpi3mr: Record and report controller firmware faults
Date: Mon, 12 Jan 2026 13:40:36 +0530	[thread overview]
Message-ID: <20260112081037.74376-7-ranjan.kumar@broadcom.com> (raw)
In-Reply-To: <20260112081037.74376-1-ranjan.kumar@broadcom.com>

Capture and retain firmware fault codes and extended fault information
whenever the controller enters a fault state.

Maintain a persistent firmware fault counter, expose it via sysfs,
and generate uevents to aid userspace diagnostics and failure analysis.

Signed-off-by: Salomon Dushimirimana <salomondush@google.com>
Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
---
 drivers/scsi/mpi3mr/mpi3mr.h     |  8 +++
 drivers/scsi/mpi3mr/mpi3mr_app.c | 24 +++++++++
 drivers/scsi/mpi3mr/mpi3mr_fw.c  | 87 ++++++++++++++++++++++++++++++++
 3 files changed, 119 insertions(+)

diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h
index 590c017acf25..58db60e13c13 100644
--- a/drivers/scsi/mpi3mr/mpi3mr.h
+++ b/drivers/scsi/mpi3mr/mpi3mr.h
@@ -1137,6 +1137,10 @@ struct scmd_priv {
  * @default_qcount: Total Default queues
  * @active_poll_qcount: Currently active poll queue count
  * @requested_poll_qcount: User requested poll queue count
+ * @fault_during_init: Indicates a firmware fault occurred during initialization
+ * @saved_fault_code: Firmware fault code captured at the time of failure
+ * @saved_fault_info: Additional firmware-provided fault information
+ * @fwfault_counter: Count of firmware faults detected by the driver
  * @bsg_dev: BSG device structure
  * @bsg_queue: Request queue for BSG device
  * @stop_bsgs: Stop BSG request flag
@@ -1340,6 +1344,10 @@ struct mpi3mr_ioc {
 	u16 default_qcount;
 	u16 active_poll_qcount;
 	u16 requested_poll_qcount;
+	u8 fault_during_init;
+	u32 saved_fault_code;
+	u32 saved_fault_info[3];
+	u64 fwfault_counter;
 
 	struct device bsg_dev;
 	struct request_queue *bsg_queue;
diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c
index 37cca0573ddc..1353a8ff9c85 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_app.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_app.c
@@ -3255,6 +3255,29 @@ adp_state_show(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR_RO(adp_state);
 
+/**
+ * fwfault_count_show() - SysFS callback to show firmware fault count
+ * @dev: class device
+ * @attr: Device attribute
+ * @buf: Buffer to copy data into
+ *
+ * Displays the total number of firmware faults detected by the driver
+ * since the controller was initialized.
+ *
+ * Return: Number of bytes written to @buf
+ */
+
+static ssize_t
+fwfault_count_show(struct device *dev, struct device_attribute *attr,
+	char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct mpi3mr_ioc *mrioc = shost_priv(shost);
+
+	return snprintf(buf, PAGE_SIZE, "%llu\n", mrioc->fwfault_counter);
+}
+static DEVICE_ATTR_RO(fwfault_count);
+
 static struct attribute *mpi3mr_host_attrs[] = {
 	&dev_attr_version_fw.attr,
 	&dev_attr_fw_queue_depth.attr,
@@ -3263,6 +3286,7 @@ static struct attribute *mpi3mr_host_attrs[] = {
 	&dev_attr_reply_qfull_count.attr,
 	&dev_attr_logging_level.attr,
 	&dev_attr_adp_state.attr,
+	&dev_attr_fwfault_count.attr,
 	NULL,
 };
 
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 178738850541..ea951ef4b2d9 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -1108,6 +1108,31 @@ void mpi3mr_print_fault_info(struct mpi3mr_ioc *mrioc)
 	}
 }
 
+/**
+ * mpi3mr_save_fault_info - Save fault information
+ * @mrioc: Adapter instance reference
+ *
+ * Save the controller fault information if there is a
+ * controller fault.
+ *
+ * Return: Nothing.
+ */
+static void mpi3mr_save_fault_info(struct mpi3mr_ioc *mrioc)
+{
+	u32 ioc_status, i;
+
+	ioc_status = readl(&mrioc->sysif_regs->ioc_status);
+
+	if (ioc_status & MPI3_SYSIF_IOC_STATUS_FAULT) {
+		mrioc->saved_fault_code = readl(&mrioc->sysif_regs->fault) &
+		    MPI3_SYSIF_FAULT_CODE_MASK;
+		for (i = 0; i < 3; i++) {
+			mrioc->saved_fault_info[i] =
+			readl(&mrioc->sysif_regs->fault_info[i]);
+		}
+	}
+}
+
 /**
  * mpi3mr_get_iocstate - Get IOC State
  * @mrioc: Adapter instance reference
@@ -1249,6 +1274,44 @@ static void mpi3mr_alloc_ioctl_dma_memory(struct mpi3mr_ioc *mrioc)
 	mpi3mr_free_ioctl_dma_memory(mrioc);
 }
 
+/**
+ * mpi3mr_fault_uevent_emit - Emit uevent for any controller
+ * fault
+ * @mrioc: Pointer to the mpi3mr_ioc structure for the controller instance
+ *
+ * This function is invoked when the controller undergoes any
+ * type of fault.
+ */
+
+static void mpi3mr_fault_uevent_emit(struct mpi3mr_ioc *mrioc)
+{
+	struct kobj_uevent_env *env;
+
+	env = kzalloc(sizeof(*env), GFP_KERNEL);
+	if (!env)
+		return;
+
+	if (add_uevent_var(env, "DRIVER=%s", mrioc->driver_name))
+		return;
+	if (add_uevent_var(env, "IOC_ID=%u", mrioc->id))
+		return;
+	if (add_uevent_var(env, "FAULT_CODE=0x%08x", mrioc->saved_fault_code))
+		return;
+	if (add_uevent_var(env, "FAULT_INFO0=0x%08x",
+	     mrioc->saved_fault_info[0]))
+		return;
+	if (add_uevent_var(env, "FAULT_INFO1=0x%08x",
+	     mrioc->saved_fault_info[1]))
+		return;
+	if (add_uevent_var(env, "FAULT_INFO2=0x%08x",
+	    mrioc->saved_fault_info[2]))
+		return;
+
+	kobject_uevent_env(&mrioc->shost->shost_gendev.kobj,
+	    KOBJ_CHANGE, env->envp);
+	kfree(env);
+}
+
 /**
  * mpi3mr_clear_reset_history - clear reset history
  * @mrioc: Adapter instance reference
@@ -1480,6 +1543,10 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc)
 		if (ioc_state == MRIOC_STATE_FAULT) {
 			timeout = MPI3_SYSIF_DIAG_SAVE_TIMEOUT * 10;
 			mpi3mr_print_fault_info(mrioc);
+			mpi3mr_save_fault_info(mrioc);
+			mrioc->fault_during_init = 1;
+			mrioc->fwfault_counter++;
+
 			do {
 				host_diagnostic =
 					readl(&mrioc->sysif_regs->host_diagnostic);
@@ -2577,6 +2644,9 @@ void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code)
 		mpi3mr_set_trigger_data_in_all_hdb(mrioc,
 		    MPI3MR_HDB_TRIGGER_TYPE_FAULT, &trigger_data, 0);
 		mpi3mr_print_fault_info(mrioc);
+		mpi3mr_save_fault_info(mrioc);
+		mrioc->fault_during_init = 1;
+		mrioc->fwfault_counter++;
 		return;
 	}
 
@@ -2594,6 +2664,10 @@ void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code)
 			break;
 		msleep(100);
 	} while (--timeout);
+
+	mpi3mr_save_fault_info(mrioc);
+	mrioc->fault_during_init = 1;
+	mrioc->fwfault_counter++;
 }
 
 /**
@@ -2770,6 +2844,11 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
 	union mpi3mr_trigger_data trigger_data;
 	u16 reset_reason = MPI3MR_RESET_FROM_FAULT_WATCH;
 
+	if (mrioc->fault_during_init) {
+		mpi3mr_fault_uevent_emit(mrioc);
+		mrioc->fault_during_init = 0;
+	}
+
 	if (mrioc->reset_in_progress || mrioc->pci_err_recovery)
 		return;
 
@@ -2842,6 +2921,10 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
 		goto schedule_work;
 	}
 
+	mpi3mr_save_fault_info(mrioc);
+	mpi3mr_fault_uevent_emit(mrioc);
+	mrioc->fwfault_counter++;
+
 	switch (trigger_data.fault) {
 	case MPI3_SYSIF_FAULT_CODE_COMPLETE_RESET_NEEDED:
 	case MPI3_SYSIF_FAULT_CODE_POWER_CYCLE_REQUIRED:
@@ -5478,6 +5561,10 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
 					break;
 				msleep(100);
 			} while (--timeout);
+
+			mpi3mr_save_fault_info(mrioc);
+			mpi3mr_fault_uevent_emit(mrioc);
+			mrioc->fwfault_counter++;
 			mpi3mr_set_trigger_data_in_all_hdb(mrioc,
 			    MPI3MR_HDB_TRIGGER_TYPE_FAULT, &trigger_data, 0);
 		}
-- 
2.47.3


  parent reply	other threads:[~2026-01-12  8:17 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-12  8:10 [PATCH v1 0/7] mpi3mr: Enhancements for mpi3mr Ranjan Kumar
2026-01-12  8:10 ` [PATCH v1 1/7] mpi3mr: Add module parameter to control threaded IRQ polling Ranjan Kumar
2026-01-12 14:14   ` Damien Le Moal
2026-01-13 17:36     ` Ranjan Kumar
2026-01-12  8:10 ` [PATCH v1 2/7] mpi3mr: Rename log data save helper to reflect threaded/BH context Ranjan Kumar
2026-01-12 14:13   ` Damien Le Moal
2026-01-13  7:27     ` Ranjan Kumar
2026-01-15  8:45       ` Ranjan Kumar
2026-01-12  8:10 ` [PATCH v1 3/7] mpi3mr: Avoid redundant diag-fault resets Ranjan Kumar
2026-01-12  8:10 ` [PATCH v1 4/7] mpi3mr: Use negotiated link rate from DevicePage0 Ranjan Kumar
2026-01-12 14:18   ` Damien Le Moal
2026-01-13 17:46     ` Ranjan Kumar
2026-01-12 16:38   ` kernel test robot
2026-01-12  8:10 ` [PATCH v1 5/7] mpi3mr: Update MPI Headers to revision 39 Ranjan Kumar
2026-01-12 14:21   ` Damien Le Moal
2026-01-13 17:39     ` Ranjan Kumar
2026-01-12  8:10 ` Ranjan Kumar [this message]
2026-01-12 14:25   ` [PATCH v1 6/7] mpi3mr: Record and report controller firmware faults Damien Le Moal
2026-01-13 18:28     ` Ranjan Kumar
2026-01-12  8:10 ` [PATCH v1 7/7] mpi3mr: Driver version update to 8.17.0.3.50 Ranjan Kumar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260112081037.74376-7-ranjan.kumar@broadcom.com \
    --to=ranjan.kumar@broadcom.com \
    --cc=chandrakanth.patil@broadcom.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=prayas.patel@broadcom.com \
    --cc=rajsekhar.chundru@broadcom.com \
    --cc=salomondush@google.com \
    --cc=sathya.prakash@broadcom.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox