All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ranjan Kumar <ranjan.kumar@broadcom.com>
To: linux-scsi@vger.kernel.org, martin.petersen@oracle.com
Cc: rajsekhar.chundru@broadcom.com, sathya.prakash@broadcom.com,
	chandrakanth.patil@broadcom.com, prayas.patel@broadcom.com,
	salomondush@google.com, Ranjan Kumar <ranjan.kumar@broadcom.com>
Subject: [PATCH v1 6/7] mpi3mr: Record and report controller firmware faults
Date: Mon, 12 Jan 2026 13:40:36 +0530	[thread overview]
Message-ID: <20260112081037.74376-7-ranjan.kumar@broadcom.com> (raw)
In-Reply-To: <20260112081037.74376-1-ranjan.kumar@broadcom.com>

Capture and retain firmware fault codes and extended fault information
whenever the controller enters a fault state.

Maintain a persistent firmware fault counter, expose it via sysfs,
and generate uevents to aid userspace diagnostics and failure analysis.

Signed-off-by: Salomon Dushimirimana <salomondush@google.com>
Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
---
 drivers/scsi/mpi3mr/mpi3mr.h     |  8 +++
 drivers/scsi/mpi3mr/mpi3mr_app.c | 24 +++++++++
 drivers/scsi/mpi3mr/mpi3mr_fw.c  | 87 ++++++++++++++++++++++++++++++++
 3 files changed, 119 insertions(+)

diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h
index 590c017acf25..58db60e13c13 100644
--- a/drivers/scsi/mpi3mr/mpi3mr.h
+++ b/drivers/scsi/mpi3mr/mpi3mr.h
@@ -1137,6 +1137,10 @@ struct scmd_priv {
  * @default_qcount: Total Default queues
  * @active_poll_qcount: Currently active poll queue count
  * @requested_poll_qcount: User requested poll queue count
+ * @fault_during_init: Set when a fault is saved; watchdog emits uevent and clears it
+ * @saved_fault_code: Firmware fault code captured at the time of failure
+ * @saved_fault_info: Additional firmware-provided fault information
+ * @fwfault_counter: Count of firmware faults detected by the driver
  * @bsg_dev: BSG device structure
  * @bsg_queue: Request queue for BSG device
  * @stop_bsgs: Stop BSG request flag
@@ -1340,6 +1344,10 @@ struct mpi3mr_ioc {
 	u16 default_qcount;
 	u16 active_poll_qcount;
 	u16 requested_poll_qcount;
+	u8 fault_during_init;
+	u32 saved_fault_code;
+	u32 saved_fault_info[3];
+	u64 fwfault_counter;
 
 	struct device bsg_dev;
 	struct request_queue *bsg_queue;
diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c
index 37cca0573ddc..1353a8ff9c85 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_app.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_app.c
@@ -3255,6 +3255,29 @@ adp_state_show(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR_RO(adp_state);
 
+/**
+ * fwfault_count_show() - SysFS callback to show firmware fault count
+ * @dev: class device
+ * @attr: Device attribute
+ * @buf: Buffer to copy data into
+ *
+ * Displays the value of the driver's firmware fault counter
+ * (mrioc->fwfault_counter) as an unsigned decimal followed by a
+ * newline.
+ *
+ * Return: Number of bytes written to @buf
+ */
+static ssize_t
+fwfault_count_show(struct device *dev, struct device_attribute *attr,
+	char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct mpi3mr_ioc *mrioc = shost_priv(shost);
+
+	return sysfs_emit(buf, "%llu\n", mrioc->fwfault_counter);
+}
+static DEVICE_ATTR_RO(fwfault_count);
+
 static struct attribute *mpi3mr_host_attrs[] = {
 	&dev_attr_version_fw.attr,
 	&dev_attr_fw_queue_depth.attr,
@@ -3263,6 +3286,7 @@ static struct attribute *mpi3mr_host_attrs[] = {
 	&dev_attr_reply_qfull_count.attr,
 	&dev_attr_logging_level.attr,
 	&dev_attr_adp_state.attr,
+	&dev_attr_fwfault_count.attr,
 	NULL,
 };
 
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 178738850541..ea951ef4b2d9 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -1108,6 +1108,31 @@ void mpi3mr_print_fault_info(struct mpi3mr_ioc *mrioc)
 	}
 }
 
+/**
+ * mpi3mr_save_fault_info - Save fault information
+ * @mrioc: Adapter instance reference
+ *
+ * Cache the fault code and the fault info registers in @mrioc
+ * when the IOC status register reports a fault; else do nothing.
+ *
+ * Return: Nothing.
+ */
+static void mpi3mr_save_fault_info(struct mpi3mr_ioc *mrioc)
+{
+	u32 status, idx;
+
+	status = readl(&mrioc->sysif_regs->ioc_status);
+	if (!(status & MPI3_SYSIF_IOC_STATUS_FAULT))
+		return;
+
+	mrioc->saved_fault_code = readl(&mrioc->sysif_regs->fault) &
+	    MPI3_SYSIF_FAULT_CODE_MASK;
+
+	for (idx = 0; idx < 3; idx++)
+		mrioc->saved_fault_info[idx] =
+		    readl(&mrioc->sysif_regs->fault_info[idx]);
+}
+
 /**
  * mpi3mr_get_iocstate - Get IOC State
  * @mrioc: Adapter instance reference
@@ -1249,6 +1274,44 @@ static void mpi3mr_alloc_ioctl_dma_memory(struct mpi3mr_ioc *mrioc)
 	mpi3mr_free_ioctl_dma_memory(mrioc);
 }
 
+/**
+ * mpi3mr_fault_uevent_emit - Emit uevent for any controller
+ * fault
+ * @mrioc: Pointer to the mpi3mr_ioc structure for the controller instance
+ *
+ * Build a KOBJ_CHANGE uevent on the SCSI host device carrying the
+ * driver name, the IOC id and the saved fault code/info words.
+ * The event is dropped silently if the environment cannot be
+ * allocated or populated; the environment buffer is always freed.
+ */
+static void mpi3mr_fault_uevent_emit(struct mpi3mr_ioc *mrioc)
+{
+	struct kobj_uevent_env *env;
+
+	env = kzalloc(sizeof(*env), GFP_KERNEL);
+	if (!env)
+		return;
+
+	if (add_uevent_var(env, "DRIVER=%s", mrioc->driver_name) ||
+	    add_uevent_var(env, "IOC_ID=%u", mrioc->id) ||
+	    add_uevent_var(env, "FAULT_CODE=0x%08x",
+			   mrioc->saved_fault_code) ||
+	    add_uevent_var(env, "FAULT_INFO0=0x%08x",
+			   mrioc->saved_fault_info[0]) ||
+	    add_uevent_var(env, "FAULT_INFO1=0x%08x",
+			   mrioc->saved_fault_info[1]) ||
+	    add_uevent_var(env, "FAULT_INFO2=0x%08x",
+			   mrioc->saved_fault_info[2]))
+		goto out_free;
+
+	kobject_uevent_env(&mrioc->shost->shost_gendev.kobj,
+	    KOBJ_CHANGE, env->envp);
+
+out_free:
+	/* env is heap-allocated; release it on success and on failure */
+	kfree(env);
+}
+
 /**
  * mpi3mr_clear_reset_history - clear reset history
  * @mrioc: Adapter instance reference
@@ -1480,6 +1543,10 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc)
 		if (ioc_state == MRIOC_STATE_FAULT) {
 			timeout = MPI3_SYSIF_DIAG_SAVE_TIMEOUT * 10;
 			mpi3mr_print_fault_info(mrioc);
+			mpi3mr_save_fault_info(mrioc);
+			mrioc->fault_during_init = 1;
+			mrioc->fwfault_counter++;
+
 			do {
 				host_diagnostic =
 					readl(&mrioc->sysif_regs->host_diagnostic);
@@ -2577,6 +2644,9 @@ void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code)
 		mpi3mr_set_trigger_data_in_all_hdb(mrioc,
 		    MPI3MR_HDB_TRIGGER_TYPE_FAULT, &trigger_data, 0);
 		mpi3mr_print_fault_info(mrioc);
+		mpi3mr_save_fault_info(mrioc);
+		mrioc->fault_during_init = 1;
+		mrioc->fwfault_counter++;
 		return;
 	}
 
@@ -2594,6 +2664,10 @@ void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code)
 			break;
 		msleep(100);
 	} while (--timeout);
+
+	mpi3mr_save_fault_info(mrioc);
+	mrioc->fault_during_init = 1;
+	mrioc->fwfault_counter++;
 }
 
 /**
@@ -2770,6 +2844,11 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
 	union mpi3mr_trigger_data trigger_data;
 	u16 reset_reason = MPI3MR_RESET_FROM_FAULT_WATCH;
 
+	if (mrioc->fault_during_init) {
+		mpi3mr_fault_uevent_emit(mrioc);
+		mrioc->fault_during_init = 0;
+	}
+
 	if (mrioc->reset_in_progress || mrioc->pci_err_recovery)
 		return;
 
@@ -2842,6 +2921,10 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
 		goto schedule_work;
 	}
 
+	mpi3mr_save_fault_info(mrioc);
+	mpi3mr_fault_uevent_emit(mrioc);
+	mrioc->fwfault_counter++;
+
 	switch (trigger_data.fault) {
 	case MPI3_SYSIF_FAULT_CODE_COMPLETE_RESET_NEEDED:
 	case MPI3_SYSIF_FAULT_CODE_POWER_CYCLE_REQUIRED:
@@ -5478,6 +5561,10 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
 					break;
 				msleep(100);
 			} while (--timeout);
+
+			mpi3mr_save_fault_info(mrioc);
+			mpi3mr_fault_uevent_emit(mrioc);
+			mrioc->fwfault_counter++;
 			mpi3mr_set_trigger_data_in_all_hdb(mrioc,
 			    MPI3MR_HDB_TRIGGER_TYPE_FAULT, &trigger_data, 0);
 		}
-- 
2.47.3


  parent reply	other threads:[~2026-01-12  8:17 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-12  8:10 [PATCH v1 0/7] mpi3mr: Enhancements for mpi3mr Ranjan Kumar
2026-01-12  8:10 ` [PATCH v1 1/7] mpi3mr: Add module parameter to control threaded IRQ polling Ranjan Kumar
2026-01-12 14:14   ` Damien Le Moal
2026-01-13 17:36     ` Ranjan Kumar
2026-01-12  8:10 ` [PATCH v1 2/7] mpi3mr: Rename log data save helper to reflect threaded/BH context Ranjan Kumar
2026-01-12 14:13   ` Damien Le Moal
2026-01-13  7:27     ` Ranjan Kumar
2026-01-15  8:45       ` Ranjan Kumar
2026-01-12  8:10 ` [PATCH v1 3/7] mpi3mr: Avoid redundant diag-fault resets Ranjan Kumar
2026-01-12  8:10 ` [PATCH v1 4/7] mpi3mr: Use negotiated link rate from DevicePage0 Ranjan Kumar
2026-01-12 14:18   ` Damien Le Moal
2026-01-13 17:46     ` Ranjan Kumar
2026-01-12 16:38   ` kernel test robot
2026-01-12  8:10 ` [PATCH v1 5/7] mpi3mr: Update MPI Headers to revision 39 Ranjan Kumar
2026-01-12 14:21   ` Damien Le Moal
2026-01-13 17:39     ` Ranjan Kumar
2026-01-12  8:10 ` Ranjan Kumar [this message]
2026-01-12 14:25   ` [PATCH v1 6/7] mpi3mr: Record and report controller firmware faults Damien Le Moal
2026-01-13 18:28     ` Ranjan Kumar
2026-01-12  8:10 ` [PATCH v1 7/7] mpi3mr: Driver version update to 8.17.0.3.50 Ranjan Kumar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260112081037.74376-7-ranjan.kumar@broadcom.com \
    --to=ranjan.kumar@broadcom.com \
    --cc=chandrakanth.patil@broadcom.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=prayas.patel@broadcom.com \
    --cc=rajsekhar.chundru@broadcom.com \
    --cc=salomondush@google.com \
    --cc=sathya.prakash@broadcom.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.