From: "Stephen M. Cameron" <scameron@beardog.cce.hp.com>
To: james.bottomley@hansenpartnership.com
Cc: linux-scsi@vger.kernel.org, linux-kernel@vger.kernel.org,
stephenmcameron@gmail.com, thenzl@redhat.com,
akpm@linux-foundation.org, mikem@beardog.cce.hp.com
Subject: [PATCH 17/17] hpsa: dial down lockup detection during firmware flash
Date: Tue, 01 May 2012 11:43:42 -0500 [thread overview]
Message-ID: <20120501164342.11705.58013.stgit@beardog.cce.hp.com> (raw)
In-Reply-To: <20120501163819.11705.10299.stgit@beardog.cce.hp.com>
From: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Dial back the aggressiveness of the controller lockup detection thread.
Currently it will declare the controller to be locked up if it goes
for 10 seconds with no interrupts and no change in the heartbeat
register. Dial this back to 30 seconds with no heartbeat change, and
also snoop the ioctl path and if a firmware flash command is detected,
dial it back further to 4 minutes until the firmware flash command
completes. The reason for this is that during the firmware flash
operation, the controller apparently doesn't update the heartbeat
register as frequently as it is supposed to, and we can get a false
positive.
Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
---
drivers/scsi/hpsa.c | 39 ++++++++++++++++++++++++++++++++++-----
drivers/scsi/hpsa.h | 2 ++
drivers/scsi/hpsa_cmd.h | 1 +
3 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 84239d8..28a568c 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -569,12 +569,42 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c)
}
}
+static int is_firmware_flash_cmd(u8 *cdb)
+{
+ return cdb[0] == BMIC_WRITE && cdb[6] == BMIC_FLASH_FIRMWARE;
+}
+
+/*
+ * During firmware flash, the heartbeat register may not update as frequently
+ * as it should. So we dial down lockup detection during firmware flash, and
+ * dial it back up when firmware flash completes.
+ */
+#define HEARTBEAT_SAMPLE_INTERVAL_DURING_FLASH (240 * HZ)
+#define HEARTBEAT_SAMPLE_INTERVAL (30 * HZ)
+static void dial_down_lockup_detection_during_fw_flash(struct ctlr_info *h,
+ struct CommandList *c)
+{
+ if (!is_firmware_flash_cmd(c->Request.CDB))
+ return;
+ atomic_inc(&h->firmware_flash_in_progress);
+ h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL_DURING_FLASH;
+}
+
+static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h,
+ struct CommandList *c)
+{
+ if (is_firmware_flash_cmd(c->Request.CDB) &&
+ atomic_dec_and_test(&h->firmware_flash_in_progress))
+ h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL;
+}
+
static void enqueue_cmd_and_start_io(struct ctlr_info *h,
struct CommandList *c)
{
unsigned long flags;
set_performant_mode(h, c);
+ dial_down_lockup_detection_during_fw_flash(h, c);
spin_lock_irqsave(&h->lock, flags);
addQ(&h->reqQ, c);
h->Qdepth++;
@@ -3385,6 +3415,7 @@ static inline void finish_cmd(struct CommandList *c)
spin_lock_irqsave(&c->h->lock, flags);
removeQ(c);
spin_unlock_irqrestore(&c->h->lock, flags);
+ dial_up_lockup_detection_on_fw_flash_complete(c->h, c);
if (likely(c->cmd_type == CMD_SCSI))
complete_scsi_command(c);
else if (c->cmd_type == CMD_IOCTL_PEND)
@@ -4565,9 +4596,6 @@ static void controller_lockup_detected(struct ctlr_info *h)
spin_unlock_irqrestore(&h->lock, flags);
}
-#define HEARTBEAT_SAMPLE_INTERVAL (10 * HZ)
-#define HEARTBEAT_CHECK_MINIMUM_INTERVAL (HEARTBEAT_SAMPLE_INTERVAL / 2)
-
static void detect_controller_lockup(struct ctlr_info *h)
{
u64 now;
@@ -4578,7 +4606,7 @@ static void detect_controller_lockup(struct ctlr_info *h)
now = get_jiffies_64();
/* If we've received an interrupt recently, we're ok. */
if (time_after64(h->last_intr_timestamp +
- (HEARTBEAT_CHECK_MINIMUM_INTERVAL), now))
+ (h->heartbeat_sample_interval), now))
return;
/*
@@ -4587,7 +4615,7 @@ static void detect_controller_lockup(struct ctlr_info *h)
* otherwise don't care about signals in this thread.
*/
if (time_after64(h->last_heartbeat_timestamp +
- (HEARTBEAT_CHECK_MINIMUM_INTERVAL), now))
+ (h->heartbeat_sample_interval), now))
return;
/* If heartbeat has not changed since we last looked, we're not ok. */
@@ -4629,6 +4657,7 @@ static void add_ctlr_to_lockup_detector_list(struct ctlr_info *h)
{
unsigned long flags;
+ h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL;
spin_lock_irqsave(&lockup_detector_lock, flags);
list_add_tail(&h->lockup_list, &hpsa_ctlr_list);
spin_unlock_irqrestore(&lockup_detector_lock, flags);
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index fb51ef7..9816479 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -129,6 +129,8 @@ struct ctlr_info {
u64 last_intr_timestamp;
u32 last_heartbeat;
u64 last_heartbeat_timestamp;
+ u32 heartbeat_sample_interval;
+ atomic_t firmware_flash_in_progress;
u32 lockup_detected;
struct list_head lockup_list;
/* Address of h->q[x] is passed to intr handler to know which queue */
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 43f1631..a894f2e 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -186,6 +186,7 @@ struct SenseSubsystem_info {
#define BMIC_WRITE 0x27
#define BMIC_CACHE_FLUSH 0xc2
#define HPSA_CACHE_FLUSH 0x01 /* C2 was already being used by HPSA */
+#define BMIC_FLASH_FIRMWARE 0xF7
/* Command List Structure */
union SCSI3Addr {
next prev parent reply other threads:[~2012-05-01 16:43 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-01 16:42 [PATCH 00/17] hpsa: resend updates for April, 2012 Stephen M. Cameron
2012-05-01 16:42 ` [PATCH 01/17] hpsa: call pci_disable_device on driver unload Stephen M. Cameron
2012-05-01 16:42 ` [PATCH 02/17] hpsa: do not skip disabled devices Stephen M. Cameron
2012-05-01 16:42 ` [PATCH 03/17] hpsa: enable bus master bit after pci_enable_device Stephen M. Cameron
2012-05-01 16:42 ` [PATCH 04/17] hpsa: suppress excessively chatty error messages Stephen M. Cameron
2012-05-01 16:42 ` [PATCH 05/17] hpsa: do not read from controller unnecessarily in completion code Stephen M. Cameron
2012-05-01 16:42 ` [PATCH 06/17] hpsa: retry driver initiated commands on busy status Stephen M. Cameron
2012-05-01 16:42 ` [PATCH 07/17] hpsa: do not give up retry of driver cmds after only 3 retries Stephen M. Cameron
2012-05-01 17:26 ` Andi Shyti
2012-05-01 18:20 ` scameron
2012-05-01 21:39 ` Andi Shyti
2012-05-02 16:30 ` scameron
2012-05-02 20:27 ` Andi Shyti
2012-05-01 16:42 ` [PATCH 08/17] hpsa: remove unused parameter from finish_cmd Stephen M. Cameron
2012-05-01 16:42 ` [PATCH 09/17] hpsa: add abort error handler function Stephen M. Cameron
2012-05-01 16:42 ` [PATCH 10/17] hpsa: do aborts two ways Stephen M. Cameron
2012-05-01 16:43 ` [PATCH 11/17] hpsa: factor out tail calls to next_command() in process_(non)indexed_cmd() Stephen M. Cameron
2012-05-01 16:43 ` [PATCH 12/17] hpsa: use multiple reply queues Stephen M. Cameron
2012-05-01 16:43 ` [PATCH 13/17] hpsa: refine interrupt handler locking for greater concurrency Stephen M. Cameron
2012-05-01 16:43 ` [PATCH 14/17] hpsa: factor out hpsa_free_irqs_and_disable_msix Stephen M. Cameron
2012-05-01 16:43 ` [PATCH 15/17] hpsa: add new RAID level "1(ADM)" Stephen M. Cameron
2012-05-01 16:43 ` [PATCH 16/17] hpsa: removed unused member maxQsinceinit Stephen M. Cameron
2012-05-01 16:43 ` Stephen M. Cameron [this message]
-- strict thread matches above, loose matches on Subject: below --
2012-04-20 15:06 [PATCH 00/17] hpsa updates for April, 2012 Stephen M. Cameron
2012-04-20 15:07 ` [PATCH 17/17] hpsa: dial down lockup detection during firmware flash Stephen M. Cameron
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120501164342.11705.58013.stgit@beardog.cce.hp.com \
--to=scameron@beardog.cce.hp.com \
--cc=akpm@linux-foundation.org \
--cc=james.bottomley@hansenpartnership.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=mikem@beardog.cce.hp.com \
--cc=stephenmcameron@gmail.com \
--cc=thenzl@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).