linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Don Brace <don.brace@pmcs.com>
To: scott.teel@pmcs.com, Kevin.Barnett@pmcs.com,
	james.bottomley@parallels.com, hch@infradead.org,
	Justin.Lindley@pmcs.com, brace@pmcs.com
Cc: linux-scsi@vger.kernel.org
Subject: [PATCH v2 29/48] hpsa: fix race between abort handler and main i/o path
Date: Fri, 23 Jan 2015 16:43:35 -0600	[thread overview]
Message-ID: <20150123224335.14919.69164.stgit@brunhilda> (raw)
In-Reply-To: <20150123224020.14919.29458.stgit@brunhilda>

From: Webb Scales <webbnh@hp.com>

Fixing this race means changing the allocator to reference count commands.
The reference count is now the authoritative indicator of whether a
command is allocated or not.  The h->cmd_pool_bits bitmap is now
only a heuristic hint to speed up the allocation process; it is no
longer the authoritative record of allocated commands.

Since we changed the command allocator to use reference counting
as the authoritative indicator of whether a command is allocated,
fail_all_outstanding_cmds needs to use the reference count, not
h->cmd_pool_bits, for this purpose.

Fix hpsa_drain_accel_commands to use the reference count as the
authoritative indicator of whether a command is allocated instead of
the h->cmd_pool_bits bitmap.

Reviewed-by: Scott Teel <scott.teel@pmcs.com>
Signed-off-by: Don Brace <don.brace@pmcs.com>
---
 drivers/scsi/hpsa.c     |  109 +++++++++++++++++++++++++++--------------------
 drivers/scsi/hpsa.h     |    2 +
 drivers/scsi/hpsa_cmd.h |    1 
 3 files changed, 65 insertions(+), 47 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 60f5734..c95a20c 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -4552,6 +4552,7 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
 	char msg[256];		/* For debug messaging. */
 	int ml = 0;
 	__le32 tagupper, taglower;
+	int refcount;
 
 	/* Find the controller of the command to be aborted */
 	h = sdev_to_hba(sc->device);
@@ -4580,9 +4581,13 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
 	/* Get SCSI command to be aborted */
 	abort = (struct CommandList *) sc->host_scribble;
 	if (abort == NULL) {
-		dev_err(&h->pdev->dev, "%s FAILED, Command to abort is NULL.\n",
-				msg);
-		return FAILED;
+		/* This can happen if the command already completed. */
+		return SUCCESS;
+	}
+	refcount = atomic_inc_return(&abort->refcount);
+	if (refcount == 1) { /* Command is done already. */
+		cmd_free(h, abort);
+		return SUCCESS;
 	}
 	hpsa_get_tag(h, abort, &taglower, &tagupper);
 	ml += sprintf(msg+ml, "Tag:0x%08x:%08x ", tagupper, taglower);
@@ -4604,6 +4609,7 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
 		dev_warn(&h->pdev->dev, "FAILED abort on device C%d:B%d:T%d:L%d\n",
 			h->scsi_host->host_no,
 			dev->bus, dev->target, dev->lun);
+		cmd_free(h, abort);
 		return FAILED;
 	}
 	dev_info(&h->pdev->dev, "%s REQUEST SUCCEEDED.\n", msg);
@@ -4615,32 +4621,35 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
 	 */
 #define ABORT_COMPLETE_WAIT_SECS 30
 	for (i = 0; i < ABORT_COMPLETE_WAIT_SECS * 10; i++) {
-		if (test_bit(abort->cmdindex & (BITS_PER_LONG - 1),
-				h->cmd_pool_bits +
-				(abort->cmdindex / BITS_PER_LONG)))
-			msleep(100);
-		else
+		refcount = atomic_read(&abort->refcount);
+		if (refcount < 2) {
+			cmd_free(h, abort);
 			return SUCCESS;
+		} else {
+			msleep(100);
+		}
 	}
 	dev_warn(&h->pdev->dev, "%s FAILED. Aborted command has not completed after %d seconds.\n",
 		msg, ABORT_COMPLETE_WAIT_SECS);
+	cmd_free(h, abort);
 	return FAILED;
 }
 
-
 /*
  * For operations that cannot sleep, a command block is allocated at init,
  * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
  * which ones are free or in use.  Lock must be held when calling this.
  * cmd_free() is the complement.
  */
+
 static struct CommandList *cmd_alloc(struct ctlr_info *h)
 {
 	struct CommandList *c;
 	int i;
 	union u64bit temp64;
 	dma_addr_t cmd_dma_handle, err_dma_handle;
-	int loopcount;
+	int refcount;
+	unsigned long offset = 0;
 
 	/* There is some *extremely* small but non-zero chance that that
 	 * multiple threads could get in here, and one thread could
@@ -4653,23 +4662,27 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
 	 * infrequently as to be indistinguishable from never.
 	 */
 
-	loopcount = 0;
-	do {
-		i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
-		if (i == h->nr_cmds)
-			i = 0;
-		loopcount++;
-	} while (test_and_set_bit(i & (BITS_PER_LONG - 1),
-		  h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0 &&
-		loopcount < 10);
-
-	/* Thread got starved?  We do not expect this to ever happen. */
-	if (loopcount >= 10)
-		return NULL;
-
-	c = h->cmd_pool + i;
-	memset(c, 0, sizeof(*c));
-	c->Header.tag = cpu_to_le64((u64) i << DIRECT_LOOKUP_SHIFT);
+	for (;;) {
+		i = find_next_zero_bit(h->cmd_pool_bits, h->nr_cmds, offset);
+		if (unlikely(i == h->nr_cmds)) {
+			offset = 0;
+			continue;
+		}
+		c = h->cmd_pool + i;
+		refcount = atomic_inc_return(&c->refcount);
+		if (unlikely(refcount > 1)) {
+			cmd_free(h, c); /* already in use */
+			offset = (i + 1) % h->nr_cmds;
+			continue;
+		}
+		set_bit(i & (BITS_PER_LONG - 1),
+			h->cmd_pool_bits + (i / BITS_PER_LONG));
+		break; /* it's ours now. */
+	}
+
+	/* Zero out all of commandlist except the last field, refcount */
+	memset(c, 0, offsetof(struct CommandList, refcount));
+	c->Header.tag = cpu_to_le64((u64) (i << DIRECT_LOOKUP_SHIFT));
 	cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(*c);
 	c->err_info = h->errinfo_pool + i;
 	memset(c->err_info, 0, sizeof(*c->err_info));
@@ -4680,8 +4693,8 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
 
 	c->busaddr = (u32) cmd_dma_handle;
 	temp64.val = (u64) err_dma_handle;
-	c->ErrDesc.Addr = cpu_to_le64(err_dma_handle);
-	c->ErrDesc.Len = cpu_to_le32(sizeof(*c->err_info));
+	c->ErrDesc.Addr = cpu_to_le64((u64) err_dma_handle);
+	c->ErrDesc.Len = cpu_to_le32((u32) sizeof(*c->err_info));
 
 	c->h = h;
 	return c;
@@ -4689,11 +4702,13 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
 
 static void cmd_free(struct ctlr_info *h, struct CommandList *c)
 {
-	int i;
+	if (atomic_dec_and_test(&c->refcount)) {
+		int i;
 
-	i = c - h->cmd_pool;
-	clear_bit(i & (BITS_PER_LONG - 1),
-		  h->cmd_pool_bits + (i / BITS_PER_LONG));
+		i = c - h->cmd_pool;
+		clear_bit(i & (BITS_PER_LONG - 1),
+			  h->cmd_pool_bits + (i / BITS_PER_LONG));
+	}
 }
 
 #ifdef CONFIG_COMPAT
@@ -6598,17 +6613,18 @@ static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
 /* Called when controller lockup detected. */
 static void fail_all_outstanding_cmds(struct ctlr_info *h)
 {
-	int i;
-	struct CommandList *c = NULL;
+	int i, refcount;
+	struct CommandList *c;
 
 	flush_workqueue(h->resubmit_wq); /* ensure all cmds are fully built */
 	for (i = 0; i < h->nr_cmds; i++) {
-		if (!test_bit(i & (BITS_PER_LONG - 1),
-				h->cmd_pool_bits + (i / BITS_PER_LONG)))
-			continue;
 		c = h->cmd_pool + i;
-		c->err_info->CommandStatus = CMD_HARDWARE_ERR;
-		finish_cmd(c);
+		refcount = atomic_inc_return(&c->refcount);
+		if (refcount > 1) {
+			c->err_info->CommandStatus = CMD_HARDWARE_ERR;
+			finish_cmd(c);
+		}
+		cmd_free(h, c);
 	}
 }
 
@@ -6645,9 +6661,7 @@ static void controller_lockup_detected(struct ctlr_info *h)
 	dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x\n",
 			lockup_detected);
 	pci_disable_device(h->pdev);
-	spin_lock_irqsave(&h->lock, flags);
 	fail_all_outstanding_cmds(h);
-	spin_unlock_irqrestore(&h->lock, flags);
 }
 
 static void detect_controller_lockup(struct ctlr_info *h)
@@ -7449,18 +7463,19 @@ static void hpsa_drain_accel_commands(struct ctlr_info *h)
 {
 	struct CommandList *c = NULL;
 	int i, accel_cmds_out;
+	int refcount;
 
 	do { /* wait for all outstanding ioaccel commands to drain out */
 		accel_cmds_out = 0;
 		for (i = 0; i < h->nr_cmds; i++) {
-			if (!test_bit(i & (BITS_PER_LONG - 1),
-					h->cmd_pool_bits + (i / BITS_PER_LONG)))
-				continue;
 			c = h->cmd_pool + i;
-			accel_cmds_out += is_accelerated_cmd(c);
+			refcount = atomic_inc_return(&c->refcount);
+			if (refcount > 1) /* Command is allocated */
+				accel_cmds_out += is_accelerated_cmd(c);
+			cmd_free(h, c);
 		}
 		if (accel_cmds_out <= 0)
-				break;
+			break;
 		msleep(100);
 	} while (1);
 }
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index d0fb854..679e4d2 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -309,6 +309,8 @@ struct offline_device_entry {
  */
 #define SA5_DOORBELL	0x20
 #define SA5_REQUEST_PORT_OFFSET	0x40
+#define SA5_REQUEST_PORT64_LO_OFFSET 0xC0
+#define SA5_REQUEST_PORT64_HI_OFFSET 0xC4
 #define SA5_REPLY_INTR_MASK_OFFSET	0x34
 #define SA5_REPLY_PORT_OFFSET		0x44
 #define SA5_INTR_STATUS		0x30
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 4726dbb..071b64c 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -421,6 +421,7 @@ struct CommandList {
 	 * not used.
 	 */
 	struct hpsa_scsi_dev_t *phys_disk;
+	atomic_t refcount; /* Must be last to avoid memset in cmd_alloc */
 } __aligned(COMMANDLIST_ALIGNMENT);
 
 /* Max S/G elements in I/O accelerator command */


  parent reply	other threads:[~2015-01-23 22:44 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-23 22:41 [PATCH v2 00/48] hpsa driver updates Don Brace
2015-01-23 22:41 ` [PATCH v2 01/48] hpsa: correct endian sparse warnings Don Brace
2015-01-23 22:41 ` [PATCH v2 02/48] hpsa: fix memory leak in kdump hard reset Don Brace
2015-01-23 22:41 ` [PATCH v2 03/48] hpsa: turn off interrupts when kdump starts Don Brace
2015-01-23 22:41 ` [PATCH v2 04/48] hpsa: change how SA controllers are reset Don Brace
2015-01-23 22:41 ` [PATCH v2 05/48] hpsa: correct change_queue_depth Don Brace
2015-01-23 22:41 ` [PATCH v2 06/48] hpsa: adjust RAID-1, RAID-1ADM, and RAID-6 names Don Brace
2015-01-23 22:41 ` [PATCH v2 07/48] hpsa: rename free_irqs to hpsa_free_irqs Don Brace
2015-01-23 22:41 ` [PATCH v2 08/48] hpsa: Fix -Wunused-but-set-variable warning Don Brace
2015-01-23 22:41 ` [PATCH v2 09/48] hpsa: notice all request_irq errors Don Brace
2015-01-23 22:41 ` [PATCH v2 10/48] hpsa: remove 0x from queue depth print which is in decimal Don Brace
2015-01-23 22:42 ` [PATCH v2 11/48] hpsa: propagate hard_reset failures in reset_devices mode Don Brace
2015-01-23 22:42 ` [PATCH v2 12/48] hpsa: propagate return value from board ID lookup Don Brace
2015-01-23 22:42 ` [PATCH v2 13/48] hpsa: downgrade the Waiting for no-op print to dev_info Don Brace
2015-01-23 22:42 ` [PATCH v2 14/48] hpsa: refactor hpsa_find_board_params() to encapsulate legacy test Don Brace
2015-01-23 22:42 ` [PATCH v2 15/48] hpsa: trivial message and comment clean ups Don Brace
2015-01-23 22:42 ` [PATCH v2 16/48] hpsa: report failure to ioremap config table Don Brace
2015-01-23 22:42 ` [PATCH v2 17/48] hpsa: rename hpsa_request_irq to hpsa_request_irqs Don Brace
2015-01-23 22:42 ` [PATCH v2 18/48] hpsa: pass error from pci_set_consistent_dma_mask from hpsa_message Don Brace
2015-01-23 22:42 ` [PATCH v2 19/48] hpsa: report allocation failures while allocating SG chain blocks Don Brace
2015-01-23 22:42 ` [PATCH v2 20/48] hpsa: fix memory leak in hpsa_alloc_cmd_pool Don Brace
2015-01-23 22:42 ` [PATCH v2 21/48] hpsa: avoid unneccesary calls to resource freeing functions Don Brace
2015-01-23 22:42 ` [PATCH v2 22/48] hpsa: reserve some commands for use by driver Don Brace
2015-01-23 22:43 ` [PATCH v2 23/48] hpsa: get rid of cmd_special_alloc and cmd_special_free Don Brace
2015-01-23 22:43 ` [PATCH v2 24/48] hpsa: do not queue commands internally in driver Don Brace
2015-01-23 22:43 ` [PATCH v2 25/48] hpsa: do not request device rescan on every ioaccel path error Don Brace
2015-01-23 22:43 ` [PATCH v2 26/48] hpsa: factor out hpsa_ciss_submit function Don Brace
2015-01-23 22:43 ` [PATCH v2 27/48] hpsa: use workqueue to resubmit failed ioaccel commands Don Brace
2015-01-23 22:43 ` [PATCH v2 28/48] hpsa: honor queue depth of physical devices Don Brace
2015-01-23 22:43 ` Don Brace [this message]
2015-01-23 22:43 ` [PATCH v2 30/48] hpsa: optimize cmd_alloc function by remembering last allocation Don Brace
2015-01-23 22:43 ` [PATCH v2 31/48] hpsa: count passthru cmds with atomics, not a spin locked int Don Brace
2015-01-23 22:43 ` [PATCH v2 32/48] hpsa: slightly optimize SA5_performant_completed Don Brace
2015-01-23 22:43 ` [PATCH v2 33/48] hpsa: do not check for msi(x) in interrupt_pending Don Brace
2015-01-23 22:44 ` [PATCH v2 34/48] hpsa: remove incorrect BUG_ONs checking for raid offload enable Don Brace
2015-01-23 22:44 ` [PATCH v2 35/48] hpsa: do not ack controller events on controllers that do not support it Don Brace
2015-01-23 22:44 ` [PATCH v2 36/48] hpsa: guard against overflowing raid map array Don Brace
2015-01-23 22:44 ` [PATCH v2 37/48] hpsa: check for ctlr lockup after command allocation in main io path Don Brace
2015-01-23 22:44 ` [PATCH v2 38/48] hpsa: return failed from device reset/abort handlers Don Brace
2015-01-23 22:44 ` [PATCH v2 39/48] hpsa: do not use a void pointer for scsi_cmd field of struct CommandList Don Brace
2015-01-23 22:44 ` [PATCH v2 40/48] hpsa: print CDBs instead of kernel virtual addresses for uncommon errors Don Brace
2015-01-23 22:44 ` [PATCH v2 41/48] hpsa: do not use function pointers in fast path command submission Don Brace
2015-01-23 22:44 ` [PATCH v2 42/48] hpsa: move SG descriptor set-up out of hpsa_scatter_gather() Don Brace
2015-01-23 22:44 ` [PATCH v2 43/48] hpsa: refactor duplicated scan completion code into a new routine Don Brace
2015-01-23 22:44 ` [PATCH v2 44/48] hpsa: shorten the wait for the CISS doorbell mode change ack Don Brace
2015-01-23 22:45 ` [PATCH v2 45/48] hpsa: detect and report failures changing controller transport modes Don Brace
2015-01-23 22:45 ` [PATCH v2 46/48] hpsa: add in gen9 controller model names Don Brace
2015-01-23 22:45 ` [PATCH v2 47/48] hpsa: add in P840ar controller model name Don Brace
2015-01-23 22:45 ` [PATCH v2 48/48] hpsa: Use local workqueues instead of system workqueues Don Brace
2015-01-27 16:55   ` Tomas Henzl
2015-01-27 17:13     ` Tomas Henzl

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150123224335.14919.69164.stgit@brunhilda \
    --to=don.brace@pmcs.com \
    --cc=Justin.Lindley@pmcs.com \
    --cc=Kevin.Barnett@pmcs.com \
    --cc=brace@pmcs.com \
    --cc=hch@infradead.org \
    --cc=james.bottomley@parallels.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=scott.teel@pmcs.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).