* [PATCH 1/3] ipr: Reduce queuecommand lock time
@ 2012-07-16 20:48 Brian King
2012-07-17 2:03 ` Matthew Wilcox
0 siblings, 1 reply; 3+ messages in thread
From: Brian King @ 2012-07-16 20:48 UTC (permalink / raw)
To: James.Bottomley; +Cc: linux-scsi, wenxiong, brking
Reduce the amount of time the host lock is held in queuecommand
for improved performance.
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
---
drivers/scsi/ipr.c | 90 +++++++++++++++++++++++++++++++++++++----------------
1 file changed, 63 insertions(+), 27 deletions(-)
diff -puN drivers/scsi/ipr.c~ipr_reduce_lock_contention drivers/scsi/ipr.c
--- linux-2.6/drivers/scsi/ipr.c~ipr_reduce_lock_contention 2012-07-13 13:19:42.000000000 -0500
+++ linux-2.6-bjking1/drivers/scsi/ipr.c 2012-07-13 16:06:06.000000000 -0500
@@ -620,25 +620,39 @@ static void ipr_init_ipr_cmnd(struct ipr
}
/**
- * ipr_get_free_ipr_cmnd - Get a free IPR Cmnd block
+ * __ipr_get_free_ipr_cmnd - Get a free IPR Cmnd block
* @ioa_cfg: ioa config struct
*
* Return value:
* pointer to ipr command struct
**/
static
-struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
+struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
{
struct ipr_cmnd *ipr_cmd;
ipr_cmd = list_entry(ioa_cfg->free_q.next, struct ipr_cmnd, queue);
list_del(&ipr_cmd->queue);
- ipr_init_ipr_cmnd(ipr_cmd);
return ipr_cmd;
}
/**
+ * ipr_get_free_ipr_cmnd - Get a free IPR Cmnd block and initialize it
+ * @ioa_cfg: ioa config struct
+ *
+ * Return value:
+ * pointer to ipr command struct
+ **/
+static
+struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
+{
+ struct ipr_cmnd *ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg);
+ ipr_init_ipr_cmnd(ipr_cmd);
+ return ipr_cmd;
+}
+
+/**
* ipr_mask_and_clear_interrupts - Mask all and clear specified interrupts
* @ioa_cfg: ioa config struct
* @clr_ints: interrupts to clear
@@ -5783,8 +5797,8 @@ static void ipr_scsi_done(struct ipr_cmn
/**
* ipr_queuecommand - Queue a mid-layer request
+ * @shost: scsi host struct
* @scsi_cmd: scsi command struct
- * @done: done function
*
* This function queues a request generated by the mid-layer.
*
@@ -5793,61 +5807,58 @@ static void ipr_scsi_done(struct ipr_cmn
* SCSI_MLQUEUE_DEVICE_BUSY if device is busy
* SCSI_MLQUEUE_HOST_BUSY if host is busy
**/
-static int ipr_queuecommand_lck(struct scsi_cmnd *scsi_cmd,
- void (*done) (struct scsi_cmnd *))
+static int ipr_queuecommand(struct Scsi_Host *shost,
+ struct scsi_cmnd *scsi_cmd)
{
struct ipr_ioa_cfg *ioa_cfg;
struct ipr_resource_entry *res;
struct ipr_ioarcb *ioarcb;
struct ipr_cmnd *ipr_cmd;
+ unsigned long lock_flags = 0;
int rc = 0;
- scsi_cmd->scsi_done = done;
- ioa_cfg = (struct ipr_ioa_cfg *)scsi_cmd->device->host->hostdata;
- res = scsi_cmd->device->hostdata;
+ ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
+
+ spin_lock_irqsave(shost->host_lock, lock_flags);
scsi_cmd->result = (DID_OK << 16);
+ res = scsi_cmd->device->hostdata;
/*
* We are currently blocking all devices due to a host reset
* We have told the host to stop giving us new requests, but
* ERP ops don't count. FIXME
*/
- if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))
+ if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead)) {
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
return SCSI_MLQUEUE_HOST_BUSY;
+ }
/*
* FIXME - Create scsi_set_host_offline interface
* and the ioa_is_dead check can be removed
*/
if (unlikely(ioa_cfg->ioa_is_dead || !res)) {
- memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
- scsi_cmd->result = (DID_NO_CONNECT << 16);
- scsi_cmd->scsi_done(scsi_cmd);
- return 0;
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
+ goto err_nodev;
}
if (ipr_is_gata(res) && res->sata_port)
return ata_sas_queuecmd(scsi_cmd, res->sata_port->ap);
- ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg);
+ ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg);
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
+
+ ipr_init_ipr_cmnd(ipr_cmd);
ioarcb = &ipr_cmd->ioarcb;
- list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
memcpy(ioarcb->cmd_pkt.cdb, scsi_cmd->cmnd, scsi_cmd->cmd_len);
ipr_cmd->scsi_cmd = scsi_cmd;
- ioarcb->res_handle = res->res_handle;
ipr_cmd->done = ipr_scsi_done;
- ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res));
if (ipr_is_gscsi(res) || ipr_is_vset_device(res)) {
if (scsi_cmd->underflow == 0)
ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_NO_ULEN_CHK;
- if (res->needs_sync_complete) {
- ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE;
- res->needs_sync_complete = 0;
- }
-
ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_NO_LINK_DESC;
if (ipr_is_gscsi(res))
ioarcb->cmd_pkt.flags_lo |= IPR_FLAGS_LO_DELAY_AFTER_RST;
@@ -5866,16 +5877,41 @@ static int ipr_queuecommand_lck(struct s
rc = ipr_build_ioadl(ioa_cfg, ipr_cmd);
}
- if (unlikely(rc != 0)) {
- list_move_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ spin_lock_irqsave(shost->host_lock, lock_flags);
+ if (unlikely(rc || (!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))) {
+ list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
+ if (!rc)
+ scsi_dma_unmap(scsi_cmd);
return SCSI_MLQUEUE_HOST_BUSY;
}
+ if (unlikely(ioa_cfg->ioa_is_dead)) {
+ list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
+ scsi_dma_unmap(scsi_cmd);
+ goto err_nodev;
+ }
+
+ ioarcb->res_handle = res->res_handle;
+ if (res->needs_sync_complete) {
+ ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE;
+ res->needs_sync_complete = 0;
+ }
+ list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+ ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res));
ipr_send_command(ipr_cmd);
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
return 0;
-}
-static DEF_SCSI_QCMD(ipr_queuecommand)
+err_nodev:
+ spin_lock_irqsave(shost->host_lock, lock_flags);
+ memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
+ scsi_cmd->result = (DID_NO_CONNECT << 16);
+ scsi_cmd->scsi_done(scsi_cmd);
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
+ return 0;
+}
/**
* ipr_ioctl - IOCTL handler
_
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH 1/3] ipr: Reduce queuecommand lock time
2012-07-16 20:48 [PATCH 1/3] ipr: Reduce queuecommand lock time Brian King
@ 2012-07-17 2:03 ` Matthew Wilcox
2012-07-17 13:09 ` Brian King
0 siblings, 1 reply; 3+ messages in thread
From: Matthew Wilcox @ 2012-07-17 2:03 UTC (permalink / raw)
To: Brian King; +Cc: James.Bottomley, linux-scsi, wenxiong
On Mon, Jul 16, 2012 at 03:48:08PM -0500, Brian King wrote:
> +static int ipr_queuecommand(struct Scsi_Host *shost,
> + struct scsi_cmnd *scsi_cmd)
> {
> struct ipr_ioa_cfg *ioa_cfg;
> struct ipr_resource_entry *res;
> struct ipr_ioarcb *ioarcb;
> struct ipr_cmnd *ipr_cmd;
> + unsigned long lock_flags = 0;
You don't need to initialise lock_flags.
Looking at the rest of the code, you drop the lock in the middle,
then re-acquire it. That'll help with hold time, but I'm not convinced
it'll help with performance. Have you done performance testing with
these changes? I seem to remember we used an eight-socket box to show
host_lock problems in the past.
--
Matthew Wilcox Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH 1/3] ipr: Reduce queuecommand lock time
2012-07-17 2:03 ` Matthew Wilcox
@ 2012-07-17 13:09 ` Brian King
0 siblings, 0 replies; 3+ messages in thread
From: Brian King @ 2012-07-17 13:09 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: James.Bottomley, linux-scsi, wenxiong
On 07/16/2012 09:03 PM, Matthew Wilcox wrote:
> On Mon, Jul 16, 2012 at 03:48:08PM -0500, Brian King wrote:
>> +static int ipr_queuecommand(struct Scsi_Host *shost,
>> + struct scsi_cmnd *scsi_cmd)
>> {
>> struct ipr_ioa_cfg *ioa_cfg;
>> struct ipr_resource_entry *res;
>> struct ipr_ioarcb *ioarcb;
>> struct ipr_cmnd *ipr_cmd;
>> + unsigned long lock_flags = 0;
>
> You don't need to initialise lock_flags.
>
> Looking at the rest of the code, you drop the lock in the middle,
> then re-acquire it. That'll help with hold time, but I'm not convinced
> it'll help with performance. Have you done performance testing with
> these changes? I seem to remember we used an eight-socket box to show
> host_lock problems in the past.
We've done performance testing of these patches and they provided
roughly a 25% increase in the number of IOPS we are able to push
through an adapter on Power. This was running on an 8 socket box with
4 way SMT, so 32 separate hardware threads.
One of the main things these patches do is to get the dma map/unmap
calls out from underneath the host lock. On Power, these calls have
more overhead than on some other platforms, since they end up
resulting in a hypervisor call, which can significantly increase
host lock hold times.
I'll resend with the change to not initialize the lock flags.
Thanks,
Brian
--
Brian King
Power Linux I/O
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2012-07-17 13:11 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-07-16 20:48 [PATCH 1/3] ipr: Reduce queuecommand lock time Brian King
2012-07-17 2:03 ` Matthew Wilcox
2012-07-17 13:09 ` Brian King
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).