* [PATCH 1/3] ipr: Reduce queuecommand lock time
@ 2012-07-16 20:48 Brian King
2012-07-17 2:03 ` Matthew Wilcox
0 siblings, 1 reply; 3+ messages in thread
From: Brian King @ 2012-07-16 20:48 UTC (permalink / raw)
To: James.Bottomley; +Cc: linux-scsi, wenxiong, brking
Reduce the amount of time the host lock is held in queuecommand
for improved performance.
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
---
drivers/scsi/ipr.c | 90 +++++++++++++++++++++++++++++++++++++----------------
1 file changed, 63 insertions(+), 27 deletions(-)
diff -puN drivers/scsi/ipr.c~ipr_reduce_lock_contention drivers/scsi/ipr.c
--- linux-2.6/drivers/scsi/ipr.c~ipr_reduce_lock_contention 2012-07-13 13:19:42.000000000 -0500
+++ linux-2.6-bjking1/drivers/scsi/ipr.c 2012-07-13 16:06:06.000000000 -0500
@@ -620,25 +620,39 @@ static void ipr_init_ipr_cmnd(struct ipr
}
/**
- * ipr_get_free_ipr_cmnd - Get a free IPR Cmnd block
+ * __ipr_get_free_ipr_cmnd - Get a free IPR Cmnd block
* @ioa_cfg: ioa config struct
*
* Return value:
* pointer to ipr command struct
**/
static
-struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
+struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
{
struct ipr_cmnd *ipr_cmd;
ipr_cmd = list_entry(ioa_cfg->free_q.next, struct ipr_cmnd, queue);
list_del(&ipr_cmd->queue);
- ipr_init_ipr_cmnd(ipr_cmd);
return ipr_cmd;
}
/**
+ * ipr_get_free_ipr_cmnd - Get a free IPR Cmnd block and initialize it
+ * @ioa_cfg: ioa config struct
+ *
+ * Return value:
+ * pointer to ipr command struct
+ **/
+static
+struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
+{
+ struct ipr_cmnd *ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg);
+ ipr_init_ipr_cmnd(ipr_cmd);
+ return ipr_cmd;
+}
+
+/**
* ipr_mask_and_clear_interrupts - Mask all and clear specified interrupts
* @ioa_cfg: ioa config struct
* @clr_ints: interrupts to clear
@@ -5783,8 +5797,8 @@ static void ipr_scsi_done(struct ipr_cmn
/**
* ipr_queuecommand - Queue a mid-layer request
+ * @shost: scsi host struct
* @scsi_cmd: scsi command struct
- * @done: done function
*
* This function queues a request generated by the mid-layer.
*
@@ -5793,61 +5807,58 @@ static void ipr_scsi_done(struct ipr_cmn
* SCSI_MLQUEUE_DEVICE_BUSY if device is busy
* SCSI_MLQUEUE_HOST_BUSY if host is busy
**/
-static int ipr_queuecommand_lck(struct scsi_cmnd *scsi_cmd,
- void (*done) (struct scsi_cmnd *))
+static int ipr_queuecommand(struct Scsi_Host *shost,
+ struct scsi_cmnd *scsi_cmd)
{
struct ipr_ioa_cfg *ioa_cfg;
struct ipr_resource_entry *res;
struct ipr_ioarcb *ioarcb;
struct ipr_cmnd *ipr_cmd;
+ unsigned long lock_flags = 0;
int rc = 0;
- scsi_cmd->scsi_done = done;
- ioa_cfg = (struct ipr_ioa_cfg *)scsi_cmd->device->host->hostdata;
- res = scsi_cmd->device->hostdata;
+ ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
+
+ spin_lock_irqsave(shost->host_lock, lock_flags);
scsi_cmd->result = (DID_OK << 16);
+ res = scsi_cmd->device->hostdata;
/*
* We are currently blocking all devices due to a host reset
* We have told the host to stop giving us new requests, but
* ERP ops don't count. FIXME
*/
- if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))
+ if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead)) {
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
return SCSI_MLQUEUE_HOST_BUSY;
+ }
/*
* FIXME - Create scsi_set_host_offline interface
* and the ioa_is_dead check can be removed
*/
if (unlikely(ioa_cfg->ioa_is_dead || !res)) {
- memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
- scsi_cmd->result = (DID_NO_CONNECT << 16);
- scsi_cmd->scsi_done(scsi_cmd);
- return 0;
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
+ goto err_nodev;
}
if (ipr_is_gata(res) && res->sata_port)
return ata_sas_queuecmd(scsi_cmd, res->sata_port->ap);
- ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg);
+ ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg);
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
+
+ ipr_init_ipr_cmnd(ipr_cmd);
ioarcb = &ipr_cmd->ioarcb;
- list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
memcpy(ioarcb->cmd_pkt.cdb, scsi_cmd->cmnd, scsi_cmd->cmd_len);
ipr_cmd->scsi_cmd = scsi_cmd;
- ioarcb->res_handle = res->res_handle;
ipr_cmd->done = ipr_scsi_done;
- ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res));
if (ipr_is_gscsi(res) || ipr_is_vset_device(res)) {
if (scsi_cmd->underflow == 0)
ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_NO_ULEN_CHK;
- if (res->needs_sync_complete) {
- ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE;
- res->needs_sync_complete = 0;
- }
-
ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_NO_LINK_DESC;
if (ipr_is_gscsi(res))
ioarcb->cmd_pkt.flags_lo |= IPR_FLAGS_LO_DELAY_AFTER_RST;
@@ -5866,16 +5877,41 @@ static int ipr_queuecommand_lck(struct s
rc = ipr_build_ioadl(ioa_cfg, ipr_cmd);
}
- if (unlikely(rc != 0)) {
- list_move_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ spin_lock_irqsave(shost->host_lock, lock_flags);
+ if (unlikely(rc || (!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))) {
+ list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
+ if (!rc)
+ scsi_dma_unmap(scsi_cmd);
return SCSI_MLQUEUE_HOST_BUSY;
}
+ if (unlikely(ioa_cfg->ioa_is_dead)) {
+ list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
+ scsi_dma_unmap(scsi_cmd);
+ goto err_nodev;
+ }
+
+ ioarcb->res_handle = res->res_handle;
+ if (res->needs_sync_complete) {
+ ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE;
+ res->needs_sync_complete = 0;
+ }
+ list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+ ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res));
ipr_send_command(ipr_cmd);
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
return 0;
-}
-static DEF_SCSI_QCMD(ipr_queuecommand)
+err_nodev:
+ spin_lock_irqsave(shost->host_lock, lock_flags);
+ memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
+ scsi_cmd->result = (DID_NO_CONNECT << 16);
+ scsi_cmd->scsi_done(scsi_cmd);
+ spin_unlock_irqrestore(shost->host_lock, lock_flags);
+ return 0;
+}
/**
* ipr_ioctl - IOCTL handler
_
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH 1/3] ipr: Reduce queuecommand lock time
2012-07-16 20:48 [PATCH 1/3] ipr: Reduce queuecommand lock time Brian King
@ 2012-07-17 2:03 ` Matthew Wilcox
2012-07-17 13:09 ` Brian King
0 siblings, 1 reply; 3+ messages in thread
From: Matthew Wilcox @ 2012-07-17 2:03 UTC (permalink / raw)
To: Brian King; +Cc: James.Bottomley, linux-scsi, wenxiong
On Mon, Jul 16, 2012 at 03:48:08PM -0500, Brian King wrote:
> +static int ipr_queuecommand(struct Scsi_Host *shost,
> + struct scsi_cmnd *scsi_cmd)
> {
> struct ipr_ioa_cfg *ioa_cfg;
> struct ipr_resource_entry *res;
> struct ipr_ioarcb *ioarcb;
> struct ipr_cmnd *ipr_cmd;
> + unsigned long lock_flags = 0;
You don't need to initialise lock_flags.
Looking at the rest of the code, you drop the lock in the middle,
then re-acquire it. That'll help with hold time, but I'm not convinced
it'll help with performance. Have you done performance testing with
these changes? I seem to remember we used an eight-socket box to show
host_lock problems in the past.
--
Matthew Wilcox Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH 1/3] ipr: Reduce queuecommand lock time
2012-07-17 2:03 ` Matthew Wilcox
@ 2012-07-17 13:09 ` Brian King
0 siblings, 0 replies; 3+ messages in thread
From: Brian King @ 2012-07-17 13:09 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: James.Bottomley, linux-scsi, wenxiong
On 07/16/2012 09:03 PM, Matthew Wilcox wrote:
> On Mon, Jul 16, 2012 at 03:48:08PM -0500, Brian King wrote:
>> +static int ipr_queuecommand(struct Scsi_Host *shost,
>> + struct scsi_cmnd *scsi_cmd)
>> {
>> struct ipr_ioa_cfg *ioa_cfg;
>> struct ipr_resource_entry *res;
>> struct ipr_ioarcb *ioarcb;
>> struct ipr_cmnd *ipr_cmd;
>> + unsigned long lock_flags = 0;
>
> You don't need to initialise lock_flags.
>
> Looking at the rest of the code, you drop the lock in the middle,
> then re-acquire it. That'll help with hold time, but I'm not convinced
> it'll help with performance. Have you done performance testing with
> these changes? I seem to remember we used an eight-socket box to show
> host_lock problems in the past.
We've done performance testing of these patches and they provided
roughly a 25% increase in the number of IOPS we are able to push
through an adapter on Power. This was running on an 8 socket box with
4 way SMT, so 32 separate hardware threads.
One of the main things these patches do is to get the dma map/unmap
calls out from underneath the host lock. On Power, these calls have
more overhead than on some other platforms, since they end up
resulting in a hypervisor call, which can significantly increase
host lock hold times.
I'll resend with the change to not initialize the lock flags.
Thanks,
Brian
--
Brian King
Power Linux I/O
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2012-07-17 13:11 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-07-16 20:48 [PATCH 1/3] ipr: Reduce queuecommand lock time Brian King
2012-07-17 2:03 ` Matthew Wilcox
2012-07-17 13:09 ` Brian King
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).