From: Hannes Reinecke <hare@suse.de>
To: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>,
James Bottomley <james.bottomley@hansenpartnership.com>,
James Smart <james.smart@broadcom.com>,
Dick Kennedy <dick.kennedy@broadcom.com>,
linux-scsi@vger.kernel.org, Hannes Reinecke <hare@suse.de>,
Hannes Reinecke <hare@suse.com>
Subject: [PATCH 12/14] lpfc: move to array based command allocation for SLI-4
Date: Thu, 2 Jun 2016 16:39:14 +0200 [thread overview]
Message-ID: <1464878356-42407-13-git-send-email-hare@suse.de> (raw)
In-Reply-To: <1464878356-42407-1-git-send-email-hare@suse.de>
On high-end arrays the list-based command allocation becomes a
bottleneck, as the list lock has to be taken for every command
allocation. The blk-mq/scsi-mq infrastructure, on the other hand,
guarantees that a tag is never reused while the command it maps to
is in flight. So this patch moves command allocation to an
array-based structure, indexed by the command tag. With this we can
avoid taking a lock during command allocation and simply mark the
command as in use by setting a flag.
This also allows for proper housekeeping in case the HBA needs to
be reset.
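To illustrate the scheme outside of the driver, here is a minimal,
self-contained sketch. The names cmd_slot, cmd_index, claim_cmd and
release_cmd are made up for this example and do not appear in the
driver; bounds checking and error logging are elided:

	#include <linux/bitops.h>
	#include <linux/blk-mq.h>

	#define CMD_QUEUED	2	/* 'in use' flag bit, as in this patch */

	struct cmd_slot {
		unsigned long flags;
		/* ... DMA buffers, iotag, ... */
	};

	/*
	 * Map a request's blk-mq tag to an index into one flat
	 * per-HBA array.  Tags are only unique per hardware queue,
	 * so interleave them by queue number; this mirrors the idx
	 * calculation in lpfc_queuecommand() below.
	 */
	static inline u32 cmd_index(struct request *rq, u16 nr_hw_queues)
	{
		u32 unique = blk_mq_unique_tag(rq);
		u16 hwq = blk_mq_unique_tag_to_hwq(unique);
		u16 tag = blk_mq_unique_tag_to_tag(unique);

		return (u32)tag * nr_hw_queues + hwq;
	}

	/*
	 * Lock-free claim: a single atomic test_and_set_bit()
	 * replaces the list manipulation under a spinlock.
	 */
	static struct cmd_slot *claim_cmd(struct cmd_slot **arr,
					  struct request *rq, u16 nr_hwq)
	{
		struct cmd_slot *c = arr[cmd_index(rq, nr_hwq)];

		if (!c || test_and_set_bit(CMD_QUEUED, &c->flags))
			return NULL;	/* unallocated or already in use */
		return c;
	}

	static void release_cmd(struct cmd_slot *c)
	{
		clear_bit(CMD_QUEUED, &c->flags);	/* no lock needed */
	}

Since blk-mq never hands out the same tag twice concurrently on a
hardware queue, the test_and_set_bit() is expected to always succeed
in the normal I/O path; finding the bit already set indicates a stale
command, e.g. during HBA reset handling.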
Signed-off-by: Hannes Reinecke <hare@suse.com>
---
drivers/scsi/lpfc/lpfc.h | 1 +
drivers/scsi/lpfc/lpfc_init.c | 53 +++++++++++++++++++++++++--
drivers/scsi/lpfc/lpfc_scsi.c | 84 +++++++++++++++++++++++++++++++++++++------
drivers/scsi/lpfc/lpfc_scsi.h | 7 ++--
4 files changed, 128 insertions(+), 17 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index bb53b81..289cc50 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -840,6 +840,7 @@ struct lpfc_hba {
uint64_t bg_reftag_err_cnt;
/* fastpath list. */
+ struct lpfc_scsi_buf **lpfc_scsi_buf_arr;
spinlock_t scsi_buf_list_get_lock; /* SCSI buf alloc list lock */
spinlock_t scsi_buf_list_put_lock; /* SCSI buf free list lock */
struct list_head lpfc_scsi_buf_list_get;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index d2a6302..55ed075 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3071,6 +3071,20 @@ lpfc_scsi_free(struct lpfc_hba *phba)
}
spin_unlock(&phba->scsi_buf_list_get_lock);
+ if (phba->lpfc_scsi_buf_arr) {
+ int idx;
+ for (idx = 0; idx < phba->cfg_hba_queue_depth; idx++) {
+ sb = phba->lpfc_scsi_buf_arr[idx];
+ if (!sb)
+ continue;
+ clear_bit(LPFC_CMD_QUEUED, &sb->flags);
+ list_del(&sb->list);
+ pci_pool_free(phba->lpfc_scsi_dma_buf_pool, sb->data,
+ sb->dma_handle);
+ kfree(sb);
+ phba->total_scsi_bufs--;
+ }
+ }
/* Release all the lpfc_iocbq entries maintained by this host. */
list_for_each_entry_safe(io, io_next, &phba->lpfc_iocb_list, list) {
list_del(&io->list);
@@ -3212,6 +3226,18 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
phba->sli4_hba.scsi_xri_cnt,
phba->sli4_hba.scsi_xri_max);
+ if (phba->lpfc_scsi_buf_arr) {
+ for (i = 0; i < phba->cfg_hba_queue_depth; i++) {
+ psb = phba->lpfc_scsi_buf_arr[i];
+ if (psb) {
+ if (test_and_set_bit(LPFC_CMD_QUEUED,
+ &psb->flags))
+ continue;
+ list_add_tail(&psb->list, &scsi_sgl_list);
+ }
+ }
+ }
+
spin_lock_irq(&phba->scsi_buf_list_get_lock);
spin_lock(&phba->scsi_buf_list_put_lock);
list_splice_init(&phba->lpfc_scsi_buf_list_get, &scsi_sgl_list);
@@ -3228,6 +3254,9 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
list_remove_head(&scsi_sgl_list, psb,
struct lpfc_scsi_buf, list);
if (psb) {
+ clear_bit(LPFC_CMD_QUEUED, &psb->flags);
+ if (phba->lpfc_scsi_buf_arr)
+ phba->lpfc_scsi_buf_arr[psb->iotag] = NULL;
pci_pool_free(phba->lpfc_scsi_dma_buf_pool,
psb->data, psb->dma_handle);
kfree(psb);
@@ -3258,8 +3287,17 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
list_splice_init(&scsi_sgl_list, &phba->lpfc_scsi_buf_list_get);
INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
spin_unlock(&phba->scsi_buf_list_put_lock);
- spin_unlock_irq(&phba->scsi_buf_list_get_lock);
+ if (phba->lpfc_scsi_buf_arr) {
+ for (i = 0; i < phba->cfg_hba_queue_depth; i++) {
+ psb = phba->lpfc_scsi_buf_arr[i];
+ if (psb) {
+ clear_bit(LPFC_CMD_QUEUED, &psb->flags);
+ list_del_init(&psb->list);
+ }
+ }
+ }
+ spin_unlock_irq(&phba->scsi_buf_list_get_lock);
return 0;
out_free_mem:
@@ -3329,7 +3367,8 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
* scsi_add_host will fail. This will be adjusted later based on the
* max xri value determined in hba setup.
*/
- shost->can_queue = phba->cfg_hba_queue_depth - 10;
+ shost->can_queue = (phba->cfg_hba_queue_depth - 10) /
+ phba->cfg_fcp_io_channel;
if (dev != &phba->pcidev->dev) {
shost->transportt = lpfc_vport_transport_template;
vport->port_type = LPFC_NPIV_PORT;
@@ -3338,6 +3377,13 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
vport->port_type = LPFC_PHYSICAL_PORT;
}
+ if (shost_use_blk_mq(shost) && phba->sli_rev == LPFC_SLI_REV4) {
+ phba->lpfc_scsi_buf_arr = kcalloc(phba->cfg_hba_queue_depth,
+ sizeof(struct lpfc_scsi_buf *), GFP_KERNEL);
+ if (!phba->lpfc_scsi_buf_arr)
+ goto out_put_shost;
+ }
+
/* Initialize all internally managed lists. */
INIT_LIST_HEAD(&vport->fc_nodes);
INIT_LIST_HEAD(&vport->rcv_buffer_list);
@@ -6312,7 +6358,8 @@ lpfc_post_init_setup(struct lpfc_hba *phba)
* adjust the value of can_queue.
*/
shost = pci_get_drvdata(phba->pcidev);
- shost->can_queue = phba->cfg_hba_queue_depth - 10;
+ shost->can_queue = (phba->cfg_hba_queue_depth - 10) /
+ phba->cfg_fcp_io_channel;
if (phba->sli3_options & LPFC_SLI3_BG_ENABLED)
lpfc_setup_bg(phba, shost);
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 3111a9d..a3eb5ff 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -746,9 +746,19 @@ int
lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *phba)
{
LIST_HEAD(post_sblist);
- int num_posted, rc = 0;
+ int i, num_posted, rc = 0;
/* get all SCSI buffers need to repost to a local list */
+ if (phba->lpfc_scsi_buf_arr) {
+ struct lpfc_scsi_buf *psb;
+
+ for (i = 0; i < phba->cfg_hba_queue_depth; i++) {
+ psb = phba->lpfc_scsi_buf_arr[i];
+ if (psb &&
+ !test_and_set_bit(LPFC_CMD_QUEUED, &psb->flags))
+ list_add(&psb->list, &post_sblist);
+ }
+ }
spin_lock_irq(&phba->scsi_buf_list_get_lock);
spin_lock(&phba->scsi_buf_list_put_lock);
list_splice_init(&phba->lpfc_scsi_buf_list_get, &post_sblist);
@@ -913,6 +923,12 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc)
psb->dma_phys_bpl = pdma_phys_bpl;
/* add the scsi buffer to a post list */
+ if (phba->lpfc_scsi_buf_arr) {
+ int idx = phba->total_scsi_bufs + bcnt;
+ psb->iotag = idx;
+ phba->lpfc_scsi_buf_arr[idx] = psb;
+ set_bit(LPFC_CMD_QUEUED, &psb->flags);
+ }
list_add_tail(&psb->list, &post_sblist);
spin_lock_irq(&phba->scsi_buf_list_get_lock);
phba->sli4_hba.scsi_xri_cnt++;
@@ -1105,9 +1121,13 @@ lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
} else {
psb->pCmd = NULL;
psb->cur_iocbq.iocb_flag = LPFC_IO_FCP;
- spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
- list_add_tail(&psb->list, &phba->lpfc_scsi_buf_list_put);
- spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
+ if (phba->lpfc_scsi_buf_arr)
+ clear_bit(LPFC_CMD_QUEUED, &psb->flags);
+ else {
+ spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
+ list_add_tail(&psb->list, &phba->lpfc_scsi_buf_list_put);
+ spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
+ }
}
}
@@ -4533,7 +4553,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
struct lpfc_hba *phba = vport->phba;
struct lpfc_rport_data *rdata;
struct lpfc_nodelist *ndlp;
- struct lpfc_scsi_buf *lpfc_cmd;
+ struct lpfc_scsi_buf *lpfc_cmd = NULL;
struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
int err;
@@ -4566,7 +4586,28 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
if (atomic_read(&ndlp->cmd_pending) >= ndlp->cmd_qdepth)
goto out_tgt_busy;
- lpfc_cmd = lpfc_get_scsi_buf(phba, ndlp);
+ if (phba->lpfc_scsi_buf_arr) {
+ u32 tag = blk_mq_unique_tag(cmnd->request);
+ u16 hwq = blk_mq_unique_tag_to_hwq(tag);
+ u16 idx = blk_mq_unique_tag_to_tag(tag);
+
+ idx = idx * phba->cfg_fcp_io_channel + hwq;
+ if (idx >= phba->cfg_hba_queue_depth) {
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_SCSI_CMD,
+ "9034 iotag %x too large\n", idx);
+ } else
+ lpfc_cmd = phba->lpfc_scsi_buf_arr[idx];
+ if (!lpfc_cmd)
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_SCSI_CMD,
+ "9035 iotag %x invalid\n", idx);
+ else if (test_and_set_bit(LPFC_CMD_QUEUED, &lpfc_cmd->flags)) {
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_SCSI_CMD,
+ "9036 iotag %x hwq %x busy\n",
+ lpfc_cmd->iotag, hwq);
+ lpfc_cmd = NULL;
+ }
+ } else
+ lpfc_cmd = lpfc_get_scsi_buf(phba, ndlp);
if (lpfc_cmd == NULL) {
lpfc_rampdown_queue_depth(phba);
@@ -4962,7 +5003,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct lpfc_rport_data *rdata,
uint8_t task_mgmt_cmd)
{
struct lpfc_hba *phba = vport->phba;
- struct lpfc_scsi_buf *lpfc_cmd;
+ struct lpfc_scsi_buf *lpfc_cmd = NULL;
struct lpfc_iocbq *iocbq;
struct lpfc_iocbq *iocbqrsp;
struct lpfc_nodelist *pnode = rdata->pnode;
@@ -4972,7 +5013,21 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct lpfc_rport_data *rdata,
if (!pnode || !NLP_CHK_NODE_ACT(pnode))
return FAILED;
- lpfc_cmd = lpfc_get_scsi_buf(phba, rdata->pnode);
+ if (phba->lpfc_scsi_buf_arr) {
+ int idx;
+ for (idx = 0; idx < phba->cfg_hba_queue_depth; idx++) {
+ lpfc_cmd = phba->lpfc_scsi_buf_arr[idx];
+ if (lpfc_cmd && !test_and_set_bit(LPFC_CMD_QUEUED,
+ &lpfc_cmd->flags)) {
+ ret = 0;
+ break;
+ }
+ ret = -EBUSY;
+ }
+ if (ret < 0)
+ lpfc_cmd = NULL;
+ } else
+ lpfc_cmd = lpfc_get_scsi_buf(phba, rdata->pnode);
if (lpfc_cmd == NULL)
return FAILED;
lpfc_cmd->timeout = phba->cfg_task_mgmt_tmo;
@@ -5483,10 +5538,12 @@ lpfc_slave_alloc(struct scsi_device *sdev)
* extra. This list of scsi bufs exists for the lifetime of the driver.
*/
total = phba->total_scsi_bufs;
- num_to_alloc = vport->cfg_lun_queue_depth + 2;
+ num_to_alloc = (vport->cfg_lun_queue_depth + 2) *
+ phba->cfg_fcp_io_channel;
/* If allocated buffers are enough do nothing */
- if ((sdev_cnt * (vport->cfg_lun_queue_depth + 2)) < total)
+ if (!shost_use_blk_mq(sdev->host) &&
+ (sdev_cnt * (vport->cfg_lun_queue_depth + 2)) < total)
return 0;
/* Allow some exchanges to be available always to complete discovery */
@@ -5514,8 +5571,13 @@ lpfc_slave_alloc(struct scsi_device *sdev)
"Allocated %d buffers.\n",
num_to_alloc, num_allocated);
}
- if (num_allocated > 0)
+ if (num_allocated > 0) {
phba->total_scsi_bufs += num_allocated;
+ if (shost_use_blk_mq(sdev->host)) {
+ int num_tags = num_allocated / phba->cfg_fcp_io_channel;
+ scsi_mq_resize_tags(sdev->host, num_tags);
+ }
+ }
return 0;
}
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index 4e8f0bd..a07341e 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -134,7 +134,7 @@ struct lpfc_scsi_buf {
uint32_t timeout;
- uint16_t xx_exch_busy; /* SLI4 hba reported XB on complete WCQE */
+ uint16_t iotag;
uint16_t status; /* From IOCB Word 7- ulpStatus */
uint32_t result; /* From IOCB Word 4. */
@@ -144,8 +144,9 @@ struct lpfc_scsi_buf {
uint32_t prot_seg_cnt; /* seg_cnt's counterpart for protection data */
unsigned long flags;
-#define LPFC_CMD_EXCH_BUSY 1
-#define LPFC_CMD_ABORTED 2
+#define LPFC_CMD_EXCH_BUSY 0
+#define LPFC_CMD_ABORTED 1
+#define LPFC_CMD_QUEUED 2
dma_addr_t nonsg_phys; /* Non scatter-gather physical address. */
/*
--
1.8.5.6
Thread overview: 17+ messages
2016-06-02 14:39 [PATCH 00/14] lpfc multiqueue performance fixes Hannes Reinecke
2016-06-02 14:39 ` [PATCH 01/14] block: don't check request size in blk_cloned_rq_check_limits() Hannes Reinecke
2016-06-02 14:39 ` [PATCH 02/14] lpfc: remove unused 'pring' argument Hannes Reinecke
2016-06-02 14:39 ` [PATCH 03/14] lpfc: Add config option 'lpfc_enable_stats' Hannes Reinecke
2016-06-02 14:39 ` [PATCH 04/14] scsi: add 'abort_completions' host template flag Hannes Reinecke
2016-06-02 14:39 ` [PATCH 05/14] lpfc: do not wait for completion when aborting commands Hannes Reinecke
2016-06-02 14:39 ` [PATCH 06/14] lpfc: display manual queue assignment Hannes Reinecke
2016-06-02 14:39 ` [PATCH 07/14] scsi: modify can_queue after calling mq_allocate() Hannes Reinecke
2016-06-02 14:39 ` [PATCH 08/14] blk-mq: add blk_mq_resize_tag_set() Hannes Reinecke
2016-06-02 15:55 ` Bart Van Assche
2016-06-02 14:39 ` [PATCH 09/14] scsi: Implement scsi_mq_resize_tags() Hannes Reinecke
2016-06-02 15:58 ` Bart Van Assche
2016-06-02 14:39 ` [PATCH 10/14] lpfc: use bitflag for exch_busy Hannes Reinecke
2016-06-02 14:39 ` [PATCH 11/14] lpfc: set LPFC_CMD_ABORTED when a command is put on the aborted list Hannes Reinecke
2016-06-02 14:39 ` Hannes Reinecke [this message]
2016-06-02 14:39 ` [PATCH 13/14] lpfc: LPFC_CMD_RRQ_ACTIVE flag to mark commands Hannes Reinecke
2016-06-02 14:39 ` [PATCH 14/14] lpfc: Complete scsi commands after RRQ has completed Hannes Reinecke