linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: John Garry <john.g.garry@oracle.com>
To: Sagar Biradar <sagar.biradar@microchip.com>,
	Don Brace <don.brace@microchip.com>,
	Gilbert Wu <gilbert.wu@microchip.com>,
	linux-scsi@vger.kernel.org,
	Martin Petersen <martin.petersen@oracle.com>,
	James Bottomley <jejb@linux.ibm.com>,
	Brian King <brking@linux.vnet.ibm.com>,
	stable@vger.kernel.org, Tom White <tom.white@microchip.com>,
	regressions@leemhuis.info, hare@suse.com
Subject: Re: [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity
Date: Thu, 23 Nov 2023 12:01:38 +0000	[thread overview]
Message-ID: <c830058d-8d03-4da4-bdd4-0e56c567308f@oracle.com> (raw)
In-Reply-To: <20230519230834.27436-1-sagar.biradar@microchip.com>

On 20/05/2023 00:08, Sagar Biradar wrote:
> Fix the IO hang that arises because of MSIx vector not
> having a mapped online CPU upon receiving completion.
> 
> The SCSI cmds take the blk_mq route, which is setup during the init.
> The reserved cmds fetch the vector_no from mq_map after the init
> is complete and before the init, they use 0 - as per the norm.
> 
> Reviewed-by: Gilbert Wu <gilbert.wu@microchip.com>
> Signed-off-by: Sagar Biradar <Sagar.Biradar@microchip.com>

This is the patch which seems to be causing the issue in
https://bugzilla.kernel.org/show_bug.cgi?id=217599

I will comment here since I got no response there...

> ---
>   drivers/scsi/aacraid/aacraid.h  |  1 +
>   drivers/scsi/aacraid/comminit.c |  1 -
>   drivers/scsi/aacraid/commsup.c  |  6 +++++-
>   drivers/scsi/aacraid/linit.c    | 14 ++++++++++++++
>   drivers/scsi/aacraid/src.c      | 25 +++++++++++++++++++++++--
>   5 files changed, 43 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
> index 5e115e8b2ba4..7c6efde75da6 100644
> --- a/drivers/scsi/aacraid/aacraid.h
> +++ b/drivers/scsi/aacraid/aacraid.h
> @@ -1678,6 +1678,7 @@ struct aac_dev
>   	u32			handle_pci_error;
>   	bool			init_reset;
>   	u8			soft_reset_support;
> +	u8			use_map_queue;
>   };
>   
>   #define aac_adapter_interrupt(dev) \
> diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
> index bd99c5492b7d..a5483e7e283a 100644
> --- a/drivers/scsi/aacraid/comminit.c
> +++ b/drivers/scsi/aacraid/comminit.c
> @@ -657,4 +657,3 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
>   
>   	return dev;
>   }
> -
> diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
> index deb32c9f4b3e..3f062e4013ab 100644
> --- a/drivers/scsi/aacraid/commsup.c
> +++ b/drivers/scsi/aacraid/commsup.c
> @@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev)
>   struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd)
>   {
>   	struct fib *fibptr;
> +	u32 blk_tag;
> +	int i;
>   
> -	fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag];
> +	blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
> +	i = blk_mq_unique_tag_to_tag(blk_tag);
> +	fibptr = &dev->fibs[i];
>   	/*
>   	 *	Null out fields that depend on being zero at the start of
>   	 *	each I/O
> diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
> index 5ba5c18b77b4..9caf8c314ce1 100644
> --- a/drivers/scsi/aacraid/linit.c
> +++ b/drivers/scsi/aacraid/linit.c
> @@ -19,6 +19,7 @@
>   
>   #include <linux/compat.h>
>   #include <linux/blkdev.h>
> +#include <linux/blk-mq-pci.h>
>   #include <linux/completion.h>
>   #include <linux/init.h>
>   #include <linux/interrupt.h>
> @@ -505,6 +506,15 @@ static int aac_slave_configure(struct scsi_device *sdev)
>   	return 0;
>   }
>   
> +static void aac_map_queues(struct Scsi_Host *shost)
> +{
> +	struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
> +
> +	blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
> +				aac->pdev, 0);
> +	aac->use_map_queue = true;
> +}
> +
>   /**
>    *	aac_change_queue_depth		-	alter queue depths
>    *	@sdev:	SCSI device we are considering
> @@ -1489,6 +1499,7 @@ static struct scsi_host_template aac_driver_template = {
>   	.bios_param			= aac_biosparm,
>   	.shost_groups			= aac_host_groups,
>   	.slave_configure		= aac_slave_configure,
> +	.map_queues			= aac_map_queues,
>   	.change_queue_depth		= aac_change_queue_depth,
>   	.sdev_groups			= aac_dev_groups,
>   	.eh_abort_handler		= aac_eh_abort,
> @@ -1776,6 +1787,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
>   	shost->max_lun = AAC_MAX_LUN;
>   
>   	pci_set_drvdata(pdev, shost);
> +	shost->nr_hw_queues = aac->max_msix;
> +	shost->host_tagset = 1;
>   
>   	error = scsi_add_host(shost, &pdev->dev);
>   	if (error)
> @@ -1908,6 +1921,7 @@ static void aac_remove_one(struct pci_dev *pdev)
>   	struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
>   
>   	aac_cancel_rescan_worker(aac);
> +	aac->use_map_queue = false;
>   	scsi_remove_host(shost);
>   
>   	__aac_shutdown(aac);
> diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
> index 11ef58204e96..61949f374188 100644
> --- a/drivers/scsi/aacraid/src.c
> +++ b/drivers/scsi/aacraid/src.c
> @@ -493,6 +493,10 @@ static int aac_src_deliver_message(struct fib *fib)
>   #endif
>   
>   	u16 vector_no;
> +	struct scsi_cmnd *scmd;
> +	u32 blk_tag;
> +	struct Scsi_Host *shost = dev->scsi_host_ptr;
> +	struct blk_mq_queue_map *qmap;
>   
>   	atomic_inc(&q->numpending);
>   
> @@ -505,8 +509,25 @@ static int aac_src_deliver_message(struct fib *fib)
>   		if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3)
>   			&& dev->sa_firmware)
>   			vector_no = aac_get_vector(dev);
> -		else
> -			vector_no = fib->vector_no;
> +		else {
> +			if (!fib->vector_no || !fib->callback_data) {
> +				if (shost && dev->use_map_queue) {
> +					qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
> +					vector_no = qmap->mq_map[raw_smp_processor_id()];
> +				}
> +				/*
> +				 *	We hardcode the vector_no for
> +				 *	reserved commands as a valid shost is
> +				 *	absent during the init
> +				 */
> +				else
> +					vector_no = 0;
> +			} else {
> +				scmd = (struct scsi_cmnd *)fib->callback_data;
> +				blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
> +				vector_no = blk_mq_unique_tag_to_hwq(blk_tag);



Hannes' patch in the bugzilla was to revert to using hw queue #0 always 
for internal commands, and it didn't help.

Could there be any issue in using hw queue #0 for regular SCSI commands?

AFAICS, that's a significant change. Previously we would use 
fib->vector_no to decide the queue, which was in range (1, dev->max_msix).

BTW, is there any code which relies on a command being sent/received on
the HW queue that matches fib->vector_no?

Thanks,
John

> +			
> +		}
>   
>   		if (native_hba) {
>   			if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) {


  parent reply	other threads:[~2023-11-23 12:02 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-19 23:08 [PATCH v4] aacraid: reply queue mapping to CPUs based of IRQ affinity Sagar Biradar
2023-06-08 21:09 ` Sagar.Biradar
2023-06-15  2:15 ` Martin K. Petersen
2023-11-23 12:01 ` John Garry [this message]
2023-11-23 12:51   ` James Bottomley
2023-11-24  6:32   ` Hannes Reinecke

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c830058d-8d03-4da4-bdd4-0e56c567308f@oracle.com \
    --to=john.g.garry@oracle.com \
    --cc=brking@linux.vnet.ibm.com \
    --cc=don.brace@microchip.com \
    --cc=gilbert.wu@microchip.com \
    --cc=hare@suse.com \
    --cc=jejb@linux.ibm.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=regressions@leemhuis.info \
    --cc=sagar.biradar@microchip.com \
    --cc=stable@vger.kernel.org \
    --cc=tom.white@microchip.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).