* [PATCH v2] aacraid: reply queue mapping to CPUs based of IRQ affinity
@ 2023-04-28 21:07 Sagar Biradar
2023-04-28 21:08 ` kernel test robot
2023-05-01 15:59 ` John Garry
0 siblings, 2 replies; 3+ messages in thread
From: Sagar Biradar @ 2023-04-28 21:07 UTC (permalink / raw)
To: Don Brace, Sagar Biradar, Gilbert Wu, linux-scsi, Martin Petersen,
James Bottomley, Brian King, stable, Tom White
Fix the I/O hang that arises when an MSI-X vector does not
have a mapped online CPU at the time a completion is received.
This patch sets up a reply queue mapping to CPUs based on the
IRQ affinity retrieved using the pci_irq_get_affinity() API.
aac_setup_reply_map() is an explicit mapping for internally
generated (non-SCSI) cmds.
The SCSI cmds take the blk_mq route, and the non-SCSI cmds are mapped
to the reply_map.
Reviewed-by: Gilbert Wu <gilbert.wu@microchip.com>
Signed-off-by: Sagar Biradar <Sagar.Biradar@microchip.com>
---
drivers/scsi/aacraid/aacraid.h | 1 +
drivers/scsi/aacraid/comminit.c | 32 ++++++++++++++++++++++++++++++++
drivers/scsi/aacraid/commsup.c | 6 +++++-
drivers/scsi/aacraid/linit.c | 25 +++++++++++++++++++++++++
drivers/scsi/aacraid/src.c | 13 +++++++++++--
5 files changed, 74 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 5e115e8b2ba4..20f8560a3038 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1678,6 +1678,7 @@ struct aac_dev
u32 handle_pci_error;
bool init_reset;
u8 soft_reset_support;
+ unsigned int *reply_map;
};
#define aac_adapter_interrupt(dev) \
diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index bd99c5492b7d..6f4e40cdaade 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -33,6 +33,8 @@
#include "aacraid.h"
+void aac_setup_reply_map(struct aac_dev *dev);
+
struct aac_common aac_config = {
.irq_mod = 1
};
@@ -630,6 +632,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
if (aac_is_src(dev))
aac_define_int_mode(dev);
+
+ aac_setup_reply_map(dev);
+
/*
* Ok now init the communication subsystem
*/
@@ -658,3 +663,30 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
return dev;
}
+/*
+ * aac_setup_reply_map - This is an explicit mapping for
+ * internally generated (non-SCSI) cmds which need to be
+ * serviced outside of IO requests.
+ * The SCSI cmds take the blk_mq mechanism,
+ * and the non-SCSI cmds are mapped to the reply_map.
+ */
+void aac_setup_reply_map(struct aac_dev *dev)
+{
+ const struct cpumask *mask;
+ unsigned int i, cpu = 1;
+
+ for (i = 1; i < dev->max_msix; i++) {
+ mask = pci_irq_get_affinity(dev->pdev, i);
+ if (!mask)
+ goto fallback;
+
+ for_each_cpu(cpu, mask) {
+ dev->reply_map[cpu] = i;
+ }
+ }
+ return;
+
+fallback:
+ for_each_possible_cpu(cpu)
+ dev->reply_map[cpu] = 0;
+}
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index deb32c9f4b3e..3f062e4013ab 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev)
struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd)
{
struct fib *fibptr;
+ u32 blk_tag;
+ int i;
- fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag];
+ blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
+ i = blk_mq_unique_tag_to_tag(blk_tag);
+ fibptr = &dev->fibs[i];
/*
* Null out fields that depend on being zero at the start of
* each I/O
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 5ba5c18b77b4..077adbcde909 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -34,6 +34,7 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/msdos_partition.h>
+#include <linux/blk-mq-pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -505,6 +506,16 @@ static int aac_slave_configure(struct scsi_device *sdev)
return 0;
}
+static void aac_map_queues(struct Scsi_Host *shost)
+{
+ struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
+
+ blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
+ aac->pdev, 0);
+}
+
+
+
/**
* aac_change_queue_depth - alter queue depths
* @sdev: SCSI device we are considering
@@ -1489,6 +1500,7 @@ static struct scsi_host_template aac_driver_template = {
.bios_param = aac_biosparm,
.shost_groups = aac_host_groups,
.slave_configure = aac_slave_configure,
+ .map_queues = aac_map_queues,
.change_queue_depth = aac_change_queue_depth,
.sdev_groups = aac_dev_groups,
.eh_abort_handler = aac_eh_abort,
@@ -1668,6 +1680,14 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto out_free_host;
}
+ aac->reply_map = kzalloc(sizeof(unsigned int) * nr_cpu_ids,
+ GFP_KERNEL);
+ if (!aac->reply_map) {
+ error = -ENOMEM;
+ dev_err(&pdev->dev, "reply_map allocation failed\n");
+ goto out_free_host;
+ }
+
spin_lock_init(&aac->fib_lock);
mutex_init(&aac->ioctl_mutex);
@@ -1776,6 +1796,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
shost->max_lun = AAC_MAX_LUN;
pci_set_drvdata(pdev, shost);
+ shost->nr_hw_queues = aac->max_msix;
+ shost->host_tagset = 1;
error = scsi_add_host(shost, &pdev->dev);
if (error)
@@ -1797,6 +1819,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
aac->comm_addr, aac->comm_phys);
kfree(aac->queues);
aac_adapter_ioremap(aac, 0);
+ /* By now we should have configured the reply_map */
+ kfree(aac->reply_map);
kfree(aac->fibs);
kfree(aac->fsa_dev);
out_free_host:
@@ -1918,6 +1942,7 @@ static void aac_remove_one(struct pci_dev *pdev)
aac_adapter_ioremap(aac, 0);
+ kfree(aac->reply_map);
kfree(aac->fibs);
kfree(aac->fsa_dev);
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 11ef58204e96..46c0f4df995d 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -493,6 +493,8 @@ static int aac_src_deliver_message(struct fib *fib)
#endif
u16 vector_no;
+ struct scsi_cmnd *scmd;
+ u32 blk_tag;
atomic_inc(&q->numpending);
@@ -505,8 +507,15 @@ static int aac_src_deliver_message(struct fib *fib)
if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3)
&& dev->sa_firmware)
vector_no = aac_get_vector(dev);
- else
- vector_no = fib->vector_no;
+ else {
+ if (!fib->vector_no || !fib->callback_data) {
+ vector_no = dev->reply_map[raw_smp_processor_id()];
+ } else {
+ scmd = (struct scsi_cmnd *)fib->callback_data;
+ blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
+ vector_no = blk_mq_unique_tag_to_hwq(blk_tag);
+ }
+ }
if (native_hba) {
if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) {
--
2.29.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v2] aacraid: reply queue mapping to CPUs based of IRQ affinity
2023-04-28 21:07 [PATCH v2] aacraid: reply queue mapping to CPUs based of IRQ affinity Sagar Biradar
@ 2023-04-28 21:08 ` kernel test robot
2023-05-01 15:59 ` John Garry
1 sibling, 0 replies; 3+ messages in thread
From: kernel test robot @ 2023-04-28 21:08 UTC (permalink / raw)
To: Sagar Biradar; +Cc: stable, oe-kbuild-all
Hi,
Thanks for your patch.
FYI: kernel test robot notices the stable kernel rule is not satisfied.
Rule: 'Cc: stable@vger.kernel.org' or 'commit <sha1> upstream.'
Subject: [PATCH v2] aacraid: reply queue mapping to CPUs based of IRQ affinity
Link: https://lore.kernel.org/stable/20230428210751.29722-1-sagar.biradar%40microchip.com
The check is based on https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v2] aacraid: reply queue mapping to CPUs based of IRQ affinity
2023-04-28 21:07 [PATCH v2] aacraid: reply queue mapping to CPUs based of IRQ affinity Sagar Biradar
2023-04-28 21:08 ` kernel test robot
@ 2023-05-01 15:59 ` John Garry
1 sibling, 0 replies; 3+ messages in thread
From: John Garry @ 2023-05-01 15:59 UTC (permalink / raw)
To: Sagar Biradar, Don Brace, Gilbert Wu, linux-scsi, Martin Petersen,
James Bottomley, Brian King, stable, Tom White
On 28/04/2023 22:07, Sagar Biradar wrote:
> Fix the IO hang that arises because of MSIx vector not
> having a mapped online CPU upon receiving completion.
> This patch sets up a reply queue mapping to CPUs based on the
> IRQ affinity retrieved using pci_irq_get_affinity() API.
>
> aac_setup_reply_map() is an explicit mapping for internally
> generated (non-SCSI) cmds.
> The SCSI cmds take the blk_mq route, and the non-SCSI cmds are mapped
> to the reply_map.
This now looks better.
I would still prefer that no reply_map be used, even for internal
commands. As I see it, you have two alternatives to using reply_map:
- instead of using a driver-internal reply_map, look up the CPU->HW queue
mapping for internal commands by using
shost->tag_set.map[HCTX_TYPE_DEFAULT].mq_map[raw_smp_processor_id()]
Ideally when we finally support reserved commands for SCSI ML we will
have a better solution for this.
- if it is possible to always send driver-internal commands on a specific
HW queue, then reserve a dedicated HW queue for them (and always
send on that HW queue). You may reserve this HW queue by omitting one HW
queue from pci_alloc_irq_vectors_affinity() for the affinity spread.
>
> Reviewed-by: Gilbert Wu <gilbert.wu@microchip.com>
> Signed-off-by: Sagar Biradar <Sagar.Biradar@microchip.com>
> ---
> drivers/scsi/aacraid/aacraid.h | 1 +
> drivers/scsi/aacraid/comminit.c | 32 ++++++++++++++++++++++++++++++++
> drivers/scsi/aacraid/commsup.c | 6 +++++-
> drivers/scsi/aacraid/linit.c | 25 +++++++++++++++++++++++++
> drivers/scsi/aacraid/src.c | 13 +++++++++++--
> 5 files changed, 74 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
> index 5e115e8b2ba4..20f8560a3038 100644
> --- a/drivers/scsi/aacraid/aacraid.h
> +++ b/drivers/scsi/aacraid/aacraid.h
> @@ -1678,6 +1678,7 @@ struct aac_dev
> u32 handle_pci_error;
> bool init_reset;
> u8 soft_reset_support;
> + unsigned int *reply_map;
> };
>
> #define aac_adapter_interrupt(dev) \
> diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
> index bd99c5492b7d..6f4e40cdaade 100644
> --- a/drivers/scsi/aacraid/comminit.c
> +++ b/drivers/scsi/aacraid/comminit.c
> @@ -33,6 +33,8 @@
>
> #include "aacraid.h"
>
> +void aac_setup_reply_map(struct aac_dev *dev);
> +
> struct aac_common aac_config = {
> .irq_mod = 1
> };
> @@ -630,6 +632,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
>
> if (aac_is_src(dev))
> aac_define_int_mode(dev);
> +
> + aac_setup_reply_map(dev);
> +
> /*
> * Ok now init the communication subsystem
> */
> @@ -658,3 +663,30 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
> return dev;
> }
>
> +/*
> + * aac_setup_reply_map - This is an explicit mapping for
> + * internally generated (non-SCSI) cmds which need to be
> + * serviced outside of IO requests.
> + * The SCSI cmds take the blk_mq mechanism,
> + * and the non-SCSI cmds are mapped to the reply_map.
> + */
> +void aac_setup_reply_map(struct aac_dev *dev)
> +{
> + const struct cpumask *mask;
> + unsigned int i, cpu = 1;
> +
> + for (i = 1; i < dev->max_msix; i++) {
> + mask = pci_irq_get_affinity(dev->pdev, i);
> + if (!mask)
> + goto fallback;
> +
> + for_each_cpu(cpu, mask) {
> + dev->reply_map[cpu] = i;
> + }
> + }
> + return;
> +
> +fallback:
> + for_each_possible_cpu(cpu)
> + dev->reply_map[cpu] = 0;
> +}
> diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
> index deb32c9f4b3e..3f062e4013ab 100644
> --- a/drivers/scsi/aacraid/commsup.c
> +++ b/drivers/scsi/aacraid/commsup.c
> @@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev)
> struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd)
> {
> struct fib *fibptr;
> + u32 blk_tag;
> + int i;
>
> - fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag];
> + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
> + i = blk_mq_unique_tag_to_tag(blk_tag);
> + fibptr = &dev->fibs[i];
> /*
> * Null out fields that depend on being zero at the start of
> * each I/O
> diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
> index 5ba5c18b77b4..077adbcde909 100644
> --- a/drivers/scsi/aacraid/linit.c
> +++ b/drivers/scsi/aacraid/linit.c
> @@ -34,6 +34,7 @@
> #include <linux/delay.h>
> #include <linux/kthread.h>
> #include <linux/msdos_partition.h>
> +#include <linux/blk-mq-pci.h>
>
> #include <scsi/scsi.h>
> #include <scsi/scsi_cmnd.h>
> @@ -505,6 +506,16 @@ static int aac_slave_configure(struct scsi_device *sdev)
> return 0;
> }
>
> +static void aac_map_queues(struct Scsi_Host *shost)
> +{
> + struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
I don't think that you need an explicit cast ...
> +
> + blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
> + aac->pdev, 0);
> +}
> +
> +
> +
> /**
> * aac_change_queue_depth - alter queue depths
> * @sdev: SCSI device we are considering
> @@ -1489,6 +1500,7 @@ static struct scsi_host_template aac_driver_template = {
> .bios_param = aac_biosparm,
> .shost_groups = aac_host_groups,
> .slave_configure = aac_slave_configure,
> + .map_queues = aac_map_queues,
> .change_queue_depth = aac_change_queue_depth,
> .sdev_groups = aac_dev_groups,
> .eh_abort_handler = aac_eh_abort,
> @@ -1668,6 +1680,14 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
> goto out_free_host;
> }
>
> + aac->reply_map = kzalloc(sizeof(unsigned int) * nr_cpu_ids,
> + GFP_KERNEL);
> + if (!aac->reply_map) {
> + error = -ENOMEM;
> + dev_err(&pdev->dev, "reply_map allocation failed\n");
> + goto out_free_host;
> + }
> +
> spin_lock_init(&aac->fib_lock);
>
> mutex_init(&aac->ioctl_mutex);
> @@ -1776,6 +1796,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
> shost->max_lun = AAC_MAX_LUN;
>
> pci_set_drvdata(pdev, shost);
> + shost->nr_hw_queues = aac->max_msix;
> + shost->host_tagset = 1;
>
> error = scsi_add_host(shost, &pdev->dev);
> if (error)
> @@ -1797,6 +1819,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
> aac->comm_addr, aac->comm_phys);
> kfree(aac->queues);
> aac_adapter_ioremap(aac, 0);
> + /* By now we should have configured the reply_map */
> + kfree(aac->reply_map);
> kfree(aac->fibs);
> kfree(aac->fsa_dev);
> out_free_host:
> @@ -1918,6 +1942,7 @@ static void aac_remove_one(struct pci_dev *pdev)
>
> aac_adapter_ioremap(aac, 0);
>
> + kfree(aac->reply_map);
> kfree(aac->fibs);
> kfree(aac->fsa_dev);
>
> diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
> index 11ef58204e96..46c0f4df995d 100644
> --- a/drivers/scsi/aacraid/src.c
> +++ b/drivers/scsi/aacraid/src.c
> @@ -493,6 +493,8 @@ static int aac_src_deliver_message(struct fib *fib)
> #endif
>
> u16 vector_no;
> + struct scsi_cmnd *scmd;
> + u32 blk_tag;
>
> atomic_inc(&q->numpending);
>
> @@ -505,8 +507,15 @@ static int aac_src_deliver_message(struct fib *fib)
> if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3)
> && dev->sa_firmware)
> vector_no = aac_get_vector(dev);
> - else
> - vector_no = fib->vector_no;
> + else {
> + if (!fib->vector_no || !fib->callback_data) {
> + vector_no = dev->reply_map[raw_smp_processor_id()];
> + } else {
> + scmd = (struct scsi_cmnd *)fib->callback_data;
> + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
> + vector_no = blk_mq_unique_tag_to_hwq(blk_tag);
> + }
> + }
>
> if (native_hba) {
> if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) {
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-05-01 16:00 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-28 21:07 [PATCH v2] aacraid: reply queue mapping to CPUs based of IRQ affinity Sagar Biradar
2023-04-28 21:08 ` kernel test robot
2023-05-01 15:59 ` John Garry
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox