From: Don Brace <don.brace@microchip.com>
To: <don.brace@microchip.com>, <Kevin.Barnett@microchip.com>,
<scott.teel@microchip.com>, <Justin.Lindley@microchip.com>,
<scott.benesh@microchip.com>, <gerry.morong@microchip.com>,
<mahesh.rajashekhara@microchip.com>, <mike.mcgowen@microchip.com>,
<murthy.bhat@microchip.com>, <kumar.meiyappan@microchip.com>,
<jeremy.reeves@microchip.com>, <david.strahan@microchip.com>,
<hch@infradead.org>, <jejb@linux.vnet.ibm.com>,
<joseph.szczypek@hpe.com>, <POSWALD@suse.com>
Cc: <linux-scsi@vger.kernel.org>
Subject: [PATCH 2/9] smartpqi: add abort handler
Date: Thu, 17 Aug 2023 08:12:25 -0500 [thread overview]
Message-ID: <20230817131232.86754-3-don.brace@microchip.com> (raw)
In-Reply-To: <20230817131232.86754-1-don.brace@microchip.com>
From: Kevin Barnett <kevin.barnett@microchip.com>
Implement aborts as resets.
Avoid I/O stalls across all devices attached to a controller when
device I/O requests time out.
Reviewed-by: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Mike McGowen <mike.mcgowen@microchip.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
---
drivers/scsi/smartpqi/smartpqi.h | 14 ++-
drivers/scsi/smartpqi/smartpqi_init.c | 171 ++++++++++++++++++++------
2 files changed, 149 insertions(+), 36 deletions(-)
diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index e392eaf5b2bf..e560d99efa95 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -1085,7 +1085,16 @@ struct pqi_stream_data {
u32 last_accessed;
};
-#define PQI_MAX_LUNS_PER_DEVICE 256
+#define PQI_MAX_LUNS_PER_DEVICE 256
+
+struct pqi_tmf_work {
+ struct work_struct work_struct;
+ struct scsi_cmnd *scmd;
+ struct pqi_ctrl_info *ctrl_info;
+ struct pqi_scsi_dev *device;
+ u8 lun;
+ u8 scsi_opcode;
+};
struct pqi_scsi_dev {
int devtype; /* as reported by INQUIRY command */
@@ -1111,6 +1120,7 @@ struct pqi_scsi_dev {
u8 erase_in_progress : 1;
bool aio_enabled; /* only valid for physical disks */
bool in_remove;
+ bool in_reset[PQI_MAX_LUNS_PER_DEVICE];
bool device_offline;
u8 vendor[8]; /* bytes 8-15 of inquiry data */
u8 model[16]; /* bytes 16-31 of inquiry data */
@@ -1149,6 +1159,8 @@ struct pqi_scsi_dev {
struct pqi_stream_data stream_data[NUM_STREAMS_PER_LUN];
atomic_t scsi_cmds_outstanding[PQI_MAX_LUNS_PER_DEVICE];
unsigned int raid_bypass_cnt;
+
+ struct pqi_tmf_work tmf_work[PQI_MAX_LUNS_PER_DEVICE];
};
/* VPD inquiry pages */
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 4486259f85ab..ec36896eb08e 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -48,6 +48,8 @@
#define PQI_POST_RESET_DELAY_SECS 5
#define PQI_POST_OFA_RESET_DELAY_UPON_TIMEOUT_SECS 10
+#define PQI_NO_COMPLETION ((void *)-1)
+
MODULE_AUTHOR("Microchip");
MODULE_DESCRIPTION("Driver for Microchip Smart Family Controller version "
DRIVER_VERSION);
@@ -96,6 +98,7 @@ static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info);
static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *device, u8 lun, unsigned long timeout_msecs);
static void pqi_fail_all_outstanding_requests(struct pqi_ctrl_info *ctrl_info);
+static void pqi_tmf_worker(struct work_struct *work);
/* for flags argument to pqi_submit_raid_request_synchronous() */
#define PQI_SYNC_FLAGS_INTERRUPTABLE 0x1
@@ -455,6 +458,21 @@ static inline bool pqi_device_in_remove(struct pqi_scsi_dev *device)
return device->in_remove;
}
+static inline void pqi_device_reset_start(struct pqi_scsi_dev *device, u8 lun)
+{
+ device->in_reset[lun] = true;
+}
+
+static inline void pqi_device_reset_done(struct pqi_scsi_dev *device, u8 lun)
+{
+ device->in_reset[lun] = false;
+}
+
+static inline bool pqi_device_in_reset(struct pqi_scsi_dev *device, u8 lun)
+{
+ return device->in_reset[lun];
+}
+
static inline int pqi_event_type_to_event_index(unsigned int event_type)
{
int index;
@@ -2122,6 +2140,15 @@ static inline bool pqi_is_device_added(struct pqi_scsi_dev *device)
return device->sdev != NULL;
}
+static inline void pqi_init_device_tmf_work(struct pqi_scsi_dev *device)
+{
+ unsigned int lun;
+ struct pqi_tmf_work *tmf_work;
+
+ for (lun = 0, tmf_work = device->tmf_work; lun < PQI_MAX_LUNS_PER_DEVICE; lun++, tmf_work++)
+ INIT_WORK(&tmf_work->work_struct, pqi_tmf_worker);
+}
+
static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices)
{
@@ -2202,6 +2229,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
list_add_tail(&device->add_list_entry, &add_list);
/* To prevent this device structure from being freed later. */
device->keep_device = true;
+ pqi_init_device_tmf_work(device);
}
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
@@ -5623,6 +5651,7 @@ static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd)
void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
{
struct pqi_scsi_dev *device;
+ struct completion *wait;
if (!scmd->device) {
set_host_byte(scmd, DID_NO_CONNECT);
@@ -5636,6 +5665,10 @@ void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
}
atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]);
+
+ wait = (struct completion *)xchg(&scmd->host_scribble, NULL);
+ if (wait != PQI_NO_COMPLETION)
+ complete(wait);
}
static bool pqi_is_parity_write_stream(struct pqi_ctrl_info *ctrl_info,
@@ -5719,6 +5752,9 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
u16 hw_queue;
struct pqi_queue_group *queue_group;
bool raid_bypassed;
+ u8 lun;
+
+ scmd->host_scribble = PQI_NO_COMPLETION;
device = scmd->device->hostdata;
@@ -5728,7 +5764,9 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
return 0;
}
- atomic_inc(&device->scsi_cmds_outstanding[scmd->device->lun]);
+ lun = (u8)scmd->device->lun;
+
+ atomic_inc(&device->scsi_cmds_outstanding[lun]);
ctrl_info = shost_to_hba(shost);
@@ -5738,7 +5776,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
return 0;
}
- if (pqi_ctrl_blocked(ctrl_info)) {
+ if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device, lun)) {
rc = SCSI_MLQUEUE_HOST_BUSY;
goto out;
}
@@ -5773,8 +5811,10 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
}
out:
- if (rc)
- atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]);
+ if (rc) {
+ scmd->host_scribble = NULL;
+ atomic_dec(&device->scsi_cmds_outstanding[lun]);
+ }
return rc;
}
@@ -5868,7 +5908,7 @@ static int pqi_wait_until_inbound_queues_empty(struct pqi_ctrl_info *ctrl_info)
}
static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
- struct pqi_scsi_dev *device)
+ struct pqi_scsi_dev *device, u8 lun)
{
unsigned int i;
unsigned int path;
@@ -5894,6 +5934,9 @@ static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
if (scsi_device != device)
continue;
+ if ((u8)scmd->device->lun != lun)
+ continue;
+
list_del(&io_request->request_list_entry);
set_host_byte(scmd, DID_RESET);
pqi_free_io_request(io_request);
@@ -5990,15 +6033,13 @@ static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,
#define PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS 30
-static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
{
int rc;
struct pqi_io_request *io_request;
DECLARE_COMPLETION_ONSTACK(wait);
struct pqi_task_management_request *request;
- struct pqi_scsi_dev *device;
- device = scmd->device->hostdata;
io_request = pqi_alloc_io_request(ctrl_info, NULL);
io_request->io_complete_callback = pqi_lun_reset_complete;
io_request->context = &wait;
@@ -6011,14 +6052,14 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
put_unaligned_le16(io_request->index, &request->request_id);
memcpy(request->lun_number, device->scsi3addr, sizeof(request->lun_number));
if (!pqi_is_logical_device(device) && ctrl_info->multi_lun_device_supported)
- request->ml_device_lun_number = (u8)scmd->device->lun;
+ request->ml_device_lun_number = lun;
request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;
if (ctrl_info->tmf_iu_timeout_supported)
put_unaligned_le16(PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS, &request->timeout);
pqi_start_io(ctrl_info, &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH, io_request);
- rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, (u8)scmd->device->lun, &wait);
+ rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, lun, &wait);
if (rc == 0)
rc = io_request->status;
@@ -6032,18 +6073,16 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
#define PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS (10 * 60 * 1000)
#define PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS (2 * 60 * 1000)
-static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
{
int reset_rc;
int wait_rc;
unsigned int retries;
unsigned long timeout_msecs;
- struct pqi_scsi_dev *device;
- device = scmd->device->hostdata;
for (retries = 0;;) {
- reset_rc = pqi_lun_reset(ctrl_info, scmd);
- if (reset_rc == 0 || reset_rc == -ENODEV || ++retries > PQI_LUN_RESET_RETRIES)
+ reset_rc = pqi_lun_reset(ctrl_info, device, lun);
+ if (reset_rc == 0 || reset_rc == -ENODEV || reset_rc == -ENXIO || ++retries > PQI_LUN_RESET_RETRIES)
break;
msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS);
}
@@ -6051,60 +6090,53 @@ static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct sc
timeout_msecs = reset_rc ? PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS :
PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS;
- wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, scmd->device->lun, timeout_msecs);
+ wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, lun, timeout_msecs);
if (wait_rc && reset_rc == 0)
reset_rc = wait_rc;
return reset_rc == 0 ? SUCCESS : FAILED;
}
-static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
{
int rc;
- struct pqi_scsi_dev *device;
- device = scmd->device->hostdata;
pqi_ctrl_block_requests(ctrl_info);
pqi_ctrl_wait_until_quiesced(ctrl_info);
- pqi_fail_io_queued_for_device(ctrl_info, device);
+ pqi_fail_io_queued_for_device(ctrl_info, device, lun);
rc = pqi_wait_until_inbound_queues_empty(ctrl_info);
+ pqi_device_reset_start(device, lun);
+ pqi_ctrl_unblock_requests(ctrl_info);
if (rc)
rc = FAILED;
else
- rc = pqi_lun_reset_with_retries(ctrl_info, scmd);
- pqi_ctrl_unblock_requests(ctrl_info);
+ rc = pqi_lun_reset_with_retries(ctrl_info, device, lun);
+ pqi_device_reset_done(device, lun);
return rc;
}
-static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
+static int pqi_device_reset_handler(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun, struct scsi_cmnd *scmd, u8 scsi_opcode)
{
int rc;
- struct Scsi_Host *shost;
- struct pqi_ctrl_info *ctrl_info;
- struct pqi_scsi_dev *device;
-
- shost = scmd->device->host;
- ctrl_info = shost_to_hba(shost);
- device = scmd->device->hostdata;
mutex_lock(&ctrl_info->lun_reset_mutex);
dev_err(&ctrl_info->pci_dev->dev,
"resetting scsi %d:%d:%d:%d due to cmd 0x%02x\n",
- shost->host_no,
- device->bus, device->target, (u32)scmd->device->lun,
+ ctrl_info->scsi_host->host_no,
+ device->bus, device->target, lun,
scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff);
pqi_check_ctrl_health(ctrl_info);
if (pqi_ctrl_offline(ctrl_info))
rc = FAILED;
else
- rc = pqi_device_reset(ctrl_info, scmd);
+ rc = pqi_device_reset(ctrl_info, device, lun);
dev_err(&ctrl_info->pci_dev->dev,
- "reset of scsi %d:%d:%d:%d: %s\n",
- shost->host_no, device->bus, device->target, (u32)scmd->device->lun,
+ "reset of scsi %d:%d:%d:%u: %s\n",
+ ctrl_info->scsi_host->host_no, device->bus, device->target, lun,
rc == SUCCESS ? "SUCCESS" : "FAILED");
mutex_unlock(&ctrl_info->lun_reset_mutex);
@@ -6112,6 +6144,74 @@ static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
return rc;
}
+static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
+{
+ struct Scsi_Host *shost;
+ struct pqi_ctrl_info *ctrl_info;
+ struct pqi_scsi_dev *device;
+ u8 scsi_opcode;
+
+ shost = scmd->device->host;
+ ctrl_info = shost_to_hba(shost);
+ device = scmd->device->hostdata;
+ scsi_opcode = scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff;
+
+ return pqi_device_reset_handler(ctrl_info, device, (u8)scmd->device->lun, scmd, scsi_opcode);
+}
+
+static void pqi_tmf_worker(struct work_struct *work)
+{
+ struct pqi_tmf_work *tmf_work;
+ struct scsi_cmnd *scmd;
+
+ tmf_work = container_of(work, struct pqi_tmf_work, work_struct);
+ scmd = (struct scsi_cmnd *)xchg(&tmf_work->scmd, NULL);
+
+ pqi_device_reset_handler(tmf_work->ctrl_info, tmf_work->device, tmf_work->lun, scmd, tmf_work->scsi_opcode);
+}
+
+static int pqi_eh_abort_handler(struct scsi_cmnd *scmd)
+{
+ struct Scsi_Host *shost;
+ struct pqi_ctrl_info *ctrl_info;
+ struct pqi_scsi_dev *device;
+ struct pqi_tmf_work *tmf_work;
+ DECLARE_COMPLETION_ONSTACK(wait);
+
+ shost = scmd->device->host;
+ ctrl_info = shost_to_hba(shost);
+
+ dev_err(&ctrl_info->pci_dev->dev,
+ "attempting TASK ABORT on SCSI cmd at %p\n", scmd);
+
+ if (cmpxchg(&scmd->host_scribble, PQI_NO_COMPLETION, (void *)&wait) == NULL) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "SCSI cmd at %p already completed\n", scmd);
+ scmd->result = DID_RESET << 16;
+ goto out;
+ }
+
+ device = scmd->device->hostdata;
+ tmf_work = &device->tmf_work[scmd->device->lun];
+
+ if (cmpxchg(&tmf_work->scmd, NULL, scmd) == NULL) {
+ tmf_work->ctrl_info = ctrl_info;
+ tmf_work->device = device;
+ tmf_work->lun = (u8)scmd->device->lun;
+ tmf_work->scsi_opcode = scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff;
+ schedule_work(&tmf_work->work_struct);
+ }
+
+ wait_for_completion(&wait);
+
+ dev_err(&ctrl_info->pci_dev->dev,
+ "TASK ABORT on SCSI cmd at %p: SUCCESS\n", scmd);
+
+out:
+
+ return SUCCESS;
+}
+
static int pqi_slave_alloc(struct scsi_device *sdev)
{
struct pqi_scsi_dev *device;
@@ -7106,6 +7206,7 @@ static const struct scsi_host_template pqi_driver_template = {
.scan_finished = pqi_scan_finished,
.this_id = -1,
.eh_device_reset_handler = pqi_eh_device_reset_handler,
+ .eh_abort_handler = pqi_eh_abort_handler,
.ioctl = pqi_ioctl,
.slave_alloc = pqi_slave_alloc,
.slave_configure = pqi_slave_configure,
--
2.42.0.rc2
next prev parent reply other threads:[~2023-08-17 13:13 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-17 13:12 [PATCH 0/9] smartpqi updates Don Brace
2023-08-17 13:12 ` [PATCH 1/9] smartpqi: reformat to align with oob driver Don Brace
2023-08-21 22:08 ` Martin K. Petersen
2023-08-17 13:12 ` Don Brace [this message]
2023-08-17 13:12 ` [PATCH 3/9] smartpqi: refactor rename MACRO to clarify purpose Don Brace
2023-08-17 13:12 ` [PATCH 4/9] smartpqi: refactor rename pciinfo to pci_info Don Brace
2023-08-17 13:12 ` [PATCH 5/9] smartpqi: simplify lun_number assignment Don Brace
2023-08-17 13:12 ` [PATCH 6/9] smartpqi: enhance shutdown notification Don Brace
2023-08-17 13:12 ` [PATCH 7/9] smartpqi: enhance controller offline notification Don Brace
2023-08-17 13:12 ` [PATCH 8/9] smartpqi: enhance error messages Don Brace
2023-08-17 13:12 ` [PATCH 9/9] smartpqi: change driver version to 2.1.24-046 Don Brace
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230817131232.86754-3-don.brace@microchip.com \
--to=don.brace@microchip.com \
--cc=Justin.Lindley@microchip.com \
--cc=Kevin.Barnett@microchip.com \
--cc=POSWALD@suse.com \
--cc=david.strahan@microchip.com \
--cc=gerry.morong@microchip.com \
--cc=hch@infradead.org \
--cc=jejb@linux.vnet.ibm.com \
--cc=jeremy.reeves@microchip.com \
--cc=joseph.szczypek@hpe.com \
--cc=kumar.meiyappan@microchip.com \
--cc=linux-scsi@vger.kernel.org \
--cc=mahesh.rajashekhara@microchip.com \
--cc=mike.mcgowen@microchip.com \
--cc=murthy.bhat@microchip.com \
--cc=scott.benesh@microchip.com \
--cc=scott.teel@microchip.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox