All of lore.kernel.org
 help / color / mirror / Atom feed
From: Don Brace <don.brace@microchip.com>
To: <don.brace@microchip.com>, <Kevin.Barnett@microchip.com>,
	<scott.teel@microchip.com>, <Justin.Lindley@microchip.com>,
	<scott.benesh@microchip.com>, <gerry.morong@microchip.com>,
	<mahesh.rajashekhara@microchip.com>, <mike.mcgowen@microchip.com>,
	<murthy.bhat@microchip.com>, <kumar.meiyappan@microchip.com>,
	<jeremy.reeves@microchip.com>, <david.strahan@microchip.com>,
	<hch@infradead.org>, <jejb@linux.vnet.ibm.com>,
	<joseph.szczypek@hpe.com>, <POSWALD@suse.com>
Cc: <linux-scsi@vger.kernel.org>
Subject: [PATCH 2/9] smartpqi: add abort handler
Date: Thu, 17 Aug 2023 08:12:25 -0500	[thread overview]
Message-ID: <20230817131232.86754-3-don.brace@microchip.com> (raw)
In-Reply-To: <20230817131232.86754-1-don.brace@microchip.com>

From: Kevin Barnett <kevin.barnett@microchip.com>

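Implement aborts as LUN resets: the new eh_abort_handler schedules a
reset of the LUN that the timed-out command was issued to and waits
for that command to complete.

Avoid I/O stalls across all devices attached to a controller when
device I/O requests time out. Controller-wide request blocking is now
lifted before the LUN reset is issued; only the LUN being reset
rejects new requests (with SCSI_MLQUEUE_HOST_BUSY) until the reset
finishes.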

Reviewed-by: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Mike McGowen <mike.mcgowen@microchip.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
---
 drivers/scsi/smartpqi/smartpqi.h      |  14 ++-
 drivers/scsi/smartpqi/smartpqi_init.c | 171 ++++++++++++++++++++------
 2 files changed, 149 insertions(+), 36 deletions(-)

diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index e392eaf5b2bf..e560d99efa95 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -1085,7 +1085,16 @@ struct pqi_stream_data {
 	u32	last_accessed;
 };
 
-#define PQI_MAX_LUNS_PER_DEVICE         256
+#define PQI_MAX_LUNS_PER_DEVICE		256
+
+struct pqi_tmf_work {
+	struct work_struct work_struct;
+	struct scsi_cmnd *scmd;
+	struct pqi_ctrl_info *ctrl_info;
+	struct pqi_scsi_dev *device;
+	u8	lun;
+	u8	scsi_opcode;
+};
 
 struct pqi_scsi_dev {
 	int	devtype;		/* as reported by INQUIRY command */
@@ -1111,6 +1120,7 @@ struct pqi_scsi_dev {
 	u8	erase_in_progress : 1;
 	bool	aio_enabled;		/* only valid for physical disks */
 	bool	in_remove;
+	bool	in_reset[PQI_MAX_LUNS_PER_DEVICE];
 	bool	device_offline;
 	u8	vendor[8];		/* bytes 8-15 of inquiry data */
 	u8	model[16];		/* bytes 16-31 of inquiry data */
@@ -1149,6 +1159,8 @@ struct pqi_scsi_dev {
 	struct pqi_stream_data stream_data[NUM_STREAMS_PER_LUN];
 	atomic_t scsi_cmds_outstanding[PQI_MAX_LUNS_PER_DEVICE];
 	unsigned int raid_bypass_cnt;
+
+	struct pqi_tmf_work tmf_work[PQI_MAX_LUNS_PER_DEVICE];
 };
 
 /* VPD inquiry pages */
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 4486259f85ab..ec36896eb08e 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -48,6 +48,8 @@
 #define PQI_POST_RESET_DELAY_SECS			5
 #define PQI_POST_OFA_RESET_DELAY_UPON_TIMEOUT_SECS	10
 
+#define PQI_NO_COMPLETION	((void *)-1)
+
 MODULE_AUTHOR("Microchip");
 MODULE_DESCRIPTION("Driver for Microchip Smart Family Controller version "
 	DRIVER_VERSION);
@@ -96,6 +98,7 @@ static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info);
 static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_scsi_dev *device, u8 lun, unsigned long timeout_msecs);
 static void pqi_fail_all_outstanding_requests(struct pqi_ctrl_info *ctrl_info);
+static void pqi_tmf_worker(struct work_struct *work);
 
 /* for flags argument to pqi_submit_raid_request_synchronous() */
 #define PQI_SYNC_FLAGS_INTERRUPTABLE	0x1
@@ -455,6 +458,21 @@ static inline bool pqi_device_in_remove(struct pqi_scsi_dev *device)
 	return device->in_remove;
 }
 
+static inline void pqi_device_reset_start(struct pqi_scsi_dev *device, u8 lun)
+{
+	device->in_reset[lun] = true;
+}
+
+static inline void pqi_device_reset_done(struct pqi_scsi_dev *device, u8 lun)
+{
+	device->in_reset[lun] = false;
+}
+
+static inline bool pqi_device_in_reset(struct pqi_scsi_dev *device, u8 lun)
+{
+	return device->in_reset[lun];
+}
+
 static inline int pqi_event_type_to_event_index(unsigned int event_type)
 {
 	int index;
@@ -2122,6 +2140,15 @@ static inline bool pqi_is_device_added(struct pqi_scsi_dev *device)
 	return device->sdev != NULL;
 }
 
+static inline void pqi_init_device_tmf_work(struct pqi_scsi_dev *device)
+{
+	unsigned int lun;
+	struct pqi_tmf_work *tmf_work;
+
+	for (lun = 0, tmf_work = device->tmf_work; lun < PQI_MAX_LUNS_PER_DEVICE; lun++, tmf_work++)
+		INIT_WORK(&tmf_work->work_struct, pqi_tmf_worker);
+}
+
 static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices)
 {
@@ -2202,6 +2229,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
 		list_add_tail(&device->add_list_entry, &add_list);
 		/* To prevent this device structure from being freed later. */
 		device->keep_device = true;
+		pqi_init_device_tmf_work(device);
 	}
 
 	spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
@@ -5623,6 +5651,7 @@ static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd)
 void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
 {
 	struct pqi_scsi_dev *device;
+	struct completion *wait;
 
 	if (!scmd->device) {
 		set_host_byte(scmd, DID_NO_CONNECT);
@@ -5636,6 +5665,10 @@ void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
 	}
 
 	atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]);
+
+	wait = (struct completion *)xchg(&scmd->host_scribble, NULL);
+	if (wait != PQI_NO_COMPLETION)
+		complete(wait);
 }
 
 static bool pqi_is_parity_write_stream(struct pqi_ctrl_info *ctrl_info,
@@ -5719,6 +5752,9 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 	u16 hw_queue;
 	struct pqi_queue_group *queue_group;
 	bool raid_bypassed;
+	u8 lun;
+
+	scmd->host_scribble = PQI_NO_COMPLETION;
 
 	device = scmd->device->hostdata;
 
@@ -5728,7 +5764,9 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 		return 0;
 	}
 
-	atomic_inc(&device->scsi_cmds_outstanding[scmd->device->lun]);
+	lun = (u8)scmd->device->lun;
+
+	atomic_inc(&device->scsi_cmds_outstanding[lun]);
 
 	ctrl_info = shost_to_hba(shost);
 
@@ -5738,7 +5776,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 		return 0;
 	}
 
-	if (pqi_ctrl_blocked(ctrl_info)) {
+	if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device, lun)) {
 		rc = SCSI_MLQUEUE_HOST_BUSY;
 		goto out;
 	}
@@ -5773,8 +5811,10 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 	}
 
 out:
-	if (rc)
-		atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]);
+	if (rc) {
+		scmd->host_scribble = NULL;
+		atomic_dec(&device->scsi_cmds_outstanding[lun]);
+	}
 
 	return rc;
 }
@@ -5868,7 +5908,7 @@ static int pqi_wait_until_inbound_queues_empty(struct pqi_ctrl_info *ctrl_info)
 }
 
 static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
-	struct pqi_scsi_dev *device)
+	struct pqi_scsi_dev *device, u8 lun)
 {
 	unsigned int i;
 	unsigned int path;
@@ -5894,6 +5934,9 @@ static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
 				if (scsi_device != device)
 					continue;
 
+				if ((u8)scmd->device->lun != lun)
+					continue;
+
 				list_del(&io_request->request_list_entry);
 				set_host_byte(scmd, DID_RESET);
 				pqi_free_io_request(io_request);
@@ -5990,15 +6033,13 @@ static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,
 
 #define PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS	30
 
-static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
 {
 	int rc;
 	struct pqi_io_request *io_request;
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct pqi_task_management_request *request;
-	struct pqi_scsi_dev *device;
 
-	device = scmd->device->hostdata;
 	io_request = pqi_alloc_io_request(ctrl_info, NULL);
 	io_request->io_complete_callback = pqi_lun_reset_complete;
 	io_request->context = &wait;
@@ -6011,14 +6052,14 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
 	put_unaligned_le16(io_request->index, &request->request_id);
 	memcpy(request->lun_number, device->scsi3addr, sizeof(request->lun_number));
 	if (!pqi_is_logical_device(device) && ctrl_info->multi_lun_device_supported)
-		request->ml_device_lun_number = (u8)scmd->device->lun;
+		request->ml_device_lun_number = lun;
 	request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;
 	if (ctrl_info->tmf_iu_timeout_supported)
 		put_unaligned_le16(PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS, &request->timeout);
 
 	pqi_start_io(ctrl_info, &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH, io_request);
 
-	rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, (u8)scmd->device->lun, &wait);
+	rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, lun, &wait);
 	if (rc == 0)
 		rc = io_request->status;
 
@@ -6032,18 +6073,16 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
 #define PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS		(10 * 60 * 1000)
 #define PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS	(2 * 60 * 1000)
 
-static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
 {
 	int reset_rc;
 	int wait_rc;
 	unsigned int retries;
 	unsigned long timeout_msecs;
-	struct pqi_scsi_dev *device;
 
-	device = scmd->device->hostdata;
 	for (retries = 0;;) {
-		reset_rc = pqi_lun_reset(ctrl_info, scmd);
-		if (reset_rc == 0 || reset_rc == -ENODEV || ++retries > PQI_LUN_RESET_RETRIES)
+		reset_rc = pqi_lun_reset(ctrl_info, device, lun);
+		if (reset_rc == 0 || reset_rc == -ENODEV || reset_rc == -ENXIO || ++retries > PQI_LUN_RESET_RETRIES)
 			break;
 		msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS);
 	}
@@ -6051,60 +6090,53 @@ static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct sc
 	timeout_msecs = reset_rc ? PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS :
 		PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS;
 
-	wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, scmd->device->lun, timeout_msecs);
+	wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, lun, timeout_msecs);
 	if (wait_rc && reset_rc == 0)
 		reset_rc = wait_rc;
 
 	return reset_rc == 0 ? SUCCESS : FAILED;
 }
 
-static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
 {
 	int rc;
-	struct pqi_scsi_dev *device;
 
-	device = scmd->device->hostdata;
 	pqi_ctrl_block_requests(ctrl_info);
 	pqi_ctrl_wait_until_quiesced(ctrl_info);
-	pqi_fail_io_queued_for_device(ctrl_info, device);
+	pqi_fail_io_queued_for_device(ctrl_info, device, lun);
 	rc = pqi_wait_until_inbound_queues_empty(ctrl_info);
+	pqi_device_reset_start(device, lun);
+	pqi_ctrl_unblock_requests(ctrl_info);
 	if (rc)
 		rc = FAILED;
 	else
-		rc = pqi_lun_reset_with_retries(ctrl_info, scmd);
-	pqi_ctrl_unblock_requests(ctrl_info);
+		rc = pqi_lun_reset_with_retries(ctrl_info, device, lun);
+	pqi_device_reset_done(device, lun);
 
 	return rc;
 }
 
-static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
+static int pqi_device_reset_handler(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun, struct scsi_cmnd *scmd, u8 scsi_opcode)
 {
 	int rc;
-	struct Scsi_Host *shost;
-	struct pqi_ctrl_info *ctrl_info;
-	struct pqi_scsi_dev *device;
-
-	shost = scmd->device->host;
-	ctrl_info = shost_to_hba(shost);
-	device = scmd->device->hostdata;
 
 	mutex_lock(&ctrl_info->lun_reset_mutex);
 
 	dev_err(&ctrl_info->pci_dev->dev,
 		"resetting scsi %d:%d:%d:%d due to cmd 0x%02x\n",
-		shost->host_no,
-		device->bus, device->target, (u32)scmd->device->lun,
+		ctrl_info->scsi_host->host_no,
+		device->bus, device->target, lun,
 		scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff);
 
 	pqi_check_ctrl_health(ctrl_info);
 	if (pqi_ctrl_offline(ctrl_info))
 		rc = FAILED;
 	else
-		rc = pqi_device_reset(ctrl_info, scmd);
+		rc = pqi_device_reset(ctrl_info, device, lun);
 
 	dev_err(&ctrl_info->pci_dev->dev,
-		"reset of scsi %d:%d:%d:%d: %s\n",
-		shost->host_no, device->bus, device->target, (u32)scmd->device->lun,
+		"reset of scsi %d:%d:%d:%u: %s\n",
+		ctrl_info->scsi_host->host_no, device->bus, device->target, lun,
 		rc == SUCCESS ? "SUCCESS" : "FAILED");
 
 	mutex_unlock(&ctrl_info->lun_reset_mutex);
@@ -6112,6 +6144,74 @@ static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
 	return rc;
 }
 
+static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
+{
+	struct Scsi_Host *shost;
+	struct pqi_ctrl_info *ctrl_info;
+	struct pqi_scsi_dev *device;
+	u8 scsi_opcode;
+
+	shost = scmd->device->host;
+	ctrl_info = shost_to_hba(shost);
+	device = scmd->device->hostdata;
+	scsi_opcode = scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff;
+
+	return pqi_device_reset_handler(ctrl_info, device, (u8)scmd->device->lun, scmd, scsi_opcode);
+}
+
+static void pqi_tmf_worker(struct work_struct *work)
+{
+	struct pqi_tmf_work *tmf_work;
+	struct scsi_cmnd *scmd;
+
+	tmf_work = container_of(work, struct pqi_tmf_work, work_struct);
+	scmd = (struct scsi_cmnd *)xchg(&tmf_work->scmd, NULL);
+
+	pqi_device_reset_handler(tmf_work->ctrl_info, tmf_work->device, tmf_work->lun, scmd, tmf_work->scsi_opcode);
+}
+
+static int pqi_eh_abort_handler(struct scsi_cmnd *scmd)
+{
+	struct Scsi_Host *shost;
+	struct pqi_ctrl_info *ctrl_info;
+	struct pqi_scsi_dev *device;
+	struct pqi_tmf_work *tmf_work;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	shost = scmd->device->host;
+	ctrl_info = shost_to_hba(shost);
+
+	dev_err(&ctrl_info->pci_dev->dev,
+		"attempting TASK ABORT on SCSI cmd at %p\n", scmd);
+
+	if (cmpxchg(&scmd->host_scribble, PQI_NO_COMPLETION, (void *)&wait) == NULL) {
+		dev_err(&ctrl_info->pci_dev->dev,
+			"SCSI cmd at %p already completed\n", scmd);
+		scmd->result = DID_RESET << 16;
+		goto out;
+	}
+
+	device = scmd->device->hostdata;
+	tmf_work = &device->tmf_work[scmd->device->lun];
+
+	if (cmpxchg(&tmf_work->scmd, NULL, scmd) == NULL) {
+		tmf_work->ctrl_info = ctrl_info;
+		tmf_work->device = device;
+		tmf_work->lun = (u8)scmd->device->lun;
+		tmf_work->scsi_opcode = scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff;
+		schedule_work(&tmf_work->work_struct);
+	}
+
+	wait_for_completion(&wait);
+
+	dev_err(&ctrl_info->pci_dev->dev,
+		"TASK ABORT on SCSI cmd at %p: SUCCESS\n", scmd);
+
+out:
+
+	return SUCCESS;
+}
+
 static int pqi_slave_alloc(struct scsi_device *sdev)
 {
 	struct pqi_scsi_dev *device;
@@ -7106,6 +7206,7 @@ static const struct scsi_host_template pqi_driver_template = {
 	.scan_finished = pqi_scan_finished,
 	.this_id = -1,
 	.eh_device_reset_handler = pqi_eh_device_reset_handler,
+	.eh_abort_handler = pqi_eh_abort_handler,
 	.ioctl = pqi_ioctl,
 	.slave_alloc = pqi_slave_alloc,
 	.slave_configure = pqi_slave_configure,
-- 
2.42.0.rc2


  parent reply	other threads:[~2023-08-17 13:13 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-17 13:12 [PATCH 0/9] smartpqi updates Don Brace
2023-08-17 13:12 ` [PATCH 1/9] smartpqi: reformat to align with oob driver Don Brace
2023-08-21 22:08   ` Martin K. Petersen
2023-08-17 13:12 ` Don Brace [this message]
2023-08-17 13:12 ` [PATCH 3/9] smartpqi: refactor rename MACRO to clarify purpose Don Brace
2023-08-17 13:12 ` [PATCH 4/9] smartpqi: refactor rename pciinfo to pci_info Don Brace
2023-08-17 13:12 ` [PATCH 5/9] smartpqi: simplify lun_number assignment Don Brace
2023-08-17 13:12 ` [PATCH 6/9] smartpqi: enhance shutdown notification Don Brace
2023-08-17 13:12 ` [PATCH 7/9] smartpqi: enhance controller offline notification Don Brace
2023-08-17 13:12 ` [PATCH 8/9] smartpqi: enhance error messages Don Brace
2023-08-17 13:12 ` [PATCH 9/9] smartpqi: change driver version to 2.1.24-046 Don Brace

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230817131232.86754-3-don.brace@microchip.com \
    --to=don.brace@microchip.com \
    --cc=Justin.Lindley@microchip.com \
    --cc=Kevin.Barnett@microchip.com \
    --cc=POSWALD@suse.com \
    --cc=david.strahan@microchip.com \
    --cc=gerry.morong@microchip.com \
    --cc=hch@infradead.org \
    --cc=jejb@linux.vnet.ibm.com \
    --cc=jeremy.reeves@microchip.com \
    --cc=joseph.szczypek@hpe.com \
    --cc=kumar.meiyappan@microchip.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=mahesh.rajashekhara@microchip.com \
    --cc=mike.mcgowen@microchip.com \
    --cc=murthy.bhat@microchip.com \
    --cc=scott.benesh@microchip.com \
    --cc=scott.teel@microchip.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.