From: Dan Williams <dan.j.williams@intel.com>
To: linux-scsi@vger.kernel.org
Cc: linux-ide@vger.kernel.org, "Darrick J. Wong" <djwong@us.ibm.com>
Subject: [PATCH v2 12/28] libsas: close error handling vs sas_ata_task_done() race
Date: Thu, 22 Dec 2011 18:59:31 -0800 [thread overview]
Message-ID: <20111223025931.21827.43929.stgit@localhost6.localdomain6> (raw)
In-Reply-To: <20111223025350.21827.85779.stgit@localhost6.localdomain6>
Since sas_ata does not implement ->freeze(), completions for scmds and
internal commands can still arrive concurrently with
ata_scsi_cmd_error_handler() and sas_ata_post_internal() respectively.
By the time either of those is called libata has committed to completing
the qc, and the ATA_PFLAG_FROZEN flag tells sas_ata_task_done() it has
lost the race.
In the sas_ata_post_internal() case we take on the additional
responsibility of freeing the sas_task to close the race with
sas_ata_task_done() freeing the task while sas_ata_post_internal()
is in the process of invoking ->lldd_abort_task().
Cc: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/scsi/libsas/sas_ata.c | 84 +++++++++++++++++++++++++++++++----
drivers/scsi/libsas/sas_scsi_host.c | 44 ------------------
include/scsi/libsas.h | 1
3 files changed, 75 insertions(+), 54 deletions(-)
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 649b04b..11d049d 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -100,15 +100,31 @@ static void sas_ata_task_done(struct sas_task *task)
enum ata_completion_errors ac;
unsigned long flags;
struct ata_link *link;
+ struct ata_port *ap;
if (!qc)
goto qc_already_gone;
- dev = qc->ap->private_data;
+ ap = qc->ap;
+ dev = ap->private_data;
sas_ha = dev->port->ha;
- link = &dev->sata_dev.ap->link;
+ link = &ap->link;
+
+ spin_lock_irqsave(ap->lock, flags);
+ /* check if we lost the race with libata/sas_ata_post_internal() */
+ if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) {
+ spin_unlock_irqrestore(ap->lock, flags);
+ if (qc->scsicmd)
+ goto qc_already_gone;
+ else {
+ /* if eh is not involved and the port is frozen then the
+ * ata internal abort process has taken responsibility
+ * for this sas_task
+ */
+ return;
+ }
+ }
- spin_lock_irqsave(dev->sata_dev.ap->lock, flags);
if (stat->stat == SAS_PROTO_RESPONSE || stat->stat == SAM_STAT_GOOD ||
((stat->stat == SAM_STAT_CHECK_CONDITION &&
dev->sata_dev.command_set == ATAPI_COMMAND_SET))) {
@@ -143,7 +159,7 @@ static void sas_ata_task_done(struct sas_task *task)
if (qc->scsicmd)
ASSIGN_SAS_TASK(qc->scsicmd, NULL);
ata_qc_complete(qc);
- spin_unlock_irqrestore(dev->sata_dev.ap->lock, flags);
+ spin_unlock_irqrestore(ap->lock, flags);
qc_already_gone:
list_del_init(&task->list);
@@ -320,6 +336,54 @@ static int sas_ata_soft_reset(struct ata_link *link, unsigned int *class,
return ret;
}
+/*
+ * notify the lldd to forget the sas_task for this internal ata command
+ * that bypasses scsi-eh
+ */
+static void sas_ata_internal_abort(struct sas_task *task)
+{
+ struct sas_internal *si =
+ to_sas_internal(task->dev->port->ha->core.shost->transportt);
+ unsigned long flags;
+ int res;
+
+ spin_lock_irqsave(&task->task_state_lock, flags);
+ if (task->task_state_flags & SAS_TASK_STATE_ABORTED ||
+ task->task_state_flags & SAS_TASK_STATE_DONE) {
+ spin_unlock_irqrestore(&task->task_state_lock, flags);
+ SAS_DPRINTK("%s: Task %p already finished.\n", __func__,
+ task);
+ goto out;
+ }
+ task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+ spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+ res = si->dft->lldd_abort_task(task);
+
+ spin_lock_irqsave(&task->task_state_lock, flags);
+ if (task->task_state_flags & SAS_TASK_STATE_DONE ||
+ res == TMF_RESP_FUNC_COMPLETE) {
+ spin_unlock_irqrestore(&task->task_state_lock, flags);
+ goto out;
+ }
+
+ /* XXX we are not prepared to deal with ->lldd_abort_task()
+ * failures. TODO: lldds need to unconditionally forget about
+ * aborted ata tasks, otherwise we (likely) leak the sas task
+ * here
+ */
+ SAS_DPRINTK("%s: Task %p leaked.\n", __func__, task);
+
+ if (!(task->task_state_flags & SAS_TASK_STATE_DONE))
+ task->task_state_flags &= ~SAS_TASK_STATE_ABORTED;
+ spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+ return;
+ out:
+ list_del_init(&task->list);
+ sas_free_task(task);
+}
+
static void sas_ata_post_internal(struct ata_queued_cmd *qc)
{
if (qc->flags & ATA_QCFLAG_FAILED)
@@ -327,10 +391,12 @@ static void sas_ata_post_internal(struct ata_queued_cmd *qc)
if (qc->err_mask) {
/*
- * Find the sas_task and kill it. By this point,
- * libata has decided to kill the qc, so we needn't
- * bother with sas_ata_task_done. But we still
- * ought to abort the task.
+ * Find the sas_task and kill it. By this point, libata
+ * has decided to kill the qc and has frozen the port.
+ * In this state sas_ata_task_done() will no longer free
+ * the sas_task, so we need to notify the lldd (via
+ * ->lldd_abort_task) that the task is dead and free it
+ * ourselves.
*/
struct sas_task *task = qc->lldd_task;
unsigned long flags;
@@ -343,7 +409,7 @@ static void sas_ata_post_internal(struct ata_queued_cmd *qc)
spin_unlock_irqrestore(&task->task_state_lock, flags);
task->uldd_task = NULL;
- __sas_task_abort(task);
+ sas_ata_internal_abort(task);
}
}
}
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index fd60465..5e9fa99 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -957,49 +957,6 @@ void sas_shutdown_queue(struct sas_ha_struct *sas_ha)
}
/*
- * Call the LLDD task abort routine directly. This function is intended for
- * use by upper layers that need to tell the LLDD to abort a task.
- */
-int __sas_task_abort(struct sas_task *task)
-{
- struct sas_internal *si =
- to_sas_internal(task->dev->port->ha->core.shost->transportt);
- unsigned long flags;
- int res;
-
- spin_lock_irqsave(&task->task_state_lock, flags);
- if (task->task_state_flags & SAS_TASK_STATE_ABORTED ||
- task->task_state_flags & SAS_TASK_STATE_DONE) {
- spin_unlock_irqrestore(&task->task_state_lock, flags);
- SAS_DPRINTK("%s: Task %p already finished.\n", __func__,
- task);
- return 0;
- }
- task->task_state_flags |= SAS_TASK_STATE_ABORTED;
- spin_unlock_irqrestore(&task->task_state_lock, flags);
-
- if (!si->dft->lldd_abort_task)
- return -ENODEV;
-
- res = si->dft->lldd_abort_task(task);
-
- spin_lock_irqsave(&task->task_state_lock, flags);
- if ((task->task_state_flags & SAS_TASK_STATE_DONE) ||
- (res == TMF_RESP_FUNC_COMPLETE))
- {
- spin_unlock_irqrestore(&task->task_state_lock, flags);
- task->task_done(task);
- return 0;
- }
-
- if (!(task->task_state_flags & SAS_TASK_STATE_DONE))
- task->task_state_flags &= ~SAS_TASK_STATE_ABORTED;
- spin_unlock_irqrestore(&task->task_state_lock, flags);
-
- return -EAGAIN;
-}
-
-/*
* Tell an upper layer that it needs to initiate an abort for a given task.
* This should only ever be called by an LLDD.
*/
@@ -1097,7 +1054,6 @@ EXPORT_SYMBOL_GPL(sas_slave_configure);
EXPORT_SYMBOL_GPL(sas_change_queue_depth);
EXPORT_SYMBOL_GPL(sas_change_queue_type);
EXPORT_SYMBOL_GPL(sas_bios_param);
-EXPORT_SYMBOL_GPL(__sas_task_abort);
EXPORT_SYMBOL_GPL(sas_task_abort);
EXPORT_SYMBOL_GPL(sas_phy_reset);
EXPORT_SYMBOL_GPL(sas_phy_enable);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 94845c3..d100503 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -657,7 +657,6 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *);
void sas_init_dev(struct domain_device *);
void sas_task_abort(struct sas_task *);
-int __sas_task_abort(struct sas_task *);
int sas_eh_device_reset_handler(struct scsi_cmnd *cmd);
int sas_eh_bus_reset_handler(struct scsi_cmnd *cmd);
next prev parent reply other threads:[~2011-12-23 2:59 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-12-23 2:58 [PATCH v2 00/28] libsas: eh reworks (ata-eh vs discovery, races, ...) Dan Williams
2011-12-23 2:58 ` [PATCH v2 01/28] libsas: remove unused ata_task_resp fields Dan Williams
2011-12-23 2:58 ` [PATCH v2 02/28] libsas: kill sas_slave_destroy Dan Williams
2011-12-23 2:58 ` [PATCH v2 03/28] libsas: fix domain_device leak Dan Williams
2011-12-23 2:58 ` [PATCH v2 04/28] libsas: fix leak of dev->sata_dev.identify_[packet_]device Dan Williams
2011-12-23 2:58 ` [PATCH v2 05/28] libsas: replace event locks with atomic bitops Dan Williams
2011-12-23 2:59 ` [PATCH v2 06/28] libsas: convert ha->state to flags Dan Williams
2011-12-23 2:59 ` [PATCH v2 07/28] libsas: introduce sas_drain_work() Dan Williams
2011-12-23 2:59 ` [PATCH v2 08/28] libsas: remove ata_port.lock management duties from lldds Dan Williams
2011-12-23 2:59 ` [PATCH v2 09/28] libsas: prevent domain rediscovery competing with ata error handling Dan Williams
2011-12-23 2:59 ` [PATCH v2 10/28] libsas: use ->set_dmamode to notify lldds of NCQ parameters Dan Williams
2011-12-23 2:59 ` [PATCH v2 11/28] libsas: kill invocation of scsi_eh_finish_cmd from sas_ata_task_done Dan Williams
2011-12-23 2:59 ` Dan Williams [this message]
2011-12-23 2:59 ` [PATCH v2 13/28] libsas: prevent double completion of scmds from eh Dan Williams
2011-12-23 2:59 ` [PATCH v2 14/28] libsas: fix timeout vs completion race Dan Williams
2011-12-23 2:59 ` [PATCH v2 15/28] libsas: let libata handle command timeouts Dan Williams
2011-12-23 2:59 ` [PATCH v2 16/28] libsas: defer SAS_TASK_NEED_DEV_RESET commands to libata Dan Williams
2011-12-23 2:59 ` [PATCH v2 17/28] libsas: use libata-eh-reset for sata rediscovery fis transmit failures Dan Williams
2011-12-23 3:00 ` [PATCH v2 18/28] libsas: perform sas-transport resets in shost->workq context Dan Williams
2011-12-23 3:00 ` [PATCH v2 19/28] libsas: execute transport link resets with libata-eh via host workqueue Dan Williams
2011-12-23 3:00 ` [PATCH v2 20/28] libsas: sas_phy_enable via transport_sas_phy_reset Dan Williams
2011-12-23 3:00 ` [PATCH v2 21/28] libsas: Remove redundant phy state notification calls Dan Williams
2011-12-23 3:00 ` [PATCH v2 22/28] libsas: add mutex for SMP task execution Dan Williams
2011-12-23 3:00 ` [PATCH v2 23/28] libsas: async ata-eh Dan Williams
2011-12-23 3:00 ` [PATCH v2 24/28] libsas: poll for ata device readiness after reset Dan Williams
2011-12-29 6:18 ` Jack Wang
2012-02-19 22:06 ` James Bottomley
2012-02-20 1:08 ` Jack Wang
2011-12-23 3:00 ` [PATCH v2 25/28] libsas: don't mark expanders as gone when a child device is removed Dan Williams
2011-12-23 3:00 ` [PATCH v2 26/28] libsas: check for 'gone' expanders in smp_execute_task() Dan Williams
2012-01-09 19:04 ` Dan Williams
2011-12-23 3:00 ` [PATCH v2 27/28] libsas: fix sas_find_local_phy(), take phy references Dan Williams
2011-12-27 9:21 ` Jack Wang
2011-12-28 18:45 ` Dan Williams
2011-12-29 6:18 ` Jack Wang
2011-12-23 3:00 ` [PATCH v2 28/28] libsas: don't recover 'gone' devices in sas_ata_hard_reset() Dan Williams
2011-12-27 9:23 ` Jack Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20111223025931.21827.43929.stgit@localhost6.localdomain6 \
--to=dan.j.williams@intel.com \
--cc=djwong@us.ibm.com \
--cc=linux-ide@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).