From: Dan Williams <dan.j.williams@intel.com>
To: linux-scsi@vger.kernel.org
Cc: linux-ide@vger.kernel.org, jack_wang@usish.com
Subject: [GIT PATCH v4 0/2] libsas: eh reworks (ata-eh vs discovery, races, ...)
Date: Mon, 09 Jan 2012 23:38:26 -0800 [thread overview]
Message-ID: <20120110073647.4563.7504.stgit@localhost6.localdomain6> (raw)
The latest version of the patch kit is available here:
git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas-eh-reworks-v4
Changes since v3: http://marc.info/?l=linux-scsi&m=132581455214789&w=2
1/ null pointer regression smp_execute_task: make sure device is not
removed from the domain while eh is in flight. Fix up the logic that
defers domain_device destruction to its own context. Folded into:
"libsas: prevent domain rediscovery competing with ata error handling"
plus a small prep patch: "libsas: convert dev->gone to flags"
2/ lockdep regression in smp_execute_task: failed to release the lock
before returning. Folded into: "libsas: check for 'gone' expanders in
smp_execute_task()"
3/ unnecessary locking in isci_ata_check_ready(). Folded into: "isci:
->lldd_ata_check_ready handler"
4/ two new fixes follow in separate mails
[PATCH v4 1/2] libsas: pre-clean commands that won the eh vs completion race
[PATCH v4 2/2] libsas: feed the scsi_block_when_processing_errors() meter
Incremental diff: libsas-eh-reworks-v3..libsas-eh-reworks-v4
diff --git a/drivers/scsi/isci/port.c b/drivers/scsi/isci/port.c
index e795645..eca9ba0 100644
--- a/drivers/scsi/isci/port.c
+++ b/drivers/scsi/isci/port.c
@@ -1681,10 +1681,7 @@ int isci_ata_check_ready(struct domain_device *dev)
if (test_bit(IPORT_RESET_PENDING, &iport->state))
goto out;
- /* snapshot active phy mask */
- spin_lock_irqsave(&ihost->scic_lock, flags);
rc = !!iport->active_phy_mask;
- spin_unlock_irqrestore(&ihost->scic_lock, flags);
out:
isci_put_device(idev);
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index f90fdcf..a062adc 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -195,7 +195,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
spin_unlock(ap->lock);
/* If the device fell off, no sense in issuing commands */
- if (dev->gone)
+ if (test_bit(SAS_DEV_GONE, &dev->state))
goto out;
task = sas_alloc_task(GFP_ATOMIC);
@@ -327,7 +327,7 @@ static int sas_ata_hard_reset(struct ata_link *link, unsigned int *class,
struct domain_device *dev = ap->private_data;
struct sas_internal *i = dev_to_sas_internal(dev);
- if (dev->gone)
+ if (test_bit(SAS_DEV_GONE, &dev->state))
return -ENODEV;
res = i->dft->lldd_I_T_nexus_reset(dev);
@@ -663,6 +663,11 @@ static void async_sas_ata_eh(void *data, async_cookie_t cookie)
ata_port_printk(ap, KERN_DEBUG, "sas eh calling libata port error handler");
ata_scsi_port_error_handler(ha->core.shost, ap);
+
+ /* tell scsi_block_when_processing_errors() waiters that we are
+ * still making forward progress
+ */
+ wake_up(&ha->core.shost->host_wait);
}
void sas_ata_strategy_handler(struct Scsi_Host *shost)
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 0b7555d..dfb0250 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -269,17 +269,22 @@ static void sas_destruct_devices(struct work_struct *work)
clear_bit(DISCE_DESTRUCT, &port->disc.pending);
- list_for_each_entry_safe(dev, n, &port->destroy_list, dev_list_node) {
+ list_for_each_entry_safe(dev, n, &port->destroy_list, disco_list_node) {
+ list_del_init(&dev->disco_list_node);
+
sas_remove_children(&dev->rphy->dev);
sas_rphy_delete(dev->rphy);
dev->rphy = NULL;
sas_unregister_common_dev(port, dev);
+
+ sas_put_device(dev);
}
}
void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)
{
- if (!list_empty(&dev->disco_list_node)) {
+ if (!test_bit(SAS_DEV_DESTROY, &dev->state) &&
+ !list_empty(&dev->disco_list_node)) {
/* this rphy never saw sas_rphy_add */
list_del_init(&dev->disco_list_node);
sas_rphy_free(dev->rphy);
@@ -287,13 +292,9 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)
sas_unregister_common_dev(port, dev);
}
- if (dev->rphy) {
+ if (dev->rphy && !test_and_set_bit(SAS_DEV_DESTROY, &dev->state)) {
sas_rphy_unlink(dev->rphy);
-
- spin_lock_irq(&port->dev_list_lock);
- list_move_tail(&dev->dev_list_node, &port->destroy_list);
- spin_unlock_irq(&port->dev_list_lock);
-
+ list_move_tail(&dev->disco_list_node, &port->destroy_list);
sas_discover_event(dev->port, DISCE_DESTRUCT);
}
}
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index e47599b..68a80a0 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -74,8 +74,10 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
mutex_lock(&dev->ex_dev.cmd_mutex);
for (retry = 0; retry < 3; retry++) {
- if (dev->gone)
- return -ECOMM;
+ if (test_bit(SAS_DEV_GONE, &dev->state)) {
+ res = -ECOMM;
+ break;
+ }
task = sas_alloc_task(GFP_KERNEL);
if (!task) {
@@ -1794,7 +1796,7 @@ static void sas_unregister_ex_tree(struct asd_sas_port *port, struct domain_devi
struct domain_device *child, *n;
list_for_each_entry_safe(child, n, &ex->children, siblings) {
- child->gone = 1;
+ set_bit(SAS_DEV_GONE, &child->state);
if (child->dev_type == EDGE_DEV ||
child->dev_type == FANOUT_DEV)
sas_unregister_ex_tree(port, child);
@@ -1815,7 +1817,7 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
&ex_dev->children, siblings) {
if (SAS_ADDR(child->sas_addr) ==
SAS_ADDR(phy->attached_sas_addr)) {
- child->gone = 1;
+ set_bit(SAS_DEV_GONE, &child->state);
if (child->dev_type == EDGE_DEV ||
child->dev_type == FANOUT_DEV)
sas_unregister_ex_tree(parent->port, child);
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index 36e2905..31adcd1 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -168,7 +168,7 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone)
if (port->num_phys == 1) {
if (dev && gone)
- dev->gone = 1;
+ set_bit(SAS_DEV_GONE, &dev->state);
sas_unregister_domain_devices(port);
sas_port_delete(port->port);
port->port = NULL;
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 59a227d..731c892 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -208,7 +208,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
int res = 0;
/* If the device fell off, no sense in issuing commands */
- if (dev->gone) {
+ if (test_bit(SAS_DEV_GONE, &dev->state)) {
cmd->result = DID_BAD_TARGET << 16;
goto out_done;
}
@@ -249,8 +249,8 @@ out_done:
static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
{
- struct sas_task *task = TO_SAS_TASK(cmd);
struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host);
+ struct sas_task *task = TO_SAS_TASK(cmd);
/* At this point, we only get called following an actual abort
* of the task, so we should be guaranteed not to be racing with
@@ -267,9 +267,9 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
static void sas_eh_defer_cmd(struct scsi_cmnd *cmd)
{
- struct sas_task *task = TO_SAS_TASK(cmd);
- struct domain_device *dev = task->dev;
+ struct domain_device *dev = cmd_to_domain_dev(cmd);
struct sas_ha_struct *ha = dev->port->ha;
+ struct sas_task *task = TO_SAS_TASK(cmd);
if (!dev_is_sata(dev)) {
sas_eh_finish_cmd(cmd);
@@ -530,8 +530,9 @@ static int sas_eh_handle_sas_errors(struct Scsi_Host *shost,
struct sas_internal *i = to_sas_internal(shost->transportt);
unsigned long flags;
struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
+ LIST_HEAD(done);
-Again:
+ /* clean out any commands that won the completion vs eh race */
list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
struct domain_device *dev = cmd_to_domain_dev(cmd);
struct sas_task *task;
@@ -545,7 +546,12 @@ Again:
spin_unlock_irqrestore(&dev->done_lock, flags);
if (!task)
- continue;
+ list_move_tail(&cmd->eh_entry, &done);
+ }
+
+ Again:
+ list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
+ struct sas_task *task = TO_SAS_TASK(cmd);
list_del_init(&cmd->eh_entry);
@@ -649,15 +655,16 @@ Again:
goto clear_q;
}
}
+ out:
+ list_splice_tail(&done, work_q);
list_splice_tail_init(&ha->eh_ata_q, work_q);
return list_empty(work_q);
-clear_q:
+
+ clear_q:
SAS_DPRINTK("--- Exit %s -- clear_q\n", __func__);
list_for_each_entry_safe(cmd, n, work_q, eh_entry)
sas_eh_finish_cmd(cmd);
-
- list_splice_tail_init(&ha->eh_ata_q, work_q);
- return list_empty(work_q);
+ goto out;
}
void sas_scsi_recover_host(struct Scsi_Host *shost)
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index feb61a8..55bab86 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -174,7 +174,11 @@ struct sata_device {
struct ata_taskfile tf;
};
-/* ---------- Domain device ---------- */
+enum {
+ SAS_DEV_GONE,
+ SAS_DEV_DESTROY,
+};
+
struct domain_device {
spinlock_t done_lock;
enum sas_dev_type dev_type;
@@ -191,7 +195,7 @@ struct domain_device {
struct sas_phy *phy;
struct list_head dev_list_node;
- struct list_head disco_list_node;
+ struct list_head disco_list_node; /* awaiting probe or destruct */
enum sas_protocol iproto;
enum sas_protocol tproto;
@@ -209,7 +213,7 @@ struct domain_device {
};
void *lldd_dev;
- int gone;
+ unsigned long state;
struct kref kref;
};
The following changes since commit 5c41dc3a79150e93e5d050871a10b761be8281a1:
[SCSI] lpfc 8.3.28: Update driver version to 8.3.28 (2011-12-15 10:57:45 +0400)
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas-eh-reworks-v4
Dan Williams (36):
libsas: remove unused ata_task_resp fields
libsas: kill sas_slave_destroy
libsas: fix domain_device leak
libsas: fix leak of dev->sata_dev.identify_[packet_]device
libsas: replace event locks with atomic bitops
libsas: convert ha->state to flags
libsas: introduce sas_drain_work()
libsas: remove ata_port.lock management duties from lldds
libsas: convert dev->gone to flags
libsas: prevent domain rediscovery competing with ata error handling
libsas: use ->set_dmamode to notify lldds of NCQ parameters
libsas: kill invocation of scsi_eh_finish_cmd from sas_ata_task_done
libsas: close error handling vs sas_ata_task_done() race
libsas: prevent double completion of scmds from eh
libsas: fix timeout vs completion race
libsas: let libata handle command timeouts
libsas: defer SAS_TASK_NEED_DEV_RESET commands to libata
libsas: use libata-eh-reset for sata rediscovery fis transmit failures
libsas: perform sas-transport resets in shost->workq context
libsas: execute transport link resets with libata-eh via host workqueue
libsas: sas_phy_enable via transport_sas_phy_reset
libsas: async ata-eh
libsas: poll for ata device readiness after reset
libsas: don't mark expanders as gone when a child device is removed
libsas: check for 'gone' expanders in smp_execute_task()
libsas: fix sas_find_local_phy(), take phy references
libsas: don't recover 'gone' devices in sas_ata_hard_reset()
isci: kill iphy->isci_port lookups
isci: kill isci_port->status
isci: fix interpretation of "hard" reset
isci: stop interpreting ->lldd_lu_reset() as an ata soft-reset
isci: ->lldd_ata_check_ready handler
isci: remove bus and reset handlers
isci: remove IDEV_EH hack to disable "discovery-time" ata resets
libsas: pre-clean commands that won the eh vs completion race
libsas: feed the scsi_block_when_processing_errors() meter
Jeff Skirvin (2):
libsas: Remove redundant phy state notification calls.
libsas: add mutex for SMP task execution
Documentation/scsi/libsas.txt | 15 -
drivers/ata/libata-eh.c | 1 +
drivers/ata/libata.h | 1 -
drivers/scsi/aic94xx/aic94xx.h | 2 +
drivers/scsi/aic94xx/aic94xx_dev.c | 38 ++-
drivers/scsi/aic94xx/aic94xx_init.c | 5 +-
drivers/scsi/aic94xx/aic94xx_tmf.c | 9 +-
drivers/scsi/isci/host.c | 8 +-
drivers/scsi/isci/host.h | 19 +-
drivers/scsi/isci/init.c | 13 +-
drivers/scsi/isci/phy.c | 18 +-
drivers/scsi/isci/phy.h | 1 -
drivers/scsi/isci/port.c | 217 ++++++------
drivers/scsi/isci/port.h | 11 +-
drivers/scsi/isci/remote_device.c | 32 +--
drivers/scsi/isci/remote_device.h | 7 +-
drivers/scsi/isci/request.c | 198 +----------
drivers/scsi/isci/request.h | 9 +-
drivers/scsi/isci/task.c | 158 ++-------
drivers/scsi/isci/task.h | 40 --
drivers/scsi/libsas/sas_ata.c | 692 +++++++++++++++--------------------
drivers/scsi/libsas/sas_discover.c | 152 +++++++--
drivers/scsi/libsas/sas_event.c | 89 +++++-
drivers/scsi/libsas/sas_expander.c | 113 ++++--
drivers/scsi/libsas/sas_init.c | 192 +++++++++-
drivers/scsi/libsas/sas_internal.h | 73 ++--
drivers/scsi/libsas/sas_phy.c | 12 +-
drivers/scsi/libsas/sas_port.c | 26 +-
drivers/scsi/libsas/sas_scsi_host.c | 320 +++++++---------
drivers/scsi/mvsas/mv_init.c | 1 -
drivers/scsi/mvsas/mv_sas.c | 11 +-
drivers/scsi/pm8001/pm8001_init.c | 1 -
drivers/scsi/pm8001/pm8001_sas.c | 29 +-
drivers/scsi/scsi_transport_sas.c | 59 +++-
include/linux/libata.h | 1 +
include/scsi/libsas.h | 67 ++--
include/scsi/sas_ata.h | 26 +-
include/scsi/scsi_transport_sas.h | 12 +-
38 files changed, 1321 insertions(+), 1357 deletions(-)
next reply other threads:[~2012-01-10 7:38 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-01-10 7:38 Dan Williams [this message]
2012-01-10 7:38 ` [PATCH v4 1/2] libsas: pre-clean commands that won the eh vs completion race Dan Williams
2012-01-10 7:38 ` [PATCH v4 2/2] libsas: feed the scsi_block_when_processing_errors() meter Dan Williams
2012-01-10 19:29 ` [GIT PATCH v4 0/2] libsas: eh reworks (ata-eh vs discovery, races, ...) James Bottomley
2012-01-10 20:18 ` Dan Williams
2012-01-16 8:40 ` James Bottomley
2012-01-16 17:00 ` Dan Williams
2012-01-16 20:24 ` James Bottomley
2012-01-16 20:44 ` Dan Williams
2012-01-13 0:57 ` Jack Wang
2012-01-13 1:21 ` Dan Williams
2012-01-13 1:37 ` Jack Wang
2012-01-13 1:51 ` Dan Williams
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120110073647.4563.7504.stgit@localhost6.localdomain6 \
--to=dan.j.williams@intel.com \
--cc=jack_wang@usish.com \
--cc=linux-ide@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).