From: hch@lst.de (Christoph Hellwig)
Subject: [PATCH] nvme: do not block connect call on inaccessible devices
Date: Tue, 5 Jun 2018 13:17:46 +0200 [thread overview]
Message-ID: <20180605111746.GA5802@lst.de> (raw)
In-Reply-To: <20180605082905.1740-1-hare@suse.de>
On Tue, Jun 05, 2018@10:29:05AM +0200, Hannes Reinecke wrote:
> When a device is in ANA inaccessible state the call to 'nvme connect'
> will hang as a parition scan is triggered and the I/O will be requeued
> indefinitely.
> Or rather, requeued until the next path is connected, but we cannot issue
> any nvme ioctls anymore as the nvmf_dev_mutex is blocked by the initial
> 'connect' call.
>
> This patch inhibits partition scan if no working paths are found, and
> retriggers the partition scan once the device becomes active.
And what updates the state once all your inaccessible or change
state groups become optimized?
Also I see no point registering the disk if we can't do I/O to it.
I'd rather prefer something along the lines of the completely untested
patch below if we really want to go down this route:
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3b55580bb9be..5d90560775e1 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3080,7 +3080,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
ns->disk->disk_name);
- nvme_mpath_add_disk(ns->head);
+ nvme_mpath_add_disk(ns);
nvme_fault_inject_init(ns);
return;
out_unlink_ns:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 6e4fdc1a0581..8058dc385df5 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -264,20 +264,31 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
return -ENOMEM;
}
-void nvme_mpath_add_disk(struct nvme_ns_head *head)
+static void __nvme_mpath_add_disk(struct nvme_ns *ns)
{
- if (!head->disk)
+
+ struct nvme_ns_head *head = ns->head;
+ enum nvme_ana_state state = nvme_ns_ana_state(ns);
+
+ lockdep_assert_held(&head->subsys->lock);
+
+ if (!head->disk ||
+ (head->disk->flags & GENHD_FL_UP) ||
+ (state != NVME_ANA_OPTIMIZED && state != NVME_ANA_NONOPTIMIZED))
return;
- mutex_lock(&head->subsys->lock);
- if (!(head->disk->flags & GENHD_FL_UP)) {
- device_add_disk(&head->subsys->dev, head->disk);
- if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
- &nvme_ns_id_attr_group))
- pr_warn("%s: failed to create sysfs group for identification\n",
- head->disk->disk_name);
- }
- mutex_unlock(&head->subsys->lock);
+ device_add_disk(&head->subsys->dev, head->disk);
+ if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
+ &nvme_ns_id_attr_group))
+ pr_warn("%s: failed to create sysfs group for identification\n",
+ head->disk->disk_name);
+}
+
+void nvme_mpath_add_disk(struct nvme_ns *ns)
+{
+ mutex_lock(&ns->head->subsys->lock);
+ __nvme_mpath_add_disk(ns);
+ mutex_unlock(&ns->head->subsys->lock);
}
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -299,6 +310,7 @@ static int nvme_process_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
{
void *base = ctrl->ana_log_buf;
size_t offset = sizeof(struct nvme_ana_rsp_hdr);
+ struct nvme_ns *ns;
int error, i;
/*
@@ -316,21 +328,22 @@ static int nvme_process_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
return error;
}
+ error = -EINVAL;
+ mutex_lock(&ctrl->subsys->lock);
for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
struct nvme_ana_group_desc *desc = base + offset;
u32 grpid = le32_to_cpu(desc->grpid);
u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
size_t nsid_buf_size = nr_nsids * sizeof(__le32);
- struct nvme_ns *ns;
if (WARN_ON_ONCE(grpid == 0))
- return -EINVAL;
+ goto out_error;
if (WARN_ON_ONCE(grpid > ctrl->anagrpmax))
- return -EINVAL;
+ goto out_error;
if (WARN_ON_ONCE(desc->state == 0))
- return -EINVAL;
+ goto out_error;
if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE))
- return -EINVAL;
+ goto out_error;
dev_info(ctrl->device, "ANA group %d: %s.\n",
grpid, nvme_ana_state_names[desc->state]);
@@ -340,9 +353,9 @@ static int nvme_process_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
continue;
if (WARN_ON_ONCE(groups_only))
- return -EINVAL;
+ goto out_error;
if (WARN_ON_ONCE(offset > ctrl->ana_log_size - nsid_buf_size))
- return -EINVAL;
+ goto out_error;
down_write(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
@@ -359,10 +372,19 @@ static int nvme_process_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
offset += nsid_buf_size;
if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
- return -EINVAL;
+ goto out_error;
}
- return 0;
+ // XXX: optimize when to actually calls this..
+ down_write(&ctrl->namespaces_rwsem);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ __nvme_mpath_add_disk(ns);
+ up_write(&ctrl->namespaces_rwsem);
+
+ error = 0;
+out_error:
+ mutex_unlock(&ctrl->subsys->lock);
+ return error;
}
static void nvme_ana_work(struct work_struct *work)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a6541b9f1548..ccbeae09599d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -460,7 +460,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
-void nvme_mpath_add_disk(struct nvme_ns_head *head);
+void nvme_mpath_add_disk(struct nvme_ns *ns);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_configure_ana(struct nvme_ctrl *ctrl);
void nvme_deconfigure_ana(struct nvme_ctrl *ctrl);
@@ -507,7 +507,7 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
{
return 0;
}
-static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
+static inline void nvme_mpath_add_disk(struct nvme_ns *ns)
{
}
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
next prev parent reply other threads:[~2018-06-05 11:17 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-06-05 8:29 [PATCH] nvme: do not block connect call on inaccessible devices Hannes Reinecke
2018-06-05 11:17 ` Christoph Hellwig [this message]
2018-06-05 12:23 ` Hannes Reinecke
2018-06-05 12:35 ` Bart Van Assche
2018-06-05 12:50 ` hch
2018-06-05 12:51 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180605111746.GA5802@lst.de \
--to=hch@lst.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox