All of lore.kernel.org
 help / color / mirror / Atom feed
From: hch@lst.de (Christoph Hellwig)
Subject: [PATCH] nvme: do not block connect call on inaccessible devices
Date: Tue, 5 Jun 2018 13:17:46 +0200	[thread overview]
Message-ID: <20180605111746.GA5802@lst.de> (raw)
In-Reply-To: <20180605082905.1740-1-hare@suse.de>

On Tue, Jun 05, 2018@10:29:05AM +0200, Hannes Reinecke wrote:
> When a device is in ANA inaccessible state the call to 'nvme connect'
> will hang as a parition scan is triggered and the I/O will be requeued
> indefinitely.
> Or rather, requeued until the next path is connected, but we cannot issue
> any nvme ioctls anymore as the nvmf_dev_mutex is blocked by the initial
> 'connect' call.
> 
> This patch inhibits partition scan if no working paths are found, and
> retriggers the partition scan once the device becomes active.

And what updates the state once all your inaccessible or change
state groups become optimized?

Also I see no point registering the disk if we can't do I/O to it.

I'd rather prefer something along the lines of the completely untested
patch below if we really want to go down this route:

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3b55580bb9be..5d90560775e1 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3080,7 +3080,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
 			ns->disk->disk_name);
 
-	nvme_mpath_add_disk(ns->head);
+	nvme_mpath_add_disk(ns);
 	nvme_fault_inject_init(ns);
 	return;
  out_unlink_ns:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 6e4fdc1a0581..8058dc385df5 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -264,20 +264,31 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	return -ENOMEM;
 }
 
-void nvme_mpath_add_disk(struct nvme_ns_head *head)
+static void __nvme_mpath_add_disk(struct nvme_ns *ns)
 {
-	if (!head->disk)
+
+	struct nvme_ns_head *head = ns->head;
+	enum nvme_ana_state state = nvme_ns_ana_state(ns);
+
+	lockdep_assert_held(&head->subsys->lock);
+
+	if (!head->disk ||
+	    (head->disk->flags & GENHD_FL_UP) ||
+	    (state != NVME_ANA_OPTIMIZED && state != NVME_ANA_NONOPTIMIZED))
 		return;
 
-	mutex_lock(&head->subsys->lock);
-	if (!(head->disk->flags & GENHD_FL_UP)) {
-		device_add_disk(&head->subsys->dev, head->disk);
-		if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
-				&nvme_ns_id_attr_group))
-			pr_warn("%s: failed to create sysfs group for identification\n",
-				head->disk->disk_name);
-	}
-	mutex_unlock(&head->subsys->lock);
+	device_add_disk(&head->subsys->dev, head->disk);
+	if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
+			&nvme_ns_id_attr_group))
+		pr_warn("%s: failed to create sysfs group for identification\n",
+			head->disk->disk_name);
+}
+
+void nvme_mpath_add_disk(struct nvme_ns *ns)
+{
+	mutex_lock(&ns->head->subsys->lock);
+	__nvme_mpath_add_disk(ns);
+	mutex_unlock(&ns->head->subsys->lock);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -299,6 +310,7 @@ static int nvme_process_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
 {
 	void *base = ctrl->ana_log_buf;
 	size_t offset = sizeof(struct nvme_ana_rsp_hdr);
+	struct nvme_ns *ns;
 	int error, i;
 
 	/*
@@ -316,21 +328,22 @@ static int nvme_process_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
 		return error;
 	}
 
+	error = -EINVAL;
+	mutex_lock(&ctrl->subsys->lock);
 	for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
 		struct nvme_ana_group_desc *desc = base + offset;
 		u32 grpid = le32_to_cpu(desc->grpid);
 		u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
 		size_t nsid_buf_size = nr_nsids * sizeof(__le32);
-		struct nvme_ns *ns;
 
 		if (WARN_ON_ONCE(grpid == 0))
-			return -EINVAL;
+			goto out_error;
 		if (WARN_ON_ONCE(grpid > ctrl->anagrpmax))
-			return -EINVAL;
+			goto out_error;
 		if (WARN_ON_ONCE(desc->state == 0))
-			return -EINVAL;
+			goto out_error;
 		if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE))
-			return -EINVAL;
+			goto out_error;
 
 		dev_info(ctrl->device, "ANA group %d: %s.\n",
 				grpid, nvme_ana_state_names[desc->state]);
@@ -340,9 +353,9 @@ static int nvme_process_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
 			continue;
 
 		if (WARN_ON_ONCE(groups_only))
-			return -EINVAL;
+			goto out_error;
 		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - nsid_buf_size))
-			return -EINVAL;
+			goto out_error;
 
 		down_write(&ctrl->namespaces_rwsem);
 		list_for_each_entry(ns, &ctrl->namespaces, list) {
@@ -359,10 +372,19 @@ static int nvme_process_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
 
 		offset += nsid_buf_size;
 		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
-			return -EINVAL;
+			goto out_error;
 	}
 
-	return 0;
+	// XXX: optimize when to actually calls this..
+	down_write(&ctrl->namespaces_rwsem);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		__nvme_mpath_add_disk(ns);
+	up_write(&ctrl->namespaces_rwsem);
+
+	error = 0;
+out_error:
+	mutex_unlock(&ctrl->subsys->lock);
+	return error;
 }
 
 static void nvme_ana_work(struct work_struct *work)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a6541b9f1548..ccbeae09599d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -460,7 +460,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 void nvme_failover_req(struct request *req);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
-void nvme_mpath_add_disk(struct nvme_ns_head *head);
+void nvme_mpath_add_disk(struct nvme_ns *ns);
 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
 int nvme_configure_ana(struct nvme_ctrl *ctrl);
 void nvme_deconfigure_ana(struct nvme_ctrl *ctrl);
@@ -507,7 +507,7 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
 {
 	return 0;
 }
-static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
+static inline void nvme_mpath_add_disk(struct nvme_ns *ns)
 {
 }
 static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)

  reply	other threads:[~2018-06-05 11:17 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-05  8:29 [PATCH] nvme: do not block connect call on inaccessible devices Hannes Reinecke
2018-06-05 11:17 ` Christoph Hellwig [this message]
2018-06-05 12:23   ` Hannes Reinecke
2018-06-05 12:35     ` Bart Van Assche
2018-06-05 12:50       ` hch
2018-06-05 12:51     ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180605111746.GA5802@lst.de \
    --to=hch@lst.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.