All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Cc: kbusch@kernel.org, Matthew Wilcox <willy@infradead.org>,
	hch@lst.de, linux-nvme@lists.infradead.org, sagi@grimberg.me
Subject: Re: [PATCH V2 1/2] nvme-core: use xarray for ctrl ns tracking
Date: Wed, 1 Jul 2020 15:12:35 +0200	[thread overview]
Message-ID: <20200701131235.GA17919@lst.de> (raw)
In-Reply-To: <20200701022517.6738-2-chaitanya.kulkarni@wdc.com>

[willy: a comment/request on the xa_load API below]

On Tue, Jun 30, 2020 at 07:25:16PM -0700, Chaitanya Kulkarni wrote:
> This patch replaces the ctrl->namespaces tracking from linked list to
> xarray and improves the performance.

The performance improvement needs to be clearly stated here.

>  static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
>  {
> +	struct nvme_id_ctrl *id;
>  	struct nvme_ns *ns;
> +	int ret = 0;
>  
> +	if (xa_empty(&ctrl->namespaces)) {
>  		ret = -ENOTTY;
> +		goto out;
>  	}
>  
> -	ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
> -	if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
> -		dev_warn(ctrl->device,
> -			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
> +	/* Let the scan work finish updating ctrl->namespaces */
> +	flush_work(&ctrl->scan_work);
> +	if (nvme_identify_ctrl(ctrl, &id)) {
> +		dev_err(ctrl->device, "nvme_identify_ctrl() failed\n");
>  		ret = -EINVAL;
> -		goto out_unlock;
> +		goto out;
> +	}
> +	if (le32_to_cpu(id->nn) > 1) {
> +		dev_warn(ctrl->device,
> +		"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
> +		goto out;
>  	}

This code doesn't make any sense at all.  Why does a patch changing
data structures add new calls that go out on the wire?

> +	struct nvme_ns *ns;
> +	XA_STATE(xas, &ctrl->namespaces, nsid);
>  
> +	rcu_read_lock();
> +	do {
> +		ns = xas_load(&xas);
> +		if (xa_is_zero(ns))
> +			ns = NULL;
> +	} while (xas_retry(&xas, ns));
> +	ns = ns && kref_get_unless_zero(&ns->kref) ? ns : NULL;
> +	rcu_read_unlock();

This looks pretty weird, but I think the problem is one in the xarray
API, as for the typical lookup pattern we'd want an xa_load with
external RCU locking:

	rcu_read_lock();
	ns = xa_load_rcu(&ctrl->namespaces, nsid);
	if (ns && !kref_get_unless_zero(&ns->kref))
		ns = NULL;
	rcu_read_unlock();

instead of duplicating this fairly arcane loop in all kinds of callers.

> -	down_write(&ctrl->namespaces_rwsem);
> -	list_add_tail(&ns->list, &ctrl->namespaces);
> -	up_write(&ctrl->namespaces_rwsem);
> +	ret = xa_insert(&ctrl->namespaces, nsid, ns, GFP_KERNEL);
> +	if (ret) {
> +		switch (ret) {
> +		case -ENOMEM:
> +			dev_err(ctrl->device,
> +				"xa insert memory allocation\n");
> +			break;
> +		case -EBUSY:
> +			dev_err(ctrl->device,
> +				"xa insert entry already present\n");
> +			break;
> +		}
> +	}

No need for the switch and the detailed printks here, but we do need
actual error handling.

>  static void nvme_ns_remove(struct nvme_ns *ns)
>  {
> +	struct xarray *xa = &ns->ctrl->namespaces;
> +	bool free;
> +
>  	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
>  		return;
>  
> @@ -3740,12 +3749,14 @@ static void nvme_ns_remove(struct nvme_ns *ns)
>  			blk_integrity_unregister(ns->disk);
>  	}
>  
> -	down_write(&ns->ctrl->namespaces_rwsem);
> -	list_del_init(&ns->list);
> -	up_write(&ns->ctrl->namespaces_rwsem);
> +	xa_lock(xa);
> +	__xa_erase(xa, ns->head->ns_id);
> +	free = refcount_dec_and_test(&ns->kref.refcount) ? true : false;
> +	xa_unlock(xa);
>  
>  	nvme_mpath_check_last_path(ns);
> -	nvme_put_ns(ns);
> +	if (free)
> +		__nvme_free_ns(ns);

This looks very strange to me.  Shoudn't this be a normal xa_erase
followed by a normal nvme_put_ns?  For certain the driver code has
no business poking into the kref internals.

>  static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
>  					unsigned nsid)
>  {
> +	struct xarray *namespaces = &ctrl->namespaces;
> +	struct xarray rm_array;
> +	unsigned long tnsid;
> +	struct nvme_ns *ns;
> +	unsigned long idx;
> +	int ret;
>  
> +	xa_init(&rm_array);
> +
> +	xa_lock(namespaces);
> +	xa_for_each(namespaces, idx, ns) {
> +		tnsid = ns->head->ns_id;
> +		if (tnsid > nsid || test_bit(NVME_NS_DEAD, &ns->flags)) {
> +			xa_unlock(namespaces);
> +			xa_erase(namespaces, tnsid);
> +			/* Even if insert fails keep going */
> +			ret = xa_insert(&rm_array, nsid, ns, GFP_KERNEL);
> +			switch (ret) {
> +			case -ENOMEM:
> +				pr_err("xa insert memory allocation failed\n");
> +				break;
> +			case -EBUSY:
> +				pr_err("xa insert entry already present\n");
> +				break;
> +			}
> +			xa_lock(namespaces);
> +		}
>  	}
> +	xa_unlock(namespaces);

I don't think you want an xarray for the delete list.  Just keep the
list head for that now - once we moved to RCU read side locking some
of this could potentially be simplified later.

>   */
>  void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
>  {
> -	struct nvme_ns *ns, *next;
> -	LIST_HEAD(ns_list);
> +	struct xarray rm_array;
> +	unsigned long tnsid;
> +	struct nvme_ns *ns;
> +	unsigned long idx;
> +	int ret;
> +
> +	xa_init(&rm_array);
>  
>  	/*
>  	 * make sure to requeue I/O to all namespaces as these
> @@ -3919,11 +3950,30 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
>  	if (ctrl->state == NVME_CTRL_DEAD)
>  		nvme_kill_queues(ctrl);
>  
> -	down_write(&ctrl->namespaces_rwsem);
> -	list_splice_init(&ctrl->namespaces, &ns_list);
> -	up_write(&ctrl->namespaces_rwsem);
> +	xa_lock(&ctrl->namespaces);
> +	xa_for_each(&ctrl->namespaces, idx, ns) {
> +		tnsid = ns->head->ns_id;
> +		xa_unlock(&ctrl->namespaces);
> +		xa_erase(&ctrl->namespaces, tnsid);
> +		/* Even if insert fails keep going */
> +		ret = xa_insert(&rm_array, tnsid, ns, GFP_KERNEL);
> +		if (ret) {
> +			switch (ret) {
> +			case -ENOMEM:
> +				dev_err(ctrl->device,
> +					"xa insert memory allocation\n");
> +				break;
> +			case -EBUSY:
> +				dev_err(ctrl->device,
> +					"xa insert entry already present\n");
> +				break;
> +			}
> +		}
> +		xa_lock(&ctrl->namespaces);
> +	}
> +	xa_unlock(&ctrl->namespaces);

Same here.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

  reply	other threads:[~2020-07-01 13:12 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-01  2:25 [PATCH V2 0/2] nvme: replace linked list with xarray for ns tracking Chaitanya Kulkarni
2020-07-01  2:25 ` [PATCH V2 1/2] nvme-core: use xarray for ctrl " Chaitanya Kulkarni
2020-07-01 13:12   ` Christoph Hellwig [this message]
2020-07-01 16:44     ` Keith Busch
2020-07-01 18:19     ` Chaitanya Kulkarni
2020-07-01 18:29       ` Keith Busch
2020-07-01 18:40         ` Chaitanya Kulkarni
2020-07-01  2:25 ` [PATCH V2 2/2] nvmet: use xarray for ctrl ns storing Chaitanya Kulkarni
2020-07-01  6:08   ` Christoph Hellwig
2020-07-01 18:36     ` Chaitanya Kulkarni
2020-07-01  5:36 ` [PATCH V2 0/2] nvme: replace linked list with xarray for ns tracking Christoph Hellwig
2020-07-01 13:21   ` Keith Busch
2020-07-01 16:49     ` Sagi Grimberg
2020-07-01 18:43       ` Chaitanya Kulkarni
2020-07-01 18:41     ` Chaitanya Kulkarni
2020-07-01 18:41   ` Chaitanya Kulkarni

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200701131235.GA17919@lst.de \
    --to=hch@lst.de \
    --cc=chaitanya.kulkarni@wdc.com \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.