All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Cc: Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>,
	Hillf Danton <hdanton@sina.com>,
	Pavel Tatashin <pasha.tatashin@soleen.com>,
	Tyler Hicks <tyhicks@linux.microsoft.com>,
	linux-block <linux-block@vger.kernel.org>
Subject: Re: [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock
Date: Fri, 27 Aug 2021 20:43:02 +0200	[thread overview]
Message-ID: <20210827184302.GA29967@lst.de> (raw)
In-Reply-To: <2642808b-a7d0-28ff-f288-0f4eabc562f7@i-love.sakura.ne.jp>

On Fri, Aug 27, 2021 at 01:03:45AM +0900, Tetsuo Handa wrote:
> 
> loop_unregister_transfer() which is called from cleanup_cryptoloop()
> currently lacks serialization between kfree() from loop_remove() from
> loop_control_remove() and mutex_lock() from unregister_transfer_cb().
> We can use refcount and loop_idr_spinlock for serialization between
> these functions.


So before we start complicating things for loop_release_xfer - how
do you actually reproduce loop_unregister_transfer finding a loop
device with a transfer set?  AFAICS loop_unregister_transfer is only
called form exit_cryptoloop, which can only be called when
cryptoloop has a zero reference count.  But as long as a transfer
is registered an extra refcount is held on its owner.

> @@ -2313,20 +2320,20 @@ static int loop_add(int i)
>  		goto out;
>  	lo->lo_state = Lo_unbound;
>  
> -	err = mutex_lock_killable(&loop_ctl_mutex);
> -	if (err)
> -		goto out_free_dev;
> -
>  	/* allocate id, if @id >= 0, we're requesting that specific id */
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&loop_idr_spinlock);
>  	if (i >= 0) {
> -		err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL);
> +		err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_ATOMIC);
>  		if (err == -ENOSPC)
>  			err = -EEXIST;
>  	} else {
> -		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
> +		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_ATOMIC);
>  	}
> +	spin_unlock(&loop_idr_spinlock);
> +	idr_preload_end();

Can you explain why the mutex is switched to a spinlock?  I could not
find any caller that can't block, so there doesn't seem to be a real
need for a spinlock, while a spinlock requires extra work and GFP_ATOMIC
allocations here.  Dropping the _killable probably makes some sense,
but seems like a separate cleanup.

> +	if (!lo || !refcount_inc_not_zero(&lo->idr_visible)) {
> +		spin_unlock(&loop_idr_spinlock);
> +		return -ENODEV;
>  	}
> +	spin_unlock(&loop_idr_spinlock);

> +	refcount_dec(&lo->idr_visible);
> +	/*
> +	 * Try to wait for concurrent callers (they should complete shortly due to
> +	 * lo->lo_state == Lo_deleting) operating on this loop device, in order to
> +	 * help that subsequent loop_add() will not to fail with -EEXIST.
> +	 * Note that this is best effort.
> +	 */
> +	for (ret = 0; refcount_read(&lo->idr_visible) != 1 && ret < HZ; ret++)
> +		schedule_timeout_killable(1);
> +	ret = 0;

This dance looks pretty strange to me.  I think just making idr_visible
an atomic_t and using atomic_cmpxchg with just 0 and 1 as valid versions
will make this much simpler, as it avoids the need to deal with a > 1
count, and it also serializes multiple removal calls.

I quickly hacked this up as a slight variant of your patch, and it's been
running the syzbot reproducer you pointed me to for quite while now:

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f0cdff0c5fbf4..69ced1feb18d5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2113,28 +2113,29 @@ int loop_register_transfer(struct loop_func_table *funcs)
 	return 0;
 }
 
-static int unregister_transfer_cb(int id, void *ptr, void *data)
-{
-	struct loop_device *lo = ptr;
-	struct loop_func_table *xfer = data;
-
-	mutex_lock(&lo->lo_mutex);
-	if (lo->lo_encryption == xfer)
-		loop_release_xfer(lo);
-	mutex_unlock(&lo->lo_mutex);
-	return 0;
-}
-
 int loop_unregister_transfer(int number)
 {
 	unsigned int n = number;
 	struct loop_func_table *xfer;
+	struct loop_device *lo;
+	int id;
 
 	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
 		return -EINVAL;
 
 	xfer_funcs[n] = NULL;
-	idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
+
+	/*
+	 * loop_unregister_transfer is only called from cryptoloop module
+	 * unload.  Given that each loop device that has a transfer enabled
+	 * hold a reference to the module implementing it we should never
+	 * get here with a transfer that is set.
+	 */
+	mutex_lock(&loop_ctl_mutex);
+	idr_for_each_entry(&loop_index_idr, lo, id)
+		WARN_ON_ONCE(lo->lo_encryption == xfer);
+	mutex_unlock(&loop_ctl_mutex);
+
 	return 0;
 }
 
@@ -2325,8 +2326,9 @@ static int loop_add(int i)
 	} else {
 		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
 	}
+	mutex_unlock(&loop_ctl_mutex);
 	if (err < 0)
-		goto out_unlock;
+		goto out_free_dev;
 	i = err;
 
 	err = -ENOMEM;
@@ -2392,15 +2394,17 @@ static int loop_add(int i)
 	disk->private_data	= lo;
 	disk->queue		= lo->lo_queue;
 	sprintf(disk->disk_name, "loop%d", i);
+	/* Make this loop device reachable from pathname. */
 	add_disk(disk);
-	mutex_unlock(&loop_ctl_mutex);
+	/* Show this loop device. */
+	atomic_set(&lo->idr_visible, 1);
 	return i;
 
 out_cleanup_tags:
 	blk_mq_free_tag_set(&lo->tag_set);
 out_free_idr:
+	mutex_lock(&loop_ctl_mutex);
 	idr_remove(&loop_index_idr, i);
-out_unlock:
 	mutex_unlock(&loop_ctl_mutex);
 out_free_dev:
 	kfree(lo);
@@ -2410,9 +2414,14 @@ static int loop_add(int i)
 
 static void loop_remove(struct loop_device *lo)
 {
+	/* Make this loop device unreachable from pathname. */
 	del_gendisk(lo->lo_disk);
 	blk_cleanup_disk(lo->lo_disk);
 	blk_mq_free_tag_set(&lo->tag_set);
+	mutex_lock(&loop_ctl_mutex);
+	idr_remove(&loop_index_idr, lo->lo_number);
+	mutex_unlock(&loop_ctl_mutex);
+	/* There is no route which can find this loop device. */
 	mutex_destroy(&lo->lo_mutex);
 	kfree(lo);
 }
@@ -2440,29 +2449,40 @@ static int loop_control_remove(int idx)
 	if (ret)
 		return ret;
 
+	/*
+	 * Identify the loop device to remove. Skip the device if it is owned by
+	 * loop_remove()/loop_add() where it is not safe to access lo_mutex.
+	 * The loop device is marked invisible even if we bail out of the
+	 * removal, but the only other place checking the visibility is the
+	 * LOOP_CTL_GET_FREE ioctl, which checks the same flags as we do below,
+	 * and which is fundamentally racy anyway.
+	 */
 	lo = idr_find(&loop_index_idr, idx);
-	if (!lo) {
-		ret = -ENODEV;
-		goto out_unlock_ctrl;
+	if (!lo || atomic_cmpxchg(&lo->idr_visible, 1, 0) == 0) {
+		mutex_unlock(&loop_ctl_mutex);
+		return -ENODEV;
 	}
+	mutex_unlock(&loop_ctl_mutex);
 
 	ret = mutex_lock_killable(&lo->lo_mutex);
 	if (ret)
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	if (lo->lo_state != Lo_unbound ||
 	    atomic_read(&lo->lo_refcnt) > 0) {
 		mutex_unlock(&lo->lo_mutex);
 		ret = -EBUSY;
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	}
+	/* Mark this loop device no longer open()-able. */
 	lo->lo_state = Lo_deleting;
 	mutex_unlock(&lo->lo_mutex);
 
-	idr_remove(&loop_index_idr, lo->lo_number);
 	loop_remove(lo);
-out_unlock_ctrl:
-	mutex_unlock(&loop_ctl_mutex);
-	return ret;
+	return 0;
+
+mark_visible:
+	atomic_inc(&lo->idr_visible);
+	return -EBUSY;
 }
 
 static int loop_control_get_free(int idx)
@@ -2474,7 +2494,8 @@ static int loop_control_get_free(int idx)
 	if (ret)
 		return ret;
 	idr_for_each_entry(&loop_index_idr, lo, id) {
-		if (lo->lo_state == Lo_unbound)
+		if (atomic_read(&lo->idr_visible) &&
+		    lo->lo_state == Lo_unbound)
 			goto found;
 	}
 	mutex_unlock(&loop_ctl_mutex);
@@ -2590,10 +2611,12 @@ static void __exit loop_exit(void)
 	unregister_blkdev(LOOP_MAJOR, "loop");
 	misc_deregister(&loop_misc);
 
-	mutex_lock(&loop_ctl_mutex);
+	/*
+	 * There is no need to use loop_ctl_mutex here, for nobody else can
+	 * access loop_index_idr when this module is unloading.
+	 */
 	idr_for_each_entry(&loop_index_idr, lo, id)
 		loop_remove(lo);
-	mutex_unlock(&loop_ctl_mutex);
 
 	idr_destroy(&loop_index_idr);
 }
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 1988899db63ac..1ec5135da54a7 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -68,6 +68,7 @@ struct loop_device {
 	struct blk_mq_tag_set	tag_set;
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
+	atomic_t		idr_visible; /* a bool in reality */
 };
 
 struct loop_cmd {

  reply	other threads:[~2021-08-27 18:43 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-26 16:03 [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock Tetsuo Handa
2021-08-27 18:43 ` Christoph Hellwig [this message]
2021-08-28  1:10   ` Tetsuo Handa
2021-08-28  2:22     ` Tetsuo Handa
2021-08-28  7:18     ` Christoph Hellwig
2021-08-28 13:50       ` Tetsuo Handa
2021-08-29  1:22         ` [PATCH v2] loop: reduce the loop_ctl_mutex scope Tetsuo Handa
2021-08-29 13:47           ` [PATCH v3] " Tetsuo Handa
2021-08-30  7:13             ` Christoph Hellwig
2021-09-01  5:36               ` Christoph Hellwig
2021-09-01  5:47                 ` Tetsuo Handa
2021-09-01  6:10             ` Christoph Hellwig
2021-09-02  0:07               ` [PATCH v3 (repost)] " Tetsuo Handa
2021-09-04  4:16             ` [PATCH v3] " Jens Axboe
2021-08-31  3:19 ` [loop] 8883efd909: stress-ng.loop.ops_per_sec 148.4% improvement kernel test robot
2021-08-31  3:19   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210827184302.GA29967@lst.de \
    --to=hch@lst.de \
    --cc=axboe@kernel.dk \
    --cc=hdanton@sina.com \
    --cc=linux-block@vger.kernel.org \
    --cc=pasha.tatashin@soleen.com \
    --cc=penguin-kernel@i-love.sakura.ne.jp \
    --cc=tyhicks@linux.microsoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.