* [PATCH 16/29] PM: hibernate: Convert to bdev_open_by_dev()
2023-08-11 11:04 [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Jan Kara
@ 2023-08-11 11:04 ` Jan Kara
2023-08-11 16:57 ` Rafael J. Wysocki
2023-08-11 11:04 ` [PATCH 17/29] PM: hibernate: Drop unused snapshot_test argument Jan Kara
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Jan Kara @ 2023-08-11 11:04 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-block, Christoph Hellwig, Jan Kara, linux-pm
Convert hibernation code to use bdev_open_by_dev().
CC: linux-pm@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
---
kernel/power/swap.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f6ebcd00c410..b475bee282ff 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -222,7 +222,7 @@ int swsusp_swap_in_use(void)
*/
static unsigned short root_swap = 0xffff;
-static struct block_device *hib_resume_bdev;
+static struct bdev_handle *hib_resume_bdev_handle;
struct hib_bio_batch {
atomic_t count;
@@ -276,7 +276,8 @@ static int hib_submit_io(blk_opf_t opf, pgoff_t page_off, void *addr,
struct bio *bio;
int error = 0;
- bio = bio_alloc(hib_resume_bdev, 1, opf, GFP_NOIO | __GFP_HIGH);
+ bio = bio_alloc(hib_resume_bdev_handle->bdev, 1, opf,
+ GFP_NOIO | __GFP_HIGH);
bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
@@ -356,14 +357,14 @@ static int swsusp_swap_check(void)
return res;
root_swap = res;
- hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
+ hib_resume_bdev_handle = bdev_open_by_dev(swsusp_resume_device,
BLK_OPEN_WRITE, NULL, NULL);
- if (IS_ERR(hib_resume_bdev))
- return PTR_ERR(hib_resume_bdev);
+ if (IS_ERR(hib_resume_bdev_handle))
+ return PTR_ERR(hib_resume_bdev_handle);
- res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
+ res = set_blocksize(hib_resume_bdev_handle->bdev, PAGE_SIZE);
if (res < 0)
- blkdev_put(hib_resume_bdev, NULL);
+ bdev_release(hib_resume_bdev_handle);
return res;
}
@@ -1521,10 +1522,10 @@ int swsusp_check(bool snapshot_test)
void *holder = snapshot_test ? &swsusp_holder : NULL;
int error;
- hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, BLK_OPEN_READ,
- holder, NULL);
- if (!IS_ERR(hib_resume_bdev)) {
- set_blocksize(hib_resume_bdev, PAGE_SIZE);
+ hib_resume_bdev_handle = bdev_open_by_dev(swsusp_resume_device,
+ BLK_OPEN_READ, holder, NULL);
+ if (!IS_ERR(hib_resume_bdev_handle)) {
+ set_blocksize(hib_resume_bdev_handle->bdev, PAGE_SIZE);
clear_page(swsusp_header);
error = hib_submit_io(REQ_OP_READ, swsusp_resume_block,
swsusp_header, NULL);
@@ -1549,11 +1550,11 @@ int swsusp_check(bool snapshot_test)
put:
if (error)
- blkdev_put(hib_resume_bdev, holder);
+ bdev_release(hib_resume_bdev_handle);
else
pr_debug("Image signature found, resuming\n");
} else {
- error = PTR_ERR(hib_resume_bdev);
+ error = PTR_ERR(hib_resume_bdev_handle);
}
if (error)
@@ -1568,12 +1569,12 @@ int swsusp_check(bool snapshot_test)
void swsusp_close(bool snapshot_test)
{
- if (IS_ERR(hib_resume_bdev)) {
+ if (IS_ERR(hib_resume_bdev_handle)) {
pr_debug("Image device not initialised\n");
return;
}
- blkdev_put(hib_resume_bdev, snapshot_test ? &swsusp_holder : NULL);
+ bdev_release(hib_resume_bdev_handle);
}
/**
--
2.35.3
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 16/29] PM: hibernate: Convert to bdev_open_by_dev()
2023-08-11 11:04 ` [PATCH 16/29] PM: hibernate: Convert to bdev_open_by_dev() Jan Kara
@ 2023-08-11 16:57 ` Rafael J. Wysocki
0 siblings, 0 replies; 12+ messages in thread
From: Rafael J. Wysocki @ 2023-08-11 16:57 UTC (permalink / raw)
To: Jan Kara; +Cc: linux-fsdevel, linux-block, Christoph Hellwig, linux-pm
On Fri, Aug 11, 2023 at 1:05 PM Jan Kara <jack@suse.cz> wrote:
>
> Convert hibernation code to use bdev_open_by_dev().
>
> CC: linux-pm@vger.kernel.org
> Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
> ---
> kernel/power/swap.c | 31 ++++++++++++++++---------------
> 1 file changed, 16 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/power/swap.c b/kernel/power/swap.c
> index f6ebcd00c410..b475bee282ff 100644
> --- a/kernel/power/swap.c
> +++ b/kernel/power/swap.c
> @@ -222,7 +222,7 @@ int swsusp_swap_in_use(void)
> */
>
> static unsigned short root_swap = 0xffff;
> -static struct block_device *hib_resume_bdev;
> +static struct bdev_handle *hib_resume_bdev_handle;
>
> struct hib_bio_batch {
> atomic_t count;
> @@ -276,7 +276,8 @@ static int hib_submit_io(blk_opf_t opf, pgoff_t page_off, void *addr,
> struct bio *bio;
> int error = 0;
>
> - bio = bio_alloc(hib_resume_bdev, 1, opf, GFP_NOIO | __GFP_HIGH);
> + bio = bio_alloc(hib_resume_bdev_handle->bdev, 1, opf,
> + GFP_NOIO | __GFP_HIGH);
> bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
>
> if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
> @@ -356,14 +357,14 @@ static int swsusp_swap_check(void)
> return res;
> root_swap = res;
>
> - hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
> + hib_resume_bdev_handle = bdev_open_by_dev(swsusp_resume_device,
> BLK_OPEN_WRITE, NULL, NULL);
> - if (IS_ERR(hib_resume_bdev))
> - return PTR_ERR(hib_resume_bdev);
> + if (IS_ERR(hib_resume_bdev_handle))
> + return PTR_ERR(hib_resume_bdev_handle);
>
> - res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
> + res = set_blocksize(hib_resume_bdev_handle->bdev, PAGE_SIZE);
> if (res < 0)
> - blkdev_put(hib_resume_bdev, NULL);
> + bdev_release(hib_resume_bdev_handle);
>
> return res;
> }
> @@ -1521,10 +1522,10 @@ int swsusp_check(bool snapshot_test)
> void *holder = snapshot_test ? &swsusp_holder : NULL;
> int error;
>
> - hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, BLK_OPEN_READ,
> - holder, NULL);
> - if (!IS_ERR(hib_resume_bdev)) {
> - set_blocksize(hib_resume_bdev, PAGE_SIZE);
> + hib_resume_bdev_handle = bdev_open_by_dev(swsusp_resume_device,
> + BLK_OPEN_READ, holder, NULL);
> + if (!IS_ERR(hib_resume_bdev_handle)) {
> + set_blocksize(hib_resume_bdev_handle->bdev, PAGE_SIZE);
> clear_page(swsusp_header);
> error = hib_submit_io(REQ_OP_READ, swsusp_resume_block,
> swsusp_header, NULL);
> @@ -1549,11 +1550,11 @@ int swsusp_check(bool snapshot_test)
>
> put:
> if (error)
> - blkdev_put(hib_resume_bdev, holder);
> + bdev_release(hib_resume_bdev_handle);
> else
> pr_debug("Image signature found, resuming\n");
> } else {
> - error = PTR_ERR(hib_resume_bdev);
> + error = PTR_ERR(hib_resume_bdev_handle);
> }
>
> if (error)
> @@ -1568,12 +1569,12 @@ int swsusp_check(bool snapshot_test)
>
> void swsusp_close(bool snapshot_test)
> {
> - if (IS_ERR(hib_resume_bdev)) {
> + if (IS_ERR(hib_resume_bdev_handle)) {
> pr_debug("Image device not initialised\n");
> return;
> }
>
> - blkdev_put(hib_resume_bdev, snapshot_test ? &swsusp_holder : NULL);
> + bdev_release(hib_resume_bdev_handle);
> }
>
> /**
> --
> 2.35.3
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 17/29] PM: hibernate: Drop unused snapshot_test argument
2023-08-11 11:04 [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Jan Kara
2023-08-11 11:04 ` [PATCH 16/29] PM: hibernate: Convert to bdev_open_by_dev() Jan Kara
@ 2023-08-11 11:04 ` Jan Kara
2023-08-11 16:58 ` Rafael J. Wysocki
2023-08-11 12:27 ` [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Christoph Hellwig
2023-08-25 1:58 ` Al Viro
3 siblings, 1 reply; 12+ messages in thread
From: Jan Kara @ 2023-08-11 11:04 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-block, Christoph Hellwig, Jan Kara, linux-pm
snapshot_test argument is now unused in swsusp_close() and
load_image_and_restore(). Drop it
CC: linux-pm@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
---
kernel/power/hibernate.c | 14 +++++++-------
kernel/power/power.h | 2 +-
kernel/power/swap.c | 6 +++---
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index e1b4bfa938dd..6abeec0ae084 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -684,7 +684,7 @@ static void power_down(void)
cpu_relax();
}
-static int load_image_and_restore(bool snapshot_test)
+static int load_image_and_restore(void)
{
int error;
unsigned int flags;
@@ -694,12 +694,12 @@ static int load_image_and_restore(bool snapshot_test)
lock_device_hotplug();
error = create_basic_memory_bitmaps();
if (error) {
- swsusp_close(snapshot_test);
+ swsusp_close();
goto Unlock;
}
error = swsusp_read(&flags);
- swsusp_close(snapshot_test);
+ swsusp_close();
if (!error)
error = hibernation_restore(flags & SF_PLATFORM_MODE);
@@ -788,7 +788,7 @@ int hibernate(void)
pm_pr_dbg("Checking hibernation image\n");
error = swsusp_check(snapshot_test);
if (!error)
- error = load_image_and_restore(snapshot_test);
+ error = load_image_and_restore();
}
thaw_processes();
@@ -952,7 +952,7 @@ static int software_resume(void)
/* The snapshot device should not be opened while we're running */
if (!hibernate_acquire()) {
error = -EBUSY;
- swsusp_close(false);
+ swsusp_close();
goto Unlock;
}
@@ -973,7 +973,7 @@ static int software_resume(void)
goto Close_Finish;
}
- error = load_image_and_restore(false);
+ error = load_image_and_restore();
thaw_processes();
Finish:
pm_notifier_call_chain(PM_POST_RESTORE);
@@ -987,7 +987,7 @@ static int software_resume(void)
pm_pr_dbg("Hibernation image not present or could not be loaded.\n");
return error;
Close_Finish:
- swsusp_close(false);
+ swsusp_close();
goto Finish;
}
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 46eb14dc50c3..bebf049a51c1 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -172,7 +172,7 @@ int swsusp_check(bool snapshot_test);
extern void swsusp_free(void);
extern int swsusp_read(unsigned int *flags_p);
extern int swsusp_write(unsigned int flags);
-void swsusp_close(bool snapshot_test);
+void swsusp_close(void);
#ifdef CONFIG_SUSPEND
extern int swsusp_unmark(void);
#endif
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b475bee282ff..17e0dad5008e 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -444,7 +444,7 @@ static int get_swap_writer(struct swap_map_handle *handle)
err_rel:
release_swap_writer(handle);
err_close:
- swsusp_close(false);
+ swsusp_close();
return ret;
}
@@ -509,7 +509,7 @@ static int swap_writer_finish(struct swap_map_handle *handle,
if (error)
free_all_swap_pages(root_swap);
release_swap_writer(handle);
- swsusp_close(false);
+ swsusp_close();
return error;
}
@@ -1567,7 +1567,7 @@ int swsusp_check(bool snapshot_test)
* swsusp_close - close swap device.
*/
-void swsusp_close(bool snapshot_test)
+void swsusp_close(void)
{
if (IS_ERR(hib_resume_bdev_handle)) {
pr_debug("Image device not initialised\n");
--
2.35.3
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 17/29] PM: hibernate: Drop unused snapshot_test argument
2023-08-11 11:04 ` [PATCH 17/29] PM: hibernate: Drop unused snapshot_test argument Jan Kara
@ 2023-08-11 16:58 ` Rafael J. Wysocki
0 siblings, 0 replies; 12+ messages in thread
From: Rafael J. Wysocki @ 2023-08-11 16:58 UTC (permalink / raw)
To: Jan Kara; +Cc: linux-fsdevel, linux-block, Christoph Hellwig, linux-pm
On Fri, Aug 11, 2023 at 1:05 PM Jan Kara <jack@suse.cz> wrote:
>
> snapshot_test argument is now unused in swsusp_close() and
> load_image_and_restore(). Drop it
>
> CC: linux-pm@vger.kernel.org
> Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
> ---
> kernel/power/hibernate.c | 14 +++++++-------
> kernel/power/power.h | 2 +-
> kernel/power/swap.c | 6 +++---
> 3 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
> index e1b4bfa938dd..6abeec0ae084 100644
> --- a/kernel/power/hibernate.c
> +++ b/kernel/power/hibernate.c
> @@ -684,7 +684,7 @@ static void power_down(void)
> cpu_relax();
> }
>
> -static int load_image_and_restore(bool snapshot_test)
> +static int load_image_and_restore(void)
> {
> int error;
> unsigned int flags;
> @@ -694,12 +694,12 @@ static int load_image_and_restore(bool snapshot_test)
> lock_device_hotplug();
> error = create_basic_memory_bitmaps();
> if (error) {
> - swsusp_close(snapshot_test);
> + swsusp_close();
> goto Unlock;
> }
>
> error = swsusp_read(&flags);
> - swsusp_close(snapshot_test);
> + swsusp_close();
> if (!error)
> error = hibernation_restore(flags & SF_PLATFORM_MODE);
>
> @@ -788,7 +788,7 @@ int hibernate(void)
> pm_pr_dbg("Checking hibernation image\n");
> error = swsusp_check(snapshot_test);
> if (!error)
> - error = load_image_and_restore(snapshot_test);
> + error = load_image_and_restore();
> }
> thaw_processes();
>
> @@ -952,7 +952,7 @@ static int software_resume(void)
> /* The snapshot device should not be opened while we're running */
> if (!hibernate_acquire()) {
> error = -EBUSY;
> - swsusp_close(false);
> + swsusp_close();
> goto Unlock;
> }
>
> @@ -973,7 +973,7 @@ static int software_resume(void)
> goto Close_Finish;
> }
>
> - error = load_image_and_restore(false);
> + error = load_image_and_restore();
> thaw_processes();
> Finish:
> pm_notifier_call_chain(PM_POST_RESTORE);
> @@ -987,7 +987,7 @@ static int software_resume(void)
> pm_pr_dbg("Hibernation image not present or could not be loaded.\n");
> return error;
> Close_Finish:
> - swsusp_close(false);
> + swsusp_close();
> goto Finish;
> }
>
> diff --git a/kernel/power/power.h b/kernel/power/power.h
> index 46eb14dc50c3..bebf049a51c1 100644
> --- a/kernel/power/power.h
> +++ b/kernel/power/power.h
> @@ -172,7 +172,7 @@ int swsusp_check(bool snapshot_test);
> extern void swsusp_free(void);
> extern int swsusp_read(unsigned int *flags_p);
> extern int swsusp_write(unsigned int flags);
> -void swsusp_close(bool snapshot_test);
> +void swsusp_close(void);
> #ifdef CONFIG_SUSPEND
> extern int swsusp_unmark(void);
> #endif
> diff --git a/kernel/power/swap.c b/kernel/power/swap.c
> index b475bee282ff..17e0dad5008e 100644
> --- a/kernel/power/swap.c
> +++ b/kernel/power/swap.c
> @@ -444,7 +444,7 @@ static int get_swap_writer(struct swap_map_handle *handle)
> err_rel:
> release_swap_writer(handle);
> err_close:
> - swsusp_close(false);
> + swsusp_close();
> return ret;
> }
>
> @@ -509,7 +509,7 @@ static int swap_writer_finish(struct swap_map_handle *handle,
> if (error)
> free_all_swap_pages(root_swap);
> release_swap_writer(handle);
> - swsusp_close(false);
> + swsusp_close();
>
> return error;
> }
> @@ -1567,7 +1567,7 @@ int swsusp_check(bool snapshot_test)
> * swsusp_close - close swap device.
> */
>
> -void swsusp_close(bool snapshot_test)
> +void swsusp_close(void)
> {
> if (IS_ERR(hib_resume_bdev_handle)) {
> pr_debug("Image device not initialised\n");
> --
> 2.35.3
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-11 11:04 [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Jan Kara
2023-08-11 11:04 ` [PATCH 16/29] PM: hibernate: Convert to bdev_open_by_dev() Jan Kara
2023-08-11 11:04 ` [PATCH 17/29] PM: hibernate: Drop unused snapshot_test argument Jan Kara
@ 2023-08-11 12:27 ` Christoph Hellwig
2023-08-25 1:58 ` Al Viro
3 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2023-08-11 12:27 UTC (permalink / raw)
To: Jan Kara
Cc: linux-fsdevel, linux-block, Christoph Hellwig, Alasdair Kergon,
Andrew Morton, Anna Schumaker, Chao Yu, Christian Borntraeger,
Darrick J. Wong, Dave Kleikamp, David Sterba, dm-devel, drbd-dev,
Gao Xiang, Jack Wang, Jaegeuk Kim, jfs-discussion, Joern Engel,
Joseph Qi, Kent Overstreet, linux-bcache, linux-btrfs,
linux-erofs, linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd,
linux-nfs, linux-nilfs, linux-nvme, linux-pm, linux-raid,
linux-s390, linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer,
Minchan Kim, ocfs2-devel, reiserfs-devel, Sergey Senozhatsky,
Song Liu, Sven Schnelle, target-devel, Ted Tso, Trond Myklebust,
xen-devel
Except for a mostly cosmetic nitpick this looks good to me:
Acked-by: Christoph Hellwig <hch@lst.de>
That's not eactly the deep review I'd like to do, but as I'm about to
head out for vacation that's probably as good as it gets.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-11 11:04 [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Jan Kara
` (2 preceding siblings ...)
2023-08-11 12:27 ` [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Christoph Hellwig
@ 2023-08-25 1:58 ` Al Viro
2023-08-25 13:47 ` Jan Kara
3 siblings, 1 reply; 12+ messages in thread
From: Al Viro @ 2023-08-25 1:58 UTC (permalink / raw)
To: Jan Kara
Cc: linux-fsdevel, linux-block, Christoph Hellwig, Alasdair Kergon,
Andrew Morton, Anna Schumaker, Chao Yu, Christian Borntraeger,
Darrick J. Wong, Dave Kleikamp, David Sterba, dm-devel, drbd-dev,
Gao Xiang, Jack Wang, Jaegeuk Kim, jfs-discussion, Joern Engel,
Joseph Qi, Kent Overstreet, linux-bcache, linux-btrfs,
linux-erofs, linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd,
linux-nfs, linux-nilfs, linux-nvme, linux-pm, linux-raid,
linux-s390, linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer,
Minchan Kim, ocfs2-devel, reiserfs-devel, Sergey Senozhatsky,
Song Liu, Sven Schnelle, target-devel, Ted Tso, Trond Myklebust,
xen-devel
On Fri, Aug 11, 2023 at 01:04:31PM +0200, Jan Kara wrote:
> Hello,
>
> this is a v2 of the patch series which implements the idea of blkdev_get_by_*()
> calls returning bdev_handle which is then passed to blkdev_put() [1]. This
> makes the get and put calls for bdevs more obviously matching and allows us to
> propagate context from get to put without having to modify all the users
> (again!). In particular I need to propagate used open flags to blkdev_put() to
> be able count writeable opens and add support for blocking writes to mounted
> block devices. I'll send that series separately.
>
> The series is based on Christian's vfs tree as of yesterday as there is quite
> some overlap. Patches have passed some reasonable testing - I've tested block
> changes, md, dm, bcache, xfs, btrfs, ext4, swap. This obviously doesn't cover
> everything so I'd like to ask respective maintainers to review / test their
> changes. Thanks! I've pushed out the full branch to:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs.git bdev_handle
>
> to ease review / testing.
Hmm... Completely Insane Idea(tm): how about turning that thing inside out and
having your bdev_open_by... return an actual opened struct file?
After all, we do that for sockets and pipes just fine and that's a whole lot
hotter area.
Suppose we leave blkdev_open()/blkdev_release() as-is. No need to mess with
what we have for normal opened files for block devices. And have block_open_by_dev()
that would find bdev, etc., same yours does and shove it into anon file.
Paired with plain fput() - no need to bother with new primitives for closing.
With a helper returning I_BDEV(bdev_file_inode(file)) to get from those to bdev.
NOTE: I'm not suggesting replacing ->s_bdev with struct file * if we do that -
we want that value cached, obviously. Just store both...
Not saying it's a good idea, but... might be interesting to look into.
Comments?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-25 1:58 ` Al Viro
@ 2023-08-25 13:47 ` Jan Kara
2023-08-26 2:28 ` Al Viro
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Jan Kara @ 2023-08-25 13:47 UTC (permalink / raw)
To: Al Viro
Cc: Jan Kara, linux-fsdevel, linux-block, Christoph Hellwig,
Alasdair Kergon, Andrew Morton, Anna Schumaker, Chao Yu,
Christian Borntraeger, Darrick J. Wong, Dave Kleikamp,
David Sterba, dm-devel, drbd-dev, Gao Xiang, Jack Wang,
Jaegeuk Kim, jfs-discussion, Joern Engel, Joseph Qi,
Kent Overstreet, linux-bcache, linux-btrfs, linux-erofs,
linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd, linux-nfs,
linux-nilfs, linux-nvme, linux-pm, linux-raid, linux-s390,
linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer, Minchan Kim,
ocfs2-devel, reiserfs-devel, Sergey Senozhatsky, Song Liu,
Sven Schnelle, target-devel, Ted Tso, Trond Myklebust, xen-devel,
Jens Axboe, Christian Brauner
On Fri 25-08-23 02:58:43, Al Viro wrote:
> On Fri, Aug 11, 2023 at 01:04:31PM +0200, Jan Kara wrote:
> > Hello,
> >
> > this is a v2 of the patch series which implements the idea of blkdev_get_by_*()
> > calls returning bdev_handle which is then passed to blkdev_put() [1]. This
> > makes the get and put calls for bdevs more obviously matching and allows us to
> > propagate context from get to put without having to modify all the users
> > (again!). In particular I need to propagate used open flags to blkdev_put() to
> > be able count writeable opens and add support for blocking writes to mounted
> > block devices. I'll send that series separately.
> >
> > The series is based on Christian's vfs tree as of yesterday as there is quite
> > some overlap. Patches have passed some reasonable testing - I've tested block
> > changes, md, dm, bcache, xfs, btrfs, ext4, swap. This obviously doesn't cover
> > everything so I'd like to ask respective maintainers to review / test their
> > changes. Thanks! I've pushed out the full branch to:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs.git bdev_handle
> >
> > to ease review / testing.
>
> Hmm... Completely Insane Idea(tm): how about turning that thing inside out and
> having your bdev_open_by... return an actual opened struct file?
>
> After all, we do that for sockets and pipes just fine and that's a whole lot
> hotter area.
>
> Suppose we leave blkdev_open()/blkdev_release() as-is. No need to mess with
> what we have for normal opened files for block devices. And have block_open_by_dev()
> that would find bdev, etc., same yours does and shove it into anon file.
>
> Paired with plain fput() - no need to bother with new primitives for closing.
> With a helper returning I_BDEV(bdev_file_inode(file)) to get from those to bdev.
>
> NOTE: I'm not suggesting replacing ->s_bdev with struct file * if we do that -
> we want that value cached, obviously. Just store both...
>
> Not saying it's a good idea, but... might be interesting to look into.
> Comments?
I can see the appeal of not having to introduce the new bdev_handle type
and just using struct file which unifies in-kernel and userspace block
device opens. But I can see downsides too - the last fput() happening from
task work makes me a bit nervous whether it will not break something
somewhere with exclusive bdev opens. Getting from struct file to bdev is
somewhat harder but I guess a helper like F_BDEV() would solve that just
fine.
So besides my last fput() worry about I think this could work and would be
probably a bit nicer than what I have. But before going and redoing the whole
series let me gather some more feedback so that we don't go back and forth.
Christoph, Christian, Jens, any opinion?
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-25 13:47 ` Jan Kara
@ 2023-08-26 2:28 ` Al Viro
2023-08-28 14:27 ` Christoph Hellwig
2023-08-28 13:20 ` Christian Brauner
2023-08-28 14:22 ` Christoph Hellwig
2 siblings, 1 reply; 12+ messages in thread
From: Al Viro @ 2023-08-26 2:28 UTC (permalink / raw)
To: Jan Kara
Cc: linux-fsdevel, linux-block, Christoph Hellwig, Alasdair Kergon,
Andrew Morton, Anna Schumaker, Chao Yu, Christian Borntraeger,
Darrick J. Wong, Dave Kleikamp, David Sterba, dm-devel, drbd-dev,
Gao Xiang, Jack Wang, Jaegeuk Kim, jfs-discussion, Joern Engel,
Joseph Qi, Kent Overstreet, linux-bcache, linux-btrfs,
linux-erofs, linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd,
linux-nfs, linux-nilfs, linux-nvme, linux-pm, linux-raid,
linux-s390, linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer,
Minchan Kim, ocfs2-devel, reiserfs-devel, Sergey Senozhatsky,
Song Liu, Sven Schnelle, target-devel, Ted Tso, Trond Myklebust,
xen-devel, Jens Axboe, Christian Brauner
On Fri, Aug 25, 2023 at 03:47:56PM +0200, Jan Kara wrote:
> I can see the appeal of not having to introduce the new bdev_handle type
> and just using struct file which unifies in-kernel and userspace block
> device opens. But I can see downsides too - the last fput() happening from
> task work makes me a bit nervous whether it will not break something
> somewhere with exclusive bdev opens. Getting from struct file to bdev is
> somewhat harder but I guess a helper like F_BDEV() would solve that just
> fine.
>
> So besides my last fput() worry about I think this could work and would be
> probably a bit nicer than what I have. But before going and redoing the whole
> series let me gather some more feedback so that we don't go back and forth.
> Christoph, Christian, Jens, any opinion?
Redoing is not an issue - it can be done on top of your series just
as well. Async behaviour of fput() might be, but... need to look
through the actual users; for a lot of them it's perfectly fine.
FWIW, from a cursory look there appears to be a missing primitive: take
an opened bdev (or bdev_handle, with your variant, or opened file if we
go that way eventually) and claim it.
I mean, look at claim_swapfile() for example:
p->bdev = blkdev_get_by_dev(inode->i_rdev,
FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
if (IS_ERR(p->bdev)) {
error = PTR_ERR(p->bdev);
p->bdev = NULL;
return error;
}
p->old_block_size = block_size(p->bdev);
error = set_blocksize(p->bdev, PAGE_SIZE);
if (error < 0)
return error;
we already have the file opened, and we keep it opened all the way until
the swapoff(2); here we have noticed that it's a block device and we
* open the fucker again (by device number), this time claiming
it with our swap_info_struct as holder, to be closed at swapoff(2) time
(just before we close the file)
* flip the block size to PAGE_SIZE, to be reverted at swapoff(2)
time That really looks like it ought to be
* take the opened file, see that it's a block device
* try to claim it with that holder
* on success, flip the block size
with close_filp() in the swapoff(2) (or failure exit path in swapon(2))
doing what it would've done for an O_EXCL opened block device.
The only difference from O_EXCL userland open is that here we would
end up with holder pointing not to struct file in question, but to our
swap_info_struct. It will do the right thing.
This extra open is entirely due to "well, we need to claim it and the
primitive that does that happens to be tied to opening"; feels rather
counter-intuitive.
For that matter, we could add an explicit "unclaim" primitive - might
be easier to follow. That would add another example where that could
be used - in blkdev_bszset() we have an opened block device (it's an
ioctl, after all), we want to change block size and we *really* don't
want to have that happen under a mounted filesystem. So if it's not
opened exclusive, we do a temporary exclusive open of own and act on
that instead. Might as well go for a temporary claim...
BTW, what happens if two threads call ioctl(fd, BLKBSZSET, &n)
for the same descriptor that happens to have been opened O_EXCL?
Without O_EXCL they would've been unable to claim the sucker at the same
time - the holder we are using is the address of a function argument,
i.e. something that points to kernel stack of the caller. Those would
conflict and we either get set_blocksize() calls fully serialized, or
one of the callers would eat -EBUSY. Not so in "opened with O_EXCL"
case - they can very well overlap and IIRC set_blocksize() does *not*
expect that kind of crap... It's all under CAP_SYS_ADMIN, so it's not
as if it was a meaningful security hole anyway, but it does look fishy.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-26 2:28 ` Al Viro
@ 2023-08-28 14:27 ` Christoph Hellwig
0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2023-08-28 14:27 UTC (permalink / raw)
To: Al Viro
Cc: Jan Kara, linux-fsdevel, linux-block, Christoph Hellwig,
Alasdair Kergon, Andrew Morton, Anna Schumaker, Chao Yu,
Christian Borntraeger, Darrick J. Wong, Dave Kleikamp,
David Sterba, dm-devel, drbd-dev, Gao Xiang, Jack Wang,
Jaegeuk Kim, jfs-discussion, Joern Engel, Joseph Qi,
Kent Overstreet, linux-bcache, linux-btrfs, linux-erofs,
linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd, linux-nfs,
linux-nilfs, linux-nvme, linux-pm, linux-raid, linux-s390,
linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer, Minchan Kim,
ocfs2-devel, reiserfs-devel, Sergey Senozhatsky, Song Liu,
Sven Schnelle, target-devel, Ted Tso, Trond Myklebust, xen-devel,
Jens Axboe, Christian Brauner
On Sat, Aug 26, 2023 at 03:28:52AM +0100, Al Viro wrote:
> I mean, look at claim_swapfile() for example:
> p->bdev = blkdev_get_by_dev(inode->i_rdev,
> FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
> if (IS_ERR(p->bdev)) {
> error = PTR_ERR(p->bdev);
> p->bdev = NULL;
> return error;
> }
> p->old_block_size = block_size(p->bdev);
> error = set_blocksize(p->bdev, PAGE_SIZE);
> if (error < 0)
> return error;
> we already have the file opened, and we keep it opened all the way until
> the swapoff(2); here we have noticed that it's a block device and we
> * open the fucker again (by device number), this time claiming
> it with our swap_info_struct as holder, to be closed at swapoff(2) time
> (just before we close the file)
Note that some drivers look at FMODE_EXCL/BLK_OPEN_EXCL in ->open.
These are probably bogus and maybe we want to kill them, but that will
need an audit first.
> BTW, what happens if two threads call ioctl(fd, BLKBSZSET, &n)
> for the same descriptor that happens to have been opened O_EXCL?
> Without O_EXCL they would've been unable to claim the sucker at the same
> time - the holder we are using is the address of a function argument,
> i.e. something that points to kernel stack of the caller. Those would
> conflict and we either get set_blocksize() calls fully serialized, or
> one of the callers would eat -EBUSY. Not so in "opened with O_EXCL"
> case - they can very well overlap and IIRC set_blocksize() does *not*
> expect that kind of crap... It's all under CAP_SYS_ADMIN, so it's not
> as if it was a meaningful security hole anyway, but it does look fishy.
The user get to keep the pieces.. BLKBSZSET is kinda bogus anyway
as the soft blocksize only matters for buffer_head-like I/O, and
there only for file systems. Not idea why anyone would set it manually.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-25 13:47 ` Jan Kara
2023-08-26 2:28 ` Al Viro
@ 2023-08-28 13:20 ` Christian Brauner
2023-08-28 14:22 ` Christoph Hellwig
2 siblings, 0 replies; 12+ messages in thread
From: Christian Brauner @ 2023-08-28 13:20 UTC (permalink / raw)
To: Jan Kara
Cc: Al Viro, linux-fsdevel, linux-block, Christoph Hellwig,
Alasdair Kergon, Andrew Morton, Anna Schumaker, Chao Yu,
Christian Borntraeger, Darrick J. Wong, Dave Kleikamp,
David Sterba, dm-devel, drbd-dev, Gao Xiang, Jack Wang,
Jaegeuk Kim, jfs-discussion, Joern Engel, Joseph Qi,
Kent Overstreet, linux-bcache, linux-btrfs, linux-erofs,
linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd, linux-nfs,
linux-nilfs, linux-nvme, linux-pm, linux-raid, linux-s390,
linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer, Minchan Kim,
ocfs2-devel, reiserfs-devel, Sergey Senozhatsky, Song Liu,
Sven Schnelle, target-devel, Ted Tso, Trond Myklebust, xen-devel,
Jens Axboe
> So besides my last fput() worry about I think this could work and would be
> probably a bit nicer than what I have. But before going and redoing the whole
> series let me gather some more feedback so that we don't go back and forth.
> Christoph, Christian, Jens, any opinion?
I'll be a bit under water for the next few days, I expect but I'll get
back to this. I think not making you redo this whole thing from scratch
is what I'd prefer unless there's really clear advantages. But I don't
want to offer a haphazard opinion in the middle of the merge window.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-25 13:47 ` Jan Kara
2023-08-26 2:28 ` Al Viro
2023-08-28 13:20 ` Christian Brauner
@ 2023-08-28 14:22 ` Christoph Hellwig
2 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2023-08-28 14:22 UTC (permalink / raw)
To: Jan Kara
Cc: Al Viro, linux-fsdevel, linux-block, Christoph Hellwig,
Alasdair Kergon, Andrew Morton, Anna Schumaker, Chao Yu,
Christian Borntraeger, Darrick J. Wong, Dave Kleikamp,
David Sterba, dm-devel, drbd-dev, Gao Xiang, Jack Wang,
Jaegeuk Kim, jfs-discussion, Joern Engel, Joseph Qi,
Kent Overstreet, linux-bcache, linux-btrfs, linux-erofs,
linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd, linux-nfs,
linux-nilfs, linux-nvme, linux-pm, linux-raid, linux-s390,
linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer, Minchan Kim,
ocfs2-devel, reiserfs-devel, Sergey Senozhatsky, Song Liu,
Sven Schnelle, target-devel, Ted Tso, Trond Myklebust, xen-devel,
Jens Axboe, Christian Brauner
On Fri, Aug 25, 2023 at 03:47:56PM +0200, Jan Kara wrote:
> I can see the appeal of not having to introduce the new bdev_handle type
> and just using struct file which unifies in-kernel and userspace block
> device opens. But I can see downsides too - the last fput() happening from
> task work makes me a bit nervous whether it will not break something
> somewhere with exclusive bdev opens. Getting from struct file to bdev is
> somewhat harder but I guess a helper like F_BDEV() would solve that just
> fine.
>
> So besides my last fput() worry about I think this could work and would be
> probably a bit nicer than what I have. But before going and redoing the whole
> series let me gather some more feedback so that we don't go back and forth.
> Christoph, Christian, Jens, any opinion?
I did think about the file a bit. The fact that we'd need something
like an anon_file for the by_dev open was always a huge turn off for
me, but maybe my concern is overblown. Having a struct file would
actually be really useful for a bunch of users.
^ permalink raw reply [flat|nested] 12+ messages in thread