* [PATCH 12/29] mtd: block2mtd: Convert to bdev_open_by_dev/path()
2023-08-11 11:04 [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Jan Kara
@ 2023-08-11 11:04 ` Jan Kara
2023-08-11 12:27 ` [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Christoph Hellwig
2023-08-25 1:58 ` Al Viro
2 siblings, 0 replies; 9+ messages in thread
From: Jan Kara @ 2023-08-11 11:04 UTC (permalink / raw)
To: linux-fsdevel
Cc: linux-block, Christoph Hellwig, Jan Kara, Joern Engel, linux-mtd
Convert block2mtd to use bdev_open_by_dev() and bdev_open_by_path() and
pass the handle around.
CC: Joern Engel <joern@lazybastard.org>
CC: linux-mtd@lists.infradead.org
Signed-off-by: Jan Kara <jack@suse.cz>
---
drivers/mtd/devices/block2mtd.c | 51 +++++++++++++++++++--------------
1 file changed, 29 insertions(+), 22 deletions(-)
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index be106dc20ff3..aa44a23ec045 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -37,7 +37,7 @@
/* Info for the block device */
struct block2mtd_dev {
struct list_head list;
- struct block_device *blkdev;
+ struct bdev_handle *bdev_handle;
struct mtd_info mtd;
struct mutex write_mutex;
};
@@ -55,7 +55,8 @@ static struct page *page_read(struct address_space *mapping, pgoff_t index)
/* erase a specified part of the device */
static int _block2mtd_erase(struct block2mtd_dev *dev, loff_t to, size_t len)
{
- struct address_space *mapping = dev->blkdev->bd_inode->i_mapping;
+ struct address_space *mapping =
+ dev->bdev_handle->bdev->bd_inode->i_mapping;
struct page *page;
pgoff_t index = to >> PAGE_SHIFT; // page index
int pages = len >> PAGE_SHIFT;
@@ -105,6 +106,8 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, u_char *buf)
{
struct block2mtd_dev *dev = mtd->priv;
+ struct address_space *mapping =
+ dev->bdev_handle->bdev->bd_inode->i_mapping;
struct page *page;
pgoff_t index = from >> PAGE_SHIFT;
int offset = from & (PAGE_SIZE-1);
@@ -117,7 +120,7 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
cpylen = len; // this page
len = len - cpylen;
- page = page_read(dev->blkdev->bd_inode->i_mapping, index);
+ page = page_read(mapping, index);
if (IS_ERR(page))
return PTR_ERR(page);
@@ -139,7 +142,8 @@ static int _block2mtd_write(struct block2mtd_dev *dev, const u_char *buf,
loff_t to, size_t len, size_t *retlen)
{
struct page *page;
- struct address_space *mapping = dev->blkdev->bd_inode->i_mapping;
+ struct address_space *mapping =
+ dev->bdev_handle->bdev->bd_inode->i_mapping;
pgoff_t index = to >> PAGE_SHIFT; // page index
int offset = to & ~PAGE_MASK; // page offset
int cpylen;
@@ -194,7 +198,7 @@ static int block2mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
static void block2mtd_sync(struct mtd_info *mtd)
{
struct block2mtd_dev *dev = mtd->priv;
- sync_blockdev(dev->blkdev);
+ sync_blockdev(dev->bdev_handle->bdev);
return;
}
@@ -206,10 +210,10 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
kfree(dev->mtd.name);
- if (dev->blkdev) {
- invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping,
- 0, -1);
- blkdev_put(dev->blkdev, NULL);
+ if (dev->bdev_handle) {
+ invalidate_mapping_pages(
+ dev->bdev_handle->bdev->bd_inode->i_mapping, 0, -1);
+ bdev_release(dev->bdev_handle);
}
kfree(dev);
@@ -219,10 +223,10 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
* This function is marked __ref because it calls the __init marked
* early_lookup_bdev when called from the early boot code.
*/
-static struct block_device __ref *mdtblock_early_get_bdev(const char *devname,
+static struct bdev_handle __ref *mdtblock_early_get_bdev(const char *devname,
blk_mode_t mode, int timeout, struct block2mtd_dev *dev)
{
- struct block_device *bdev = ERR_PTR(-ENODEV);
+ struct bdev_handle *bdev_handle = ERR_PTR(-ENODEV);
#ifndef MODULE
int i;
@@ -230,7 +234,7 @@ static struct block_device __ref *mdtblock_early_get_bdev(const char *devname,
* We can't use early_lookup_bdev from a running system.
*/
if (system_state >= SYSTEM_RUNNING)
- return bdev;
+ return bdev_handle;
/*
* We might not have the root device mounted at this point.
@@ -249,19 +253,20 @@ static struct block_device __ref *mdtblock_early_get_bdev(const char *devname,
wait_for_device_probe();
if (!early_lookup_bdev(devname, &devt)) {
- bdev = blkdev_get_by_dev(devt, mode, dev, NULL);
- if (!IS_ERR(bdev))
+ bdev_handle = bdev_open_by_dev(devt, mode, dev, NULL);
+ if (!IS_ERR(bdev_handle))
break;
}
}
#endif
- return bdev;
+ return bdev_handle;
}
static struct block2mtd_dev *add_device(char *devname, int erase_size,
char *label, int timeout)
{
const blk_mode_t mode = BLK_OPEN_READ | BLK_OPEN_WRITE;
+ struct bdev_handle *bdev_handle;
struct block_device *bdev;
struct block2mtd_dev *dev;
char *name;
@@ -274,21 +279,23 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size,
return NULL;
/* Get a handle on the device */
- bdev = blkdev_get_by_path(devname, mode, dev, NULL);
- if (IS_ERR(bdev))
- bdev = mdtblock_early_get_bdev(devname, mode, timeout, dev);
- if (IS_ERR(bdev)) {
+ bdev_handle = bdev_open_by_path(devname, mode, dev, NULL);
+ if (IS_ERR(bdev_handle))
+ bdev_handle = mdtblock_early_get_bdev(devname, mode, timeout,
+ dev);
+ if (IS_ERR(bdev_handle)) {
pr_err("error: cannot open device %s\n", devname);
goto err_free_block2mtd;
}
- dev->blkdev = bdev;
+ dev->bdev_handle = bdev_handle;
+ bdev = bdev_handle->bdev;
if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
pr_err("attempting to use an MTD device as a block device\n");
goto err_free_block2mtd;
}
- if ((long)dev->blkdev->bd_inode->i_size % erase_size) {
+ if ((long)bdev->bd_inode->i_size % erase_size) {
pr_err("erasesize must be a divisor of device size\n");
goto err_free_block2mtd;
}
@@ -306,7 +313,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size,
dev->mtd.name = name;
- dev->mtd.size = dev->blkdev->bd_inode->i_size & PAGE_MASK;
+ dev->mtd.size = bdev->bd_inode->i_size & PAGE_MASK;
dev->mtd.erasesize = erase_size;
dev->mtd.writesize = 1;
dev->mtd.writebufsize = PAGE_SIZE;
--
2.35.3
______________________________________________________
Linux MTD discussion mailing list
http://lists.infradead.org/mailman/listinfo/linux-mtd/
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-11 11:04 [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Jan Kara
2023-08-11 11:04 ` [PATCH 12/29] mtd: block2mtd: Convert to bdev_open_by_dev/path() Jan Kara
@ 2023-08-11 12:27 ` Christoph Hellwig
2023-08-25 1:58 ` Al Viro
2 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2023-08-11 12:27 UTC (permalink / raw)
To: Jan Kara
Cc: linux-fsdevel, linux-block, Christoph Hellwig, Alasdair Kergon,
Andrew Morton, Anna Schumaker, Chao Yu, Christian Borntraeger,
Darrick J. Wong, Dave Kleikamp, David Sterba, dm-devel, drbd-dev,
Gao Xiang, Jack Wang, Jaegeuk Kim, jfs-discussion, Joern Engel,
Joseph Qi, Kent Overstreet, linux-bcache, linux-btrfs,
linux-erofs, linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd,
linux-nfs, linux-nilfs, linux-nvme, linux-pm, linux-raid,
linux-s390, linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer,
Minchan Kim, ocfs2-devel, reiserfs-devel, Sergey Senozhatsky,
Song Liu, Sven Schnelle, target-devel, Ted Tso, Trond Myklebust,
xen-devel
Except for a mostly cosmetic nitpick this looks good to me:
Acked-by: Christoph Hellwig <hch@lst.de>
That's not eactly the deep review I'd like to do, but as I'm about to
head out for vacation that's probably as good as it gets.
______________________________________________________
Linux MTD discussion mailing list
http://lists.infradead.org/mailman/listinfo/linux-mtd/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-11 11:04 [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Jan Kara
2023-08-11 11:04 ` [PATCH 12/29] mtd: block2mtd: Convert to bdev_open_by_dev/path() Jan Kara
2023-08-11 12:27 ` [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle Christoph Hellwig
@ 2023-08-25 1:58 ` Al Viro
2023-08-25 13:47 ` Jan Kara
2 siblings, 1 reply; 9+ messages in thread
From: Al Viro @ 2023-08-25 1:58 UTC (permalink / raw)
To: Jan Kara
Cc: linux-fsdevel, linux-block, Christoph Hellwig, Alasdair Kergon,
Andrew Morton, Anna Schumaker, Chao Yu, Christian Borntraeger,
Darrick J. Wong, Dave Kleikamp, David Sterba, dm-devel, drbd-dev,
Gao Xiang, Jack Wang, Jaegeuk Kim, jfs-discussion, Joern Engel,
Joseph Qi, Kent Overstreet, linux-bcache, linux-btrfs,
linux-erofs, linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd,
linux-nfs, linux-nilfs, linux-nvme, linux-pm, linux-raid,
linux-s390, linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer,
Minchan Kim, ocfs2-devel, reiserfs-devel, Sergey Senozhatsky,
Song Liu, Sven Schnelle, target-devel, Ted Tso, Trond Myklebust,
xen-devel
On Fri, Aug 11, 2023 at 01:04:31PM +0200, Jan Kara wrote:
> Hello,
>
> this is a v2 of the patch series which implements the idea of blkdev_get_by_*()
> calls returning bdev_handle which is then passed to blkdev_put() [1]. This
> makes the get and put calls for bdevs more obviously matching and allows us to
> propagate context from get to put without having to modify all the users
> (again!). In particular I need to propagate used open flags to blkdev_put() to
> be able count writeable opens and add support for blocking writes to mounted
> block devices. I'll send that series separately.
>
> The series is based on Christian's vfs tree as of yesterday as there is quite
> some overlap. Patches have passed some reasonable testing - I've tested block
> changes, md, dm, bcache, xfs, btrfs, ext4, swap. This obviously doesn't cover
> everything so I'd like to ask respective maintainers to review / test their
> changes. Thanks! I've pushed out the full branch to:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs.git bdev_handle
>
> to ease review / testing.
Hmm... Completely Insane Idea(tm): how about turning that thing inside out and
having your bdev_open_by... return an actual opened struct file?
After all, we do that for sockets and pipes just fine and that's a whole lot
hotter area.
Suppose we leave blkdev_open()/blkdev_release() as-is. No need to mess with
what we have for normal opened files for block devices. And have block_open_by_dev()
that would find bdev, etc., same yours does and shove it into anon file.
Paired with plain fput() - no need to bother with new primitives for closing.
With a helper returning I_BDEV(bdev_file_inode(file)) to get from those to bdev.
NOTE: I'm not suggesting replacing ->s_bdev with struct file * if we do that -
we want that value cached, obviously. Just store both...
Not saying it's a good idea, but... might be interesting to look into.
Comments?
______________________________________________________
Linux MTD discussion mailing list
http://lists.infradead.org/mailman/listinfo/linux-mtd/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-25 1:58 ` Al Viro
@ 2023-08-25 13:47 ` Jan Kara
2023-08-26 2:28 ` Al Viro
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Jan Kara @ 2023-08-25 13:47 UTC (permalink / raw)
To: Al Viro
Cc: Jan Kara, linux-fsdevel, linux-block, Christoph Hellwig,
Alasdair Kergon, Andrew Morton, Anna Schumaker, Chao Yu,
Christian Borntraeger, Darrick J. Wong, Dave Kleikamp,
David Sterba, dm-devel, drbd-dev, Gao Xiang, Jack Wang,
Jaegeuk Kim, jfs-discussion, Joern Engel, Joseph Qi,
Kent Overstreet, linux-bcache, linux-btrfs, linux-erofs,
linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd, linux-nfs,
linux-nilfs, linux-nvme, linux-pm, linux-raid, linux-s390,
linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer, Minchan Kim,
ocfs2-devel, reiserfs-devel, Sergey Senozhatsky, Song Liu,
Sven Schnelle, target-devel, Ted Tso, Trond Myklebust, xen-devel,
Jens Axboe, Christian Brauner
On Fri 25-08-23 02:58:43, Al Viro wrote:
> On Fri, Aug 11, 2023 at 01:04:31PM +0200, Jan Kara wrote:
> > Hello,
> >
> > this is a v2 of the patch series which implements the idea of blkdev_get_by_*()
> > calls returning bdev_handle which is then passed to blkdev_put() [1]. This
> > makes the get and put calls for bdevs more obviously matching and allows us to
> > propagate context from get to put without having to modify all the users
> > (again!). In particular I need to propagate used open flags to blkdev_put() to
> > be able count writeable opens and add support for blocking writes to mounted
> > block devices. I'll send that series separately.
> >
> > The series is based on Christian's vfs tree as of yesterday as there is quite
> > some overlap. Patches have passed some reasonable testing - I've tested block
> > changes, md, dm, bcache, xfs, btrfs, ext4, swap. This obviously doesn't cover
> > everything so I'd like to ask respective maintainers to review / test their
> > changes. Thanks! I've pushed out the full branch to:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs.git bdev_handle
> >
> > to ease review / testing.
>
> Hmm... Completely Insane Idea(tm): how about turning that thing inside out and
> having your bdev_open_by... return an actual opened struct file?
>
> After all, we do that for sockets and pipes just fine and that's a whole lot
> hotter area.
>
> Suppose we leave blkdev_open()/blkdev_release() as-is. No need to mess with
> what we have for normal opened files for block devices. And have block_open_by_dev()
> that would find bdev, etc., same yours does and shove it into anon file.
>
> Paired with plain fput() - no need to bother with new primitives for closing.
> With a helper returning I_BDEV(bdev_file_inode(file)) to get from those to bdev.
>
> NOTE: I'm not suggesting replacing ->s_bdev with struct file * if we do that -
> we want that value cached, obviously. Just store both...
>
> Not saying it's a good idea, but... might be interesting to look into.
> Comments?
I can see the appeal of not having to introduce the new bdev_handle type
and just using struct file which unifies in-kernel and userspace block
device opens. But I can see downsides too - the last fput() happening from
task work makes me a bit nervous whether it will not break something
somewhere with exclusive bdev opens. Getting from struct file to bdev is
somewhat harder but I guess a helper like F_BDEV() would solve that just
fine.
So besides my last fput() worry about I think this could work and would be
probably a bit nicer than what I have. But before going and redoing the whole
series let me gather some more feedback so that we don't go back and forth.
Christoph, Christian, Jens, any opinion?
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
______________________________________________________
Linux MTD discussion mailing list
http://lists.infradead.org/mailman/listinfo/linux-mtd/
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-25 13:47 ` Jan Kara
@ 2023-08-26 2:28 ` Al Viro
2023-08-28 14:27 ` Christoph Hellwig
2023-08-28 13:20 ` Christian Brauner
2023-08-28 14:22 ` Christoph Hellwig
2 siblings, 1 reply; 9+ messages in thread
From: Al Viro @ 2023-08-26 2:28 UTC (permalink / raw)
To: Jan Kara
Cc: linux-fsdevel, linux-block, Christoph Hellwig, Alasdair Kergon,
Andrew Morton, Anna Schumaker, Chao Yu, Christian Borntraeger,
Darrick J. Wong, Dave Kleikamp, David Sterba, dm-devel, drbd-dev,
Gao Xiang, Jack Wang, Jaegeuk Kim, jfs-discussion, Joern Engel,
Joseph Qi, Kent Overstreet, linux-bcache, linux-btrfs,
linux-erofs, linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd,
linux-nfs, linux-nilfs, linux-nvme, linux-pm, linux-raid,
linux-s390, linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer,
Minchan Kim, ocfs2-devel, reiserfs-devel, Sergey Senozhatsky,
Song Liu, Sven Schnelle, target-devel, Ted Tso, Trond Myklebust,
xen-devel, Jens Axboe, Christian Brauner
On Fri, Aug 25, 2023 at 03:47:56PM +0200, Jan Kara wrote:
> I can see the appeal of not having to introduce the new bdev_handle type
> and just using struct file which unifies in-kernel and userspace block
> device opens. But I can see downsides too - the last fput() happening from
> task work makes me a bit nervous whether it will not break something
> somewhere with exclusive bdev opens. Getting from struct file to bdev is
> somewhat harder but I guess a helper like F_BDEV() would solve that just
> fine.
>
> So besides my last fput() worry about I think this could work and would be
> probably a bit nicer than what I have. But before going and redoing the whole
> series let me gather some more feedback so that we don't go back and forth.
> Christoph, Christian, Jens, any opinion?
Redoing is not an issue - it can be done on top of your series just
as well. Async behaviour of fput() might be, but... need to look
through the actual users; for a lot of them it's perfectly fine.
FWIW, from a cursory look there appears to be a missing primitive: take
an opened bdev (or bdev_handle, with your variant, or opened file if we
go that way eventually) and claim it.
I mean, look at claim_swapfile() for example:
p->bdev = blkdev_get_by_dev(inode->i_rdev,
FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
if (IS_ERR(p->bdev)) {
error = PTR_ERR(p->bdev);
p->bdev = NULL;
return error;
}
p->old_block_size = block_size(p->bdev);
error = set_blocksize(p->bdev, PAGE_SIZE);
if (error < 0)
return error;
we already have the file opened, and we keep it opened all the way until
the swapoff(2); here we have noticed that it's a block device and we
* open the fucker again (by device number), this time claiming
it with our swap_info_struct as holder, to be closed at swapoff(2) time
(just before we close the file)
* flip the block size to PAGE_SIZE, to be reverted at swapoff(2)
time That really looks like it ought to be
* take the opened file, see that it's a block device
* try to claim it with that holder
* on success, flip the block size
with close_filp() in the swapoff(2) (or failure exit path in swapon(2))
doing what it would've done for an O_EXCL opened block device.
The only difference from O_EXCL userland open is that here we would
end up with holder pointing not to struct file in question, but to our
swap_info_struct. It will do the right thing.
This extra open is entirely due to "well, we need to claim it and the
primitive that does that happens to be tied to opening"; feels rather
counter-intuitive.
For that matter, we could add an explicit "unclaim" primitive - might
be easier to follow. That would add another example where that could
be used - in blkdev_bszset() we have an opened block device (it's an
ioctl, after all), we want to change block size and we *really* don't
want to have that happen under a mounted filesystem. So if it's not
opened exclusive, we do a temporary exclusive open of own and act on
that instead. Might as well go for a temporary claim...
BTW, what happens if two threads call ioctl(fd, BLKBSZSET, &n)
for the same descriptor that happens to have been opened O_EXCL?
Without O_EXCL they would've been unable to claim the sucker at the same
time - the holder we are using is the address of a function argument,
i.e. something that points to kernel stack of the caller. Those would
conflict and we either get set_blocksize() calls fully serialized, or
one of the callers would eat -EBUSY. Not so in "opened with O_EXCL"
case - they can very well overlap and IIRC set_blocksize() does *not*
expect that kind of crap... It's all under CAP_SYS_ADMIN, so it's not
as if it was a meaningful security hole anyway, but it does look fishy.
______________________________________________________
Linux MTD discussion mailing list
http://lists.infradead.org/mailman/listinfo/linux-mtd/
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-26 2:28 ` Al Viro
@ 2023-08-28 14:27 ` Christoph Hellwig
0 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2023-08-28 14:27 UTC (permalink / raw)
To: Al Viro
Cc: Jan Kara, linux-fsdevel, linux-block, Christoph Hellwig,
Alasdair Kergon, Andrew Morton, Anna Schumaker, Chao Yu,
Christian Borntraeger, Darrick J. Wong, Dave Kleikamp,
David Sterba, dm-devel, drbd-dev, Gao Xiang, Jack Wang,
Jaegeuk Kim, jfs-discussion, Joern Engel, Joseph Qi,
Kent Overstreet, linux-bcache, linux-btrfs, linux-erofs,
linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd, linux-nfs,
linux-nilfs, linux-nvme, linux-pm, linux-raid, linux-s390,
linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer, Minchan Kim,
ocfs2-devel, reiserfs-devel, Sergey Senozhatsky, Song Liu,
Sven Schnelle, target-devel, Ted Tso, Trond Myklebust, xen-devel,
Jens Axboe, Christian Brauner
On Sat, Aug 26, 2023 at 03:28:52AM +0100, Al Viro wrote:
> I mean, look at claim_swapfile() for example:
> p->bdev = blkdev_get_by_dev(inode->i_rdev,
> FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
> if (IS_ERR(p->bdev)) {
> error = PTR_ERR(p->bdev);
> p->bdev = NULL;
> return error;
> }
> p->old_block_size = block_size(p->bdev);
> error = set_blocksize(p->bdev, PAGE_SIZE);
> if (error < 0)
> return error;
> we already have the file opened, and we keep it opened all the way until
> the swapoff(2); here we have noticed that it's a block device and we
> * open the fucker again (by device number), this time claiming
> it with our swap_info_struct as holder, to be closed at swapoff(2) time
> (just before we close the file)
Note that some drivers look at FMODE_EXCL/BLK_OPEN_EXCL in ->open.
These are probably bogus and maybe we want to kill them, but that will
need an audit first.
> BTW, what happens if two threads call ioctl(fd, BLKBSZSET, &n)
> for the same descriptor that happens to have been opened O_EXCL?
> Without O_EXCL they would've been unable to claim the sucker at the same
> time - the holder we are using is the address of a function argument,
> i.e. something that points to kernel stack of the caller. Those would
> conflict and we either get set_blocksize() calls fully serialized, or
> one of the callers would eat -EBUSY. Not so in "opened with O_EXCL"
> case - they can very well overlap and IIRC set_blocksize() does *not*
> expect that kind of crap... It's all under CAP_SYS_ADMIN, so it's not
> as if it was a meaningful security hole anyway, but it does look fishy.
The user get to keep the pieces.. BLKBSZSET is kinda bogus anyway
as the soft blocksize only matters for buffer_head-like I/O, and
there only for file systems. Not idea why anyone would set it manually.
______________________________________________________
Linux MTD discussion mailing list
http://lists.infradead.org/mailman/listinfo/linux-mtd/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-25 13:47 ` Jan Kara
2023-08-26 2:28 ` Al Viro
@ 2023-08-28 13:20 ` Christian Brauner
2023-08-28 14:22 ` Christoph Hellwig
2 siblings, 0 replies; 9+ messages in thread
From: Christian Brauner @ 2023-08-28 13:20 UTC (permalink / raw)
To: Jan Kara
Cc: Al Viro, linux-fsdevel, linux-block, Christoph Hellwig,
Alasdair Kergon, Andrew Morton, Anna Schumaker, Chao Yu,
Christian Borntraeger, Darrick J. Wong, Dave Kleikamp,
David Sterba, dm-devel, drbd-dev, Gao Xiang, Jack Wang,
Jaegeuk Kim, jfs-discussion, Joern Engel, Joseph Qi,
Kent Overstreet, linux-bcache, linux-btrfs, linux-erofs,
linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd, linux-nfs,
linux-nilfs, linux-nvme, linux-pm, linux-raid, linux-s390,
linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer, Minchan Kim,
ocfs2-devel, reiserfs-devel, Sergey Senozhatsky, Song Liu,
Sven Schnelle, target-devel, Ted Tso, Trond Myklebust, xen-devel,
Jens Axboe
> So besides my last fput() worry about I think this could work and would be
> probably a bit nicer than what I have. But before going and redoing the whole
> series let me gather some more feedback so that we don't go back and forth.
> Christoph, Christian, Jens, any opinion?
I'll be a bit under water for the next few days, I expect but I'll get
back to this. I think not making you redo this whole thing from scratch
is what I'd prefer unless there's really clear advantages. But I don't
want to offer a haphazard opinion in the middle of the merge window.
______________________________________________________
Linux MTD discussion mailing list
http://lists.infradead.org/mailman/listinfo/linux-mtd/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 0/29] block: Make blkdev_get_by_*() return handle
2023-08-25 13:47 ` Jan Kara
2023-08-26 2:28 ` Al Viro
2023-08-28 13:20 ` Christian Brauner
@ 2023-08-28 14:22 ` Christoph Hellwig
2 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2023-08-28 14:22 UTC (permalink / raw)
To: Jan Kara
Cc: Al Viro, linux-fsdevel, linux-block, Christoph Hellwig,
Alasdair Kergon, Andrew Morton, Anna Schumaker, Chao Yu,
Christian Borntraeger, Darrick J. Wong, Dave Kleikamp,
David Sterba, dm-devel, drbd-dev, Gao Xiang, Jack Wang,
Jaegeuk Kim, jfs-discussion, Joern Engel, Joseph Qi,
Kent Overstreet, linux-bcache, linux-btrfs, linux-erofs,
linux-ext4, linux-f2fs-devel, linux-mm, linux-mtd, linux-nfs,
linux-nilfs, linux-nvme, linux-pm, linux-raid, linux-s390,
linux-scsi, linux-xfs, Md. Haris Iqbal, Mike Snitzer, Minchan Kim,
ocfs2-devel, reiserfs-devel, Sergey Senozhatsky, Song Liu,
Sven Schnelle, target-devel, Ted Tso, Trond Myklebust, xen-devel,
Jens Axboe, Christian Brauner
On Fri, Aug 25, 2023 at 03:47:56PM +0200, Jan Kara wrote:
> I can see the appeal of not having to introduce the new bdev_handle type
> and just using struct file which unifies in-kernel and userspace block
> device opens. But I can see downsides too - the last fput() happening from
> task work makes me a bit nervous whether it will not break something
> somewhere with exclusive bdev opens. Getting from struct file to bdev is
> somewhat harder but I guess a helper like F_BDEV() would solve that just
> fine.
>
> So besides my last fput() worry about I think this could work and would be
> probably a bit nicer than what I have. But before going and redoing the whole
> series let me gather some more feedback so that we don't go back and forth.
> Christoph, Christian, Jens, any opinion?
I did think about the file a bit. The fact that we'd need something
like an anon_file for the by_dev open was always a huge turn off for
me, but maybe my concern is overblown. Having a struct file would
actually be really useful for a bunch of users.
______________________________________________________
Linux MTD discussion mailing list
http://lists.infradead.org/mailman/listinfo/linux-mtd/
^ permalink raw reply [flat|nested] 9+ messages in thread