linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christian Brauner <brauner@kernel.org>
To: Jan Kara <jack@suse.cz>, Christoph Hellwig <hch@lst.de>,
	 Jens Axboe <axboe@kernel.dk>
Cc: "Darrick J. Wong" <djwong@kernel.org>,
	linux-fsdevel@vger.kernel.org,  linux-block@vger.kernel.org,
	Christian Brauner <brauner@kernel.org>
Subject: [PATCH RFC 30/34] bdev: rework bdev_open_by_dev()
Date: Wed, 03 Jan 2024 13:55:28 +0100	[thread overview]
Message-ID: <20240103-vfs-bdev-file-v1-30-6c8ee55fb6ef@kernel.org> (raw)
In-Reply-To: <20240103-vfs-bdev-file-v1-0-6c8ee55fb6ef@kernel.org>

Now that we always use files when opening block devices, rework
bdev_open_by_dev() to work well with both bdev_file_open_by_*() and
blkdev_open().

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 block/bdev.c | 139 +++++++++++++++++++++++++++++++++--------------------------
 block/blk.h  |   6 +--
 block/fops.c |  34 ++++++---------
 3 files changed, 94 insertions(+), 85 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index b276ef994858..2867edba0169 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -704,6 +704,24 @@ static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
 	return ret;
 }
 
+int bdev_permission(dev_t dev, blk_mode_t mode, void *holder)
+{
+	int ret;
+
+	ret = devcgroup_check_permission(
+		DEVCG_DEV_BLOCK, MAJOR(dev), MINOR(dev),
+		((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
+			((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
+	if (ret)
+		return ret;
+
+	/* Blocking writes requires exclusive opener */
+	if (mode & BLK_OPEN_RESTRICT_WRITES && !holder)
+		return -EINVAL;
+
+	return 0;
+}
+
 static void blkdev_put_part(struct block_device *part)
 {
 	struct block_device *whole = bdev_whole(part);
@@ -796,15 +814,15 @@ static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode)
 }
 
 /**
- * bdev_open_by_dev - open a block device by device number
- * @dev: device number of block device to open
+ * bdev_open - open a block device
+ * @bdev: block device to open
  * @mode: open mode (BLK_OPEN_*)
  * @holder: exclusive holder identifier
  * @hops: holder operations
+ * @f_bdev: file for the block device
  *
- * Open the block device described by device number @dev. If @holder is not
- * %NULL, the block device is opened with exclusive access.  Exclusive opens may
- * nest for the same @holder.
+ * Open the block device. If @holder is not %NULL, the block device is opened
+ * with exclusive access.  Exclusive opens may nest for the same @holder.
  *
  * Use this interface ONLY if you really do not have anything better - i.e. when
  * you are behind a truly sucky interface and all you are given is a device
@@ -814,52 +832,29 @@ static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode)
  * Might sleep.
  *
  * RETURNS:
- * Handle with a reference to the block_device on success, ERR_PTR(-errno) on
- * failure.
+ * zero on success, -errno on failure.
  */
-struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
-				     const struct blk_holder_ops *hops)
+int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
+	      const struct blk_holder_ops *hops, struct file *f_bdev)
 {
 	struct bdev_handle *handle = kmalloc(sizeof(struct bdev_handle),
 					     GFP_KERNEL);
-	struct block_device *bdev;
 	bool unblock_events = true;
-	struct gendisk *disk;
+	struct gendisk *disk = bdev->bd_disk;
 	int ret;
 
+	handle = kmalloc(sizeof(struct bdev_handle), GFP_KERNEL);
 	if (!handle)
-		return ERR_PTR(-ENOMEM);
-
-	ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
-			MAJOR(dev), MINOR(dev),
-			((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
-			((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
-	if (ret)
-		goto free_handle;
-
-	/* Blocking writes requires exclusive opener */
-	if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) {
-		ret = -EINVAL;
-		goto free_handle;
-	}
-
-	bdev = blkdev_get_no_open(dev);
-	if (!bdev) {
-		ret = -ENXIO;
-		goto free_handle;
-	}
-	disk = bdev->bd_disk;
+		return -ENOMEM;
 
 	if (holder) {
 		mode |= BLK_OPEN_EXCL;
 		ret = bd_prepare_to_claim(bdev, holder, hops);
 		if (ret)
-			goto put_blkdev;
+			return ret;
 	} else {
-		if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL)) {
-			ret = -EIO;
-			goto put_blkdev;
-		}
+		if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL))
+			return -EIO;
 	}
 
 	disk_block_events(disk);
@@ -903,7 +898,22 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
 	handle->bdev = bdev;
 	handle->holder = holder;
 	handle->mode = mode;
-	return handle;
+
+	/*
+	 * Preserve backwards compatibility and allow large file access
+	 * even if userspace doesn't ask for it explicitly. Some mkfs
+	 * binary needs it. We might want to drop this workaround
+	 * during an unstable branch.
+	 */
+	f_bdev->f_flags |= O_LARGEFILE;
+	f_bdev->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
+	if (bdev_nowait(bdev))
+		f_bdev->f_mode |= FMODE_NOWAIT;
+	f_bdev->f_mapping = handle->bdev->bd_inode->i_mapping;
+	f_bdev->f_wb_err = filemap_sample_wb_err(f_bdev->f_mapping);
+	f_bdev->private_data = handle;
+
+	return 0;
 put_module:
 	module_put(disk->fops->owner);
 abort_claiming:
@@ -911,11 +921,8 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
 		bd_abort_claiming(bdev, holder);
 	mutex_unlock(&disk->open_mutex);
 	disk_unblock_events(disk);
-put_blkdev:
-	blkdev_put_no_open(bdev);
-free_handle:
 	kfree(handle);
-	return ERR_PTR(ret);
+	return ret;
 }
 
 static unsigned blk_to_file_flags(blk_mode_t mode)
@@ -927,8 +934,10 @@ static unsigned blk_to_file_flags(blk_mode_t mode)
 		flags |= O_RDWR;
 	else if (mode & BLK_OPEN_WRITE)
 		flags |= O_WRONLY;
-	else
+	else if (mode & BLK_OPEN_READ)
 		flags |= O_RDONLY;
+	else /* Neither read nor write for a block device requested? */
+		WARN_ON_ONCE(true);
 
 	/*
 	 * O_EXCL is one of those flags that the VFS clears once it's done with
@@ -952,31 +961,37 @@ static unsigned blk_to_file_flags(blk_mode_t mode)
 struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
 				   const struct blk_holder_ops *hops)
 {
-	struct file *file;
-	struct bdev_handle *handle;
+	struct file *f_bdev;
+	struct block_device *bdev;
 	unsigned int flags;
+	int ret;
 
-	handle = bdev_open_by_dev(dev, mode, holder, hops);
-	if (IS_ERR(handle))
-		return ERR_CAST(handle);
+	ret = bdev_permission(dev, 0, holder);
+	if (ret)
+		return ERR_PTR(ret);
+
+	bdev = blkdev_get_no_open(dev);
+	if (!bdev)
+		return ERR_PTR(-ENXIO);
 
 	flags = blk_to_file_flags(mode);
-	file = alloc_file_pseudo(handle->bdev->bd_inode, blockdev_mnt, "",
-				 flags | O_LARGEFILE, &def_blk_fops);
-	if (IS_ERR(file)) {
-		bdev_release(handle);
-		return file;
+	f_bdev = alloc_file_pseudo(bdev->bd_inode, blockdev_mnt, "",
+				   flags | O_LARGEFILE, &def_blk_fops);
+	if (IS_ERR(f_bdev)) {
+		blkdev_put_no_open(bdev);
+		return f_bdev;
 	}
-	ihold(handle->bdev->bd_inode);
-
-	file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT | FMODE_NOACCOUNT;
-	if (bdev_nowait(handle->bdev))
-		file->f_mode |= FMODE_NOWAIT;
+	f_bdev->f_mode &= ~FMODE_OPENED;
 
-	file->f_mapping = handle->bdev->bd_inode->i_mapping;
-	file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
-	file->private_data = handle;
-	return file;
+	ihold(bdev->bd_inode);
+	ret = bdev_open(bdev, mode, holder, hops, f_bdev);
+	if (ret) {
+		fput(f_bdev);
+		return ERR_PTR(ret);
+	}
+	/* Now that thing is opened. */
+	f_bdev->f_mode |= FMODE_OPENED;
+	return f_bdev;
 }
 EXPORT_SYMBOL(bdev_file_open_by_dev);
 
diff --git a/block/blk.h b/block/blk.h
index d1a2030fa5c3..ab1a5ab8cd2e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -525,7 +525,7 @@ static inline int req_ref_read(struct request *req)
 }
 
 void bdev_release(struct bdev_handle *handle);
-struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
-		const struct blk_holder_ops *hops);
-
+int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
+	      const struct blk_holder_ops *hops, struct file *f_bdev);
+int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);
 #endif /* BLK_INTERNAL_H */
diff --git a/block/fops.c b/block/fops.c
index 0abaac705daf..ed7be8b5810e 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -584,31 +584,25 @@ blk_mode_t file_to_blk_mode(struct file *file)
 
 static int blkdev_open(struct inode *inode, struct file *filp)
 {
-	struct bdev_handle *handle;
+	struct block_device *bdev;
 	blk_mode_t mode;
-
-	/*
-	 * Preserve backwards compatibility and allow large file access
-	 * even if userspace doesn't ask for it explicitly. Some mkfs
-	 * binary needs it. We might want to drop this workaround
-	 * during an unstable branch.
-	 */
-	filp->f_flags |= O_LARGEFILE;
-	filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
+	void *holder;
+	int ret;
 
 	mode = file_to_blk_mode(filp);
-	handle = bdev_open_by_dev(inode->i_rdev, mode,
-			mode & BLK_OPEN_EXCL ? filp : NULL, NULL);
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	holder = mode & BLK_OPEN_EXCL ? filp : NULL;
+	ret = bdev_permission(inode->i_rdev, mode, holder);
+	if (ret)
+		return ret;
 
-	if (bdev_nowait(handle->bdev))
-		filp->f_mode |= FMODE_NOWAIT;
+	bdev = blkdev_get_no_open(inode->i_rdev);
+	if (!bdev)
+		return -ENXIO;
 
-	filp->f_mapping = handle->bdev->bd_inode->i_mapping;
-	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
-	filp->private_data = handle;
-	return 0;
+	ret = bdev_open(bdev, mode, holder, NULL, filp);
+	if (ret)
+		blkdev_put_no_open(bdev);
+	return ret;
 }
 
 static int blkdev_release(struct inode *inode, struct file *filp)

-- 
2.42.0


  parent reply	other threads:[~2024-01-03 12:56 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-03 12:54 [PATCH RFC 00/34] Open block devices as files & a bd_inode proposal Christian Brauner
2024-01-03 12:54 ` [PATCH RFC 01/34] bdev: open block device as files Christian Brauner
2024-01-08  5:37   ` Dave Chinner
2024-01-08 11:34     ` Christian Brauner
2024-01-17 15:31   ` Jan Kara
2024-01-18 17:22     ` Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 02/34] block/ioctl: port blkdev_bszset() to file Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 03/34] block/genhd: port disk_scan_partitions() " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 04/34] md: port block device access " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 05/34] swap: port block device usage " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 06/34] power: port block device access " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 07/34] xfs: port block device access to files Christian Brauner
2024-01-08  5:34   ` Dave Chinner
2024-01-08 11:34     ` Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 08/34] drbd: port block device access to file Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 09/34] pktcdvd: " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 10/34] rnbd: " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 11/34] xen: " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 12/34] zram: " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 13/34] bcache: port block device access to files Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 14/34] block2mtd: port " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 15/34] nvme: port block device access to file Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 16/34] s390: " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 17/34] target: " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 18/34] bcachefs: " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 19/34] btrfs: port " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 20/34] erofs: " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 21/34] ext4: port block " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 22/34] f2fs: port block device access to files Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 23/34] jfs: port block device access to file Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 24/34] nfs: port block device access to files Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 25/34] ocfs2: port block device access to file Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 26/34] reiserfs: " Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 27/34] bdev: remove bdev_open_by_path() Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 28/34] bdev: make bdev_release() private to block layer Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 29/34] bdev: make struct bdev_handle private to the " Christian Brauner
2024-01-03 12:55 ` Christian Brauner [this message]
2024-01-03 12:55 ` [PATCH RFC 31/34] ext4: rely on sb->f_bdev only Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 32/34] block: expose bdev_file_inode() Christian Brauner
2024-01-03 12:55 ` [PATCH RFC 33/34] ext4: use bdev_file_inode() Christian Brauner
2024-01-03 12:55 ` [PATCH DRAFT RFC 34/34] buffer: port block device access to files and get rid of bd_inode access Christian Brauner
2024-01-08  5:52   ` Dave Chinner
2024-01-17 16:15     ` Jan Kara
2024-01-17 16:24       ` Christoph Hellwig
2024-01-17 16:33         ` Jan Kara
2024-01-18 17:39           ` Christian Brauner
2024-01-17 16:32   ` Jan Kara
2024-01-18 17:41     ` Christian Brauner
2024-01-08 16:26 ` [PATCH RFC 00/34] Open block devices as files & a bd_inode proposal Christoph Hellwig
2024-01-09  8:46   ` Jan Kara
2024-01-15 14:24     ` Christian Brauner
2024-01-17 16:46 ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240103-vfs-bdev-file-v1-30-6c8ee55fb6ef@kernel.org \
    --to=brauner@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).