Linux filesystem development
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: Jeremy Bingham <jbingham@gmail.com>
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	brauner@kernel.org, jkoolstra@xs4all.nl, jack@suse.cz,
	hch@infradead.org, viro@zeniv.linux.org.uk,
	syzkaller@googlegroups.com
Subject: Re: [PATCH v2 3/4] minix: convert file operations to iomap and add
Date: Wed, 1 Jul 2026 11:21:46 -0700	[thread overview]
Message-ID: <20260701182146.GD6507@frogsfrogsfrogs> (raw)
In-Reply-To: <44e53f18f7bc7d190c2676e66a0b77a40a62d448.1782619718.git.jbingham@gmail.com>

On Sat, Jun 27, 2026 at 10:15:55PM -0700, Jeremy Bingham wrote:
> Subject: [PATCH v2 3/4] minix: convert file operations to iomap and add

...add what?

> Replace generic_file_read_iter and generic_file_write_iter with custom
> minix_file_read_iter and minix_file_write_iter that dispatch to iomap
> for both buffered and direct I/O.
> 
> Buffered writes now go through iomap_file_buffered_write instead of
> the aops write_begin/write_end path (which no longer exists for
> regular files). Buffered reads still use generic_file_read_iter
> for the non-DIO case.
> 
> Direct I/O is implemented via iomap_dio_rw for both reads and writes.
> minix_dio_read_iter takes a shared inode lock; minix_dio_write_iter
> takes an exclusive lock, does generic_write_checks, and falls back
> to buffered writes via iomap_file_buffered_write for the tail of a
> DIO write that is not block-aligned. The minix_dio_write_end_io
> callback updates i_size and marks the inode dirty.
> 
> minix_file_open sets FMODE_CAN_ODIRECT so the VFS allows O_DIRECT
> opens, and splice_write is added to the file operations.

Ignore my question about FMODE_CAN_ODIRECT in the previous patch, then.

> minix_setattr is exported (made non-static) so it can be shared by
> the symlink inode operations in a subsequent patch.
> 
> Signed-off-by: Jeremy Bingham <jbingham@gmail.com>
> ---
>  fs/minix/file.c  | 157 +++++++++++++++++++++++++++++++++++++++++++++--
>  fs/minix/minix.h |   2 +
>  2 files changed, 153 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/minix/file.c b/fs/minix/file.c
> index 86e5943cd2ff..b07c853fa43a 100644
> --- a/fs/minix/file.c
> +++ b/fs/minix/file.c
> @@ -17,21 +17,166 @@ int minix_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>  			start, end, datasync);
>  }
>  
> +static ssize_t minix_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
> +{
> +	struct inode *inode = iocb->ki_filp->f_mapping->host;
> +	ssize_t ret;
> +
> +	inode_lock_shared(inode);
> +
> +	const struct iomap_ops *ops = minix_iomap_ops_ver(inode);
> +
> +	ret = iomap_dio_rw(iocb, to, ops, NULL, 0, NULL, 0);
> +	inode_unlock_shared(inode);
> +	return ret;
> +}
> +
> +static int minix_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
> +		unsigned int flags)
> +{
> +	struct inode *inode = file_inode(iocb->ki_filp);
> +	loff_t pos = iocb->ki_pos;
> +
> +	if (error)
> +		return error;
> +
> +	pos += size;
> +	if (size && pos > i_size_read(inode)) {
> +		i_size_write(inode, pos);
> +		mark_inode_dirty(inode);
> +	}
> +	return 0;
> +}
> +
> +static const struct iomap_dio_ops minix_dio_write_ops = {
> +	.end_io = minix_dio_write_end_io,
> +};
> +
> +static ssize_t minix_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
> +{
> +	struct inode *inode = iocb->ki_filp->f_mapping->host;
> +	ssize_t ret;
> +	unsigned int flags = 0;
> +	unsigned long blocksize = inode->i_sb->s_blocksize;
> +
> +	inode_lock(inode);
> +	ret = generic_write_checks(iocb, from);
> +	if (ret <= 0)
> +		goto out_unlock;
> +
> +	ret = kiocb_modified(iocb);
> +	if (ret)
> +		goto out_unlock;
> +
> +	if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
> +		!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize))
> +		flags |= IOMAP_DIO_FORCE_WAIT;
> +
> +	const struct iomap_ops *ops = minix_iomap_ops_ver(inode);
> +
> +	ret = iomap_dio_rw(iocb, from, ops,
> +		&minix_dio_write_ops, flags, NULL, 0);
> +	if (ret == -ENOTBLK)
> +		ret = 0; /* fallback to buffered */
> +
> +	if (ret >= 0 && iov_iter_count(from)) {
> +		loff_t pos;
> +		loff_t endbyte;
> +		ssize_t status;
> +
> +		iocb->ki_flags &= ~IOCB_DIRECT;

Why not set IOC_DSYNC here and let generic_write_sync do all the
flushing work for you?  There's no requirement to dump the pagecache
after a downgraded direct write, but if you want that, use
IOCB_DONTCACHE.

--D

> +		pos = iocb->ki_pos;
> +		status = iomap_file_buffered_write(iocb, from, ops,
> +			NULL, NULL);
> +		if (unlikely(status < 0)) {
> +			ret = status;
> +			goto out_unlock;
> +		}
> +
> +		ret += status;
> +		endbyte = pos + status - 1;
> +		status = filemap_write_and_wait_range(inode->i_mapping, pos, endbyte);
> +		if (!status) {
> +			invalidate_mapping_pages(inode->i_mapping,
> +				pos >> PAGE_SHIFT,
> +				endbyte >> PAGE_SHIFT);
> +			if (ret > 0)
> +				ret = generic_write_sync(iocb, ret);
> +		} else {
> +			ret = status;
> +		}
> +	}
> +
> +out_unlock:
> +	inode_unlock(inode);
> +	return ret;
> +}
> +
> +static ssize_t minix_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> +{
> +	if (iocb->ki_flags & IOCB_DIRECT)
> +		return minix_dio_read_iter(iocb, to);
> +
> +	return generic_file_read_iter(iocb, to);
> +}
> +
> +static ssize_t minix_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> +{
> +	struct inode *inode = iocb->ki_filp->f_mapping->host;
> +	ssize_t ret;
> +
> +	/* minix_dio_write_iter also locks the inode and appears to do the same
> +	 * general sorts of checks as this, so just return directly from there.
> +	 */
> +	if (iocb->ki_flags & IOCB_DIRECT)
> +		return minix_dio_write_iter(iocb, from);
> +
> +	inode_lock(inode);
> +	ret = generic_write_checks(iocb, from);
> +	if (ret <= 0)
> +		goto unlock;
> +
> +	ret = file_modified(iocb->ki_filp);
> +	if (ret)
> +		goto unlock;
> +
> +	const struct iomap_ops *ops = minix_iomap_ops_ver(inode);
> +
> +	ret = iomap_file_buffered_write(iocb, from, ops,
> +			NULL, NULL);
> +
> +	if (ret > 0)
> +		ret = generic_write_sync(iocb, ret);
> +
> +unlock:
> +	inode_unlock(inode);
> +	return ret;
> +}
> +
> +static int minix_file_open(struct inode *inode, struct file *filp)
> +{
> +	filp->f_mode |= FMODE_CAN_ODIRECT;
> +	return generic_file_open(inode, filp);
> +}
> +
>  /*
> - * We have mostly NULLs here: the current defaults are OK for
> - * the minix filesystem.
> + * We still have some NULLs here, but not as many of the current defaults are
> + * still OK for the minix filesystem.
>   */
> +
>  const struct file_operations minix_file_operations = {
>  	.llseek		= generic_file_llseek,
> -	.read_iter	= generic_file_read_iter,
> -	.write_iter	= generic_file_write_iter,
> +	.read_iter	= minix_file_read_iter,
> +	.write_iter	= minix_file_write_iter,
>  	.mmap_prepare	= generic_file_mmap_prepare,
> +	.open		= minix_file_open,
>  	.fsync		= minix_fsync,
>  	.splice_read	= filemap_splice_read,
> +	.splice_write	= iter_file_splice_write,
>  };
>  
> -static int minix_setattr(struct mnt_idmap *idmap,
> -			 struct dentry *dentry, struct iattr *attr)
> +int minix_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
> +	struct iattr *attr)
>  {
>  	struct inode *inode = d_inode(dentry);
>  	int error;
> diff --git a/fs/minix/minix.h b/fs/minix/minix.h
> index 77e503cca97f..76718f789369 100644
> --- a/fs/minix/minix.h
> +++ b/fs/minix/minix.h
> @@ -58,6 +58,8 @@ void minix_free_block(struct inode *inode, unsigned long block);
>  unsigned long minix_count_free_blocks(struct super_block *sb);
>  int minix_getattr(struct mnt_idmap *, const struct path *,
>  		struct kstat *, u32, unsigned int);
> +int minix_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
> +	struct iattr *attr);
>  int minix_prepare_chunk(struct folio *folio, loff_t pos, unsigned len);
>  struct mapping_metadata_bhs *minix_get_metadata_bhs(struct inode *inode);
>  int minix_fsync(struct file *file, loff_t start, loff_t end, int datasync);
> -- 
> 2.47.3
> 
> 

  reply	other threads:[~2026-07-01 18:21 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-28  5:15 [PATCH v2 0/4] minix: convert to iomap and add direct I/O Jeremy Bingham
2026-06-28  5:15 ` [PATCH v2 1/4] minix: add iomap infrastructure Jeremy Bingham
2026-07-01 18:06   ` Darrick J. Wong
2026-07-01 18:32     ` Jeremy Bingham
2026-06-28  5:15 ` [PATCH v2 2/4] minix: convert address space operations to iomap Jeremy Bingham
2026-07-01 18:14   ` Darrick J. Wong
2026-07-01 18:37     ` Jeremy Bingham
2026-07-01 18:47       ` Darrick J. Wong
2026-06-28  5:15 ` [PATCH v2 3/4] minix: convert file operations to iomap and add Jeremy Bingham
2026-07-01 18:21   ` Darrick J. Wong [this message]
2026-07-01 18:42     ` Jeremy Bingham
2026-06-28  5:15 ` [PATCH v2 4/4] minix: fix symlink and truncate for iomap Jeremy Bingham
2026-06-29  5:27 ` [syzbot ci] Re: minix: convert to iomap and add direct I/O syzbot ci
2026-06-30  3:05   ` Jeremy Bingham
2026-06-30  4:05     ` syzbot ci
2026-07-01 18:00 ` [PATCH v2 0/4] " Darrick J. Wong
2026-07-02 18:42   ` Jeremy Bingham

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260701182146.GD6507@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=brauner@kernel.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=jbingham@gmail.com \
    --cc=jkoolstra@xs4all.nl \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=syzkaller@googlegroups.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox