* [PATCH 01/15] ovl: add permission hooks outside of do_splice_direct()
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-14 15:33 ` [PATCH 02/15] splice: remove permission hook from do_splice_direct() Amir Goldstein
` (13 subsequent siblings)
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
The main callers of do_splice_direct() also call rw_verify_area() for
the entire range that is being copied, e.g. by vfs_copy_file_range()
or do_sendfile() before calling do_splice_direct().
The only caller that does not have those checks for entire range is
ovl_copy_up_file(). In preparation for removing the checks inside
do_splice_direct(), add rw_verify_area() call in ovl_copy_up_file().
For extra safety, perform minimal sanity checks from rw_verify_area()
for non negative offsets also in the copy up do_splice_direct() loop
without calling the file permission hooks.
This is needed for fanotify "pre content" events.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/overlayfs/copy_up.c | 26 +++++++++++++++++++++++++-
1 file changed, 25 insertions(+), 1 deletion(-)
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 4382881b0709..106f8643af3b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -230,6 +230,19 @@ static int ovl_copy_fileattr(struct inode *inode, const struct path *old,
return ovl_real_fileattr_set(new, &newfa);
}
+static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
+{
+ loff_t tmp;
+
+ if (WARN_ON_ONCE(pos != pos2))
+ return -EIO;
+ if (WARN_ON_ONCE(pos < 0 || len < 0 || totlen < 0))
+ return -EIO;
+ if (WARN_ON_ONCE(check_add_overflow(pos, len, &tmp)))
+ return -EIO;
+ return 0;
+}
+
static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
struct file *new_file, loff_t len)
{
@@ -244,13 +257,20 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
int error = 0;
ovl_path_lowerdata(dentry, &datapath);
- if (WARN_ON(datapath.dentry == NULL))
+ if (WARN_ON_ONCE(datapath.dentry == NULL) ||
+ WARN_ON_ONCE(len < 0))
return -EIO;
old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY);
if (IS_ERR(old_file))
return PTR_ERR(old_file);
+ error = rw_verify_area(READ, old_file, &old_pos, len);
+ if (!error)
+ error = rw_verify_area(WRITE, new_file, &new_pos, len);
+ if (error)
+ goto out_fput;
+
/* Try to use clone_file_range to clone up within the same fs */
ovl_start_write(dentry);
cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
@@ -309,6 +329,10 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
}
}
+ error = ovl_verify_area(old_pos, new_pos, this_len, len);
+ if (error)
+ break;
+
ovl_start_write(dentry);
bytes = do_splice_direct(old_file, &old_pos,
new_file, &new_pos,
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 02/15] splice: remove permission hook from do_splice_direct()
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
2023-11-14 15:33 ` [PATCH 01/15] ovl: add permission hooks outside of do_splice_direct() Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-14 15:33 ` [PATCH 03/15] splice: move permission hook out of splice_direct_to_actor() Amir Goldstein
` (12 subsequent siblings)
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
All callers of do_splice_direct() have a call to rw_verify_area() for
the entire range that is being copied, e.g. by vfs_copy_file_range() or
do_sendfile() before calling do_splice_direct().
The rw_verify_area() check inside do_splice_direct() is redundant and
is called after sb_start_write(), so it is not "start-write-safe".
Remove this redundant check.
This is needed for fanotify "pre content" events.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/splice.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/fs/splice.c b/fs/splice.c
index d983d375ff11..6e917db6f49a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1166,6 +1166,7 @@ static void direct_file_splice_eof(struct splice_desc *sd)
* (splice in + splice out, as compared to just sendfile()). So this helper
* can splice directly through a process-private pipe.
*
+ * Callers already called rw_verify_area() on the entire range.
*/
long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
loff_t *opos, size_t len, unsigned int flags)
@@ -1187,10 +1188,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
if (unlikely(out->f_flags & O_APPEND))
return -EINVAL;
- ret = rw_verify_area(WRITE, out, opos, len);
- if (unlikely(ret < 0))
- return ret;
-
ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
if (ret > 0)
*ppos = sd.pos;
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 03/15] splice: move permission hook out of splice_direct_to_actor()
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
2023-11-14 15:33 ` [PATCH 01/15] ovl: add permission hooks outside of do_splice_direct() Amir Goldstein
2023-11-14 15:33 ` [PATCH 02/15] splice: remove permission hook from do_splice_direct() Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-14 15:33 ` [PATCH 04/15] splice: move permission hook out of splice_file_to_pipe() Amir Goldstein
` (11 subsequent siblings)
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel, Chuck Lever, Jeff Layton
vfs_splice_read() has a permission hook inside rw_verify_area() and
it is called from do_splice_direct() -> splice_direct_to_actor().
The callers of do_splice_direct() (e.g. vfs_copy_file_range()) already
call rw_verify_area() for the entire range, but the other caller of
splice_direct_to_actor() (nfsd) does not.
Add the rw_verify_area() checks in nfsd_splice_read() and use a
variant of vfs_splice_read() without rw_verify_area() check in
splice_direct_to_actor() to avoid the redundant rw_verify_area() checks.
This is needed for fanotify "pre content" events.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/nfsd/vfs.c | 5 ++++-
fs/splice.c | 58 +++++++++++++++++++++++++++++++--------------------
2 files changed, 39 insertions(+), 24 deletions(-)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fbbea7498f02..5d704461e3b4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1046,7 +1046,10 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
ssize_t host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count);
- host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
+ host_err = rw_verify_area(READ, file, &offset, *count);
+ if (!host_err)
+ host_err = splice_direct_to_actor(file, &sd,
+ nfsd_direct_splice_actor);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
diff --git a/fs/splice.c b/fs/splice.c
index 6e917db6f49a..6fc2c27e9520 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -944,27 +944,15 @@ static void do_splice_eof(struct splice_desc *sd)
sd->splice_eof(sd);
}
-/**
- * vfs_splice_read - Read data from a file and splice it into a pipe
- * @in: File to splice from
- * @ppos: Input file offset
- * @pipe: Pipe to splice to
- * @len: Number of bytes to splice
- * @flags: Splice modifier flags (SPLICE_F_*)
- *
- * Splice the requested amount of data from the input file to the pipe. This
- * is synchronous as the caller must hold the pipe lock across the entire
- * operation.
- *
- * If successful, it returns the amount of data spliced, 0 if it hit the EOF or
- * a hole and a negative error code otherwise.
+/*
+ * Callers already called rw_verify_area() on the entire range.
+ * No need to call it for sub ranges.
*/
-long vfs_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
+static long do_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
{
unsigned int p_space;
- int ret;
if (unlikely(!(in->f_mode & FMODE_READ)))
return -EBADF;
@@ -975,10 +963,6 @@ long vfs_splice_read(struct file *in, loff_t *ppos,
p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail);
len = min_t(size_t, len, p_space << PAGE_SHIFT);
- ret = rw_verify_area(READ, in, ppos, len);
- if (unlikely(ret < 0))
- return ret;
-
if (unlikely(len > MAX_RW_COUNT))
len = MAX_RW_COUNT;
@@ -992,6 +976,34 @@ long vfs_splice_read(struct file *in, loff_t *ppos,
return copy_splice_read(in, ppos, pipe, len, flags);
return in->f_op->splice_read(in, ppos, pipe, len, flags);
}
+
+/**
+ * vfs_splice_read - Read data from a file and splice it into a pipe
+ * @in: File to splice from
+ * @ppos: Input file offset
+ * @pipe: Pipe to splice to
+ * @len: Number of bytes to splice
+ * @flags: Splice modifier flags (SPLICE_F_*)
+ *
+ * Splice the requested amount of data from the input file to the pipe. This
+ * is synchronous as the caller must hold the pipe lock across the entire
+ * operation.
+ *
+ * If successful, it returns the amount of data spliced, 0 if it hit the EOF or
+ * a hole and a negative error code otherwise.
+ */
+long vfs_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ int ret;
+
+ ret = rw_verify_area(READ, in, ppos, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return do_splice_read(in, ppos, pipe, len, flags);
+}
EXPORT_SYMBOL_GPL(vfs_splice_read);
/**
@@ -1066,7 +1078,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
size_t read_len;
loff_t pos = sd->pos, prev_pos = pos;
- ret = vfs_splice_read(in, &pos, pipe, len, flags);
+ ret = do_splice_read(in, &pos, pipe, len, flags);
if (unlikely(ret <= 0))
goto read_failure;
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 04/15] splice: move permission hook out of splice_file_to_pipe()
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (2 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 03/15] splice: move permission hook out of splice_direct_to_actor() Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-14 15:33 ` [PATCH 05/15] splice: remove permission hook from iter_file_splice_write() Amir Goldstein
` (10 subsequent siblings)
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
vfs_splice_read() has a permission hook inside rw_verify_area() and
it is called from splice_file_to_pipe(), which is called from
do_splice() and do_sendfile().
do_sendfile() already has a rw_verify_area() check for the entire range.
do_splice() has a rw_verify_check() for the splice to file case, not for
the splice from file case.
Add the rw_verify_area() check for splice from file case in do_splice()
and use a variant of vfs_splice_read() without rw_verify_area() check
in splice_file_to_pipe() to avoid the redundant rw_verify_area() checks.
This is needed for fanotify "pre content" events.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/splice.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/fs/splice.c b/fs/splice.c
index 6fc2c27e9520..d4fdd44c0b32 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1239,7 +1239,7 @@ long splice_file_to_pipe(struct file *in,
pipe_lock(opipe);
ret = wait_for_space(opipe, flags);
if (!ret)
- ret = vfs_splice_read(in, offset, opipe, len, flags);
+ ret = do_splice_read(in, offset, opipe, len, flags);
pipe_unlock(opipe);
if (ret > 0)
wakeup_pipe_readers(opipe);
@@ -1316,6 +1316,10 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
offset = in->f_pos;
}
+ ret = rw_verify_area(READ, in, &offset, len);
+ if (unlikely(ret < 0))
+ return ret;
+
if (out->f_flags & O_NONBLOCK)
flags |= SPLICE_F_NONBLOCK;
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 05/15] splice: remove permission hook from iter_file_splice_write()
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (3 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 04/15] splice: move permission hook out of splice_file_to_pipe() Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-21 14:56 ` Christian Brauner
2023-11-14 15:33 ` [PATCH 06/15] remap_range: move permission hooks out of do_clone_file_range() Amir Goldstein
` (9 subsequent siblings)
14 siblings, 1 reply; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
All the callers of ->splice_write(), (e.g. do_splice_direct() and
do_splice()) already check rw_verify_area() for the entire range
and perform all the other checks that are in vfs_write_iter().
Create a helper do_iter_writev(), that performs the write without the
checks and use it in iter_file_splice_write() to avoid the redundant
rw_verify_area() checks.
This is needed for fanotify "pre content" events.
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/internal.h | 8 +++++++-
fs/read_write.c | 7 +++++++
fs/splice.c | 9 ++++++---
3 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/fs/internal.h b/fs/internal.h
index 58e43341aebf..c114b85e27a7 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -298,7 +298,13 @@ static inline ssize_t do_get_acl(struct mnt_idmap *idmap,
}
#endif
-ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos);
+/*
+ * fs/read_write.c
+ */
+ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from,
+ loff_t *pos);
+ssize_t do_iter_writev(struct file *file, struct iov_iter *iter, loff_t *ppos,
+ rwf_t flags);
/*
* fs/attr.c
diff --git a/fs/read_write.c b/fs/read_write.c
index 4771701c896b..590ab228fa98 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -739,6 +739,13 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
return ret;
}
+ssize_t do_iter_writev(struct file *filp, struct iov_iter *iter, loff_t *ppos,
+ rwf_t flags)
+{
+ return do_iter_readv_writev(filp, iter, ppos, WRITE, flags);
+}
+
+
/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
loff_t *ppos, int type, rwf_t flags)
diff --git a/fs/splice.c b/fs/splice.c
index d4fdd44c0b32..decbace5d812 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -673,10 +673,13 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
.u.file = out,
};
int nbufs = pipe->max_usage;
- struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
- GFP_KERNEL);
+ struct bio_vec *array;
ssize_t ret;
+ if (!out->f_op->write_iter)
+ return -EINVAL;
+
+ array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL);
if (unlikely(!array))
return -ENOMEM;
@@ -733,7 +736,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
}
iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left);
- ret = vfs_iter_write(out, &from, &sd.pos, 0);
+ ret = do_iter_writev(out, &from, &sd.pos, 0);
if (ret <= 0)
break;
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 05/15] splice: remove permission hook from iter_file_splice_write()
2023-11-14 15:33 ` [PATCH 05/15] splice: remove permission hook from iter_file_splice_write() Amir Goldstein
@ 2023-11-21 14:56 ` Christian Brauner
2023-11-21 15:18 ` Amir Goldstein
0 siblings, 1 reply; 25+ messages in thread
From: Christian Brauner @ 2023-11-21 14:56 UTC (permalink / raw)
To: Amir Goldstein
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
On Tue, Nov 14, 2023 at 05:33:11PM +0200, Amir Goldstein wrote:
> All the callers of ->splice_write(), (e.g. do_splice_direct() and
> do_splice()) already check rw_verify_area() for the entire range
> and perform all the other checks that are in vfs_write_iter().
>
> Create a helper do_iter_writev(), that performs the write without the
> checks and use it in iter_file_splice_write() to avoid the redundant
> rw_verify_area() checks.
>
> This is needed for fanotify "pre content" events.
>
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
If you resend anyway, for the low-level splice helpers specifically it
would be nice to add a comment that mentions that the caller is expected
to perform basic rw checks.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 05/15] splice: remove permission hook from iter_file_splice_write()
2023-11-21 14:56 ` Christian Brauner
@ 2023-11-21 15:18 ` Amir Goldstein
0 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-21 15:18 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
On Tue, Nov 21, 2023 at 4:56 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Tue, Nov 14, 2023 at 05:33:11PM +0200, Amir Goldstein wrote:
> > All the callers of ->splice_write(), (e.g. do_splice_direct() and
> > do_splice()) already check rw_verify_area() for the entire range
> > and perform all the other checks that are in vfs_write_iter().
> >
> > Create a helper do_iter_writev(), that performs the write without the
> > checks and use it in iter_file_splice_write() to avoid the redundant
> > rw_verify_area() checks.
> >
> > This is needed for fanotify "pre content" events.
> >
> > Suggested-by: Jan Kara <jack@suse.cz>
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
>
> If you resend anyway, for the low-level splice helpers specifically it
> would be nice to add a comment that mentions that the caller is expected
> to perform basic rw checks.
Will do.
Thanks,
Amir.
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 06/15] remap_range: move permission hooks out of do_clone_file_range()
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (4 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 05/15] splice: remove permission hook from iter_file_splice_write() Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-14 15:33 ` [PATCH 07/15] remap_range: move file_start_write() to after permission hook Amir Goldstein
` (8 subsequent siblings)
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
In many of the vfs helpers, file permission hook is called before
taking sb_start_write(), making them "start-write-safe".
do_clone_file_range() is an exception to this rule.
do_clone_file_range() has two callers - vfs_clone_file_range() and
overlayfs. Move remap_verify_area() checks from do_clone_file_range()
out to vfs_clone_file_range() to make them "start-write-safe".
Overlayfs already has calls to rw_verify_area() with the same security
permission hooks as remap_verify_area() has.
The rest of the checks in remap_verify_area() are irrelevant for
overlayfs that calls do_clone_file_range() offset 0 and positive length.
This is needed for fanotify "pre content" events.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/remap_range.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/fs/remap_range.c b/fs/remap_range.c
index 87ae4f0dc3aa..42f79cb2b1b1 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -385,14 +385,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
if (!file_in->f_op->remap_file_range)
return -EOPNOTSUPP;
- ret = remap_verify_area(file_in, pos_in, len, false);
- if (ret)
- return ret;
-
- ret = remap_verify_area(file_out, pos_out, len, true);
- if (ret)
- return ret;
-
ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out, len, remap_flags);
if (ret < 0)
@@ -410,6 +402,14 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
{
loff_t ret;
+ ret = remap_verify_area(file_in, pos_in, len, false);
+ if (ret)
+ return ret;
+
+ ret = remap_verify_area(file_out, pos_out, len, true);
+ if (ret)
+ return ret;
+
file_start_write(file_out);
ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
remap_flags);
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 07/15] remap_range: move file_start_write() to after permission hook
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (5 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 06/15] remap_range: move permission hooks out of do_clone_file_range() Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-21 15:10 ` Christian Brauner
2023-11-14 15:33 ` [PATCH 08/15] btrfs: " Amir Goldstein
` (7 subsequent siblings)
14 siblings, 1 reply; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
In vfs code, file_start_write() is usually called after the permission
hook in rw_verify_area(). vfs_dedupe_file_range_one() is an exception
to this rule.
In vfs_dedupe_file_range_one(), move file_start_write() to after the
the rw_verify_area() checks to make them "start-write-safe".
This is needed for fanotify "pre content" events.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/remap_range.c | 32 +++++++++++++-------------------
1 file changed, 13 insertions(+), 19 deletions(-)
diff --git a/fs/remap_range.c b/fs/remap_range.c
index 42f79cb2b1b1..de4b09d0ba1d 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -445,46 +445,40 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
REMAP_FILE_CAN_SHORTEN));
- ret = mnt_want_write_file(dst_file);
- if (ret)
- return ret;
-
/*
* This is redundant if called from vfs_dedupe_file_range(), but other
* callers need it and it's not performance sesitive...
*/
ret = remap_verify_area(src_file, src_pos, len, false);
if (ret)
- goto out_drop_write;
+ return ret;
ret = remap_verify_area(dst_file, dst_pos, len, true);
if (ret)
- goto out_drop_write;
+ return ret;
- ret = -EPERM;
if (!allow_file_dedupe(dst_file))
- goto out_drop_write;
+ return -EPERM;
- ret = -EXDEV;
if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb)
- goto out_drop_write;
+ return -EXDEV;
- ret = -EISDIR;
if (S_ISDIR(file_inode(dst_file)->i_mode))
- goto out_drop_write;
+ return -EISDIR;
- ret = -EINVAL;
if (!dst_file->f_op->remap_file_range)
- goto out_drop_write;
+ return -EINVAL;
- if (len == 0) {
- ret = 0;
- goto out_drop_write;
- }
+ if (len == 0)
+ return 0;
+
+ ret = mnt_want_write_file(dst_file);
+ if (ret)
+ return ret;
ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
-out_drop_write:
+
mnt_drop_write_file(dst_file);
return ret;
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 07/15] remap_range: move file_start_write() to after permission hook
2023-11-14 15:33 ` [PATCH 07/15] remap_range: move file_start_write() to after permission hook Amir Goldstein
@ 2023-11-21 15:10 ` Christian Brauner
2023-11-21 15:47 ` Christian Brauner
2023-11-21 18:39 ` Amir Goldstein
0 siblings, 2 replies; 25+ messages in thread
From: Christian Brauner @ 2023-11-21 15:10 UTC (permalink / raw)
To: Amir Goldstein
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
On Tue, Nov 14, 2023 at 05:33:13PM +0200, Amir Goldstein wrote:
> In vfs code, file_start_write() is usually called after the permission
> hook in rw_verify_area(). vfs_dedupe_file_range_one() is an exception
> to this rule.
>
> In vfs_dedupe_file_range_one(), move file_start_write() to after the
> the rw_verify_area() checks to make them "start-write-safe".
>
> This is needed for fanotify "pre content" events.
>
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
> fs/remap_range.c | 32 +++++++++++++-------------------
> 1 file changed, 13 insertions(+), 19 deletions(-)
>
> diff --git a/fs/remap_range.c b/fs/remap_range.c
> index 42f79cb2b1b1..de4b09d0ba1d 100644
> --- a/fs/remap_range.c
> +++ b/fs/remap_range.c
> @@ -445,46 +445,40 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
> WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
> REMAP_FILE_CAN_SHORTEN));
>
> - ret = mnt_want_write_file(dst_file);
> - if (ret)
> - return ret;
> -
> /*
> * This is redundant if called from vfs_dedupe_file_range(), but other
> * callers need it and it's not performance sesitive...
> */
> ret = remap_verify_area(src_file, src_pos, len, false);
> if (ret)
> - goto out_drop_write;
> + return ret;
>
> ret = remap_verify_area(dst_file, dst_pos, len, true);
> if (ret)
> - goto out_drop_write;
> + return ret;
>
> - ret = -EPERM;
> if (!allow_file_dedupe(dst_file))
> - goto out_drop_write;
> + return -EPERM;
So that check specifically should come after mnt_want_write_file()
because it calls inode_permission() which takes the mount's idmapping
into account. And before you hold mnt_want_write_file() the idmapping of
the mount can still change. Once you've gotten write access though we
tell the anyone trying to change the mount's write-relevant properties
to go away.
With your changes that check might succeed now but fail later. So please
move that check below mnt_want_write_file(). That shouldn't be a
problem.
Fwiw, for security_file_permission() it doesn't matter because the LSMs
don't care about DAC permission - at least not the ones that currently
implement the hook. I verified that years ago and just rechecked. If
they start caring - which I sincerely hope they don't - then we have to
do a bunch of rework anyway to make that work reliably. But I doubt
that'll happen or we'll let that happen.
While at it, please rename allow_file_dedupe() to may_dedupe_file() so
it mirrors our helpers in fs/namei.c.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 07/15] remap_range: move file_start_write() to after permission hook
2023-11-21 15:10 ` Christian Brauner
@ 2023-11-21 15:47 ` Christian Brauner
2023-11-21 18:39 ` Amir Goldstein
1 sibling, 0 replies; 25+ messages in thread
From: Christian Brauner @ 2023-11-21 15:47 UTC (permalink / raw)
To: Amir Goldstein
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
> the mount can still change. Once you've gotten write access though we
> tell the anyone trying to change the mount's write-relevant properties
> to go away.
I should also clarify that this is unlikely to matter in practice. It's
more about correctness. You have to be in a very specific scenario for
that to even be a relevant concern.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 07/15] remap_range: move file_start_write() to after permission hook
2023-11-21 15:10 ` Christian Brauner
2023-11-21 15:47 ` Christian Brauner
@ 2023-11-21 18:39 ` Amir Goldstein
1 sibling, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-21 18:39 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
On Tue, Nov 21, 2023 at 5:10 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Tue, Nov 14, 2023 at 05:33:13PM +0200, Amir Goldstein wrote:
> > In vfs code, file_start_write() is usually called after the permission
> > hook in rw_verify_area(). vfs_dedupe_file_range_one() is an exception
> > to this rule.
> >
> > In vfs_dedupe_file_range_one(), move file_start_write() to after the
> > the rw_verify_area() checks to make them "start-write-safe".
> >
> > This is needed for fanotify "pre content" events.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> > fs/remap_range.c | 32 +++++++++++++-------------------
> > 1 file changed, 13 insertions(+), 19 deletions(-)
> >
> > diff --git a/fs/remap_range.c b/fs/remap_range.c
> > index 42f79cb2b1b1..de4b09d0ba1d 100644
> > --- a/fs/remap_range.c
> > +++ b/fs/remap_range.c
> > @@ -445,46 +445,40 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
> > WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
> > REMAP_FILE_CAN_SHORTEN));
> >
> > - ret = mnt_want_write_file(dst_file);
> > - if (ret)
> > - return ret;
> > -
> > /*
> > * This is redundant if called from vfs_dedupe_file_range(), but other
> > * callers need it and it's not performance sesitive...
> > */
> > ret = remap_verify_area(src_file, src_pos, len, false);
> > if (ret)
> > - goto out_drop_write;
> > + return ret;
> >
> > ret = remap_verify_area(dst_file, dst_pos, len, true);
> > if (ret)
> > - goto out_drop_write;
> > + return ret;
> >
> > - ret = -EPERM;
> > if (!allow_file_dedupe(dst_file))
> > - goto out_drop_write;
> > + return -EPERM;
>
> So that check specifically should come after mnt_want_write_file()
> because it calls inode_permission() which takes the mount's idmapping
> into account. And before you hold mnt_want_write_file() the idmapping of
> the mount can still change. Once you've gotten write access though we
> tell the anyone trying to change the mount's write-relevant properties
> to go away.
>
> With your changes that check might succeed now but fail later. So please
> move that check below mnt_want_write_file(). That shouldn't be a
> problem.
>
Right. Good catch!
Thanks,
Amir.
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 08/15] btrfs: move file_start_write() to after permission hook
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (6 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 07/15] remap_range: move file_start_write() to after permission hook Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-14 15:33 ` [PATCH 09/15] fs: move file_start_write() into vfs_iter_write() Amir Goldstein
` (6 subsequent siblings)
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel, Chris Mason, Josef Bacik, David Sterba
In vfs code, file_start_write() is usually called after the permission
hook in rw_verify_area(). btrfs_ioctl_encoded_write() in an exception
to this rule.
Move file_start_write() to after the rw_verify_area() check in encoded
write to make the permission hook "start-write-safe".
This is needed for fanotify "pre content" events.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/btrfs/ioctl.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 752acff2c734..e691770c25aa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4523,29 +4523,29 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
if (ret < 0)
goto out_acct;
- file_start_write(file);
-
if (iov_iter_count(&iter) == 0) {
ret = 0;
- goto out_end_write;
+ goto out_iov;
}
pos = args.offset;
ret = rw_verify_area(WRITE, file, &pos, args.len);
if (ret < 0)
- goto out_end_write;
+ goto out_iov;
init_sync_kiocb(&kiocb, file);
ret = kiocb_set_rw_flags(&kiocb, 0);
if (ret)
- goto out_end_write;
+ goto out_iov;
kiocb.ki_pos = pos;
+ file_start_write(file);
+
ret = btrfs_do_write_iter(&kiocb, &iter, &args);
if (ret > 0)
fsnotify_modify(file);
-out_end_write:
file_end_write(file);
+out_iov:
kfree(iov);
out_acct:
if (ret > 0)
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 09/15] fs: move file_start_write() into vfs_iter_write()
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (7 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 08/15] btrfs: " Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-14 15:33 ` [PATCH 10/15] fs: move permission hook out of do_iter_write() Amir Goldstein
` (5 subsequent siblings)
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel, Chuck Lever, Jeff Layton, Jan Harkes
All the callers of vfs_iter_write() call file_start_write() just before
calling vfs_iter_write() except for target_core_file's fd_do_rw().
Move file_start_write() from the callers into vfs_iter_write().
fd_do_rw() calls vfs_iter_write() with a non-regular file, so
file_start_write() is a no-op.
This is needed for fanotify "pre content" events.
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
drivers/block/loop.c | 2 --
fs/coda/file.c | 4 +---
fs/nfsd/vfs.c | 2 --
fs/overlayfs/file.c | 2 --
fs/read_write.c | 13 ++++++++++---
5 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 9f2d412fc560..8a8cd4fc9238 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -245,9 +245,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len);
- file_start_write(file);
bw = vfs_iter_write(file, &i, ppos, 0);
- file_end_write(file);
if (likely(bw == bvec->bv_len))
return 0;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 16acc58311ea..7c84555c8923 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -79,14 +79,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
if (ret)
goto finish_write;
- file_start_write(host_file);
inode_lock(coda_inode);
- ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
+ ret = vfs_iter_write(host_file, to, &iocb->ki_pos, 0);
coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
inode_set_mtime_to_ts(coda_inode, inode_set_ctime_current(coda_inode));
inode_unlock(coda_inode);
- file_end_write(host_file);
finish_write:
venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 5d704461e3b4..35c9546b3396 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1186,9 +1186,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
since = READ_ONCE(file->f_wb_err);
if (verf)
nfsd_copy_write_verifier(verf, nn);
- file_start_write(file);
host_err = vfs_iter_write(file, &iter, &pos, flags);
- file_end_write(file);
if (host_err < 0) {
commit_reset_write_verifier(nn, rqstp, host_err);
goto out_nfserr;
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 131621daeb13..690b173f34fc 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -436,9 +436,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(ifl);
- file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf);
- file_end_write(real.file);
/* Update size */
ovl_file_modified(file);
} else {
diff --git a/fs/read_write.c b/fs/read_write.c
index 590ab228fa98..8cdc6e6a9639 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -846,7 +846,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
EXPORT_SYMBOL(vfs_iter_read);
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
- loff_t *pos, rwf_t flags)
+ loff_t *pos, rwf_t flags)
{
size_t tot_len;
ssize_t ret = 0;
@@ -901,11 +901,18 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
EXPORT_SYMBOL(vfs_iocb_iter_write);
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
- rwf_t flags)
+ rwf_t flags)
{
+ int ret;
+
if (!file->f_op->write_iter)
return -EINVAL;
- return do_iter_write(file, iter, ppos, flags);
+
+ file_start_write(file);
+ ret = do_iter_write(file, iter, ppos, flags);
+ file_end_write(file);
+
+ return ret;
}
EXPORT_SYMBOL(vfs_iter_write);
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 10/15] fs: move permission hook out of do_iter_write()
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (8 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 09/15] fs: move file_start_write() into vfs_iter_write() Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-21 15:34 ` Christian Brauner
2023-11-14 15:33 ` [PATCH 11/15] fs: move permission hook out of do_iter_read() Amir Goldstein
` (4 subsequent siblings)
14 siblings, 1 reply; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
In many of the vfs helpers, the rw_verity_area() checks are called before
taking sb_start_write(), making them "start-write-safe".
do_iter_write() is an exception to this rule.
do_iter_write() has two callers - vfs_iter_write() and vfs_writev().
Move rw_verify_area() and other checks from do_iter_write() out to
its callers to make them "start-write-safe".
Move also the fsnotify_modify() hook to align with similar pattern
used in vfs_write() and other vfs helpers.
This is needed for fanotify "pre content" events.
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/read_write.c | 76 ++++++++++++++++++++++++++++++-------------------
1 file changed, 46 insertions(+), 30 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 8cdc6e6a9639..d4891346d42e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -848,28 +848,10 @@ EXPORT_SYMBOL(vfs_iter_read);
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
loff_t *pos, rwf_t flags)
{
- size_t tot_len;
- ssize_t ret = 0;
-
- if (!(file->f_mode & FMODE_WRITE))
- return -EBADF;
- if (!(file->f_mode & FMODE_CAN_WRITE))
- return -EINVAL;
-
- tot_len = iov_iter_count(iter);
- if (!tot_len)
- return 0;
- ret = rw_verify_area(WRITE, file, pos, tot_len);
- if (ret < 0)
- return ret;
-
if (file->f_op->write_iter)
- ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
+ return do_iter_readv_writev(file, iter, pos, WRITE, flags);
else
- ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
- if (ret > 0)
- fsnotify_modify(file);
- return ret;
+ return do_loop_readv_writev(file, iter, pos, WRITE, flags);
}
ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
@@ -903,13 +885,28 @@ EXPORT_SYMBOL(vfs_iocb_iter_write);
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
rwf_t flags)
{
- int ret;
+ size_t tot_len;
+ ssize_t ret;
+ if (!(file->f_mode & FMODE_WRITE))
+ return -EBADF;
+ if (!(file->f_mode & FMODE_CAN_WRITE))
+ return -EINVAL;
if (!file->f_op->write_iter)
return -EINVAL;
+ tot_len = iov_iter_count(iter);
+ if (!tot_len)
+ return 0;
+
+ ret = rw_verify_area(WRITE, file, ppos, tot_len);
+ if (ret < 0)
+ return ret;
+
file_start_write(file);
ret = do_iter_write(file, iter, ppos, flags);
+ if (ret > 0)
+ fsnotify_modify(file);
file_end_write(file);
return ret;
@@ -934,20 +931,39 @@ static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
}
static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
- unsigned long vlen, loff_t *pos, rwf_t flags)
+ unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
- ssize_t ret;
+ size_t tot_len;
+ ssize_t ret = 0;
- ret = import_iovec(ITER_SOURCE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
- if (ret >= 0) {
- file_start_write(file);
- ret = do_iter_write(file, &iter, pos, flags);
- file_end_write(file);
- kfree(iov);
- }
+ if (!(file->f_mode & FMODE_WRITE))
+ return -EBADF;
+ if (!(file->f_mode & FMODE_CAN_WRITE))
+ return -EINVAL;
+
+ ret = import_iovec(ITER_SOURCE, vec, vlen, ARRAY_SIZE(iovstack), &iov,
+ &iter);
+ if (ret < 0)
+ return ret;
+
+ tot_len = iov_iter_count(&iter);
+ if (!tot_len)
+ goto out;
+
+ ret = rw_verify_area(WRITE, file, pos, tot_len);
+ if (ret < 0)
+ goto out;
+
+ file_start_write(file);
+ ret = do_iter_write(file, &iter, pos, flags);
+ if (ret > 0)
+ fsnotify_modify(file);
+ file_end_write(file);
+out:
+ kfree(iov);
return ret;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 10/15] fs: move permission hook out of do_iter_write()
2023-11-14 15:33 ` [PATCH 10/15] fs: move permission hook out of do_iter_write() Amir Goldstein
@ 2023-11-21 15:34 ` Christian Brauner
0 siblings, 0 replies; 25+ messages in thread
From: Christian Brauner @ 2023-11-21 15:34 UTC (permalink / raw)
To: Amir Goldstein
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
On Tue, Nov 14, 2023 at 05:33:16PM +0200, Amir Goldstein wrote:
> In many of the vfs helpers, the rw_verity_area() checks are called before
> taking sb_start_write(), making them "start-write-safe".
> do_iter_write() is an exception to this rule.
>
> do_iter_write() has two callers - vfs_iter_write() and vfs_writev().
> Move rw_verify_area() and other checks from do_iter_write() out to
> its callers to make them "start-write-safe".
>
> Move also the fsnotify_modify() hook to align with similar pattern
> used in vfs_write() and other vfs helpers.
>
> This is needed for fanotify "pre content" events.
>
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
> fs/read_write.c | 76 ++++++++++++++++++++++++++++++-------------------
> 1 file changed, 46 insertions(+), 30 deletions(-)
>
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 8cdc6e6a9639..d4891346d42e 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -848,28 +848,10 @@ EXPORT_SYMBOL(vfs_iter_read);
> static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
> loff_t *pos, rwf_t flags)
> {
> - size_t tot_len;
> - ssize_t ret = 0;
> -
> - if (!(file->f_mode & FMODE_WRITE))
> - return -EBADF;
> - if (!(file->f_mode & FMODE_CAN_WRITE))
> - return -EINVAL;
> -
> - tot_len = iov_iter_count(iter);
> - if (!tot_len)
> - return 0;
> - ret = rw_verify_area(WRITE, file, pos, tot_len);
> - if (ret < 0)
> - return ret;
> -
> if (file->f_op->write_iter)
> - ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
> + return do_iter_readv_writev(file, iter, pos, WRITE, flags);
> else
> - ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
> - if (ret > 0)
> - fsnotify_modify(file);
> - return ret;
> + return do_loop_readv_writev(file, iter, pos, WRITE, flags);
Nit, this will end up being:
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
loff_t *pos, rwf_t flags)
{
if (file->f_op->write_iter)
return do_iter_readv_writev(file, iter, pos, WRITE, flags);
else
return do_loop_readv_writev(file, iter, pos, WRITE, flags);
}
which is probably best written as:
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
loff_t *pos, rwf_t flags)
{
if (file->f_op->write_iter)
return do_iter_readv_writev(file, iter, pos, WRITE, flags);
return do_loop_readv_writev(file, iter, pos, WRITE, flags);
}
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 11/15] fs: move permission hook out of do_iter_read()
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (9 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 10/15] fs: move permission hook out of do_iter_write() Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-21 15:28 ` Christian Brauner
2023-11-21 15:35 ` Christian Brauner
2023-11-14 15:33 ` [PATCH 12/15] fs: move kiocb_start_write() into vfs_iocb_iter_write() Amir Goldstein
` (3 subsequent siblings)
14 siblings, 2 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
We recently moved fsnotify hook, rw_verify_area() and other checks from
do_iter_write() out to its two callers.
for consistency, do the same thing for do_iter_read() - move the
rw_verify_area() checks and fsnotify hook to the callers vfs_iter_read()
and vfs_readv().
This aligns those vfs helpers with the pattern used in vfs_read() and
vfs_iocb_iter_read() and the vfs write helpers, where all the checks are
in the vfs helpers and the do_* or call_* helpers do the work.
This is needed for fanotify "pre content" events.
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/read_write.c | 70 +++++++++++++++++++++++++++++++------------------
1 file changed, 44 insertions(+), 26 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index d4891346d42e..5b18e13c2620 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -781,11 +781,22 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
}
static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
- loff_t *pos, rwf_t flags)
+ loff_t *pos, rwf_t flags)
+{
+ if (file->f_op->read_iter)
+ return do_iter_readv_writev(file, iter, pos, READ, flags);
+ else
+ return do_loop_readv_writev(file, iter, pos, READ, flags);
+}
+
+ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
+ struct iov_iter *iter)
{
size_t tot_len;
ssize_t ret = 0;
+ if (!file->f_op->read_iter)
+ return -EINVAL;
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
@@ -794,22 +805,20 @@ static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
tot_len = iov_iter_count(iter);
if (!tot_len)
goto out;
- ret = rw_verify_area(READ, file, pos, tot_len);
+ ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
if (ret < 0)
return ret;
- if (file->f_op->read_iter)
- ret = do_iter_readv_writev(file, iter, pos, READ, flags);
- else
- ret = do_loop_readv_writev(file, iter, pos, READ, flags);
+ ret = call_read_iter(file, iocb, iter);
out:
if (ret >= 0)
fsnotify_access(file);
return ret;
}
+EXPORT_SYMBOL(vfs_iocb_iter_read);
-ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
- struct iov_iter *iter)
+ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
+ rwf_t flags)
{
size_t tot_len;
ssize_t ret = 0;
@@ -824,25 +833,16 @@ ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
tot_len = iov_iter_count(iter);
if (!tot_len)
goto out;
- ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
+ ret = rw_verify_area(READ, file, ppos, tot_len);
if (ret < 0)
return ret;
- ret = call_read_iter(file, iocb, iter);
+ ret = do_iter_read(file, iter, ppos, flags);
out:
if (ret >= 0)
fsnotify_access(file);
return ret;
}
-EXPORT_SYMBOL(vfs_iocb_iter_read);
-
-ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
- rwf_t flags)
-{
- if (!file->f_op->read_iter)
- return -EINVAL;
- return do_iter_read(file, iter, ppos, flags);
-}
EXPORT_SYMBOL(vfs_iter_read);
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
@@ -914,19 +914,37 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
EXPORT_SYMBOL(vfs_iter_write);
static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
- unsigned long vlen, loff_t *pos, rwf_t flags)
+ unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
- ssize_t ret;
+ size_t tot_len;
+ ssize_t ret = 0;
- ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
- if (ret >= 0) {
- ret = do_iter_read(file, &iter, pos, flags);
- kfree(iov);
- }
+ if (!(file->f_mode & FMODE_READ))
+ return -EBADF;
+ if (!(file->f_mode & FMODE_CAN_READ))
+ return -EINVAL;
+
+ ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov,
+ &iter);
+ if (ret < 0)
+ return ret;
+ tot_len = iov_iter_count(&iter);
+ if (!tot_len)
+ goto out;
+
+ ret = rw_verify_area(READ, file, pos, tot_len);
+ if (ret < 0)
+ goto out;
+
+ ret = do_iter_read(file, &iter, pos, flags);
+out:
+ if (ret >= 0)
+ fsnotify_access(file);
+ kfree(iov);
return ret;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 11/15] fs: move permission hook out of do_iter_read()
2023-11-14 15:33 ` [PATCH 11/15] fs: move permission hook out of do_iter_read() Amir Goldstein
@ 2023-11-21 15:28 ` Christian Brauner
2023-11-21 17:46 ` Amir Goldstein
2023-11-21 15:35 ` Christian Brauner
1 sibling, 1 reply; 25+ messages in thread
From: Christian Brauner @ 2023-11-21 15:28 UTC (permalink / raw)
To: Amir Goldstein
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
> +ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
> + struct iov_iter *iter)
Fyi, vfs_iocb_iter_read() and vfs_iter_read() end up with the same checks:
if (!file->f_op->read_iter)
return -EINVAL;
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
tot_len = iov_iter_count(iter);
if (!tot_len)
goto out;
ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
if (ret < 0)
return ret;
So if you resend you might want to static inline this. But idk, might
not matter too much.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 11/15] fs: move permission hook out of do_iter_read()
2023-11-21 15:28 ` Christian Brauner
@ 2023-11-21 17:46 ` Amir Goldstein
0 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-21 17:46 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
On Tue, Nov 21, 2023 at 5:28 PM Christian Brauner <brauner@kernel.org> wrote:
>
> > +ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
> > + struct iov_iter *iter)
>
> Fyi, vfs_iocb_iter_read() and vfs_iter_read() end up with the same checks:
>
> if (!file->f_op->read_iter)
> return -EINVAL;
> if (!(file->f_mode & FMODE_READ))
> return -EBADF;
> if (!(file->f_mode & FMODE_CAN_READ))
> return -EINVAL;
>
> tot_len = iov_iter_count(iter);
> if (!tot_len)
> goto out;
> ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
> if (ret < 0)
> return ret;
>
> So if you resend you might want to static inline this. But idk, might
> not matter too much.
There are more commonalities with other helpers,
but I don't want to "over clean", so I'd rather leave it like that.
I will remove the else in do_iter_read().
Thanks,
Amir.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 11/15] fs: move permission hook out of do_iter_read()
2023-11-14 15:33 ` [PATCH 11/15] fs: move permission hook out of do_iter_read() Amir Goldstein
2023-11-21 15:28 ` Christian Brauner
@ 2023-11-21 15:35 ` Christian Brauner
1 sibling, 0 replies; 25+ messages in thread
From: Christian Brauner @ 2023-11-21 15:35 UTC (permalink / raw)
To: Amir Goldstein
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
> static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
> - loff_t *pos, rwf_t flags)
> + loff_t *pos, rwf_t flags)
> +{
> + if (file->f_op->read_iter)
> + return do_iter_readv_writev(file, iter, pos, READ, flags);
> + else
> + return do_loop_readv_writev(file, iter, pos, READ, flags);
> +}
That else doesn't serve a purpose here. I would just remove it. Easier
on the eye too.
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 12/15] fs: move kiocb_start_write() into vfs_iocb_iter_write()
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (10 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 11/15] fs: move permission hook out of do_iter_read() Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-14 15:33 ` [PATCH 13/15] fs: create __sb_write_started() helper Amir Goldstein
` (2 subsequent siblings)
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
In vfs code, sb_start_write() is usually called after the permission hook
in rw_verify_area(). vfs_iocb_iter_write() is an exception to this rule,
where kiocb_start_write() is called by its callers.
Move kiocb_start_write() from the callers into vfs_iocb_iter_write()
after the rw_verify_area() checks, to make them "start-write-safe".
This is needed for fanotify "pre content" events.
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/cachefiles/io.c | 2 --
fs/overlayfs/file.c | 1 -
fs/read_write.c | 2 ++
3 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 009d23cd435b..3d3667807636 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -319,8 +319,6 @@ int __cachefiles_write(struct cachefiles_object *object,
ki->iocb.ki_complete = cachefiles_write_complete;
atomic_long_add(ki->b_writing, &cache->b_writing);
- kiocb_start_write(&ki->iocb);
-
get_file(ki->iocb.ki_filp);
cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 690b173f34fc..2adf3a5641cd 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -456,7 +456,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_queue_completion;
refcount_set(&aio_req->ref, 2);
- kiocb_start_write(&aio_req->iocb);
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
diff --git a/fs/read_write.c b/fs/read_write.c
index 5b18e13c2620..8d381929701c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -854,6 +854,7 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
return do_loop_readv_writev(file, iter, pos, WRITE, flags);
}
+/* Caller is responsible for calling kiocb_end_write() on completion */
ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
struct iov_iter *iter)
{
@@ -874,6 +875,7 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
if (ret < 0)
return ret;
+ kiocb_start_write(iocb);
ret = call_write_iter(file, iocb, iter);
if (ret > 0)
fsnotify_modify(file);
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 13/15] fs: create __sb_write_started() helper
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (11 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 12/15] fs: move kiocb_start_write() into vfs_iocb_iter_write() Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-14 15:33 ` [PATCH 14/15] fs: create file_write_started() helper Amir Goldstein
2023-11-14 15:33 ` [PATCH 15/15] fs: create {sb,file}_write_not_started() helpers Amir Goldstein
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
Similar to sb_write_started() for use by other sb freeze levels.
Unlike the boolean sb_write_started(), this helper returns a tristate
to distiguish the cases of lockdep disabled or unknown lock state.
This is needed for fanotify "pre content" events.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
include/linux/fs.h | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98b7a7a8c42e..e8aa48797bf4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1645,9 +1645,22 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
#define __sb_writers_release(sb, lev) \
percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+/**
+ * __sb_write_started - check if sb freeze level is held
+ * @sb: the super we write to
+ *
+ * > 0 sb freeze level is held
+ * 0 sb freeze level is not held
+ * < 0 !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN
+ */
+static inline int __sb_write_started(const struct super_block *sb, int level)
+{
+ return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1);
+}
+
static inline bool sb_write_started(const struct super_block *sb)
{
- return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1);
+ return __sb_write_started(sb, SB_FREEZE_WRITE);
}
/**
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 14/15] fs: create file_write_started() helper
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (12 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 13/15] fs: create __sb_write_started() helper Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
2023-11-14 15:33 ` [PATCH 15/15] fs: create {sb,file}_write_not_started() helpers Amir Goldstein
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
Convenience wrapper for sb_write_started(file_inode(inode)->i_sb)), which
has a single occurrence in the code right now.
Document the false negatives of those helpers, which makes them unusable
to assert that sb_start_write() is not held.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
fs/read_write.c | 2 +-
include/linux/fs.h | 21 +++++++++++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 8d381929701c..87e1256d0a67 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1437,7 +1437,7 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
- lockdep_assert(sb_write_started(file_inode(file_out)->i_sb));
+ lockdep_assert(file_write_started(file_out));
return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e8aa48797bf4..05780d993c7d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1658,11 +1658,32 @@ static inline int __sb_write_started(const struct super_block *sb, int level)
return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1);
}
+/**
+ * sb_write_started - check if SB_FREEZE_WRITE is held
+ * @sb: the super we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ */
static inline bool sb_write_started(const struct super_block *sb)
{
return __sb_write_started(sb, SB_FREEZE_WRITE);
}
+/**
+ * file_write_started - check if SB_FREEZE_WRITE is held
+ * @file: the file we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ * May be false positive with !S_ISREG, because file_start_write() has
+ * no effect on !S_ISREG.
+ */
+static inline bool file_write_started(const struct file *file)
+{
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return true;
+ return sb_write_started(file_inode(file)->i_sb);
+}
+
/**
* sb_end_write - drop write access to a superblock
* @sb: the super we wrote to
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 15/15] fs: create {sb,file}_write_not_started() helpers
2023-11-14 15:33 [PATCH 00/15] Tidy up file permission hooks Amir Goldstein
` (13 preceding siblings ...)
2023-11-14 15:33 ` [PATCH 14/15] fs: create file_write_started() helper Amir Goldstein
@ 2023-11-14 15:33 ` Amir Goldstein
14 siblings, 0 replies; 25+ messages in thread
From: Amir Goldstein @ 2023-11-14 15:33 UTC (permalink / raw)
To: Christian Brauner
Cc: Al Viro, Jan Kara, Jens Axboe, Miklos Szeredi, David Howells,
linux-fsdevel
Create new helpers {sb,file}_write_not_started() that can be used
to assert that sb_start_write() is not held.
This is needed for fanotify "pre content" events.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
include/linux/fs.h | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 05780d993c7d..c085172f832f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1669,6 +1669,17 @@ static inline bool sb_write_started(const struct super_block *sb)
return __sb_write_started(sb, SB_FREEZE_WRITE);
}
+/**
+ * sb_write_not_started - check if SB_FREEZE_WRITE is not held
+ * @sb: the super we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ */
+static inline bool sb_write_not_started(const struct super_block *sb)
+{
+ return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0;
+}
+
/**
* file_write_started - check if SB_FREEZE_WRITE is held
* @file: the file we write to
@@ -1684,6 +1695,21 @@ static inline bool file_write_started(const struct file *file)
return sb_write_started(file_inode(file)->i_sb);
}
+/**
+ * file_write_not_started - check if SB_FREEZE_WRITE is not held
+ * @file: the file we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ * May be false positive with !S_ISREG, because file_start_write() has
+ * no effect on !S_ISREG.
+ */
+static inline bool file_write_not_started(const struct file *file)
+{
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return true;
+ return sb_write_not_started(file_inode(file)->i_sb);
+}
+
/**
* sb_end_write - drop write access to a superblock
* @sb: the super we wrote to
--
2.34.1
^ permalink raw reply related [flat|nested] 25+ messages in thread