From: Amir Goldstein <amir73il@gmail.com>
To: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>, Jens Axboe <axboe@kernel.dk>,
Miklos Szeredi <miklos@szeredi.hu>,
David Howells <dhowells@redhat.com>,
Al Viro <viro@zeniv.linux.org.uk>,
linux-fsdevel@vger.kernel.org
Subject: [PATCH] fs: create kiocb_{start,end}_write() helpers
Date: Tue, 15 Aug 2023 19:57:21 +0300 [thread overview]
Message-ID: <20230815165721.821906-1-amir73il@gmail.com> (raw)
aio, io_uring, cachefiles and overlayfs, all open code an ugly variant
of file_{start,end}_write() to silence lockdep warnings.
Create helpers for this lockdep dance and use the helpers in all the
callers.
Use a new iocb flag IOCB_WRITE_STARTED to indicate if sb_start_write()
was called.
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
Christian,
This is an attempt to consolidate the open coded lockdep fooling in
all those async io submitters into a single helper.
The idea to do that consolidation was suggested by Jan.
The (questionable) idea to use an IOCB_ flag was mine.
This re-factoring is part of a larger vfs cleanup I am doing for
fanotify permission events. The complete series is not ready for
prime time yet, but this one patch is independent and I would love
to get it reviewed/merged a head of the rest.
Thanks,
Amir.
fs/aio.c | 26 ++----------------
fs/cachefiles/io.c | 16 ++---------
fs/overlayfs/file.c | 15 ++++------
include/linux/fs.h | 67 +++++++++++++++++++++++++++++++++++++++++++--
io_uring/rw.c | 36 ++++--------------------
5 files changed, 82 insertions(+), 78 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 77e33619de40..410598cb9d21 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1444,17 +1444,8 @@ static void aio_complete_rw(struct kiocb *kiocb, long res)
if (!list_empty_careful(&iocb->ki_list))
aio_remove_iocb(iocb);
- if (kiocb->ki_flags & IOCB_WRITE) {
- struct inode *inode = file_inode(kiocb->ki_filp);
-
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
- if (S_ISREG(inode->i_mode))
- __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
- file_end_write(kiocb->ki_filp);
- }
+ if (kiocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(kiocb);
iocb->ki_res.res = res;
iocb->ki_res.res2 = 0;
@@ -1581,18 +1572,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
- /*
- * Open-code file_start_write here to grab freeze protection,
- * which will be released by another thread in
- * aio_complete_rw(). Fool lockdep by telling it the lock got
- * released so that it doesn't complain about the held lock when
- * we return to userspace.
- */
- if (S_ISREG(file_inode(file)->i_mode)) {
- sb_start_write(file_inode(file)->i_sb);
- __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
- }
- req->ki_flags |= IOCB_WRITE;
+ kiocb_start_write(req);
aio_rw_done(req, call_write_iter(file, req, &iter));
}
kfree(iovec);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 175a25fcade8..6e16f7c116da 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -259,9 +259,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
_enter("%ld", ret);
- /* Tell lockdep we inherited freeze protection from submission thread */
- __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
- __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
+ kiocb_end_write(iocb);
if (ret < 0)
trace_cachefiles_io_error(object, inode, ret,
@@ -286,7 +284,6 @@ int __cachefiles_write(struct cachefiles_object *object,
{
struct cachefiles_cache *cache;
struct cachefiles_kiocb *ki;
- struct inode *inode;
unsigned int old_nofs;
ssize_t ret;
size_t len = iov_iter_count(iter);
@@ -322,19 +319,12 @@ int __cachefiles_write(struct cachefiles_object *object,
ki->iocb.ki_complete = cachefiles_write_complete;
atomic_long_add(ki->b_writing, &cache->b_writing);
- /* Open-code file_start_write here to grab freeze protection, which
- * will be released by another thread in aio_complete_rw(). Fool
- * lockdep by telling it the lock got released so that it doesn't
- * complain about the held lock when we return to userspace.
- */
- inode = file_inode(file);
- __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
- __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+ kiocb_start_write(ki);
get_file(ki->iocb.ki_filp);
cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
- trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
+ trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len);
old_nofs = memalloc_nofs_save();
ret = cachefiles_inject_write_error();
if (ret == 0)
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 7e7876aae01c..d1dc8779d95d 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -293,10 +293,7 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
if (iocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(orig_iocb->ki_filp);
- /* Actually acquired in ovl_write_iter() */
- __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
- SB_FREEZE_WRITE);
- file_end_write(iocb->ki_filp);
+ kiocb_end_write(iocb);
ovl_copyattr(inode);
}
@@ -365,6 +362,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
return ret;
}
+/* IOCB flags that may be propagated to real file io */
+#define OVL_IOCB_MASK ~(IOCB_WRITE_STARTED)
+
static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
@@ -372,7 +372,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
struct fd real;
const struct cred *old_cred;
ssize_t ret;
- int ifl = iocb->ki_flags;
+ int ifl = iocb->ki_flags & OVL_IOCB_MASK;
if (!iov_iter_count(iter))
return 0;
@@ -412,10 +412,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
if (!aio_req)
goto out;
- file_start_write(real.file);
- /* Pacify lockdep, same trick as done in aio_write() */
- __sb_writers_release(file_inode(real.file)->i_sb,
- SB_FREEZE_WRITE);
aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
@@ -423,6 +419,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
refcount_set(&aio_req->ref, 2);
+ kiocb_start_write(&aio_req->iocb);
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1fb1f050f560..6305bc710d30 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -338,6 +338,8 @@ enum rw_hint {
#define IOCB_NOIO (1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE (1 << 21)
+/* file_start_write() was called */
+#define IOCB_WRITE_STARTED (1 << 22)
/* for use in trace events */
#define TRACE_IOCB_STRINGS \
@@ -351,7 +353,8 @@ enum rw_hint {
{ IOCB_WRITE, "WRITE" }, \
{ IOCB_WAITQ, "WAITQ" }, \
{ IOCB_NOIO, "NOIO" }, \
- { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }
+ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \
+ { IOCB_WRITE_STARTED, "WRITE_STARTED" }
struct kiocb {
struct file *ki_filp;
@@ -2632,6 +2635,13 @@ static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
return (inode->i_mode ^ mode) & S_IFMT;
}
+/**
+ * file_start_write - get write access to a superblock for regular file io
+ * @file: the file we want to write to
+ *
+ * This is a variant of sb_start_write() which is a noop on non-regualr file.
+ * Should be matched with a call to file_end_write().
+ */
static inline void file_start_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
@@ -2646,11 +2656,64 @@ static inline bool file_start_write_trylock(struct file *file)
return sb_start_write_trylock(file_inode(file)->i_sb);
}
+/**
+ * file_end_write - drop write access to a superblock of a regular file
+ * @file: the file we wrote to
+ *
+ * Should be matched with a call to file_start_write().
+ */
static inline void file_end_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return;
- __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ sb_end_write(file_inode(file)->i_sb);
+}
+
+/**
+ * kiocb_start_write - get write access to a superblock for async file io
+ * @iocb: the io context we want to submit the write with
+ *
+ * This is a variant of file_start_write() for async io submission.
+ * Should be matched with a call to kiocb_end_write().
+ */
+static inline void kiocb_start_write(struct kiocb *iocb)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ iocb->ki_flags |= IOCB_WRITE;
+ if (WARN_ON_ONCE(iocb->ki_flags & IOCB_WRITE_STARTED))
+ return;
+ if (!S_ISREG(inode->i_mode))
+ return;
+ sb_start_write(inode->i_sb);
+ /*
+ * Fool lockdep by telling it the lock got released so that it
+ * doesn't complain about the held lock when we return to userspace.
+ */
+ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+ iocb->ki_flags |= IOCB_WRITE_STARTED;
+}
+
+/**
+ * kiocb_end_write - drop write access to a superblock after async file io
+ * @iocb: the io context we sumbitted the write with
+ *
+ * Should be matched with a call to kiocb_start_write().
+ */
+static inline void kiocb_end_write(struct kiocb *iocb)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ if (!(iocb->ki_flags & IOCB_WRITE_STARTED))
+ return;
+ if (!S_ISREG(inode->i_mode))
+ return;
+ /*
+ * Tell lockdep we inherited freeze protection from submission thread.
+ */
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+ sb_end_write(inode->i_sb);
+ iocb->ki_flags &= ~IOCB_WRITE_STARTED;
}
/*
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1bce2208b65c..09493ae49b85 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -220,20 +220,6 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
}
#endif
-static void kiocb_end_write(struct io_kiocb *req)
-{
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
- if (req->flags & REQ_F_ISREG) {
- struct super_block *sb = file_inode(req->file)->i_sb;
-
- __sb_writers_acquired(sb, SB_FREEZE_WRITE);
- sb_end_write(sb);
- }
-}
-
/*
* Trigger the notifications after having done some IO, and finish the write
* accounting, if any.
@@ -243,7 +229,7 @@ static void io_req_io_end(struct io_kiocb *req)
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
if (rw->kiocb.ki_flags & IOCB_WRITE) {
- kiocb_end_write(req);
+ kiocb_end_write(&rw->kiocb);
fsnotify_modify(req->file);
} else {
fsnotify_access(req->file);
@@ -313,7 +299,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
struct io_kiocb *req = cmd_to_io_kiocb(rw);
if (kiocb->ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
+ kiocb_end_write(kiocb);
if (unlikely(res != req->cqe.res)) {
if (res == -EAGAIN && io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
@@ -902,19 +888,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
return ret;
}
- /*
- * Open-code file_start_write here to grab freeze protection,
- * which will be released by another thread in
- * io_complete_rw(). Fool lockdep by telling it the lock got
- * released so that it doesn't complain about the held lock when
- * we return to userspace.
- */
- if (req->flags & REQ_F_ISREG) {
- sb_start_write(file_inode(req->file)->i_sb);
- __sb_writers_release(file_inode(req->file)->i_sb,
- SB_FREEZE_WRITE);
- }
- kiocb->ki_flags |= IOCB_WRITE;
+ kiocb_start_write(kiocb);
if (likely(req->file->f_op->write_iter))
ret2 = call_write_iter(req->file, kiocb, &s->iter);
@@ -961,7 +935,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
io->bytes_done += ret2;
if (kiocb->ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
+ kiocb_end_write(kiocb);
return ret ? ret : -EAGAIN;
}
done:
@@ -972,7 +946,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
ret = io_setup_async_rw(req, iovec, s, false);
if (!ret) {
if (kiocb->ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
+ kiocb_end_write(kiocb);
return -EAGAIN;
}
return ret;
--
2.34.1
next reply other threads:[~2023-08-15 16:58 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-15 16:57 Amir Goldstein [this message]
2023-08-15 17:02 ` [PATCH] fs: create kiocb_{start,end}_write() helpers Jens Axboe
2023-08-15 17:06 ` Jens Axboe
2023-08-15 18:48 ` Amir Goldstein
2023-08-15 21:16 ` Jens Axboe
2023-08-16 8:51 ` Christian Brauner
2023-08-16 14:04 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230815165721.821906-1-amir73il@gmail.com \
--to=amir73il@gmail.com \
--cc=axboe@kernel.dk \
--cc=brauner@kernel.org \
--cc=dhowells@redhat.com \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).