[PATCH v3 3/4] ovl: do not open/llseek lower file with upper sb_writers held

linux-unionfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Amir Goldstein <amir73il@gmail.com>
To: Miklos Szeredi <miklos@szeredi.hu>
Cc: linux-unionfs@vger.kernel.org
Subject: [PATCH v3 3/4] ovl: do not open/llseek lower file with upper sb_writers held
Date: Wed, 16 Aug 2023 18:23:33 +0300	[thread overview]
Message-ID: <20230816152334.924960-4-amir73il@gmail.com> (raw)
In-Reply-To: <20230816152334.924960-1-amir73il@gmail.com>

overlayfs file open (ovl_maybe_lookup_lowerdata) and overlay file llseek
take the ovl_inode_lock, without holding upper sb_writers.

In case of nested lower overlay that uses same upper fs as this overlay,
lockdep will warn about (possibly false positive) circular lock
dependency when doing open/llseek of lower ovl file during copy up with
our upper sb_writers held, because the locking ordering seems reverse to
the locking order in ovl_copy_up_start():

- lower ovl_inode_lock
- upper sb_writers

Let the copy up "transaction" keeps an elevated mnt write count on upper
mnt, but leaves taking upper sb_writers to lower level helpers only when
they actually need it.  This allows to avoid holding upper sb_writers
during lower file open/llseek and prevents the lockdep warning.

Minimizing the scope of upper sb_writers during copy up is also needed
for fixing another possible deadlocks by a following patch.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/overlayfs/copy_up.c | 76 ++++++++++++++++++++++++++++++------------
 fs/overlayfs/util.c    |  8 +++--
 2 files changed, 61 insertions(+), 23 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index e9adff187284..add5861cf06b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -252,7 +252,9 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
 		return PTR_ERR(old_file);
 
 	/* Try to use clone_file_range to clone up within the same fs */
+	ovl_start_write(dentry);
 	cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
+	ovl_end_write(dentry);
 	if (cloned == len)
 		goto out_fput;
 	/* Couldn't clone, so now we try to copy the data */
@@ -287,8 +289,12 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
 		 * it may not recognize all kind of holes and sometimes
 		 * only skips partial of hole area. However, it will be
 		 * enough for most of the use cases.
+		 *
+		 * We do not hold upper sb_writers throughout the loop to avert
+		 * lockdep warning with llseek of lower file in nested overlay:
+		 * - upper sb_writers
+		 * -- lower ovl_inode_lock (ovl_llseek)
 		 */
-
 		if (skip_hole && data_pos < old_pos) {
 			data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
 			if (data_pos > old_pos) {
@@ -303,9 +309,11 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
 			}
 		}
 
+		ovl_start_write(dentry);
 		bytes = do_splice_direct(old_file, &old_pos,
 					 new_file, &new_pos,
 					 this_len, SPLICE_F_MOVE);
+		ovl_end_write(dentry);
 		if (bytes <= 0) {
 			error = bytes;
 			break;
@@ -555,14 +563,16 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
 	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
 	struct inode *udir = d_inode(upperdir);
 
+	ovl_start_write(c->dentry);
+
 	/* Mark parent "impure" because it may now contain non-pure upper */
 	err = ovl_set_impure(c->parent, upperdir);
 	if (err)
-		return err;
+		goto out;
 
 	err = ovl_set_nlink_lower(c->dentry);
 	if (err)
-		return err;
+		goto out;
 
 	inode_lock_nested(udir, I_MUTEX_PARENT);
 	upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir,
@@ -581,10 +591,12 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
 	}
 	inode_unlock(udir);
 	if (err)
-		return err;
+		goto out;
 
 	err = ovl_set_nlink_upper(c->dentry);
 
+out:
+	ovl_end_write(c->dentry);
 	return err;
 }
 
@@ -718,21 +730,19 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 		.link = c->link
 	};
 
-	/* workdir and destdir could be the same when copying up to indexdir */
-	err = -EIO;
-	if (lock_rename(c->workdir, c->destdir) != NULL)
-		goto unlock;
-
 	err = ovl_prep_cu_creds(c->dentry, &cc);
 	if (err)
-		goto unlock;
+		return err;
 
+	ovl_start_write(c->dentry);
+	inode_lock(wdir);
 	temp = ovl_create_temp(ofs, c->workdir, &cattr);
+	inode_unlock(wdir);
+	ovl_end_write(c->dentry);
 	ovl_revert_cu_creds(&cc);
 
-	err = PTR_ERR(temp);
 	if (IS_ERR(temp))
-		goto unlock;
+		return PTR_ERR(temp);
 
 	/*
 	 * Copy up data first and then xattrs. Writing data after
@@ -740,8 +750,21 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 	 */
 	path.dentry = temp;
 	err = ovl_copy_up_data(c, &path);
-	if (err)
+	/*
+	 * We cannot hold lock_rename() throughout this helper, because or
+	 * lock ordering with sb_writers, which shouldn't be held when calling
+	 * ovl_copy_up_data(), so lock workdir and destdir and make sure that
+	 * temp wasn't moved before copy up completion or cleanup.
+	 * If temp was moved, abort without the cleanup.
+	 */
+	ovl_start_write(c->dentry);
+	if (lock_rename(c->workdir, c->destdir) != NULL ||
+	    temp->d_parent != c->workdir) {
+		err = -EIO;
+		goto unlock;
+	} else if (err) {
 		goto cleanup;
+	}
 
 	err = ovl_copy_up_metadata(c, temp);
 	if (err)
@@ -778,6 +801,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 		ovl_set_flag(OVL_WHITEOUTS, inode);
 unlock:
 	unlock_rename(c->workdir, c->destdir);
+	ovl_end_write(c->dentry);
 
 	return err;
 
@@ -801,9 +825,10 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
 	if (err)
 		return err;
 
+	ovl_start_write(c->dentry);
 	tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode);
+	ovl_end_write(c->dentry);
 	ovl_revert_cu_creds(&cc);
-
 	if (IS_ERR(tmpfile))
 		return PTR_ERR(tmpfile);
 
@@ -814,9 +839,11 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
 			goto out_fput;
 	}
 
+	ovl_start_write(c->dentry);
+
 	err = ovl_copy_up_metadata(c, temp);
 	if (err)
-		goto out_fput;
+		goto out;
 
 	inode_lock_nested(udir, I_MUTEX_PARENT);
 
@@ -830,7 +857,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
 	inode_unlock(udir);
 
 	if (err)
-		goto out_fput;
+		goto out;
 
 	if (c->metacopy_digest)
 		ovl_set_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
@@ -842,6 +869,8 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
 		ovl_set_upperdata(d_inode(c->dentry));
 	ovl_inode_update(d_inode(c->dentry), dget(temp));
 
+out:
+	ovl_end_write(c->dentry);
 out_fput:
 	fput(tmpfile);
 	return err;
@@ -892,7 +921,9 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 		 * Mark parent "impure" because it may now contain non-pure
 		 * upper
 		 */
+		ovl_start_write(c->dentry);
 		err = ovl_set_impure(c->parent, c->destdir);
+		ovl_end_write(c->dentry);
 		if (err)
 			return err;
 	}
@@ -908,6 +939,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 	if (c->indexed)
 		ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
 
+	ovl_start_write(c->dentry);
 	if (to_index) {
 		/* Initialize nlink for copy up of disconnected dentry */
 		err = ovl_set_nlink_upper(c->dentry);
@@ -922,6 +954,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 		ovl_dentry_set_upper_alias(c->dentry);
 		ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry));
 	}
+	ovl_end_write(c->dentry);
 
 out:
 	if (to_index)
@@ -1010,15 +1043,16 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
 	 * Writing to upper file will clear security.capability xattr. We
 	 * don't want that to happen for normal copy-up operation.
 	 */
+	ovl_start_write(c->dentry);
 	if (capability) {
 		err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS,
 				      capability, cap_size, 0);
-		if (err)
-			goto out_free;
 	}
-
-
-	err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY);
+	if (!err) {
+		err = ovl_removexattr(ofs, upperpath.dentry,
+				      OVL_XATTR_METACOPY);
+	}
+	ovl_end_write(c->dentry);
 	if (err)
 		goto out_free;
 
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 980e128ba0a4..4e67ef0cc8da 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -670,6 +670,10 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags)
 	return false;
 }
 
+/*
+ * The copy up "transaction" keeps an elevated mnt write count on upper mnt,
+ * but leaves taking freeze protection on upper sb to lower level helpers.
+ */
 int ovl_copy_up_start(struct dentry *dentry, int flags)
 {
 	struct inode *inode = d_inode(dentry);
@@ -682,7 +686,7 @@ int ovl_copy_up_start(struct dentry *dentry, int flags)
 	if (ovl_already_copied_up_locked(dentry, flags))
 		err = 1; /* Already copied up */
 	else
-		err = ovl_want_write(dentry);
+		err = ovl_get_mnt_write(dentry);
 	if (err)
 		goto out_unlock;
 
@@ -695,7 +699,7 @@ int ovl_copy_up_start(struct dentry *dentry, int flags)
 
 void ovl_copy_up_end(struct dentry *dentry)
 {
-	ovl_drop_write(dentry);
+	ovl_put_mnt_write(dentry);
 	ovl_inode_unlock(d_inode(dentry));
 }
 
-- 
2.34.1

next prev parent reply	other threads:[~2023-08-16 15:24 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-16 15:23 [PATCH v3 0/4] overlayfs lock ordering changes Amir Goldstein
2023-08-16 15:23 ` [PATCH v3 1/4] ovl: reorder ovl_want_write() after ovl_inode_lock() Amir Goldstein
2023-08-16 15:23 ` [PATCH v3 2/4] ovl: split ovl_want_write() into two helpers Amir Goldstein
2023-08-16 15:23 ` Amir Goldstein [this message]
2023-08-16 15:23 ` [PATCH v3 4/4] ovl: do not encode lower fh with upper sb_writers held Amir Goldstein
2023-08-17  5:44 ` [PATCH v3 0/4] fs: export __mnt_{want,drop}_write to modules Amir Goldstein
2023-08-17 13:38   ` Christian Brauner
2023-08-17 13:55     ` Amir Goldstein
2023-08-17 14:32       ` Christian Brauner
2023-08-17 14:41         ` Amir Goldstein

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:e9adff18728 dfblob:add5861cf06 dfblob:980e128ba0a
dfblob:4e67ef0cc8d )
 OR (
bs:"[PATCH v3 3/4] ovl: do not open/llseek lower file with upper sb_writers held" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230816152334.924960-4-amir73il@gmail.com \
    --to=amir73il@gmail.com \
    --cc=linux-unionfs@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).