From: Ioannis Angelakopoulos <iangelak@fb.com>
To: <linux-btrfs@vger.kernel.org>, <kernel-team@fb.com>
Subject: [RFC PATCH 2/2] btrfs: Lockdep annotations for the extent bits wait event
Date: Thu, 11 Aug 2022 17:42:44 -0700 [thread overview]
Message-ID: <20220812004241.1722846-3-iangelak@fb.com> (raw)
In-Reply-To: <20220812004241.1722846-1-iangelak@fb.com>
Add lockdep annotations in five places that lock extent bits:
1) find_lock_delalloc_range() in fs/btrfs/extent_io.c and its callers
2) btrfs_lock_and_flush_ordered_range() in fs/btrfs/ordered-data.c and
its callers
3) extent_fiemap() in fs/btrfs/extent_io.c
4) btrfs_fallocate() in fs/btrfs/file.c
5) btrfs_finish_ordered_io() in fs/btrfs/inode.c
To annotate the extent bits wait event we use a two level lockdep
map (via the multilevel wait event lockdep annotation macros).
The first level is used for high level functions such as btrfs_fallocate()
and the second level is used for low level functions, such as
find_lock_delalloc_range().
If we used only one level then lockdep would complain because there are
execution contexts where the extent bits annotation is acquired before the
delalloc_mutex (e.g., in btrfs_fallocate()) and later the delalloc_mutex is
acquired before the extent bits annotation (e.g., in
find_lock_delalloc_range()). Normally, if the range of bits locked were the
same for both btrfs_fallocate() and find_lock_delalloc_range() we would
have a deadlock. However, a call to btrfs_wait_ordered_range() from
btrfs_fallocate() guarantees that the range of extent bits is unlocked
before locking, thus the deadlock is averted.
By using a two level lockdep map we "break" the dependency between the high
and low levels, thus lockdep does not complain.
Signed-off-by: Ioannis Angelakopoulos <iangelak@fb.com>
---
fs/btrfs/ctree.h | 1 +
fs/btrfs/disk-io.c | 1 +
fs/btrfs/extent_io.c | 114 +++++++++++++++++++++++++++++++++++++---
fs/btrfs/file.c | 10 ++--
fs/btrfs/inode.c | 22 ++++++--
fs/btrfs/ordered-data.c | 4 +-
6 files changed, 133 insertions(+), 19 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 44837545eef8..85f947ce6e6b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1104,6 +1104,7 @@ struct btrfs_fs_info {
struct lockdep_map btrfs_state_change_map[4];
struct lockdep_map btrfs_trans_pending_ordered_map;
struct lockdep_map btrfs_ordered_extent_map;
+ struct lockdep_map btrfs_inode_extent_lock_map;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6268dafeeb2d..afd971c31168 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2996,6 +2996,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
+ btrfs_lockdep_init_map(fs_info, btrfs_inode_extent_lock);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
BTRFS_LOCKDEP_TRANS_COMMIT_START);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6e8e936a8a1e..3d2ef3d78e7f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -891,6 +891,24 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
}
+int __clear_extent_bit_lockdep(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, int wake, int delete,
+ struct extent_state **cached_state,
+ gfp_t mask, struct extent_changeset *changeset)
+{
+ int ret;
+
+ ret = __clear_extent_bit(tree, start, end, bits, wake, delete, cached_state,
+ mask, changeset);
+
+ if ((tree->owner != IO_TREE_FREE_SPACE_INODE_IO) &&
+ (tree->owner == IO_TREE_INODE_IO))
+ btrfs_lockdep_release(tree->fs_info, btrfs_inode_extent_lock);
+
+ return ret;
+}
+
+
static void wait_on_state(struct extent_io_tree *tree,
struct extent_state *state)
__releases(tree->lock)
@@ -1470,6 +1488,14 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
cached, GFP_NOFS, NULL);
}
+int clear_extent_bit_lockdep(struct extent_io_tree *tree, u64 start, u64 end,
+ u32 bits, int wake, int delete,
+ struct extent_state **cached)
+{
+ return __clear_extent_bit_lockdep(tree, start, end, bits, wake, delete,
+ cached, GFP_NOFS, NULL);
+}
+
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset)
{
@@ -1504,6 +1530,43 @@ int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
break;
WARN_ON(start > end);
}
+
+ return err;
+}
+
+int lock_extent_bits_lockdep(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached_state, bool nested)
+{
+ int err;
+#ifdef CONFIG_LOCKDEP
+ int subclass = 1;
+#endif
+
+ /* The wait event occurs within lock_extent_bits() */
+ if ((tree->owner != IO_TREE_FREE_SPACE_INODE_IO) &&
+ (tree->owner == IO_TREE_INODE_IO)) {
+ if (nested)
+ btrfs_might_wait_for_event_nested(tree->fs_info,
+ btrfs_inode_extent_lock,
+ subclass);
+ else
+ btrfs_might_wait_for_event(tree->fs_info,
+ btrfs_inode_extent_lock);
+ }
+
+ err = lock_extent_bits(tree, start, end, cached_state);
+
+ if ((tree->owner != IO_TREE_FREE_SPACE_INODE_IO) &&
+ (tree->owner == IO_TREE_INODE_IO)) {
+ if (nested)
+ btrfs_lockdep_acquire_nested(tree->fs_info,
+ btrfs_inode_extent_lock,
+ subclass);
+ else
+ btrfs_lockdep_acquire(tree->fs_info,
+ btrfs_inode_extent_lock);
+ }
+
return err;
}
@@ -2094,14 +2157,15 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
}
/* step three, lock the state bits for the whole range */
- lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
+ lock_extent_bits_lockdep(tree, delalloc_start, delalloc_end, &cached_state,
+ true);
/* then test to make sure it is all still delalloc */
ret = test_range_bit(tree, delalloc_start, delalloc_end,
EXTENT_DELALLOC, 1, cached_state);
if (!ret) {
- unlock_extent_cached(tree, delalloc_start, delalloc_end,
- &cached_state);
+ unlock_extent_cached_lockdep(tree, delalloc_start, delalloc_end,
+ &cached_state);
__unlock_for_delalloc(inode, locked_page,
delalloc_start, delalloc_end);
cond_resched();
@@ -3774,6 +3838,20 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
* bio to do the cleanup.
*/
submit_one_bio(&bio_ctrl);
+
+ /*
+ * Release the lockdep map here because
+ * btrfs_lock_and_flush_ordered_range() will exit with the lockdep map
+ * acquired as a reader. Actually bio worker would unlock the extent bits
+ * and thus the lockdep map when it ran asynchronously, if extent bits were
+ * annotated in a generic way. However, lockdep expects that the
+ * acquisition and release of the lockdep map occur in the same context,
+ * thus we have to unlock the lockdep map here.
+ */
+ if ((inode->io_tree.owner != IO_TREE_FREE_SPACE_INODE_IO) &&
+ (inode->io_tree.owner == IO_TREE_INODE_IO))
+ btrfs_lockdep_release(inode->io_tree.fs_info,
+ btrfs_inode_extent_lock);
return ret;
}
@@ -3793,6 +3871,18 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
REQ_RAHEAD, prev_em_start);
put_page(pages[index]);
}
+
+ /*
+ * Release the lockdep map here because
+ * btrfs_lock_and_flush_ordered_range() will exit with the lockdep map
+ * acquired as a reader. We have to unlock here because
+ * contiguous_readpages() will be called in while loop by
+ * extent_readahead(), thus lockdep would complain about double locking.
+ */
+ if ((inode->io_tree.owner != IO_TREE_FREE_SPACE_INODE_IO) &&
+ (inode->io_tree.owner == IO_TREE_INODE_IO))
+ btrfs_lockdep_release(inode->io_tree.fs_info,
+ btrfs_inode_extent_lock);
}
/*
@@ -3829,6 +3919,16 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
}
ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
delalloc_end, &page_started, &nr_written, wbc);
+
+ /*
+ * Release the lockdep map here, because btrfs_run_delalloc_range()
+ * will exit with the lockdep map acquired.
+ */
+ if ((inode->io_tree.owner != IO_TREE_FREE_SPACE_INODE_IO) &&
+ (inode->io_tree.owner == IO_TREE_INODE_IO))
+ btrfs_lockdep_release(inode->io_tree.fs_info,
+ btrfs_inode_extent_lock);
+
if (ret) {
btrfs_page_set_error(inode->root->fs_info, page,
page_offset(page), PAGE_SIZE);
@@ -5584,8 +5684,8 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
last_for_get_extent = isize;
}
- lock_extent_bits(&inode->io_tree, start, start + len - 1,
- &cached_state);
+ lock_extent_bits_lockdep(&inode->io_tree, start, start + len - 1,
+ &cached_state, false);
em = get_extent_skip_holes(inode, start, last_for_get_extent);
if (!em)
@@ -5697,8 +5797,8 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
ret = emit_last_fiemap_cache(fieinfo, &cache);
free_extent_map(em);
out:
- unlock_extent_cached(&inode->io_tree, start, start + len - 1,
- &cached_state);
+ unlock_extent_cached_lockdep(&inode->io_tree, start, start + len - 1,
+ &cached_state);
out_free_ulist:
btrfs_free_path(path);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c9649b7b2f18..df51a0cc3d98 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1510,7 +1510,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
*write_bytes = min_t(size_t, *write_bytes ,
num_bytes - pos + lockstart);
}
- unlock_extent(&inode->io_tree, lockstart, lockend);
+ unlock_extent_lockdep(&inode->io_tree, lockstart, lockend);
return ret;
}
@@ -3517,8 +3517,8 @@ static long btrfs_fallocate(struct file *file, int mode,
}
locked_end = alloc_end - 1;
- lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
- &cached_state);
+ lock_extent_bits_lockdep(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+ &cached_state, false);
btrfs_assert_inode_range_clean(BTRFS_I(inode), alloc_start, locked_end);
@@ -3607,8 +3607,8 @@ static long btrfs_fallocate(struct file *file, int mode,
*/
ret = btrfs_fallocate_update_isize(inode, actual_end, mode);
out_unlock:
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
- &cached_state);
+ unlock_extent_cached_lockdep(&BTRFS_I(inode)->io_tree, alloc_start,
+ locked_end, &cached_state);
out:
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
extent_changeset_free(data_reserved);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8cdb173331c7..0974df5bcdd5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3214,6 +3214,7 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
bool freespace_inode;
bool truncated = false;
bool clear_reserved_extent = true;
+ bool have_locked = false;
unsigned int clear_bits = EXTENT_DEFRAG;
start = ordered_extent->file_offset;
@@ -3272,7 +3273,12 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
}
clear_bits |= EXTENT_LOCKED;
- lock_extent_bits(io_tree, start, end, &cached_state);
+ lock_extent_bits_lockdep(io_tree, start, end, &cached_state, true);
+ /*
+ * We need this helper boolean because we can jump to out tag without
+ * having acquired the lockdep map first and unlock it
+ */
+ have_locked = true;
if (freespace_inode)
trans = btrfs_join_transaction_spacecache(root);
@@ -3338,9 +3344,14 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
}
ret = 0;
out:
- clear_extent_bit(&inode->io_tree, start, end, clear_bits,
- (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
- &cached_state);
+ if (have_locked)
+ clear_extent_bit_lockdep(&inode->io_tree, start, end, clear_bits,
+ (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
+ &cached_state);
+ else
+ clear_extent_bit(&inode->io_tree, start, end, clear_bits,
+ (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
+ &cached_state);
if (trans)
btrfs_end_transaction(trans);
@@ -5139,7 +5150,8 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
break;
}
free_extent_map(em);
- unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
+ unlock_extent_cached_lockdep(io_tree, hole_start, block_end - 1,
+ &cached_state);
return err;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index eb24a6d20ff8..c6d377aa4abb 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -1043,7 +1043,7 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
cachedp = cached_state;
while (1) {
- lock_extent_bits(&inode->io_tree, start, end, cachedp);
+ lock_extent_bits_lockdep(&inode->io_tree, start, end, cachedp, true);
ordered = btrfs_lookup_ordered_range(inode, start,
end - start + 1);
if (!ordered) {
@@ -1056,7 +1056,7 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
refcount_dec(&cache->refs);
break;
}
- unlock_extent_cached(&inode->io_tree, start, end, cachedp);
+ unlock_extent_cached_lockdep(&inode->io_tree, start, end, cachedp);
btrfs_start_ordered_extent(ordered, 1);
btrfs_put_ordered_extent(ordered);
}
--
2.30.2
next prev parent reply other threads:[~2022-08-12 0:48 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-08-12 0:42 [RFC PATCH 0/2] btrfs: Add a lockdep annotation for the extent bits wait event Ioannis Angelakopoulos
2022-08-12 0:42 ` [RFC PATCH 1/2] btrfs: Add lockdep wrappers around the extent bits locking and unlocking functions Ioannis Angelakopoulos
2022-08-12 0:42 ` Ioannis Angelakopoulos [this message]
2022-08-23 21:02 ` [RFC PATCH 2/2] btrfs: Lockdep annotations for the extent bits wait event David Sterba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220812004241.1722846-3-iangelak@fb.com \
--to=iangelak@fb.com \
--cc=kernel-team@fb.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox