* [PATCH 01/12] btrfs: rename btrfs_submit_bio() to btrfs_submit_bbio()
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 21:55 ` [PATCH 02/12] btrfs: rename __btrfs_submit_bio() and drop double underscores David Sterba
` (11 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
The function name is a bit misleading as it submits the btrfs_bio
(bbio); rename it so we can use btrfs_submit_bio() when an actual bio is
submitted.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/bio.c | 10 +++++-----
fs/btrfs/bio.h | 6 +++---
fs/btrfs/compression.c | 4 ++--
fs/btrfs/direct-io.c | 2 +-
fs/btrfs/extent_io.c | 6 +++---
fs/btrfs/inode.c | 4 ++--
fs/btrfs/scrub.c | 10 +++++-----
7 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index f6cb58d7f16a..4f3e265880bf 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -53,7 +53,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
/*
* Allocate a btrfs_bio structure. The btrfs_bio is the main I/O container for
- * btrfs, and is used for all I/O submitted through btrfs_submit_bio.
+ * btrfs, and is used for all I/O submitted through btrfs_submit_bbio().
*
* Just like the underlying bio_alloc_bioset it will not fail as it is backed by
* a mempool.
@@ -211,7 +211,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
goto done;
}
- btrfs_submit_bio(repair_bbio, mirror);
+ btrfs_submit_bbio(repair_bbio, mirror);
return;
}
@@ -280,7 +280,7 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
- btrfs_submit_bio(repair_bbio, mirror);
+ btrfs_submit_bbio(repair_bbio, mirror);
return fbio;
}
@@ -777,7 +777,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
return true;
}
-void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)
+void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
{
/* If bbio->inode is not populated, its file_offset must be 0. */
ASSERT(bbio->inode || bbio->file_offset == 0);
@@ -789,7 +789,7 @@ void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)
/*
* Submit a repair write.
*
- * This bypasses btrfs_submit_bio deliberately, as that writes all copies in a
+ * This bypasses btrfs_submit_bbio() deliberately, as that writes all copies in a
* RAID setup. Here we only want to write the one bad copy, so we do the
* mapping ourselves and submit the bio directly.
*
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index d9dd5276093d..e48612340745 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -29,7 +29,7 @@ typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
/*
* Highlevel btrfs I/O structure. It is allocated by btrfs_bio_alloc and
- * passed to btrfs_submit_bio for mapping to the physical devices.
+ * passed to btrfs_submit_bbio() for mapping to the physical devices.
*/
struct btrfs_bio {
/*
@@ -42,7 +42,7 @@ struct btrfs_bio {
union {
/*
* For data reads: checksumming and original I/O information.
- * (for internal use in the btrfs_submit_bio machinery only)
+ * (for internal use in the btrfs_submit_bbio() machinery only)
*/
struct {
u8 *csum;
@@ -104,7 +104,7 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
/* Submit using blkcg_punt_bio_submit. */
#define REQ_BTRFS_CGROUP_PUNT REQ_FS_PRIVATE
-void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
+void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num);
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct folio *folio,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 832ab8984c41..39cd2ed1974b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -395,7 +395,7 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
cb->bbio.ordered = ordered;
btrfs_add_compressed_bio_folios(cb);
- btrfs_submit_bio(&cb->bbio, 0);
+ btrfs_submit_bbio(&cb->bbio, 0);
}
/*
@@ -630,7 +630,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
if (memstall)
psi_memstall_leave(&pflags);
- btrfs_submit_bio(&cb->bbio, 0);
+ btrfs_submit_bbio(&cb->bbio, 0);
return;
out_free_compressed_pages:
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index 67adbe9d294a..855255b4481d 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -726,7 +726,7 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
}
}
- btrfs_submit_bio(bbio, 0);
+ btrfs_submit_bbio(bbio, 0);
}
static const struct iomap_ops btrfs_dio_iomap_ops = {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 643dd948054f..8de6d226475d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -117,7 +117,7 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
btrfs_submit_compressed_read(bbio);
else
- btrfs_submit_bio(bbio, 0);
+ btrfs_submit_bbio(bbio, 0);
/* The bbio is owned by the end_io handler now */
bio_ctrl->bbio = NULL;
@@ -1800,7 +1800,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
folio_unlock(folio);
}
}
- btrfs_submit_bio(bbio, 0);
+ btrfs_submit_bbio(bbio, 0);
}
/*
@@ -3572,7 +3572,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
ASSERT(ret);
}
}
- btrfs_submit_bio(bbio, mirror_num);
+ btrfs_submit_bbio(bbio, mirror_num);
done:
if (wait == WAIT_COMPLETE) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a8ad540d6de2..7dffe241dd15 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9152,7 +9152,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) {
atomic_inc(&priv.pending);
- btrfs_submit_bio(bbio, 0);
+ btrfs_submit_bbio(bbio, 0);
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
btrfs_encoded_read_endio, &priv);
@@ -9167,7 +9167,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
} while (disk_io_size);
atomic_inc(&priv.pending);
- btrfs_submit_bio(bbio, 0);
+ btrfs_submit_bbio(bbio, 0);
if (atomic_dec_return(&priv.pending))
io_wait_event(priv.wait, !atomic_read(&priv.pending));
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b3afa6365823..3a3427428074 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -838,7 +838,7 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
bbio->bio.bi_iter.bi_size >= blocksize)) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
- btrfs_submit_bio(bbio, mirror);
+ btrfs_submit_bbio(bbio, mirror);
if (wait)
wait_scrub_stripe_io(stripe);
bbio = NULL;
@@ -857,7 +857,7 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
if (bbio) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
- btrfs_submit_bio(bbio, mirror);
+ btrfs_submit_bbio(bbio, mirror);
if (wait)
wait_scrub_stripe_io(stripe);
}
@@ -1683,7 +1683,7 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
bbio->bio.bi_iter.bi_size >= stripe_len)) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
- btrfs_submit_bio(bbio, mirror);
+ btrfs_submit_bbio(bbio, mirror);
bbio = NULL;
}
@@ -1720,7 +1720,7 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
if (bbio) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
- btrfs_submit_bio(bbio, mirror);
+ btrfs_submit_bbio(bbio, mirror);
}
if (atomic_dec_and_test(&stripe->pending_io)) {
@@ -1776,7 +1776,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
mirror = calc_next_mirror(mirror, num_copies);
}
- btrfs_submit_bio(bbio, mirror);
+ btrfs_submit_bbio(bbio, mirror);
}
static bool stripe_has_metadata_error(struct scrub_stripe *stripe)
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 02/12] btrfs: rename __btrfs_submit_bio() and drop double underscores
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
2024-08-27 21:55 ` [PATCH 01/12] btrfs: rename btrfs_submit_bio() to btrfs_submit_bbio() David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 21:55 ` [PATCH 03/12] btrfs: rename __extent_writepage() " David Sterba
` (10 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
The previous patch freed the function name btrfs_submit_bio() so we can
use it for a helper that submits a struct bio.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/bio.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 4f3e265880bf..d5dcc356df33 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -502,8 +502,8 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}
-static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
- struct btrfs_io_stripe *smap, int mirror_num)
+static void btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
+ struct btrfs_io_stripe *smap, int mirror_num)
{
if (!bioc) {
/* Single mirror read/write fast path. */
@@ -603,7 +603,7 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
* context. This changes nothing when cgroups aren't in use.
*/
bio->bi_opf |= REQ_BTRFS_CGROUP_PUNT;
- __btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
+ btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
}
static bool should_async_write(struct btrfs_bio *bbio)
@@ -752,7 +752,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
}
}
- __btrfs_submit_bio(bio, bioc, &smap, mirror_num);
+ btrfs_submit_bio(bio, bioc, &smap, mirror_num);
done:
return map_length == length;
@@ -878,7 +878,7 @@ void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_
ASSERT(smap.dev == fs_info->dev_replace.srcdev);
smap.dev = fs_info->dev_replace.tgtdev;
}
- __btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num);
+ btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num);
return;
fail:
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 03/12] btrfs: rename __extent_writepage() and drop double underscores
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
2024-08-27 21:55 ` [PATCH 01/12] btrfs: rename btrfs_submit_bio() to btrfs_submit_bbio() David Sterba
2024-08-27 21:55 ` [PATCH 02/12] btrfs: rename __btrfs_submit_bio() and drop double underscores David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 21:55 ` [PATCH 04/12] btrfs: rename __compare_inode_defrag() " David Sterba
` (9 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
The function does not follow the pattern where the underscores would be
justified, so rename it.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/extent_io.c | 28 ++++++++++++++--------------
fs/btrfs/inode.c | 2 +-
fs/btrfs/subpage.c | 4 ++--
include/trace/events/btrfs.h | 2 +-
4 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8de6d226475d..f7a388529c17 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1177,7 +1177,7 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
}
/*
- * helper for __extent_writepage, doing all of the delayed allocation setup.
+ * helper for extent_writepage(), doing all of the delayed allocation setup.
*
* This returns 1 if btrfs_run_delalloc_range function did all the work required
* to write the page (copy into inline extent). In this case the IO has
@@ -1398,18 +1398,18 @@ static int submit_one_sector(struct btrfs_inode *inode,
}
/*
- * helper for __extent_writepage. This calls the writepage start hooks,
+ * Helper for extent_writepage(). This calls the writepage start hooks,
* and does the loop to map the page into extents and bios.
*
* We return 1 if the IO is started and the page is unlocked,
* 0 if all went well (page still locked)
* < 0 if there were errors (page still locked)
*/
-static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
- struct folio *folio,
- u64 start, u32 len,
- struct btrfs_bio_ctrl *bio_ctrl,
- loff_t i_size)
+static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
+ struct folio *folio,
+ u64 start, u32 len,
+ struct btrfs_bio_ctrl *bio_ctrl,
+ loff_t i_size)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long range_bitmap = 0;
@@ -1500,7 +1500,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
* Return 0 if everything goes well.
* Return <0 for error.
*/
-static int __extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl)
+static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl)
{
struct inode *inode = folio->mapping->host;
const u64 page_start = folio_pos(folio);
@@ -1509,7 +1509,7 @@ static int __extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ct
loff_t i_size = i_size_read(inode);
unsigned long end_index = i_size >> PAGE_SHIFT;
- trace___extent_writepage(folio, inode, bio_ctrl->wbc);
+ trace_extent_writepage(folio, inode, bio_ctrl->wbc);
WARN_ON(!folio_test_locked(folio));
@@ -1534,8 +1534,8 @@ static int __extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ct
if (ret)
goto done;
- ret = __extent_writepage_io(BTRFS_I(inode), folio, folio_pos(folio),
- PAGE_SIZE, bio_ctrl, i_size);
+ ret = extent_writepage_io(BTRFS_I(inode), folio, folio_pos(folio),
+ PAGE_SIZE, bio_ctrl, i_size);
if (ret == 1)
return 0;
@@ -2202,7 +2202,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
continue;
}
- ret = __extent_writepage(folio, bio_ctrl);
+ ret = extent_writepage(folio, bio_ctrl);
if (ret < 0) {
done = 1;
break;
@@ -2293,8 +2293,8 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
if (pages_dirty && folio != locked_folio)
ASSERT(folio_test_dirty(folio));
- ret = __extent_writepage_io(BTRFS_I(inode), folio, cur, cur_len,
- &bio_ctrl, i_size);
+ ret = extent_writepage_io(BTRFS_I(inode), folio, cur, cur_len,
+ &bio_ctrl, i_size);
if (ret == 1)
goto next_page;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7dffe241dd15..9a81516e074a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -747,7 +747,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode,
/*
* In the successful case (ret == 0 here), cow_file_range will return 1.
*
- * Quite a bit further up the callstack in __extent_writepage, ret == 1
+ * Quite a bit further up the callstack in extent_writepage(), ret == 1
* is treated as a short circuited success and does not unlock the folio,
* so we must do it here.
*
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 272cece50dd0..663f2f953a65 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -705,7 +705,7 @@ void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
* - Page locked by plain lock_page()
* It should not have any subpage::writers count.
* Can be unlocked by unlock_page().
- * This is the most common locked page for __extent_writepage() called
+ * This is the most common locked page for extent_writepage() called
* inside extent_write_cache_pages().
* Rarer cases include the @locked_page from extent_write_locked_range().
*
@@ -829,7 +829,7 @@ bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
* Unlike btrfs_folio_end_writer_lock() which unlocks a specified subpage range,
* this ends all writer locked ranges of a page.
*
- * This is for the locked page of __extent_writepage(), as the locked page
+ * This is for the locked page of extent_writepage(), as the locked page
* can contain several locked subpage ranges.
*/
void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct folio *folio)
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 0eddbb8b6728..e4add61e00f1 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -721,7 +721,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
__entry->writeback_index)
);
-DEFINE_EVENT(btrfs__writepage, __extent_writepage,
+DEFINE_EVENT(btrfs__writepage, extent_writepage,
TP_PROTO(const struct folio *folio, const struct inode *inode,
const struct writeback_control *wbc),
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 04/12] btrfs: rename __compare_inode_defrag() and drop double underscores
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
` (2 preceding siblings ...)
2024-08-27 21:55 ` [PATCH 03/12] btrfs: rename __extent_writepage() " David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 21:55 ` [PATCH 05/12] btrfs: constify arguments of compare_inode_defrag() David Sterba
` (8 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
The function does not follow the pattern where the underscores would be
justified, so rename it.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/defrag.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index f6dbda37a361..e2949f630584 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -45,7 +45,7 @@ struct inode_defrag {
u32 extent_thresh;
};
-static int __compare_inode_defrag(struct inode_defrag *defrag1,
+static int compare_inode_defrag(struct inode_defrag *defrag1,
struct inode_defrag *defrag2)
{
if (defrag1->root > defrag2->root)
@@ -83,7 +83,7 @@ static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
parent = *p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
- ret = __compare_inode_defrag(defrag, entry);
+ ret = compare_inode_defrag(defrag, entry);
if (ret < 0)
p = &parent->rb_left;
else if (ret > 0)
@@ -189,7 +189,7 @@ static struct inode_defrag *btrfs_pick_defrag_inode(
parent = p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
- ret = __compare_inode_defrag(&tmp, entry);
+ ret = compare_inode_defrag(&tmp, entry);
if (ret < 0)
p = parent->rb_left;
else if (ret > 0)
@@ -198,7 +198,7 @@ static struct inode_defrag *btrfs_pick_defrag_inode(
goto out;
}
- if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
+ if (parent && compare_inode_defrag(&tmp, entry) > 0) {
parent = rb_next(parent);
if (parent)
entry = rb_entry(parent, struct inode_defrag, rb_node);
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 05/12] btrfs: constify arguments of compare_inode_defrag()
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
` (3 preceding siblings ...)
2024-08-27 21:55 ` [PATCH 04/12] btrfs: rename __compare_inode_defrag() " David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 21:55 ` [PATCH 06/12] btrfs: rename __need_auto_defrag() and drop double underscores David Sterba
` (7 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
A comparator function does not change its parameters, make them const.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/defrag.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index e2949f630584..e4bb5a8651f3 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -45,8 +45,8 @@ struct inode_defrag {
u32 extent_thresh;
};
-static int compare_inode_defrag(struct inode_defrag *defrag1,
- struct inode_defrag *defrag2)
+static int compare_inode_defrag(const struct inode_defrag *defrag1,
+ const struct inode_defrag *defrag2)
{
if (defrag1->root > defrag2->root)
return 1;
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 06/12] btrfs: rename __need_auto_defrag() and drop double underscores
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
` (4 preceding siblings ...)
2024-08-27 21:55 ` [PATCH 05/12] btrfs: constify arguments of compare_inode_defrag() David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 21:55 ` [PATCH 07/12] btrfs: rename __btrfs_add_inode_defrag() " David Sterba
` (6 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
The function does not follow the pattern where the underscores would be
justified, so rename it.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/defrag.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index e4bb5a8651f3..b735fce21f07 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -107,7 +107,7 @@ static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
return 0;
}
-static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
+static inline int need_auto_defrag(struct btrfs_fs_info *fs_info)
{
if (!btrfs_test_opt(fs_info, AUTO_DEFRAG))
return 0;
@@ -130,7 +130,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
u64 transid;
int ret;
- if (!__need_auto_defrag(fs_info))
+ if (!need_auto_defrag(fs_info))
return 0;
if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags))
@@ -245,7 +245,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
again:
if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state))
goto cleanup;
- if (!__need_auto_defrag(fs_info))
+ if (!need_auto_defrag(fs_info))
goto cleanup;
/* Get the inode */
@@ -306,7 +306,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state))
break;
- if (!__need_auto_defrag(fs_info))
+ if (!need_auto_defrag(fs_info))
break;
/* find an inode to defrag */
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 07/12] btrfs: rename __btrfs_add_inode_defrag() and drop double underscores
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
` (5 preceding siblings ...)
2024-08-27 21:55 ` [PATCH 06/12] btrfs: rename __need_auto_defrag() and drop double underscores David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 21:55 ` [PATCH 08/12] btrfs: rename __btrfs_run_defrag_inode() " David Sterba
` (5 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
The function does not follow the pattern where the underscores would be
justified, so rename it.
Also update the misleading comment: the passed item is not freed; that's
what the caller does.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/defrag.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index b735fce21f07..5258dd86dbd8 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -61,16 +61,14 @@ static int compare_inode_defrag(const struct inode_defrag *defrag1,
}
/*
- * Pop a record for an inode into the defrag tree. The lock must be held
+ * Insert a record for an inode into the defrag tree. The lock must be held
* already.
*
* If you're inserting a record for an older transid than an existing record,
* the transid already in the tree is lowered.
- *
- * If an existing record is found the defrag item you pass in is freed.
*/
-static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
- struct inode_defrag *defrag)
+static int btrfs_insert_inode_defrag(struct btrfs_inode *inode,
+ struct inode_defrag *defrag)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct inode_defrag *entry;
@@ -157,7 +155,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
* and then re-read this inode, this new inode doesn't have
* IN_DEFRAG flag. At the case, we may find the existed defrag.
*/
- ret = __btrfs_add_inode_defrag(inode, defrag);
+ ret = btrfs_insert_inode_defrag(inode, defrag);
if (ret)
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
} else {
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 08/12] btrfs: rename __btrfs_run_defrag_inode() and drop double underscores
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
` (6 preceding siblings ...)
2024-08-27 21:55 ` [PATCH 07/12] btrfs: rename __btrfs_add_inode_defrag() " David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 21:55 ` [PATCH 09/12] btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes() David Sterba
` (4 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
The function does not follow the pattern where the underscores would be
justified, so rename it.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/defrag.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 5258dd86dbd8..41d67065d02b 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -231,8 +231,8 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
#define BTRFS_DEFRAG_BATCH 1024
-static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
- struct inode_defrag *defrag)
+static int btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
+ struct inode_defrag *defrag)
{
struct btrfs_root *inode_root;
struct inode *inode;
@@ -322,7 +322,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
first_ino = defrag->ino + 1;
root_objectid = defrag->root;
- __btrfs_run_defrag_inode(fs_info, defrag);
+ btrfs_run_defrag_inode(fs_info, defrag);
}
atomic_dec(&fs_info->defrag_running);
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 09/12] btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes()
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
` (7 preceding siblings ...)
2024-08-27 21:55 ` [PATCH 08/12] btrfs: rename __btrfs_run_defrag_inode() " David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 22:59 ` Qu Wenruo
2024-08-27 21:55 ` [PATCH 10/12] btrfs: return void from btrfs_add_inode_defrag() David Sterba
` (3 subsequent siblings)
12 siblings, 1 reply; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
btrfs_cleanup_defrag_inodes() is not called frequently, only in remount
or unmount, but the way it frees the inodes in fs_info->defrag_inodes
is inefficient. Each time it needs to locate first node, remove it,
and potentially rebalance the tree until it's done. This allows us to do a
conditional reschedule.
For cleanups the rbtree_postorder_for_each_entry_safe() iterator is
convenient but if the reschedule happens and unlocks fs_info->defrag_inodes_lock
we can't be sure that the tree is in the same state. If that happens,
restart the iteration from the beginning.
The cleanup operation is kmem_cache_free() which will likely take the
fast path for most objects.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/defrag.c | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 41d67065d02b..bd1769dd134c 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -212,19 +212,18 @@ static struct inode_defrag *btrfs_pick_defrag_inode(
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
{
- struct inode_defrag *defrag;
- struct rb_node *node;
+ struct inode_defrag *defrag, *next;
spin_lock(&fs_info->defrag_inodes_lock);
- node = rb_first(&fs_info->defrag_inodes);
- while (node) {
- rb_erase(node, &fs_info->defrag_inodes);
- defrag = rb_entry(node, struct inode_defrag, rb_node);
+again:
+ rbtree_postorder_for_each_entry_safe(defrag, next, &fs_info->defrag_inodes,
+ rb_node) {
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
- cond_resched_lock(&fs_info->defrag_inodes_lock);
-
- node = rb_first(&fs_info->defrag_inodes);
+ if (cond_resched_lock(&fs_info->defrag_inodes_lock)) {
+ /* Rescheduled and unlocked. */
+ goto again;
+ }
}
spin_unlock(&fs_info->defrag_inodes_lock);
}
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH 09/12] btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes()
2024-08-27 21:55 ` [PATCH 09/12] btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes() David Sterba
@ 2024-08-27 22:59 ` Qu Wenruo
2024-08-27 23:18 ` David Sterba
0 siblings, 1 reply; 19+ messages in thread
From: Qu Wenruo @ 2024-08-27 22:59 UTC (permalink / raw)
To: David Sterba, linux-btrfs
在 2024/8/28 07:25, David Sterba 写道:
> btrfs_cleanup_defrag_inodes() is not called frequently, only in remount
> or unmount, but the way it frees the inodes in fs_info->defrag_inodes
> is inefficient. Each time it needs to locate first node, remove it,
> potentially rebalance tree until it's done. This allows to do a
> conditional reschedule.
>
> For cleanups the rbtree_postorder_for_each_entry_safe() iterator is
> convenient but if the reschedule happens and unlocks fs_info->defrag_inodes_lock
> we can't be sure that the tree is in the same state. If that happens,
> restart the iteration from the beginning.
In that case, isn't the rbtree itself in an inconsistent state, and
restarting will only cause invalid memory access?
So in this particular case, since we can be interrupted, the full tree
balance looks like the only safe way we can go?
Thanks,
Qu
>
> The cleanup operation is kmem_cache_free() which will likely take the
> fast path for most objects.
>
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
> fs/btrfs/defrag.c | 17 ++++++++---------
> 1 file changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
> index 41d67065d02b..bd1769dd134c 100644
> --- a/fs/btrfs/defrag.c
> +++ b/fs/btrfs/defrag.c
> @@ -212,19 +212,18 @@ static struct inode_defrag *btrfs_pick_defrag_inode(
>
> void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
> {
> - struct inode_defrag *defrag;
> - struct rb_node *node;
> + struct inode_defrag *defrag, *next;
>
> spin_lock(&fs_info->defrag_inodes_lock);
> - node = rb_first(&fs_info->defrag_inodes);
> - while (node) {
> - rb_erase(node, &fs_info->defrag_inodes);
> - defrag = rb_entry(node, struct inode_defrag, rb_node);
> +again:
> + rbtree_postorder_for_each_entry_safe(defrag, next, &fs_info->defrag_inodes,
> + rb_node) {
> kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
>
> - cond_resched_lock(&fs_info->defrag_inodes_lock);
> -
> - node = rb_first(&fs_info->defrag_inodes);
> + if (cond_resched_lock(&fs_info->defrag_inodes_lock)) {
> + /* Rescheduled and unlocked. */
> + goto again;
> + }
> }
> spin_unlock(&fs_info->defrag_inodes_lock);
> }
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH 09/12] btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes()
2024-08-27 22:59 ` Qu Wenruo
@ 2024-08-27 23:18 ` David Sterba
2024-08-27 23:48 ` Qu Wenruo
0 siblings, 1 reply; 19+ messages in thread
From: David Sterba @ 2024-08-27 23:18 UTC (permalink / raw)
To: Qu Wenruo; +Cc: David Sterba, linux-btrfs
On Wed, Aug 28, 2024 at 08:29:23AM +0930, Qu Wenruo wrote:
>
>
> 在 2024/8/28 07:25, David Sterba 写道:
> > btrfs_cleanup_defrag_inodes() is not called frequently, only in remount
> > or unmount, but the way it frees the inodes in fs_info->defrag_inodes
> > is inefficient. Each time it needs to locate first node, remove it,
> > potentially rebalance tree until it's done. This allows to do a
> > conditional reschedule.
> >
> > For cleanups the rbtree_postorder_for_each_entry_safe() iterator is
> > convenient but if the reschedule happens and unlocks fs_info->defrag_inodes_lock
> > we can't be sure that the tree is in the same state. If that happens,
> > restart the iteration from the beginning.
>
> In that case, isn't the rbtree itself in an inconsistent state, and
> restarting will only cause invalid memory access?
>
> So in this particular case, since we can be interrupted, the full tree
> balance looks like the only safe way we can go?
You're right, the nodes get freed so even if the iteration is restarted
it would touch freed memory, IOW rbtree_postorder_for_each_entry_safe()
can't be interrupted. I can drop the reschedule, with the same argument
that it should be relatively fast even for thousands of entries, this
should not hurt for remount/umount context.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 09/12] btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes()
2024-08-27 23:18 ` David Sterba
@ 2024-08-27 23:48 ` Qu Wenruo
2024-08-28 0:11 ` David Sterba
0 siblings, 1 reply; 19+ messages in thread
From: Qu Wenruo @ 2024-08-27 23:48 UTC (permalink / raw)
To: dsterba, Qu Wenruo; +Cc: David Sterba, linux-btrfs
在 2024/8/28 08:48, David Sterba 写道:
> On Wed, Aug 28, 2024 at 08:29:23AM +0930, Qu Wenruo wrote:
>>
>>
>> 在 2024/8/28 07:25, David Sterba 写道:
>>> btrfs_cleanup_defrag_inodes() is not called frequently, only in remount
>>> or unmount, but the way it frees the inodes in fs_info->defrag_inodes
>>> is inefficient. Each time it needs to locate first node, remove it,
>>> potentially rebalance tree until it's done. This allows to do a
>>> conditional reschedule.
>>>
>>> For cleanups the rbtree_postorder_for_each_entry_safe() iterator is
>>> convenient but if the reschedule happens and unlocks fs_info->defrag_inodes_lock
>>> we can't be sure that the tree is in the same state. If that happens,
>>> restart the iteration from the beginning.
>>
>> In that case, isn't the rbtree itself in an inconsistent state, and
>> restarting will only cause invalid memory access?
>>
>> So in this particular case, since we can be interrupted, the full tree
>> balance looks like the only safe way we can go?
>
> You're right, the nodes get freed so even if the iteration is restarted
> it would touch freed memory, IOW rbtree_postorder_for_each_entry_safe()
> can't be interrupted. I can drop the reschedule, with the same argument
> that it should be relatively fast even for thousands of entries, this
> should not hurt for remouunt/umount context.
>
Considering the autodefrag is only triggered for certain writes, and at
remount (to RO) or unmount time, there should be no more writes, the
solution looks fine.
Thanks,
Qu
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 09/12] btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes()
2024-08-27 23:48 ` Qu Wenruo
@ 2024-08-28 0:11 ` David Sterba
2024-08-28 0:13 ` Qu Wenruo
0 siblings, 1 reply; 19+ messages in thread
From: David Sterba @ 2024-08-28 0:11 UTC (permalink / raw)
To: Qu Wenruo; +Cc: dsterba, Qu Wenruo, David Sterba, linux-btrfs
On Wed, Aug 28, 2024 at 09:18:11AM +0930, Qu Wenruo wrote:
>
>
> 在 2024/8/28 08:48, David Sterba 写道:
> > On Wed, Aug 28, 2024 at 08:29:23AM +0930, Qu Wenruo wrote:
> >>
> >>
> >> 在 2024/8/28 07:25, David Sterba 写道:
> >>> btrfs_cleanup_defrag_inodes() is not called frequently, only in remount
> >>> or unmount, but the way it frees the inodes in fs_info->defrag_inodes
> >>> is inefficient. Each time it needs to locate first node, remove it,
> >>> potentially rebalance tree until it's done. This allows to do a
> >>> conditional reschedule.
> >>>
> >>> For cleanups the rbtree_postorder_for_each_entry_safe() iterator is
> >>> convenient but if the reschedule happens and unlocks fs_info->defrag_inodes_lock
> >>> we can't be sure that the tree is in the same state. If that happens,
> >>> restart the iteration from the beginning.
> >>
> >> In that case, isn't the rbtree itself in an inconsistent state, and
> >> restarting will only cause invalid memory access?
> >>
> >> So in this particular case, since we can be interrupted, the full tree
> >> balance looks like the only safe way we can go?
> >
> > You're right, the nodes get freed so even if the iteration is restarted
> > it would touch freed memory, IOW rbtree_postorder_for_each_entry_safe()
> > can't be interrupted. I can drop the reschedule, with the same argument
> > that it should be relatively fast even for thousands of entries, this
> > should not hurt for remouunt/umount context.
> >
>
> Considering the autodefrag is only triggered for certain writes, and at
> remount (to RO) or unmount time, there should be no more writes, the
> solution looks fine.
Ok, thanks. I'll commit the following updated version:
btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes()
btrfs_cleanup_defrag_inodes() is not called frequently, only in remount
or unmount, but the way it frees the inodes in fs_info->defrag_inodes
is inefficient. Each time it needs to locate the first node, remove it,
potentially rebalance the tree until it's done. This allows us to do a
conditional reschedule.
For cleanups the rbtree_postorder_for_each_entry_safe() iterator is
convenient but we can't reschedule and restart iteration because some of
the tree nodes would be already freed.
The cleanup operation is kmem_cache_free() which will likely take the
fast path for most objects so rescheduling should not be necessary.
Signed-off-by: David Sterba <dsterba@suse.com>
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -212,19 +212,12 @@ static struct inode_defrag *btrfs_pick_defrag_inode(
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
{
- struct inode_defrag *defrag;
- struct rb_node *node;
+ struct inode_defrag *defrag, *next;
spin_lock(&fs_info->defrag_inodes_lock);
- node = rb_first(&fs_info->defrag_inodes);
- while (node) {
- rb_erase(node, &fs_info->defrag_inodes);
- defrag = rb_entry(node, struct inode_defrag, rb_node);
+ rbtree_postorder_for_each_entry_safe(defrag, next, &fs_info->defrag_inodes,
+ rb_node) {
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
-
- cond_resched_lock(&fs_info->defrag_inodes_lock);
-
- node = rb_first(&fs_info->defrag_inodes);
}
spin_unlock(&fs_info->defrag_inodes_lock);
}
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH 09/12] btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes()
2024-08-28 0:11 ` David Sterba
@ 2024-08-28 0:13 ` Qu Wenruo
0 siblings, 0 replies; 19+ messages in thread
From: Qu Wenruo @ 2024-08-28 0:13 UTC (permalink / raw)
To: dsterba; +Cc: Qu Wenruo, David Sterba, linux-btrfs
在 2024/8/28 09:41, David Sterba 写道:
> On Wed, Aug 28, 2024 at 09:18:11AM +0930, Qu Wenruo wrote:
>>
>>
>> 在 2024/8/28 08:48, David Sterba 写道:
>>> On Wed, Aug 28, 2024 at 08:29:23AM +0930, Qu Wenruo wrote:
>>>>
>>>>
>>>> 在 2024/8/28 07:25, David Sterba 写道:
>>>>> btrfs_cleanup_defrag_inodes() is not called frequently, only in remount
>>>>> or unmount, but the way it frees the inodes in fs_info->defrag_inodes
>>>>> is inefficient. Each time it needs to locate first node, remove it,
>>>>> potentially rebalance tree until it's done. This allows to do a
>>>>> conditional reschedule.
>>>>>
>>>>> For cleanups the rbtree_postorder_for_each_entry_safe() iterator is
>>>>> convenient but if the reschedule happens and unlocks fs_info->defrag_inodes_lock
>>>>> we can't be sure that the tree is in the same state. If that happens,
>>>>> restart the iteration from the beginning.
>>>>
>>>> In that case, isn't the rbtree itself in an inconsistent state, and
>>>> restarting will only cause invalid memory access?
>>>>
>>>> So in this particular case, since we can be interrupted, the full tree
>>>> balance looks like the only safe way we can go?
>>>
>>> You're right, the nodes get freed so even if the iteration is restarted
>>> it would touch freed memory, IOW rbtree_postorder_for_each_entry_safe()
>>> can't be interrupted. I can drop the reschedule, with the same argument
>>> that it should be relatively fast even for thousands of entries, this
>>> should not hurt for remouunt/umount context.
>>>
>>
>> Considering the autodefrag is only triggered for certain writes, and at
>> remount (to RO) or unmount time, there should be no more writes, the
>> solution looks fine.
>
> Ok, thanks. I'll commit the following updated version:
>
> btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes()
>
> btrfs_cleanup_defrag_inodes() is not called frequently, only in remount
> or unmount, but the way it frees the inodes in fs_info->defrag_inodes
> is inefficient. Each time it needs to locate first node, remove it,
> potentially rebalance tree until it's done. This allows to do a
> conditional reschedule.
>
> For cleanups the rbtree_postorder_for_each_entry_safe() iterator is
> convenient but we can't reschedule and restart iteration because some of
> the tree nodes would be already freed.
>
> The cleanup operation is kmem_cache_free() which will likely take the
> fast path for most objects so rescheduling should not be necessary.
>
> Signed-off-by: David Sterba <dsterba@suse.com>
>
> --- a/fs/btrfs/defrag.c
> +++ b/fs/btrfs/defrag.c
> @@ -212,19 +212,12 @@ static struct inode_defrag *btrfs_pick_defrag_inode(
>
> void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
> {
> - struct inode_defrag *defrag;
> - struct rb_node *node;
> + struct inode_defrag *defrag, *next;
>
> spin_lock(&fs_info->defrag_inodes_lock);
> - node = rb_first(&fs_info->defrag_inodes);
> - while (node) {
> - rb_erase(node, &fs_info->defrag_inodes);
> - defrag = rb_entry(node, struct inode_defrag, rb_node);
> + rbtree_postorder_for_each_entry_safe(defrag, next, &fs_info->defrag_inodes,
> + rb_node) {
> kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
> -
> - cond_resched_lock(&fs_info->defrag_inodes_lock);
> -
> - node = rb_first(&fs_info->defrag_inodes);
> }
Since it's just a simple kmem_cache_free() line, there is no need for
the brackets.
Otherwise the whole series looks good to me.
Reviewed-by: Qu Wenruo <wqu@suse.com>
Thanks,
Qu
> spin_unlock(&fs_info->defrag_inodes_lock);
> }
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 10/12] btrfs: return void from btrfs_add_inode_defrag()
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
` (8 preceding siblings ...)
2024-08-27 21:55 ` [PATCH 09/12] btrfs: clear defragmented inodes using postorder in btrfs_cleanup_defrag_inodes() David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 21:55 ` [PATCH 11/12] btrfs: drop transaction parameter " David Sterba
` (2 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
The potential memory allocation failure is not a fatal error, skipping
autodefrag is fine and the caller inode_should_defrag() does not care
about the errors. Further writes can attempt to add the inode back to
the defragmentation list again.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/defrag.c | 14 +++++++-------
fs/btrfs/defrag.h | 4 ++--
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index bd1769dd134c..f4c62b6faf3e 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -117,10 +117,11 @@ static inline int need_auto_defrag(struct btrfs_fs_info *fs_info)
}
/*
- * Insert a defrag record for this inode if auto defrag is enabled.
+ * Insert a defrag record for this inode if auto defrag is enabled. No errors
+ * returned as they're not considered fatal.
*/
-int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode, u32 extent_thresh)
+void btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode, u32 extent_thresh)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -129,10 +130,10 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
int ret;
if (!need_auto_defrag(fs_info))
- return 0;
+ return;
if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags))
- return 0;
+ return;
if (trans)
transid = trans->transid;
@@ -141,7 +142,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
if (!defrag)
- return -ENOMEM;
+ return;
defrag->ino = btrfs_ino(inode);
defrag->transid = transid;
@@ -162,7 +163,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
}
spin_unlock(&fs_info->defrag_inodes_lock);
- return 0;
}
/*
diff --git a/fs/btrfs/defrag.h b/fs/btrfs/defrag.h
index 878528e086fb..97f36ab3f24d 100644
--- a/fs/btrfs/defrag.h
+++ b/fs/btrfs/defrag.h
@@ -18,8 +18,8 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
u64 newer_than, unsigned long max_to_defrag);
int __init btrfs_auto_defrag_init(void);
void __cold btrfs_auto_defrag_exit(void);
-int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode, u32 extent_thresh);
+void btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode, u32 extent_thresh);
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_defrag_root(struct btrfs_root *root);
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 11/12] btrfs: drop transaction parameter from btrfs_add_inode_defrag()
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
` (9 preceding siblings ...)
2024-08-27 21:55 ` [PATCH 10/12] btrfs: return void from btrfs_add_inode_defrag() David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 21:55 ` [PATCH 12/12] btrfs: always pass readahead state to defrag David Sterba
2024-08-27 23:02 ` [PATCH 00/12] Renames and defrag cleanups Qu Wenruo
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
There's only one caller inode_should_defrag() that passes NULL to
btrfs_add_inode_defrag() so we can drop it and simplify the code.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/defrag.c | 11 ++---------
fs/btrfs/defrag.h | 3 +--
fs/btrfs/inode.c | 2 +-
3 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index f4c62b6faf3e..7333512cc9dd 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -120,13 +120,11 @@ static inline int need_auto_defrag(struct btrfs_fs_info *fs_info)
* Insert a defrag record for this inode if auto defrag is enabled. No errors
* returned as they're not considered fatal.
*/
-void btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode, u32 extent_thresh)
+void btrfs_add_inode_defrag(struct btrfs_inode *inode, u32 extent_thresh)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct inode_defrag *defrag;
- u64 transid;
int ret;
if (!need_auto_defrag(fs_info))
@@ -135,17 +133,12 @@ void btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags))
return;
- if (trans)
- transid = trans->transid;
- else
- transid = btrfs_get_root_last_trans(root);
-
defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
if (!defrag)
return;
defrag->ino = btrfs_ino(inode);
- defrag->transid = transid;
+ defrag->transid = btrfs_get_root_last_trans(root);
defrag->root = btrfs_root_id(root);
defrag->extent_thresh = extent_thresh;
diff --git a/fs/btrfs/defrag.h b/fs/btrfs/defrag.h
index 97f36ab3f24d..6b7596c4f0dc 100644
--- a/fs/btrfs/defrag.h
+++ b/fs/btrfs/defrag.h
@@ -18,8 +18,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
u64 newer_than, unsigned long max_to_defrag);
int __init btrfs_auto_defrag_init(void);
void __cold btrfs_auto_defrag_exit(void);
-void btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode, u32 extent_thresh);
+void btrfs_add_inode_defrag(struct btrfs_inode *inode, u32 extent_thresh);
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_defrag_root(struct btrfs_root *root);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9a81516e074a..62e1e407626e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -885,7 +885,7 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
/* If this is a small write inside eof, kick off a defrag */
if (num_bytes < small_write &&
(start > 0 || end + 1 < inode->disk_i_size))
- btrfs_add_inode_defrag(NULL, inode, small_write);
+ btrfs_add_inode_defrag(inode, small_write);
}
static int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 12/12] btrfs: always pass readahead state to defrag
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
` (10 preceding siblings ...)
2024-08-27 21:55 ` [PATCH 11/12] btrfs: drop transaction parameter " David Sterba
@ 2024-08-27 21:55 ` David Sterba
2024-08-27 23:02 ` [PATCH 00/12] Renames and defrag cleanups Qu Wenruo
12 siblings, 0 replies; 19+ messages in thread
From: David Sterba @ 2024-08-27 21:55 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
Defrag ioctl passes readahead from the file, but autodefrag does not
have a file so the readahead state is allocated when needed.
The autodefrag loop in cleaner thread iterates over inodes so we can
simply provide an on-stack readahead state and will not need to allocate
it in btrfs_defrag_file(). The size is 32 bytes which is acceptable.
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/defrag.c | 32 +++++++++++---------------------
1 file changed, 11 insertions(+), 21 deletions(-)
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 7333512cc9dd..c92b4367995d 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -224,7 +224,8 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
#define BTRFS_DEFRAG_BATCH 1024
static int btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
- struct inode_defrag *defrag)
+ struct inode_defrag *defrag,
+ struct file_ra_state *ra)
{
struct btrfs_root *inode_root;
struct inode *inode;
@@ -263,9 +264,10 @@ static int btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
range.len = (u64)-1;
range.start = cur;
range.extent_thresh = defrag->extent_thresh;
+ file_ra_state_init(ra, inode->i_mapping);
sb_start_write(fs_info->sb);
- ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+ ret = btrfs_defrag_file(inode, ra, &range, defrag->transid,
BTRFS_DEFRAG_BATCH);
sb_end_write(fs_info->sb);
iput(inode);
@@ -292,6 +294,8 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
atomic_inc(&fs_info->defrag_running);
while (1) {
+ struct file_ra_state ra = { 0 };
+
/* Pause the auto defragger. */
if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state))
break;
@@ -314,7 +318,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
first_ino = defrag->ino + 1;
root_objectid = defrag->root;
- btrfs_run_defrag_inode(fs_info, defrag);
+ btrfs_run_defrag_inode(fs_info, defrag, &ra);
}
atomic_dec(&fs_info->defrag_running);
@@ -1307,8 +1311,7 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
if (entry->start + range_len <= *last_scanned_ret)
continue;
- if (ra)
- page_cache_sync_readahead(inode->vfs_inode.i_mapping,
+ page_cache_sync_readahead(inode->vfs_inode.i_mapping,
ra, NULL, entry->start >> PAGE_SHIFT,
((entry->start + range_len - 1) >> PAGE_SHIFT) -
(entry->start >> PAGE_SHIFT) + 1);
@@ -1340,7 +1343,7 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
* Entry point to file defragmentation.
*
* @inode: inode to be defragged
- * @ra: readahead state (can be NUL)
+ * @ra: readahead state
* @range: defrag options including range and flags
* @newer_than: minimum transid to defrag
* @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode
@@ -1362,12 +1365,13 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
u64 cur;
u64 last_byte;
bool do_compress = (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS);
- bool ra_allocated = false;
int compress_type = BTRFS_COMPRESS_ZLIB;
int ret = 0;
u32 extent_thresh = range->extent_thresh;
pgoff_t start_index;
+ ASSERT(ra);
+
if (isize == 0)
return 0;
@@ -1396,18 +1400,6 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
cur = round_down(range->start, fs_info->sectorsize);
last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
- /*
- * If we were not given a ra, allocate a readahead context. As
- * readahead is just an optimization, defrag will work without it so
- * we don't error out.
- */
- if (!ra) {
- ra_allocated = true;
- ra = kzalloc(sizeof(*ra), GFP_KERNEL);
- if (ra)
- file_ra_state_init(ra, inode->i_mapping);
- }
-
/*
* Make writeback start from the beginning of the range, so that the
* defrag range can be written sequentially.
@@ -1462,8 +1454,6 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
cond_resched();
}
- if (ra_allocated)
- kfree(ra);
/*
* Update range.start for autodefrag, this will indicate where to start
* in next run.
--
2.45.0
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH 00/12] Renames and defrag cleanups
2024-08-27 21:55 [PATCH 00/12] Renames and defrag cleanups David Sterba
` (11 preceding siblings ...)
2024-08-27 21:55 ` [PATCH 12/12] btrfs: always pass readahead state to defrag David Sterba
@ 2024-08-27 23:02 ` Qu Wenruo
12 siblings, 0 replies; 19+ messages in thread
From: Qu Wenruo @ 2024-08-27 23:02 UTC (permalink / raw)
To: David Sterba, linux-btrfs
在 2024/8/28 07:25, David Sterba 写道:
> A few simple cleanups, dropping underscores and improving some code in
> defrag.
>
> David Sterba (12):
> btrfs: rename btrfs_submit_bio() to btrfs_submit_bbio()
> btrfs: rename __btrfs_submit_bio() and drop double underscores
> btrfs: rename __extent_writepage() and drop double underscores
> btrfs: rename __compare_inode_defrag() and drop double underscores
> btrfs: constify arguments of compare_inode_defrag()
> btrfs: rename __need_auto_defrag() and drop double underscores
> btrfs: rename __btrfs_add_inode_defrag() and drop double underscores
> btrfs: rename __btrfs_run_defrag_inode() and drop double underscores
> btrfs: clear defragmented inodes using postorder in
> btrfs_cleanup_defrag_inodes()
This one doesn't look safe to me, as rbtree_postorder_for_each_entry_safe()
doesn't look interruption-safe.
The remaining ones looks good to me.
Reviewed-by: Qu Wenruo <wqu@suse.com>
Thanks,
Qu
> btrfs: return void from btrfs_add_inode_defrag()
> btrfs: drop transaction parameter from btrfs_add_inode_defrag()
> btrfs: always pass readahead state to defrag
>
> fs/btrfs/bio.c | 20 +++----
> fs/btrfs/bio.h | 6 +--
> fs/btrfs/compression.c | 4 +-
> fs/btrfs/defrag.c | 100 ++++++++++++++---------------------
> fs/btrfs/defrag.h | 3 +-
> fs/btrfs/direct-io.c | 2 +-
> fs/btrfs/extent_io.c | 34 ++++++------
> fs/btrfs/inode.c | 8 +--
> fs/btrfs/scrub.c | 10 ++--
> fs/btrfs/subpage.c | 4 +-
> include/trace/events/btrfs.h | 2 +-
> 11 files changed, 86 insertions(+), 107 deletions(-)
>
^ permalink raw reply [flat|nested] 19+ messages in thread