* [PATCH 1/7] btrfs: move kthread_associate_blkcg out of btrfs_submit_compressed_write
[not found] ` <20230327004954.728797-1-hch-jcswGhMUV9g@public.gmane.org>
@ 2023-03-27 0:49 ` Christoph Hellwig
2023-03-27 0:49 ` [PATCH 2/7] btrfs: don't free the async_extent in submit_uncompressed_range Christoph Hellwig
` (2 subsequent siblings)
3 siblings, 0 replies; 17+ messages in thread
From: Christoph Hellwig @ 2023-03-27 0:49 UTC (permalink / raw)
To: Josef Bacik, Chris Mason, David Sterba
Cc: Tejun Heo, Jens Axboe, cgroups-u79uwXL29TY76Z2rM5mHXA,
linux-block-u79uwXL29TY76Z2rM5mHXA,
linux-btrfs-u79uwXL29TY76Z2rM5mHXA
btrfs_submit_compressed_write should not have to care whether it is
called from a helper thread. Move the kthread_associate_blkcg handling
into submit_one_async_extent, as that is the one caller that needs it.
Also move the assignment of REQ_CGROUP_PUNT into cow_file_range_async,
as that is the routine that sets up the helper thread offload.
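Condensed from the submit_one_async_extent hunk below (per-argument
comments dropped, nothing else changed), the caller-side bracketing now
looks like:

	if (async_chunk->blkcg_css)
		kthread_associate_blkcg(async_chunk->blkcg_css);
	btrfs_submit_compressed_write(inode, start, async_extent->ram_size,
				      ins.objectid, ins.offset,
				      async_extent->pages,
				      async_extent->nr_pages,
				      async_chunk->write_flags, true);
	if (async_chunk->blkcg_css)
		kthread_associate_blkcg(NULL);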
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/btrfs/compression.c | 8 --------
fs/btrfs/compression.h | 1 -
fs/btrfs/inode.c | 12 ++++++++----
3 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 44c4276741ceda..d532a8c8c9d8c6 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -286,7 +286,6 @@ void btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct page **compressed_pages,
unsigned int nr_pages,
blk_opf_t write_flags,
- struct cgroup_subsys_state *blkcg_css,
bool writeback)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
@@ -295,10 +294,6 @@ void btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(len, fs_info->sectorsize));
- if (blkcg_css) {
- kthread_associate_blkcg(blkcg_css);
- write_flags |= REQ_CGROUP_PUNT;
- }
write_flags |= REQ_BTRFS_ONE_ORDERED;
cb = alloc_compressed_bio(inode, start, REQ_OP_WRITE | write_flags,
@@ -314,9 +309,6 @@ void btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
btrfs_add_compressed_bio_pages(cb);
btrfs_submit_bio(&cb->bbio, 0);
-
- if (blkcg_css)
- kthread_associate_blkcg(NULL);
}
/*
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 5d5146e72a860b..19ab2abeddc088 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -92,7 +92,6 @@ void btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct page **compressed_pages,
unsigned int nr_pages,
blk_opf_t write_flags,
- struct cgroup_subsys_state *blkcg_css,
bool writeback);
void btrfs_submit_compressed_read(struct btrfs_bio *bbio, int mirror_num);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 865d56ff2ce150..698915c032bddc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1053,14 +1053,18 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_START_WRITEBACK);
+
+ if (async_chunk->blkcg_css)
+ kthread_associate_blkcg(async_chunk->blkcg_css);
btrfs_submit_compressed_write(inode, start, /* file_offset */
async_extent->ram_size, /* num_bytes */
ins.objectid, /* disk_bytenr */
ins.offset, /* compressed_len */
async_extent->pages, /* compressed_pages */
async_extent->nr_pages,
- async_chunk->write_flags,
- async_chunk->blkcg_css, true);
+ async_chunk->write_flags, true);
+ if (async_chunk->blkcg_css)
+ kthread_associate_blkcg(NULL);
*alloc_hint = ins.objectid + ins.offset;
kfree(async_extent);
return ret;
@@ -1612,6 +1616,7 @@ static int cow_file_range_async(struct btrfs_inode *inode,
if (blkcg_css != blkcg_root_css) {
css_get(blkcg_css);
async_chunk[i].blkcg_css = blkcg_css;
+ async_chunk[i].write_flags |= REQ_CGROUP_PUNT;
} else {
async_chunk[i].blkcg_css = NULL;
}
@@ -10374,8 +10379,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
btrfs_delalloc_release_extents(inode, num_bytes);
btrfs_submit_compressed_write(inode, start, num_bytes, ins.objectid,
- ins.offset, pages, nr_pages, 0, NULL,
- false);
+ ins.offset, pages, nr_pages, 0, false);
ret = orig_count;
goto out;
--
2.39.2
* [PATCH 2/7] btrfs: don't free the async_extent in submit_uncompressed_range
[not found] ` <20230327004954.728797-1-hch-jcswGhMUV9g@public.gmane.org>
2023-03-27 0:49 ` [PATCH 1/7] btrfs: move kthread_associate_blkcg out of btrfs_submit_compressed_write Christoph Hellwig
@ 2023-03-27 0:49 ` Christoph Hellwig
2023-03-27 0:49 ` [PATCH 5/7] btrfs, block: move REQ_CGROUP_PUNT to btrfs Christoph Hellwig
2023-03-27 0:49 ` [PATCH 7/7] block: make blkcg_punt_bio_submit optional Christoph Hellwig
3 siblings, 0 replies; 17+ messages in thread
From: Christoph Hellwig @ 2023-03-27 0:49 UTC (permalink / raw)
To: Josef Bacik, Chris Mason, David Sterba
Cc: Tejun Heo, Jens Axboe, cgroups-u79uwXL29TY76Z2rM5mHXA,
linux-block-u79uwXL29TY76Z2rM5mHXA,
linux-btrfs-u79uwXL29TY76Z2rM5mHXA
Let submit_one_async_extent, which is the only caller of
submit_uncompressed_range, handle freeing of the async_extent in one
central place.
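Condensed from the hunks below, the resulting ownership rule is that
submit_one_async_extent frees the async_extent on every return path
through a single exit label, and submit_uncompressed_range no longer
touches it:

	if (!async_extent->pages) {
		ret = submit_uncompressed_range(inode, async_extent,
						locked_page);
		goto done;
	}
	/* ... the compressed and error paths also branch to done ... */
done:
	kfree(async_extent);
	return ret;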
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/btrfs/inode.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 698915c032bddc..7a1bfce9532fe9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -944,10 +944,9 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
ret = cow_file_range(inode, locked_page, start, end, &page_started,
&nr_written, 0, NULL);
/* Inline extent inserted, page gets unlocked and everything is done */
- if (page_started) {
- ret = 0;
- goto out;
- }
+ if (page_started)
+ return 0;
+
if (ret < 0) {
btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
if (locked_page) {
@@ -961,14 +960,11 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
end_extent_writepage(locked_page, ret, page_start, page_end);
unlock_page(locked_page);
}
- goto out;
+ return ret;
}
- ret = extent_write_locked_range(&inode->vfs_inode, start, end);
/* All pages will be unlocked, including @locked_page */
-out:
- kfree(async_extent);
- return ret;
+ return extent_write_locked_range(&inode->vfs_inode, start, end);
}
static int submit_one_async_extent(struct btrfs_inode *inode,
@@ -1000,8 +996,10 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
lock_extent(io_tree, start, end, NULL);
/* We have fall back to uncompressed write */
- if (!async_extent->pages)
- return submit_uncompressed_range(inode, async_extent, locked_page);
+ if (!async_extent->pages) {
+ ret = submit_uncompressed_range(inode, async_extent, locked_page);
+ goto done;
+ }
ret = btrfs_reserve_extent(root, async_extent->ram_size,
async_extent->compressed_size,
@@ -1066,6 +1064,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
if (async_chunk->blkcg_css)
kthread_associate_blkcg(NULL);
*alloc_hint = ins.objectid + ins.offset;
+done:
kfree(async_extent);
return ret;
@@ -1080,8 +1079,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
PAGE_UNLOCK | PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK | PAGE_SET_ERROR);
free_async_extent_pages(async_extent);
- kfree(async_extent);
- return ret;
+ goto done;
}
/*
--
2.39.2
* [PATCH 5/7] btrfs, block: move REQ_CGROUP_PUNT to btrfs
[not found] ` <20230327004954.728797-1-hch-jcswGhMUV9g@public.gmane.org>
2023-03-27 0:49 ` [PATCH 1/7] btrfs: move kthread_associate_blkcg out of btrfs_submit_compressed_write Christoph Hellwig
2023-03-27 0:49 ` [PATCH 2/7] btrfs: don't free the async_extent in submit_uncompressed_range Christoph Hellwig
@ 2023-03-27 0:49 ` Christoph Hellwig
2023-03-28 1:15 ` Jens Axboe
2023-03-27 0:49 ` [PATCH 7/7] block: make blkcg_punt_bio_submit optional Christoph Hellwig
3 siblings, 1 reply; 17+ messages in thread
From: Christoph Hellwig @ 2023-03-27 0:49 UTC (permalink / raw)
To: Josef Bacik, Chris Mason, David Sterba
Cc: Tejun Heo, Jens Axboe, cgroups-u79uwXL29TY76Z2rM5mHXA,
linux-block-u79uwXL29TY76Z2rM5mHXA,
linux-btrfs-u79uwXL29TY76Z2rM5mHXA
REQ_CGROUP_PUNT is a bit annoying as it is hard to follow and adds
a branch to the bio submission hot path. To fix this, export
blkcg_punt_bio_submit and let btrfs call it directly. Add a new
REQ_FS_PRIVATE flag for btrfs to indicate to its own low-level
bio submission code that a punt to the cgroup submission helper
is required.
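Condensed from the hunks below, the btrfs side boils down to mapping
the generic bit to a btrfs-private name and testing it in btrfs' own
bio submission path instead of in submit_bio:

	/* Submit using blkcg_punt_bio_submit. */
	#define REQ_BTRFS_CGROUP_PUNT	REQ_FS_PRIVATE

	if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT)
		blkcg_punt_bio_submit(bio);
	else
		submit_bio(bio);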
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
block/blk-cgroup.c | 31 +++++++++++++++++--------------
block/blk-cgroup.h | 12 ------------
block/blk-core.c | 3 ---
fs/btrfs/bio.c | 12 ++++++++----
fs/btrfs/bio.h | 3 +++
fs/btrfs/extent_io.c | 2 +-
fs/btrfs/inode.c | 2 +-
include/linux/bio.h | 5 +++++
include/linux/blk_types.h | 18 +++++-------------
9 files changed, 40 insertions(+), 48 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bd50b55bdb6135..9f5f3263c1781e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1688,24 +1688,27 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
}
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
-bool __blkcg_punt_bio_submit(struct bio *bio)
+/*
+ * When a shared kthread issues a bio for a cgroup, doing so synchronously can
+ * lead to priority inversions as the kthread can be trapped waiting for that
+ * cgroup. Use this helper instead of submit_bio to punt the actual issuing to
+ * a dedicated per-blkcg work item to avoid such priority inversions.
+ */
+void blkcg_punt_bio_submit(struct bio *bio)
{
struct blkcg_gq *blkg = bio->bi_blkg;
- /* consume the flag first */
- bio->bi_opf &= ~REQ_CGROUP_PUNT;
-
- /* never bounce for the root cgroup */
- if (!blkg->parent)
- return false;
-
- spin_lock_bh(&blkg->async_bio_lock);
- bio_list_add(&blkg->async_bios, bio);
- spin_unlock_bh(&blkg->async_bio_lock);
-
- queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
- return true;
+ if (blkg->parent) {
+ spin_lock_bh(&blkg->async_bio_lock);
+ bio_list_add(&blkg->async_bios, bio);
+ spin_unlock_bh(&blkg->async_bio_lock);
+ queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+ } else {
+ /* never bounce for the root cgroup */
+ submit_bio(bio);
+ }
}
+EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
/*
* Scale the accumulated delay based on how long it has been since we updated
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 9c5078755e5e19..64758ab9f1f134 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -375,16 +375,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q)))
-bool __blkcg_punt_bio_submit(struct bio *bio);
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio)
-{
- if (bio->bi_opf & REQ_CGROUP_PUNT)
- return __blkcg_punt_bio_submit(bio);
- else
- return false;
-}
-
static inline void blkcg_bio_issue_init(struct bio *bio)
{
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
@@ -506,8 +496,6 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline void blk_cgroup_bio_start(struct bio *bio) { }
static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
diff --git a/block/blk-core.c b/block/blk-core.c
index 42926e6cb83c8e..478978dcb2bd50 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -830,9 +830,6 @@ EXPORT_SYMBOL(submit_bio_noacct);
*/
void submit_bio(struct bio *bio)
{
- if (blkcg_punt_bio_submit(bio))
- return;
-
if (bio_op(bio) == REQ_OP_READ) {
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, bio_sectors(bio));
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index cf09c6271edbee..60cf186085bf0a 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -421,7 +421,11 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
dev->devid, bio->bi_iter.bi_size);
btrfsic_check_bio(bio);
- submit_bio(bio);
+
+ if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT)
+ blkcg_punt_bio_submit(bio);
+ else
+ submit_bio(bio);
}
static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
@@ -537,10 +541,10 @@ static void run_one_async_done(struct btrfs_work *work)
/*
* All of the bios that pass through here are from async helpers.
- * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context.
- * This changes nothing when cgroups aren't in use.
+ * Use REQ_BTRFS_CGROUP_PUNT to issue them from the owning cgroup's
+ * context. This changes nothing when cgroups aren't in use.
*/
- bio->bi_opf |= REQ_CGROUP_PUNT;
+ bio->bi_opf |= REQ_BTRFS_CGROUP_PUNT;
__btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
}
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index dbf125f6fa336d..8edf3c35eead90 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -88,6 +88,9 @@ static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
/* Bio only refers to one ordered extent. */
#define REQ_BTRFS_ONE_ORDERED REQ_DRV
+/* Submit using blkcg_punt_bio_submit. */
+#define REQ_BTRFS_CGROUP_PUNT REQ_FS_PRIVATE
+
void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f5702b1e2b86b0..f40e4a002f789a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2538,7 +2538,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
struct btrfs_bio_ctrl bio_ctrl = {
.wbc = &wbc_writepages,
/* We're called from an async helper function */
- .opf = REQ_OP_WRITE | REQ_CGROUP_PUNT |
+ .opf = REQ_OP_WRITE | REQ_BTRFS_CGROUP_PUNT |
wbc_to_write_flags(&wbc_writepages),
.extent_locked = 1,
};
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 713b47792b9b7e..1a34374c8e327e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1615,7 +1615,7 @@ static int cow_file_range_async(struct btrfs_inode *inode,
if (blkcg_css != blkcg_root_css) {
css_get(blkcg_css);
async_chunk[i].blkcg_css = blkcg_css;
- async_chunk[i].write_flags |= REQ_CGROUP_PUNT;
+ async_chunk[i].write_flags |= REQ_BTRFS_CGROUP_PUNT;
} else {
async_chunk[i].blkcg_css = NULL;
}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d766be7152e151..b3e7529ff55eb3 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -500,6 +500,7 @@ void bio_associate_blkg(struct bio *bio);
void bio_associate_blkg_from_css(struct bio *bio,
struct cgroup_subsys_state *css);
void bio_clone_blkg_association(struct bio *dst, struct bio *src);
+void blkcg_punt_bio_submit(struct bio *bio);
#else /* CONFIG_BLK_CGROUP */
static inline void bio_associate_blkg(struct bio *bio) { }
static inline void bio_associate_blkg_from_css(struct bio *bio,
@@ -507,6 +508,10 @@ static inline void bio_associate_blkg_from_css(struct bio *bio,
{ }
static inline void bio_clone_blkg_association(struct bio *dst,
struct bio *src) { }
+static inline void blkcg_punt_bio_submit(struct bio *bio)
+{
+ submit_bio(bio);
+}
#endif /* CONFIG_BLK_CGROUP */
static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 99be590f952f6e..fb8843990d288e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -404,18 +404,11 @@ enum req_flag_bits {
__REQ_RAHEAD, /* read ahead, can fail anytime */
__REQ_BACKGROUND, /* background IO */
__REQ_NOWAIT, /* Don't wait if request will block */
- /*
- * When a shared kthread needs to issue a bio for a cgroup, doing
- * so synchronously can lead to priority inversions as the kthread
- * can be trapped waiting for that cgroup. CGROUP_PUNT flag makes
- * submit_bio() punt the actual issuing to a dedicated per-blkcg
- * work item to avoid such priority inversions.
- */
- __REQ_CGROUP_PUNT,
__REQ_POLLED, /* caller polls for completion using bio_poll */
__REQ_ALLOC_CACHE, /* allocate IO from cache if available */
__REQ_SWAP, /* swap I/O */
__REQ_DRV, /* for driver use */
+ __REQ_FS_PRIVATE, /* for file system (submitter) use */
/*
* Command specific flags, keep last:
@@ -443,14 +436,13 @@ enum req_flag_bits {
#define REQ_RAHEAD (__force blk_opf_t)(1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND (__force blk_opf_t)(1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT (__force blk_opf_t)(1ULL << __REQ_NOWAIT)
-#define REQ_CGROUP_PUNT (__force blk_opf_t)(1ULL << __REQ_CGROUP_PUNT)
-
-#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
#define REQ_POLLED (__force blk_opf_t)(1ULL << __REQ_POLLED)
#define REQ_ALLOC_CACHE (__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE)
-
-#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV)
#define REQ_SWAP (__force blk_opf_t)(1ULL << __REQ_SWAP)
+#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV)
+#define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
+
+#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
--
2.39.2
* Re: [PATCH 5/7] btrfs, block: move REQ_CGROUP_PUNT to btrfs
2023-03-27 0:49 ` [PATCH 5/7] btrfs, block: move REQ_CGROUP_PUNT to btrfs Christoph Hellwig
@ 2023-03-28 1:15 ` Jens Axboe
0 siblings, 0 replies; 17+ messages in thread
From: Jens Axboe @ 2023-03-28 1:15 UTC (permalink / raw)
To: Christoph Hellwig, Josef Bacik, Chris Mason, David Sterba
Cc: Tejun Heo, cgroups, linux-block, linux-btrfs
On 3/26/23 6:49 PM, Christoph Hellwig wrote:
> REQ_CGROUP_PUNT is a bit annoying as it is hard to follow and adds
> a branch to the bio submission hot path. To fix this, export
> blkcg_punt_bio_submit and let btrfs call it directly. Add a new
> REQ_FS_PRIVATE flag for btrfs to indicate to its own low-level
> bio submission code that a punt to the cgroup submission helper
> is required.
Looks good, and nice to remove more cruft from the generic
submission path:
Reviewed-by: Jens Axboe <axboe@kernel.dk>
--
Jens Axboe
* [PATCH 7/7] block: make blkcg_punt_bio_submit optional
[not found] ` <20230327004954.728797-1-hch-jcswGhMUV9g@public.gmane.org>
` (2 preceding siblings ...)
2023-03-27 0:49 ` [PATCH 5/7] btrfs, block: move REQ_CGROUP_PUNT to btrfs Christoph Hellwig
@ 2023-03-27 0:49 ` Christoph Hellwig
2023-03-27 23:32 ` Jens Axboe
3 siblings, 1 reply; 17+ messages in thread
From: Christoph Hellwig @ 2023-03-27 0:49 UTC (permalink / raw)
To: Josef Bacik, Chris Mason, David Sterba
Cc: Tejun Heo, Jens Axboe, cgroups-u79uwXL29TY76Z2rM5mHXA,
linux-block-u79uwXL29TY76Z2rM5mHXA,
linux-btrfs-u79uwXL29TY76Z2rM5mHXA
Guard all the code that punts bios to a per-cgroup submission helper
behind a new CONFIG_BLK_CGROUP_PUNT_BIO symbol that is selected by
btrfs. This way, non-btrfs kernel builds don't need to carry this code.
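Condensed from the Kconfig hunks below, this is the usual pattern of
an invisible bool symbol selected by its only user:

	config BLK_CGROUP_PUNT_BIO
		bool

	config BTRFS_FS
		tristate "Btrfs filesystem support"
		select BLK_CGROUP_PUNT_BIO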
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
block/Kconfig | 3 ++
block/blk-cgroup.c | 77 +++++++++++++++++++++++++---------------------
block/blk-cgroup.h | 3 +-
fs/btrfs/Kconfig | 1 +
4 files changed, 48 insertions(+), 36 deletions(-)
diff --git a/block/Kconfig b/block/Kconfig
index 941b2dca70db73..69ccf7457ae110 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -41,6 +41,9 @@ config BLK_RQ_ALLOC_TIME
config BLK_CGROUP_RWSTAT
bool
+config BLK_CGROUP_PUNT_BIO
+ bool
+
config BLK_DEV_BSG_COMMON
tristate
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c524ecab440b8f..18c922579719b5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -56,7 +56,6 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
bool blkcg_debug_stats = false;
-static struct workqueue_struct *blkcg_punt_bio_wq;
#define BLKG_DESTROY_BATCH_SIZE 64
@@ -166,7 +165,9 @@ static void __blkg_release(struct rcu_head *rcu)
{
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
WARN_ON(!bio_list_empty(&blkg->async_bios));
+#endif
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
@@ -188,6 +189,9 @@ static void blkg_release(struct percpu_ref *ref)
call_rcu(&blkg->rcu_head, __blkg_release);
}
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
+static struct workqueue_struct *blkcg_punt_bio_wq;
+
static void blkg_async_bio_workfn(struct work_struct *work)
{
struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
@@ -214,6 +218,40 @@ static void blkg_async_bio_workfn(struct work_struct *work)
blk_finish_plug(&plug);
}
+/*
+ * When a shared kthread issues a bio for a cgroup, doing so synchronously can
+ * lead to priority inversions as the kthread can be trapped waiting for that
+ * cgroup. Use this helper instead of submit_bio to punt the actual issuing to
+ * a dedicated per-blkcg work item to avoid such priority inversions.
+ */
+void blkcg_punt_bio_submit(struct bio *bio)
+{
+ struct blkcg_gq *blkg = bio->bi_blkg;
+
+ if (blkg->parent) {
+ spin_lock(&blkg->async_bio_lock);
+ bio_list_add(&blkg->async_bios, bio);
+ spin_unlock(&blkg->async_bio_lock);
+ queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+ } else {
+ /* never bounce for the root cgroup */
+ submit_bio(bio);
+ }
+}
+EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
+
+static int __init blkcg_punt_bio_init(void)
+{
+ blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
+ WQ_MEM_RECLAIM | WQ_FREEZABLE |
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!blkcg_punt_bio_wq)
+ return -ENOMEM;
+ return 0;
+}
+subsys_initcall(blkcg_punt_bio_init);
+#endif /* CONFIG_BLK_CGROUP_PUNT_BIO */
+
/**
* bio_blkcg_css - return the blkcg CSS associated with a bio
* @bio: target bio
@@ -269,10 +307,12 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
blkg->q = disk->queue;
INIT_LIST_HEAD(&blkg->q_node);
+ blkg->blkcg = blkcg;
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
spin_lock_init(&blkg->async_bio_lock);
bio_list_init(&blkg->async_bios);
INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
- blkg->blkcg = blkcg;
+#endif
u64_stats_init(&blkg->iostat.sync);
for_each_possible_cpu(cpu) {
@@ -1688,28 +1728,6 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
}
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
-/*
- * When a shared kthread issues a bio for a cgroup, doing so synchronously can
- * lead to priority inversions as the kthread can be trapped waiting for that
- * cgroup. Use this helper instead of submit_bio to punt the actual issuing to
- * a dedicated per-blkcg work item to avoid such priority inversions.
- */
-void blkcg_punt_bio_submit(struct bio *bio)
-{
- struct blkcg_gq *blkg = bio->bi_blkg;
-
- if (blkg->parent) {
- spin_lock(&blkg->async_bio_lock);
- bio_list_add(&blkg->async_bios, bio);
- spin_unlock(&blkg->async_bio_lock);
- queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
- } else {
- /* never bounce for the root cgroup */
- submit_bio(bio);
- }
-}
-EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
-
/*
* Scale the accumulated delay based on how long it has been since we updated
* the delay. We only call this when we are adding delay, in case it's been a
@@ -2088,16 +2106,5 @@ bool blk_cgroup_congested(void)
return ret;
}
-static int __init blkcg_init(void)
-{
- blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
- WQ_MEM_RECLAIM | WQ_FREEZABLE |
- WQ_UNBOUND | WQ_SYSFS, 0);
- if (!blkcg_punt_bio_wq)
- return -ENOMEM;
- return 0;
-}
-subsys_initcall(blkcg_init);
-
module_param(blkcg_debug_stats, bool, 0644);
MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 64758ab9f1f134..e98d2c1be35415 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -72,9 +72,10 @@ struct blkcg_gq {
struct blkg_iostat_set iostat;
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
-
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
spinlock_t async_bio_lock;
struct bio_list async_bios;
+#endif
union {
struct work_struct async_bio_work;
struct work_struct free_work;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 37b6bab90c835e..66fa9ab2c04629 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -2,6 +2,7 @@
config BTRFS_FS
tristate "Btrfs filesystem support"
+ select BLK_CGROUP_PUNT_BIO
select CRYPTO
select CRYPTO_CRC32C
select LIBCRC32C
--
2.39.2