Linux block layer

Linux block layer
 help / color / mirror / Atom feed

* [PATCH 4/9] block: move the FAIL_MAKE_REQUEST symbol from lib/ to block/
From: Christoph Hellwig @ 2026-06-02  5:45 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jonathan Corbet, linux-block, linux-doc, bpf, linux-kselftest
In-Reply-To: <20260602054615.3788425-1-hch@lst.de>

Keep the Kconfig symbol together with the code that it guards.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/Kconfig     | 6 ++++++
 lib/Kconfig.debug | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/Kconfig b/block/Kconfig
index 15027963472d..6c942391f65e 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -209,6 +209,12 @@ config BLK_INLINE_ENCRYPTION_FALLBACK
 	  by falling back to the kernel crypto API when inline
 	  encryption hardware is not present.
 
+config FAIL_MAKE_REQUEST
+	bool "Fault-injection capability for disk IO"
+	depends on FAULT_INJECTION
+	help
+	  Provide fault-injection capability for disk IO.
+
 source "block/partitions/Kconfig"
 
 config BLK_PM
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8ff5adcfe1e0..fb085963ec5e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2116,12 +2116,6 @@ config FAULT_INJECTION_USERCOPY
 	  Provides fault-injection capability to inject failures
 	  in usercopy functions (copy_from_user(), get_user(), ...).
 
-config FAIL_MAKE_REQUEST
-	bool "Fault-injection capability for disk IO"
-	depends on FAULT_INJECTION && BLOCK
-	help
-	  Provide fault-injection capability for disk IO.
-
 config FAIL_IO_TIMEOUT
 	bool "Fault-injection capability for faking disk interrupts"
 	depends on FAULT_INJECTION && BLOCK
-- 
2.53.0


^ permalink raw reply related

* [PATCH 3/9] block: refactor should_fail_bio and should_fail_request
From: Christoph Hellwig @ 2026-06-02  5:45 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jonathan Corbet, linux-block, linux-doc, bpf, linux-kselftest
In-Reply-To: <20260602054615.3788425-1-hch@lst.de>

Move the bdev flag checks into a helper and the blk-mq clone
insert path so that we can do only a single actual error injection for
I/O to partitions instead of doing it twice.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c          | 28 ++++++++++++++--------------
 block/blk-mq.c            |  3 ++-
 block/blk.h               |  5 ++---
 include/linux/blk_types.h |  2 --
 4 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index f35e0d3fb127..644888b66f33 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -502,10 +502,9 @@ static int __init setup_fail_make_request(char *str)
 }
 __setup("fail_make_request=", setup_fail_make_request);
 
-bool should_fail_request(struct block_device *part, unsigned int bytes)
+bool should_fail_request(unsigned int bytes)
 {
-	return bdev_test_flag(part, BD_MAKE_IT_FAIL) &&
-	       should_fail(&fail_make_request, bytes);
+	return should_fail(&fail_make_request, bytes);
 }
 
 static int __init fail_make_request_debugfs(void)
@@ -539,11 +538,13 @@ static inline void bio_check_ro(struct bio *bio)
 	}
 }
 
-static int should_fail_bio(struct bio *bio)
+static inline bool may_fail_bio(struct bio *bio)
 {
-	if (should_fail_request(bdev_whole(bio->bi_bdev), bio->bi_iter.bi_size))
-		return -EIO;
-	return 0;
+	if (!IS_ENABLED(CONFIG_FAIL_MAKE_REQUEST))
+		return false;
+	return bdev_test_flag(bio->bi_bdev, BD_MAKE_IT_FAIL) ||
+		(bio_flagged(bio, BIO_REMAPPED) &&
+		 bdev_test_flag(bdev_whole(bio->bi_bdev), BD_MAKE_IT_FAIL));
 }
 
 /*
@@ -577,8 +578,6 @@ static int blk_partition_remap(struct bio *bio)
 {
 	struct block_device *p = bio->bi_bdev;
 
-	if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
-		return -EIO;
 	if (bio_sectors(bio)) {
 		bio->bi_iter.bi_sector += p->bd_start_sect;
 		trace_block_bio_remap(bio, p->bd_dev,
@@ -723,10 +722,13 @@ static void __submit_bio_noacct_mq(struct bio *bio)
 
 void submit_bio_noacct_nocheck(struct bio *bio, bool split)
 {
-	if (should_fail_bio(bio)) {
-		bio_io_error(bio);
-		return;
+	if (unlikely(may_fail_bio(bio))) {
+		if (should_fail_request(bio->bi_iter.bi_size)) {
+			bio_io_error(bio);
+			return;
+		}
 	}
+
 	blk_cgroup_bio_start(bio);
 
 	if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
@@ -799,8 +801,6 @@ void submit_bio_noacct(struct bio *bio)
 			goto not_supported;
 	}
 
-	if (should_fail_bio(bio))
-		goto end_io;
 	bio_check_ro(bio);
 	if (!bio_flagged(bio, BIO_REMAPPED)) {
 		if (unlikely(bio_check_eod(bio)))
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 629e16003eb7..bf66645622df 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3275,7 +3275,8 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
 		return BLK_STS_IOERR;
 	}
 
-	if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))
+	if (q->disk && bdev_test_flag(q->disk->part0, BD_MAKE_IT_FAIL) &&
+	    should_fail_request(blk_rq_bytes(rq)))
 		return BLK_STS_IOERR;
 
 	ret = blk_crypto_rq_get_keyslot(rq);
diff --git a/block/blk.h b/block/blk.h
index 889a39589356..250a6eee700a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -647,10 +647,9 @@ int disk_register_independent_access_ranges(struct gendisk *disk);
 void disk_unregister_independent_access_ranges(struct gendisk *disk);
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
-bool should_fail_request(struct block_device *part, unsigned int bytes);
+bool should_fail_request(unsigned int bytes);
 #else /* CONFIG_FAIL_MAKE_REQUEST */
-static inline bool should_fail_request(struct block_device *part,
-					unsigned int bytes)
+static inline bool should_fail_request(unsigned int bytes)
 {
 	return false;
 }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 8808ee76e73c..4a3cfa857637 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -51,9 +51,7 @@ struct block_device {
 #define BD_WRITE_HOLDER		(1u<<9)
 #define BD_HAS_SUBMIT_BIO	(1u<<10)
 #define BD_RO_WARNED		(1u<<11)
-#ifdef CONFIG_FAIL_MAKE_REQUEST
 #define BD_MAKE_IT_FAIL		(1u<<12)
-#endif
 	dev_t			bd_dev;
 	struct address_space	*bd_mapping;	/* page cache */
 
-- 
2.53.0


^ permalink raw reply related

* [PATCH 2/9] block: consolidate the calls to should_fail_bio
From: Christoph Hellwig @ 2026-06-02  5:45 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jonathan Corbet, linux-block, linux-doc, bpf, linux-kselftest
In-Reply-To: <20260602054615.3788425-1-hch@lst.de>

Delay the point of error injection a bit so that we have a single site
in blk-core.c after more of the submission side checks and blkcg
throttling.  This allows to make should_fail_bio static in blk-core.c.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c  | 6 +++++-
 block/blk-merge.c | 5 +----
 block/blk.h       | 1 -
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 3a23af3e26a9..f35e0d3fb127 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -539,7 +539,7 @@ static inline void bio_check_ro(struct bio *bio)
 	}
 }
 
-int should_fail_bio(struct bio *bio)
+static int should_fail_bio(struct bio *bio)
 {
 	if (should_fail_request(bdev_whole(bio->bi_bdev), bio->bi_iter.bi_size))
 		return -EIO;
@@ -723,6 +723,10 @@ static void __submit_bio_noacct_mq(struct bio *bio)
 
 void submit_bio_noacct_nocheck(struct bio *bio, bool split)
 {
+	if (should_fail_bio(bio)) {
+		bio_io_error(bio);
+		return;
+	}
 	blk_cgroup_bio_start(bio);
 
 	if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 7cc82a7a6f4e..b44f8ae849b8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -130,11 +130,8 @@ struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors,
 	trace_block_split(split, bio->bi_iter.bi_sector);
 	WARN_ON_ONCE(bio_zone_write_plugging(bio));
 
-	if (should_fail_bio(bio))
-		bio_io_error(bio);
-	else if (!blk_throtl_bio(bio))
+	if (!blk_throtl_bio(bio))
 		submit_bio_noacct_nocheck(bio, true);
-
 	return split;
 }
 EXPORT_SYMBOL_GPL(bio_submit_split_bioset);
diff --git a/block/blk.h b/block/blk.h
index bf1a80493ff1..889a39589356 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -646,7 +646,6 @@ extern const struct address_space_operations def_blk_aops;
 int disk_register_independent_access_ranges(struct gendisk *disk);
 void disk_unregister_independent_access_ranges(struct gendisk *disk);
 
-int should_fail_bio(struct bio *bio);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 bool should_fail_request(struct block_device *part, unsigned int bytes);
 #else /* CONFIG_FAIL_MAKE_REQUEST */
-- 
2.53.0


^ permalink raw reply related

* [PATCH 1/9] block: remove ALLOW_ERROR_INJECTION for should_fail_bio
From: Christoph Hellwig @ 2026-06-02  5:45 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jonathan Corbet, linux-block, linux-doc, bpf, linux-kselftest
In-Reply-To: <20260602054615.3788425-1-hch@lst.de>

Allowing error injection for should_fail_bio is a bit misguided.  It allows
inserting an errno, which is then ignored, but it forces an out-of-line
call for something that should not exist when error injection is disabled.

Remove the error injection flag in preparation for adding better block
layer error injection, and switch the bpf test to use a MM error
injection site instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c                                           | 1 -
 tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 7 ++++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index b0f0a304ea0b..3a23af3e26a9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -545,7 +545,6 @@ int should_fail_bio(struct bio *bio)
 		return -EIO;
 	return 0;
 }
-ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
 
 /*
  * Check whether this bio extends beyond the end of the device or partition.
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 2e0ddef77ba5..6c8b161cdd7b 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -588,12 +588,13 @@ static void test_attach_override(void)
 		goto cleanup;
 	}
 
-	/* The should_fail_bio function is on error injection list,
+	/* The __filemap_add_folio function is on error injection list,
 	 * attach should succeed.
 	 */
 	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override,
-						     "should_fail_bio", NULL);
-	if (!ASSERT_OK_PTR(link, "override_attached_should_fail_bio"))
+						     "__filemap_add_folio,",
+						     NULL);
+	if (!ASSERT_OK_PTR(link, "override_attached___filemap_add_folio,"))
 		goto cleanup;
 
 	bpf_link__destroy(link);
-- 
2.53.0


^ permalink raw reply related

* configurable block error injection
From: Christoph Hellwig @ 2026-06-02  5:45 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jonathan Corbet, linux-block, linux-doc, bpf, linux-kselftest

Hi all,

this series adds a new configurable block error injection facility.
We already have a few ways to inject block errors, but unfortunately most
of them are either not very useful or hard to use, or both:

 - The fail_make_request failure injection point can't distinguish
   different commands, different ranges in the file and can only inject
   plain I/O errors.
 - the should_fail_bio 'dynamic' failure injection has all the same issues
   as fail_make_request
 - dm-error can only fail all commands in the table using BLK_STS_IOERR
   and requires setting up a new block device
 - dm-flakey and dm-dust allow all kinds of configurability, but still
   don't have good error selection, no good support for non-read/write
   commands and are limited to the dm table alignment requirements,
   which for zoned devices enforces setting them up for an entire zone.
   They also once again require setting up a stacked block device,
   which is really annoying in harnesses like xfstests

This series adds a new debugfs-based block layer error injection
that allows to configure what operations and ranges the injection
is applied to, and what status to return.  It also allows to configure a
failure ratio similar to the xfs errortag injection.

As part of this the should_fail_bio is removed, as the should_fail_bio
function really gets in the way in its current form, and the injection
of an errno which always gets turned into BLK_STS_IOERR doesn't make
much sense.  This required adjusting the bpf test suite as it attached
to this function as it assumed it to be present.

Diffstat:
 Documentation/block/error-injection.rst                    |   59 ++
 Documentation/block/index.rst                              |    1 
 block/Kconfig                                              |    6 
 block/Makefile                                             |    1 
 block/blk-core.c                                           |  128 ++---
 block/blk-merge.c                                          |    5 
 block/blk-mq.c                                             |    3 
 block/blk-sysfs.c                                          |    4 
 block/blk.h                                                |   24 
 block/error-injection.c                                    |  329 +++++++++++++
 block/genhd.c                                              |    4 
 include/linux/blk_types.h                                  |    2 
 include/linux/blkdev.h                                     |    5 
 lib/Kconfig.debug                                          |    6 
 tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c |    7 
 15 files changed, 507 insertions(+), 77 deletions(-)

^ permalink raw reply

* Re: [PATCH v7 00/43] btrfs: add fscrypt support
From: Eric Biggers @ 2026-06-02  4:19 UTC (permalink / raw)
  To: David Sterba
  Cc: Daniel Vacek, David Sterba, linux-block, linux-fscrypt,
	linux-btrfs, linux-kernel, Chris Mason, Josef Bacik,
	Theodore Y. Ts'o, Jaegeuk Kim, Jens Axboe
In-Reply-To: <20260602022553.GA2295@sol>

On Mon, Jun 01, 2026 at 07:25:53PM -0700, Eric Biggers wrote:
> On Mon, Jun 01, 2026 at 08:57:30PM +0200, David Sterba wrote:
> > The testing is not straightforward as it needs 3 projects to
> > synchronize, kernel, fstests and btrfs-progs. Testing may need to use
> > custom git branches for all of them. For btrfs-progs the changes will
> > get in soon. For fstests it can be a chicken-egg problem, as they don't
> > accept tests for unmerged code. We've been using our fstests [1] with
> > additional fixups (not upstreamable, related to CI workarounds). Though
> > I'm not sure if Daniel has updated the branch with his most recent
> > version.
> > 
> > [1] https://github.com/btrfs/fstests branch staging
> 
> Where are the btrfs-progs changes, then?  I'd like to try this out, but
> there's no way to do it without the btrfs-progs changes.

Please make sure to test with debugging options enabled as well.  Trying
it with KASAN enabled, right away there's an out-of-bounds write in
base64_encode(), because btrfs_real_readdir() has an incorrect buffer
size calculation:

    nokey_len = DIV_ROUND_UP(name_len * 4, 3);

The other filesystems do it right by using fscrypt_fname_alloc_buffer().

Of course it turns out this is in the Sashiko comments as well, quoted
below:

    Is it possible for this check to allow a heap buffer overflow in the
    filldir buffer?

    The length check estimates the fscrypt nokey name size using
    DIV_ROUND_UP(name_len * 4, 3). However, fscrypt_fname_disk_to_usr()
    prepends an 8-byte dirhash before base64 encoding. Furthermore, if
    the name length exceeds 149 bytes, it hashes the name and encodes
    exactly 189 bytes, yielding a 252-byte output.

    If the remaining buffer space is large enough to pass this nokey_len
    check but smaller than the actual bytes produced by
    fscrypt_fname_disk_to_usr(), it looks like
    fscrypt_fname_disk_to_usr() could write past the end of the
    PAGE_SIZE allocation.

I think I've done enough with this series for now.  I'll take a look
again once it's in a better shape.

- Eric

^ permalink raw reply

* Re: [PATCH] blk-iocost: use irq-safe locking in cgroup handlers
From: Yu Kuai @ 2026-06-02  4:00 UTC (permalink / raw)
  To: Bart Van Assche, tj, josef; +Cc: axboe, linux-block, linux-kernel, yukuai
In-Reply-To: <8709b8e7-8328-47e8-950f-e5726bd70dbc@gmail.com>

Hi,

在 2026/6/2 5:50, Bart Van Assche 写道:
> On 5/31/26 11:13 PM, Yu Kuai wrote:
>> @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct 
>> seq_file *sf,
>>       if (!dname)
>>           return 0;
>>   -    spin_lock(&ioc->lock);
>> +    spin_lock_irq(&ioc->lock);
>>       seq_printf(sf, "%s ctrl=%s model=linear "
>>              "rbps=%llu rseqiops=%llu rrandiops=%llu "
>>              "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
>>              dname, ioc->user_cost_model ? "user" : "auto",
>>              u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
>>              u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], 
>> u[I_LCOEF_WRANDIOPS]);
>> -    spin_unlock(&ioc->lock);
>> +    spin_unlock_irq(&ioc->lock);
>>       return 0;
>>   }
>
> This change is wrong. ioc_cost_model_prfill() only has one caller,
> namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function
> with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of
> the above function enables interrupts while q->queue_lock is held. If an
> interrupt happens on the same CPU core before q->queue_lock is unlocked,
> and that interrupt tries to lock q->queue_lock, a deadlock will occur.

Yes, this is correct. Sorry, I just noticed this is a sysfs API like other
places and didn't notice the problem. So ioc_weight_write() is the only
place that needs the fix.

Jens, please let me know if you want me to send a v2 or a fix.

>
> Bart.

-- 
Thanks,
Kuai

^ permalink raw reply

* Re: [GIT PULL] md-7.2-20260531
From: Coly Li @ 2026-06-02  3:40 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Yu Kuai, linux-raid, linux-block, linux-bcache
In-Reply-To: <753dfdb4-7d68-4183-af3f-d51e92d4dd31@kernel.dk>

Hi Jens,

> 2026年6月2日 02:55，Jens Axboe <axboe@kernel.dk> 写道：
> 
> On 5/31/26 5:26 AM, Yu Kuai wrote:
>> Hi Jens,
>> 
>> Please consider pulling the following changes into your for-7.2/block
>> branch.
>> 
>> This pull request contains:
>> 
>> Bug Fixes:
>> - Only requeue dm-raid bios when dm is suspending. (Benjamin Marzinski)
>> - Reset raid10 read_slot when reusing r10bio for discard. (Chen Cheng)
>> - Fix raid1/raid10 deadlock in read error recovery path. (Abd-Alrhman Masalkhi)
>> - Fix raid1/raid10 error-path detection with md_cloned_bio(). (Abd-Alrhman Masalkhi)
>> - Fix raid1/raid10 bio accounting for split md cloned bios. (Abd-Alrhman Masalkhi)
>> - Fix raid1 nr_pending leak in REQ_ATOMIC bad-block path. (Abd-Alrhman Masalkhi)
>> 
>> Improvements:
>> - Skip redundant raid_disks updates when the value is unchanged. (Abd-Alrhman Masalkhi)
>> 
>> Cleanups:
>> - Update MAINTAINERS email addresses. (Yu Kuai, Li Nan)
>> - Clean up raid1 read error handling. (Christoph Hellwig)
>> - Move the exceed_read_errors condition out of fix_read_error(). (Christoph Hellwig)
>> - Use str_plural() in raid0 dump_zones(). (Thorsten Blum)
> 
> Pulled, thanks.
> 
> Side note - emails from you with the new email address all end up in
> spam. I just noticed some of them this morning. But I think you want to
> ensure that the DKIM etc part of your email is all kosher, gmail flags
> all of it.

Could you please check my patch to change my email address to fygo.io in your spam emails?
I guess similar situation may also happen on my patch sent from fygo.io.
And yes, we are looking into this DKIM issue now.

Thanks.

Coly Li

^ permalink raw reply

* Re: [PATCH v7 12/43] btrfs: add new FEATURE_INCOMPAT_ENCRYPT flag
From: Eric Biggers @ 2026-06-02  3:27 UTC (permalink / raw)
  To: Daniel Vacek
  Cc: Chris Mason, Josef Bacik, Theodore Y. Ts'o, Jaegeuk Kim,
	Jens Axboe, David Sterba, linux-block, linux-fscrypt, linux-btrfs,
	linux-kernel, Omar Sandoval, Sweet Tea Dorminy
In-Reply-To: <20260513085340.3673127-13-neelx@suse.com>

On Wed, May 13, 2026 at 10:52:46AM +0200, Daniel Vacek wrote:
> From: Omar Sandoval <osandov@osandov.com>
> 
> As encrypted files will be incompatible with older filesystem versions,
> new filesystems should be created with an incompat flag for fscrypt,
> which will gate access to the encryption ioctls.
> 
> Signed-off-by: Omar Sandoval <osandov@osandov.com>
> Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> Signed-off-by: Daniel Vacek <neelx@suse.com>
> ---
> 
> v5: https://lore.kernel.org/linux-btrfs/ccbea52046c1dadbbef926bfc878cc23af952729.1706116485.git.josef@toxicpanda.com/
>  * No changes since.
> ---
>  fs/btrfs/fs.h              | 3 ++-
>  fs/btrfs/super.c           | 5 +++++
>  fs/btrfs/sysfs.c           | 6 ++++++
>  include/uapi/linux/btrfs.h | 1 +
>  4 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
> index a4758d94b32e..dbdb73722c14 100644
> --- a/fs/btrfs/fs.h
> +++ b/fs/btrfs/fs.h
> @@ -322,7 +322,8 @@ enum {
>  	(BTRFS_FEATURE_INCOMPAT_SUPP_STABLE |	\
>  	 BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE | \
>  	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 | \
> -	 BTRFS_FEATURE_INCOMPAT_REMAP_TREE)
> +	 BTRFS_FEATURE_INCOMPAT_REMAP_TREE |	\
> +	 BTRFS_FEATURE_INCOMPAT_ENCRYPT)
>  
>  #else
>  
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index efaa0788c1fc..84df97363611 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -2563,6 +2563,11 @@ static int __init btrfs_print_mod_info(void)
>  			", fsverity=yes"
>  #else
>  			", fsverity=no"
> +#endif
> +#ifdef CONFIG_FS_ENCRYPTION
> +			", fscrypt=yes"
> +#else
> +			", fscrypt=no"
>  #endif
>  			;
>  
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 0d14570c8bc2..3fe57843f902 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -305,6 +305,9 @@ BTRFS_FEAT_ATTR_INCOMPAT(remap_tree, REMAP_TREE);
>  #ifdef CONFIG_FS_VERITY
>  BTRFS_FEAT_ATTR_COMPAT_RO(verity, VERITY);
>  #endif
> +#ifdef CONFIG_FS_ENCRYPTION
> +BTRFS_FEAT_ATTR_INCOMPAT(encryption, ENCRYPT);
> +#endif /* CONFIG_FS_ENCRYPTION */
>  
>  /*
>   * Features which depend on feature bits and may differ between each fs.
> @@ -338,6 +341,9 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
>  #ifdef CONFIG_FS_VERITY
>  	BTRFS_FEAT_ATTR_PTR(verity),
>  #endif
> +#ifdef CONFIG_FS_ENCRYPTION
> +	BTRFS_FEAT_ATTR_PTR(encryption),
> +#endif /* CONFIG_FS_ENCRYPTION */
>  	NULL
>  };
>  
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index 9165154a274d..2f6a46e5f4ce 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -335,6 +335,7 @@ struct btrfs_ioctl_fs_info_args {
>  #define BTRFS_FEATURE_INCOMPAT_ZONED		(1ULL << 12)
>  #define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2	(1ULL << 13)
>  #define BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE	(1ULL << 14)
> +#define BTRFS_FEATURE_INCOMPAT_ENCRYPT		(1ULL << 15)
>  #define BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA	(1ULL << 16)
>  #define BTRFS_FEATURE_INCOMPAT_REMAP_TREE	(1ULL << 17)

There seems to be an inconsistency where btrfs's fscrypt support depends
on CONFIG_BTRFS_EXPERIMENTAL, but the support is advertised in sysfs
regardless.

- Eric

^ permalink raw reply

* Re: [PATCH v7 11/43] btrfs: add inode encryption contexts
From: Eric Biggers @ 2026-06-02  3:25 UTC (permalink / raw)
  To: Daniel Vacek
  Cc: Chris Mason, Josef Bacik, Theodore Y. Ts'o, Jaegeuk Kim,
	Jens Axboe, David Sterba, linux-block, linux-fscrypt, linux-btrfs,
	linux-kernel, Omar Sandoval, Sweet Tea Dorminy
In-Reply-To: <20260513085340.3673127-12-neelx@suse.com>

On Wed, May 13, 2026 at 10:52:45AM +0200, Daniel Vacek wrote:
> From: Omar Sandoval <osandov@osandov.com>
> 
> fscrypt stores a context item with encrypted inodes that contains the
> related encryption information.  fscrypt provides an arbitrary blob for
> the filesystem to store, and it does not clearly fit into an existing
> structure, so this goes in a new item type.
> 
> Signed-off-by: Omar Sandoval <osandov@osandov.com>
> Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> Signed-off-by: Daniel Vacek <neelx@suse.com>
> ---
> 
> v7 changes:
>  * Fix a path leak as found by Chris's AI review.
> v6 changes:
>  * Shorten the inode context key macro name to BTRFS_FSCRYPT_INODE_CTX_KEY.
> v5: https://lore.kernel.org/linux-btrfs/5a88efb484b0874a7430b83bc6e5f6b9aa5858d5.1706116485.git.josef@toxicpanda.com/
> ---
>  fs/btrfs/fscrypt.c              | 116 ++++++++++++++++++++++++++++++++
>  fs/btrfs/fscrypt.h              |   2 +
>  fs/btrfs/inode.c                |  19 ++++++
>  fs/btrfs/ioctl.c                |   8 ++-
>  include/uapi/linux/btrfs_tree.h |  10 +++
>  5 files changed, 153 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/btrfs/fscrypt.c b/fs/btrfs/fscrypt.c
> index 6cfba7d94e72..c503f817cbe7 100644
> --- a/fs/btrfs/fscrypt.c
> +++ b/fs/btrfs/fscrypt.c
> @@ -1,10 +1,126 @@
>  // SPDX-License-Identifier: GPL-2.0
>  
> +#include <linux/iversion.h>
>  #include "ctree.h"
> +#include "accessors.h"
>  #include "btrfs_inode.h"
> +#include "disk-io.h"
> +#include "fs.h"
>  #include "fscrypt.h"
> +#include "ioctl.h"
> +#include "messages.h"
> +#include "transaction.h"
> +#include "xattr.h"
> +
> +static int btrfs_fscrypt_get_context(struct inode *inode, void *ctx, size_t len)
> +{
> +	struct btrfs_key key = {
> +		.objectid = btrfs_ino(BTRFS_I(inode)),
> +		.type = BTRFS_FSCRYPT_INODE_CTX_KEY,
> +		.offset = 0,
> +	};
> +	struct btrfs_path *path;
> +	struct extent_buffer *leaf;
> +	unsigned long ptr;
> +	int ret;
> +
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0);
> +	if (ret) {
> +		len = -ENOENT;
> +		goto out;
> +	}
> +
> +	leaf = path->nodes[0];
> +	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
> +	/* fscrypt provides max context length, but it could be less */
> +	len = min_t(size_t, len, btrfs_item_size(leaf, path->slots[0]));
> +	read_extent_buffer(leaf, ctx, ptr, len);
> +
> +out:
> +	btrfs_free_path(path);
> +	return len;
> +}

This doesn't conform to the calling convention for
fscrypt_operations::get_context, specifically in the cases where
-ENODATA and -ERANGE are expected.

        /*
         * Get the fscrypt context of the given inode.
         *
         * @inode: the inode whose context to get
         * @ctx: the buffer into which to get the context
         * @len: length of the @ctx buffer in bytes
         *
         * Return: On success, returns the length of the context in bytes; this
         *         may be less than @len.  On failure, returns -ENODATA if the
         *         inode doesn't have a context, -ERANGE if the context is
         *         longer than @len, or another -errno code.
         */
        int (*get_context)(struct inode *inode, void *ctx, size_t len);

It also seems to be assuming that any error from btrfs_search_slot()
means "not found", which isn't correct.

The size_t variable called 'len' is also being used to store negative
errno values, which is weird.

> +static int btrfs_fscrypt_set_context(struct inode *inode, const void *ctx,
> +				     size_t len, void *fs_data)
> +{
> +	struct btrfs_trans_handle *trans = fs_data;
> +	struct btrfs_key key = {
> +		.objectid = btrfs_ino(BTRFS_I(inode)),
> +		.type = BTRFS_FSCRYPT_INODE_CTX_KEY,
> +		.offset = 0,
> +	};
> +	struct btrfs_path *path = NULL;
> +	struct extent_buffer *leaf;
> +	unsigned long ptr;
> +	int ret;
> +
> +	if (!trans)
> +		trans = btrfs_start_transaction(BTRFS_I(inode)->root, 2);
> +	if (IS_ERR(trans))
> +		return PTR_ERR(trans);
> +
> +	path = btrfs_alloc_path();
> +	if (!path) {
> +		ret = -ENOMEM;
> +		goto out_err;
> +	}
> +
> +	ret = btrfs_search_slot(trans, BTRFS_I(inode)->root, &key, path, 0, 1);
> +	if (ret < 0)
> +		goto out_err;
> +
> +	if (ret > 0) {
> +		btrfs_release_path(path);
> +		ret = btrfs_insert_empty_item(trans, BTRFS_I(inode)->root, path, &key, len);
> +		if (ret)
> +			goto out_err;
> +	}
> +
> +	leaf = path->nodes[0];
> +	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
> +
> +	len = min_t(size_t, len, btrfs_item_size(leaf, path->slots[0]));
> +	write_extent_buffer(leaf, ctx, ptr, len);
> +	btrfs_mark_buffer_dirty(trans, leaf);
> +	btrfs_release_path(path);
> +
> +	if (fs_data)
> +		goto out_err;
> +
> +	BTRFS_I(inode)->flags |= BTRFS_INODE_ENCRYPT;
> +	btrfs_sync_inode_flags_to_i_flags(BTRFS_I(inode));
> +	inode_inc_iversion(inode);
> +	inode_set_ctime_current(inode);
> +	ret = btrfs_update_inode(trans, BTRFS_I(inode));
> +	if (ret)
> +		goto out_abort;
> +	btrfs_free_path(path);
> +	btrfs_end_transaction(trans);
> +	return 0;
> +out_abort:
> +	btrfs_abort_transaction(trans, ret);
> +out_err:
> +	if (!fs_data)
> +		btrfs_end_transaction(trans);
> +	btrfs_free_path(path);
> +	return ret;
> +}

The 'len = min_t(size_t, len, btrfs_item_size(leaf, path->slots[0]));'
line seems scary, since it just truncates the data given.

> @@ -199,7 +203,7 @@ static int check_fsflags(unsigned int old_flags, unsigned int flags)
>  		      FS_NOATIME_FL | FS_NODUMP_FL | \
>  		      FS_SYNC_FL | FS_DIRSYNC_FL | \
>  		      FS_NOCOMP_FL | FS_COMPR_FL |
> -		      FS_NOCOW_FL))
> +		      FS_NOCOW_FL | FS_ENCRYPT_FL))
>  		return -EOPNOTSUPP;

Do you know why FS_VERITY_FL isn't in this mask?

- Eric

^ permalink raw reply

* Re: [PATCH v7 10/43] btrfs: start using fscrypt hooks
From: Eric Biggers @ 2026-06-02  3:12 UTC (permalink / raw)
  To: Daniel Vacek
  Cc: Chris Mason, Josef Bacik, Theodore Y. Ts'o, Jaegeuk Kim,
	Jens Axboe, David Sterba, linux-block, linux-fscrypt, linux-btrfs,
	linux-kernel, Omar Sandoval, Sweet Tea Dorminy
In-Reply-To: <20260513085340.3673127-11-neelx@suse.com>

On Wed, May 13, 2026 at 10:52:44AM +0200, Daniel Vacek wrote:
> @@ -9041,20 +9063,28 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
>  	};
>  	unsigned int trans_num_items;
>  	int ret;
> -	int name_len;
>  	int datasize;
>  	unsigned long ptr;
>  	struct btrfs_file_extent_item *ei;
>  	struct extent_buffer *leaf;
> +	struct fscrypt_str disk_link;
> +	size_t max_len;
> +	u32 name_len = strlen(symname);
> +
> +	/*
> +	 * BTRFS_MAX_INLINE_DATA_SIZE() isn't actually telling the truth, we actually
> +	 * limit inline data extents to min(BTRFS_MAX_INLINE_DATA_SIZE(), sectorsize),
> +	 * so adjust max_len given this wonderful bit of inconsistency.
> +	 */
> +	max_len = min_t(size_t, BTRFS_MAX_INLINE_DATA_SIZE(fs_info), fs_info->sectorsize);
>  
> -	name_len = strlen(symname);
>  	/*
> -	 * Symlinks utilize uncompressed inline extent data, which should not
> -	 * reach block size.
> +	 * fscrypt sets disk_link.len to be len + 1, including a NUL terminator,
> +	 * but we don't store that '\0' character.
>  	 */
> -	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
> -	    name_len >= fs_info->sectorsize)
> -		return -ENAMETOOLONG;
> +	ret = fscrypt_prepare_symlink(dir, symname, name_len, max_len + 1, &disk_link);
> +	if (ret)
> +		return ret;

This is off by one from the other filesystems.  Yes, the way the other
filesystems do encrypted symlinks is weird, but this still doesn't fix
it, since the unnecessary 'struct fscrypt_symlink_data' is still stored.
If it's not being fixed completely, it should just be done the same way.

Did you do it this way because you're trying to squeeze out an extra
byte, to allow 4094-byte symlink targets instead of 4093 as the other
filesystems do?  Or did you do it this way because btrfs doesn't count a
nul terminator when checking unencrypted symlinks against
BTRFS_MAX_INLINE_DATA_SIZE(fs_info), and you needed to preserve that
behavior?  But at the same time, btrfs *does* count the nul terminator
when validating against 'fs_info->sectorsize', and this changes that
behavior.  So it's not clear what was intended here.

> +	if (IS_ENCRYPTED(inode)) {
> +		ret = fscrypt_encrypt_symlink(inode, symname, name_len, &disk_link);
> +		if (ret) {
> +			btrfs_abort_transaction(trans, ret);
> +			btrfs_free_path(path);
> +			discard_new_inode(inode);
> +			inode = NULL;
> +			goto out;
> +		}
> +	}

fscrypt_encrypt_symlink() already has an IS_ENCRYPTED(inode) check
built-in.

> +static const char *btrfs_get_link(struct dentry *dentry, struct inode *inode,
> +				  struct delayed_call *done)
> +{
> +	struct page *cpage;
> +	const char *paddr;
> +	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> +
> +	if (!IS_ENCRYPTED(inode))
> +		return page_get_link(dentry, inode, done);
> +
> +	if (!dentry)
> +		return ERR_PTR(-ECHILD);
> +
> +	cpage = read_mapping_page(inode->i_mapping, 0, NULL);
> +	if (IS_ERR(cpage))
> +		return ERR_CAST(cpage);
> +
> +	paddr = fscrypt_get_symlink(inode, page_address(cpage),
> +				    BTRFS_MAX_INLINE_DATA_SIZE(fs_info), done);
> +	put_page(cpage);

This uses a different max_len from btrfs_symlink().

Speaking of symlinks, btrfs is also missing a hookup to
fscrypt_symlink_getattr().

- Eric

^ permalink raw reply

* Re: [GIT PULL] md-7.2-20260531
From: Yu Kuai @ 2026-06-02  3:07 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-raid, linux-block, Abd-Alrhman Masalkhi, Benjamin Marzinski,
	Chen Cheng, Christoph Hellwig, Li Nan, Thorsten Blum, yukuai
In-Reply-To: <753dfdb4-7d68-4183-af3f-d51e92d4dd31@kernel.dk>

Hi,

在 2026/6/2 2:55, Jens Axboe 写道:
> On 5/31/26 5:26 AM, Yu Kuai wrote:
>> Hi Jens,
>>
>> Please consider pulling the following changes into your for-7.2/block
>> branch.
>>
>> This pull request contains:
>>
>> Bug Fixes:
>> - Only requeue dm-raid bios when dm is suspending. (Benjamin Marzinski)
>> - Reset raid10 read_slot when reusing r10bio for discard. (Chen Cheng)
>> - Fix raid1/raid10 deadlock in read error recovery path. (Abd-Alrhman Masalkhi)
>> - Fix raid1/raid10 error-path detection with md_cloned_bio(). (Abd-Alrhman Masalkhi)
>> - Fix raid1/raid10 bio accounting for split md cloned bios. (Abd-Alrhman Masalkhi)
>> - Fix raid1 nr_pending leak in REQ_ATOMIC bad-block path. (Abd-Alrhman Masalkhi)
>>
>> Improvements:
>> - Skip redundant raid_disks updates when the value is unchanged. (Abd-Alrhman Masalkhi)
>>
>> Cleanups:
>> - Update MAINTAINERS email addresses. (Yu Kuai, Li Nan)
>> - Clean up raid1 read error handling. (Christoph Hellwig)
>> - Move the exceed_read_errors condition out of fix_read_error(). (Christoph Hellwig)
>> - Use str_plural() in raid0 dump_zones(). (Thorsten Blum)
> Pulled, thanks.
>
> Side note - emails from you with the new email address all end up in
> spam. I just noticed some of them this morning. But I think you want to
> ensure that the DKIM etc part of your email is all kosher, gmail flags
> all of it.

Thanks a lot for the notice, we'll look into this.

>
-- 
Thanks,
Kuai

^ permalink raw reply

* Re: [PATCH v7 09/43] btrfs: add infrastructure for safe em freeing
From: Eric Biggers @ 2026-06-02  2:54 UTC (permalink / raw)
  To: Daniel Vacek
  Cc: Chris Mason, Josef Bacik, Theodore Y. Ts'o, Jaegeuk Kim,
	Jens Axboe, David Sterba, linux-block, linux-fscrypt, linux-btrfs,
	linux-kernel, Boris Burkov
In-Reply-To: <20260513085340.3673127-10-neelx@suse.com>

On Wed, May 13, 2026 at 10:52:43AM +0200, Daniel Vacek wrote:
> +/*
> + * Drop a ref for the extent map in the given tree.
> + *
> + * @tree:	tree that the em is a part of.
> + * @em:		the em to drop the reference to.
> + *
> + * Drop the reference count on @em by one, if the reference count hits 0 and
> + * there is an object on the em that can't be safely freed in the current
> + * context (if we are holding the extent_map_tree->lock for example), then add
> + * it to the freed_extents list on the extent_map_tree for later processing.
> + *
> + * This must be followed by a btrfs_free_pending_extent_maps() to clear
> + * the pending frees.
> + */
> +void btrfs_free_extent_map_safe(struct extent_map_tree *tree,
> +				struct extent_map *em)

The "unsafe" btrfs_free_extent_map() still exists.  Assuming both
variants will continue to be needed, shouldn't the comment above the
"unsafe" variant be updated to document the preconditions for using it?

> diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
> index 6f685f3c9327..a962012be1c3 100644
> --- a/fs/btrfs/extent_map.h
> +++ b/fs/btrfs/extent_map.h
> @@ -97,11 +97,18 @@ struct extent_map {
>  	u32 flags;
>  	refcount_t refs;
>  	struct list_head list;
> +	struct list_head free_list;
> +};

The comment above this struct says "Keep this structure as compact as
possible, as we can have really large amounts of allocated extent maps
at any time."  Is there any way to avoid adding a whole list_head?

- Eric

^ permalink raw reply

* Re: [PATCH v7 05/43] blk-crypto: add a process bio callback
From: Eric Biggers @ 2026-06-02  2:48 UTC (permalink / raw)
  To: Daniel Vacek
  Cc: Chris Mason, Josef Bacik, Theodore Y. Ts'o, Jaegeuk Kim,
	Jens Axboe, David Sterba, linux-block, linux-fscrypt, linux-btrfs,
	linux-kernel
In-Reply-To: <20260513085340.3673127-6-neelx@suse.com>

On Wed, May 13, 2026 at 10:52:39AM +0200, Daniel Vacek wrote:
>  /*
>   * Supported types of keys.  Must be bitflags due to their use in
>   * blk_crypto_profile::key_types_supported.
> @@ -77,12 +85,14 @@ enum blk_crypto_key_type {
>   *	filesystem block size or the disk sector size.
>   * @dun_bytes: the maximum number of bytes of DUN used when using this key
>   * @key_type: the type of this key -- either raw or hardware-wrapped
> + * @process_bio: optional callback to process encrypted bios.
>   */
>  struct blk_crypto_config {
>  	enum blk_crypto_mode_num crypto_mode;
>  	unsigned int data_unit_size;
>  	unsigned int dun_bytes;
>  	enum blk_crypto_key_type key_type;
> +	blk_crypto_process_bio_t process_bio;
>  };

This new field needs to be initialized in
fscrypt_select_encryption_impl().  Otherwise it breaks native inline
encryption for ext4 and f2fs.

- Eric

^ permalink raw reply

* Re: [PATCH v7 36/43] btrfs: deal with encrypted symlinks in send
From: Eric Biggers @ 2026-06-02  2:42 UTC (permalink / raw)
  To: Daniel Vacek
  Cc: Chris Mason, Josef Bacik, Theodore Y. Ts'o, Jaegeuk Kim,
	Jens Axboe, David Sterba, linux-block, linux-fscrypt, linux-btrfs,
	linux-kernel
In-Reply-To: <20260513085340.3673127-37-neelx@suse.com>

On Wed, May 13, 2026 at 10:53:10AM +0200, Daniel Vacek wrote:
> From: Josef Bacik <josef@toxicpanda.com>
> 
> Send needs to send the decrypted value of the symlinks, handle the case
> where the inode is encrypted and decrypt the symlink name into a buffer
> and copy this buffer into our fs_path struct.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> Signed-off-by: Daniel Vacek <neelx@suse.com>
> ---
> 
> No changes in v7.
> v6 changes:
>  * read_symlink_encrypted() reworked from using pages to using folios.
> v5: https://lore.kernel.org/linux-btrfs/4d97f35d6f85ff041b09bed33b63446a92b7a20c.1706116485.git.josef@toxicpanda.com/
> ---
>  fs/btrfs/send.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 42 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> index 89d72d8cb85f..d5256c22fe7a 100644
> --- a/fs/btrfs/send.c
> +++ b/fs/btrfs/send.c
> @@ -1701,9 +1701,7 @@ static int find_extent_clone(struct send_ctx *sctx,
>  	return ret;
>  }
>  
> -static int read_symlink(struct btrfs_root *root,
> -			u64 ino,
> -			struct fs_path *dest)
> +static int read_symlink_unencrypted(struct btrfs_root *root, u64 ino, struct fs_path *dest)
>  {
>  	int ret;
>  	BTRFS_PATH_AUTO_FREE(path);
> @@ -1764,6 +1762,47 @@ static int read_symlink(struct btrfs_root *root,
>  	return fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
>  }
>  
> +static int read_symlink_encrypted(struct btrfs_root *root, u64 ino, struct fs_path *dest)
> +{
> +	DEFINE_DELAYED_CALL(done);
> +	const char *buf;
> +	struct folio *folio;
> +	struct btrfs_inode *inode;
> +	int ret = 0;
> +
> +	inode = btrfs_iget(ino, root);
> +	if (IS_ERR(inode))
> +		return PTR_ERR(inode);
> +
> +	folio = read_mapping_folio(inode->vfs_inode.i_mapping, 0, NULL);
> +	if (IS_ERR(folio)) {
> +		iput(&inode->vfs_inode);
> +		return PTR_ERR(folio);
> +	}
> +
> +	buf = fscrypt_get_symlink(&inode->vfs_inode, folio_address(folio),
> +				  BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info),
> +				  &done);
> +	folio_put(folio);
> +	iput(&inode->vfs_inode);
> +
> +	if (IS_ERR(buf))
> +		return PTR_ERR(buf);
> +
> +	ret = fs_path_add(dest, buf, strlen(buf));
> +	do_delayed_call(&done);
> +	return ret;
> +}
> +
> +
> +static int read_symlink(struct btrfs_root *root, u64 ino,
> +			struct fs_path *dest)
> +{
> +	if (btrfs_fs_incompat(root->fs_info, ENCRYPT))
> +		return read_symlink_encrypted(root, ino, dest);
> +	return read_symlink_unencrypted(root, ino, dest);
> +}

This just assumes that all the symlinks on the filesystem are encrypted,
without checking the actual encrypt flag in the inode.

Of course, looking at the Sashiko review for this patch, it already
found this, as well as a use-after-free.

I don't know why I'm even reviewing this.

- Eric

^ permalink raw reply

* Re: [PATCH v7 15/43] btrfs: implement fscrypt ioctls
From: Eric Biggers @ 2026-06-02  2:35 UTC (permalink / raw)
  To: Daniel Vacek
  Cc: Chris Mason, Josef Bacik, Theodore Y. Ts'o, Jaegeuk Kim,
	Jens Axboe, David Sterba, linux-block, linux-fscrypt, linux-btrfs,
	linux-kernel, Omar Sandoval, Sweet Tea Dorminy
In-Reply-To: <20260513085340.3673127-16-neelx@suse.com>

On Wed, May 13, 2026 at 10:52:49AM +0200, Daniel Vacek wrote:
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 6a37dd3cc5ee..2e0b79f41197 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -5159,6 +5159,35 @@ long btrfs_ioctl(struct file *file, unsigned int
>  		return btrfs_ioctl_get_fslabel(fs_info, argp);
>  	case FS_IOC_SETFSLABEL:
>  		return btrfs_ioctl_set_fslabel(file, argp);
> +#ifdef CONFIG_BTRFS_EXPERIMENTAL
> +	case FS_IOC_SET_ENCRYPTION_POLICY: {
> +		if (!IS_ENABLED(CONFIG_FS_ENCRYPTION))
> +			return -EOPNOTSUPP;
> +		if (sb_rdonly(fs_info->sb))
> +			return -EROFS;
> +		/*
> +		 *  If we crash before we commit, nothing encrypted could have
> +		 * been written so it doesn't matter whether the encrypted
> +		 * state persists.
> +		 */
> +		btrfs_set_fs_incompat(fs_info, ENCRYPT);
> +		return fscrypt_ioctl_set_policy(file, (const void __user *)arg);
> +	}
> +	case FS_IOC_GET_ENCRYPTION_POLICY:
> +		return fscrypt_ioctl_get_policy(file, (void __user *)arg);
> +	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
> +		return fscrypt_ioctl_get_policy_ex(file, (void __user *)arg);
> +	case FS_IOC_ADD_ENCRYPTION_KEY:
> +		return fscrypt_ioctl_add_key(file, (void __user *)arg);
> +	case FS_IOC_REMOVE_ENCRYPTION_KEY:
> +		return fscrypt_ioctl_remove_key(file, (void __user *)arg);
> +	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
> +		return fscrypt_ioctl_remove_key_all_users(file, (void __user *)arg);
> +	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
> +		return fscrypt_ioctl_get_key_status(file, (void __user *)arg);
> +	case FS_IOC_GET_ENCRYPTION_NONCE:
> +		return fscrypt_ioctl_get_nonce(file, (void __user *)arg);
> +#endif /* CONFIG_BTRFS_EXPERIMENTAL */

Are you sure you want to auto-enable the "encrypt" feature flag like
this?  It doesn't even require a privilege.

It's also only in FS_IOC_SET_ENCRYPTION_POLICY, so this doesn't work in
cases where users add a key first.

- Eric

^ permalink raw reply

* Re: [PATCH v7 03/43] fscrypt: add a __fscrypt_file_open helper
From: Eric Biggers @ 2026-06-02  2:33 UTC (permalink / raw)
  To: Daniel Vacek
  Cc: Chris Mason, Josef Bacik, Theodore Y. Ts'o, Jaegeuk Kim,
	Jens Axboe, David Sterba, linux-block, linux-fscrypt, linux-btrfs,
	linux-kernel
In-Reply-To: <20260513085340.3673127-4-neelx@suse.com>

On Wed, May 13, 2026 at 10:52:37AM +0200, Daniel Vacek wrote:
> From: Josef Bacik <josef@toxicpanda.com>
> 
> We have fscrypt_file_open() which is meant to be called on files being
> opened so that their key is loaded when we start reading data from them.
> 
> However for btrfs send we are opening the inode directly without a filp,
> so we need a different helper to make sure we can load the fscrypt
> context for the inode before reading its contents.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> Signed-off-by: Daniel Vacek <neelx@suse.com>
> ---
> 
> No changes in v7.
> v6 changes:
>  * Adapted to fscrypt changes since the last two years.
> v5: https://lore.kernel.org/linux-btrfs/4a372419c3fe6ad425e1b124c342a054e9d6db23.1706116485.git.josef@toxicpanda.com/
> ---
>  fs/crypto/hooks.c       | 38 ++++++++++++++++++++++++++++++++------
>  include/linux/fscrypt.h |  8 ++++++++
>  2 files changed, 40 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
> index a7a8a3f581a0..3142cf106bde 100644
> --- a/fs/crypto/hooks.c
> +++ b/fs/crypto/hooks.c
> @@ -9,6 +9,37 @@
>  
>  #include "fscrypt_private.h"
>  
> +/**
> + * __fscrypt_file_open() - prepare for filesystem-internal access to a
> + *			   possibly-encrypted regular file
> + * @dir: the inode for the directory via which the file is being accessed
> + * @inode: the inode being "opened"
> + *
> + * This is like fscrypt_file_open(), but instead of taking the 'struct file'
> + * being opened it takes the parent directory explicitly.  This is intended for
> + * use cases such as "send/receive" which involve the filesystem accessing file
> + * contents without setting up a 'struct file'.
> + *
> + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code
> + */
> +int __fscrypt_file_open(struct inode *dir, struct inode *inode)
> +{
> +	int err;
> +
> +	err = fscrypt_require_key(inode);
> +	if (err)
> +		return err;
> +
> +	if (!fscrypt_has_permitted_context(dir, inode)) {
> +		fscrypt_warn(inode,
> +			     "Inconsistent encryption context (parent directory: %llu)",
> +			     dir->i_ino);
> +		return -EPERM;
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(__fscrypt_file_open);
> +
>  /**
>   * fscrypt_file_open() - prepare to open a possibly-encrypted regular file
>   * @inode: the inode being opened
> @@ -60,12 +91,7 @@ int fscrypt_file_open(struct inode *inode, struct file *filp)
>  	rcu_read_unlock();
>  
>  	dentry_parent = dget_parent(dentry);
> -	if (!fscrypt_has_permitted_context(d_inode(dentry_parent), inode)) {
> -		fscrypt_warn(inode,
> -			     "Inconsistent encryption context (parent directory: %llu)",
> -			     d_inode(dentry_parent)->i_ino);
> -		err = -EPERM;
> -	}
> +	err = __fscrypt_file_open(d_inode(dentry_parent), inode);
>  	dput(dentry_parent);
>  	return err;
>  }

This change makes fscrypt_file_open() execute an unnecessary extra
fscrypt_require_key().  Could we just leave fscrypt_file_open() as-is?

- Eric

^ permalink raw reply

* Re: [PATCH v7 00/43] btrfs: add fscrypt support
From: Eric Biggers @ 2026-06-02  2:25 UTC (permalink / raw)
  To: David Sterba
  Cc: Daniel Vacek, David Sterba, linux-block, linux-fscrypt,
	linux-btrfs, linux-kernel, Chris Mason, Josef Bacik,
	Theodore Y. Ts'o, Jaegeuk Kim, Jens Axboe
In-Reply-To: <20260601185730.GE880787@twin.jikos.cz>

On Mon, Jun 01, 2026 at 08:57:30PM +0200, David Sterba wrote:
> The testing is not straightforward as it needs 3 projects to
> synchronize, kernel, fstests and btrfs-progs. Testing may need to use
> custom git branches for all of them. For btrfs-progs the changes will get
> in soon. For fstests it can be a chicken-egg problem, as they don't
> accept tests for unmerged code. We've been using our fstests [1] with
> additional fixups (not upstreamable, related to CI workarounds). Though
> I'm not sure if Daniel has updated the branch with his most recent
> version.
> 
> [1] https://github.com/btrfs/fstests branch staging

Where are the btrfs-progs changes, then?  I'd like to try this out, but
there's no way to do it without the btrfs-progs changes.

- Eric

^ permalink raw reply

* [PATCH] make new mount API honour SB_NOUSER (was Re: [PATCH] block: Avoid mounting the bdev pseudo-filesystem in userspace)
From: Al Viro @ 2026-06-02  2:04 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Christian Brauner, Jan Kara, linux-fsdevel, Jens Axboe,
	linux-block, linux-kernel, lvc-project, stable, Denis Arefev
In-Reply-To: <20260602013526.GO2636677@ZenIV>

one should *not* be allowed to mount one of those, new API or not.

Reported-by: Denis Arefev <arefev@swemel.ru>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
[[ I still want to see the rest of the reproducer - report smells like a missing
d_can_lookup() somewhere, on top of fsmount(2) bug]]
diff --git a/fs/namespace.c b/fs/namespace.c
index fe919abd2f01..17777c837683 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4499,6 +4499,10 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
 	new_mnt = vfs_create_mount(fc);
 	if (IS_ERR(new_mnt))
 		return PTR_ERR(new_mnt);
+	if (new_mnt->mnt_sb->s_flags & SB_NOUSER) {
+		mntput(new_mnt);
+		return -EINVAL;
+	}
 	new_mnt->mnt_flags = mnt_flags;
 
 	new_path.dentry = dget(fc->root);

^ permalink raw reply related

* Re: [PATCH v3] loop: Fix NULL pointer dereference in lo_rw_aio()
From: Hillf Danton @ 2026-06-02  2:02 UTC (permalink / raw)
  To: Ming Lei
  Cc: Qu Wenruo, Christoph Hellwig, Damien Le Moal, Tetsuo Handa,
	Jens Axboe, Bart Van Assche, linux-block, LKML, Andrew Morton,
	Linus Torvalds, linux-btrfs, David Sterba, linux-fsdevel,
	Christian Brauner
In-Reply-To: <ah4Xc7OmGle_C5M3@fedora>

on Mon, 1 Jun 2026 18:36:19 -0500 Ming Lei wrote:
> On Tue, Jun 02, 2026 at 07:17:30AM +0800, Hillf Danton wrote:
> > On Mon, 1 Jun 2026 17:14:59 -0500 Ming Lei wrote:
> > > On Tue, Jun 02, 2026 at 05:51:26AM +0800, Hillf Danton wrote:
> > > > On Mon, 1 Jun 2026 10:29:25 -0500 Ming Lei wrote:
> > > > > syzbot log shows the null-ptr-deref  is on WRITE, instead of DISCARD.
> > > > >
> > > > > https://syzkaller.appspot.com/bug?extid=cd8a9a308e879a4e2c28
> > > > >
> > > > > Adding WARN_ON(!lo->lo_backing_file) in loop_queue_rq() might capture
> > > > > this bio submission context if this req isn't issued via wq.
> > > > >
> > > > I suspect this makes $.02 sense given the check of Lo_bound upon queuing rq.
> > > 
> > > Can't lo->lo_state be updated after the check? It is totally lockless...
> > >
> > Sounds good hm... do you mean it is UNWISE to not flush the loop workqueue
> > when closing disk?
> 
> Quite the opposite, it is wise to not flush wq in __loop_clr_fd(), please
> see my previous comment.
>
When queuing rq, if lo_state is updated after checking Lo_bound, I see nothing
that prevents syzbot from reporting the null-ptr-deref. Can you pinpoint
why flush is NOT needed if you are right?

^ permalink raw reply

* Re: [PATCH] block: Avoid mounting the bdev pseudo-filesystem in userspace
From: Al Viro @ 2026-06-02  1:35 UTC (permalink / raw)
  To: Denis Arefev
  Cc: linux-fsdevel, Jens Axboe, linux-block, linux-kernel, lvc-project,
	stable
In-Reply-To: <20260602011907.GM2636677@ZenIV>

On Tue, Jun 02, 2026 at 02:19:07AM +0100, Al Viro wrote:
> On Thu, May 21, 2026 at 10:28:56AM +0300, Denis Arefev wrote:
> > The bdev pseudo-filesystem is an internal kernel filesystem with which
> > userspace should not interfere. Unregister it so that userspace cannot
> > even attempt to mount it.
> > 
> > This fixes a bug [1] that occurs when attempting to access files,
> > because the system call move_mount() uses pointers declared in the
> > inode_operations structure, which for the bdev pseudo-filesystem
> > are always equal to 0. `inode->i_op = &empty_iops;`
> 
> What?  init_pseudo() sets SB_NOUSER; what are you talking about?

... which doesn't suffice, apparently, since now bdev has become
mountable, along with the rest of pseudo-fs.  *THAT* is a bug.

> And assuming you've somehow managed to mount the sucker, which
> ->i_op method had been accessed?

->lookup(), apparently.  Which means that 'directory' should've been
rejected by d_can_lookup(), no matter which filesystem it's been
from.  Which might or might not be a bug in its own right.

In any case, NAK on that patch - it's papering over the real bug that
has nothing to do with block layer.

mount -t bdev none /mnt

must fail, same as for pipefs, sockfs, etc.  It doesn't.

fsdevel Cc'd, as it should've been from the very beginning.

^ permalink raw reply

* Re: [PATCH] block: Avoid mounting the bdev pseudo-filesystem in userspace
From: Al Viro @ 2026-06-02  1:21 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Denis Arefev, Jens Axboe, linux-block, linux-kernel, lvc-project,
	stable
In-Reply-To: <ahPnFlvUqq0JC2vy@infradead.org>

On Sun, May 24, 2026 at 11:07:18PM -0700, Christoph Hellwig wrote:
> On Thu, May 21, 2026 at 10:28:56AM +0300, Denis Arefev wrote:
> > The bdev pseudo-filesystem is an internal kernel filesystem with which
> > userspace should not interfere. Unregister it so that userspace cannot
> > even attempt to mount it.
> > 
> > This fixes a bug [1] that occurs when attempting to access files,
> > because the system call move_mount() uses pointers declared in the
> > inode_operations structure, which for the bdev pseudo-filesystem
> > are always equal to 0. `inode->i_op = &empty_iops;`
> 
> Looks good:

It really, really does not.  I would like to see the reproducer - analysis
looks like random noise out of LLM.

I've no real problem with removing that register_filesystem(), but if
it *does* fix some reproducer, I really want to see details.

^ permalink raw reply

* Re: [PATCH] block: Avoid mounting the bdev pseudo-filesystem in userspace
From: Al Viro @ 2026-06-02  1:19 UTC (permalink / raw)
  To: Denis Arefev; +Cc: Jens Axboe, linux-block, linux-kernel, lvc-project, stable
In-Reply-To: <20260521072857.5078-1-arefev@swemel.ru>

On Thu, May 21, 2026 at 10:28:56AM +0300, Denis Arefev wrote:
> The bdev pseudo-filesystem is an internal kernel filesystem with which
> userspace should not interfere. Unregister it so that userspace cannot
> even attempt to mount it.
> 
> This fixes a bug [1] that occurs when attempting to access files,
> because the system call move_mount() uses pointers declared in the
> inode_operations structure, which for the bdev pseudo-filesystem
> are always equal to 0. `inode->i_op = &empty_iops;`

What?  init_pseudo() sets SB_NOUSER; what are you talking about?
And assuming you've somehow managed to mount the sucker, which
->i_op method had been accessed?

^ permalink raw reply

* [PATCH] blk-mq: clarify blk_mq_quiesce_queue_nowait() comment
From: kensanya @ 2026-06-02  1:12 UTC (permalink / raw)
  To: axboe; +Cc: linux-block, linux-kernel, TanZheng

From: TanZheng <tanzheng@kylinos.cn>

The FIXME added in 2017 suggested removing this function once mpt3sas
stopped using scsi_internal_device_*block_nowait().  That is no longer
accurate: blk_mq_quiesce_queue_nowait() is now a fundamental building
block used by blk_mq_quiesce_queue(), blk_mq_quiesce_tagset(),
scsi_host_block() and others for batched quiesce without sleeping per
queue.  Replace the FIXME with a comment.

Signed-off-by: TanZheng <tanzheng@kylinos.cn>
---
 block/blk-mq.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a24175441380..b9a96d922baf 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -255,8 +255,14 @@ void blk_mq_unfreeze_queue_non_owner(struct request_queue *q)
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_non_owner);
 
 /*
- * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
- * mpt3sas driver such that this function can be removed.
+ * Mark a request queue as quiesced without waiting for in-flight dispatches
+ * to finish.  Callers that need to ensure dispatch has drained must follow
+ * this with blk_mq_wait_quiesce_done() on the queue's tag_set, or use
+ * blk_mq_quiesce_queue() which combines both steps.
+ *
+ * This split exists so multiple queues (e.g. all LUNs on a SCSI host, or all
+ * queues in a tag_set) can be quiesced without sleeping per queue, then
+ * waited on once at tag_set scope.
  */
 void blk_mq_quiesce_queue_nowait(struct request_queue *q)
 {
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH v7 13/43] btrfs: adapt readdir for encrypted and nokey names
From: Eric Biggers @ 2026-06-01 23:44 UTC (permalink / raw)
  To: Daniel Vacek
  Cc: Chris Mason, Josef Bacik, Theodore Y. Ts'o, Jaegeuk Kim,
	Jens Axboe, David Sterba, linux-block, linux-fscrypt, linux-btrfs,
	linux-kernel, Omar Sandoval, Sweet Tea Dorminy
In-Reply-To: <20260513085340.3673127-14-neelx@suse.com>

On Wed, May 13, 2026 at 10:52:47AM +0200, Daniel Vacek wrote:
> +	/*
> +	 * TODO: This should maybe be using the crypto API, not the fallback,
> +	 * but fscrypt uses the fallback and this is only used in emulation of
> +	 * fscrypt's buffer sha256 method.
> +	 */

You can delete this TODO.  The SHA-256 library functions just do the
right thing now.

- Eric

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox