public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Qu Wenruo <wqu@suse.com>
To: Boris Burkov <boris@bur.io>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v4] btrfs: extract inlined creation into a dedicated delalloc helper
Date: Wed, 4 Mar 2026 14:48:18 +1030	[thread overview]
Message-ID: <983e8887-9176-4cfe-97f1-8f2106420659@suse.com> (raw)
In-Reply-To: <20260304040133.GA899178@zen.localdomain>



在 2026/3/4 14:32, Boris Burkov 写道:
> On Thu, Feb 26, 2026 at 09:23:18AM +1030, Qu Wenruo wrote:
[...]
>> -	btrfs_lock_extent(&inode->io_tree, offset, end, &cached);
>> -	ret = __cow_file_range_inline(inode, size, compressed_size,
>> -				      compress_type, compressed_folio,
>> -				      update_i_size);
>> -	if (ret > 0) {
> 
> Possible bug described below, but note the old code did not call
> extent_clear_unlock_delalloc when __cow_file_range_inline() returned > 0

You're right, and __cow_file_range_inline() has a somewhat weird 
returning value.

It turns 0 if we have inserted the inline extent, <0 for critical error 
which also aborts the transaction (except -ENOSPC).
It only return >0 if we failed with ENOSPC.

But to be honest, the ENOSPC can happen from two locations (insert and 
update inodes), and if the insert succeeded but inode update failed, I'm 
not sure if we should still continue without reverting the inserted extent.

Thus the ret > 0 branch in the new code should be falling back to 
regular ones.

> 
>> -		btrfs_unlock_extent(&inode->io_tree, offset, end, &cached);
>> -		return ret;
>> -	}
>> -
>> -	/*
>> -	 * In the successful case (ret == 0 here), cow_file_range will return 1.
>> -	 *
>> -	 * Quite a bit further up the callstack in extent_writepage(), ret == 1
>> -	 * is treated as a short circuited success and does not unlock the folio,
>> -	 * so we must do it here.
>> -	 *
>> -	 * In the failure case, the locked_folio does get unlocked by
>> -	 * btrfs_folio_end_all_writers, which asserts that it is still locked
>> -	 * at that point, so we must *not* unlock it here.
>> -	 *
>> -	 * The other two callsites in compress_file_range do not have a
>> -	 * locked_folio, so they are not relevant to this logic.
>> -	 */
>> -	if (ret == 0)
>> -		locked_folio = NULL;
>> -
>> -	extent_clear_unlock_delalloc(inode, offset, end, locked_folio, &cached,
>> -				     clear_flags, PAGE_UNLOCK |
>> -				     PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
>> -	return ret;
>> -}
>> -
>>   struct async_extent {
>>   	u64 start;
>>   	u64 ram_size;
>> @@ -797,7 +747,7 @@ static int add_async_extent(struct async_chunk *cow, u64 start, u64 ram_size,
>>    * options, defragmentation, properties or heuristics.
>>    */
>>   static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
>> -				      u64 end)
>> +				      u64 end, bool check_inline)
>>   {
>>   	struct btrfs_fs_info *fs_info = inode->root->fs_info;
>>   
>> @@ -812,8 +762,9 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
>>   	 * and will always fallback to regular write later.
>>   	 */
>>   	if (end + 1 - start <= fs_info->sectorsize &&
>> -	    (start > 0 || end + 1 < inode->disk_i_size))
>> +	    (!check_inline || (start > 0 || end + 1 < inode->disk_i_size)))
>>   		return 0;
>> +
>>   	/* Defrag ioctl takes precedence over mount options and properties. */
>>   	if (inode->defrag_compress == BTRFS_DEFRAG_DONT_COMPRESS)
>>   		return 0;
>> @@ -928,7 +879,6 @@ static void compress_file_range(struct btrfs_work *work)
>>   		container_of(work, struct async_chunk, work);
>>   	struct btrfs_inode *inode = async_chunk->inode;
>>   	struct btrfs_fs_info *fs_info = inode->root->fs_info;
>> -	struct address_space *mapping = inode->vfs_inode.i_mapping;
>>   	struct compressed_bio *cb = NULL;
>>   	u64 blocksize = fs_info->sectorsize;
>>   	u64 start = async_chunk->start;
>> @@ -1000,7 +950,7 @@ static void compress_file_range(struct btrfs_work *work)
>>   	 * been flagged as NOCOMPRESS.  This flag can change at any time if we
>>   	 * discover bad compression ratios.
>>   	 */
>> -	if (!inode_need_compress(inode, start, end))
>> +	if (!inode_need_compress(inode, start, end, false))
>>   		goto cleanup_and_bail_uncompressed;
>>   
>>   	if (0 < inode->defrag_compress && inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) {
>> @@ -1021,35 +971,6 @@ static void compress_file_range(struct btrfs_work *work)
>>   	total_compressed = cb->bbio.bio.bi_iter.bi_size;
>>   	total_in = cur_len;
>>   
>> -	/*
>> -	 * Try to create an inline extent.
>> -	 *
>> -	 * If we didn't compress the entire range, try to create an uncompressed
>> -	 * inline extent, else a compressed one.
>> -	 *
>> -	 * Check cow_file_range() for why we don't even try to create inline
>> -	 * extent for the subpage case.
>> -	 */
>> -	if (total_in < actual_end)
>> -		ret = cow_file_range_inline(inode, NULL, start, end, 0,
>> -					    BTRFS_COMPRESS_NONE, NULL, false);
>> -	else
>> -		ret = cow_file_range_inline(inode, NULL, start, end, total_compressed,
>> -					    compress_type,
>> -					    bio_first_folio_all(&cb->bbio.bio), false);
>> -	if (ret <= 0) {
>> -		cleanup_compressed_bio(cb);
>> -		if (ret < 0)
>> -			mapping_set_error(mapping, -EIO);
>> -		return;
>> -	}
>> -	/*
>> -	 * If a single block at file offset 0 cannot be inlined, fall back to
>> -	 * regular writes without marking the file incompressible.
>> -	 */
>> -	if (start == 0 && end <= blocksize)
>> -		goto cleanup_and_bail_uncompressed;
>> -
>>   	/*
>>   	 * We aren't doing an inline extent. Round the compressed size up to a
>>   	 * block size boundary so the allocator does sane things.
>> @@ -1427,11 +1348,6 @@ static int cow_one_range(struct btrfs_inode *inode, struct folio *locked_folio,
>>    *
>>    * When this function fails, it unlocks all folios except @locked_folio.
>>    *
>> - * When this function successfully creates an inline extent, it returns 1 and
>> - * unlocks all folios including locked_folio and starts I/O on them.
>> - * (In reality inline extents are limited to a single block, so locked_folio is
>> - * the only folio handled anyway).
>> - *
>>    * When this function succeed and creates a normal extent, the folio locking
>>    * status depends on the passed in flags:
>>    *
>> @@ -1475,25 +1391,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>>   	ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
>>   
>>   	inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
>> -
>> -	if (!(flags & COW_FILE_RANGE_NO_INLINE)) {
>> -		/* lets try to make an inline extent */
>> -		ret = cow_file_range_inline(inode, locked_folio, start, end, 0,
>> -					    BTRFS_COMPRESS_NONE, NULL, false);
>> -		if (ret <= 0) {
>> -			/*
>> -			 * We succeeded, return 1 so the caller knows we're done
>> -			 * with this page and already handled the IO.
>> -			 *
>> -			 * If there was an error then cow_file_range_inline() has
>> -			 * already done the cleanup.
>> -			 */
>> -			if (ret == 0)
>> -				ret = 1;
>> -			goto done;
>> -		}
>> -	}
>> -
>>   	alloc_hint = btrfs_get_extent_allocation_hint(inode, start, num_bytes);
>>   
>>   	/*
>> @@ -1571,7 +1468,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>>   	}
>>   	extent_clear_unlock_delalloc(inode, orig_start, end, locked_folio, &cached,
>>   				     EXTENT_LOCKED | EXTENT_DELALLOC, page_ops);
>> -done:
>>   	if (done_offset)
>>   		*done_offset = end;
>>   	return ret;
>> @@ -1874,7 +1770,7 @@ static int fallback_to_cow(struct btrfs_inode *inode,
>>   	 * a locked folio, which can race with writeback.
>>   	 */
>>   	ret = cow_file_range(inode, locked_folio, start, end, NULL,
>> -			     COW_FILE_RANGE_NO_INLINE | COW_FILE_RANGE_KEEP_LOCKED);
>> +			     COW_FILE_RANGE_KEEP_LOCKED);
>>   	ASSERT(ret != 1);
>>   	return ret;
>>   }
>> @@ -2425,6 +2321,80 @@ static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
>>   	return false;
>>   }
>>   
>> +/*
>> + * Return 0 if an inlined extent is created successfully.
>> + * Return <0 if critical error happened.
>> + * Return >0 if an inline extent can not be created.
>> + */
>> +static int run_delalloc_inline(struct btrfs_inode *inode, struct folio *locked_folio)
>> +{
>> +	struct btrfs_fs_info *fs_info = inode->root->fs_info;
>> +	struct compressed_bio *cb = NULL;
>> +	struct extent_state *cached = NULL;
>> +	const u64 i_size = i_size_read(&inode->vfs_inode);
>> +	const u32 blocksize = fs_info->sectorsize;
>> +	int compress_type = fs_info->compress_type;
>> +	int compress_level = fs_info->compress_level;
>> +	u32 compressed_size = 0;
>> +	int ret;
>> +
>> +	ASSERT(folio_pos(locked_folio) == 0);
>> +
>> +	if (btrfs_inode_can_compress(inode) &&
>> +	    inode_need_compress(inode, 0, blocksize, true)) {
>> +		if (inode->defrag_compress > 0 &&
>> +		    inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) {
>> +			compress_type = inode->defrag_compress;
>> +			compress_level = inode->defrag_compress_level;
>> +		} else if (inode->prop_compress) {
>> +			compress_type = inode->prop_compress;
>> +		}
>> +		cb = btrfs_compress_bio(inode, 0, blocksize, compress_type, compress_level, 0);
>> +		if (IS_ERR(cb)) {
>> +			cb = NULL;
>> +			/* Just fall back to non-compressed case. */
>> +		} else {
>> +			compressed_size = cb->bbio.bio.bi_iter.bi_size;
>> +		}
>> +	}
>> +	if (!can_cow_file_range_inline(inode, 0, i_size, compressed_size)) {
>> +		if (cb)
>> +			cleanup_compressed_bio(cb);
>> +		return 1;
>> +	}
>> +
>> +	btrfs_lock_extent(&inode->io_tree, 0, blocksize - 1, &cached);
>> +	if (cb)
>> +		ret = __cow_file_range_inline(inode, i_size, compressed_size, compress_type,
>> +					      bio_first_folio_all(&cb->bbio.bio), false);
>> +	else
>> +		ret = __cow_file_range_inline(inode, i_size, 0, BTRFS_COMPRESS_NONE,
>> +					      NULL, false);
>> +	/*
>> +	 * In the successful case (ret == 0 here), run_delalloc_inline() will return 1.
> 
> As far as I can tell, this is inside run_delalloc_inline() and we do not
> further manipulate ret to become 1, so this comment seems to have an
> error.
> 
> Was it meant to say that the caller, btrfs_run_delalloc_range(), will
> return 1?

Right, the comment is not properly updated for the new situation.

> 
>> +	 *
>> +	 * Quite a bit further up the callstack in extent_writepage(), ret == 1
>> +	 * is treated as a short circuited success and does not unlock the folio,
>> +	 * so we must do it here.
>> +	 *
>> +	 * For failure case, the @locked_folio does get unlocked by
>> +	 * btrfs_folio_end_lock_bitmap(), so we must *not* unlock it here.
>> +	 *
>> +	 * So if ret == 0, we let extent_clear_unlock_delalloc() to unlock the
>> +	 * folio by passing NULL as @locked_folio.
>> +	 * Otherwise pass @locked_folio as usual.
>> +	 */
>> +	if (ret == 0)
>> +		locked_folio = NULL;
> 
> I think this might be a bug.
> 
> when ret == 1, we do fallback but have cleared delalloc, so the caller
> will call cow_file_range without the delalloc bits set, and double call
> extent_clear_unlock_delalloc()

Correct, I'll update the comment of that __cow_file_range_inline() and 
for ret > 0 case to directly exit without extent_clear_unlock_delalloc().

Thanks a lot for reviewing and catching this critical bug,
Qu

> 
>> +	extent_clear_unlock_delalloc(inode, 0, blocksize - 1, locked_folio, &cached,
>> +				     EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
>> +				     EXTENT_DO_ACCOUNTING | EXTENT_LOCKED,
>> +				     PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
>> +	if (cb)
>> +		cleanup_compressed_bio(cb);
>> +	return ret;
>> +}
>> +
>>   /*
>>    * Function to process delayed allocation (create CoW) for ranges which are
>>    * being touched for the first time.
>> @@ -2441,11 +2411,26 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol
>>   	ASSERT(!(end <= folio_pos(locked_folio) ||
>>   		 start >= folio_next_pos(locked_folio)));
>>   
>> +	if (start == 0 && end + 1 <= inode->root->fs_info->sectorsize &&
>> +	    end + 1 >= inode->disk_i_size) {
>> +		int ret;
>> +
>> +		ret = run_delalloc_inline(inode, locked_folio);
>> +		if (ret < 0)
>> +			return ret;
>> +		if (ret == 0)
>> +			return 1;
>> +		/*
>> +		 * Continue regular handling if we can not create an
>> +		 * inlined extent.
>> +		 */
>> +	}
>> +
>>   	if (should_nocow(inode, start, end))
>>   		return run_delalloc_nocow(inode, locked_folio, start, end);
>>   
>>   	if (btrfs_inode_can_compress(inode) &&
>> -	    inode_need_compress(inode, start, end) &&
>> +	    inode_need_compress(inode, start, end, false) &&
>>   	    run_delalloc_compressed(inode, locked_folio, start, end, wbc))
>>   		return 1;
>>   
>> -- 
>> 2.53.0
>>


      reply	other threads:[~2026-03-04  4:18 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-25 22:53 [PATCH v4] btrfs: extract inlined creation into a dedicated delalloc helper Qu Wenruo
2026-03-04  4:02 ` Boris Burkov
2026-03-04  4:18   ` Qu Wenruo [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=983e8887-9176-4cfe-97f1-8f2106420659@suse.com \
    --to=wqu@suse.com \
    --cc=boris@bur.io \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox