All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mingming Cao <cmm@us.ibm.com>
To: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: tytso@mit.edu, sandeen@redhat.com, linux-ext4@vger.kernel.org
Subject: Re: [PATCH -V3 02/11] ext4: Make sure all the block allocation paths reserve blocks
Date: Thu, 28 Aug 2008 13:41:33 -0700	[thread overview]
Message-ID: <1219956093.6384.1.camel@mingming-laptop> (raw)
In-Reply-To: <1219850916-8986-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com>


在 2008-08-27三的 20:58 +0530,Aneesh Kumar K.V写道:
> With delayed allocation we need to make sure blocks are reserved
> before we attempt to allocate them. Otherwise we get a block
> allocation failure (ENOSPC) during writepages which cannot
> be handled. This would mean silent data loss (we do a printk
> stating that data will be lost). This patch updates the DIO
> and fallocate code paths to do block reservation before block
> allocation. This is needed to make sure parallel DIO and
> fallocate requests don't take blocks out of the delayed reserve
> space.
> 
> When the free blocks count goes below a threshold we switch to
> a slow path which looks at other CPUs' accumulated percpu
> counter values.
> 

Added this patch to the patch queue.

> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  fs/ext4/balloc.c  |   58 ++++++++++++++++++++++++++++++++++++++--------------
>  fs/ext4/ext4.h    |   13 +++++++++++
>  fs/ext4/inode.c   |    5 +---
>  fs/ext4/mballoc.c |   23 +++++++++++---------
>  4 files changed, 69 insertions(+), 30 deletions(-)
> 
> diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
> index cfed283..dc10bfd 100644
> --- a/fs/ext4/balloc.c
> +++ b/fs/ext4/balloc.c
> @@ -1602,6 +1602,32 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
>  	return ret;
>  }
> 
> +int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
> +						ext4_fsblk_t nblocks)
> +{
> +	s64 free_blocks;
> +	ext4_fsblk_t root_blocks = 0;
> +	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
> +
> +	free_blocks = percpu_counter_read(fbc);
> +
> +	if (!capable(CAP_SYS_RESOURCE) &&
> +		sbi->s_resuid != current->fsuid &&
> +		(sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
> +		root_blocks = ext4_r_blocks_count(sbi->s_es);
> +
> +	if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
> +		free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter);
> +
> +	if (free_blocks < (root_blocks + nblocks))
> +		/* we don't have free space */
> +		return -ENOSPC;
> +
> +	/* reduce fs free blocks counter */
> +	percpu_counter_sub(fbc, nblocks);
> +	return 0;
> +}
> +

Is there any reason we don't do the sum and the subtraction together,
using percpu_counter_sum_and_sub()? I thought that was what fixed the
parallel reservation race before.

>  /**
>   * ext4_has_free_blocks()
>   * @sbi:	in-core super block structure.
> @@ -1623,18 +1649,17 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
>  		sbi->s_resuid != current->fsuid &&
>  		(sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
>  		root_blocks = ext4_r_blocks_count(sbi->s_es);
> -#ifdef CONFIG_SMP
> -	if (free_blocks - root_blocks < FBC_BATCH)
> -		free_blocks =
> -			percpu_counter_sum(&sbi->s_freeblocks_counter);
> -#endif
> +
> +	if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
> +		free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
> +
>  	if (free_blocks <= root_blocks)
>  		/* we don't have free space */
>  		return 0;
>  	if (free_blocks - root_blocks < nblocks)
>  		return free_blocks - root_blocks;
>  	return nblocks;
> - }
> +}
> 
> 
>  /**
> @@ -1713,14 +1738,11 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
>  		/*
>  		 * With delalloc we already reserved the blocks
>  		 */
> -		*count = ext4_has_free_blocks(sbi, *count);
> -	}
> -	if (*count == 0) {
> -		*errp = -ENOSPC;
> -		return 0;	/*return with ENOSPC error */
> +		if (ext4_claim_free_blocks(sbi, *count)) {
> +			*errp = -ENOSPC;
> +			return 0;	/*return with ENOSPC error */
> +		}
>  	}
> -	num = *count;
> -
>  	/*
>  	 * Check quota for allocation of this block.
>  	 */
> @@ -1915,9 +1937,13 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
>  	le16_add_cpu(&gdp->bg_free_blocks_count, -num);
>  	gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
>  	spin_unlock(sb_bgl_lock(sbi, group_no));
> -	if (!EXT4_I(inode)->i_delalloc_reserved_flag)
> -		percpu_counter_sub(&sbi->s_freeblocks_counter, num);
> -
> +	if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) {
> +		/*
> +		 * we allocated less blocks than we
> +		 * claimed. Add the difference back.
> +		 */
> +		percpu_counter_add(&sbi->s_freeblocks_counter, *count - num);
> +	}
>  	if (sbi->s_log_groups_per_flex) {
>  		ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
>  		spin_lock(sb_bgl_lock(sbi, flex_group));
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7f11b25..71a4fde 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1047,6 +1047,8 @@ extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
>  					unsigned long *count, int *errp);
>  extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
>  			ext4_fsblk_t goal, unsigned long *count, int *errp);
> +extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
> +						ext4_fsblk_t nblocks);
>  extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
>  						ext4_fsblk_t nblocks);
>  extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
> @@ -1295,6 +1297,17 @@ do {								\
>  		__ext4_std_error((sb), __func__, (errno));	\
>  } while (0)
> 
> +#ifdef CONFIG_SMP
> +/* Each CPU can accumulate FBC_BATCH blocks in their local
> + * counters. So we need to make sure we have free blocks more
> + * than FBC_BATCH  * nr_cpu_ids. Also add a window of 4 times.
> + */
> +#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
> +#else
> +#define EXT4_FREEBLOCKS_WATERMARK 0
> +#endif
> +
> +
>  /*
>   * Inodes and files operations
>   */
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 1c289c1..d965a05 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1537,13 +1537,10 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
>  	md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
>  	total = md_needed + nrblocks;
> 
> -	if (ext4_has_free_blocks(sbi, total) < total) {
> +	if (ext4_claim_free_blocks(sbi, total)) {
>  		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>  		return -ENOSPC;
>  	}
> -	/* reduce fs free blocks counter */
> -	percpu_counter_sub(&sbi->s_freeblocks_counter, total);
> -
>  	EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
>  	EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
> 
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 82dd0e4..4404b46 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2977,9 +2977,15 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
>  	 * at write_begin() time for delayed allocation
>  	 * do not double accounting
>  	 */
> -	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
> -		percpu_counter_sub(&sbi->s_freeblocks_counter,
> -					ac->ac_b_ex.fe_len);
> +	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) &&
> +			ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) {
> +		/*
> +		 * we allocated fewer blocks than we claimed
> +		 * Add the difference back
> +		 */
> +		percpu_counter_add(&sbi->s_freeblocks_counter,
> +				ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len);
> +	}
> 
>  	if (sbi->s_log_groups_per_flex) {
>  		ext4_group_t flex_group = ext4_flex_group(sbi,
> @@ -4391,14 +4397,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
>  		/*
>  		 * With delalloc we already reserved the blocks
>  		 */
> -		ar->len = ext4_has_free_blocks(sbi, ar->len);
> -	}
> -
> -	if (ar->len == 0) {
> -		*errp = -ENOSPC;
> -		return 0;
> +		if (ext4_claim_free_blocks(sbi, ar->len)) {
> +			*errp = -ENOSPC;
> +			return 0;
> +		}
>  	}
> -
>  	while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
>  		ar->flags |= EXT4_MB_HINT_NOPREALLOC;
>  		ar->len--;

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2008-08-28 20:41 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-08-27 15:28 [PATCH -V3 01/11] percpu_counters: make fbc->count read atomic on 32 bit architecture Aneesh Kumar K.V
2008-08-27 15:28 ` [PATCH -V3 02/11] ext4: Make sure all the block allocation paths reserve blocks Aneesh Kumar K.V
2008-08-27 15:28   ` [PATCH -V3 03/11] ext4: Retry block reservation Aneesh Kumar K.V
2008-08-27 15:28     ` [PATCH -V3 04/11] ext4: Add percpu dirty block accounting Aneesh Kumar K.V
2008-08-27 15:28       ` [PATCH -V3 05/11] ext4: Switch to non delalloc mode when we are low on free blocks count Aneesh Kumar K.V
2008-08-27 15:28         ` [PATCH -V3 06/11] ext4: Update meta-data reservation with delalloc Aneesh Kumar K.V
2008-08-27 15:28           ` [PATCH -V3 07/11] ext4: request for blocks with ar.excepted_group = -1 Aneesh Kumar K.V
2008-08-27 15:28             ` [PATCH -V3 08/11] ext4: Signed arithematic fix Aneesh Kumar K.V
2008-08-27 15:28               ` [PATCH -V3 09/11] ext4: Fix ext4 nomballoc allocator for ENOSPC Aneesh Kumar K.V
2008-08-27 15:28                 ` [PATCH -V3 10/11] ext4: Add inode to journal handle after block allocation for ordered mode Aneesh Kumar K.V
2008-08-27 15:28                   ` [PATCH -V3 11/11] ext4: Retry block allocation if we have free blocks left Aneesh Kumar K.V
2008-08-28 21:57                 ` [PATCH -V3 09/11] ext4: Fix ext4 nomballoc allocator for ENOSPC Mingming Cao
2008-08-29  3:44                   ` Aneesh Kumar K.V
2008-08-29  4:14                     ` Aneesh Kumar K.V
2008-08-29  5:02                       ` Mingming Cao
2008-08-29  5:06                     ` Mingming Cao
2008-08-29  8:25                       ` Aneesh Kumar K.V
2008-08-28 21:04               ` [PATCH -V3 08/11] ext4: Signed arithematic fix Mingming Cao
2008-08-28 21:03             ` [PATCH -V3 07/11] ext4: request for blocks with ar.excepted_group = -1 Mingming Cao
2008-08-28 21:03           ` [PATCH -V3 06/11] ext4: Update meta-data reservation with delalloc Mingming Cao
2008-08-28 20:57         ` [PATCH -V3 05/11] ext4: Switch to non delalloc mode when we are low on free blocks count Mingming Cao
2008-08-28 20:56       ` [PATCH -V3 04/11] ext4: Add percpu dirty block accounting Mingming Cao
2008-10-09 20:44       ` Eric Sandeen
2008-10-10  4:52         ` Aneesh Kumar K.V
2008-10-10  4:58           ` Eric Sandeen
2008-10-11 21:10         ` Andreas Dilger
2008-08-28 20:42     ` [PATCH -V3 03/11] ext4: Retry block reservation Mingming Cao
2008-08-28 20:41   ` Mingming Cao [this message]
2008-08-27 19:05 ` [PATCH -V3 01/11] percpu_counters: make fbc->count read atomic on 32 bit architecture Andrew Morton
2008-08-27 21:01   ` Peter Zijlstra
2008-08-27 21:22     ` Andrew Morton
2008-08-28  3:52       ` Aneesh Kumar K.V
2008-08-28  4:09         ` Andrew Morton
2008-08-28 22:59           ` Mingming Cao
2008-08-28 22:59             ` Mingming Cao
2008-08-28  7:57       ` Peter Zijlstra
2008-08-28  3:48   ` Aneesh Kumar K.V
2008-08-28  4:06     ` Andrew Morton
2008-08-28 14:19       ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1219956093.6384.1.camel@mingming-laptop \
    --to=cmm@us.ibm.com \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=sandeen@redhat.com \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.