All of lore.kernel.org
 help / color / mirror / Atom feed
From: Theodore Ts'o <tytso@mit.edu>
To: linux-ext4@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>,
	Mingming Cao <cmm@us.ibm.com>, "Theodore Ts'o" <tytso@mit.edu>
Subject: [PATCH 08/42] ext4: Add percpu dirty block accounting.
Date: Thu,  9 Oct 2008 00:05:26 -0400	[thread overview]
Message-ID: <1223525160-9887-9-git-send-email-tytso@mit.edu> (raw)
In-Reply-To: <1223525160-9887-8-git-send-email-tytso@mit.edu>

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

This patch adds dirty block accounting using percpu_counters.  Delayed
allocation block reservation is now done by updating the dirty block
counter.  In a later patch we switch to non-delalloc mode if the
filesystem's free block count is less than 150% of the total
filesystem dirty blocks.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao<cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/balloc.c  |   59 +++++++++++++++++++++++++++++++++-------------------
 fs/ext4/ext4_sb.h |    1 +
 fs/ext4/inode.c   |   22 +++++++++---------
 fs/ext4/mballoc.c |   17 ++------------
 fs/ext4/super.c   |    8 ++++++-
 5 files changed, 59 insertions(+), 48 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 5790988..87b198c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1605,26 +1605,38 @@ out:
 int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
 						ext4_fsblk_t nblocks)
 {
-	s64 free_blocks;
+	s64 free_blocks, dirty_blocks;
 	ext4_fsblk_t root_blocks = 0;
 	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
+	struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
 
-	free_blocks = percpu_counter_read(fbc);
+	free_blocks  = percpu_counter_read_positive(fbc);
+	dirty_blocks = percpu_counter_read_positive(dbc);
 
 	if (!capable(CAP_SYS_RESOURCE) &&
 		sbi->s_resuid != current->fsuid &&
 		(sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
 		root_blocks = ext4_r_blocks_count(sbi->s_es);
 
-	if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
-		free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter);
-
-	if (free_blocks < (root_blocks + nblocks))
+	if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
+						EXT4_FREEBLOCKS_WATERMARK) {
+		free_blocks  = percpu_counter_sum(fbc);
+		dirty_blocks = percpu_counter_sum(dbc);
+		if (dirty_blocks < 0) {
+			printk(KERN_CRIT "Dirty block accounting "
+					"went wrong %lld\n",
+					dirty_blocks);
+		}
+	}
+	/* Check whether we have space after
+	 * accounting for current dirty blocks
+	 */
+	if (free_blocks < ((s64)(root_blocks + nblocks) + dirty_blocks))
 		/* we don't have free space */
 		return -ENOSPC;
 
-	/* reduce fs free blocks counter */
-	percpu_counter_sub(fbc, nblocks);
+	/* Add the blocks to nblocks */
+	percpu_counter_add(dbc, nblocks);
 	return 0;
 }
 
@@ -1640,23 +1652,28 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
 ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
 						ext4_fsblk_t nblocks)
 {
-	ext4_fsblk_t free_blocks;
+	ext4_fsblk_t free_blocks, dirty_blocks;
 	ext4_fsblk_t root_blocks = 0;
+	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
+	struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
 
-	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+	free_blocks  = percpu_counter_read_positive(fbc);
+	dirty_blocks = percpu_counter_read_positive(dbc);
 
 	if (!capable(CAP_SYS_RESOURCE) &&
 		sbi->s_resuid != current->fsuid &&
 		(sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
 		root_blocks = ext4_r_blocks_count(sbi->s_es);
 
-	if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
-		free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
-
-	if (free_blocks <= root_blocks)
+	if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
+						EXT4_FREEBLOCKS_WATERMARK) {
+		free_blocks  = percpu_counter_sum_positive(fbc);
+		dirty_blocks = percpu_counter_sum_positive(dbc);
+	}
+	if (free_blocks <= (root_blocks + dirty_blocks))
 		/* we don't have free space */
 		return 0;
-	if (free_blocks - root_blocks < nblocks)
+	if (free_blocks - (root_blocks + dirty_blocks) < nblocks)
 		return free_blocks - root_blocks;
 	return nblocks;
 }
@@ -1943,13 +1960,11 @@ allocated:
 	le16_add_cpu(&gdp->bg_free_blocks_count, -num);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
 	spin_unlock(sb_bgl_lock(sbi, group_no));
-	if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) {
-		/*
-		 * we allocated less blocks than we
-		 * claimed. Add the difference back.
-		 */
-		percpu_counter_add(&sbi->s_freeblocks_counter, *count - num);
-	}
+	percpu_counter_sub(&sbi->s_freeblocks_counter, num);
+	/*
+	 * Now reduce the dirty block count also. Should not go negative
+	 */
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter, num);
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
 		spin_lock(sb_bgl_lock(sbi, flex_group));
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 69810a2..a5577e0 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -59,6 +59,7 @@ struct ext4_sb_info {
 	struct percpu_counter s_freeblocks_counter;
 	struct percpu_counter s_freeinodes_counter;
 	struct percpu_counter s_dirs_counter;
+	struct percpu_counter s_dirtyblocks_counter;
 	struct blockgroup_lock s_blockgroup_lock;
 
 	/* root of the per fs reservation window tree */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index eb9d449..7875a2d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1030,19 +1030,20 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
 	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
 
-	/* Account for allocated meta_blocks */
-	mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
-	/* update fs free blocks counter for truncate case */
-	percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+	if (mdb_free) {
+		/* Account for allocated meta_blocks */
+		mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+		/* update fs dirty blocks counter */
+		percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
+		EXT4_I(inode)->i_allocated_meta_blocks = 0;
+		EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+	}
 
 	/* update per-inode reservations */
 	BUG_ON(used  > EXT4_I(inode)->i_reserved_data_blocks);
 	EXT4_I(inode)->i_reserved_data_blocks -= used;
 
-	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-	EXT4_I(inode)->i_reserved_meta_blocks = mdb;
-	EXT4_I(inode)->i_allocated_meta_blocks = 0;
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 }
 
@@ -1588,8 +1589,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 
 	release = to_free + mdb_free;
 
-	/* update fs free blocks counter for truncate case */
-	percpu_counter_add(&sbi->s_freeblocks_counter, release);
+	/* update fs dirty blocks counter for truncate case */
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
 
 	/* update per-inode reservations */
 	BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
@@ -2471,7 +2472,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
-
 retry:
 	/*
 	 * With delayed allocation, we don't log the i_disksize update
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e4f30de..c7b0dea 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2969,22 +2969,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-
+	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
 	/*
-	 * free blocks account has already be reduced/reserved
-	 * at write_begin() time for delayed allocation
-	 * do not double accounting
+	 * Now reduce the dirty block count also. Should not go negative
 	 */
-	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) &&
-			ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) {
-		/*
-		 * we allocated less blocks than we calimed
-		 * Add the difference back
-		 */
-		percpu_counter_add(&sbi->s_freeblocks_counter,
-				ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len);
-	}
-
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter, ac->ac_b_ex.fe_len);
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi,
 							  ac->ac_b_ex.fe_group);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7de6ca0..efa40d9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -520,6 +520,7 @@ static void ext4_put_super(struct super_block *sb)
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
+	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 	brelse(sbi->s_sbh);
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < MAXQUOTAS; i++)
@@ -2259,6 +2260,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		err = percpu_counter_init(&sbi->s_dirs_counter,
 				ext4_count_dirs(sb));
 	}
+	if (!err) {
+		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+	}
 	if (err) {
 		printk(KERN_ERR "EXT4-fs: insufficient memory\n");
 		goto failed_mount3;
@@ -2491,6 +2495,7 @@ failed_mount3:
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
+	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
@@ -3169,7 +3174,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_type = EXT4_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
-	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
+	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
+		       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
 	ext4_free_blocks_count_set(es, buf->f_bfree);
 	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
 	if (buf->f_bfree < ext4_r_blocks_count(es))
-- 
1.5.6.1.205.ge2c7.dirty


  reply	other threads:[~2008-10-09  4:06 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-09  4:05 [PATCH 0/42] Ext4 patches queued up for the 2.6.28 merge window Theodore Ts'o
2008-10-09  4:05 ` [PATCH 01/42] percpu counter: clean up percpu_counter_sum_and_set() Theodore Ts'o
2008-10-09  4:05   ` [PATCH 02/42] ext4: Add printk priority levels to clean up checkpatch warnings Theodore Ts'o
2008-10-09  4:05     ` [PATCH 03/42] ext4: Fix long long " Theodore Ts'o
2008-10-09  4:05       ` [PATCH 04/42] ext4: Fix whitespace checkpatch warnings/errors Theodore Ts'o
2008-10-09  4:05         ` [PATCH 05/42] ext4: invalidate pages if delalloc block allocation fails Theodore Ts'o
2008-10-09  4:05           ` [PATCH 06/42] ext4: Make sure all the block allocation paths reserve blocks Theodore Ts'o
2008-10-09  4:05             ` Theodore Ts'o
2008-10-09  4:05             ` [PATCH 07/42] ext4: Retry block reservation Theodore Ts'o
2008-10-09  4:05               ` Theodore Ts'o [this message]
2008-10-09  4:05                 ` [PATCH 09/42] ext4: Switch to non delalloc mode when we are low on free blocks count Theodore Ts'o
2008-10-09  4:05                   ` [PATCH 10/42] ext4: Signed arithmetic fix Theodore Ts'o
2008-10-09  4:05                     ` [PATCH 11/42] ext4: Fix ext4 nomballoc allocator for ENOSPC Theodore Ts'o
2008-10-09  4:05                       ` [PATCH 12/42] ext4: Don't add the inode to journal handle until after the block is allocated Theodore Ts'o
2008-10-09  4:05                         ` [PATCH 13/42] ext4: Retry block allocation if we have free blocks left Theodore Ts'o
2008-10-09  4:05                           ` Theodore Ts'o
2008-10-09  4:05                           ` [PATCH 14/42] ext4: truncate block allocated on a failed ext4_write_begin Theodore Ts'o
2008-10-09  4:05                             ` [PATCH 15/42] ext4: Properly update i_disksize Theodore Ts'o
2008-10-09  4:05                               ` [PATCH 16/42] ext4: Avoid printk floods in the face of directory corruption Theodore Ts'o
2008-10-09  4:05                                 ` [PATCH 17/42] Update flex_bg free blocks and free inodes counters when resizing Theodore Ts'o
2008-10-09  4:05                                   ` [PATCH 18/42] ext4: fix #11321: create /proc/ext4/*/stats more carefully Theodore Ts'o
2008-10-09  4:05                                     ` [PATCH 19/42] jbd2: clean up how the journal device name is printed Theodore Ts'o
2008-10-09  4:05                                       ` [PATCH 20/42] ext4: add missing unlock in ext4_check_descriptors() on error path Theodore Ts'o
2008-10-09  4:05                                         ` [PATCH 21/42] ext4: elevate write count for migrate ioctl Theodore Ts'o
2008-10-09  4:05                                           ` [PATCH 22/42] ext4: hook the ext3 migration interface to the EXT4_IOC_SETFLAGS ioctl Theodore Ts'o
2008-10-09  4:05                                             ` [PATCH 23/42] ext4: Renumber EXT4_IOC_MIGRATE Theodore Ts'o
2008-10-09  4:05                                               ` [PATCH 24/42] ext4: use percpu data structures for lg_prealloc_list Theodore Ts'o
2008-10-09  4:05                                                 ` Theodore Ts'o
2008-10-09  4:05                                                 ` [PATCH 25/42] ext4/jbd2: Avoid WARN() messages when failing to write to the superblock Theodore Ts'o
2008-10-09  4:05                                                   ` [PATCH 26/42] ext4: Don't use 'struct dentry' for internal lookups Theodore Ts'o
2008-10-09  4:05                                                     ` [PATCH 27/42] ext4: move /proc setup and teardown out of mballoc.c Theodore Ts'o
2008-10-09  4:05                                                       ` [PATCH 28/42] ext4: Combine proc file handling into a single set of functions Theodore Ts'o
2008-10-09  4:05                                                         ` [PATCH 29/42] ext4: Use readahead when reading an inode from the inode table Theodore Ts'o
2008-10-09  4:05                                                           ` [PATCH 30/42] ext4: Remove old legacy block allocator Theodore Ts'o
2008-10-09  4:05                                                             ` Theodore Ts'o
2008-10-09  4:05                                                             ` [PATCH 31/42] ext4: fix initialization of UNINIT bitmap blocks Theodore Ts'o
2008-10-09  4:05                                                               ` [PATCH 32/42] jbd2: abort instead of waiting for nonexistent transaction Theodore Ts'o
2008-10-09  4:05                                                                 ` [PATCH 33/42] ext4: Add debugging markers that can be used by systemtap Theodore Ts'o
2008-10-09  4:05                                                                   ` [PATCH 34/42] jbd2: Fix buffer head leak when writing the commit block Theodore Ts'o
2008-10-09  4:05                                                                     ` [PATCH 35/42] ext4: fix xattr deadlock Theodore Ts'o
     [not found]                                                                       ` <1223525160-9887-36-git-send-email-tytso-3s7WtUTddSA@public.gmane.org>
2008-10-09  4:05                                                                         ` [PATCH 36/42] vfs: vfs-level fiemap interface Theodore Ts'o
2008-10-09  4:05                                                                           ` Theodore Ts'o
2008-10-09  4:05                                                                           ` [PATCH 37/42] ocfs2: fiemap support Theodore Ts'o
2008-10-09  4:05                                                                             ` [Ocfs2-devel] " Theodore Ts'o
2008-10-09  4:05                                                                             ` [PATCH 38/42] generic block based fiemap implementation Theodore Ts'o
2008-10-09  4:05                                                                               ` [PATCH 39/42] Hook ext4 to the vfs fiemap interface Theodore Ts'o
2008-10-09  4:05                                                                                 ` [PATCH 40/42] Update ext4 MAINTAINERS file Theodore Ts'o
2008-10-09  4:05                                                                                   ` [PATCH 41/42] ext4: Avoid double dirtying of super block in ext4_put_super() Theodore Ts'o
2008-10-09  4:06                                                                                     ` [PATCH 42/42] ext4: Rename ext4dev to ext4 Theodore Ts'o
2008-10-11 22:04                                                                                       ` Jeremy Fitzhardinge
2008-10-11 22:04                                                                                         ` Jeremy Fitzhardinge
2008-10-11 22:09                                                                                         ` Eric Sandeen
2008-10-11 22:09                                                                                           ` Eric Sandeen
2008-10-11 22:54                                                                                           ` Jeremy Fitzhardinge
2008-10-11 22:54                                                                                             ` Jeremy Fitzhardinge
2008-10-11 22:58                                                                                           ` Theodore Tso
2008-10-11 23:08                                                                                             ` Grant Coady
2008-10-12  1:06                                                                                             ` Eric Sandeen
2008-10-09  8:18                                                           ` [PATCH 29/42] ext4: Use readahead when reading an inode from the inode table Aneesh Kumar K.V
2008-10-09  8:52   ` [PATCH 01/42] percpu counter: clean up percpu_counter_sum_and_set() Peter Zijlstra
2008-10-09 16:52     ` Theodore Tso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1223525160-9887-9-git-send-email-tytso@mit.edu \
    --to=tytso@mit.edu \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=cmm@us.ibm.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.