All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: cmm@us.ibm.com, tytso@mit.edu, sandeen@redhat.com
Cc: linux-ext4@vger.kernel.org,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [RFC PATCH -v2] ext4: Switch to non delalloc mode when we are low on free blocks count.
Date: Mon, 25 Aug 2008 16:50:32 +0530	[thread overview]
Message-ID: <1219663233-21849-5-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1219663233-21849-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

delayed allocation allocate blocks during writepages. That also
means we cannot handle block allocation failures. Switch to
non - delalloc when we are running low on free blocks.
Delayed allocation need to do aggressive meta-data block reservation
considering that the requested blocks can all be discontiguous.
Switching to non-delalloc avoids that. Also we can satisfy
partial write in non-delalloc mode.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/inode.c |   48 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3f3ecc0..d923a14 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2482,6 +2482,29 @@ static int ext4_da_writepages(struct address_space *mapping,
 	return ret;
 }
 
+#define FALL_BACK_TO_NONDELALLOC 1
+static int ext4_nonda_switch(struct super_block *sb)
+{
+	s64 free_blocks, dirty_blocks;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	/*
+	 * switch to non delalloc mode if we are running low
+	 * on free block. The free block accounting via percpu
+	 * counters can get slightly wrong with FBC_BATCH getting
+	 * accumulated on each CPU without updating global counters
+	 * Delalloc need an accurate free block accounting. So switch
+	 * to non delalloc when we are near to error range.
+	 */
+	free_blocks  = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+	dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
+	if ( 2 * free_blocks < 3 * dirty_blocks) {
+		/* free block count is less that 150% of dirty blocks */
+		return 1;
+	}
+	return 0;
+}
+
 static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
 				struct page **pagep, void **fsdata)
@@ -2496,6 +2519,13 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
+
+	if (ext4_nonda_switch(inode->i_sb)) {
+		*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
+		return ext4_write_begin(file, mapping, pos,
+					len, flags, pagep, fsdata);
+	}
+	*fsdata = (void *)0;
 retry:
 	/*
 	 * With delayed allocation, we don't log the i_disksize update
@@ -2564,6 +2594,19 @@ static int ext4_da_write_end(struct file *file,
 	handle_t *handle = ext4_journal_current_handle();
 	loff_t new_i_size;
 	unsigned long start, end;
+	int write_mode = (int)fsdata;
+
+	if (write_mode == FALL_BACK_TO_NONDELALLOC) {
+		if (ext4_should_order_data(inode)) {
+			return ext4_ordered_write_end(file, mapping, pos,
+					len, copied, page, fsdata);
+		} else if (ext4_should_writeback_data(inode)) {
+			return ext4_writeback_write_end(file, mapping, pos,
+					len, copied, page, fsdata);
+		} else {
+			BUG();
+		}
+	}
 
 	start = pos & (PAGE_CACHE_SIZE - 1);
 	end = start + copied -1;
@@ -4901,6 +4944,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	loff_t size;
 	unsigned long len;
 	int ret = -EINVAL;
+	void *fsdata;
 	struct file *file = vma->vm_file;
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
@@ -4939,11 +4983,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	 * on the same page though
 	 */
 	ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
-			len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+			len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
 	if (ret < 0)
 		goto out_unlock;
 	ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
-			len, len, page, NULL);
+			len, len, page, fsdata);
 	if (ret < 0)
 		goto out_unlock;
 	ret = 0;
-- 
1.6.0.1.90.g27a6e


  reply	other threads:[~2008-08-25 11:20 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-08-25 11:20 [RFC PATCH -v2] percpu_counters: make fbc->count read atomic on 32 bit architecture Aneesh Kumar K.V
2008-08-25 11:20 ` [RFC PATCH -v2] ext4: Make sure all the block allocation paths reserve blocks Aneesh Kumar K.V
2008-08-25 11:20   ` [RFC PATCH -v2] ext4: Retry block reservation Aneesh Kumar K.V
2008-08-25 11:20     ` [RFC PATCH -v2] ext4: Add percpu dirty block accounting Aneesh Kumar K.V
2008-08-25 11:20       ` Aneesh Kumar K.V [this message]
2008-08-25 11:20         ` [RFC PATCH -v2] ext4: request for blocks with ar.excepted_group = -1 Aneesh Kumar K.V
2008-08-27  8:30           ` Akira Fujita
2008-08-25 21:31         ` [RFC PATCH -v2] ext4: Switch to non delalloc mode when we are low on free blocks count Mingming Cao
2008-08-25 21:26       ` [RFC PATCH -v2] ext4: Add percpu dirty block accounting Mingming Cao
2008-08-25 21:06     ` [RFC PATCH -v2] ext4: Retry block reservation Mingming Cao
2008-08-25 21:00   ` [RFC PATCH -v2] ext4: Make sure all the block allocation paths reserve blocks Mingming Cao
2008-08-25 11:27 ` [RFC PATCH -v2] percpu_counters: make fbc->count read atomic on 32 bit architecture Peter Zijlstra
2008-08-25 14:05   ` Aneesh Kumar K.V
2008-08-25 14:21     ` Peter Zijlstra
2008-08-25 23:18     ` Andreas Dilger
2008-08-27  0:26 ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1219663233-21849-5-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=cmm@us.ibm.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=sandeen@redhat.com \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.