linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Ying Han <yinghan@google.com>
To: linux-mm@kvack.org
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
	Christoph Hellwig <hch@lst.de>, Nick Piggin <npiggin@suse.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	Paul Turner <pjt@google.com>
Subject: [PATCH] Fix ext2 and ext4 buffer-head accounting.
Date: Wed, 27 Oct 2010 10:16:37 -0700	[thread overview]
Message-ID: <1288199797-22541-1-git-send-email-yinghan@google.com> (raw)

Pages pinned by the block group_descriptors in the super_block are non-reclaimable.
Those pages show up as file-backed in meminfo, which confuses user programs into
issuing too many drop_caches/ttfp requests even though this memory will never be freed.

This change stops accounting for the file system descriptors by taking the pages
off the LRU and decrementing the NR_FILE_PAGES counter. The pages are put back when
the filesystem is unmounted.

Signed-off-by: Ying Han <yinghan@google.com>
Signed-off-by: Paul Turner <pjt@google.com>
---
 fs/buffer.c                 |   44 +++++++++++++++++++++++++++++++++++++++++++
 fs/ext2/super.c             |   15 +++++++++++++-
 fs/ext4/super.c             |   12 ++++++++++-
 include/linux/buffer_head.h |    5 ++++
 4 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 3e7dca2..677d5f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -22,6 +22,8 @@
 #include <linux/syscalls.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/capability.h>
@@ -3314,6 +3316,48 @@ int bh_submit_read(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(bh_submit_read);
 
+void bh_disable_accounting(struct buffer_head *bh)
+{
+	struct page *page = bh->b_page;
+	struct zone *zone = page_zone(page);
+	unsigned long flags;
+
+	if (buffer_unaccounted(bh))
+		return;
+
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	/* If someone else is holding it off-LRU we can't safely do anything */
+	if (PageLRU(page)) {
+		BUG_ON(buffer_unaccounted(bh));
+		ClearPageLRU(page);
+		del_page_from_lru(zone, page);
+		__dec_zone_state(zone, NR_FILE_PAGES);
+		set_buffer_unaccounted(bh);
+	}
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+EXPORT_SYMBOL(bh_disable_accounting);
+
+void bh_enable_accounting(struct buffer_head *bh)
+{
+	struct page *page = bh->b_page;
+	struct zone *zone = page_zone(page);
+	unsigned long flags;
+
+	if (!buffer_unaccounted(bh))
+		return;
+
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	if (buffer_unaccounted(bh)) {
+		SetPageLRU(page);
+		add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE);
+		__inc_zone_state(zone, NR_FILE_PAGES);
+		clear_buffer_unaccounted(bh);
+	}
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+EXPORT_SYMBOL(bh_enable_accounting);
+
 void __init buffer_init(void)
 {
 	int nrpages;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1ec6026..a4d21ce 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -29,6 +29,7 @@
 #include <linux/vfs.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
+#include <linux/swap.h>
 #include <linux/log2.h>
 #include <linux/quotaops.h>
 #include <asm/uaccess.h>
@@ -135,13 +136,16 @@ static void ext2_put_super (struct super_block * sb)
 	}
 	db_count = sbi->s_gdb_count;
 	for (i = 0; i < db_count; i++)
-		if (sbi->s_group_desc[i])
+		if (sbi->s_group_desc[i]) {
+			bh_enable_accounting(sbi->s_group_desc[i]);
 			brelse (sbi->s_group_desc[i]);
+		}
 	kfree(sbi->s_group_desc);
 	kfree(sbi->s_debts);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
+	bh_enable_accounting(sbi->s_sbh);
 	brelse (sbi->s_sbh);
 	sb->s_fs_info = NULL;
 	kfree(sbi->s_blockgroup_lock);
@@ -1080,9 +1084,18 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
 		ext2_msg(sb, KERN_WARNING,
 			"warning: mounting ext3 filesystem as ext2");
+
 	if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY))
 		sb->s_flags |= MS_RDONLY;
 	ext2_write_super(sb);
+
+	/* disable accounting of pinned file pages */
+	lru_add_drain_all();
+	db_count = sbi->s_gdb_count;
+	for (i = 0; i < db_count; i++)
+		bh_disable_accounting(sbi->s_group_desc[i]);
+	bh_disable_accounting(sbi->s_sbh);
+
 	return 0;
 
 cantfind_ext2:
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2614774..5203476 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -32,6 +32,7 @@
 #include <linux/vfs.h>
 #include <linux/random.h>
 #include <linux/mount.h>
+#include <linux/swap.h>
 #include <linux/namei.h>
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
@@ -734,8 +735,10 @@ static void ext4_put_super(struct super_block *sb)
 	}
 	kobject_del(&sbi->s_kobj);
 
-	for (i = 0; i < sbi->s_gdb_count; i++)
+	for (i = 0; i < sbi->s_gdb_count; i++) {
+		bh_enable_accounting(sbi->s_group_desc[i]);
 		brelse(sbi->s_group_desc[i]);
+	}
 	kfree(sbi->s_group_desc);
 	if (is_vmalloc_addr(sbi->s_flex_groups))
 		vfree(sbi->s_flex_groups);
@@ -745,6 +748,7 @@ static void ext4_put_super(struct super_block *sb)
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
 	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
+	bh_enable_accounting(sbi->s_sbh);
 	brelse(sbi->s_sbh);
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < MAXQUOTAS; i++)
@@ -3129,6 +3133,12 @@ no_journal:
 		goto failed_mount4;
 	}
 
+	/* disable accounting of pinned file pages */
+	lru_add_drain_all();
+	for (i = 0; i < db_count; i++)
+		bh_disable_accounting(sbi->s_group_desc[i]);
+	bh_disable_accounting(sbi->s_sbh);
+
 	sbi->s_kobj.kset = ext4_kset;
 	init_completion(&sbi->s_kobj_unregister);
 	err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index ec94c12..7d48499 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -34,6 +34,7 @@ enum bh_state_bits {
 	BH_Write_EIO,	/* I/O error on write */
 	BH_Eopnotsupp,	/* operation not supported (barrier) */
 	BH_Unwritten,	/* Buffer is allocated on disk but not written */
+	BH_Unaccounted, /* Backing page has been removed from accounting */
 	BH_Quiet,	/* Buffer Error Prinks to be quiet */
 
 	BH_PrivateStart,/* not a state bit, but the first bit available
@@ -126,6 +127,7 @@ BUFFER_FNS(Boundary, boundary)
 BUFFER_FNS(Write_EIO, write_io_error)
 BUFFER_FNS(Eopnotsupp, eopnotsupp)
 BUFFER_FNS(Unwritten, unwritten)
+BUFFER_FNS(Unaccounted, unaccounted)
 
 #define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
 #define touch_buffer(bh)	mark_page_accessed(bh->b_page)
@@ -234,6 +236,9 @@ int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
 int nobh_writepage(struct page *page, get_block_t *get_block,
                         struct writeback_control *wbc);
 
+void bh_disable_accounting(struct buffer_head *bh);
+void bh_enable_accounting(struct buffer_head *bh);
+
 void buffer_init(void);
 
 /*
-- 
1.7.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

             reply	other threads:[~2010-10-27 17:16 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-27 17:16 Ying Han [this message]
2010-10-29 23:19 ` [PATCH] Fix ext2 and ext4 buffer-head accounting Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1288199797-22541-1-git-send-email-yinghan@google.com \
    --to=yinghan@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=hch@lst.de \
    --cc=linux-mm@kvack.org \
    --cc=npiggin@suse.de \
    --cc=pjt@google.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).