From: Munehiro Ikeda <m-ikeda@ds.jp.nec.com>
To: linux-kernel@vger.kernel.org, jens.axboe@oracle.com,
Vivek Goyal <vgoyal@redhat.com>
Cc: Munehiro Ikeda <m-ikeda@ds.jp.nec.com>,
Ryo Tsuruta <ryov@valinux.co.jp>,
taka@valinux.co.jp, kamezawa.hiroyu@jp.fujitsu.com,
Andrea Righi <righi.andrea@gmail.com>,
Gui Jianfeng <guijianfeng@cn.fujitsu.com>,
akpm@linux-foundation.org, balbir@linux.vnet.ibm.com
Subject: [RFC][PATCH 04/11] blkiocg async: block_commit_write not to record process info
Date: Thu, 08 Jul 2010 23:16:53 -0400 [thread overview]
Message-ID: <4C3694A5.6080103@ds.jp.nec.com> (raw)
In-Reply-To: <4C369009.80503@ds.jp.nec.com>
When a mmap(2)'d page is written back, the page has no buffer_heads
attached yet, so ext4 prepares buffer_heads and calls
block_commit_write() from ext4_writepage().
This results in a call to mark_buffer_dirty(), which sets the page's
dirty flag. In this case, the current process marking the page dirty
is (almost always) the flush kernel thread, so the original info about
the process which dirtied this page is lost.
To prevent this issue, this patch introduces
block_commit_write_noiotrack(), which is the same as
block_commit_write() but takes a code path that does not record the
current process info.
The portion of ext4 that calls block_commit_write() will be modified
in a later patch in this series.
Signed-off-by: Munehiro "Muuhh" Ikeda <m-ikeda@ds.jp.nec.com>
---
fs/buffer.c | 70 ++++++++++++++++++++++++++++++++-----------
include/linux/buffer_head.h | 2 +
2 files changed, 54 insertions(+), 18 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index c418fdf..61ebf94 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -660,15 +660,17 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
*
* If warn is true, then emit a warning if the page is not uptodate and has
* not been truncated.
+ * If track is true, dirtying process info is recorded for iotrack.
*/
static void __set_page_dirty(struct page *page,
- struct address_space *mapping, int warn)
+ struct address_space *mapping, int warn, int track)
{
spin_lock_irq(&mapping->tree_lock);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping);
- blk_iotrack_reset_owner_pagedirty(page, current->mm);
+ if (track)
+ blk_iotrack_reset_owner_pagedirty(page, current->mm);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
@@ -723,7 +725,7 @@ int __set_page_dirty_buffers(struct page *page)
spin_unlock(&mapping->private_lock);
if (newly_dirty)
- __set_page_dirty(page, mapping, 1);
+ __set_page_dirty(page, mapping, 1, 1);
return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);
@@ -1137,18 +1139,11 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
*/
/**
- * mark_buffer_dirty - mark a buffer_head as needing writeout
+ * __mark_buffer_dirty - helper function for mark_buffer_dirty*
* @bh: the buffer_head to mark dirty
- *
- * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
- * backing page dirty, then tag the page as dirty in its address_space's radix
- * tree and then attach the address_space's inode to its superblock's dirty
- * inode list.
- *
- * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * @track: if true, dirtying process info will be recorded for iotrack
*/
-void mark_buffer_dirty(struct buffer_head *bh)
+static void __mark_buffer_dirty(struct buffer_head *bh, int track)
{
WARN_ON_ONCE(!buffer_uptodate(bh));
@@ -1169,12 +1164,40 @@ void mark_buffer_dirty(struct buffer_head *bh)
if (!TestSetPageDirty(page)) {
struct address_space *mapping = page_mapping(page);
if (mapping)
- __set_page_dirty(page, mapping, 0);
+ __set_page_dirty(page, mapping, 0, track);
}
}
}
+
+/**
+ * mark_buffer_dirty - mark a buffer_head as needing writeout
+ * @bh: the buffer_head to mark dirty
+ *
+ * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
+ * backing page dirty, then tag the page as dirty in its address_space's radix
+ * tree and then attach the address_space's inode to its superblock's dirty
+ * inode list.
+ *
+ * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
+ * mapping->tree_lock and the global inode_lock.
+ */
+void mark_buffer_dirty(struct buffer_head *bh)
+{
+ __mark_buffer_dirty(bh, 1);
+}
EXPORT_SYMBOL(mark_buffer_dirty);
+/**
+ * mark_buffer_dirty_noiotrack
+ * - same as mark_buffer_dirty but doesn't record dirtying process info
+ * @bh: the buffer_head to mark dirty
+ */
+void mark_buffer_dirty_noiotrack(struct buffer_head *bh)
+{
+ __mark_buffer_dirty(bh, 0);
+}
+EXPORT_SYMBOL(mark_buffer_dirty_noiotrack);
+
/*
* Decrement a buffer_head's reference count. If all buffers against a page
* have zero reference count, are clean and unlocked, and if the page is clean
@@ -1916,7 +1939,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
}
static int __block_commit_write(struct inode *inode, struct page *page,
- unsigned from, unsigned to)
+ unsigned from, unsigned to, int track)
{
unsigned block_start, block_end;
int partial = 0;
@@ -1934,7 +1957,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
partial = 1;
} else {
set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
+ if (track)
+ mark_buffer_dirty(bh);
+ else
+ mark_buffer_dirty_noiotrack(bh);
}
clear_buffer_new(bh);
}
@@ -2067,7 +2093,7 @@ int block_write_end(struct file *file, struct address_space *mapping,
flush_dcache_page(page);
/* This could be a short (even 0-length) commit */
- __block_commit_write(inode, page, start, start+copied);
+ __block_commit_write(inode, page, start, start+copied, 1);
return copied;
}
@@ -2414,11 +2440,19 @@ EXPORT_SYMBOL(block_prepare_write);
int block_commit_write(struct page *page, unsigned from, unsigned to)
{
struct inode *inode = page->mapping->host;
- __block_commit_write(inode,page,from,to);
+ __block_commit_write(inode, page, from, to, 1);
return 0;
}
EXPORT_SYMBOL(block_commit_write);
+int block_commit_write_noiotrack(struct page *page, unsigned from, unsigned to)
+{
+ struct inode *inode = page->mapping->host;
+ __block_commit_write(inode, page, from, to, 0);
+ return 0;
+}
+EXPORT_SYMBOL(block_commit_write_noiotrack);
+
/*
* block_page_mkwrite() is not allowed to change the file size as it gets
* called from a page fault handler when a page is first dirtied. Hence we must
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 1b9ba19..9d7e0b0 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -145,6 +145,7 @@ BUFFER_FNS(Unwritten, unwritten)
*/
void mark_buffer_dirty(struct buffer_head *bh);
+void mark_buffer_dirty_noiotrack(struct buffer_head *bh);
void init_buffer(struct buffer_head *, bh_end_io_t *, void *);
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset);
@@ -225,6 +226,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t,
get_block_t *, loff_t *);
int generic_cont_expand_simple(struct inode *inode, loff_t size);
int block_commit_write(struct page *page, unsigned from, unsigned to);
+int block_commit_write_noiotrack(struct page *page, unsigned from, unsigned to);
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block);
void block_sync_page(struct page *);
--
1.6.2.5
next prev parent reply other threads:[~2010-07-09 3:19 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-07-09 2:57 [RFC][PATCH 00/11] blkiocg async support Munehiro Ikeda
2010-07-09 3:14 ` [RFC][PATCH 01/11] blkiocg async: Make page_cgroup independent from memory controller Munehiro Ikeda
2010-07-26 6:49 ` Balbir Singh
2010-07-09 3:15 ` [RFC][PATCH 02/11] blkiocg async: The main part of iotrack Munehiro Ikeda
2010-07-09 7:35 ` KAMEZAWA Hiroyuki
2010-07-09 23:06 ` Munehiro Ikeda
2010-07-12 0:11 ` KAMEZAWA Hiroyuki
2010-07-14 14:46 ` Munehiro IKEDA
2010-07-09 7:38 ` KAMEZAWA Hiroyuki
2010-07-09 23:09 ` Munehiro Ikeda
2010-07-10 10:06 ` Andrea Righi
2010-07-09 3:16 ` [RFC][PATCH 03/11] blkiocg async: Hooks for iotrack Munehiro Ikeda
2010-07-09 9:24 ` Andrea Righi
2010-07-09 23:43 ` Munehiro Ikeda
2010-07-09 3:16 ` Munehiro Ikeda [this message]
2010-07-09 3:17 ` [RFC][PATCH 05/11] blkiocg async: __set_page_dirty_nobuffer not to record process info Munehiro Ikeda
2010-07-09 3:17 ` [RFC][PATCH 06/11] blkiocg async: ext4_writepage not to overwrite iotrack info Munehiro Ikeda
2010-07-09 3:18 ` [RFC][PATCH 07/11] blkiocg async: Pass bio to elevator_ops functions Munehiro Ikeda
2010-07-09 3:19 ` [RFC][PATCH 08/11] blkiocg async: Function to search blkcg from css ID Munehiro Ikeda
2010-07-09 3:20 ` [RFC][PATCH 09/11] blkiocg async: Functions to get cfqg from bio Munehiro Ikeda
2010-07-09 3:22 ` [RFC][PATCH 10/11] blkiocg async: Async queue per cfq_group Munehiro Ikeda
2010-08-13 1:24 ` Nauman Rafique
2010-08-13 21:00 ` Munehiro Ikeda
2010-08-13 23:01 ` Nauman Rafique
2010-08-14 0:49 ` Munehiro Ikeda
2010-07-09 3:23 ` [RFC][PATCH 11/11] blkiocg async: Workload timeslice adjustment for async queues Munehiro Ikeda
2010-07-09 10:04 ` [RFC][PATCH 00/11] blkiocg async support Andrea Righi
2010-07-09 13:45 ` Vivek Goyal
2010-07-10 0:17 ` Munehiro Ikeda
2010-07-10 0:55 ` Nauman Rafique
2010-07-10 13:24 ` Vivek Goyal
2010-07-12 0:20 ` KAMEZAWA Hiroyuki
2010-07-12 13:18 ` Vivek Goyal
2010-07-13 4:36 ` KAMEZAWA Hiroyuki
2010-07-14 14:29 ` Vivek Goyal
2010-07-15 0:00 ` KAMEZAWA Hiroyuki
2010-07-16 13:43 ` Vivek Goyal
2010-07-16 14:15 ` Daniel P. Berrange
2010-07-16 14:35 ` Vivek Goyal
2010-07-16 14:53 ` Daniel P. Berrange
2010-07-16 15:12 ` Vivek Goyal
2010-07-27 10:40 ` Daniel P. Berrange
2010-07-27 14:03 ` Vivek Goyal
2010-07-22 19:28 ` Greg Thelen
2010-07-22 23:59 ` KAMEZAWA Hiroyuki
2010-07-26 6:41 ` Balbir Singh
2010-07-27 6:40 ` Greg Thelen
2010-07-27 6:39 ` KAMEZAWA Hiroyuki
2010-08-02 20:58 ` Vivek Goyal
2010-08-03 14:31 ` Munehiro Ikeda
2010-08-03 19:24 ` Nauman Rafique
2010-08-04 14:32 ` Munehiro Ikeda
2010-08-03 20:15 ` Vivek Goyal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4C3694A5.6080103@ds.jp.nec.com \
--to=m-ikeda@ds.jp.nec.com \
--cc=akpm@linux-foundation.org \
--cc=balbir@linux.vnet.ibm.com \
--cc=guijianfeng@cn.fujitsu.com \
--cc=jens.axboe@oracle.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=righi.andrea@gmail.com \
--cc=ryov@valinux.co.jp \
--cc=taka@valinux.co.jp \
--cc=vgoyal@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox