diff for duplicates of <20071128192957.511EAB8310@localhost> diff --git a/a/1.txt b/N1/1.txt index 8a9c073..8b13789 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,825 +1 @@ ->From mrubin@matchstick.corp.google.com Wed Nov 28 11:10:06 2007 -Message-Id: <20071128190121.716364000@matchstick.corp.google.com> -Date: Wed, 28 Nov 2007 11:01:21 -0800 -From: mrubin@google.com -To: mrubin@google.com -Subject: [patch 1/1] Writeback fix for concurrent large and small file writes. -From: Michael Rubin <mrubin@google.com> - -Fixing a bug where writing to large files while concurrently writing to -smaller ones creates a situation where writeback cannot keep up with the -traffic and memory baloons until the we hit the threshold watermark. This -can result in surprising latency spikes when syncing. This latency -can take minutes on large memory systems. Upon request I can provide -a test to reproduce this situation. The flush tree fixes this issue and -fixes several other minor issues with fairness also. - -1) Adding a data structure to guarantee fairness when writing inodes -to disk. The flush_tree is based on an rbtree. The only difference is -how duplicate keys are chained off the same rb_node. - -2) Added a FS flag to mark file systems that are not disk backed so we -don't have to flush them. Not sure I marked all of them. But just marking -these improves writeback performance. - -3) Added an inode flag to allow inodes to be marked so that they are -never written back to disk. See get_pipe_inode. - -Under autotest this patch has passed: fsx, bonnie, and iozone. I am -currently writing more writeback focused tests (which so far have been -passed) to add into autotest. - -Signed-off-by: Michael Rubin <mrubin@google.com> ---- - -Index: 2624rc3/fs/block_dev.c -=================================================================== ---- 2624rc3.orig/fs/block_dev.c 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/fs/block_dev.c 2007-11-27 10:51:26.000000000 -0800 -@@ -518,6 +518,7 @@ static struct file_system_type bd_type = - .name = "bdev", - .get_sb = bd_get_sb, - .kill_sb = kill_anon_super, -+ .fs_flags = FS_ANONYMOUS, - }; - - static struct vfsmount *bd_mnt __read_mostly; -Index: 2624rc3/fs/fs-writeback.c -=================================================================== ---- 2624rc3.orig/fs/fs-writeback.c 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/fs/fs-writeback.c 2007-11-27 17:40:19.000000000 -0800 -@@ -23,8 +23,174 @@ - #include <linux/blkdev.h> - #include <linux/backing-dev.h> - #include <linux/buffer_head.h> -+#include <linux/rbtree.h> - #include "internal.h" - -+#define rb_to_inode(node) rb_entry((node), struct inode, i_flush_node) -+ -+/* -+ * When inodes are parked for writeback they are parked in the -+ * flush_tree. The flush tree is a data structure based on an rb tree. -+ * -+ * Duplicate keys are handled by making a list in the tree for each key -+ * value. The order of how we choose the next inode to flush is decided -+ * by two fields. First the earliest dirtied_when value. If there are -+ * duplicate dirtied_when values then the earliest i_flushed_when value -+ * determines who gets flushed next. -+ * -+ * The flush tree organizes the dirtied_when keys with the rb_tree. Any -+ * inodes with a duplicate dirtied_when value are link listed together. This -+ * link list is sorted by the inode's i_flushed_when. When both the -+ * dirited_when and the i_flushed_when are indentical the order in the -+ * linked list determines the order we flush the inodes. -+ */ -+ -+/* -+ * Find a rb_node matching the key in the flush tree. There are no duplicate -+ * rb_nodes in the tree. Instead they are chained off the first node. -+ */ -+static struct inode *flush_tree_search(struct super_block *sb, -+ unsigned long ts) -+{ -+ struct rb_node *n = sb->s_flush_root.rb_node; -+ assert_spin_locked(&inode_lock); -+ while (n) { -+ struct inode *inode = rb_to_inode(n); -+ if (time_before(ts, inode->dirtied_when)) { -+ n = n->rb_left; -+ } else if (time_after(ts, inode->dirtied_when)) { -+ n = n->rb_right; -+ } else { -+ return inode; -+ } -+ } -+ return NULL; -+} -+ -+/* -+ * Inserting an inode into the flush tree. The tree is keyed by the -+ * dirtied_when member. -+ * -+ * If there is a duplicate key in the tree already the new inode is put -+ * on the tail of a list of the rb_node. -+ * All inserted inodes must have one of the I_DIRTY flags set. -+ */ -+static void flush_tree_insert(struct super_block *sb, struct inode *inode) -+{ -+ struct rb_node **new = &(sb->s_flush_root.rb_node); -+ struct rb_node *parent = NULL; -+ -+ assert_spin_locked(&inode_lock); -+ BUG_ON((inode->i_state & I_DIRTY) == 0); -+ BUG_ON(inode->i_state & (I_FREEING|I_CLEAR)); -+ BUG_ON(RB_LINKED_NODE(&inode->i_flush_node)); -+ -+ sb->s_flush_count++; -+ -+ list_del_init(&inode->i_list); -+ while (*new) { -+ struct inode *this = rb_to_inode(*new); -+ parent = *new; -+ if (time_before(inode->dirtied_when, this->dirtied_when)) -+ new = &((*new)->rb_left); -+ else if (time_after(inode->dirtied_when, -+ this->dirtied_when)) { -+ new = &((*new)->rb_right); -+ } else { -+ list_add_tail(&inode->i_list, &this->i_list); -+ return; -+ } -+ } -+ -+ /* Add in the new node and rebalance the tree */ -+ rb_link_node(&inode->i_flush_node, parent, new); -+ rb_insert_color(&inode->i_flush_node, &sb->s_flush_root); -+} -+ -+ -+/* -+ * Here we return the inode that has the smallest key in the flush tree -+ * that is greater than the parameter "prev_time". -+ */ -+static struct inode *flush_tree_min_greater(struct super_block *sb, -+ unsigned long prev_time) -+{ -+ struct rb_node *node = sb->s_flush_root.rb_node; -+ struct inode *bsf = NULL; -+ /* best so far */ -+ assert_spin_locked(&inode_lock); -+ while (node) { -+ struct inode *data = rb_to_inode(node); -+ /* Just trying to get lucky */ -+ if ((prev_time + 1) == data->dirtied_when) -+ return data; -+ -+ /* If this value is greater than our prev_time and is -+ less than the best so far, this is our new best so far.*/ -+ if ((data->dirtied_when > prev_time) && -+ (bsf ? bsf->dirtied_when > data->dirtied_when : 1)) -+ bsf = data; -+ -+ /* Search all the way down to the bottom of the tree */ -+ if (time_before(prev_time, data->dirtied_when)) -+ node = node->rb_left; -+ else if (time_after_eq(prev_time, data->dirtied_when)) -+ node = node->rb_right; -+ } -+ return bsf; -+} -+ -+/* -+ * Here is where we interate to find the next inode to process. The -+ * strategy is to first look for any other inodes with the same dirtied_when -+ * value. If we have already processed that node then we need to find -+ * the next highest dirtied_when value in the tree. -+ */ -+static struct inode *flush_tree_next(struct super_block *sb, -+ unsigned long start_time, -+ unsigned long prev_time) -+{ -+ struct inode *inode = flush_tree_search(sb, prev_time); -+ assert_spin_locked(&inode_lock); -+ /* We have a duplicate timed inode as the last processed */ -+ if (inode && (time_before(inode->i_flushed_when, start_time))) -+ return inode; -+ -+ /* Now we have to find the oldest one next */ -+ return flush_tree_min_greater(sb, prev_time); -+} -+ -+/* Removing a node from the flushtree. */ -+void flush_tree_remove(struct super_block *sb, struct inode *inode) -+{ -+ struct rb_node *rb_node = &inode->i_flush_node; -+ struct rb_root *rb_root = &sb->s_flush_root; -+ -+ assert_spin_locked(&inode_lock); -+ BUG_ON((inode->i_state & I_DIRTY) == 0); -+ -+ sb->s_flush_count--; -+ -+ /* There is no chain on this inode. Just remove it from the tree */ -+ if (list_empty(&inode->i_list)) { -+ BUG_ON(!RB_LINKED_NODE(rb_node)); -+ rb_erase(rb_node, rb_root); -+ memset(rb_node, 0, sizeof(*rb_node)); -+ return; -+ } -+ -+ /* This node is on a chain AND is in the rb_tree */ -+ if (RB_LINKED_NODE(rb_node)) { -+ struct inode *new = list_entry(inode->i_list.next, -+ struct inode, i_list); -+ rb_replace_node(rb_node, &new->i_flush_node, rb_root); -+ memset(rb_node, 0, sizeof(*rb_node)); -+ } -+ /* Take it off the list */ -+ list_del_init(&inode->i_list); -+} -+ -+ - /** - * __mark_inode_dirty - internal function - * @inode: inode to mark -@@ -32,7 +198,7 @@ - * Mark an inode as dirty. Callers should use mark_inode_dirty or - * mark_inode_dirty_sync. - * -- * Put the inode on the super block's dirty list. -+ * Put the inode in the super block's flush_tree. - * - * CAREFUL! We mark it dirty unconditionally, but move it onto the - * dirty list only if it is hashed or if it refers to a blockdev. -@@ -75,6 +241,13 @@ void __mark_inode_dirty(struct inode *in - if ((inode->i_state & flags) == flags) - return; - -+ /* anonynous file systems do not write data back */ -+ if (inode->i_sb->s_type->fs_flags & FS_ANONYMOUS) -+ return; -+ -+ if (inode->i_state & I_DIRTY_NEVER) -+ return; -+ - if (unlikely(block_dump)) { - struct dentry *dentry = NULL; - const char *name = "?"; -@@ -97,14 +270,7 @@ void __mark_inode_dirty(struct inode *in - if ((inode->i_state & flags) != flags) { - const int was_dirty = inode->i_state & I_DIRTY; - -- inode->i_state |= flags; -- -- /* -- * If the inode is being synced, just update its dirty state. -- * The unlocker will place the inode on the appropriate -- * superblock list, based upon its state. -- */ -- if (inode->i_state & I_SYNC) -+ if (inode->i_state & (I_FREEING|I_CLEAR)) - goto out; - - /* -@@ -115,7 +281,15 @@ void __mark_inode_dirty(struct inode *in - if (hlist_unhashed(&inode->i_hash)) - goto out; - } -- if (inode->i_state & (I_FREEING|I_CLEAR)) -+ -+ inode->i_state |= flags; -+ -+ /* -+ * If the inode is being synced, just update its dirty state. -+ * The unlocker will place the inode on the appropriate -+ * superblock list, based upon its state. -+ */ -+ if (inode->i_state & I_SYNC) - goto out; - - /* -@@ -124,7 +298,8 @@ void __mark_inode_dirty(struct inode *in - */ - if (!was_dirty) { - inode->dirtied_when = jiffies; -- list_move(&inode->i_list, &sb->s_dirty); -+ inode->i_flushed_when = jiffies; -+ flush_tree_insert(sb, inode); - } - } - out: -@@ -140,38 +315,6 @@ static int write_inode(struct inode *ino - return 0; - } - --/* -- * Redirty an inode: set its when-it-was dirtied timestamp and move it to the -- * furthest end of its superblock's dirty-inode list. -- * -- * Before stamping the inode's ->dirtied_when, we check to see whether it is -- * already the most-recently-dirtied inode on the s_dirty list. If that is -- * the case then the inode must have been redirtied while it was being written -- * out and we don't reset its dirtied_when. -- */ --static void redirty_tail(struct inode *inode) --{ -- struct super_block *sb = inode->i_sb; -- -- if (!list_empty(&sb->s_dirty)) { -- struct inode *tail_inode; -- -- tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); -- if (!time_after_eq(inode->dirtied_when, -- tail_inode->dirtied_when)) -- inode->dirtied_when = jiffies; -- } -- list_move(&inode->i_list, &sb->s_dirty); --} -- --/* -- * requeue inode for re-scanning after sb->s_io list is exhausted. -- */ --static void requeue_io(struct inode *inode) --{ -- list_move(&inode->i_list, &inode->i_sb->s_more_io); --} -- - static void inode_sync_complete(struct inode *inode) - { - /* -@@ -181,38 +324,9 @@ static void inode_sync_complete(struct i - wake_up_bit(&inode->i_state, __I_SYNC); - } - --/* -- * Move expired dirty inodes from @delaying_queue to @dispatch_queue. -- */ --static void move_expired_inodes(struct list_head *delaying_queue, -- struct list_head *dispatch_queue, -- unsigned long *older_than_this) --{ -- while (!list_empty(delaying_queue)) { -- struct inode *inode = list_entry(delaying_queue->prev, -- struct inode, i_list); -- if (older_than_this && -- time_after(inode->dirtied_when, *older_than_this)) -- break; -- list_move(&inode->i_list, dispatch_queue); -- } --} -- --/* -- * Queue all expired dirty inodes for io, eldest first. -- */ --static void queue_io(struct super_block *sb, -- unsigned long *older_than_this) --{ -- list_splice_init(&sb->s_more_io, sb->s_io.prev); -- move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this); --} -- - int sb_has_dirty_inodes(struct super_block *sb) - { -- return !list_empty(&sb->s_dirty) || -- !list_empty(&sb->s_io) || -- !list_empty(&sb->s_more_io); -+ return !RB_EMPTY_ROOT(&sb->s_flush_root); - } - EXPORT_SYMBOL(sb_has_dirty_inodes); - -@@ -237,6 +351,7 @@ __sync_single_inode(struct inode *inode, - BUG_ON(inode->i_state & I_SYNC); - - /* Set I_SYNC, reset I_DIRTY */ -+ flush_tree_remove(inode->i_sb, inode); - dirty = inode->i_state & I_DIRTY; - inode->i_state |= I_SYNC; - inode->i_state &= ~I_DIRTY; -@@ -279,12 +394,15 @@ __sync_single_inode(struct inode *inode, - */ - if (wbc->for_kupdate) { - /* -- * For the kupdate function we move the inode -- * to s_more_io so it will get more writeout as -- * soon as the queue becomes uncongested. -+ * For the kupdate function we leave -+ * dirtied_when field untouched and return -+ * it to the flush_tree. The next iteration -+ * of kupdate will flush more pages when -+ * the queue is no longer congested. - */ - inode->i_state |= I_DIRTY_PAGES; -- requeue_io(inode); -+ wbc->more_io = 1; -+ flush_tree_insert(inode->i_sb, inode); - } else { - /* - * Otherwise fully redirty the inode so that -@@ -294,14 +412,15 @@ __sync_single_inode(struct inode *inode, - * all the other files. - */ - inode->i_state |= I_DIRTY_PAGES; -- redirty_tail(inode); -+ inode->dirtied_when = jiffies; -+ flush_tree_insert(inode->i_sb, inode); - } - } else if (inode->i_state & I_DIRTY) { - /* - * Someone redirtied the inode while were writing back - * the pages. - */ -- redirty_tail(inode); -+ flush_tree_insert(inode->i_sb, inode); - } else if (atomic_read(&inode->i_count)) { - /* - * The inode is clean, inuse -@@ -333,23 +452,22 @@ __writeback_single_inode(struct inode *i - else - WARN_ON(inode->i_state & I_WILL_FREE); - -+ BUG_ON((inode->i_state & I_DIRTY) == 0); -+ -+ /* -+ * If the inode is locked and we are not going to wait for it -+ * to be unlocked then we can just exit the routine. Since the -+ * inode is marked I_DIRTY it will be inserted into the flush -+ * tree by sync_single_inode when the I_SYNC is released. -+ */ - if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { -- struct address_space *mapping = inode->i_mapping; - int ret; -- -- /* -- * We're skipping this inode because it's locked, and we're not -- * doing writeback-for-data-integrity. Move it to s_more_io so -- * that writeback can proceed with the other inodes on s_io. -- * We'll have another go at writing back this inode when we -- * completed a full scan of s_io. -- */ -- requeue_io(inode); -- -+ struct address_space *mapping = inode->i_mapping; - /* - * Even if we don't actually write the inode itself here, - * we can at least start some of the data writeout.. - */ -+ wbc->more_io = 1; - spin_unlock(&inode_lock); - ret = do_writepages(mapping, wbc); - spin_lock(&inode_lock); -@@ -383,8 +501,8 @@ __writeback_single_inode(struct inode *i - * If we're a pdlfush thread, then implement pdflush collision avoidance - * against the entire list. - * -- * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so -- * that it can be located for waiting on in __writeback_single_inode(). -+ * WB_SYNC_HOLD is a hack for sys_sync(): so that it can be located for -+ * waiting on in __writeback_single_inode(). - * - * Called under inode_lock. - * -@@ -398,28 +516,29 @@ __writeback_single_inode(struct inode *i - * a queue with that address_space. (Easy: have a global "dirty superblocks" - * list). - * -- * The inodes to be written are parked on sb->s_io. They are moved back onto -- * sb->s_dirty as they are selected for writing. This way, none can be missed -- * on the writer throttling path, and we get decent balancing between many -- * throttled threads: we don't want them all piling up on inode_sync_wait. -+ * The inodes to be written are inserted into the flush_tree. - */ - static void - sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) - { - const unsigned long start = jiffies; /* livelock avoidance */ -+ struct inode *inode = NULL; -+ unsigned long prev_time = 0; - -- if (!wbc->for_kupdate || list_empty(&sb->s_io)) -- queue_io(sb, wbc->older_than_this); -+ if (sb->s_type->fs_flags & FS_ANONYMOUS) -+ return; - -- while (!list_empty(&sb->s_io)) { -- struct inode *inode = list_entry(sb->s_io.prev, -- struct inode, i_list); -+ mutex_lock(&sb->s_flush_lock); -+ spin_lock(&inode_lock); -+ while ((inode = flush_tree_next(sb, start, prev_time)) != NULL) { - struct address_space *mapping = inode->i_mapping; - struct backing_dev_info *bdi = mapping->backing_dev_info; - long pages_skipped; - -+ prev_time = inode->dirtied_when; -+ inode->i_flushed_when = start; -+ - if (!bdi_cap_writeback_dirty(bdi)) { -- redirty_tail(inode); - if (sb_is_blkdev_sb(sb)) { - /* - * Dirty memory-backed blockdev: the ramdisk -@@ -436,17 +555,17 @@ sync_sb_inodes(struct super_block *sb, s - } - - if (wbc->nonblocking && bdi_write_congested(bdi)) { -+ wbc->more_io = 1; - wbc->encountered_congestion = 1; - if (!sb_is_blkdev_sb(sb)) - break; /* Skip a congested fs */ -- requeue_io(inode); - continue; /* Skip a congested blockdev */ - } - - if (wbc->bdi && bdi != wbc->bdi) { - if (!sb_is_blkdev_sb(sb)) - break; /* fs has the wrong queue */ -- requeue_io(inode); -+ wbc->more_io = 1; - continue; /* blockdev has wrong queue */ - } - -@@ -454,6 +573,11 @@ sync_sb_inodes(struct super_block *sb, s - if (time_after(inode->dirtied_when, start)) - break; - -+ /* Was this inode dirtied too recently? */ -+ if (wbc->older_than_this && -+ time_after(inode->dirtied_when, *wbc->older_than_this)) -+ break; -+ - /* Is another pdflush already flushing this queue? */ - if (current_is_pdflush() && !writeback_acquire(bdi)) - break; -@@ -462,19 +586,8 @@ sync_sb_inodes(struct super_block *sb, s - __iget(inode); - pages_skipped = wbc->pages_skipped; - __writeback_single_inode(inode, wbc); -- if (wbc->sync_mode == WB_SYNC_HOLD) { -- inode->dirtied_when = jiffies; -- list_move(&inode->i_list, &sb->s_dirty); -- } - if (current_is_pdflush()) - writeback_release(bdi); -- if (wbc->pages_skipped != pages_skipped) { -- /* -- * writeback is not making progress due to locked -- * buffers. Skip this inode for now. -- */ -- redirty_tail(inode); -- } - spin_unlock(&inode_lock); - iput(inode); - cond_resched(); -@@ -482,8 +595,9 @@ sync_sb_inodes(struct super_block *sb, s - if (wbc->nr_to_write <= 0) - break; - } -- if (!list_empty(&sb->s_more_io)) -- wbc->more_io = 1; -+ -+ spin_unlock(&inode_lock); -+ mutex_unlock(&sb->s_flush_lock); - return; /* Leave any unwritten inodes on s_io */ - } - -@@ -492,9 +606,9 @@ sync_sb_inodes(struct super_block *sb, s - * - * Note: - * We don't need to grab a reference to superblock here. If it has non-empty -- * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed -- * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all -- * empty. Since __sync_single_inode() regains inode_lock before it finally moves -+ * flush_tree it hasn't been killed yet and kill_super() won't proceed -+ * past sync_inodes_sb() until the flush_tree is empty. -+ * Since __sync_single_inode() regains inode_lock before it finally moves - * inode from superblock lists we are OK. - * - * If `older_than_this' is non-zero then only flush inodes which have a -@@ -527,9 +641,7 @@ restart: - */ - if (down_read_trylock(&sb->s_umount)) { - if (sb->s_root) { -- spin_lock(&inode_lock); - sync_sb_inodes(sb, wbc); -- spin_unlock(&inode_lock); - } - up_read(&sb->s_umount); - } -@@ -546,7 +658,7 @@ restart: - /* - * writeback and wait upon the filesystem's dirty inodes. The caller will - * do this in two passes - one to write, and one to wait. WB_SYNC_HOLD is -- * used to park the written inodes on sb->s_dirty for the wait pass. -+ * used to park the written inodes on the flush_tree for the wait pass. - * - * A finite limit is set on the number of pages which will be written. - * To prevent infinite livelock of sys_sync(). -@@ -568,9 +680,7 @@ void sync_inodes_sb(struct super_block * - (inodes_stat.nr_inodes - inodes_stat.nr_unused) + - nr_dirty + nr_unstable; - wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ -- spin_lock(&inode_lock); - sync_sb_inodes(sb, &wbc); -- spin_unlock(&inode_lock); - } - - /* -Index: 2624rc3/fs/inode.c -=================================================================== ---- 2624rc3.orig/fs/inode.c 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/fs/inode.c 2007-11-27 10:54:34.000000000 -0800 -@@ -143,6 +143,7 @@ static struct inode *alloc_inode(struct - inode->i_cdev = NULL; - inode->i_rdev = 0; - inode->dirtied_when = 0; -+ memset(&inode->i_flush_node, 0, sizeof(inode->i_flush_node)); - if (security_inode_alloc(inode)) { - if (inode->i_sb->s_op->destroy_inode) - inode->i_sb->s_op->destroy_inode(inode); -@@ -1044,6 +1045,10 @@ void generic_delete_inode(struct inode * - { - const struct super_operations *op = inode->i_sb->s_op; - -+ if ((inode->i_state & I_DIRTY)) { -+ flush_tree_remove(inode->i_sb, inode); -+ inode->i_state &= ~I_DIRTY; -+ } - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); - inode->i_state |= I_FREEING; -Index: 2624rc3/fs/pipe.c -=================================================================== ---- 2624rc3.orig/fs/pipe.c 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/fs/pipe.c 2007-11-27 10:59:15.000000000 -0800 -@@ -931,12 +931,10 @@ static struct inode * get_pipe_inode(voi - inode->i_fop = &rdwr_pipe_fops; - - /* -- * Mark the inode dirty from the very beginning, -- * that way it will never be moved to the dirty -- * list because "mark_inode_dirty()" will think -- * that it already _is_ on the dirty list. -+ * Mark the inode "never dirty" from the very beginning, -+ * that way it will never be written back. - */ -- inode->i_state = I_DIRTY; -+ inode->i_state = I_DIRTY_NEVER; - inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; -Index: 2624rc3/fs/proc/root.c -=================================================================== ---- 2624rc3.orig/fs/proc/root.c 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/fs/proc/root.c 2007-11-27 11:00:19.000000000 -0800 -@@ -102,6 +102,7 @@ struct file_system_type proc_fs_type = { - .name = "proc", - .get_sb = proc_get_sb, - .kill_sb = proc_kill_sb, -+ .fs_flags = FS_ANONYMOUS, - }; - - void __init proc_root_init(void) -Index: 2624rc3/fs/super.c -=================================================================== ---- 2624rc3.orig/fs/super.c 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/fs/super.c 2007-11-27 12:43:29.000000000 -0800 -@@ -61,9 +61,8 @@ static struct super_block *alloc_super(s - s = NULL; - goto out; - } -- INIT_LIST_HEAD(&s->s_dirty); -- INIT_LIST_HEAD(&s->s_io); -- INIT_LIST_HEAD(&s->s_more_io); -+ s->s_flush_root = RB_ROOT; -+ mutex_init(&s->s_flush_lock); - INIT_LIST_HEAD(&s->s_files); - INIT_LIST_HEAD(&s->s_instances); - INIT_HLIST_HEAD(&s->s_anon); -@@ -103,6 +102,7 @@ out: - */ - static inline void destroy_super(struct super_block *s) - { -+ mutex_destroy(&s->s_flush_lock); - security_sb_free(s); - kfree(s->s_subtype); - kfree(s); -Index: 2624rc3/fs/sysfs/mount.c -=================================================================== ---- 2624rc3.orig/fs/sysfs/mount.c 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/fs/sysfs/mount.c 2007-11-27 11:02:36.000000000 -0800 -@@ -80,6 +80,7 @@ static struct file_system_type sysfs_fs_ - .name = "sysfs", - .get_sb = sysfs_get_sb, - .kill_sb = kill_anon_super, -+ .fs_flags = FS_ANONYMOUS, - }; - - int __init sysfs_init(void) -Index: 2624rc3/include/linux/fs.h -=================================================================== ---- 2624rc3.orig/include/linux/fs.h 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/include/linux/fs.h 2007-11-27 12:41:22.000000000 -0800 -@@ -90,9 +90,10 @@ extern int dir_notify_enable; - #define SEL_EX 4 - - /* public flags for file_system_type */ --#define FS_REQUIRES_DEV 1 --#define FS_BINARY_MOUNTDATA 2 --#define FS_HAS_SUBTYPE 4 -+#define FS_REQUIRES_DEV 1 -+#define FS_BINARY_MOUNTDATA 2 -+#define FS_HAS_SUBTYPE 4 -+#define FS_ANONYMOUS 8 - #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ - #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() - * during rename() internally. -@@ -285,6 +286,7 @@ extern int dir_notify_enable; - #include <linux/pid.h> - #include <linux/mutex.h> - #include <linux/capability.h> -+#include <linux/rbtree.h> - - #include <asm/atomic.h> - #include <asm/semaphore.h> -@@ -592,6 +594,8 @@ struct inode { - struct hlist_node i_hash; - struct list_head i_list; - struct list_head i_sb_list; -+ struct rb_node i_flush_node; -+ unsigned long i_flushed_when; - struct list_head i_dentry; - unsigned long i_ino; - atomic_t i_count; -@@ -1003,9 +1007,11 @@ struct super_block { - struct xattr_handler **s_xattr; - - struct list_head s_inodes; /* all inodes */ -- struct list_head s_dirty; /* dirty inodes */ -- struct list_head s_io; /* parked for writeback */ -- struct list_head s_more_io; /* parked for more writeback */ -+ -+ struct rb_root s_flush_root; -+ unsigned long s_flush_count; -+ struct mutex s_flush_lock; -+ - struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ - struct list_head s_files; - -@@ -1315,17 +1321,18 @@ struct super_operations { - * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on - * I_CLEAR? If not, why? - */ --#define I_DIRTY_SYNC 1 --#define I_DIRTY_DATASYNC 2 --#define I_DIRTY_PAGES 4 --#define I_NEW 8 --#define I_WILL_FREE 16 --#define I_FREEING 32 --#define I_CLEAR 64 -+#define I_DIRTY_SYNC (1 << 0) -+#define I_DIRTY_DATASYNC (1 << 1) -+#define I_DIRTY_PAGES (1 << 2) -+#define I_NEW (1 << 3) -+#define I_WILL_FREE (1 << 4) -+#define I_FREEING (1 << 5) -+#define I_CLEAR (1 << 6) - #define __I_LOCK 7 - #define I_LOCK (1 << __I_LOCK) - #define __I_SYNC 8 - #define I_SYNC (1 << __I_SYNC) -+#define I_DIRTY_NEVER (1 << 9) - - #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) - -Index: 2624rc3/include/linux/rbtree.h -=================================================================== ---- 2624rc3.orig/include/linux/rbtree.h 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/include/linux/rbtree.h 2007-11-27 17:40:53.000000000 -0800 -@@ -135,6 +135,8 @@ static inline void rb_set_color(struct r - #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) - #define RB_EMPTY_NODE(node) (rb_parent(node) == node) - #define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) -+#define RB_LINKED_NODE(node) ((node)->rb_parent_color || \ -+ (node)->rb_left || (node)->rb_right) - - extern void rb_insert_color(struct rb_node *, struct rb_root *); - extern void rb_erase(struct rb_node *, struct rb_root *); -Index: 2624rc3/include/linux/writeback.h -=================================================================== ---- 2624rc3.orig/include/linux/writeback.h 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/include/linux/writeback.h 2007-11-27 11:12:53.000000000 -0800 -@@ -72,6 +72,8 @@ void writeback_inodes(struct writeback_c - int inode_wait(void *); - void sync_inodes_sb(struct super_block *, int wait); - void sync_inodes(int wait); -+void flush_tree_remove(struct super_block *sb, struct inode *inode); -+ - - /* writeback.h requires fs.h; it, too, is not included from here. */ - static inline void wait_on_inode(struct inode *inode) -Index: 2624rc3/mm/shmem.c -=================================================================== ---- 2624rc3.orig/mm/shmem.c 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/mm/shmem.c 2007-11-27 11:13:45.000000000 -0800 -@@ -2460,6 +2460,7 @@ static struct file_system_type tmpfs_fs_ - .name = "tmpfs", - .get_sb = shmem_get_sb, - .kill_sb = kill_litter_super, -+ .fs_flags = FS_ANONYMOUS, - }; - static struct vfsmount *shm_mnt; - -Index: 2624rc3/mm/tiny-shmem.c -=================================================================== ---- 2624rc3.orig/mm/tiny-shmem.c 2007-11-16 21:16:36.000000000 -0800 -+++ 2624rc3/mm/tiny-shmem.c 2007-11-27 11:14:13.000000000 -0800 -@@ -24,6 +24,7 @@ static struct file_system_type tmpfs_fs_ - .name = "tmpfs", - .get_sb = ramfs_get_sb, - .kill_sb = kill_litter_super, -+ .fs_flags = FS_ANONYMOUS, - }; - - static struct vfsmount *shm_mnt; diff --git a/a/content_digest b/N1/content_digest index fb1a084..e6969dd 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -8,830 +8,5 @@ " wfg@mail.ustc.edu.cn\0" "\00:1\0" "b\0" - ">From mrubin@matchstick.corp.google.com Wed Nov 28 11:10:06 2007\n" - "Message-Id: <20071128190121.716364000@matchstick.corp.google.com>\n" - "Date: Wed, 28 Nov 2007 11:01:21 -0800\n" - "From: mrubin@google.com\n" - "To: mrubin@google.com\n" - "Subject: [patch 1/1] Writeback fix for concurrent large and small file writes.\n" - "\n" - "From: Michael Rubin <mrubin@google.com>\n" - "\n" - "Fixing a bug where writing to large files while concurrently writing to\n" - "smaller ones creates a situation where writeback cannot keep up with the\n" - "traffic and memory baloons until the we hit the threshold watermark. This\n" - "can result in surprising latency spikes when syncing. This latency\n" - "can take minutes on large memory systems. Upon request I can provide\n" - "a test to reproduce this situation. The flush tree fixes this issue and\n" - "fixes several other minor issues with fairness also.\n" - "\n" - "1) Adding a data structure to guarantee fairness when writing inodes\n" - "to disk. The flush_tree is based on an rbtree. The only difference is\n" - "how duplicate keys are chained off the same rb_node.\n" - "\n" - "2) Added a FS flag to mark file systems that are not disk backed so we\n" - "don't have to flush them. Not sure I marked all of them. But just marking\n" - "these improves writeback performance.\n" - "\n" - "3) Added an inode flag to allow inodes to be marked so that they are\n" - "never written back to disk. See get_pipe_inode.\n" - "\n" - "Under autotest this patch has passed: fsx, bonnie, and iozone. I am\n" - "currently writing more writeback focused tests (which so far have been\n" - "passed) to add into autotest.\n" - "\n" - "Signed-off-by: Michael Rubin <mrubin@google.com>\n" - "---\n" - "\n" - "Index: 2624rc3/fs/block_dev.c\n" - "===================================================================\n" - "--- 2624rc3.orig/fs/block_dev.c\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/fs/block_dev.c\t2007-11-27 10:51:26.000000000 -0800\n" - "@@ -518,6 +518,7 @@ static struct file_system_type bd_type =\n" - " \t.name\t\t= \"bdev\",\n" - " \t.get_sb\t\t= bd_get_sb,\n" - " \t.kill_sb\t= kill_anon_super,\n" - "+\t.fs_flags\t= FS_ANONYMOUS,\n" - " };\n" - " \n" - " static struct vfsmount *bd_mnt __read_mostly;\n" - "Index: 2624rc3/fs/fs-writeback.c\n" - "===================================================================\n" - "--- 2624rc3.orig/fs/fs-writeback.c\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/fs/fs-writeback.c\t2007-11-27 17:40:19.000000000 -0800\n" - "@@ -23,8 +23,174 @@\n" - " #include <linux/blkdev.h>\n" - " #include <linux/backing-dev.h>\n" - " #include <linux/buffer_head.h>\n" - "+#include <linux/rbtree.h>\n" - " #include \"internal.h\"\n" - " \n" - "+#define rb_to_inode(node) rb_entry((node), struct inode, i_flush_node)\n" - "+\n" - "+/*\n" - "+ * When inodes are parked for writeback they are parked in the\n" - "+ * flush_tree. The flush tree is a data structure based on an rb tree.\n" - "+ *\n" - "+ * Duplicate keys are handled by making a list in the tree for each key\n" - "+ * value. The order of how we choose the next inode to flush is decided\n" - "+ * by two fields. First the earliest dirtied_when value. If there are\n" - "+ * duplicate dirtied_when values then the earliest i_flushed_when value\n" - "+ * determines who gets flushed next.\n" - "+ *\n" - "+ * The flush tree organizes the dirtied_when keys with the rb_tree. Any\n" - "+ * inodes with a duplicate dirtied_when value are link listed together. This\n" - "+ * link list is sorted by the inode's i_flushed_when. When both the\n" - "+ * dirited_when and the i_flushed_when are indentical the order in the\n" - "+ * linked list determines the order we flush the inodes.\n" - "+ */\n" - "+\n" - "+/*\n" - "+ * Find a rb_node matching the key in the flush tree. There are no duplicate\n" - "+ * rb_nodes in the tree. Instead they are chained off the first node.\n" - "+ */\n" - "+static struct inode *flush_tree_search(struct super_block *sb,\n" - "+\t\t\t\t unsigned long ts)\n" - "+{\n" - "+\tstruct rb_node *n = sb->s_flush_root.rb_node;\n" - "+\tassert_spin_locked(&inode_lock);\n" - "+\twhile (n) {\n" - "+\t\tstruct inode *inode = rb_to_inode(n);\n" - "+\t\tif (time_before(ts, inode->dirtied_when)) {\n" - "+\t\t\tn = n->rb_left;\n" - "+\t\t} else if (time_after(ts, inode->dirtied_when)) {\n" - "+\t\t\tn = n->rb_right;\n" - "+\t\t} else {\n" - "+\t\t\treturn inode;\n" - "+\t\t}\n" - "+\t}\n" - "+\treturn NULL;\n" - "+}\n" - "+\n" - "+/*\n" - "+ * Inserting an inode into the flush tree. The tree is keyed by the\n" - "+ * dirtied_when member.\n" - "+ *\n" - "+ * If there is a duplicate key in the tree already the new inode is put\n" - "+ * on the tail of a list of the rb_node.\n" - "+ * All inserted inodes must have one of the I_DIRTY flags set.\n" - "+ */\n" - "+static void flush_tree_insert(struct super_block *sb, struct inode *inode)\n" - "+{\n" - "+\tstruct rb_node **new = &(sb->s_flush_root.rb_node);\n" - "+\tstruct rb_node *parent = NULL;\n" - "+\n" - "+\tassert_spin_locked(&inode_lock);\n" - "+\tBUG_ON((inode->i_state & I_DIRTY) == 0);\n" - "+\tBUG_ON(inode->i_state & (I_FREEING|I_CLEAR));\n" - "+\tBUG_ON(RB_LINKED_NODE(&inode->i_flush_node));\n" - "+\n" - "+\tsb->s_flush_count++;\n" - "+\n" - "+\tlist_del_init(&inode->i_list);\n" - "+\twhile (*new) {\n" - "+\t\tstruct inode *this = rb_to_inode(*new);\n" - "+\t\tparent = *new;\n" - "+\t\tif (time_before(inode->dirtied_when, this->dirtied_when))\n" - "+\t\t\tnew = &((*new)->rb_left);\n" - "+\t\telse if (time_after(inode->dirtied_when,\n" - "+\t\t\t\t this->dirtied_when)) {\n" - "+\t\t\tnew = &((*new)->rb_right);\n" - "+\t\t} else {\n" - "+\t\t\tlist_add_tail(&inode->i_list, &this->i_list);\n" - "+\t\t\treturn;\n" - "+\t\t}\n" - "+\t}\n" - "+\n" - "+\t/* Add in the new node and rebalance the tree */\n" - "+\trb_link_node(&inode->i_flush_node, parent, new);\n" - "+\trb_insert_color(&inode->i_flush_node, &sb->s_flush_root);\n" - "+}\n" - "+\n" - "+\n" - "+/*\n" - "+ * Here we return the inode that has the smallest key in the flush tree\n" - "+ * that is greater than the parameter \"prev_time\".\n" - "+ */\n" - "+static struct inode *flush_tree_min_greater(struct super_block *sb,\n" - "+\t\t\t\t\t unsigned long prev_time)\n" - "+{\n" - "+\tstruct rb_node *node = sb->s_flush_root.rb_node;\n" - "+\tstruct inode *bsf = NULL;\n" - "+\t/* best so far */\n" - "+\tassert_spin_locked(&inode_lock);\n" - "+\twhile (node) {\n" - "+\t\tstruct inode *data = rb_to_inode(node);\n" - "+\t\t/* Just trying to get lucky */\n" - "+\t\tif ((prev_time + 1) == data->dirtied_when)\n" - "+\t\t\treturn data;\n" - "+\n" - "+\t\t/* If this value is greater than our prev_time and is\n" - "+\t\tless than the best so far, this is our new best so far.*/\n" - "+\t\tif ((data->dirtied_when > prev_time) &&\n" - "+\t\t (bsf ? bsf->dirtied_when > data->dirtied_when : 1))\n" - "+\t\t\tbsf = data;\n" - "+\n" - "+\t\t/* Search all the way down to the bottom of the tree */\n" - "+\t\tif (time_before(prev_time, data->dirtied_when))\n" - "+\t\t\tnode = node->rb_left;\n" - "+\t\telse if (time_after_eq(prev_time, data->dirtied_when))\n" - "+\t\t\tnode = node->rb_right;\n" - "+\t}\n" - "+\treturn bsf;\n" - "+}\n" - "+\n" - "+/*\n" - "+ * Here is where we interate to find the next inode to process. The\n" - "+ * strategy is to first look for any other inodes with the same dirtied_when\n" - "+ * value. If we have already processed that node then we need to find\n" - "+ * the next highest dirtied_when value in the tree.\n" - "+ */\n" - "+static struct inode *flush_tree_next(struct super_block *sb,\n" - "+\t\t\t\t unsigned long start_time,\n" - "+\t\t\t\t unsigned long prev_time)\n" - "+{\n" - "+\tstruct inode *inode = flush_tree_search(sb, prev_time);\n" - "+\tassert_spin_locked(&inode_lock);\n" - "+\t/* We have a duplicate timed inode as the last processed */\n" - "+\tif (inode && (time_before(inode->i_flushed_when, start_time)))\n" - "+\t\treturn inode;\n" - "+\n" - "+\t/* Now we have to find the oldest one next */\n" - "+\treturn flush_tree_min_greater(sb, prev_time);\n" - "+}\n" - "+\n" - "+/* Removing a node from the flushtree. */\n" - "+void flush_tree_remove(struct super_block *sb, struct inode *inode)\n" - "+{\n" - "+\tstruct rb_node *rb_node = &inode->i_flush_node;\n" - "+\tstruct rb_root *rb_root = &sb->s_flush_root;\n" - "+\n" - "+\tassert_spin_locked(&inode_lock);\n" - "+\tBUG_ON((inode->i_state & I_DIRTY) == 0);\n" - "+\n" - "+\tsb->s_flush_count--;\n" - "+\n" - "+\t/* There is no chain on this inode. Just remove it from the tree */\n" - "+\tif (list_empty(&inode->i_list)) {\n" - "+\t\tBUG_ON(!RB_LINKED_NODE(rb_node));\n" - "+\t\trb_erase(rb_node, rb_root);\n" - "+\t\tmemset(rb_node, 0, sizeof(*rb_node));\n" - "+\t\treturn;\n" - "+\t}\n" - "+\n" - "+\t/* This node is on a chain AND is in the rb_tree */\n" - "+\tif (RB_LINKED_NODE(rb_node)) {\n" - "+\t\tstruct inode *new = list_entry(inode->i_list.next,\n" - "+\t\t\t\t\t struct inode, i_list);\n" - "+\t\trb_replace_node(rb_node, &new->i_flush_node, rb_root);\n" - "+\t\tmemset(rb_node, 0, sizeof(*rb_node));\n" - "+\t}\n" - "+\t/* Take it off the list */\n" - "+\tlist_del_init(&inode->i_list);\n" - "+}\n" - "+\n" - "+\n" - " /**\n" - " *\t__mark_inode_dirty -\tinternal function\n" - " *\t@inode: inode to mark\n" - "@@ -32,7 +198,7 @@\n" - " *\tMark an inode as dirty. Callers should use mark_inode_dirty or\n" - " * \tmark_inode_dirty_sync.\n" - " *\n" - "- * Put the inode on the super block's dirty list.\n" - "+ * Put the inode in the super block's flush_tree.\n" - " *\n" - " * CAREFUL! We mark it dirty unconditionally, but move it onto the\n" - " * dirty list only if it is hashed or if it refers to a blockdev.\n" - "@@ -75,6 +241,13 @@ void __mark_inode_dirty(struct inode *in\n" - " \tif ((inode->i_state & flags) == flags)\n" - " \t\treturn;\n" - " \n" - "+\t/* anonynous file systems do not write data back */\n" - "+\tif (inode->i_sb->s_type->fs_flags & FS_ANONYMOUS)\n" - "+\t\treturn;\n" - "+\n" - "+\tif (inode->i_state & I_DIRTY_NEVER)\n" - "+\t\treturn;\n" - "+\n" - " \tif (unlikely(block_dump)) {\n" - " \t\tstruct dentry *dentry = NULL;\n" - " \t\tconst char *name = \"?\";\n" - "@@ -97,14 +270,7 @@ void __mark_inode_dirty(struct inode *in\n" - " \tif ((inode->i_state & flags) != flags) {\n" - " \t\tconst int was_dirty = inode->i_state & I_DIRTY;\n" - " \n" - "-\t\tinode->i_state |= flags;\n" - "-\n" - "-\t\t/*\n" - "-\t\t * If the inode is being synced, just update its dirty state.\n" - "-\t\t * The unlocker will place the inode on the appropriate\n" - "-\t\t * superblock list, based upon its state.\n" - "-\t\t */\n" - "-\t\tif (inode->i_state & I_SYNC)\n" - "+\t\tif (inode->i_state & (I_FREEING|I_CLEAR))\n" - " \t\t\tgoto out;\n" - " \n" - " \t\t/*\n" - "@@ -115,7 +281,15 @@ void __mark_inode_dirty(struct inode *in\n" - " \t\t\tif (hlist_unhashed(&inode->i_hash))\n" - " \t\t\t\tgoto out;\n" - " \t\t}\n" - "-\t\tif (inode->i_state & (I_FREEING|I_CLEAR))\n" - "+\n" - "+\t\tinode->i_state |= flags;\n" - "+\n" - "+\t\t/*\n" - "+\t\t * If the inode is being synced, just update its dirty state.\n" - "+\t\t * The unlocker will place the inode on the appropriate\n" - "+\t\t * superblock list, based upon its state.\n" - "+\t\t */\n" - "+\t\tif (inode->i_state & I_SYNC)\n" - " \t\t\tgoto out;\n" - " \n" - " \t\t/*\n" - "@@ -124,7 +298,8 @@ void __mark_inode_dirty(struct inode *in\n" - " \t\t */\n" - " \t\tif (!was_dirty) {\n" - " \t\t\tinode->dirtied_when = jiffies;\n" - "-\t\t\tlist_move(&inode->i_list, &sb->s_dirty);\n" - "+\t\t\tinode->i_flushed_when = jiffies;\n" - "+\t\t\tflush_tree_insert(sb, inode);\n" - " \t\t}\n" - " \t}\n" - " out:\n" - "@@ -140,38 +315,6 @@ static int write_inode(struct inode *ino\n" - " \treturn 0;\n" - " }\n" - " \n" - "-/*\n" - "- * Redirty an inode: set its when-it-was dirtied timestamp and move it to the\n" - "- * furthest end of its superblock's dirty-inode list.\n" - "- *\n" - "- * Before stamping the inode's ->dirtied_when, we check to see whether it is\n" - "- * already the most-recently-dirtied inode on the s_dirty list. If that is\n" - "- * the case then the inode must have been redirtied while it was being written\n" - "- * out and we don't reset its dirtied_when.\n" - "- */\n" - "-static void redirty_tail(struct inode *inode)\n" - "-{\n" - "-\tstruct super_block *sb = inode->i_sb;\n" - "-\n" - "-\tif (!list_empty(&sb->s_dirty)) {\n" - "-\t\tstruct inode *tail_inode;\n" - "-\n" - "-\t\ttail_inode = list_entry(sb->s_dirty.next, struct inode, i_list);\n" - "-\t\tif (!time_after_eq(inode->dirtied_when,\n" - "-\t\t\t\ttail_inode->dirtied_when))\n" - "-\t\t\tinode->dirtied_when = jiffies;\n" - "-\t}\n" - "-\tlist_move(&inode->i_list, &sb->s_dirty);\n" - "-}\n" - "-\n" - "-/*\n" - "- * requeue inode for re-scanning after sb->s_io list is exhausted.\n" - "- */\n" - "-static void requeue_io(struct inode *inode)\n" - "-{\n" - "-\tlist_move(&inode->i_list, &inode->i_sb->s_more_io);\n" - "-}\n" - "-\n" - " static void inode_sync_complete(struct inode *inode)\n" - " {\n" - " \t/*\n" - "@@ -181,38 +324,9 @@ static void inode_sync_complete(struct i\n" - " \twake_up_bit(&inode->i_state, __I_SYNC);\n" - " }\n" - " \n" - "-/*\n" - "- * Move expired dirty inodes from @delaying_queue to @dispatch_queue.\n" - "- */\n" - "-static void move_expired_inodes(struct list_head *delaying_queue,\n" - "-\t\t\t struct list_head *dispatch_queue,\n" - "-\t\t\t\tunsigned long *older_than_this)\n" - "-{\n" - "-\twhile (!list_empty(delaying_queue)) {\n" - "-\t\tstruct inode *inode = list_entry(delaying_queue->prev,\n" - "-\t\t\t\t\t\tstruct inode, i_list);\n" - "-\t\tif (older_than_this &&\n" - "-\t\t\ttime_after(inode->dirtied_when, *older_than_this))\n" - "-\t\t\tbreak;\n" - "-\t\tlist_move(&inode->i_list, dispatch_queue);\n" - "-\t}\n" - "-}\n" - "-\n" - "-/*\n" - "- * Queue all expired dirty inodes for io, eldest first.\n" - "- */\n" - "-static void queue_io(struct super_block *sb,\n" - "-\t\t\t\tunsigned long *older_than_this)\n" - "-{\n" - "-\tlist_splice_init(&sb->s_more_io, sb->s_io.prev);\n" - "-\tmove_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);\n" - "-}\n" - "-\n" - " int sb_has_dirty_inodes(struct super_block *sb)\n" - " {\n" - "-\treturn !list_empty(&sb->s_dirty) ||\n" - "-\t !list_empty(&sb->s_io) ||\n" - "-\t !list_empty(&sb->s_more_io);\n" - "+\treturn !RB_EMPTY_ROOT(&sb->s_flush_root);\n" - " }\n" - " EXPORT_SYMBOL(sb_has_dirty_inodes);\n" - " \n" - "@@ -237,6 +351,7 @@ __sync_single_inode(struct inode *inode,\n" - " \tBUG_ON(inode->i_state & I_SYNC);\n" - " \n" - " \t/* Set I_SYNC, reset I_DIRTY */\n" - "+\tflush_tree_remove(inode->i_sb, inode);\n" - " \tdirty = inode->i_state & I_DIRTY;\n" - " \tinode->i_state |= I_SYNC;\n" - " \tinode->i_state &= ~I_DIRTY;\n" - "@@ -279,12 +394,15 @@ __sync_single_inode(struct inode *inode,\n" - " \t\t\t */\n" - " \t\t\tif (wbc->for_kupdate) {\n" - " \t\t\t\t/*\n" - "-\t\t\t\t * For the kupdate function we move the inode\n" - "-\t\t\t\t * to s_more_io so it will get more writeout as\n" - "-\t\t\t\t * soon as the queue becomes uncongested.\n" - "+\t\t\t\t * For the kupdate function we leave\n" - "+\t\t\t\t * dirtied_when field untouched and return\n" - "+\t\t\t\t * it to the flush_tree. The next iteration\n" - "+\t\t\t\t * of kupdate will flush more pages when\n" - "+\t\t\t\t * the queue is no longer congested.\n" - " \t\t\t\t */\n" - " \t\t\t\tinode->i_state |= I_DIRTY_PAGES;\n" - "-\t\t\t\trequeue_io(inode);\n" - "+\t\t\t\twbc->more_io = 1;\n" - "+\t\t\t\tflush_tree_insert(inode->i_sb, inode);\n" - " \t\t\t} else {\n" - " \t\t\t\t/*\n" - " \t\t\t\t * Otherwise fully redirty the inode so that\n" - "@@ -294,14 +412,15 @@ __sync_single_inode(struct inode *inode,\n" - " \t\t\t\t * all the other files.\n" - " \t\t\t\t */\n" - " \t\t\t\tinode->i_state |= I_DIRTY_PAGES;\n" - "-\t\t\t\tredirty_tail(inode);\n" - "+\t\t\t\tinode->dirtied_when = jiffies;\n" - "+\t\t\t\tflush_tree_insert(inode->i_sb, inode);\n" - " \t\t\t}\n" - " \t\t} else if (inode->i_state & I_DIRTY) {\n" - " \t\t\t/*\n" - " \t\t\t * Someone redirtied the inode while were writing back\n" - " \t\t\t * the pages.\n" - " \t\t\t */\n" - "-\t\t\tredirty_tail(inode);\n" - "+\t\t\tflush_tree_insert(inode->i_sb, inode);\n" - " \t\t} else if (atomic_read(&inode->i_count)) {\n" - " \t\t\t/*\n" - " \t\t\t * The inode is clean, inuse\n" - "@@ -333,23 +452,22 @@ __writeback_single_inode(struct inode *i\n" - " \telse\n" - " \t\tWARN_ON(inode->i_state & I_WILL_FREE);\n" - " \n" - "+\tBUG_ON((inode->i_state & I_DIRTY) == 0);\n" - "+\n" - "+\t/*\n" - "+\t * If the inode is locked and we are not going to wait for it\n" - "+\t * to be unlocked then we can just exit the routine. Since the\n" - "+\t * inode is marked I_DIRTY it will be inserted into the flush\n" - "+\t * tree by sync_single_inode when the I_SYNC is released.\n" - "+\t */\n" - " \tif ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {\n" - "-\t\tstruct address_space *mapping = inode->i_mapping;\n" - " \t\tint ret;\n" - "-\n" - "-\t\t/*\n" - "-\t\t * We're skipping this inode because it's locked, and we're not\n" - "-\t\t * doing writeback-for-data-integrity. Move it to s_more_io so\n" - "-\t\t * that writeback can proceed with the other inodes on s_io.\n" - "-\t\t * We'll have another go at writing back this inode when we\n" - "-\t\t * completed a full scan of s_io.\n" - "-\t\t */\n" - "-\t\trequeue_io(inode);\n" - "-\n" - "+\t\tstruct address_space *mapping = inode->i_mapping;\n" - " \t\t/*\n" - " \t\t * Even if we don't actually write the inode itself here,\n" - " \t\t * we can at least start some of the data writeout..\n" - " \t\t */\n" - "+\t\twbc->more_io = 1;\n" - " \t\tspin_unlock(&inode_lock);\n" - " \t\tret = do_writepages(mapping, wbc);\n" - " \t\tspin_lock(&inode_lock);\n" - "@@ -383,8 +501,8 @@ __writeback_single_inode(struct inode *i\n" - " * If we're a pdlfush thread, then implement pdflush collision avoidance\n" - " * against the entire list.\n" - " *\n" - "- * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so\n" - "- * that it can be located for waiting on in __writeback_single_inode().\n" - "+ * WB_SYNC_HOLD is a hack for sys_sync(): so that it can be located for\n" - "+ * waiting on in __writeback_single_inode().\n" - " *\n" - " * Called under inode_lock.\n" - " *\n" - "@@ -398,28 +516,29 @@ __writeback_single_inode(struct inode *i\n" - " * a queue with that address_space. (Easy: have a global \"dirty superblocks\"\n" - " * list).\n" - " *\n" - "- * The inodes to be written are parked on sb->s_io. They are moved back onto\n" - "- * sb->s_dirty as they are selected for writing. This way, none can be missed\n" - "- * on the writer throttling path, and we get decent balancing between many\n" - "- * throttled threads: we don't want them all piling up on inode_sync_wait.\n" - "+ * The inodes to be written are inserted into the flush_tree.\n" - " */\n" - " static void\n" - " sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)\n" - " {\n" - " \tconst unsigned long start = jiffies;\t/* livelock avoidance */\n" - "+\tstruct inode *inode = NULL;\n" - "+\tunsigned long prev_time = 0;\n" - " \n" - "-\tif (!wbc->for_kupdate || list_empty(&sb->s_io))\n" - "-\t\tqueue_io(sb, wbc->older_than_this);\n" - "+\tif (sb->s_type->fs_flags & FS_ANONYMOUS)\n" - "+\t\treturn;\n" - " \n" - "-\twhile (!list_empty(&sb->s_io)) {\n" - "-\t\tstruct inode *inode = list_entry(sb->s_io.prev,\n" - "-\t\t\t\t\t\tstruct inode, i_list);\n" - "+\tmutex_lock(&sb->s_flush_lock);\n" - "+\tspin_lock(&inode_lock);\n" - "+\twhile ((inode = flush_tree_next(sb, start, prev_time)) != NULL) {\n" - " \t\tstruct address_space *mapping = inode->i_mapping;\n" - " \t\tstruct backing_dev_info *bdi = mapping->backing_dev_info;\n" - " \t\tlong pages_skipped;\n" - " \n" - "+\t\tprev_time = inode->dirtied_when;\n" - "+\t\tinode->i_flushed_when = start;\n" - "+\n" - " \t\tif (!bdi_cap_writeback_dirty(bdi)) {\n" - "-\t\t\tredirty_tail(inode);\n" - " \t\t\tif (sb_is_blkdev_sb(sb)) {\n" - " \t\t\t\t/*\n" - " \t\t\t\t * Dirty memory-backed blockdev: the ramdisk\n" - "@@ -436,17 +555,17 @@ sync_sb_inodes(struct super_block *sb, s\n" - " \t\t}\n" - " \n" - " \t\tif (wbc->nonblocking && bdi_write_congested(bdi)) {\n" - "+\t\t\twbc->more_io = 1;\n" - " \t\t\twbc->encountered_congestion = 1;\n" - " \t\t\tif (!sb_is_blkdev_sb(sb))\n" - " \t\t\t\tbreak;\t\t/* Skip a congested fs */\n" - "-\t\t\trequeue_io(inode);\n" - " \t\t\tcontinue;\t\t/* Skip a congested blockdev */\n" - " \t\t}\n" - " \n" - " \t\tif (wbc->bdi && bdi != wbc->bdi) {\n" - " \t\t\tif (!sb_is_blkdev_sb(sb))\n" - " \t\t\t\tbreak;\t\t/* fs has the wrong queue */\n" - "-\t\t\trequeue_io(inode);\n" - "+\t\t\twbc->more_io = 1;\n" - " \t\t\tcontinue;\t\t/* blockdev has wrong queue */\n" - " \t\t}\n" - " \n" - "@@ -454,6 +573,11 @@ sync_sb_inodes(struct super_block *sb, s\n" - " \t\tif (time_after(inode->dirtied_when, start))\n" - " \t\t\tbreak;\n" - " \n" - "+\t\t/* Was this inode dirtied too recently? */\n" - "+\t\tif (wbc->older_than_this &&\n" - "+\t\t time_after(inode->dirtied_when, *wbc->older_than_this))\n" - "+\t\t\tbreak;\n" - "+\n" - " \t\t/* Is another pdflush already flushing this queue? */\n" - " \t\tif (current_is_pdflush() && !writeback_acquire(bdi))\n" - " \t\t\tbreak;\n" - "@@ -462,19 +586,8 @@ sync_sb_inodes(struct super_block *sb, s\n" - " \t\t__iget(inode);\n" - " \t\tpages_skipped = wbc->pages_skipped;\n" - " \t\t__writeback_single_inode(inode, wbc);\n" - "-\t\tif (wbc->sync_mode == WB_SYNC_HOLD) {\n" - "-\t\t\tinode->dirtied_when = jiffies;\n" - "-\t\t\tlist_move(&inode->i_list, &sb->s_dirty);\n" - "-\t\t}\n" - " \t\tif (current_is_pdflush())\n" - " \t\t\twriteback_release(bdi);\n" - "-\t\tif (wbc->pages_skipped != pages_skipped) {\n" - "-\t\t\t/*\n" - "-\t\t\t * writeback is not making progress due to locked\n" - "-\t\t\t * buffers. Skip this inode for now.\n" - "-\t\t\t */\n" - "-\t\t\tredirty_tail(inode);\n" - "-\t\t}\n" - " \t\tspin_unlock(&inode_lock);\n" - " \t\tiput(inode);\n" - " \t\tcond_resched();\n" - "@@ -482,8 +595,9 @@ sync_sb_inodes(struct super_block *sb, s\n" - " \t\tif (wbc->nr_to_write <= 0)\n" - " \t\t\tbreak;\n" - " \t}\n" - "-\tif (!list_empty(&sb->s_more_io))\n" - "-\t\twbc->more_io = 1;\n" - "+\n" - "+\tspin_unlock(&inode_lock);\n" - "+\tmutex_unlock(&sb->s_flush_lock);\n" - " \treturn;\t\t/* Leave any unwritten inodes on s_io */\n" - " }\n" - " \n" - "@@ -492,9 +606,9 @@ sync_sb_inodes(struct super_block *sb, s\n" - " *\n" - " * Note:\n" - " * We don't need to grab a reference to superblock here. If it has non-empty\n" - "- * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed\n" - "- * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all\n" - "- * empty. Since __sync_single_inode() regains inode_lock before it finally moves\n" - "+ * flush_tree it hasn't been killed yet and kill_super() won't proceed\n" - "+ * past sync_inodes_sb() until the flush_tree is empty.\n" - "+ * Since __sync_single_inode() regains inode_lock before it finally moves\n" - " * inode from superblock lists we are OK.\n" - " *\n" - " * If `older_than_this' is non-zero then only flush inodes which have a\n" - "@@ -527,9 +641,7 @@ restart:\n" - " \t\t\t */\n" - " \t\t\tif (down_read_trylock(&sb->s_umount)) {\n" - " \t\t\t\tif (sb->s_root) {\n" - "-\t\t\t\t\tspin_lock(&inode_lock);\n" - " \t\t\t\t\tsync_sb_inodes(sb, wbc);\n" - "-\t\t\t\t\tspin_unlock(&inode_lock);\n" - " \t\t\t\t}\n" - " \t\t\t\tup_read(&sb->s_umount);\n" - " \t\t\t}\n" - "@@ -546,7 +658,7 @@ restart:\n" - " /*\n" - " * writeback and wait upon the filesystem's dirty inodes. The caller will\n" - " * do this in two passes - one to write, and one to wait. WB_SYNC_HOLD is\n" - "- * used to park the written inodes on sb->s_dirty for the wait pass.\n" - "+ * used to park the written inodes on the flush_tree for the wait pass.\n" - " *\n" - " * A finite limit is set on the number of pages which will be written.\n" - " * To prevent infinite livelock of sys_sync().\n" - "@@ -568,9 +680,7 @@ void sync_inodes_sb(struct super_block *\n" - " \t\t\t(inodes_stat.nr_inodes - inodes_stat.nr_unused) +\n" - " \t\t\tnr_dirty + nr_unstable;\n" - " \twbc.nr_to_write += wbc.nr_to_write / 2;\t\t/* Bit more for luck */\n" - "-\tspin_lock(&inode_lock);\n" - " \tsync_sb_inodes(sb, &wbc);\n" - "-\tspin_unlock(&inode_lock);\n" - " }\n" - " \n" - " /*\n" - "Index: 2624rc3/fs/inode.c\n" - "===================================================================\n" - "--- 2624rc3.orig/fs/inode.c\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/fs/inode.c\t2007-11-27 10:54:34.000000000 -0800\n" - "@@ -143,6 +143,7 @@ static struct inode *alloc_inode(struct \n" - " \t\tinode->i_cdev = NULL;\n" - " \t\tinode->i_rdev = 0;\n" - " \t\tinode->dirtied_when = 0;\n" - "+\t\tmemset(&inode->i_flush_node, 0, sizeof(inode->i_flush_node));\n" - " \t\tif (security_inode_alloc(inode)) {\n" - " \t\t\tif (inode->i_sb->s_op->destroy_inode)\n" - " \t\t\t\tinode->i_sb->s_op->destroy_inode(inode);\n" - "@@ -1044,6 +1045,10 @@ void generic_delete_inode(struct inode *\n" - " {\n" - " \tconst struct super_operations *op = inode->i_sb->s_op;\n" - " \n" - "+\tif ((inode->i_state & I_DIRTY)) {\n" - "+\t\tflush_tree_remove(inode->i_sb, inode);\n" - "+\t\tinode->i_state &= ~I_DIRTY;\n" - "+\t}\n" - " \tlist_del_init(&inode->i_list);\n" - " \tlist_del_init(&inode->i_sb_list);\n" - " \tinode->i_state |= I_FREEING;\n" - "Index: 2624rc3/fs/pipe.c\n" - "===================================================================\n" - "--- 2624rc3.orig/fs/pipe.c\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/fs/pipe.c\t2007-11-27 10:59:15.000000000 -0800\n" - "@@ -931,12 +931,10 @@ static struct inode * get_pipe_inode(voi\n" - " \tinode->i_fop = &rdwr_pipe_fops;\n" - " \n" - " \t/*\n" - "-\t * Mark the inode dirty from the very beginning,\n" - "-\t * that way it will never be moved to the dirty\n" - "-\t * list because \"mark_inode_dirty()\" will think\n" - "-\t * that it already _is_ on the dirty list.\n" - "+\t * Mark the inode \"never dirty\" from the very beginning,\n" - "+\t * that way it will never be written back.\n" - " \t */\n" - "-\tinode->i_state = I_DIRTY;\n" - "+\tinode->i_state = I_DIRTY_NEVER;\n" - " \tinode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;\n" - " \tinode->i_uid = current->fsuid;\n" - " \tinode->i_gid = current->fsgid;\n" - "Index: 2624rc3/fs/proc/root.c\n" - "===================================================================\n" - "--- 2624rc3.orig/fs/proc/root.c\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/fs/proc/root.c\t2007-11-27 11:00:19.000000000 -0800\n" - "@@ -102,6 +102,7 @@ struct file_system_type proc_fs_type = {\n" - " \t.name\t\t= \"proc\",\n" - " \t.get_sb\t\t= proc_get_sb,\n" - " \t.kill_sb\t= proc_kill_sb,\n" - "+\t.fs_flags\t= FS_ANONYMOUS,\n" - " };\n" - " \n" - " void __init proc_root_init(void)\n" - "Index: 2624rc3/fs/super.c\n" - "===================================================================\n" - "--- 2624rc3.orig/fs/super.c\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/fs/super.c\t2007-11-27 12:43:29.000000000 -0800\n" - "@@ -61,9 +61,8 @@ static struct super_block *alloc_super(s\n" - " \t\t\ts = NULL;\n" - " \t\t\tgoto out;\n" - " \t\t}\n" - "-\t\tINIT_LIST_HEAD(&s->s_dirty);\n" - "-\t\tINIT_LIST_HEAD(&s->s_io);\n" - "-\t\tINIT_LIST_HEAD(&s->s_more_io);\n" - "+\t\ts->s_flush_root = RB_ROOT;\n" - "+\t\tmutex_init(&s->s_flush_lock);\n" - " \t\tINIT_LIST_HEAD(&s->s_files);\n" - " \t\tINIT_LIST_HEAD(&s->s_instances);\n" - " \t\tINIT_HLIST_HEAD(&s->s_anon);\n" - "@@ -103,6 +102,7 @@ out:\n" - " */\n" - " static inline void destroy_super(struct super_block *s)\n" - " {\n" - "+\tmutex_destroy(&s->s_flush_lock);\n" - " \tsecurity_sb_free(s);\n" - " \tkfree(s->s_subtype);\n" - " \tkfree(s);\n" - "Index: 2624rc3/fs/sysfs/mount.c\n" - "===================================================================\n" - "--- 2624rc3.orig/fs/sysfs/mount.c\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/fs/sysfs/mount.c\t2007-11-27 11:02:36.000000000 -0800\n" - "@@ -80,6 +80,7 @@ static struct file_system_type sysfs_fs_\n" - " \t.name\t\t= \"sysfs\",\n" - " \t.get_sb\t\t= sysfs_get_sb,\n" - " \t.kill_sb\t= kill_anon_super,\n" - "+\t.fs_flags\t= FS_ANONYMOUS,\n" - " };\n" - " \n" - " int __init sysfs_init(void)\n" - "Index: 2624rc3/include/linux/fs.h\n" - "===================================================================\n" - "--- 2624rc3.orig/include/linux/fs.h\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/include/linux/fs.h\t2007-11-27 12:41:22.000000000 -0800\n" - "@@ -90,9 +90,10 @@ extern int dir_notify_enable;\n" - " #define SEL_EX\t\t4\n" - " \n" - " /* public flags for file_system_type */\n" - "-#define FS_REQUIRES_DEV 1 \n" - "-#define FS_BINARY_MOUNTDATA 2\n" - "-#define FS_HAS_SUBTYPE 4\n" - "+#define FS_REQUIRES_DEV\t\t1\n" - "+#define FS_BINARY_MOUNTDATA\t2\n" - "+#define FS_HAS_SUBTYPE\t\t4\n" - "+#define FS_ANONYMOUS\t\t8\n" - " #define FS_REVAL_DOT\t16384\t/* Check the paths \".\", \"..\" for staleness */\n" - " #define FS_RENAME_DOES_D_MOVE\t32768\t/* FS will handle d_move()\n" - " \t\t\t\t\t * during rename() internally.\n" - "@@ -285,6 +286,7 @@ extern int dir_notify_enable;\n" - " #include <linux/pid.h>\n" - " #include <linux/mutex.h>\n" - " #include <linux/capability.h>\n" - "+#include <linux/rbtree.h>\n" - " \n" - " #include <asm/atomic.h>\n" - " #include <asm/semaphore.h>\n" - "@@ -592,6 +594,8 @@ struct inode {\n" - " \tstruct hlist_node\ti_hash;\n" - " \tstruct list_head\ti_list;\n" - " \tstruct list_head\ti_sb_list;\n" - "+\tstruct rb_node\t\ti_flush_node;\n" - "+\tunsigned long\t\ti_flushed_when;\n" - " \tstruct list_head\ti_dentry;\n" - " \tunsigned long\t\ti_ino;\n" - " \tatomic_t\t\ti_count;\n" - "@@ -1003,9 +1007,11 @@ struct super_block {\n" - " \tstruct xattr_handler\t**s_xattr;\n" - " \n" - " \tstruct list_head\ts_inodes;\t/* all inodes */\n" - "-\tstruct list_head\ts_dirty;\t/* dirty inodes */\n" - "-\tstruct list_head\ts_io;\t\t/* parked for writeback */\n" - "-\tstruct list_head\ts_more_io;\t/* parked for more writeback */\n" - "+\n" - "+\tstruct rb_root\t\ts_flush_root;\n" - "+\tunsigned long\t\ts_flush_count;\n" - "+\tstruct mutex\t\ts_flush_lock;\n" - "+\n" - " \tstruct hlist_head\ts_anon;\t\t/* anonymous dentries for (nfs) exporting */\n" - " \tstruct list_head\ts_files;\n" - " \n" - "@@ -1315,17 +1321,18 @@ struct super_operations {\n" - " * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on\n" - " * I_CLEAR? If not, why?\n" - " */\n" - "-#define I_DIRTY_SYNC\t\t1\n" - "-#define I_DIRTY_DATASYNC\t2\n" - "-#define I_DIRTY_PAGES\t\t4\n" - "-#define I_NEW\t\t\t8\n" - "-#define I_WILL_FREE\t\t16\n" - "-#define I_FREEING\t\t32\n" - "-#define I_CLEAR\t\t\t64\n" - "+#define I_DIRTY_SYNC\t\t(1 << 0)\n" - "+#define I_DIRTY_DATASYNC\t(1 << 1)\n" - "+#define I_DIRTY_PAGES\t\t(1 << 2)\n" - "+#define I_NEW\t\t\t(1 << 3)\n" - "+#define I_WILL_FREE\t\t(1 << 4)\n" - "+#define I_FREEING\t\t(1 << 5)\n" - "+#define I_CLEAR\t\t\t(1 << 6)\n" - " #define __I_LOCK\t\t7\n" - " #define I_LOCK\t\t\t(1 << __I_LOCK)\n" - " #define __I_SYNC\t\t8\n" - " #define I_SYNC\t\t\t(1 << __I_SYNC)\n" - "+#define I_DIRTY_NEVER\t\t(1 << 9)\n" - " \n" - " #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)\n" - " \n" - "Index: 2624rc3/include/linux/rbtree.h\n" - "===================================================================\n" - "--- 2624rc3.orig/include/linux/rbtree.h\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/include/linux/rbtree.h\t2007-11-27 17:40:53.000000000 -0800\n" - "@@ -135,6 +135,8 @@ static inline void rb_set_color(struct r\n" - " #define RB_EMPTY_ROOT(root)\t((root)->rb_node == NULL)\n" - " #define RB_EMPTY_NODE(node)\t(rb_parent(node) == node)\n" - " #define RB_CLEAR_NODE(node)\t(rb_set_parent(node, node))\n" - "+#define RB_LINKED_NODE(node)\t((node)->rb_parent_color || \\\n" - "+\t\t\t\t (node)->rb_left || (node)->rb_right)\n" - " \n" - " extern void rb_insert_color(struct rb_node *, struct rb_root *);\n" - " extern void rb_erase(struct rb_node *, struct rb_root *);\n" - "Index: 2624rc3/include/linux/writeback.h\n" - "===================================================================\n" - "--- 2624rc3.orig/include/linux/writeback.h\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/include/linux/writeback.h\t2007-11-27 11:12:53.000000000 -0800\n" - "@@ -72,6 +72,8 @@ void writeback_inodes(struct writeback_c\n" - " int inode_wait(void *);\n" - " void sync_inodes_sb(struct super_block *, int wait);\n" - " void sync_inodes(int wait);\n" - "+void flush_tree_remove(struct super_block *sb, struct inode *inode);\n" - "+\n" - " \n" - " /* writeback.h requires fs.h; it, too, is not included from here. */\n" - " static inline void wait_on_inode(struct inode *inode)\n" - "Index: 2624rc3/mm/shmem.c\n" - "===================================================================\n" - "--- 2624rc3.orig/mm/shmem.c\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/mm/shmem.c\t2007-11-27 11:13:45.000000000 -0800\n" - "@@ -2460,6 +2460,7 @@ static struct file_system_type tmpfs_fs_\n" - " \t.name\t\t= \"tmpfs\",\n" - " \t.get_sb\t\t= shmem_get_sb,\n" - " \t.kill_sb\t= kill_litter_super,\n" - "+\t.fs_flags\t= FS_ANONYMOUS,\n" - " };\n" - " static struct vfsmount *shm_mnt;\n" - " \n" - "Index: 2624rc3/mm/tiny-shmem.c\n" - "===================================================================\n" - "--- 2624rc3.orig/mm/tiny-shmem.c\t2007-11-16 21:16:36.000000000 -0800\n" - "+++ 2624rc3/mm/tiny-shmem.c\t2007-11-27 11:14:13.000000000 -0800\n" - "@@ -24,6 +24,7 @@ static struct file_system_type tmpfs_fs_\n" - " \t.name\t\t= \"tmpfs\",\n" - " \t.get_sb\t\t= ramfs_get_sb,\n" - " \t.kill_sb\t= kill_litter_super,\n" - "+\t.fs_flags\t= FS_ANONYMOUS,\n" - " };\n" - " \n" - static struct vfsmount *shm_mnt; -ed879f5a06bcabfba1513cc5bff7038de3138d2e7242c0b117dac341f810a825 +cc6f6466d72a8f8693174c3a642755a917c14e82ffb7b1666119e8c59a7354e9
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.