All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20071128192957.511EAB8310@localhost>

diff --git a/a/1.txt b/N1/1.txt
index 8a9c073..8b13789 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -1,825 +1 @@
->From mrubin@matchstick.corp.google.com Wed Nov 28 11:10:06 2007
-Message-Id: <20071128190121.716364000@matchstick.corp.google.com>
-Date: Wed, 28 Nov 2007 11:01:21 -0800
-From: mrubin@google.com
-To: mrubin@google.com
-Subject: [patch 1/1] Writeback fix for concurrent large and small file writes.
 
-From: Michael Rubin <mrubin@google.com>
-
-Fixing a bug where writing to large files while concurrently writing to
-smaller ones creates a situation where writeback cannot keep up with the
-traffic and memory balloons until we hit the threshold watermark. This
-can result in surprising latency spikes when syncing. This latency
-can take minutes on large memory systems. Upon request I can provide
-a test to reproduce this situation. The flush tree fixes this issue and
-fixes several other minor issues with fairness also.
-
-1) Adding a data structure to guarantee fairness when writing inodes
-to disk.  The flush_tree is based on an rbtree. The only difference is
-how duplicate keys are chained off the same rb_node.
-
-2) Added a FS flag to mark file systems that are not disk backed so we
-don't have to flush them. Not sure I marked all of them. But just marking
-these improves writeback performance.
-
-3) Added an inode flag to allow inodes to be marked so that they are
-never written back to disk. See get_pipe_inode.
-
-Under autotest this patch has passed: fsx, bonnie, and iozone. I am
-currently writing more writeback focused tests (which so far have been
-passed) to add into autotest.
-
-Signed-off-by: Michael Rubin <mrubin@google.com>
----
-
-Index: 2624rc3/fs/block_dev.c
-===================================================================
---- 2624rc3.orig/fs/block_dev.c	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/fs/block_dev.c	2007-11-27 10:51:26.000000000 -0800
-@@ -518,6 +518,7 @@ static struct file_system_type bd_type =
- 	.name		= "bdev",
- 	.get_sb		= bd_get_sb,
- 	.kill_sb	= kill_anon_super,
-+	.fs_flags	= FS_ANONYMOUS,
- };
- 
- static struct vfsmount *bd_mnt __read_mostly;
-Index: 2624rc3/fs/fs-writeback.c
-===================================================================
---- 2624rc3.orig/fs/fs-writeback.c	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/fs/fs-writeback.c	2007-11-27 17:40:19.000000000 -0800
-@@ -23,8 +23,174 @@
- #include <linux/blkdev.h>
- #include <linux/backing-dev.h>
- #include <linux/buffer_head.h>
-+#include <linux/rbtree.h>
- #include "internal.h"
- 
-+#define rb_to_inode(node) rb_entry((node), struct inode, i_flush_node)
-+
-+/*
-+ * When inodes are parked for writeback they are parked in the
-+ * flush_tree. The flush tree is a data structure based on an rb tree.
-+ *
-+ * Duplicate keys are handled by making a list in the tree for each key
-+ * value. The order of how we choose the next inode to flush is decided
-+ * by two fields. First the earliest dirtied_when value. If there are
-+ * duplicate dirtied_when values then the earliest i_flushed_when value
-+ * determines who gets flushed next.
-+ *
-+ * The flush tree organizes the dirtied_when keys with the rb_tree. Any
-+ * inodes with a duplicate dirtied_when value are link listed together. This
-+ * link list is sorted by the inode's i_flushed_when. When both the
-+ * dirtied_when and the i_flushed_when are identical the order in the
-+ * linked list determines the order we flush the inodes.
-+ */
-+
-+/*
-+ * Find a rb_node matching the key in the flush tree. There are no duplicate
-+ * rb_nodes in the tree. Instead they are chained off the first node.
-+ */
-+static struct inode *flush_tree_search(struct super_block *sb,
-+				       unsigned long ts)
-+{
-+	struct rb_node *n = sb->s_flush_root.rb_node;
-+	assert_spin_locked(&inode_lock);
-+	while (n) {
-+		struct inode *inode = rb_to_inode(n);
-+		if (time_before(ts, inode->dirtied_when)) {
-+			n = n->rb_left;
-+		} else if (time_after(ts, inode->dirtied_when)) {
-+			n = n->rb_right;
-+		} else {
-+			return inode;
-+		}
-+	}
-+	return NULL;
-+}
-+
-+/*
-+ * Inserting an inode into the flush tree. The tree is keyed by the
-+ * dirtied_when member.
-+ *
-+ * If there is a duplicate key in the tree already the new inode is put
-+ * on the tail of a list of the rb_node.
-+ * All inserted inodes must have one of the I_DIRTY flags set.
-+ */
-+static void flush_tree_insert(struct super_block *sb, struct inode *inode)
-+{
-+	struct rb_node **new = &(sb->s_flush_root.rb_node);
-+	struct rb_node *parent = NULL;
-+
-+	assert_spin_locked(&inode_lock);
-+	BUG_ON((inode->i_state & I_DIRTY) == 0);
-+	BUG_ON(inode->i_state & (I_FREEING|I_CLEAR));
-+	BUG_ON(RB_LINKED_NODE(&inode->i_flush_node));
-+
-+	sb->s_flush_count++;
-+
-+	list_del_init(&inode->i_list);
-+	while (*new) {
-+		struct inode *this = rb_to_inode(*new);
-+		parent = *new;
-+		if (time_before(inode->dirtied_when, this->dirtied_when))
-+			new = &((*new)->rb_left);
-+		else if (time_after(inode->dirtied_when,
-+				      this->dirtied_when)) {
-+			new = &((*new)->rb_right);
-+		} else {
-+			list_add_tail(&inode->i_list, &this->i_list);
-+			return;
-+		}
-+	}
-+
-+	/* Add in the new node and rebalance the tree */
-+	rb_link_node(&inode->i_flush_node, parent, new);
-+	rb_insert_color(&inode->i_flush_node, &sb->s_flush_root);
-+}
-+
-+
-+/*
-+ * Here we return the inode that has the smallest key in the flush tree
-+ * that is greater than the parameter "prev_time".
-+ */
-+static struct inode *flush_tree_min_greater(struct super_block *sb,
-+					    unsigned long prev_time)
-+{
-+	struct rb_node *node = sb->s_flush_root.rb_node;
-+	struct inode *bsf = NULL;
-+	/* best so far */
-+	assert_spin_locked(&inode_lock);
-+	while (node) {
-+		struct inode *data = rb_to_inode(node);
-+		/* Just trying to get lucky */
-+		if ((prev_time + 1) == data->dirtied_when)
-+			return data;
-+
-+		/* If this value is greater than our prev_time and is
-+		less than the best so far, this is our new best so far.*/
-+		if ((data->dirtied_when > prev_time) &&
-+		    (bsf ? bsf->dirtied_when > data->dirtied_when : 1))
-+			bsf = data;
-+
-+		/* Search all the way down to the bottom of the tree */
-+		if (time_before(prev_time, data->dirtied_when))
-+			node = node->rb_left;
-+		else if (time_after_eq(prev_time, data->dirtied_when))
-+			node = node->rb_right;
-+	}
-+	return bsf;
-+}
-+
-+/*
-+ * Here is where we iterate to find the next inode to process. The
-+ * strategy is to first look for any other inodes with the same dirtied_when
-+ * value. If we have already processed that node then we need to find
-+ * the next highest dirtied_when value in the tree.
-+ */
-+static struct inode *flush_tree_next(struct super_block *sb,
-+				     unsigned long start_time,
-+				     unsigned long prev_time)
-+{
-+	struct inode *inode = flush_tree_search(sb, prev_time);
-+	assert_spin_locked(&inode_lock);
-+	/* We have a duplicate timed inode as the last processed */
-+	if (inode && (time_before(inode->i_flushed_when, start_time)))
-+		return inode;
-+
-+	/* Now we have to find the oldest one next */
-+	return flush_tree_min_greater(sb, prev_time);
-+}
-+
-+/* Removing a node from the flushtree. */
-+void flush_tree_remove(struct super_block *sb, struct inode *inode)
-+{
-+	struct rb_node *rb_node = &inode->i_flush_node;
-+	struct rb_root *rb_root = &sb->s_flush_root;
-+
-+	assert_spin_locked(&inode_lock);
-+	BUG_ON((inode->i_state & I_DIRTY) == 0);
-+
-+	sb->s_flush_count--;
-+
-+	/* There is no chain on this inode. Just remove it from the tree */
-+	if (list_empty(&inode->i_list)) {
-+		BUG_ON(!RB_LINKED_NODE(rb_node));
-+		rb_erase(rb_node, rb_root);
-+		memset(rb_node, 0, sizeof(*rb_node));
-+		return;
-+	}
-+
-+	/* This node is on a chain AND is in the rb_tree */
-+	if (RB_LINKED_NODE(rb_node)) {
-+		struct inode *new = list_entry(inode->i_list.next,
-+					       struct inode, i_list);
-+		rb_replace_node(rb_node, &new->i_flush_node, rb_root);
-+		memset(rb_node, 0, sizeof(*rb_node));
-+	}
-+	/* Take it off the list */
-+	list_del_init(&inode->i_list);
-+}
-+
-+
- /**
-  *	__mark_inode_dirty -	internal function
-  *	@inode: inode to mark
-@@ -32,7 +198,7 @@
-  *	Mark an inode as dirty. Callers should use mark_inode_dirty or
-  *  	mark_inode_dirty_sync.
-  *
-- * Put the inode on the super block's dirty list.
-+ * Put the inode in the super block's flush_tree.
-  *
-  * CAREFUL! We mark it dirty unconditionally, but move it onto the
-  * dirty list only if it is hashed or if it refers to a blockdev.
-@@ -75,6 +241,13 @@ void __mark_inode_dirty(struct inode *in
- 	if ((inode->i_state & flags) == flags)
- 		return;
- 
-+	/* anonymous file systems do not write data back */
-+	if (inode->i_sb->s_type->fs_flags & FS_ANONYMOUS)
-+		return;
-+
-+	if (inode->i_state & I_DIRTY_NEVER)
-+		return;
-+
- 	if (unlikely(block_dump)) {
- 		struct dentry *dentry = NULL;
- 		const char *name = "?";
-@@ -97,14 +270,7 @@ void __mark_inode_dirty(struct inode *in
- 	if ((inode->i_state & flags) != flags) {
- 		const int was_dirty = inode->i_state & I_DIRTY;
- 
--		inode->i_state |= flags;
--
--		/*
--		 * If the inode is being synced, just update its dirty state.
--		 * The unlocker will place the inode on the appropriate
--		 * superblock list, based upon its state.
--		 */
--		if (inode->i_state & I_SYNC)
-+		if (inode->i_state & (I_FREEING|I_CLEAR))
- 			goto out;
- 
- 		/*
-@@ -115,7 +281,15 @@ void __mark_inode_dirty(struct inode *in
- 			if (hlist_unhashed(&inode->i_hash))
- 				goto out;
- 		}
--		if (inode->i_state & (I_FREEING|I_CLEAR))
-+
-+		inode->i_state |= flags;
-+
-+		/*
-+		 * If the inode is being synced, just update its dirty state.
-+		 * The unlocker will place the inode on the appropriate
-+		 * superblock list, based upon its state.
-+		 */
-+		if (inode->i_state & I_SYNC)
- 			goto out;
- 
- 		/*
-@@ -124,7 +298,8 @@ void __mark_inode_dirty(struct inode *in
- 		 */
- 		if (!was_dirty) {
- 			inode->dirtied_when = jiffies;
--			list_move(&inode->i_list, &sb->s_dirty);
-+			inode->i_flushed_when = jiffies;
-+			flush_tree_insert(sb, inode);
- 		}
- 	}
- out:
-@@ -140,38 +315,6 @@ static int write_inode(struct inode *ino
- 	return 0;
- }
- 
--/*
-- * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
-- * furthest end of its superblock's dirty-inode list.
-- *
-- * Before stamping the inode's ->dirtied_when, we check to see whether it is
-- * already the most-recently-dirtied inode on the s_dirty list.  If that is
-- * the case then the inode must have been redirtied while it was being written
-- * out and we don't reset its dirtied_when.
-- */
--static void redirty_tail(struct inode *inode)
--{
--	struct super_block *sb = inode->i_sb;
--
--	if (!list_empty(&sb->s_dirty)) {
--		struct inode *tail_inode;
--
--		tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list);
--		if (!time_after_eq(inode->dirtied_when,
--				tail_inode->dirtied_when))
--			inode->dirtied_when = jiffies;
--	}
--	list_move(&inode->i_list, &sb->s_dirty);
--}
--
--/*
-- * requeue inode for re-scanning after sb->s_io list is exhausted.
-- */
--static void requeue_io(struct inode *inode)
--{
--	list_move(&inode->i_list, &inode->i_sb->s_more_io);
--}
--
- static void inode_sync_complete(struct inode *inode)
- {
- 	/*
-@@ -181,38 +324,9 @@ static void inode_sync_complete(struct i
- 	wake_up_bit(&inode->i_state, __I_SYNC);
- }
- 
--/*
-- * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
-- */
--static void move_expired_inodes(struct list_head *delaying_queue,
--			       struct list_head *dispatch_queue,
--				unsigned long *older_than_this)
--{
--	while (!list_empty(delaying_queue)) {
--		struct inode *inode = list_entry(delaying_queue->prev,
--						struct inode, i_list);
--		if (older_than_this &&
--			time_after(inode->dirtied_when, *older_than_this))
--			break;
--		list_move(&inode->i_list, dispatch_queue);
--	}
--}
--
--/*
-- * Queue all expired dirty inodes for io, eldest first.
-- */
--static void queue_io(struct super_block *sb,
--				unsigned long *older_than_this)
--{
--	list_splice_init(&sb->s_more_io, sb->s_io.prev);
--	move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);
--}
--
- int sb_has_dirty_inodes(struct super_block *sb)
- {
--	return !list_empty(&sb->s_dirty) ||
--	       !list_empty(&sb->s_io) ||
--	       !list_empty(&sb->s_more_io);
-+	return !RB_EMPTY_ROOT(&sb->s_flush_root);
- }
- EXPORT_SYMBOL(sb_has_dirty_inodes);
- 
-@@ -237,6 +351,7 @@ __sync_single_inode(struct inode *inode,
- 	BUG_ON(inode->i_state & I_SYNC);
- 
- 	/* Set I_SYNC, reset I_DIRTY */
-+	flush_tree_remove(inode->i_sb, inode);
- 	dirty = inode->i_state & I_DIRTY;
- 	inode->i_state |= I_SYNC;
- 	inode->i_state &= ~I_DIRTY;
-@@ -279,12 +394,15 @@ __sync_single_inode(struct inode *inode,
- 			 */
- 			if (wbc->for_kupdate) {
- 				/*
--				 * For the kupdate function we move the inode
--				 * to s_more_io so it will get more writeout as
--				 * soon as the queue becomes uncongested.
-+				 * For the kupdate function we leave
-+				 * dirtied_when field untouched and return
-+				 * it to the flush_tree. The next iteration
-+				 * of kupdate will flush more pages when
-+				 * the queue is no longer congested.
- 				 */
- 				inode->i_state |= I_DIRTY_PAGES;
--				requeue_io(inode);
-+				wbc->more_io = 1;
-+				flush_tree_insert(inode->i_sb, inode);
- 			} else {
- 				/*
- 				 * Otherwise fully redirty the inode so that
-@@ -294,14 +412,15 @@ __sync_single_inode(struct inode *inode,
- 				 * all the other files.
- 				 */
- 				inode->i_state |= I_DIRTY_PAGES;
--				redirty_tail(inode);
-+				inode->dirtied_when = jiffies;
-+				flush_tree_insert(inode->i_sb, inode);
- 			}
- 		} else if (inode->i_state & I_DIRTY) {
- 			/*
- 			 * Someone redirtied the inode while were writing back
- 			 * the pages.
- 			 */
--			redirty_tail(inode);
-+			flush_tree_insert(inode->i_sb, inode);
- 		} else if (atomic_read(&inode->i_count)) {
- 			/*
- 			 * The inode is clean, inuse
-@@ -333,23 +452,22 @@ __writeback_single_inode(struct inode *i
- 	else
- 		WARN_ON(inode->i_state & I_WILL_FREE);
- 
-+	BUG_ON((inode->i_state & I_DIRTY) == 0);
-+
-+	/*
-+	 * If the inode is locked and we are not going to wait for it
-+	 * to be unlocked then we can just exit the routine. Since the
-+	 * inode is marked I_DIRTY it will be inserted into the flush
-+	 * tree by sync_single_inode when the I_SYNC is released.
-+	 */
- 	if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
--		struct address_space *mapping = inode->i_mapping;
- 		int ret;
--
--		/*
--		 * We're skipping this inode because it's locked, and we're not
--		 * doing writeback-for-data-integrity.  Move it to s_more_io so
--		 * that writeback can proceed with the other inodes on s_io.
--		 * We'll have another go at writing back this inode when we
--		 * completed a full scan of s_io.
--		 */
--		requeue_io(inode);
--
-+		struct address_space *mapping = inode->i_mapping;
- 		/*
- 		 * Even if we don't actually write the inode itself here,
- 		 * we can at least start some of the data writeout..
- 		 */
-+		wbc->more_io = 1;
- 		spin_unlock(&inode_lock);
- 		ret = do_writepages(mapping, wbc);
- 		spin_lock(&inode_lock);
-@@ -383,8 +501,8 @@ __writeback_single_inode(struct inode *i
-  * If we're a pdlfush thread, then implement pdflush collision avoidance
-  * against the entire list.
-  *
-- * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
-- * that it can be located for waiting on in __writeback_single_inode().
-+ * WB_SYNC_HOLD is a hack for sys_sync(): so that it can be located for
-+ * waiting on in __writeback_single_inode().
-  *
-  * Called under inode_lock.
-  *
-@@ -398,28 +516,29 @@ __writeback_single_inode(struct inode *i
-  * a queue with that address_space.  (Easy: have a global "dirty superblocks"
-  * list).
-  *
-- * The inodes to be written are parked on sb->s_io.  They are moved back onto
-- * sb->s_dirty as they are selected for writing.  This way, none can be missed
-- * on the writer throttling path, and we get decent balancing between many
-- * throttled threads: we don't want them all piling up on inode_sync_wait.
-+ * The inodes to be written are inserted into the flush_tree.
-  */
- static void
- sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
- {
- 	const unsigned long start = jiffies;	/* livelock avoidance */
-+	struct inode *inode = NULL;
-+	unsigned long prev_time = 0;
- 
--	if (!wbc->for_kupdate || list_empty(&sb->s_io))
--		queue_io(sb, wbc->older_than_this);
-+	if (sb->s_type->fs_flags & FS_ANONYMOUS)
-+		return;
- 
--	while (!list_empty(&sb->s_io)) {
--		struct inode *inode = list_entry(sb->s_io.prev,
--						struct inode, i_list);
-+	mutex_lock(&sb->s_flush_lock);
-+	spin_lock(&inode_lock);
-+	while ((inode = flush_tree_next(sb, start, prev_time)) != NULL) {
- 		struct address_space *mapping = inode->i_mapping;
- 		struct backing_dev_info *bdi = mapping->backing_dev_info;
- 		long pages_skipped;
- 
-+		prev_time = inode->dirtied_when;
-+		inode->i_flushed_when = start;
-+
- 		if (!bdi_cap_writeback_dirty(bdi)) {
--			redirty_tail(inode);
- 			if (sb_is_blkdev_sb(sb)) {
- 				/*
- 				 * Dirty memory-backed blockdev: the ramdisk
-@@ -436,17 +555,17 @@ sync_sb_inodes(struct super_block *sb, s
- 		}
- 
- 		if (wbc->nonblocking && bdi_write_congested(bdi)) {
-+			wbc->more_io = 1;
- 			wbc->encountered_congestion = 1;
- 			if (!sb_is_blkdev_sb(sb))
- 				break;		/* Skip a congested fs */
--			requeue_io(inode);
- 			continue;		/* Skip a congested blockdev */
- 		}
- 
- 		if (wbc->bdi && bdi != wbc->bdi) {
- 			if (!sb_is_blkdev_sb(sb))
- 				break;		/* fs has the wrong queue */
--			requeue_io(inode);
-+			wbc->more_io = 1;
- 			continue;		/* blockdev has wrong queue */
- 		}
- 
-@@ -454,6 +573,11 @@ sync_sb_inodes(struct super_block *sb, s
- 		if (time_after(inode->dirtied_when, start))
- 			break;
- 
-+		/* Was this inode dirtied too recently? */
-+		if (wbc->older_than_this &&
-+		    time_after(inode->dirtied_when, *wbc->older_than_this))
-+			break;
-+
- 		/* Is another pdflush already flushing this queue? */
- 		if (current_is_pdflush() && !writeback_acquire(bdi))
- 			break;
-@@ -462,19 +586,8 @@ sync_sb_inodes(struct super_block *sb, s
- 		__iget(inode);
- 		pages_skipped = wbc->pages_skipped;
- 		__writeback_single_inode(inode, wbc);
--		if (wbc->sync_mode == WB_SYNC_HOLD) {
--			inode->dirtied_when = jiffies;
--			list_move(&inode->i_list, &sb->s_dirty);
--		}
- 		if (current_is_pdflush())
- 			writeback_release(bdi);
--		if (wbc->pages_skipped != pages_skipped) {
--			/*
--			 * writeback is not making progress due to locked
--			 * buffers.  Skip this inode for now.
--			 */
--			redirty_tail(inode);
--		}
- 		spin_unlock(&inode_lock);
- 		iput(inode);
- 		cond_resched();
-@@ -482,8 +595,9 @@ sync_sb_inodes(struct super_block *sb, s
- 		if (wbc->nr_to_write <= 0)
- 			break;
- 	}
--	if (!list_empty(&sb->s_more_io))
--		wbc->more_io = 1;
-+
-+	spin_unlock(&inode_lock);
-+	mutex_unlock(&sb->s_flush_lock);
- 	return;		/* Leave any unwritten inodes on s_io */
- }
- 
-@@ -492,9 +606,9 @@ sync_sb_inodes(struct super_block *sb, s
-  *
-  * Note:
-  * We don't need to grab a reference to superblock here. If it has non-empty
-- * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
-- * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all
-- * empty. Since __sync_single_inode() regains inode_lock before it finally moves
-+ * flush_tree it hasn't been killed yet and kill_super() won't proceed
-+ * past sync_inodes_sb() until the flush_tree is empty.
-+ * Since __sync_single_inode() regains inode_lock before it finally moves
-  * inode from superblock lists we are OK.
-  *
-  * If `older_than_this' is non-zero then only flush inodes which have a
-@@ -527,9 +641,7 @@ restart:
- 			 */
- 			if (down_read_trylock(&sb->s_umount)) {
- 				if (sb->s_root) {
--					spin_lock(&inode_lock);
- 					sync_sb_inodes(sb, wbc);
--					spin_unlock(&inode_lock);
- 				}
- 				up_read(&sb->s_umount);
- 			}
-@@ -546,7 +658,7 @@ restart:
- /*
-  * writeback and wait upon the filesystem's dirty inodes.  The caller will
-  * do this in two passes - one to write, and one to wait.  WB_SYNC_HOLD is
-- * used to park the written inodes on sb->s_dirty for the wait pass.
-+ * used to park the written inodes on the flush_tree for the wait pass.
-  *
-  * A finite limit is set on the number of pages which will be written.
-  * To prevent infinite livelock of sys_sync().
-@@ -568,9 +680,7 @@ void sync_inodes_sb(struct super_block *
- 			(inodes_stat.nr_inodes - inodes_stat.nr_unused) +
- 			nr_dirty + nr_unstable;
- 	wbc.nr_to_write += wbc.nr_to_write / 2;		/* Bit more for luck */
--	spin_lock(&inode_lock);
- 	sync_sb_inodes(sb, &wbc);
--	spin_unlock(&inode_lock);
- }
- 
- /*
-Index: 2624rc3/fs/inode.c
-===================================================================
---- 2624rc3.orig/fs/inode.c	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/fs/inode.c	2007-11-27 10:54:34.000000000 -0800
-@@ -143,6 +143,7 @@ static struct inode *alloc_inode(struct 
- 		inode->i_cdev = NULL;
- 		inode->i_rdev = 0;
- 		inode->dirtied_when = 0;
-+		memset(&inode->i_flush_node, 0, sizeof(inode->i_flush_node));
- 		if (security_inode_alloc(inode)) {
- 			if (inode->i_sb->s_op->destroy_inode)
- 				inode->i_sb->s_op->destroy_inode(inode);
-@@ -1044,6 +1045,10 @@ void generic_delete_inode(struct inode *
- {
- 	const struct super_operations *op = inode->i_sb->s_op;
- 
-+	if ((inode->i_state & I_DIRTY)) {
-+		flush_tree_remove(inode->i_sb, inode);
-+		inode->i_state &= ~I_DIRTY;
-+	}
- 	list_del_init(&inode->i_list);
- 	list_del_init(&inode->i_sb_list);
- 	inode->i_state |= I_FREEING;
-Index: 2624rc3/fs/pipe.c
-===================================================================
---- 2624rc3.orig/fs/pipe.c	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/fs/pipe.c	2007-11-27 10:59:15.000000000 -0800
-@@ -931,12 +931,10 @@ static struct inode * get_pipe_inode(voi
- 	inode->i_fop = &rdwr_pipe_fops;
- 
- 	/*
--	 * Mark the inode dirty from the very beginning,
--	 * that way it will never be moved to the dirty
--	 * list because "mark_inode_dirty()" will think
--	 * that it already _is_ on the dirty list.
-+	 * Mark the inode "never dirty" from the very beginning,
-+	 * that way it will never be written back.
- 	 */
--	inode->i_state = I_DIRTY;
-+	inode->i_state = I_DIRTY_NEVER;
- 	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
- 	inode->i_uid = current->fsuid;
- 	inode->i_gid = current->fsgid;
-Index: 2624rc3/fs/proc/root.c
-===================================================================
---- 2624rc3.orig/fs/proc/root.c	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/fs/proc/root.c	2007-11-27 11:00:19.000000000 -0800
-@@ -102,6 +102,7 @@ struct file_system_type proc_fs_type = {
- 	.name		= "proc",
- 	.get_sb		= proc_get_sb,
- 	.kill_sb	= proc_kill_sb,
-+	.fs_flags	= FS_ANONYMOUS,
- };
- 
- void __init proc_root_init(void)
-Index: 2624rc3/fs/super.c
-===================================================================
---- 2624rc3.orig/fs/super.c	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/fs/super.c	2007-11-27 12:43:29.000000000 -0800
-@@ -61,9 +61,8 @@ static struct super_block *alloc_super(s
- 			s = NULL;
- 			goto out;
- 		}
--		INIT_LIST_HEAD(&s->s_dirty);
--		INIT_LIST_HEAD(&s->s_io);
--		INIT_LIST_HEAD(&s->s_more_io);
-+		s->s_flush_root = RB_ROOT;
-+		mutex_init(&s->s_flush_lock);
- 		INIT_LIST_HEAD(&s->s_files);
- 		INIT_LIST_HEAD(&s->s_instances);
- 		INIT_HLIST_HEAD(&s->s_anon);
-@@ -103,6 +102,7 @@ out:
-  */
- static inline void destroy_super(struct super_block *s)
- {
-+	mutex_destroy(&s->s_flush_lock);
- 	security_sb_free(s);
- 	kfree(s->s_subtype);
- 	kfree(s);
-Index: 2624rc3/fs/sysfs/mount.c
-===================================================================
---- 2624rc3.orig/fs/sysfs/mount.c	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/fs/sysfs/mount.c	2007-11-27 11:02:36.000000000 -0800
-@@ -80,6 +80,7 @@ static struct file_system_type sysfs_fs_
- 	.name		= "sysfs",
- 	.get_sb		= sysfs_get_sb,
- 	.kill_sb	= kill_anon_super,
-+	.fs_flags	= FS_ANONYMOUS,
- };
- 
- int __init sysfs_init(void)
-Index: 2624rc3/include/linux/fs.h
-===================================================================
---- 2624rc3.orig/include/linux/fs.h	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/include/linux/fs.h	2007-11-27 12:41:22.000000000 -0800
-@@ -90,9 +90,10 @@ extern int dir_notify_enable;
- #define SEL_EX		4
- 
- /* public flags for file_system_type */
--#define FS_REQUIRES_DEV 1 
--#define FS_BINARY_MOUNTDATA 2
--#define FS_HAS_SUBTYPE 4
-+#define FS_REQUIRES_DEV		1
-+#define FS_BINARY_MOUNTDATA	2
-+#define FS_HAS_SUBTYPE		4
-+#define FS_ANONYMOUS		8
- #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
- #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move()
- 					 * during rename() internally.
-@@ -285,6 +286,7 @@ extern int dir_notify_enable;
- #include <linux/pid.h>
- #include <linux/mutex.h>
- #include <linux/capability.h>
-+#include <linux/rbtree.h>
- 
- #include <asm/atomic.h>
- #include <asm/semaphore.h>
-@@ -592,6 +594,8 @@ struct inode {
- 	struct hlist_node	i_hash;
- 	struct list_head	i_list;
- 	struct list_head	i_sb_list;
-+	struct rb_node		i_flush_node;
-+	unsigned long		i_flushed_when;
- 	struct list_head	i_dentry;
- 	unsigned long		i_ino;
- 	atomic_t		i_count;
-@@ -1003,9 +1007,11 @@ struct super_block {
- 	struct xattr_handler	**s_xattr;
- 
- 	struct list_head	s_inodes;	/* all inodes */
--	struct list_head	s_dirty;	/* dirty inodes */
--	struct list_head	s_io;		/* parked for writeback */
--	struct list_head	s_more_io;	/* parked for more writeback */
-+
-+	struct rb_root		s_flush_root;
-+	unsigned long		s_flush_count;
-+	struct mutex		s_flush_lock;
-+
- 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
- 	struct list_head	s_files;
- 
-@@ -1315,17 +1321,18 @@ struct super_operations {
-  * Q: igrab() only checks on (I_FREEING|I_WILL_FREE).  Should it also check on
-  *    I_CLEAR?  If not, why?
-  */
--#define I_DIRTY_SYNC		1
--#define I_DIRTY_DATASYNC	2
--#define I_DIRTY_PAGES		4
--#define I_NEW			8
--#define I_WILL_FREE		16
--#define I_FREEING		32
--#define I_CLEAR			64
-+#define I_DIRTY_SYNC		(1 << 0)
-+#define I_DIRTY_DATASYNC	(1 << 1)
-+#define I_DIRTY_PAGES		(1 << 2)
-+#define I_NEW			(1 << 3)
-+#define I_WILL_FREE		(1 << 4)
-+#define I_FREEING		(1 << 5)
-+#define I_CLEAR			(1 << 6)
- #define __I_LOCK		7
- #define I_LOCK			(1 << __I_LOCK)
- #define __I_SYNC		8
- #define I_SYNC			(1 << __I_SYNC)
-+#define I_DIRTY_NEVER		(1 << 9)
- 
- #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
- 
-Index: 2624rc3/include/linux/rbtree.h
-===================================================================
---- 2624rc3.orig/include/linux/rbtree.h	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/include/linux/rbtree.h	2007-11-27 17:40:53.000000000 -0800
-@@ -135,6 +135,8 @@ static inline void rb_set_color(struct r
- #define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
- #define RB_EMPTY_NODE(node)	(rb_parent(node) == node)
- #define RB_CLEAR_NODE(node)	(rb_set_parent(node, node))
-+#define RB_LINKED_NODE(node)	((node)->rb_parent_color || \
-+				 (node)->rb_left || (node)->rb_right)
- 
- extern void rb_insert_color(struct rb_node *, struct rb_root *);
- extern void rb_erase(struct rb_node *, struct rb_root *);
-Index: 2624rc3/include/linux/writeback.h
-===================================================================
---- 2624rc3.orig/include/linux/writeback.h	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/include/linux/writeback.h	2007-11-27 11:12:53.000000000 -0800
-@@ -72,6 +72,8 @@ void writeback_inodes(struct writeback_c
- int inode_wait(void *);
- void sync_inodes_sb(struct super_block *, int wait);
- void sync_inodes(int wait);
-+void flush_tree_remove(struct super_block *sb, struct inode *inode);
-+
- 
- /* writeback.h requires fs.h; it, too, is not included from here. */
- static inline void wait_on_inode(struct inode *inode)
-Index: 2624rc3/mm/shmem.c
-===================================================================
---- 2624rc3.orig/mm/shmem.c	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/mm/shmem.c	2007-11-27 11:13:45.000000000 -0800
-@@ -2460,6 +2460,7 @@ static struct file_system_type tmpfs_fs_
- 	.name		= "tmpfs",
- 	.get_sb		= shmem_get_sb,
- 	.kill_sb	= kill_litter_super,
-+	.fs_flags	= FS_ANONYMOUS,
- };
- static struct vfsmount *shm_mnt;
- 
-Index: 2624rc3/mm/tiny-shmem.c
-===================================================================
---- 2624rc3.orig/mm/tiny-shmem.c	2007-11-16 21:16:36.000000000 -0800
-+++ 2624rc3/mm/tiny-shmem.c	2007-11-27 11:14:13.000000000 -0800
-@@ -24,6 +24,7 @@ static struct file_system_type tmpfs_fs_
- 	.name		= "tmpfs",
- 	.get_sb		= ramfs_get_sb,
- 	.kill_sb	= kill_litter_super,
-+	.fs_flags	= FS_ANONYMOUS,
- };
- 
- static struct vfsmount *shm_mnt;
diff --git a/a/content_digest b/N1/content_digest
index fb1a084..e6969dd 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -8,830 +8,5 @@
  " wfg@mail.ustc.edu.cn\0"
  "\00:1\0"
  "b\0"
- ">From mrubin@matchstick.corp.google.com Wed Nov 28 11:10:06 2007\n"
- "Message-Id: <20071128190121.716364000@matchstick.corp.google.com>\n"
- "Date: Wed, 28 Nov 2007 11:01:21 -0800\n"
- "From: mrubin@google.com\n"
- "To: mrubin@google.com\n"
- "Subject: [patch 1/1] Writeback fix for concurrent large and small file writes.\n"
- "\n"
- "From: Michael Rubin <mrubin@google.com>\n"
- "\n"
- "Fixing a bug where writing to large files while concurrently writing to\n"
- "smaller ones creates a situation where writeback cannot keep up with the\n"
- "traffic and memory baloons until the we hit the threshold watermark. This\n"
- "can result in surprising latency spikes when syncing. This latency\n"
- "can take minutes on large memory systems. Upon request I can provide\n"
- "a test to reproduce this situation. The flush tree fixes this issue and\n"
- "fixes several other minor issues with fairness also.\n"
- "\n"
- "1) Adding a data structure to guarantee fairness when writing inodes\n"
- "to disk.  The flush_tree is based on an rbtree. The only difference is\n"
- "how duplicate keys are chained off the same rb_node.\n"
- "\n"
- "2) Added a FS flag to mark file systems that are not disk backed so we\n"
- "don't have to flush them. Not sure I marked all of them. But just marking\n"
- "these improves writeback performance.\n"
- "\n"
- "3) Added an inode flag to allow inodes to be marked so that they are\n"
- "never written back to disk. See get_pipe_inode.\n"
- "\n"
- "Under autotest this patch has passed: fsx, bonnie, and iozone. I am\n"
- "currently writing more writeback focused tests (which so far have been\n"
- "passed) to add into autotest.\n"
- "\n"
- "Signed-off-by: Michael Rubin <mrubin@google.com>\n"
- "---\n"
- "\n"
- "Index: 2624rc3/fs/block_dev.c\n"
- "===================================================================\n"
- "--- 2624rc3.orig/fs/block_dev.c\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/fs/block_dev.c\t2007-11-27 10:51:26.000000000 -0800\n"
- "@@ -518,6 +518,7 @@ static struct file_system_type bd_type =\n"
- " \t.name\t\t= \"bdev\",\n"
- " \t.get_sb\t\t= bd_get_sb,\n"
- " \t.kill_sb\t= kill_anon_super,\n"
- "+\t.fs_flags\t= FS_ANONYMOUS,\n"
- " };\n"
- " \n"
- " static struct vfsmount *bd_mnt __read_mostly;\n"
- "Index: 2624rc3/fs/fs-writeback.c\n"
- "===================================================================\n"
- "--- 2624rc3.orig/fs/fs-writeback.c\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/fs/fs-writeback.c\t2007-11-27 17:40:19.000000000 -0800\n"
- "@@ -23,8 +23,174 @@\n"
- " #include <linux/blkdev.h>\n"
- " #include <linux/backing-dev.h>\n"
- " #include <linux/buffer_head.h>\n"
- "+#include <linux/rbtree.h>\n"
- " #include \"internal.h\"\n"
- " \n"
- "+#define rb_to_inode(node) rb_entry((node), struct inode, i_flush_node)\n"
- "+\n"
- "+/*\n"
- "+ * When inodes are parked for writeback they are parked in the\n"
- "+ * flush_tree. The flush tree is a data structure based on an rb tree.\n"
- "+ *\n"
- "+ * Duplicate keys are handled by making a list in the tree for each key\n"
- "+ * value. The order of how we choose the next inode to flush is decided\n"
- "+ * by two fields. First the earliest dirtied_when value. If there are\n"
- "+ * duplicate dirtied_when values then the earliest i_flushed_when value\n"
- "+ * determines who gets flushed next.\n"
- "+ *\n"
- "+ * The flush tree organizes the dirtied_when keys with the rb_tree. Any\n"
- "+ * inodes with a duplicate dirtied_when value are link listed together. This\n"
- "+ * link list is sorted by the inode's i_flushed_when. When both the\n"
- "+ * dirited_when and the i_flushed_when are indentical the order in the\n"
- "+ * linked list determines the order we flush the inodes.\n"
- "+ */\n"
- "+\n"
- "+/*\n"
- "+ * Find a rb_node matching the key in the flush tree. There are no duplicate\n"
- "+ * rb_nodes in the tree. Instead they are chained off the first node.\n"
- "+ */\n"
- "+static struct inode *flush_tree_search(struct super_block *sb,\n"
- "+\t\t\t\t       unsigned long ts)\n"
- "+{\n"
- "+\tstruct rb_node *n = sb->s_flush_root.rb_node;\n"
- "+\tassert_spin_locked(&inode_lock);\n"
- "+\twhile (n) {\n"
- "+\t\tstruct inode *inode = rb_to_inode(n);\n"
- "+\t\tif (time_before(ts, inode->dirtied_when)) {\n"
- "+\t\t\tn = n->rb_left;\n"
- "+\t\t} else if (time_after(ts, inode->dirtied_when)) {\n"
- "+\t\t\tn = n->rb_right;\n"
- "+\t\t} else {\n"
- "+\t\t\treturn inode;\n"
- "+\t\t}\n"
- "+\t}\n"
- "+\treturn NULL;\n"
- "+}\n"
- "+\n"
- "+/*\n"
- "+ * Inserting an inode into the flush tree. The tree is keyed by the\n"
- "+ * dirtied_when member.\n"
- "+ *\n"
- "+ * If there is a duplicate key in the tree already the new inode is put\n"
- "+ * on the tail of a list of the rb_node.\n"
- "+ * All inserted inodes must have one of the I_DIRTY flags set.\n"
- "+ */\n"
- "+static void flush_tree_insert(struct super_block *sb, struct inode *inode)\n"
- "+{\n"
- "+\tstruct rb_node **new = &(sb->s_flush_root.rb_node);\n"
- "+\tstruct rb_node *parent = NULL;\n"
- "+\n"
- "+\tassert_spin_locked(&inode_lock);\n"
- "+\tBUG_ON((inode->i_state & I_DIRTY) == 0);\n"
- "+\tBUG_ON(inode->i_state & (I_FREEING|I_CLEAR));\n"
- "+\tBUG_ON(RB_LINKED_NODE(&inode->i_flush_node));\n"
- "+\n"
- "+\tsb->s_flush_count++;\n"
- "+\n"
- "+\tlist_del_init(&inode->i_list);\n"
- "+\twhile (*new) {\n"
- "+\t\tstruct inode *this = rb_to_inode(*new);\n"
- "+\t\tparent = *new;\n"
- "+\t\tif (time_before(inode->dirtied_when, this->dirtied_when))\n"
- "+\t\t\tnew = &((*new)->rb_left);\n"
- "+\t\telse if (time_after(inode->dirtied_when,\n"
- "+\t\t\t\t      this->dirtied_when)) {\n"
- "+\t\t\tnew = &((*new)->rb_right);\n"
- "+\t\t} else {\n"
- "+\t\t\tlist_add_tail(&inode->i_list, &this->i_list);\n"
- "+\t\t\treturn;\n"
- "+\t\t}\n"
- "+\t}\n"
- "+\n"
- "+\t/* Add in the new node and rebalance the tree */\n"
- "+\trb_link_node(&inode->i_flush_node, parent, new);\n"
- "+\trb_insert_color(&inode->i_flush_node, &sb->s_flush_root);\n"
- "+}\n"
- "+\n"
- "+\n"
- "+/*\n"
- "+ * Here we return the inode that has the smallest key in the flush tree\n"
- "+ * that is greater than the parameter \"prev_time\".\n"
- "+ */\n"
- "+static struct inode *flush_tree_min_greater(struct super_block *sb,\n"
- "+\t\t\t\t\t    unsigned long prev_time)\n"
- "+{\n"
- "+\tstruct rb_node *node = sb->s_flush_root.rb_node;\n"
- "+\tstruct inode *bsf = NULL;\n"
- "+\t/* best so far */\n"
- "+\tassert_spin_locked(&inode_lock);\n"
- "+\twhile (node) {\n"
- "+\t\tstruct inode *data = rb_to_inode(node);\n"
- "+\t\t/* Just trying to get lucky */\n"
- "+\t\tif ((prev_time + 1) == data->dirtied_when)\n"
- "+\t\t\treturn data;\n"
- "+\n"
- "+\t\t/* If this value is greater than our prev_time and is\n"
- "+\t\tless than the best so far, this is our new best so far.*/\n"
- "+\t\tif ((data->dirtied_when > prev_time) &&\n"
- "+\t\t    (bsf ? bsf->dirtied_when > data->dirtied_when : 1))\n"
- "+\t\t\tbsf = data;\n"
- "+\n"
- "+\t\t/* Search all the way down to the bottom of the tree */\n"
- "+\t\tif (time_before(prev_time, data->dirtied_when))\n"
- "+\t\t\tnode = node->rb_left;\n"
- "+\t\telse if (time_after_eq(prev_time, data->dirtied_when))\n"
- "+\t\t\tnode = node->rb_right;\n"
- "+\t}\n"
- "+\treturn bsf;\n"
- "+}\n"
- "+\n"
- "+/*\n"
- "+ * Here is where we interate to find the next inode to process. The\n"
- "+ * strategy is to first look for any other inodes with the same dirtied_when\n"
- "+ * value. If we have already processed that node then we need to find\n"
- "+ * the next highest dirtied_when value in the tree.\n"
- "+ */\n"
- "+static struct inode *flush_tree_next(struct super_block *sb,\n"
- "+\t\t\t\t     unsigned long start_time,\n"
- "+\t\t\t\t     unsigned long prev_time)\n"
- "+{\n"
- "+\tstruct inode *inode = flush_tree_search(sb, prev_time);\n"
- "+\tassert_spin_locked(&inode_lock);\n"
- "+\t/* We have a duplicate timed inode as the last processed */\n"
- "+\tif (inode && (time_before(inode->i_flushed_when, start_time)))\n"
- "+\t\treturn inode;\n"
- "+\n"
- "+\t/* Now we have to find the oldest one next */\n"
- "+\treturn flush_tree_min_greater(sb, prev_time);\n"
- "+}\n"
- "+\n"
- "+/* Removing a node from the flushtree. */\n"
- "+void flush_tree_remove(struct super_block *sb, struct inode *inode)\n"
- "+{\n"
- "+\tstruct rb_node *rb_node = &inode->i_flush_node;\n"
- "+\tstruct rb_root *rb_root = &sb->s_flush_root;\n"
- "+\n"
- "+\tassert_spin_locked(&inode_lock);\n"
- "+\tBUG_ON((inode->i_state & I_DIRTY) == 0);\n"
- "+\n"
- "+\tsb->s_flush_count--;\n"
- "+\n"
- "+\t/* There is no chain on this inode. Just remove it from the tree */\n"
- "+\tif (list_empty(&inode->i_list)) {\n"
- "+\t\tBUG_ON(!RB_LINKED_NODE(rb_node));\n"
- "+\t\trb_erase(rb_node, rb_root);\n"
- "+\t\tmemset(rb_node, 0, sizeof(*rb_node));\n"
- "+\t\treturn;\n"
- "+\t}\n"
- "+\n"
- "+\t/* This node is on a chain AND is in the rb_tree */\n"
- "+\tif (RB_LINKED_NODE(rb_node)) {\n"
- "+\t\tstruct inode *new = list_entry(inode->i_list.next,\n"
- "+\t\t\t\t\t       struct inode, i_list);\n"
- "+\t\trb_replace_node(rb_node, &new->i_flush_node, rb_root);\n"
- "+\t\tmemset(rb_node, 0, sizeof(*rb_node));\n"
- "+\t}\n"
- "+\t/* Take it off the list */\n"
- "+\tlist_del_init(&inode->i_list);\n"
- "+}\n"
- "+\n"
- "+\n"
- " /**\n"
- "  *\t__mark_inode_dirty -\tinternal function\n"
- "  *\t@inode: inode to mark\n"
- "@@ -32,7 +198,7 @@\n"
- "  *\tMark an inode as dirty. Callers should use mark_inode_dirty or\n"
- "  *  \tmark_inode_dirty_sync.\n"
- "  *\n"
- "- * Put the inode on the super block's dirty list.\n"
- "+ * Put the inode in the super block's flush_tree.\n"
- "  *\n"
- "  * CAREFUL! We mark it dirty unconditionally, but move it onto the\n"
- "  * dirty list only if it is hashed or if it refers to a blockdev.\n"
- "@@ -75,6 +241,13 @@ void __mark_inode_dirty(struct inode *in\n"
- " \tif ((inode->i_state & flags) == flags)\n"
- " \t\treturn;\n"
- " \n"
- "+\t/* anonynous file systems do not write data back */\n"
- "+\tif (inode->i_sb->s_type->fs_flags & FS_ANONYMOUS)\n"
- "+\t\treturn;\n"
- "+\n"
- "+\tif (inode->i_state & I_DIRTY_NEVER)\n"
- "+\t\treturn;\n"
- "+\n"
- " \tif (unlikely(block_dump)) {\n"
- " \t\tstruct dentry *dentry = NULL;\n"
- " \t\tconst char *name = \"?\";\n"
- "@@ -97,14 +270,7 @@ void __mark_inode_dirty(struct inode *in\n"
- " \tif ((inode->i_state & flags) != flags) {\n"
- " \t\tconst int was_dirty = inode->i_state & I_DIRTY;\n"
- " \n"
- "-\t\tinode->i_state |= flags;\n"
- "-\n"
- "-\t\t/*\n"
- "-\t\t * If the inode is being synced, just update its dirty state.\n"
- "-\t\t * The unlocker will place the inode on the appropriate\n"
- "-\t\t * superblock list, based upon its state.\n"
- "-\t\t */\n"
- "-\t\tif (inode->i_state & I_SYNC)\n"
- "+\t\tif (inode->i_state & (I_FREEING|I_CLEAR))\n"
- " \t\t\tgoto out;\n"
- " \n"
- " \t\t/*\n"
- "@@ -115,7 +281,15 @@ void __mark_inode_dirty(struct inode *in\n"
- " \t\t\tif (hlist_unhashed(&inode->i_hash))\n"
- " \t\t\t\tgoto out;\n"
- " \t\t}\n"
- "-\t\tif (inode->i_state & (I_FREEING|I_CLEAR))\n"
- "+\n"
- "+\t\tinode->i_state |= flags;\n"
- "+\n"
- "+\t\t/*\n"
- "+\t\t * If the inode is being synced, just update its dirty state.\n"
- "+\t\t * The unlocker will place the inode on the appropriate\n"
- "+\t\t * superblock list, based upon its state.\n"
- "+\t\t */\n"
- "+\t\tif (inode->i_state & I_SYNC)\n"
- " \t\t\tgoto out;\n"
- " \n"
- " \t\t/*\n"
- "@@ -124,7 +298,8 @@ void __mark_inode_dirty(struct inode *in\n"
- " \t\t */\n"
- " \t\tif (!was_dirty) {\n"
- " \t\t\tinode->dirtied_when = jiffies;\n"
- "-\t\t\tlist_move(&inode->i_list, &sb->s_dirty);\n"
- "+\t\t\tinode->i_flushed_when = jiffies;\n"
- "+\t\t\tflush_tree_insert(sb, inode);\n"
- " \t\t}\n"
- " \t}\n"
- " out:\n"
- "@@ -140,38 +315,6 @@ static int write_inode(struct inode *ino\n"
- " \treturn 0;\n"
- " }\n"
- " \n"
- "-/*\n"
- "- * Redirty an inode: set its when-it-was dirtied timestamp and move it to the\n"
- "- * furthest end of its superblock's dirty-inode list.\n"
- "- *\n"
- "- * Before stamping the inode's ->dirtied_when, we check to see whether it is\n"
- "- * already the most-recently-dirtied inode on the s_dirty list.  If that is\n"
- "- * the case then the inode must have been redirtied while it was being written\n"
- "- * out and we don't reset its dirtied_when.\n"
- "- */\n"
- "-static void redirty_tail(struct inode *inode)\n"
- "-{\n"
- "-\tstruct super_block *sb = inode->i_sb;\n"
- "-\n"
- "-\tif (!list_empty(&sb->s_dirty)) {\n"
- "-\t\tstruct inode *tail_inode;\n"
- "-\n"
- "-\t\ttail_inode = list_entry(sb->s_dirty.next, struct inode, i_list);\n"
- "-\t\tif (!time_after_eq(inode->dirtied_when,\n"
- "-\t\t\t\ttail_inode->dirtied_when))\n"
- "-\t\t\tinode->dirtied_when = jiffies;\n"
- "-\t}\n"
- "-\tlist_move(&inode->i_list, &sb->s_dirty);\n"
- "-}\n"
- "-\n"
- "-/*\n"
- "- * requeue inode for re-scanning after sb->s_io list is exhausted.\n"
- "- */\n"
- "-static void requeue_io(struct inode *inode)\n"
- "-{\n"
- "-\tlist_move(&inode->i_list, &inode->i_sb->s_more_io);\n"
- "-}\n"
- "-\n"
- " static void inode_sync_complete(struct inode *inode)\n"
- " {\n"
- " \t/*\n"
- "@@ -181,38 +324,9 @@ static void inode_sync_complete(struct i\n"
- " \twake_up_bit(&inode->i_state, __I_SYNC);\n"
- " }\n"
- " \n"
- "-/*\n"
- "- * Move expired dirty inodes from @delaying_queue to @dispatch_queue.\n"
- "- */\n"
- "-static void move_expired_inodes(struct list_head *delaying_queue,\n"
- "-\t\t\t       struct list_head *dispatch_queue,\n"
- "-\t\t\t\tunsigned long *older_than_this)\n"
- "-{\n"
- "-\twhile (!list_empty(delaying_queue)) {\n"
- "-\t\tstruct inode *inode = list_entry(delaying_queue->prev,\n"
- "-\t\t\t\t\t\tstruct inode, i_list);\n"
- "-\t\tif (older_than_this &&\n"
- "-\t\t\ttime_after(inode->dirtied_when, *older_than_this))\n"
- "-\t\t\tbreak;\n"
- "-\t\tlist_move(&inode->i_list, dispatch_queue);\n"
- "-\t}\n"
- "-}\n"
- "-\n"
- "-/*\n"
- "- * Queue all expired dirty inodes for io, eldest first.\n"
- "- */\n"
- "-static void queue_io(struct super_block *sb,\n"
- "-\t\t\t\tunsigned long *older_than_this)\n"
- "-{\n"
- "-\tlist_splice_init(&sb->s_more_io, sb->s_io.prev);\n"
- "-\tmove_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);\n"
- "-}\n"
- "-\n"
- " int sb_has_dirty_inodes(struct super_block *sb)\n"
- " {\n"
- "-\treturn !list_empty(&sb->s_dirty) ||\n"
- "-\t       !list_empty(&sb->s_io) ||\n"
- "-\t       !list_empty(&sb->s_more_io);\n"
- "+\treturn !RB_EMPTY_ROOT(&sb->s_flush_root);\n"
- " }\n"
- " EXPORT_SYMBOL(sb_has_dirty_inodes);\n"
- " \n"
- "@@ -237,6 +351,7 @@ __sync_single_inode(struct inode *inode,\n"
- " \tBUG_ON(inode->i_state & I_SYNC);\n"
- " \n"
- " \t/* Set I_SYNC, reset I_DIRTY */\n"
- "+\tflush_tree_remove(inode->i_sb, inode);\n"
- " \tdirty = inode->i_state & I_DIRTY;\n"
- " \tinode->i_state |= I_SYNC;\n"
- " \tinode->i_state &= ~I_DIRTY;\n"
- "@@ -279,12 +394,15 @@ __sync_single_inode(struct inode *inode,\n"
- " \t\t\t */\n"
- " \t\t\tif (wbc->for_kupdate) {\n"
- " \t\t\t\t/*\n"
- "-\t\t\t\t * For the kupdate function we move the inode\n"
- "-\t\t\t\t * to s_more_io so it will get more writeout as\n"
- "-\t\t\t\t * soon as the queue becomes uncongested.\n"
- "+\t\t\t\t * For the kupdate function we leave\n"
- "+\t\t\t\t * dirtied_when field untouched and return\n"
- "+\t\t\t\t * it to the flush_tree. The next iteration\n"
- "+\t\t\t\t * of kupdate will flush more pages when\n"
- "+\t\t\t\t * the queue is no longer congested.\n"
- " \t\t\t\t */\n"
- " \t\t\t\tinode->i_state |= I_DIRTY_PAGES;\n"
- "-\t\t\t\trequeue_io(inode);\n"
- "+\t\t\t\twbc->more_io = 1;\n"
- "+\t\t\t\tflush_tree_insert(inode->i_sb, inode);\n"
- " \t\t\t} else {\n"
- " \t\t\t\t/*\n"
- " \t\t\t\t * Otherwise fully redirty the inode so that\n"
- "@@ -294,14 +412,15 @@ __sync_single_inode(struct inode *inode,\n"
- " \t\t\t\t * all the other files.\n"
- " \t\t\t\t */\n"
- " \t\t\t\tinode->i_state |= I_DIRTY_PAGES;\n"
- "-\t\t\t\tredirty_tail(inode);\n"
- "+\t\t\t\tinode->dirtied_when = jiffies;\n"
- "+\t\t\t\tflush_tree_insert(inode->i_sb, inode);\n"
- " \t\t\t}\n"
- " \t\t} else if (inode->i_state & I_DIRTY) {\n"
- " \t\t\t/*\n"
- " \t\t\t * Someone redirtied the inode while were writing back\n"
- " \t\t\t * the pages.\n"
- " \t\t\t */\n"
- "-\t\t\tredirty_tail(inode);\n"
- "+\t\t\tflush_tree_insert(inode->i_sb, inode);\n"
- " \t\t} else if (atomic_read(&inode->i_count)) {\n"
- " \t\t\t/*\n"
- " \t\t\t * The inode is clean, inuse\n"
- "@@ -333,23 +452,22 @@ __writeback_single_inode(struct inode *i\n"
- " \telse\n"
- " \t\tWARN_ON(inode->i_state & I_WILL_FREE);\n"
- " \n"
- "+\tBUG_ON((inode->i_state & I_DIRTY) == 0);\n"
- "+\n"
- "+\t/*\n"
- "+\t * If the inode is locked and we are not going to wait for it\n"
- "+\t * to be unlocked then we can just exit the routine. Since the\n"
- "+\t * inode is marked I_DIRTY it will be inserted into the flush\n"
- "+\t * tree by sync_single_inode when the I_SYNC is released.\n"
- "+\t */\n"
- " \tif ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {\n"
- "-\t\tstruct address_space *mapping = inode->i_mapping;\n"
- " \t\tint ret;\n"
- "-\n"
- "-\t\t/*\n"
- "-\t\t * We're skipping this inode because it's locked, and we're not\n"
- "-\t\t * doing writeback-for-data-integrity.  Move it to s_more_io so\n"
- "-\t\t * that writeback can proceed with the other inodes on s_io.\n"
- "-\t\t * We'll have another go at writing back this inode when we\n"
- "-\t\t * completed a full scan of s_io.\n"
- "-\t\t */\n"
- "-\t\trequeue_io(inode);\n"
- "-\n"
- "+\t\tstruct address_space *mapping = inode->i_mapping;\n"
- " \t\t/*\n"
- " \t\t * Even if we don't actually write the inode itself here,\n"
- " \t\t * we can at least start some of the data writeout..\n"
- " \t\t */\n"
- "+\t\twbc->more_io = 1;\n"
- " \t\tspin_unlock(&inode_lock);\n"
- " \t\tret = do_writepages(mapping, wbc);\n"
- " \t\tspin_lock(&inode_lock);\n"
- "@@ -383,8 +501,8 @@ __writeback_single_inode(struct inode *i\n"
- "  * If we're a pdlfush thread, then implement pdflush collision avoidance\n"
- "  * against the entire list.\n"
- "  *\n"
- "- * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so\n"
- "- * that it can be located for waiting on in __writeback_single_inode().\n"
- "+ * WB_SYNC_HOLD is a hack for sys_sync(): so that it can be located for\n"
- "+ * waiting on in __writeback_single_inode().\n"
- "  *\n"
- "  * Called under inode_lock.\n"
- "  *\n"
- "@@ -398,28 +516,29 @@ __writeback_single_inode(struct inode *i\n"
- "  * a queue with that address_space.  (Easy: have a global \"dirty superblocks\"\n"
- "  * list).\n"
- "  *\n"
- "- * The inodes to be written are parked on sb->s_io.  They are moved back onto\n"
- "- * sb->s_dirty as they are selected for writing.  This way, none can be missed\n"
- "- * on the writer throttling path, and we get decent balancing between many\n"
- "- * throttled threads: we don't want them all piling up on inode_sync_wait.\n"
- "+ * The inodes to be written are inserted into the flush_tree.\n"
- "  */\n"
- " static void\n"
- " sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)\n"
- " {\n"
- " \tconst unsigned long start = jiffies;\t/* livelock avoidance */\n"
- "+\tstruct inode *inode = NULL;\n"
- "+\tunsigned long prev_time = 0;\n"
- " \n"
- "-\tif (!wbc->for_kupdate || list_empty(&sb->s_io))\n"
- "-\t\tqueue_io(sb, wbc->older_than_this);\n"
- "+\tif (sb->s_type->fs_flags & FS_ANONYMOUS)\n"
- "+\t\treturn;\n"
- " \n"
- "-\twhile (!list_empty(&sb->s_io)) {\n"
- "-\t\tstruct inode *inode = list_entry(sb->s_io.prev,\n"
- "-\t\t\t\t\t\tstruct inode, i_list);\n"
- "+\tmutex_lock(&sb->s_flush_lock);\n"
- "+\tspin_lock(&inode_lock);\n"
- "+\twhile ((inode = flush_tree_next(sb, start, prev_time)) != NULL) {\n"
- " \t\tstruct address_space *mapping = inode->i_mapping;\n"
- " \t\tstruct backing_dev_info *bdi = mapping->backing_dev_info;\n"
- " \t\tlong pages_skipped;\n"
- " \n"
- "+\t\tprev_time = inode->dirtied_when;\n"
- "+\t\tinode->i_flushed_when = start;\n"
- "+\n"
- " \t\tif (!bdi_cap_writeback_dirty(bdi)) {\n"
- "-\t\t\tredirty_tail(inode);\n"
- " \t\t\tif (sb_is_blkdev_sb(sb)) {\n"
- " \t\t\t\t/*\n"
- " \t\t\t\t * Dirty memory-backed blockdev: the ramdisk\n"
- "@@ -436,17 +555,17 @@ sync_sb_inodes(struct super_block *sb, s\n"
- " \t\t}\n"
- " \n"
- " \t\tif (wbc->nonblocking && bdi_write_congested(bdi)) {\n"
- "+\t\t\twbc->more_io = 1;\n"
- " \t\t\twbc->encountered_congestion = 1;\n"
- " \t\t\tif (!sb_is_blkdev_sb(sb))\n"
- " \t\t\t\tbreak;\t\t/* Skip a congested fs */\n"
- "-\t\t\trequeue_io(inode);\n"
- " \t\t\tcontinue;\t\t/* Skip a congested blockdev */\n"
- " \t\t}\n"
- " \n"
- " \t\tif (wbc->bdi && bdi != wbc->bdi) {\n"
- " \t\t\tif (!sb_is_blkdev_sb(sb))\n"
- " \t\t\t\tbreak;\t\t/* fs has the wrong queue */\n"
- "-\t\t\trequeue_io(inode);\n"
- "+\t\t\twbc->more_io = 1;\n"
- " \t\t\tcontinue;\t\t/* blockdev has wrong queue */\n"
- " \t\t}\n"
- " \n"
- "@@ -454,6 +573,11 @@ sync_sb_inodes(struct super_block *sb, s\n"
- " \t\tif (time_after(inode->dirtied_when, start))\n"
- " \t\t\tbreak;\n"
- " \n"
- "+\t\t/* Was this inode dirtied too recently? */\n"
- "+\t\tif (wbc->older_than_this &&\n"
- "+\t\t    time_after(inode->dirtied_when, *wbc->older_than_this))\n"
- "+\t\t\tbreak;\n"
- "+\n"
- " \t\t/* Is another pdflush already flushing this queue? */\n"
- " \t\tif (current_is_pdflush() && !writeback_acquire(bdi))\n"
- " \t\t\tbreak;\n"
- "@@ -462,19 +586,8 @@ sync_sb_inodes(struct super_block *sb, s\n"
- " \t\t__iget(inode);\n"
- " \t\tpages_skipped = wbc->pages_skipped;\n"
- " \t\t__writeback_single_inode(inode, wbc);\n"
- "-\t\tif (wbc->sync_mode == WB_SYNC_HOLD) {\n"
- "-\t\t\tinode->dirtied_when = jiffies;\n"
- "-\t\t\tlist_move(&inode->i_list, &sb->s_dirty);\n"
- "-\t\t}\n"
- " \t\tif (current_is_pdflush())\n"
- " \t\t\twriteback_release(bdi);\n"
- "-\t\tif (wbc->pages_skipped != pages_skipped) {\n"
- "-\t\t\t/*\n"
- "-\t\t\t * writeback is not making progress due to locked\n"
- "-\t\t\t * buffers.  Skip this inode for now.\n"
- "-\t\t\t */\n"
- "-\t\t\tredirty_tail(inode);\n"
- "-\t\t}\n"
- " \t\tspin_unlock(&inode_lock);\n"
- " \t\tiput(inode);\n"
- " \t\tcond_resched();\n"
- "@@ -482,8 +595,9 @@ sync_sb_inodes(struct super_block *sb, s\n"
- " \t\tif (wbc->nr_to_write <= 0)\n"
- " \t\t\tbreak;\n"
- " \t}\n"
- "-\tif (!list_empty(&sb->s_more_io))\n"
- "-\t\twbc->more_io = 1;\n"
- "+\n"
- "+\tspin_unlock(&inode_lock);\n"
- "+\tmutex_unlock(&sb->s_flush_lock);\n"
- " \treturn;\t\t/* Leave any unwritten inodes on s_io */\n"
- " }\n"
- " \n"
- "@@ -492,9 +606,9 @@ sync_sb_inodes(struct super_block *sb, s\n"
- "  *\n"
- "  * Note:\n"
- "  * We don't need to grab a reference to superblock here. If it has non-empty\n"
- "- * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed\n"
- "- * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all\n"
- "- * empty. Since __sync_single_inode() regains inode_lock before it finally moves\n"
- "+ * flush_tree it hasn't been killed yet and kill_super() won't proceed\n"
- "+ * past sync_inodes_sb() until the flush_tree is empty.\n"
- "+ * Since __sync_single_inode() regains inode_lock before it finally moves\n"
- "  * inode from superblock lists we are OK.\n"
- "  *\n"
- "  * If `older_than_this' is non-zero then only flush inodes which have a\n"
- "@@ -527,9 +641,7 @@ restart:\n"
- " \t\t\t */\n"
- " \t\t\tif (down_read_trylock(&sb->s_umount)) {\n"
- " \t\t\t\tif (sb->s_root) {\n"
- "-\t\t\t\t\tspin_lock(&inode_lock);\n"
- " \t\t\t\t\tsync_sb_inodes(sb, wbc);\n"
- "-\t\t\t\t\tspin_unlock(&inode_lock);\n"
- " \t\t\t\t}\n"
- " \t\t\t\tup_read(&sb->s_umount);\n"
- " \t\t\t}\n"
- "@@ -546,7 +658,7 @@ restart:\n"
- " /*\n"
- "  * writeback and wait upon the filesystem's dirty inodes.  The caller will\n"
- "  * do this in two passes - one to write, and one to wait.  WB_SYNC_HOLD is\n"
- "- * used to park the written inodes on sb->s_dirty for the wait pass.\n"
- "+ * used to park the written inodes on the flush_tree for the wait pass.\n"
- "  *\n"
- "  * A finite limit is set on the number of pages which will be written.\n"
- "  * To prevent infinite livelock of sys_sync().\n"
- "@@ -568,9 +680,7 @@ void sync_inodes_sb(struct super_block *\n"
- " \t\t\t(inodes_stat.nr_inodes - inodes_stat.nr_unused) +\n"
- " \t\t\tnr_dirty + nr_unstable;\n"
- " \twbc.nr_to_write += wbc.nr_to_write / 2;\t\t/* Bit more for luck */\n"
- "-\tspin_lock(&inode_lock);\n"
- " \tsync_sb_inodes(sb, &wbc);\n"
- "-\tspin_unlock(&inode_lock);\n"
- " }\n"
- " \n"
- " /*\n"
- "Index: 2624rc3/fs/inode.c\n"
- "===================================================================\n"
- "--- 2624rc3.orig/fs/inode.c\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/fs/inode.c\t2007-11-27 10:54:34.000000000 -0800\n"
- "@@ -143,6 +143,7 @@ static struct inode *alloc_inode(struct \n"
- " \t\tinode->i_cdev = NULL;\n"
- " \t\tinode->i_rdev = 0;\n"
- " \t\tinode->dirtied_when = 0;\n"
- "+\t\tmemset(&inode->i_flush_node, 0, sizeof(inode->i_flush_node));\n"
- " \t\tif (security_inode_alloc(inode)) {\n"
- " \t\t\tif (inode->i_sb->s_op->destroy_inode)\n"
- " \t\t\t\tinode->i_sb->s_op->destroy_inode(inode);\n"
- "@@ -1044,6 +1045,10 @@ void generic_delete_inode(struct inode *\n"
- " {\n"
- " \tconst struct super_operations *op = inode->i_sb->s_op;\n"
- " \n"
- "+\tif ((inode->i_state & I_DIRTY)) {\n"
- "+\t\tflush_tree_remove(inode->i_sb, inode);\n"
- "+\t\tinode->i_state &= ~I_DIRTY;\n"
- "+\t}\n"
- " \tlist_del_init(&inode->i_list);\n"
- " \tlist_del_init(&inode->i_sb_list);\n"
- " \tinode->i_state |= I_FREEING;\n"
- "Index: 2624rc3/fs/pipe.c\n"
- "===================================================================\n"
- "--- 2624rc3.orig/fs/pipe.c\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/fs/pipe.c\t2007-11-27 10:59:15.000000000 -0800\n"
- "@@ -931,12 +931,10 @@ static struct inode * get_pipe_inode(voi\n"
- " \tinode->i_fop = &rdwr_pipe_fops;\n"
- " \n"
- " \t/*\n"
- "-\t * Mark the inode dirty from the very beginning,\n"
- "-\t * that way it will never be moved to the dirty\n"
- "-\t * list because \"mark_inode_dirty()\" will think\n"
- "-\t * that it already _is_ on the dirty list.\n"
- "+\t * Mark the inode \"never dirty\" from the very beginning,\n"
- "+\t * that way it will never be written back.\n"
- " \t */\n"
- "-\tinode->i_state = I_DIRTY;\n"
- "+\tinode->i_state = I_DIRTY_NEVER;\n"
- " \tinode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;\n"
- " \tinode->i_uid = current->fsuid;\n"
- " \tinode->i_gid = current->fsgid;\n"
- "Index: 2624rc3/fs/proc/root.c\n"
- "===================================================================\n"
- "--- 2624rc3.orig/fs/proc/root.c\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/fs/proc/root.c\t2007-11-27 11:00:19.000000000 -0800\n"
- "@@ -102,6 +102,7 @@ struct file_system_type proc_fs_type = {\n"
- " \t.name\t\t= \"proc\",\n"
- " \t.get_sb\t\t= proc_get_sb,\n"
- " \t.kill_sb\t= proc_kill_sb,\n"
- "+\t.fs_flags\t= FS_ANONYMOUS,\n"
- " };\n"
- " \n"
- " void __init proc_root_init(void)\n"
- "Index: 2624rc3/fs/super.c\n"
- "===================================================================\n"
- "--- 2624rc3.orig/fs/super.c\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/fs/super.c\t2007-11-27 12:43:29.000000000 -0800\n"
- "@@ -61,9 +61,8 @@ static struct super_block *alloc_super(s\n"
- " \t\t\ts = NULL;\n"
- " \t\t\tgoto out;\n"
- " \t\t}\n"
- "-\t\tINIT_LIST_HEAD(&s->s_dirty);\n"
- "-\t\tINIT_LIST_HEAD(&s->s_io);\n"
- "-\t\tINIT_LIST_HEAD(&s->s_more_io);\n"
- "+\t\ts->s_flush_root = RB_ROOT;\n"
- "+\t\tmutex_init(&s->s_flush_lock);\n"
- " \t\tINIT_LIST_HEAD(&s->s_files);\n"
- " \t\tINIT_LIST_HEAD(&s->s_instances);\n"
- " \t\tINIT_HLIST_HEAD(&s->s_anon);\n"
- "@@ -103,6 +102,7 @@ out:\n"
- "  */\n"
- " static inline void destroy_super(struct super_block *s)\n"
- " {\n"
- "+\tmutex_destroy(&s->s_flush_lock);\n"
- " \tsecurity_sb_free(s);\n"
- " \tkfree(s->s_subtype);\n"
- " \tkfree(s);\n"
- "Index: 2624rc3/fs/sysfs/mount.c\n"
- "===================================================================\n"
- "--- 2624rc3.orig/fs/sysfs/mount.c\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/fs/sysfs/mount.c\t2007-11-27 11:02:36.000000000 -0800\n"
- "@@ -80,6 +80,7 @@ static struct file_system_type sysfs_fs_\n"
- " \t.name\t\t= \"sysfs\",\n"
- " \t.get_sb\t\t= sysfs_get_sb,\n"
- " \t.kill_sb\t= kill_anon_super,\n"
- "+\t.fs_flags\t= FS_ANONYMOUS,\n"
- " };\n"
- " \n"
- " int __init sysfs_init(void)\n"
- "Index: 2624rc3/include/linux/fs.h\n"
- "===================================================================\n"
- "--- 2624rc3.orig/include/linux/fs.h\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/include/linux/fs.h\t2007-11-27 12:41:22.000000000 -0800\n"
- "@@ -90,9 +90,10 @@ extern int dir_notify_enable;\n"
- " #define SEL_EX\t\t4\n"
- " \n"
- " /* public flags for file_system_type */\n"
- "-#define FS_REQUIRES_DEV 1 \n"
- "-#define FS_BINARY_MOUNTDATA 2\n"
- "-#define FS_HAS_SUBTYPE 4\n"
- "+#define FS_REQUIRES_DEV\t\t1\n"
- "+#define FS_BINARY_MOUNTDATA\t2\n"
- "+#define FS_HAS_SUBTYPE\t\t4\n"
- "+#define FS_ANONYMOUS\t\t8\n"
- " #define FS_REVAL_DOT\t16384\t/* Check the paths \".\", \"..\" for staleness */\n"
- " #define FS_RENAME_DOES_D_MOVE\t32768\t/* FS will handle d_move()\n"
- " \t\t\t\t\t * during rename() internally.\n"
- "@@ -285,6 +286,7 @@ extern int dir_notify_enable;\n"
- " #include <linux/pid.h>\n"
- " #include <linux/mutex.h>\n"
- " #include <linux/capability.h>\n"
- "+#include <linux/rbtree.h>\n"
- " \n"
- " #include <asm/atomic.h>\n"
- " #include <asm/semaphore.h>\n"
- "@@ -592,6 +594,8 @@ struct inode {\n"
- " \tstruct hlist_node\ti_hash;\n"
- " \tstruct list_head\ti_list;\n"
- " \tstruct list_head\ti_sb_list;\n"
- "+\tstruct rb_node\t\ti_flush_node;\n"
- "+\tunsigned long\t\ti_flushed_when;\n"
- " \tstruct list_head\ti_dentry;\n"
- " \tunsigned long\t\ti_ino;\n"
- " \tatomic_t\t\ti_count;\n"
- "@@ -1003,9 +1007,11 @@ struct super_block {\n"
- " \tstruct xattr_handler\t**s_xattr;\n"
- " \n"
- " \tstruct list_head\ts_inodes;\t/* all inodes */\n"
- "-\tstruct list_head\ts_dirty;\t/* dirty inodes */\n"
- "-\tstruct list_head\ts_io;\t\t/* parked for writeback */\n"
- "-\tstruct list_head\ts_more_io;\t/* parked for more writeback */\n"
- "+\n"
- "+\tstruct rb_root\t\ts_flush_root;\n"
- "+\tunsigned long\t\ts_flush_count;\n"
- "+\tstruct mutex\t\ts_flush_lock;\n"
- "+\n"
- " \tstruct hlist_head\ts_anon;\t\t/* anonymous dentries for (nfs) exporting */\n"
- " \tstruct list_head\ts_files;\n"
- " \n"
- "@@ -1315,17 +1321,18 @@ struct super_operations {\n"
- "  * Q: igrab() only checks on (I_FREEING|I_WILL_FREE).  Should it also check on\n"
- "  *    I_CLEAR?  If not, why?\n"
- "  */\n"
- "-#define I_DIRTY_SYNC\t\t1\n"
- "-#define I_DIRTY_DATASYNC\t2\n"
- "-#define I_DIRTY_PAGES\t\t4\n"
- "-#define I_NEW\t\t\t8\n"
- "-#define I_WILL_FREE\t\t16\n"
- "-#define I_FREEING\t\t32\n"
- "-#define I_CLEAR\t\t\t64\n"
- "+#define I_DIRTY_SYNC\t\t(1 << 0)\n"
- "+#define I_DIRTY_DATASYNC\t(1 << 1)\n"
- "+#define I_DIRTY_PAGES\t\t(1 << 2)\n"
- "+#define I_NEW\t\t\t(1 << 3)\n"
- "+#define I_WILL_FREE\t\t(1 << 4)\n"
- "+#define I_FREEING\t\t(1 << 5)\n"
- "+#define I_CLEAR\t\t\t(1 << 6)\n"
- " #define __I_LOCK\t\t7\n"
- " #define I_LOCK\t\t\t(1 << __I_LOCK)\n"
- " #define __I_SYNC\t\t8\n"
- " #define I_SYNC\t\t\t(1 << __I_SYNC)\n"
- "+#define I_DIRTY_NEVER\t\t(1 << 9)\n"
- " \n"
- " #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)\n"
- " \n"
- "Index: 2624rc3/include/linux/rbtree.h\n"
- "===================================================================\n"
- "--- 2624rc3.orig/include/linux/rbtree.h\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/include/linux/rbtree.h\t2007-11-27 17:40:53.000000000 -0800\n"
- "@@ -135,6 +135,8 @@ static inline void rb_set_color(struct r\n"
- " #define RB_EMPTY_ROOT(root)\t((root)->rb_node == NULL)\n"
- " #define RB_EMPTY_NODE(node)\t(rb_parent(node) == node)\n"
- " #define RB_CLEAR_NODE(node)\t(rb_set_parent(node, node))\n"
- "+#define RB_LINKED_NODE(node)\t((node)->rb_parent_color || \\\n"
- "+\t\t\t\t (node)->rb_left || (node)->rb_right)\n"
- " \n"
- " extern void rb_insert_color(struct rb_node *, struct rb_root *);\n"
- " extern void rb_erase(struct rb_node *, struct rb_root *);\n"
- "Index: 2624rc3/include/linux/writeback.h\n"
- "===================================================================\n"
- "--- 2624rc3.orig/include/linux/writeback.h\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/include/linux/writeback.h\t2007-11-27 11:12:53.000000000 -0800\n"
- "@@ -72,6 +72,8 @@ void writeback_inodes(struct writeback_c\n"
- " int inode_wait(void *);\n"
- " void sync_inodes_sb(struct super_block *, int wait);\n"
- " void sync_inodes(int wait);\n"
- "+void flush_tree_remove(struct super_block *sb, struct inode *inode);\n"
- "+\n"
- " \n"
- " /* writeback.h requires fs.h; it, too, is not included from here. */\n"
- " static inline void wait_on_inode(struct inode *inode)\n"
- "Index: 2624rc3/mm/shmem.c\n"
- "===================================================================\n"
- "--- 2624rc3.orig/mm/shmem.c\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/mm/shmem.c\t2007-11-27 11:13:45.000000000 -0800\n"
- "@@ -2460,6 +2460,7 @@ static struct file_system_type tmpfs_fs_\n"
- " \t.name\t\t= \"tmpfs\",\n"
- " \t.get_sb\t\t= shmem_get_sb,\n"
- " \t.kill_sb\t= kill_litter_super,\n"
- "+\t.fs_flags\t= FS_ANONYMOUS,\n"
- " };\n"
- " static struct vfsmount *shm_mnt;\n"
- " \n"
- "Index: 2624rc3/mm/tiny-shmem.c\n"
- "===================================================================\n"
- "--- 2624rc3.orig/mm/tiny-shmem.c\t2007-11-16 21:16:36.000000000 -0800\n"
- "+++ 2624rc3/mm/tiny-shmem.c\t2007-11-27 11:14:13.000000000 -0800\n"
- "@@ -24,6 +24,7 @@ static struct file_system_type tmpfs_fs_\n"
- " \t.name\t\t= \"tmpfs\",\n"
- " \t.get_sb\t\t= ramfs_get_sb,\n"
- " \t.kill_sb\t= kill_litter_super,\n"
- "+\t.fs_flags\t= FS_ANONYMOUS,\n"
- " };\n"
- " \n"
-  static struct vfsmount *shm_mnt;
 
-ed879f5a06bcabfba1513cc5bff7038de3138d2e7242c0b117dac341f810a825
+cc6f6466d72a8f8693174c3a642755a917c14e82ffb7b1666119e8c59a7354e9

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.