public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@zip.com.au>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: lkml <linux-kernel@vger.kernel.org>
Subject: [patch 9/15] pdflush exclusion
Date: Sun, 19 May 2002 12:42:00 -0700	[thread overview]
Message-ID: <3CE80008.5A9CA5DA@zip.com.au> (raw)



Use the pdflush exclusion infrastructure to ensure that only one
pdflush thread is ever performing writeback against a particular
request_queue.

This works rather well.  It requires a lot of activity against a lot of
disks to cause more pdflush threads to start up.  Possibly the
thread-creation logic is a little weak: it starts more threads when a
pdflush thread goes back to sleep.  It may be better to start new
threads within pdflush_operation().

All non-request_queue-backed address_spaces share the global
default_backing_dev_info structure.  So at present only a single
pdflush instance will be available for background writeback of *all*
NFS filesystems (for example).

If there is benefit in concurrent background writeback for multiple NFS
mounts then NFS would need to create per-mount backing_dev_info
structures and install those into new inode's address_spaces in some
manner.


=====================================

--- 2.5.16/fs/fs-writeback.c~pdflush-single	Sun May 19 11:49:48 2002
+++ 2.5.16-akpm/fs/fs-writeback.c	Sun May 19 12:02:57 2002
@@ -187,6 +187,9 @@ static void __sync_single_inode(struct i
 static void
 __writeback_single_inode(struct inode *inode, int sync, int *nr_to_write)
 {
+	if (current_is_pdflush() && (inode->i_state & I_LOCK))
+		return;
+
 	while (inode->i_state & I_LOCK) {
 		__iget(inode);
 		spin_unlock(&inode_lock);
@@ -213,6 +216,9 @@ void writeback_single_inode(struct inode
  * had their first dirtying at a time earlier than *older_than_this.
  *
  * Called under inode_lock.
+ *
+ * If we're a pdflush thread, then implement pdflush collision avoidance
+ * against the entire list.
  */
 static void __sync_list(struct list_head *head, int sync_mode,
 		int *nr_to_write, unsigned long *older_than_this)
@@ -223,6 +229,8 @@ static void __sync_list(struct list_head
 	while ((tmp = head->prev) != head) {
 		struct inode *inode = list_entry(tmp, struct inode, i_list);
 		struct address_space *mapping = inode->i_mapping;
+		struct backing_dev_info *bdi;
+
 		int really_sync;
 
 		/* Was this inode dirtied after __sync_list was called? */
@@ -233,10 +241,18 @@ static void __sync_list(struct list_head
 			time_after(mapping->dirtied_when, *older_than_this))
 			break;
 
+		bdi = mapping->backing_dev_info;
+		if (current_is_pdflush() && !writeback_acquire(bdi))
+			break;
+
 		really_sync = (sync_mode == WB_SYNC_ALL);
 		if ((sync_mode == WB_SYNC_LAST) && (head->prev == head))
 			really_sync = 1;
 		__writeback_single_inode(inode, really_sync, nr_to_write);
+
+		if (current_is_pdflush())
+			writeback_release(bdi);
+
 		if (nr_to_write && *nr_to_write == 0)
 			break;
 	}
@@ -255,6 +271,8 @@ static void __sync_list(struct list_head
  *
  * If `older_than_this' is non-zero then only flush inodes which have a
  * flushtime older than *older_than_this.
+ *
+ * This is a "memory cleansing" operation, not a "data integrity" operation.
  */
 void writeback_unlocked_inodes(int *nr_to_write, int sync_mode,
 				unsigned long *older_than_this)
@@ -276,29 +294,12 @@ void writeback_unlocked_inodes(int *nr_t
 		if (sb->s_writeback_gen == writeback_gen)
 			continue;
 		sb->s_writeback_gen = writeback_gen;
-
-		if (current->flags & PF_FLUSHER) {
-			if (sb->s_flags & MS_FLUSHING) {
-				/*
-				 * There's no point in two pdflush threads
-				 * flushing the same device.  But for other
-				 * callers, we want to perform the flush
-				 * because the fdatasync is how we implement
-				 * writer throttling.
-				 */
-				continue;
-			}
-			sb->s_flags |= MS_FLUSHING;
-		}
-
 		if (!list_empty(&sb->s_dirty)) {
 			spin_unlock(&sb_lock);
 			__sync_list(&sb->s_dirty, sync_mode,
 					nr_to_write, older_than_this);
 			spin_lock(&sb_lock);
 		}
-		if (current->flags & PF_FLUSHER)
-			sb->s_flags &= ~MS_FLUSHING;
 		if (nr_to_write && *nr_to_write == 0)
 			break;
 	}
@@ -307,7 +308,7 @@ void writeback_unlocked_inodes(int *nr_t
 }
 
 /*
- * Called under inode_lock
+ * Called under inode_lock.
  */
 static int __try_to_writeback_unused_list(struct list_head *head, int nr_inodes)
 {
@@ -318,7 +319,17 @@ static int __try_to_writeback_unused_lis
 		inode = list_entry(tmp, struct inode, i_list);
 
 		if (!atomic_read(&inode->i_count)) {
+			struct backing_dev_info *bdi;
+
+			bdi = inode->i_mapping->backing_dev_info;
+			if (current_is_pdflush() && !writeback_acquire(bdi))
+				goto out;
+
 			__sync_single_inode(inode, 0, NULL);
+
+			if (current_is_pdflush())
+				writeback_release(bdi);
+
 			nr_inodes--;
 
 			/* 
@@ -328,7 +339,7 @@ static int __try_to_writeback_unused_lis
 			tmp = head;
 		}
 	}
-
+out:
 	return nr_inodes;
 }
 
@@ -421,7 +432,11 @@ void sync_inodes(void)
 	}
 }
 
-void try_to_writeback_unused_inodes(unsigned long pexclusive)
+/*
+ * FIXME: the try_to_writeback_unused functions look dreadfully similar to
+ * writeback_unlocked_inodes...
+ */
+void try_to_writeback_unused_inodes(unsigned long unused)
 {
 	struct super_block * sb;
 	int nr_inodes = inodes_stat.nr_unused;
@@ -440,7 +455,6 @@ void try_to_writeback_unused_inodes(unsi
 	}
 	spin_unlock(&sb_lock);
 	spin_unlock(&inode_lock);
-	clear_bit(0, (unsigned long *)pexclusive);
 }
 
 /**
--- 2.5.16/include/linux/writeback.h~pdflush-single	Sun May 19 11:49:48 2002
+++ 2.5.16-akpm/include/linux/writeback.h	Sun May 19 12:02:57 2002
@@ -13,6 +13,15 @@ extern struct list_head inode_in_use;
 extern struct list_head inode_unused;
 
 /*
+ * Yes, writeback.h requires sched.h
+ * No, sched.h is not included from here.
+ */
+static inline int current_is_pdflush(void)
+{
+	return current->flags & PF_FLUSHER;
+}
+
+/*
  * fs/fs-writeback.c
  */
 #define WB_SYNC_NONE	0	/* Don't wait on anything */
--- 2.5.16/fs/inode.c~pdflush-single	Sun May 19 11:49:48 2002
+++ 2.5.16-akpm/fs/inode.c	Sun May 19 12:02:57 2002
@@ -404,21 +404,14 @@ void prune_icache(int goal)
 	dispose_list(freeable);
 
 	/* 
-	 * If we didn't freed enough clean inodes schedule
-	 * a sync of the dirty inodes, we cannot do it
-	 * from here or we're either synchronously dogslow
-	 * or we deadlock with oom.
+	 * If we didn't free enough clean inodes then schedule writeback of
+	 * the dirty inodes.  We cannot do it from here or we're either
+	 * synchronously dogslow or we deadlock with oom.
 	 */
-	if (goal) {
-		static unsigned long exclusive;
-
-		if (!test_and_set_bit(0, &exclusive)) {
-			if (pdflush_operation(try_to_writeback_unused_inodes,
-						(unsigned long)&exclusive))
-				clear_bit(0, &exclusive);
-		}
-	}
+	if (goal)
+		pdflush_operation(try_to_writeback_unused_inodes, 0);
 }
+
 /*
  * This is called from kswapd when we think we need some
  * more memory, but aren't really sure how much. So we
--- 2.5.16/include/linux/fs.h~pdflush-single	Sun May 19 11:49:48 2002
+++ 2.5.16-akpm/include/linux/fs.h	Sun May 19 12:02:57 2002
@@ -112,7 +112,6 @@ extern int leases_enable, dir_notify_ena
 #define MS_MOVE		8192
 #define MS_REC		16384
 #define MS_VERBOSE	32768
-#define MS_FLUSHING	(1<<16)	/* inodes are currently under writeout */
 #define MS_ACTIVE	(1<<30)
 #define MS_NOUSER	(1<<31)
 
@@ -156,7 +155,6 @@ extern int leases_enable, dir_notify_ena
 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
 #define IS_SYNC(inode)		(__IS_FLG(inode, MS_SYNCHRONOUS) || ((inode)->i_flags & S_SYNC))
 #define IS_MANDLOCK(inode)	__IS_FLG(inode, MS_MANDLOCK)
-#define IS_FLUSHING(inode)	__IS_FLG(inode, MS_FLUSHING)
 
 #define IS_QUOTAINIT(inode)	((inode)->i_flags & S_QUOTA)
 #define IS_NOQUOTA(inode)	((inode)->i_flags & S_NOQUOTA)
--- 2.5.16/mm/page-writeback.c~pdflush-single	Sun May 19 11:49:48 2002
+++ 2.5.16-akpm/mm/page-writeback.c	Sun May 19 12:02:57 2002
@@ -20,6 +20,7 @@
 #include <linux/writeback.h>
 #include <linux/init.h>
 #include <linux/sysrq.h>
+#include <linux/backing-dev.h>
 
 /*
  * Memory thresholds, in percentages
@@ -86,10 +87,7 @@ void balance_dirty_pages(struct address_
 		wake_pdflush = 1;
 	}
 
-	if (wake_pdflush && !IS_FLUSHING(mapping->host)) {
-		/*
-		 * There is no flush thread against this device. Start one now.
-		 */
+	if (wake_pdflush && !writeback_in_progress(mapping->backing_dev_info)) {
 		if (dirty_and_writeback > async_thresh) {
 			pdflush_flush(dirty_and_writeback - async_thresh);
 			yield();

-

                 reply	other threads:[~2002-05-19 19:40 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3CE80008.5A9CA5DA@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox