All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@digeo.com>
To: Chris Mason <mason@suse.com>
Cc: "linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>
Subject: Re: [PATCH] 2.5.x write_super is not for syncing
Date: Mon, 02 Dec 2002 17:40:52 -0800	[thread overview]
Message-ID: <3DEC0BA4.70E2E344@digeo.com> (raw)
In-Reply-To: 1038877824.3040.133.camel@tiny

Chris Mason wrote:
> 
> ...
> > What about something along these lines?
> 
> You'd still want sync_filesystem to get calls in the sys_sync case
> though, right?

yes.

>  I'd be happy to rework the commit_super patch into a
> sync_filesystem patch that doesn't bother with s_dirt except when
> calling write_super().
> 
> The patch has two parts that matter in my mind:
> 
> 1) In sync_supers (and other similar funcs), allow the FS to leave
> s_dirt set.  This way we can setup per FS timings on when periodic
> writes get done.

hm.  Is that the simplest possible way of doing such a thing?
 
> 2) Give the FS a way to tell the difference between periodic writes that
> don't need to force and wait for a commit, and a request from the user
> that does require a full commit.

yup.  The whole sync thing is a mess, really.  It's designed for ext2,
and other filesystems just graft stuff on top of that, doing magical
things by inference.

It needs a complete rethink, IMO.  And I'm not really inclined to
do it in the 2.5 timeframe, mainly because the stuff we have now
mostly works, and everybody's busy on other stuff.

And we need a 2.4 solution, asap.  These two patches are what I'll
be testing tonight for 2.5.

 Documentation/filesystems/Locking |    7 +++++++
 fs/buffer.c                       |   21 +++++++++++++++++++--
 fs/dquot.c                        |    1 +
 fs/super.c                        |    5 ++---
 include/linux/buffer_head.h       |    2 --
 include/linux/fs.h                |    3 +++
 include/linux/writeback.h         |    1 -
 kernel/ksyms.c                    |    1 +
 8 files changed, 33 insertions(+), 8 deletions(-)

--- 25/fs/buffer.c~sync-fs	Mon Dec  2 16:24:34 2002
+++ 25-akpm/fs/buffer.c	Mon Dec  2 16:51:04 2002
@@ -213,7 +213,7 @@ EXPORT_SYMBOL(sync_blockdev);
  * superblock.  Filesystem data as well as the underlying block
  * device.  Takes the superblock lock.
  */
-int fsync_super(struct super_block *sb)
+static int __sync_filesystem(struct super_block *sb)
 {
 	sync_inodes_sb(sb, 0);
 	DQUOT_SYNC(sb);
@@ -227,6 +227,23 @@ int fsync_super(struct super_block *sb)
 	return sync_blockdev(sb->s_bdev);
 }
 
+int sync_filesystem(struct super_block *sb)
+{
+	int ret;
+	int cflags = current->flags;
+
+	current->flags |= PF_SYNC;	/* ick.  This is a "data integrity"
+					 * operation
+					 */
+
+	if (sb->s_op && sb->s_op->sync_filesystem)
+		ret = sb->s_op->sync_filesystem(sb);
+	else
+		ret = __sync_filesystem(sb);
+	current->flags = cflags;
+	return ret;
+}
+
 /*
  * Write out and wait upon all dirty data associated with this
  * device.   Filesystem data as well as the underlying block
@@ -236,7 +253,7 @@ int fsync_bdev(struct block_device *bdev
 {
 	struct super_block *sb = get_super(bdev);
 	if (sb) {
-		int res = fsync_super(sb);
+		int res = sync_filesystem(sb);
 		drop_super(sb);
 		return res;
 	}
--- 25/include/linux/buffer_head.h~sync-fs	Mon Dec  2 16:24:34 2002
+++ 25-akpm/include/linux/buffer_head.h	Mon Dec  2 16:24:34 2002
@@ -155,8 +155,6 @@ void __wait_on_buffer(struct buffer_head
 void sleep_on_buffer(struct buffer_head *bh);
 void wake_up_buffer(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
-int fsync_super(struct super_block *);
-int fsync_no_super(struct block_device *);
 struct buffer_head *__find_get_block(struct block_device *, sector_t, int);
 struct buffer_head * __getblk(struct block_device *, sector_t, int);
 void __brelse(struct buffer_head *);
--- 25/include/linux/fs.h~sync-fs	Mon Dec  2 16:24:34 2002
+++ 25-akpm/include/linux/fs.h	Mon Dec  2 16:44:00 2002
@@ -816,6 +816,7 @@ struct super_operations {
 	int (*remount_fs) (struct super_block *, int *, char *);
 	void (*clear_inode) (struct inode *);
 	void (*umount_begin) (struct super_block *);
+	int (*sync_filesystem)(struct super_block *);
 
 	int (*show_options)(struct seq_file *, struct vfsmount *);
 };
@@ -1311,6 +1312,8 @@ extern struct dentry *simple_lookup(stru
 extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *);
 extern struct file_operations simple_dir_operations;
 extern struct inode_operations simple_dir_inode_operations;
+extern int sync_filesystem(struct super_block *sb);
+extern void sync_inodes_sb(struct super_block *, int wait);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 extern unsigned int real_root_dev;
--- 25/fs/super.c~sync-fs	Mon Dec  2 16:24:34 2002
+++ 25-akpm/fs/super.c	Mon Dec  2 16:24:34 2002
@@ -27,7 +27,6 @@
 #include <linux/blkdev.h>
 #include <linux/quotaops.h>
 #include <linux/namei.h>
-#include <linux/buffer_head.h>		/* for fsync_super() */
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <asm/uaccess.h>
@@ -180,7 +179,7 @@ void generic_shutdown_super(struct super
 		shrink_dcache_parent(root);
 		shrink_dcache_anon(&sb->s_anon);
 		dput(root);
-		fsync_super(sb);
+		sync_filesystem(sb);
 		lock_super(sb);
 		lock_kernel();
 		sb->s_flags &= ~MS_ACTIVE;
@@ -391,7 +390,7 @@ int do_remount_sb(struct super_block *sb
 	if (flags & MS_RDONLY)
 		acct_auto_close(sb);
 	shrink_dcache_sb(sb);
-	fsync_super(sb);
+	sync_filesystem(sb);
 	/* If we are remounting RDONLY, make sure there are no rw files open */
 	if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
 		if (!fs_may_remount_ro(sb))
--- 25/Documentation/filesystems/Locking~sync-fs	Mon Dec  2 16:27:20 2002
+++ 25-akpm/Documentation/filesystems/Locking	Mon Dec  2 16:28:56 2002
@@ -96,6 +96,7 @@ prototypes:
 	int (*remount_fs) (struct super_block *, int *, char *);
 	void (*clear_inode) (struct inode *);
 	void (*umount_begin) (struct super_block *);
+	int (*sync_filesystem)(struct super_block *);
 
 locking rules:
 	All may block.
@@ -111,11 +112,17 @@ write_super:	no	yes	maybe		(see below)
 statfs:		no	no	no
 remount_fs:	yes	yes	maybe		(see below)
 umount_begin:	yes	no	maybe		(see below)
+sync_filesystem:no      no      ??
 
 ->read_inode() is not a method - it's a callback used in iget().
 rules for mount_sem are not too nice - it is going to die and be replaced
 by better scheme anyway.
 
+sync_filesystem() must commit all pending data (and metadata) to disk,
+and wait on its writeout.  It is a full sync of everything in that fs,
+period.
+
+
 --------------------------- file_system_type ---------------------------
 prototypes:
 	struct super_block *(*get_sb) (struct file_system_type *, int, char *, void *);
--- 25/include/linux/writeback.h~sync-fs	Mon Dec  2 16:43:32 2002
+++ 25-akpm/include/linux/writeback.h	Mon Dec  2 16:43:51 2002
@@ -50,7 +50,6 @@ struct writeback_control {
 void writeback_inodes(struct writeback_control *wbc);
 void wake_up_inode(struct inode *inode);
 void __wait_on_inode(struct inode * inode);
-void sync_inodes_sb(struct super_block *, int wait);
 void sync_inodes(int wait);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
--- 25/kernel/ksyms.c~sync-fs	Mon Dec  2 16:56:24 2002
+++ 25-akpm/kernel/ksyms.c	Mon Dec  2 16:57:37 2002
@@ -546,6 +546,7 @@ EXPORT_SYMBOL(buffer_insert_list);
 EXPORT_SYMBOL(make_bad_inode);
 EXPORT_SYMBOL(is_bad_inode);
 EXPORT_SYMBOL(__inode_dir_notify);
+EXPORT_SYMBOL(sync_inodes_sb);
 
 #ifdef CONFIG_UID16
 EXPORT_SYMBOL(overflowuid);
--- 25/fs/dquot.c~sync-fs	Mon Dec  2 16:58:07 2002
+++ 25-akpm/fs/dquot.c	Mon Dec  2 16:58:24 2002
@@ -1425,3 +1425,4 @@ EXPORT_SYMBOL(unregister_quota_format);
 EXPORT_SYMBOL(dqstats);
 EXPORT_SYMBOL(dq_list_lock);
 EXPORT_SYMBOL(dq_data_lock);
+EXPORT_SYMBOL(sync_dquots);

_


 fs/ext3/super.c |   85 ++++++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 61 insertions(+), 24 deletions(-)

--- 25/fs/ext3/super.c~ext3-sync-fs	Mon Dec  2 17:00:20 2002
+++ 25-akpm/fs/ext3/super.c	Mon Dec  2 17:15:53 2002
@@ -29,6 +29,7 @@
 #include <linux/blkdev.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/quotaops.h>
 #include <asm/uaccess.h>
 #include "xattr.h"
 #include "acl.h"
@@ -47,6 +48,7 @@ static void ext3_mark_recovery_complete(
 					struct ext3_super_block * es);
 static void ext3_clear_journal_err(struct super_block * sb,
 				   struct ext3_super_block * es);
+static int ext3_sync_filesystem(struct super_block *sb);
 
 #ifdef CONFIG_JBD_DEBUG
 int journal_no_write[2];
@@ -495,18 +497,19 @@ static void ext3_clear_inode(struct inod
 static struct super_operations ext3_sops = {
 	.alloc_inode	= ext3_alloc_inode,
 	.destroy_inode	= ext3_destroy_inode,
-	.read_inode	= ext3_read_inode,	/* BKL held */
-	.write_inode	= ext3_write_inode,	/* BKL not held.  Don't need */
-	.dirty_inode	= ext3_dirty_inode,	/* BKL not held.  We take it */
-	.put_inode	= ext3_put_inode,		/* BKL not held.  Don't need */
-	.delete_inode	= ext3_delete_inode,	/* BKL not held.  We take it */
-	.put_super	= ext3_put_super,		/* BKL held */
-	.write_super	= ext3_write_super,	/* BKL not held. We take it. Needed? */
-	.write_super_lockfs = ext3_write_super_lockfs, /* BKL not held. Take it */
-	.unlockfs	= ext3_unlockfs,		/* BKL not held.  We take it */
-	.statfs		= ext3_statfs,		/* BKL not held. */
-	.remount_fs	= ext3_remount,		/* BKL held */
-	.clear_inode	= ext3_clear_inode,	/* BKL not needed. */
+	.read_inode	= ext3_read_inode,
+	.write_inode	= ext3_write_inode,
+	.dirty_inode	= ext3_dirty_inode,
+	.put_inode	= ext3_put_inode,
+	.delete_inode	= ext3_delete_inode,
+	.put_super	= ext3_put_super,
+	.write_super	= ext3_write_super,
+	.sync_filesystem= ext3_sync_filesystem,
+	.write_super_lockfs = ext3_write_super_lockfs,
+	.unlockfs	= ext3_unlockfs,
+	.statfs		= ext3_statfs,
+	.remount_fs	= ext3_remount,
+	.clear_inode	= ext3_clear_inode,
 };
 
 struct dentry *ext3_get_parent(struct dentry *child);
@@ -1714,25 +1717,59 @@ int ext3_force_commit(struct super_block
  * This implicitly triggers the writebehind on sync().
  */
 
-static int do_sync_supers = 0;
-MODULE_PARM(do_sync_supers, "i");
-MODULE_PARM_DESC(do_sync_supers, "Write superblocks synchronously");
-
 void ext3_write_super (struct super_block * sb)
 {
-	tid_t target;
 	lock_kernel();	
 	if (down_trylock(&sb->s_lock) == 0)
-		BUG();		/* aviro detector */
+		BUG();
 	sb->s_dirt = 0;
-	target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
+	log_start_commit(EXT3_SB(sb)->s_journal, NULL);
+	unlock_kernel();
+}
 
-	if (do_sync_supers) {
-		unlock_super(sb);
-		log_wait_commit(EXT3_SB(sb)->s_journal, target);
-		lock_super(sb);
-	}
+static int ext3_sync_filesystem(struct super_block *sb)
+{
+	tid_t target;
+
+	/*
+	 * The pagecache flush may instantiate new indirects and
+	 * may dirty inodes.
+	 */
+	sync_inodes_sb(sb, 0);
+
+	/*
+	 * Quota sync may write to files
+	 */
+	DQUOT_SYNC(sb);
+
+	/*
+	 * So sync the files again
+	 */
+	sync_inodes_sb(sb, 0);
+
+	/*
+	 * Now run a commit, and wait on it.
+	 */
+	lock_kernel();	
+	sb->s_dirt = 0;
+	target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
+	log_wait_commit(EXT3_SB(sb)->s_journal, target);
 	unlock_kernel();
+
+	/*
+	 * For data=journal inodes, the commit may have generated new dirty
+	 * buffers.  Their metadata is all tight on disk, so just flush the
+	 * pages again.  And wait on them.
+	 */
+	sync_inodes_sb(sb, 0);		/* Nicer IO scheduling */
+	sync_inodes_sb(sb, 1);		/* Wait here */
+
+	/*
+	 * And we know that the only dirty buffers will be against the blockdev
+	 * mapping, so write them and wait on them.
+	 */
+	sync_blockdev(sb->s_bdev);
+	return 0;
 }
 
 /*

_

  reply	other threads:[~2002-12-03  1:40 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-10-16 18:00 [PATCH] 2.5.x write_super is not for syncing Chris Mason
2002-12-02 22:07 ` Andrew Morton
2002-12-02 23:40   ` Chris Mason
2002-12-03  0:03     ` Andrew Morton
2002-12-03  1:10       ` Chris Mason
2002-12-03  1:40         ` Andrew Morton [this message]
2002-12-03  3:09           ` Andrew Morton
2002-12-03 19:36             ` Bryan Henderson
2002-12-03 20:06               ` Andrew Morton
2002-12-03 21:41                 ` Bryan Henderson
2002-12-03 22:13                   ` Andrew Morton
2002-12-04  2:05                     ` Bryan Henderson
2002-12-04  4:29                       ` Andrew Morton
2002-12-04 19:00                         ` Bryan Henderson
2002-12-04 19:23                           ` Matthew Wilcox
2002-12-04 22:17                             ` Bryan Henderson
2002-12-05 10:36                               ` Arnaldo Carvalho de Melo
2002-12-05 16:31                                 ` Stephen C. Tweedie
2002-12-05 17:24                                   ` girish
2002-12-04 21:10                       ` Stephen C. Tweedie
2002-12-04 22:46                         ` Bryan Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3DEC0BA4.70E2E344@digeo.com \
    --to=akpm@digeo.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=mason@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.