All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@zip.com.au>
To: Andrea Arcangeli <andrea@suse.de>
Cc: Alexander Viro <viro@math.psu.edu>,
	Linus Torvalds <torvalds@transmeta.com>,
	torrey.hoffman@myrio.com, linux-kernel@vger.kernel.org,
	Marcelo Tosatti <marcelo@conectiva.com.br>,
	"Stephen C. Tweedie" <sct@redhat.com>,
	Trond Myklebust <trond.myklebust@fys.uio.no>
Subject: Re: ramdisk corruption problems - was: RE: pivot_root and initrd kern   el panic woes
Date: Sat, 05 Jan 2002 03:43:36 -0800	[thread overview]
Message-ID: <3C36E6E8.628BF0BF@zip.com.au> (raw)
In-Reply-To: <3C2EB208.B2BA7CBF@zip.com.au> <Pine.GSO.4.21.0112300129060.8523-100000@weyl.math.psu.edu>, <Pine.GSO.4.21.0112300129060.8523-100000@weyl.math.psu.edu>; from viro@math.psu.edu on Sun, Dec 30, 2001 at 01:33:24AM -0500 <20011231010537.K1356@athlon.random>

Andrea Arcangeli wrote:
> 
> Note: I'm fine to introduce another way to notify the app about -ENOSPC,
> -ENOSPC on mmap is the most obvious one, but we could still allow the
> current "overcommit" behaviour with a kind of sigbus mentioned by
> Andrew (possibly not sigbus though, since it has just well defined
> semantics for MAP_SHARED, maybe they could be extended, anyways as said
> this is only a matter of API). My point is only that some API should be
> added because your mmap on sparse files are unreliable at the moment.
> 

The very least we can do is to return a sensible error code from msync().
At present, if you create a 200 meg mapping on a 100 meg disk, dirty it
all and run msync(MS_SYNC), the damn thing returns zero and you don't
know that you lost half your data.

The patch makes filemap_fdatasync() and filemap_fdatawait() return
an error code, and propagates that up through all callers, including 
fsync() and fdatasync().  Please review, especially the nfs and
generic_file_direct_IO() changes.

There's also a half-assed attempt to implement MS_ASYNC in here.
If anyone thinks that's not worth the effort, I won't be offended.



--- linux-2.4.18-pre1/include/linux/fs.h	Fri Dec 21 11:19:23 2001
+++ linux-akpm/include/linux/fs.h	Sat Jan  5 03:21:07 2002
@@ -1212,8 +1212,8 @@ extern int osync_inode_data_buffers(stru
 extern int fsync_inode_buffers(struct inode *);
 extern int fsync_inode_data_buffers(struct inode *);
 extern int inode_has_buffers(struct inode *);
-extern void filemap_fdatasync(struct address_space *);
-extern void filemap_fdatawait(struct address_space *);
+extern int filemap_fdatasync(struct address_space *);
+extern int filemap_fdatawait(struct address_space *);
 extern void sync_supers(kdev_t);
 extern int bmap(struct inode *, int);
 extern int notify_change(struct dentry *, struct iattr *);
--- linux-2.4.18-pre1/mm/filemap.c	Wed Dec 26 11:47:41 2001
+++ linux-akpm/mm/filemap.c	Sat Jan  5 03:23:09 2002
@@ -582,8 +582,9 @@ EXPORT_SYMBOL(fail_writepage);
  *      @mapping: address space structure to write
  *
  */
-void filemap_fdatasync(struct address_space * mapping)
+int filemap_fdatasync(struct address_space * mapping)
 {
+	int ret = 0;
 	int (*writepage)(struct page *) = mapping->a_ops->writepage;
 
 	spin_lock(&pagecache_lock);
@@ -603,8 +604,11 @@ void filemap_fdatasync(struct address_sp
 		lock_page(page);
 
 		if (PageDirty(page)) {
+			int err;
 			ClearPageDirty(page);
-			writepage(page);
+			err = writepage(page);
+			if (err && !ret)
+				ret = err;
 		} else
 			UnlockPage(page);
 
@@ -612,6 +616,7 @@ void filemap_fdatasync(struct address_sp
 		spin_lock(&pagecache_lock);
 	}
 	spin_unlock(&pagecache_lock);
+	return ret;
 }
 
 /**
@@ -621,8 +626,10 @@ void filemap_fdatasync(struct address_sp
  *      @mapping: address space structure to wait for
  *
  */
-void filemap_fdatawait(struct address_space * mapping)
+int filemap_fdatawait(struct address_space * mapping)
 {
+	int ret = 0;
+
 	spin_lock(&pagecache_lock);
 
         while (!list_empty(&mapping->locked_pages)) {
@@ -638,11 +645,14 @@ void filemap_fdatawait(struct address_sp
 		spin_unlock(&pagecache_lock);
 
 		___wait_on_page(page);
+		if (PageError(page))
+			ret = -EIO;
 
 		page_cache_release(page);
 		spin_lock(&pagecache_lock);
 	}
 	spin_unlock(&pagecache_lock);
+	return ret;
 }
 
 /*
@@ -1519,12 +1529,14 @@ static ssize_t generic_file_direct_IO(in
 		goto out_free;
 
 	/*
-	 * Flush to disk exlusively the _data_, metadata must remains
+	 * Flush to disk exclusively the _data_, metadata must remain
 	 * completly asynchronous or performance will go to /dev/null.
 	 */
-	filemap_fdatasync(mapping);
-	retval = fsync_inode_data_buffers(inode);
-	filemap_fdatawait(mapping);
+	retval = filemap_fdatasync(mapping);
+	if (retval == 0)
+		retval = fsync_inode_data_buffers(inode);
+	if (retval == 0)
+		retval = filemap_fdatawait(mapping);
 	if (retval < 0)
 		goto out_free;
 
@@ -2141,26 +2153,41 @@ int generic_file_mmap(struct file * file
  * The msync() system call.
  */
 
+/*
+ * We attempt to implement MS_ASYNC, but it's lame.  There needs to be a way
+ * of starting async writeout of the metadata and inode.
+ */
 static int msync_interval(struct vm_area_struct * vma,
 	unsigned long start, unsigned long end, int flags)
 {
+	int ret = 0;
 	struct file * file = vma->vm_file;
+
 	if (file && (vma->vm_flags & VM_SHARED)) {
-		int error;
-		error = filemap_sync(vma, start, end-start, flags);
+		/* filemap_sync() cannot fail */
+		ret = filemap_sync(vma, start, end-start, flags);
 
-		if (!error && (flags & MS_SYNC)) {
+		if (flags & (MS_SYNC|MS_ASYNC)) {
 			struct inode * inode = file->f_dentry->d_inode;
+
 			down(&inode->i_sem);
-			filemap_fdatasync(inode->i_mapping);
-			if (file->f_op && file->f_op->fsync)
-				error = file->f_op->fsync(file, file->f_dentry, 1);
-			filemap_fdatawait(inode->i_mapping);
+			ret = filemap_fdatasync(inode->i_mapping);
+			if (flags & MS_SYNC) {
+				int err;
+
+				if (file->f_op && file->f_op->fsync) {
+					err = file->f_op->fsync(file, file->f_dentry, 1);
+					if (err && !ret)
+						ret = err;
+				}
+				err = filemap_fdatawait(inode->i_mapping);
+				if (err && !ret)
+					ret = err;
+			}
 			up(&inode->i_sem);
 		}
-		return error;
 	}
-	return 0;
+	return ret;
 }
 
 asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
--- linux-2.4.18-pre1/fs/block_dev.c	Fri Dec 21 11:19:14 2001
+++ linux-akpm/fs/block_dev.c	Sat Jan  5 03:21:07 2002
@@ -171,11 +171,15 @@ static loff_t block_llseek(struct file *
 
 static int __block_fsync(struct inode * inode)
 {
-	int ret;
+	int ret, err;
 
-	filemap_fdatasync(inode->i_mapping);
-	ret = sync_buffers(inode->i_rdev, 1);
-	filemap_fdatawait(inode->i_mapping);
+	ret = filemap_fdatasync(inode->i_mapping);
+	err = sync_buffers(inode->i_rdev, 1);
+	if (err && !ret)
+		ret = err;
+	err = filemap_fdatawait(inode->i_mapping);
+	if (err && !ret)
+		ret = err;
 
 	return ret;
 }
--- linux-2.4.18-pre1/fs/buffer.c	Fri Dec 21 11:19:14 2001
+++ linux-akpm/fs/buffer.c	Sat Jan  5 03:21:07 2002
@@ -401,9 +401,9 @@ asmlinkage long sys_fsync(unsigned int f
 	struct file * file;
 	struct dentry * dentry;
 	struct inode * inode;
-	int err;
+	int ret, err;
 
-	err = -EBADF;
+	ret = -EBADF;
 	file = fget(fd);
 	if (!file)
 		goto out;
@@ -411,21 +411,27 @@ asmlinkage long sys_fsync(unsigned int f
 	dentry = file->f_dentry;
 	inode = dentry->d_inode;
 
-	err = -EINVAL;
-	if (!file->f_op || !file->f_op->fsync)
+	ret = -EINVAL;
+	if (!file->f_op || !file->f_op->fsync) {
+		/* Why?  We can still call filemap_fdatasync */
 		goto out_putf;
+	}
 
 	/* We need to protect against concurrent writers.. */
 	down(&inode->i_sem);
-	filemap_fdatasync(inode->i_mapping);
+	ret = filemap_fdatasync(inode->i_mapping);
 	err = file->f_op->fsync(file, dentry, 0);
-	filemap_fdatawait(inode->i_mapping);
+	if (err && !ret)
+		ret = err;
+	err = filemap_fdatawait(inode->i_mapping);
+	if (err && !ret)
+		ret = err;
 	up(&inode->i_sem);
 
 out_putf:
 	fput(file);
 out:
-	return err;
+	return ret;
 }
 
 asmlinkage long sys_fdatasync(unsigned int fd)
@@ -433,9 +439,9 @@ asmlinkage long sys_fdatasync(unsigned i
 	struct file * file;
 	struct dentry * dentry;
 	struct inode * inode;
-	int err;
+	int ret, err;
 
-	err = -EBADF;
+	ret = -EBADF;
 	file = fget(fd);
 	if (!file)
 		goto out;
@@ -443,14 +449,18 @@ asmlinkage long sys_fdatasync(unsigned i
 	dentry = file->f_dentry;
 	inode = dentry->d_inode;
 
-	err = -EINVAL;
+	ret = -EINVAL;
 	if (!file->f_op || !file->f_op->fsync)
 		goto out_putf;
 
 	down(&inode->i_sem);
-	filemap_fdatasync(inode->i_mapping);
+	ret = filemap_fdatasync(inode->i_mapping);
 	err = file->f_op->fsync(file, dentry, 1);
-	filemap_fdatawait(inode->i_mapping);
+	if (err && !ret)
+		ret = err;
+	err = filemap_fdatawait(inode->i_mapping);
+	if (err && !ret)
+		ret = err;
 	up(&inode->i_sem);
 
 out_putf:
--- linux-2.4.18-pre1/fs/nfs/file.c	Wed Dec 26 11:47:41 2001
+++ linux-akpm/fs/nfs/file.c	Sat Jan  5 03:21:07 2002
@@ -244,6 +244,7 @@ nfs_lock(struct file *filp, int cmd, str
 {
 	struct inode * inode = filp->f_dentry->d_inode;
 	int	status = 0;
+	int	status2;
 
 	dprintk("NFS: nfs_lock(f=%4x/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
 			inode->i_dev, inode->i_ino,
@@ -278,11 +279,18 @@ nfs_lock(struct file *filp, int cmd, str
 	 * Flush all pending writes before doing anything
 	 * with locks..
 	 */
-	filemap_fdatasync(inode->i_mapping);
+	/*
+	 * Shouldn't filemap_fdatasync/wait be inside i_sem?
+	 */
+	status = filemap_fdatasync(inode->i_mapping);
 	down(&inode->i_sem);
-	status = nfs_wb_all(inode);
+	status2 = nfs_wb_all(inode);
+	if (status2 && !status)
+		status = status2;
 	up(&inode->i_sem);
-	filemap_fdatawait(inode->i_mapping);
+	status2 = filemap_fdatawait(inode->i_mapping);
+	if (status2 && !status)
+		status = status2;
 	if (status < 0)
 		return status;
 
@@ -300,11 +308,17 @@ nfs_lock(struct file *filp, int cmd, str
 	 */
  out_ok:
 	if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
-		filemap_fdatasync(inode->i_mapping);
+		status2 = filemap_fdatasync(inode->i_mapping);
+		if (status2 && !status)
+			status = status2;
 		down(&inode->i_sem);
-		nfs_wb_all(inode);      /* we may have slept */
+		status2 = nfs_wb_all(inode);      /* we may have slept */
+		if (status2 && !status)
+			status2 = status;
 		up(&inode->i_sem);
-		filemap_fdatawait(inode->i_mapping);
+		status2 = filemap_fdatawait(inode->i_mapping);
+		if (status2 && !status)
+			status = status2;
 		nfs_zap_caches(inode);
 	}
 	return status;

  reply	other threads:[~2002-01-05 11:48 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2001-12-20 19:06 ramdisk corruption problems - was: RE: pivot_root and initrd kern el panic woes Torrey Hoffman
2001-12-20 19:46 ` Linus Torvalds
2001-12-20 22:56   ` Alexander Viro
2001-12-20 23:42   ` Andrea Arcangeli
2001-12-21  1:49     ` Andrea Arcangeli
     [not found]       ` <3C22CF16.C78B1F19@zip.com.au>
2001-12-29 15:40         ` Andrea Arcangeli
2001-12-30  6:19           ` Andrew Morton
2001-12-30  6:33             ` Alexander Viro
2001-12-30  6:38               ` Andrew Morton
2001-12-30  7:17                 ` Alexander Viro
2001-12-30 10:15                   ` ramdisk corruption problems - was: RE: pivot_root and initrd Alan Cox
2001-12-31  0:08                 ` ramdisk corruption problems - was: RE: pivot_root and initrd kern el panic woes Andrea Arcangeli
2001-12-30  7:08               ` Andrew Morton
2001-12-30  7:29                 ` Alexander Viro
2001-12-30  7:59                   ` ramdisk corruption problems - was: RE: pivot_root and initrdkern " Andrew Morton
2001-12-30 17:40                     ` Linus Torvalds
2001-12-31  0:28                       ` Andrea Arcangeli
2001-12-31  0:35                         ` Linus Torvalds
2001-12-31  1:00                           ` Andrea Arcangeli
2001-12-31  0:05               ` ramdisk corruption problems - was: RE: pivot_root and initrd kern " Andrea Arcangeli
2002-01-05 11:43                 ` Andrew Morton [this message]
2002-01-05 14:04                   ` Trond Myklebust
2002-01-07  3:08                   ` Andrea Arcangeli
2002-01-07  3:49                     ` Andrew Morton
2002-01-07  4:31                       ` Andrea Arcangeli
2001-12-30 23:56             ` Andrea Arcangeli
2001-12-31 10:06             ` Daniel Phillips
2002-01-04 16:38             ` Stephen C. Tweedie
2002-01-05  7:53       ` Andrew Morton
2002-01-07  1:08         ` Andrea Arcangeli
2001-12-21  1:38   ` Tachino Nobuhiro
2001-12-21  1:51     ` Everyone else but TWO Andre Hedrick
     [not found] <Pine.GSO.4.21.0112210151020.15555-100000@weyl.math.psu.edu>
2001-12-21 23:11 ` ramdisk corruption problems - was: RE: pivot_root and initrd kern el panic woes Linus Torvalds
2001-12-21 23:39   ` Alexander Viro
  -- strict thread matches above, loose matches on Subject: below --
2001-12-18 20:14 Torrey Hoffman
2001-12-20 12:19 ` Tachino Nobuhiro
2001-12-18  1:44 Torrey Hoffman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3C36E6E8.628BF0BF@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=andrea@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marcelo@conectiva.com.br \
    --cc=sct@redhat.com \
    --cc=torrey.hoffman@myrio.com \
    --cc=torvalds@transmeta.com \
    --cc=trond.myklebust@fys.uio.no \
    --cc=viro@math.psu.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.