* [PATCH 2.5] Report write errors to applications
@ 2003-01-29 6:09 Oliver Xymoron
2003-01-29 7:29 ` Andrew Morton
0 siblings, 1 reply; 9+ messages in thread
From: Oliver Xymoron @ 2003-01-29 6:09 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Andrew, this is a forward-port of the 2.4 write error propagation
patch I just posted, applies against -mm6. Lightly tested on 2.5, but
should be straightforward.
diff -urN -x '.patch*' -x '*.orig' orig/fs/buffer.c patched/fs/buffer.c
--- orig/fs/buffer.c 2003-01-24 13:24:21.000000000 -0600
+++ patched/fs/buffer.c 2003-01-24 13:25:08.000000000 -0600
@@ -165,15 +165,27 @@
* Default synchronous end-of-IO handler.. Just mark it up-to-date and
* unlock the buffer. This is what ll_rw_block uses too.
*/
-void end_buffer_io_sync(struct buffer_head *bh, int uptodate)
+void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
if (uptodate) {
set_buffer_uptodate(bh);
} else {
- /*
- * This happens, due to failed READA attempts.
- * buffer_io_error(bh);
- */
+ /* This happens, due to failed READA attempts. */
+ clear_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ put_bh(bh);
+}
+
+void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
+{
+ if (uptodate) {
+ set_buffer_uptodate(bh);
+ } else {
+ buffer_io_error(bh);
+ printk(KERN_WARNING "lost page write due to I/O error on %s\n",
+ bdevname(bh->b_bdev));
+ bh->b_page->mapping->error = -EIO;
clear_buffer_uptodate(bh);
}
unlock_buffer(bh);
@@ -550,6 +562,9 @@
set_buffer_uptodate(bh);
} else {
buffer_io_error(bh);
+ printk(KERN_WARNING "lost page write due to I/O error on %s\n",
+ bdevname(bh->b_bdev));
+ bh->b_page->mapping->error = -EIO;
clear_buffer_uptodate(bh);
SetPageError(page);
}
@@ -1201,7 +1216,7 @@
if (buffer_dirty(bh))
buffer_error();
get_bh(bh);
- bh->b_end_io = end_buffer_io_sync;
+ bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
@@ -2604,13 +2619,14 @@
continue;
get_bh(bh);
- bh->b_end_io = end_buffer_io_sync;
if (rw == WRITE) {
+ bh->b_end_io = end_buffer_write_sync;
if (test_clear_buffer_dirty(bh)) {
submit_bh(WRITE, bh);
continue;
}
} else {
+ bh->b_end_io = end_buffer_read_sync;
if (!buffer_uptodate(bh)) {
submit_bh(rw, bh);
continue;
diff -urN -x '.patch*' -x '*.orig' orig/fs/inode.c patched/fs/inode.c
--- orig/fs/inode.c 2003-01-24 13:24:21.000000000 -0600
+++ patched/fs/inode.c 2003-01-24 13:24:26.000000000 -0600
@@ -134,6 +134,7 @@
mapping->dirtied_when = 0;
mapping->assoc_mapping = NULL;
mapping->backing_dev_info = &default_backing_dev_info;
+ mapping->error = 0;
if (sb->s_bdev)
inode->i_data.backing_dev_info = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
memset(&inode->u, 0, sizeof(inode->u));
diff -urN -x '.patch*' -x '*.orig' orig/fs/ntfs/compress.c patched/fs/ntfs/compress.c
--- orig/fs/ntfs/compress.c 2003-01-13 23:58:43.000000000 -0600
+++ patched/fs/ntfs/compress.c 2003-01-24 13:24:26.000000000 -0600
@@ -598,7 +598,7 @@
continue;
}
atomic_inc(&tbh->b_count);
- tbh->b_end_io = end_buffer_io_sync;
+ tbh->b_end_io = end_buffer_read_sync;
submit_bh(READ, tbh);
}
diff -urN -x '.patch*' -x '*.orig' orig/fs/open.c patched/fs/open.c
--- orig/fs/open.c 2003-01-13 23:58:05.000000000 -0600
+++ patched/fs/open.c 2003-01-24 13:24:26.000000000 -0600
@@ -843,21 +843,41 @@
*/
int filp_close(struct file *filp, fl_owner_t id)
{
- int retval;
+ struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
+ struct inode *inode = mapping->host;
+ int retval = 0, err;
+
+ /* Report and clear outstanding errors */
+ err = filp->f_error;
+ if (err) {
+ filp->f_error = 0;
+ retval = err;
+ }
+
+ down(&inode->i_sem);
+ err = mapping->error;
+ if (err && !retval) {
+ mapping->error = 0;
+ retval = err;
+ }
+ up(&inode->i_sem);
if (!file_count(filp)) {
printk(KERN_ERR "VFS: Close: file count is 0\n");
- return 0;
+ return retval;
}
- retval = 0;
+
if (filp->f_op && filp->f_op->flush) {
lock_kernel();
- retval = filp->f_op->flush(filp);
+ err = filp->f_op->flush(filp);
unlock_kernel();
+ if (err && !retval)
+ retval = err;
}
dnotify_flush(filp, id);
locks_remove_posix(filp, id);
fput(filp);
+
return retval;
}
diff -urN -x '.patch*' -x '*.orig' orig/include/linux/buffer_head.h patched/include/linux/buffer_head.h
--- orig/include/linux/buffer_head.h 2003-01-24 13:24:21.000000000 -0600
+++ patched/include/linux/buffer_head.h 2003-01-24 13:24:26.000000000 -0600
@@ -136,7 +136,8 @@
int try_to_free_buffers(struct page *);
void create_empty_buffers(struct page *, unsigned long,
unsigned long b_state);
-void end_buffer_io_sync(struct buffer_head *bh, int uptodate);
+void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
+void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
/* Things to do with buffers at mapping->private_list */
void buffer_insert_list(spinlock_t *lock,
diff -urN -x '.patch*' -x '*.orig' orig/include/linux/fs.h patched/include/linux/fs.h
--- orig/include/linux/fs.h 2003-01-24 13:24:21.000000000 -0600
+++ patched/include/linux/fs.h 2003-01-24 13:24:26.000000000 -0600
@@ -326,6 +326,7 @@
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
struct address_space *assoc_mapping; /* ditto */
+ int error; /* write error for fsync */
};
struct char_device {
diff -urN -x '.patch*' -x '*.orig' orig/kernel/ksyms.c patched/kernel/ksyms.c
--- orig/kernel/ksyms.c 2003-01-24 13:24:21.000000000 -0600
+++ patched/kernel/ksyms.c 2003-01-24 13:24:26.000000000 -0600
@@ -176,7 +176,8 @@
EXPORT_SYMBOL(d_lookup);
EXPORT_SYMBOL(d_path);
EXPORT_SYMBOL(mark_buffer_dirty);
-EXPORT_SYMBOL(end_buffer_io_sync);
+EXPORT_SYMBOL(end_buffer_read_sync);
+EXPORT_SYMBOL(end_buffer_write_sync);
EXPORT_SYMBOL(__mark_inode_dirty);
EXPORT_SYMBOL(get_empty_filp);
EXPORT_SYMBOL(init_private_file);
diff -urN -x '.patch*' -x '*.orig' orig/mm/filemap.c patched/mm/filemap.c
--- orig/mm/filemap.c 2003-01-24 13:24:21.000000000 -0600
+++ patched/mm/filemap.c 2003-01-24 13:24:26.000000000 -0600
@@ -185,6 +185,14 @@
write_lock(&mapping->page_lock);
}
write_unlock(&mapping->page_lock);
+
+ /* Check for outstanding write errors */
+ if (mapping->error)
+ {
+ ret = mapping->error;
+ mapping->error = 0;
+ }
+
return ret;
}
diff -urN -x '.patch*' -x '*.orig' orig/mm/vmscan.c patched/mm/vmscan.c
--- orig/mm/vmscan.c 2003-01-24 13:24:21.000000000 -0600
+++ patched/mm/vmscan.c 2003-01-24 13:24:26.000000000 -0600
@@ -342,6 +342,9 @@
SetPageReclaim(page);
res = mapping->a_ops->writepage(page, &wbc);
+ if (res < 0) {
+ mapping->error = res;
+ }
if (res == WRITEPAGE_ACTIVATE) {
ClearPageReclaim(page);
goto activate_locked;
--
"Love the dolphins," she advised him. "Write by W.A.S.T.E.."
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH 2.5] Report write errors to applications 2003-01-29 6:09 [PATCH 2.5] Report write errors to applications Oliver Xymoron @ 2003-01-29 7:29 ` Andrew Morton 2003-01-29 16:24 ` Oliver Xymoron 0 siblings, 1 reply; 9+ messages in thread From: Andrew Morton @ 2003-01-29 7:29 UTC (permalink / raw) To: Oliver Xymoron; +Cc: linux-kernel Oliver Xymoron <oxymoron@waste.org> wrote: > > Andrew, this is a forward-port of the 2.4 write error propagation > patch I just posted, applies against -mm6. Lightly tested on 2.5, but > should be straightforward. You sent Marcelo the 2.5 patch. > diff -urN -x '.patch*' -x '*.orig' orig/fs/buffer.c patched/fs/buffer.c > --- orig/fs/buffer.c 2003-01-24 13:24:21.000000000 -0600 > +++ patched/fs/buffer.c 2003-01-24 13:25:08.000000000 -0600 > @@ -165,15 +165,27 @@ > * Default synchronous end-of-IO handler.. Just mark it up-to-date and > * unlock the buffer. This is what ll_rw_block uses too. > */ > -void end_buffer_io_sync(struct buffer_head *bh, int uptodate) > +void end_buffer_read_sync(struct buffer_head *bh, int uptodate) > { > if (uptodate) { > set_buffer_uptodate(bh); > } else { > - /* > - * This happens, due to failed READA attempts. > - * buffer_io_error(bh); > - */ > + /* This happens, due to failed READA attempts. */ > + clear_buffer_uptodate(bh); > + } > + unlock_buffer(bh); > + put_bh(bh); > +} > + > +void end_buffer_write_sync(struct buffer_head *bh, int uptodate) > +{ > + if (uptodate) { > + set_buffer_uptodate(bh); > + } else { > + buffer_io_error(bh); > + printk(KERN_WARNING "lost page write due to I/O error on %s\n", > + bdevname(bh->b_bdev)); > + bh->b_page->mapping->error = -EIO; Accessing bh->b_page here could be risky. In 2.5, this _has_ to be safe, because submit_bh() requires it. But in 2.4 (and in thus-far-untested corners of 2.5) it is possible that someone is feeding a buffer_head with a garbage ->b_page into ll_rw_blk(). Now it could be that we're safe in this respect (the comment above generic_make_request says that the caller must set up b_page, but perhaps lots of drivers do not look at it. Not sure...) So what you could do here is to deliberately dereference bh->b_page even in the non-error case so those problems are quickly detected. But probably, there's not much point in doing any of this for ll_rw_blk() and end_buffer_io_sync(). Most of those buffers are against the blockdev mapping, and once the errors are propagated into the blockdev mapping's address space we no longer know what file they pertain to. It would be better to check the uptodate state of the buffer_head in drop_buffers(), and to propagate errors into the address_space there. For buffers which are still attached to the S_ISREG file's address_space we're OK - fsync_buffers_list() will handle them and will return errors to the fsync() caller. We only need to handle those buffers which were stripped asynchronously by VM activity. > clear_buffer_uptodate(bh); > } > unlock_buffer(bh); > @@ -550,6 +562,9 @@ > set_buffer_uptodate(bh); > } else { > buffer_io_error(bh); > + printk(KERN_WARNING "lost page write due to I/O error on %s\n", > + bdevname(bh->b_bdev)); > + bh->b_page->mapping->error = -EIO; page->mapping->error = -EIO; would suffice here. > clear_buffer_uptodate(bh); > SetPageError(page); > } > @@ -1201,7 +1216,7 @@ > if (buffer_dirty(bh)) > buffer_error(); > get_bh(bh); > - bh->b_end_io = end_buffer_io_sync; > + bh->b_end_io = end_buffer_read_sync; > submit_bh(READ, bh); > wait_on_buffer(bh); > if (buffer_uptodate(bh)) > @@ -2604,13 +2619,14 @@ > continue; > > get_bh(bh); > - bh->b_end_io = end_buffer_io_sync; > if (rw == WRITE) { > + bh->b_end_io = end_buffer_write_sync; > if (test_clear_buffer_dirty(bh)) { > submit_bh(WRITE, bh); > continue; > } > } else { > + bh->b_end_io = end_buffer_read_sync; > if (!buffer_uptodate(bh)) { > submit_bh(rw, bh); > continue; > diff -urN -x '.patch*' -x '*.orig' orig/fs/inode.c patched/fs/inode.c > --- orig/fs/inode.c 2003-01-24 13:24:21.000000000 -0600 > +++ patched/fs/inode.c 2003-01-24 13:24:26.000000000 -0600 > @@ -134,6 +134,7 @@ > mapping->dirtied_when = 0; > mapping->assoc_mapping = NULL; > mapping->backing_dev_info = &default_backing_dev_info; > + mapping->error = 0; Machines can have millions of address_spaces in-core and here you have used 32 bits where two would do. Please make mapping->error an unsigned long and just use bits zero and one here for ENOSPC and EIO. Make sure that atomic ops are used, and this way we get 30 spare bits in the address_space for future expansion. > @@ -598,7 +598,7 @@ > continue; > } > atomic_inc(&tbh->b_count); > - tbh->b_end_io = end_buffer_io_sync; > + tbh->b_end_io = end_buffer_read_sync; > submit_bh(READ, tbh); > } > > diff -urN -x '.patch*' -x '*.orig' orig/fs/open.c patched/fs/open.c > --- orig/fs/open.c 2003-01-13 23:58:05.000000000 -0600 > +++ patched/fs/open.c 2003-01-24 13:24:26.000000000 -0600 > @@ -843,21 +843,41 @@ > */ > int filp_close(struct file *filp, fl_owner_t id) > { > - int retval; > + struct address_space *mapping = filp->f_dentry->d_inode->i_mapping; > + struct inode *inode = mapping->host; > + int retval = 0, err; > + > + /* Report and clear outstanding errors */ > + err = filp->f_error; > + if (err) { > + filp->f_error = 0; > + retval = err; > + } > + > + down(&inode->i_sem); > + err = mapping->error; > + if (err && !retval) { > + mapping->error = 0; > + retval = err; > + } I don't really understand why you're reporting the error on close(). Applications probably do not expect that. It would be better to report the error to the next call to fsync(), fdatasync() and msync(). Reporting it on close as well doesn't hurt, but *sync() should be the main reporting interface. > + up(&inode->i_sem); with test_and_clear_bit() against mapping->errors we can do away with the i_sem locking here. --- orig/mm/vmscan.c 2003-01-24 13:24:21.000000000 -0600 +++ patched/mm/vmscan.c 2003-01-24 13:24:26.000000000 -0600 @@ -342,6 +342,9 @@ SetPageReclaim(page); res = mapping->a_ops->writepage(page, &wbc); + if (res < 0) { + mapping->error = res; + } Most writepage() activity happens in fs/mpage.c - you'll need to propagate IO errors into the address_space in the BIO completion handlers there. ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2.5] Report write errors to applications 2003-01-29 7:29 ` Andrew Morton @ 2003-01-29 16:24 ` Oliver Xymoron 2003-01-29 21:42 ` Andrew Morton 0 siblings, 1 reply; 9+ messages in thread From: Oliver Xymoron @ 2003-01-29 16:24 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, Marcelo Tosatti On Tue, Jan 28, 2003 at 11:29:29PM -0800, Andrew Morton wrote: > Oliver Xymoron <oxymoron@waste.org> wrote: > > > > Andrew, this is a forward-port of the 2.4 write error propagation > > patch I just posted, applies against -mm6. Lightly tested on 2.5, but > > should be straightforward. > > You sent Marcelo the 2.5 patch. Doh. > > diff -urN -x '.patch*' -x '*.orig' orig/fs/buffer.c patched/fs/buffer.c > > +++ patched/fs/buffer.c 2003-01-24 13:25:08.000000000 -0600 > > @@ -165,15 +165,27 @@ > > * Default synchronous end-of-IO handler.. Just mark it up-to-date and > > * unlock the buffer. This is what ll_rw_block uses too. > > */ > > -void end_buffer_io_sync(struct buffer_head *bh, int uptodate) > > +void end_buffer_read_sync(struct buffer_head *bh, int uptodate) > > { > > if (uptodate) { > > set_buffer_uptodate(bh); > > } else { > > - /* > > - * This happens, due to failed READA attempts. > > - * buffer_io_error(bh); > > - */ > > + /* This happens, due to failed READA attempts. */ > > + clear_buffer_uptodate(bh); > > + } > > + unlock_buffer(bh); > > + put_bh(bh); > > +} > > + > > +void end_buffer_write_sync(struct buffer_head *bh, int uptodate) > > +{ > > + if (uptodate) { > > + set_buffer_uptodate(bh); > > + } else { > > + buffer_io_error(bh); > > + printk(KERN_WARNING "lost page write due to I/O error on %s\n", > > + bdevname(bh->b_bdev)); > > + bh->b_page->mapping->error = -EIO; > > Accessing bh->b_page here could be risky. > > In 2.5, this _has_ to be safe, because submit_bh() requires it. > > But in 2.4 (and in thus-far-untested corners of 2.5) it is possible that > someone is feeding a buffer_head with a garbage ->b_page into ll_rw_blk(). I actually looked at this a bit and it seemed fine. What are some likely candidates? I'll take a look. > Now it could be that we're safe in this respect (the comment above > generic_make_request says that the caller must set up b_page, but > perhaps lots of drivers do not look at it. Not sure...) > > So what you could do here is to deliberately dereference bh->b_page even in > the non-error case so those problems are quickly detected. Enforcing it in 2.5 this way might make sense, but if it's actually a problem in 2.4, I doubt that's acceptable at this point. Some form of this fix needs to make it to 2.4, which is where the people with big disk farms are going to be for at least the next year. > But probably, there's not much point in doing any of this for ll_rw_blk() and > end_buffer_io_sync(). Most of those buffers are against the blockdev > mapping, and once the errors are propagated into the blockdev mapping's > address space we no longer know what file they pertain to. Ok, didn't realize how much this had changed in 2.5. This works nicely for regular files in 2.4. > It would be better to check the uptodate state of the buffer_head in > drop_buffers(), and to propagate errors into the address_space there. For > buffers which are still attached to the S_ISREG file's address_space we're OK > - fsync_buffers_list() will handle them and will return errors to the fsync() > caller. We only need to handle those buffers which were stripped > asynchronously by VM activity. Are we guaranteed that we'll get a try_to_free_buffers after IO completion and before sync? I haven't dug through this path much. > > > clear_buffer_uptodate(bh); > > } > > unlock_buffer(bh); > > @@ -550,6 +562,9 @@ > > set_buffer_uptodate(bh); > > } else { > > buffer_io_error(bh); > > + printk(KERN_WARNING "lost page write due to I/O error on %s\n", > > + bdevname(bh->b_bdev)); > > + bh->b_page->mapping->error = -EIO; > > page->mapping->error = -EIO; > > would suffice here. Will look at that. > > clear_buffer_uptodate(bh); > > SetPageError(page); > > } > > @@ -1201,7 +1216,7 @@ > > if (buffer_dirty(bh)) > > buffer_error(); > > get_bh(bh); > > - bh->b_end_io = end_buffer_io_sync; > > + bh->b_end_io = end_buffer_read_sync; > > submit_bh(READ, bh); > > wait_on_buffer(bh); > > if (buffer_uptodate(bh)) > > @@ -2604,13 +2619,14 @@ > > continue; > > > > get_bh(bh); > > - bh->b_end_io = end_buffer_io_sync; > > if (rw == WRITE) { > > + bh->b_end_io = end_buffer_write_sync; > > if (test_clear_buffer_dirty(bh)) { > > submit_bh(WRITE, bh); > > continue; > > } > > } else { > > + bh->b_end_io = end_buffer_read_sync; > > if (!buffer_uptodate(bh)) { > > submit_bh(rw, bh); > > continue; > > diff -urN -x '.patch*' -x '*.orig' orig/fs/inode.c patched/fs/inode.c > > +++ patched/fs/inode.c 2003-01-24 13:24:26.000000000 -0600 > > @@ -134,6 +134,7 @@ > > mapping->dirtied_when = 0; > > mapping->assoc_mapping = NULL; > > mapping->backing_dev_info = &default_backing_dev_info; > > + mapping->error = 0; > > Machines can have millions of address_spaces in-core and here you have used > 32 bits where two would do. > > Please make mapping->error an unsigned long and just use bits zero and one > here for ENOSPC and EIO. Make sure that atomic ops are used, and this way we > get 30 spare bits in the address_space for future expansion. I certainly thought about that (given your partial patch that did the same with pages), but I couldn't see the point. We can return exactly one error and given that they're all effectively "everything you did since the last sync is hosed", there's no obvious way to choose among them. There is no ENOSPC_AND_BTW_IO. As long as we're dedicating a new structure member to this, why not simplify the logic and just track the latest error? If we're going to bother with this complexity, then we ought to merge it with an existing element, for example, combine it with gfp_mask. If you think that's worth the effort, I'll take a look at it. I don't think this complexity makes sense for 2.4 though. > > @@ -598,7 +598,7 @@ > > continue; > > } > > atomic_inc(&tbh->b_count); > > - tbh->b_end_io = end_buffer_io_sync; > > + tbh->b_end_io = end_buffer_read_sync; > > submit_bh(READ, tbh); > > } > > > > diff -urN -x '.patch*' -x '*.orig' orig/fs/open.c patched/fs/open.c > > +++ patched/fs/open.c 2003-01-24 13:24:26.000000000 -0600 > > @@ -843,21 +843,41 @@ > > */ > > int filp_close(struct file *filp, fl_owner_t id) > > { > > - int retval; > > + struct address_space *mapping = filp->f_dentry->d_inode->i_mapping; > > + struct inode *inode = mapping->host; > > + int retval = 0, err; > > + > > + /* Report and clear outstanding errors */ > > + err = filp->f_error; > > + if (err) { > > + filp->f_error = 0; > > + retval = err; > > + } > > + > > + down(&inode->i_sem); > > + err = mapping->error; > > + if (err && !retval) { > > + mapping->error = 0; > > + retval = err; > > + } > > I don't really understand why you're reporting the error on close(). > Applications probably do not expect that. > > It would be better to report the error to the next call to fsync(), > fdatasync() and msync(). Reporting it on close as well doesn't hurt, but > *sync() should be the main reporting interface. Yes, I do those too, there's a chunk that you don't appear to have quoted from filemap which is in the common waiting path of all the sync syscalls: diff -urN -x '.patch*' -x '*.orig' orig/mm/filemap.c patched/mm/filemap.c --- orig/mm/filemap.c 2003-01-24 13:24:21.000000000 -0600 +++ patched/mm/filemap.c 2003-01-24 13:24:26.000000000 -0600 @@ -185,6 +185,14 @@ write_lock(&mapping->page_lock); } write_unlock(&mapping->page_lock); + + /* Check for outstanding write errors */ + if (mapping->error) + { + ret = mapping->error; + mapping->error = 0; + } + return ret; } Though it turns out a lot of apps _are_ checking close and not doing sync (I believe this includes some of the standard fileutils). > > > + up(&inode->i_sem); > > with test_and_clear_bit() against mapping->errors we can do away with the > i_sem locking here. I'm actually not really concerned about concurrent writers in the close case. It inherently races with with the VM pushing pages out anyway. And we already make no guarantees about _who_ the error gets delivered to in the multiple writers case, so the case where two simultaneous closers get errors is ok too. The above locking is just cut and paste from the code that handles another class of inode error. > +++ patched/mm/vmscan.c 2003-01-24 13:24:26.000000000 -0600 > @@ -342,6 +342,9 @@ > SetPageReclaim(page); > res = mapping->a_ops->writepage(page, &wbc); > > + if (res < 0) { > + mapping->error = res; > + } > > Most writepage() activity happens in fs/mpage.c - you'll need to propagate IO > errors into the address_space in the BIO completion handlers there. Another 2.5 change I hadn't noticed. Ok, will look at that. I haven't come up with a good test for the writepage ENOSPC, thoughts? -- "Love the dolphins," she advised him. "Write by W.A.S.T.E.." ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2.5] Report write errors to applications 2003-01-29 16:24 ` Oliver Xymoron @ 2003-01-29 21:42 ` Andrew Morton 2003-01-30 21:12 ` Oliver Xymoron 0 siblings, 1 reply; 9+ messages in thread From: Andrew Morton @ 2003-01-29 21:42 UTC (permalink / raw) To: Oliver Xymoron; +Cc: linux-kernel, marcelo Oliver Xymoron <oxymoron@waste.org> wrote: > > > - fsync_buffers_list() will handle them and will return errors to the fsync() > > caller. We only need to handle those buffers which were stripped > > asynchronously by VM activity. > > Are we guaranteed that we'll get a try_to_free_buffers after IO > completion and before sync? I haven't dug through this path much. Think so. That's the only place where buffers are detached. Otherwise, fsync_buffers_list() looks at them all. There's also the prune_icache() buffer-stripper remove_inode_buffers(). Nobody knows about the inode by that time, but there's a chance that the inode will be rescued before it is thrown away, so remove_inode_buffers() should propagate errors into the address_space as well. > Another 2.5 change I hadn't noticed. Ok, will look at that. I haven't > come up with a good test for the writepage ENOSPC, thoughts? See kswapd-writepage.c from http://www.zip.com.au/~akpm/linux/patches/2.5/ext3-tools.tar.gz If you run that over a filesystem which has only a few K of space, apply memory pressure while it is sleeping then data loss will ensue. hm, there's also enospc-writepage.c which is designed to exactly demonstrate this problem. ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2.5] Report write errors to applications 2003-01-29 21:42 ` Andrew Morton @ 2003-01-30 21:12 ` Oliver Xymoron 2003-01-30 21:39 ` Andrew Morton 0 siblings, 1 reply; 9+ messages in thread From: Oliver Xymoron @ 2003-01-30 21:12 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel On Wed, Jan 29, 2003 at 01:42:05PM -0800, Andrew Morton wrote: > Oliver Xymoron <oxymoron@waste.org> wrote: > > > > > - fsync_buffers_list() will handle them and will return errors to the fsync() > > > caller. We only need to handle those buffers which were stripped > > > asynchronously by VM activity. > > > > Are we guaranteed that we'll get a try_to_free_buffers after IO > > completion and before sync? I haven't dug through this path much. > > Think so. That's the only place where buffers are detached. Otherwise, > fsync_buffers_list() looks at them all. The other problem here is that by the time we're in try_to_free_buffers we no longer know that we're looking at a harmless stale page (readahead?) or a write error, which is why Linus had me make the separate end_buffer functions. So I don't think this pans out - thoughts? -- "Love the dolphins," she advised him. "Write by W.A.S.T.E.." ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2.5] Report write errors to applications 2003-01-30 21:12 ` Oliver Xymoron @ 2003-01-30 21:39 ` Andrew Morton 2003-01-30 22:00 ` Oliver Xymoron 0 siblings, 1 reply; 9+ messages in thread From: Andrew Morton @ 2003-01-30 21:39 UTC (permalink / raw) To: Oliver Xymoron; +Cc: linux-kernel Oliver Xymoron wrote: > > On Wed, Jan 29, 2003 at 01:42:05PM -0800, Andrew Morton wrote: > > Oliver Xymoron <oxymoron@waste.org> wrote: > > > > > > > - fsync_buffers_list() will handle them and will return errors to the fsync() > > > > caller. We only need to handle those buffers which were stripped > > > > asynchronously by VM activity. > > > > > > Are we guaranteed that we'll get a try_to_free_buffers after IO > > > completion and before sync? I haven't dug through this path much. > > > > Think so. That's the only place where buffers are detached. Otherwise, > > fsync_buffers_list() looks at them all. > > The other problem here is that by the time we're in > try_to_free_buffers we no longer know that we're looking at a harmless > stale page (readahead?) or a write error, which is why Linus had me > make the separate end_buffer functions. So I don't think this pans out > - thoughts? If the buffer has buffer_req and !buffer_uptodate and !buffer_locked we know that it was submitted for IO, that the IO has completed, and that it failed. ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2.5] Report write errors to applications 2003-01-30 21:39 ` Andrew Morton @ 2003-01-30 22:00 ` Oliver Xymoron 2003-01-30 22:13 ` Andrew Morton 0 siblings, 1 reply; 9+ messages in thread From: Oliver Xymoron @ 2003-01-30 22:00 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel On Thu, Jan 30, 2003 at 01:39:31PM -0800, Andrew Morton wrote: > Oliver Xymoron wrote: > > > > On Wed, Jan 29, 2003 at 01:42:05PM -0800, Andrew Morton wrote: > > > Oliver Xymoron <oxymoron@waste.org> wrote: > > > > > > > > > - fsync_buffers_list() will handle them and will return errors to the fsync() > > > > > caller. We only need to handle those buffers which were stripped > > > > > asynchronously by VM activity. > > > > > > > > Are we guaranteed that we'll get a try_to_free_buffers after IO > > > > completion and before sync? I haven't dug through this path much. > > > > > > Think so. That's the only place where buffers are detached. Otherwise, > > > fsync_buffers_list() looks at them all. > > > > The other problem here is that by the time we're in > > try_to_free_buffers we no longer know that we're looking at a harmless > > stale page (readahead?) or a write error, which is why Linus had me > > make the separate end_buffer functions. So I don't think this pans out > > - thoughts? > > If the buffer has buffer_req and !buffer_uptodate and !buffer_locked > we know that it was submitted for IO, that the IO has completed, and > that it failed. 2.5 says this: void end_buffer_io_sync(struct buffer_head *bh, int uptodate) { if (uptodate) { set_buffer_uptodate(bh); } else { /* * This happens, due to failed READA attempts. * buffer_io_error(bh); */ clear_buffer_uptodate(bh); } unlock_buffer(bh); put_bh(bh); } The comment suggests that we need to distinguish read errors from write errors and I tend to agree. Bear in mind that we're limited to doing this per inode, so if we start flagging errors for reads, we can really confuse writers. Perhaps not fatal, but suboptimal certainly. On the other hand, I haven't been able to find anywhere in 2.4 that's setting b_end_io to end_io_write_sync that's not also setting b_page, so I think my original patch is safe in this regard. I suspect 2.5 is similar. -- "Love the dolphins," she advised him. "Write by W.A.S.T.E.." ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2.5] Report write errors to applications 2003-01-30 22:00 ` Oliver Xymoron @ 2003-01-30 22:13 ` Andrew Morton 2003-01-31 18:42 ` Oliver Xymoron 0 siblings, 1 reply; 9+ messages in thread From: Andrew Morton @ 2003-01-30 22:13 UTC (permalink / raw) To: Oliver Xymoron; +Cc: linux-kernel Oliver Xymoron wrote: > > The comment suggests that we need to distinguish read errors from > write errors and I tend to agree. OK, well you could call set_buffer_write_io_error/set_buffer_read_io_error() in the end_io handlers, and pick that up later on. To avoid adding a couple of new clear_bits in submit_bh, you could do: if (test_set_buffer_req(bh)) { clear_buffer_write_io_error(bh); clear_buffer_read_io_error(bh); } ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2.5] Report write errors to applications 2003-01-30 22:13 ` Andrew Morton @ 2003-01-31 18:42 ` Oliver Xymoron 0 siblings, 0 replies; 9+ messages in thread From: Oliver Xymoron @ 2003-01-31 18:42 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel On Thu, Jan 30, 2003 at 02:13:54PM -0800, Andrew Morton wrote: > Oliver Xymoron wrote: > > > > The comment suggests that we need to distinguish read errors from > > write errors and I tend to agree. > > OK, well you could call set_buffer_write_io_error/set_buffer_read_io_error() > in the end_io handlers, and pick that up later on. > > To avoid adding a couple of new clear_bits in submit_bh, > you could do: > > if (test_set_buffer_req(bh)) { > clear_buffer_write_io_error(bh); > clear_buffer_read_io_error(bh); > } Read errors aren't really a problem as they tend to be caught synchronously. And we also have to be careful not to clear out a write error by doing a subsequent read on the now !uptodate page. How's this look? Against -mm7, compiles, untested: diff -urN -x '.patch*' -x '*.orig' orig/fs/buffer.c patched/fs/buffer.c --- orig/fs/buffer.c 2003-01-31 12:34:05.000000000 -0600 +++ patched/fs/buffer.c 2003-01-31 12:37:13.000000000 -0600 @@ -165,15 +165,27 @@ * Default synchronous end-of-IO handler.. Just mark it up-to-date and * unlock the buffer. This is what ll_rw_block uses too. */ -void end_buffer_io_sync(struct buffer_head *bh, int uptodate) +void end_buffer_read_sync(struct buffer_head *bh, int uptodate) { if (uptodate) { set_buffer_uptodate(bh); } else { - /* - * This happens, due to failed READA attempts. - * buffer_io_error(bh); - */ + /* This happens, due to failed READA attempts. */ + clear_buffer_uptodate(bh); + } + unlock_buffer(bh); + put_bh(bh); +} + +void end_buffer_write_sync(struct buffer_head *bh, int uptodate) +{ + if (uptodate) { + set_buffer_uptodate(bh); + } else { + buffer_io_error(bh); + printk(KERN_WARNING "lost page write due to I/O error on %s\n", + bdevname(bh->b_bdev)); + set_buffer_write_io_error(bh); clear_buffer_uptodate(bh); } unlock_buffer(bh); @@ -550,6 +562,9 @@ set_buffer_uptodate(bh); } else { buffer_io_error(bh); + printk(KERN_WARNING "lost page write due to I/O error on %s\n", + bdevname(bh->b_bdev)); + page->mapping->error = -EIO; clear_buffer_uptodate(bh); SetPageError(page); } @@ -1201,7 +1216,7 @@ if (buffer_dirty(bh)) buffer_error(); get_bh(bh); - bh->b_end_io = end_buffer_io_sync; + bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) @@ -2543,8 +2558,10 @@ buffer_error(); if (rw == READ && buffer_dirty(bh)) buffer_error(); - - set_buffer_req(bh); + + /* Only clear out a write error when rewriting */ + if (test_set_buffer_req(bh) && rw == WRITE) + clear_buffer_write_io_error(bh); /* * from here on down, it's all bio -- do the initial mapping, @@ -2604,13 +2621,14 @@ continue; get_bh(bh); - bh->b_end_io = end_buffer_io_sync; if (rw == WRITE) { + bh->b_end_io = end_buffer_write_sync; if (test_clear_buffer_dirty(bh)) { submit_bh(WRITE, bh); continue; } } else { + bh->b_end_io = end_buffer_read_sync; if (!buffer_uptodate(bh)) { submit_bh(rw, bh); continue; @@ -2672,6 +2690,8 @@ bh = head; do { check_ttfb_buffer(page, bh); + if (buffer_write_io_error(bh)) + page->mapping->error = -EIO; if (buffer_busy(bh)) goto failed; if (!buffer_uptodate(bh) && !buffer_req(bh)) diff -urN -x '.patch*' -x '*.orig' orig/fs/inode.c patched/fs/inode.c --- orig/fs/inode.c 2003-01-31 12:34:05.000000000 -0600 +++ patched/fs/inode.c 2003-01-31 12:34:09.000000000 -0600 @@ -134,6 +134,7 @@ mapping->dirtied_when = 0; mapping->assoc_mapping = NULL; mapping->backing_dev_info = &default_backing_dev_info; + mapping->error = 0; if (sb->s_bdev) inode->i_data.backing_dev_info = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; memset(&inode->u, 0, sizeof(inode->u)); diff -urN -x '.patch*' -x '*.orig' orig/fs/mpage.c patched/fs/mpage.c --- orig/fs/mpage.c 2003-01-31 12:34:05.000000000 -0600 +++ patched/fs/mpage.c 2003-01-31 12:34:09.000000000 -0600 @@ -562,6 +562,8 @@ if (bio) bio = mpage_bio_submit(WRITE, bio); *ret = page->mapping->a_ops->writepage(page, wbc); + if (*ret < 0) + page->mapping->error = *ret; out: return bio; } @@ -665,6 +667,8 @@ test_clear_page_dirty(page)) { if (writepage) { ret = (*writepage)(page, wbc); + if (ret < 0) + page->mapping->error = ret; } else { bio = mpage_writepage(bio, page, get_block, &last_block_in_bio, &ret, wbc); diff -urN -x '.patch*' -x '*.orig' orig/fs/ntfs/compress.c patched/fs/ntfs/compress.c --- orig/fs/ntfs/compress.c 2003-01-13 23:58:43.000000000 -0600 +++ patched/fs/ntfs/compress.c 2003-01-31 12:34:09.000000000 -0600 @@ -598,7 +598,7 @@ continue; } atomic_inc(&tbh->b_count); - tbh->b_end_io = end_buffer_io_sync; + tbh->b_end_io = end_buffer_read_sync; submit_bh(READ, tbh); } diff -urN -x '.patch*' -x '*.orig' orig/fs/open.c patched/fs/open.c --- orig/fs/open.c 2003-01-13 23:58:05.000000000 -0600 +++ patched/fs/open.c 2003-01-31 12:34:09.000000000 -0600 @@ -843,21 +843,38 @@ */ int filp_close(struct file *filp, fl_owner_t id) { - int retval; + struct address_space *mapping = filp->f_dentry->d_inode->i_mapping; + int retval = 0, err; + + /* Report and clear outstanding errors */ + err = filp->f_error; + if (err) { + filp->f_error = 0; + retval = err; + } + + err = mapping->error; + if (err && !retval) { + mapping->error = 0; + retval = err; + } if (!file_count(filp)) { printk(KERN_ERR "VFS: Close: file count is 0\n"); - return 0; + return retval; } - retval = 0; + if (filp->f_op && filp->f_op->flush) { lock_kernel(); - retval = filp->f_op->flush(filp); + err = filp->f_op->flush(filp); unlock_kernel(); + if (err && !retval) + retval = err; } dnotify_flush(filp, id); locks_remove_posix(filp, id); fput(filp); + return retval; } diff -urN -x '.patch*' -x '*.orig' orig/include/linux/buffer_head.h patched/include/linux/buffer_head.h --- orig/include/linux/buffer_head.h 2003-01-31 12:34:05.000000000 -0600 +++ patched/include/linux/buffer_head.h 2003-01-31 12:36:56.000000000 -0600 @@ -24,8 +24,9 @@ BH_Async_Read, /* Is under end_buffer_async_read I/O */ BH_Async_Write, /* Is under end_buffer_async_write I/O */ BH_Delay, /* Buffer is not yet allocated on disk */ - BH_Boundary, /* Block is followed by a discontiguity */ + BH_Write_EIO, /* I/O error on write */ + BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities */ @@ -103,12 +104,14 @@ BUFFER_FNS(Lock, locked) TAS_BUFFER_FNS(Lock, locked) BUFFER_FNS(Req, req) +TAS_BUFFER_FNS(Req, req) BUFFER_FNS(Mapped, mapped) BUFFER_FNS(New, new) BUFFER_FNS(Async_Read, async_read) BUFFER_FNS(Async_Write, async_write) -BUFFER_FNS(Delay, delay); +BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) +BUFFER_FNS(Write_EIO,write_io_error) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) #define touch_buffer(bh) mark_page_accessed(bh->b_page) @@ -136,7 +139,8 @@ int try_to_free_buffers(struct page *); void create_empty_buffers(struct page *, unsigned long, unsigned long b_state); -void end_buffer_io_sync(struct buffer_head *bh, int uptodate); +void end_buffer_read_sync(struct buffer_head *bh, int uptodate); +void end_buffer_write_sync(struct buffer_head *bh, int uptodate); /* Things to do with buffers at mapping->private_list */ void buffer_insert_list(spinlock_t *lock, diff -urN -x '.patch*' -x '*.orig' orig/include/linux/fs.h patched/include/linux/fs.h --- orig/include/linux/fs.h 2003-01-31 12:34:05.000000000 -0600 +++ patched/include/linux/fs.h 2003-01-31 12:34:09.000000000 -0600 @@ -326,6 +326,7 @@ spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ struct address_space *assoc_mapping; /* ditto */ + int error; /* write error for fsync */ }; struct char_device { diff -urN -x '.patch*' -x '*.orig' orig/kernel/ksyms.c patched/kernel/ksyms.c --- orig/kernel/ksyms.c 2003-01-31 12:34:05.000000000 -0600 +++ patched/kernel/ksyms.c 2003-01-31 12:34:09.000000000 -0600 @@ -176,7 +176,8 @@ EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(mark_buffer_dirty); -EXPORT_SYMBOL(end_buffer_io_sync); +EXPORT_SYMBOL(end_buffer_read_sync); +EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(__mark_inode_dirty); EXPORT_SYMBOL(get_empty_filp); EXPORT_SYMBOL(init_private_file); diff -urN -x '.patch*' -x '*.orig' orig/mm/filemap.c patched/mm/filemap.c --- orig/mm/filemap.c 2003-01-31 12:34:05.000000000 -0600 +++ patched/mm/filemap.c 2003-01-31 12:34:09.000000000 -0600 @@ -185,6 +185,14 @@ write_lock(&mapping->page_lock); } write_unlock(&mapping->page_lock); + + /* Check for outstanding write errors */ + if (mapping->error) + { + ret = mapping->error; + mapping->error = 0; + } + return ret; } diff -urN -x '.patch*' -x '*.orig' orig/mm/vmscan.c patched/mm/vmscan.c --- orig/mm/vmscan.c 2003-01-31 12:34:05.000000000 -0600 +++ patched/mm/vmscan.c 2003-01-31 12:34:09.000000000 -0600 @@ -342,6 +342,9 @@ SetPageReclaim(page); res = mapping->a_ops->writepage(page, &wbc); + if (res < 0) { + mapping->error = res; + } if (res == WRITEPAGE_ACTIVATE) { ClearPageReclaim(page); goto activate_locked; -- "Love the dolphins," she advised him. "Write by W.A.S.T.E.." ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2003-01-31 18:33 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2003-01-29 6:09 [PATCH 2.5] Report write errors to applications Oliver Xymoron 2003-01-29 7:29 ` Andrew Morton 2003-01-29 16:24 ` Oliver Xymoron 2003-01-29 21:42 ` Andrew Morton 2003-01-30 21:12 ` Oliver Xymoron 2003-01-30 21:39 ` Andrew Morton 2003-01-30 22:00 ` Oliver Xymoron 2003-01-30 22:13 ` Andrew Morton 2003-01-31 18:42 ` Oliver Xymoron
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox