[PATCH] Use of getblk differs between locations

linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] Use of getblk differs between locations
@ 2005-10-10 20:45 Glauber de Oliveira Costa
  2005-10-10 21:20 ` Anton Altaparmakov
  0 siblings, 1 reply; 43+ messages in thread
From: Glauber de Oliveira Costa @ 2005-10-10 20:45 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev,
	aia21, hch, viro, mikulas, akpm

[-- Attachment #1: Type: text/plain, Size: 924 bytes --]

Hi all,

I've just noticed that the use of sb_getblk differs between locations
inside the kernel. To be precise, in some locations there are tests
against its return value, and in some places there are not.

According to the comments in __getblk definition, the tests are not
necessary, as the function always return a buffer_head (maybe a wrong
one),

The patch bellow just make it's use homogeneous trough the whole code
in the various filesystems that use it.

One thing to mention, is that I've kept my hands away from hpfs code.
That's because sb_getblk is used inside another function, that returns
NULL in the case sb_getblk does it too (Which does not seem to happen).
The correct action would be trace down all the uses of that function and
change it.

-- 
=====================================
Glauber de Oliveira Costa
IBM Linux Technology Center - Brazil
glommer@br.ibm.com
=====================================

[-- Attachment #2: patch_getblk --]
[-- Type: text/plain, Size: 4594 bytes --]

diff -Naurp linux-2.6.14-rc2-orig/fs/ext2/xattr.c linux-2.6.14-rc2-cleanp/fs/ext2/xattr.c
--- linux-2.6.14-rc2-orig/fs/ext2/xattr.c	2005-09-01 14:26:03.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/ext2/xattr.c	2005-10-10 19:47:27.000000000 +0000
@@ -679,11 +679,6 @@ ext2_xattr_set2(struct inode *inode, str
 			ea_idebug(inode, "creating block %d", block);
 
 			new_bh = sb_getblk(sb, block);
-			if (!new_bh) {
-				ext2_free_blocks(inode, block, 1);
-				error = -EIO;
-				goto cleanup;
-			}
 			lock_buffer(new_bh);
 			memcpy(new_bh->b_data, header, new_bh->b_size);
 			set_buffer_uptodate(new_bh);
diff -Naurp linux-2.6.14-rc2-orig/fs/fat/dir.c linux-2.6.14-rc2-cleanp/fs/fat/dir.c
--- linux-2.6.14-rc2-orig/fs/fat/dir.c	2005-09-26 13:58:15.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/fat/dir.c	2005-10-10 19:37:47.000000000 +0000
@@ -46,7 +46,7 @@ static inline void fat_dir_readahead(str
 		return;
 
 	bh = sb_getblk(sb, phys);
-	if (bh && !buffer_uptodate(bh)) {
+	if (!buffer_uptodate(bh)) {
 		for (sec = 0; sec < sbi->sec_per_clus; sec++)
 			sb_breadahead(sb, phys + sec);
 	}
@@ -978,10 +978,6 @@ static int fat_zeroed_cluster(struct ino
 	n = nr_used;
 	while (blknr < last_blknr) {
 		bhs[n] = sb_getblk(sb, blknr);
-		if (!bhs[n]) {
-			err = -ENOMEM;
-			goto error;
-		}
 		memset(bhs[n]->b_data, 0, sb->s_blocksize);
 		set_buffer_uptodate(bhs[n]);
 		mark_buffer_dirty(bhs[n]);
@@ -1031,10 +1027,6 @@ int fat_alloc_new_dir(struct inode *dir,
 
 	blknr = fat_clus_to_blknr(sbi, cluster);
 	bhs[0] = sb_getblk(sb, blknr);
-	if (!bhs[0]) {
-		err = -ENOMEM;
-		goto error_free;
-	}
 
 	fat_date_unix2dos(ts->tv_sec, &time, &date);
 
@@ -1113,10 +1105,6 @@ static int fat_add_new_entries(struct in
 		last_blknr = start_blknr + sbi->sec_per_clus;
 		while (blknr < last_blknr) {
 			bhs[n] = sb_getblk(sb, blknr);
-			if (!bhs[n]) {
-				err = -ENOMEM;
-				goto error_nomem;
-			}
 
 			/* fill the directory entry */
 			copy = min(size, sb->s_blocksize);
diff -Naurp linux-2.6.14-rc2-orig/fs/fat/fatent.c linux-2.6.14-rc2-cleanp/fs/fat/fatent.c
--- linux-2.6.14-rc2-orig/fs/fat/fatent.c	2005-06-17 19:48:29.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/fat/fatent.c	2005-10-10 19:38:10.000000000 +0000
@@ -357,10 +357,6 @@ static int fat_mirror_bhs(struct super_b
 
 		for (n = 0; n < nr_bhs; n++) {
 			c_bh = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr);
-			if (!c_bh) {
-				err = -ENOMEM;
-				goto error;
-			}
 			memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
 			set_buffer_uptodate(c_bh);
 			mark_buffer_dirty(c_bh);
diff -Naurp linux-2.6.14-rc2-orig/fs/isofs/inode.c linux-2.6.14-rc2-cleanp/fs/isofs/inode.c
--- linux-2.6.14-rc2-orig/fs/isofs/inode.c	2005-09-01 14:26:03.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/isofs/inode.c	2005-10-10 19:55:40.000000000 +0000
@@ -994,8 +994,6 @@ int isofs_get_blocks(struct inode *inode
 			map_bh(*bh, inode->i_sb, firstext + b_off - offset);
 		} else {
 			*bh = sb_getblk(inode->i_sb, firstext+b_off-offset);
-			if ( !*bh )
-				goto abort;
 		}
 		bh++;	/* Next buffer head */
 		b_off++;	/* Next buffer offset */
diff -Naurp linux-2.6.14-rc2-orig/fs/ntfs/compress.c linux-2.6.14-rc2-cleanp/fs/ntfs/compress.c
--- linux-2.6.14-rc2-orig/fs/ntfs/compress.c	2005-09-26 13:58:16.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/ntfs/compress.c	2005-10-10 19:50:15.000000000 +0000
@@ -641,8 +641,7 @@ lock_retry_remap:
 		max_block = block + (vol->cluster_size >> block_size_bits);
 		do {
 			ntfs_debug("block = 0x%x.", block);
-			if (unlikely(!(bhs[nr_bhs] = sb_getblk(sb, block))))
-				goto getblk_err;
+			bhs[nr_bhs] = sb_getblk(sb, block);
 			nr_bhs++;
 		} while (++block < max_block);
 	}
@@ -938,9 +937,6 @@ rl_err:
 			"compression block.");
 	goto err_out;
 
-getblk_err:
-	up_read(&ni->runlist.lock);
-	ntfs_error(vol->sb, "getblk() failed. Cannot read compression block.");
 
 err_out:
 	kfree(bhs);
diff -Naurp linux-2.6.14-rc2-orig/fs/sysv/balloc.c linux-2.6.14-rc2-cleanp/fs/sysv/balloc.c
--- linux-2.6.14-rc2-orig/fs/sysv/balloc.c	2005-06-17 19:48:29.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/sysv/balloc.c	2005-10-10 19:52:03.000000000 +0000
@@ -75,11 +75,6 @@ void sysv_free_block(struct super_block 
 	if (count == sbi->s_flc_size || count == 0) {
 		block += sbi->s_block_base;
 		bh = sb_getblk(sb, block);
-		if (!bh) {
-			printk("sysv_free_block: getblk() failed\n");
-			unlock_super(sb);
-			return;
-		}
 		memset(bh->b_data, 0, sb->s_blocksize);
 		*(__fs16*)bh->b_data = cpu_to_fs16(sbi, count);
 		memcpy(get_chunk(sb,bh), blocks, count * sizeof(sysv_zone_t));

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 20:45 [PATCH] Use of getblk differs between locations Glauber de Oliveira Costa
@ 2005-10-10 21:20 ` Anton Altaparmakov
  2005-10-10 21:46   ` Glauber de Oliveira Costa
  0 siblings, 1 reply; 43+ messages in thread
From: Anton Altaparmakov @ 2005-10-10 21:20 UTC (permalink / raw)
  To: Glauber de Oliveira Costa
  Cc: linux-kernel, linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev,
	aia21, hch, viro, mikulas, akpm

Hi,

On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
> I've just noticed that the use of sb_getblk differs between locations
> inside the kernel. To be precise, in some locations there are tests
> against its return value, and in some places there are not.
> 
> According to the comments in __getblk definition, the tests are not
> necessary, as the function always return a buffer_head (maybe a wrong
> one),

If you had read the source code rather than just the comments you would 
have seen that this is not true.  It can return NULL (see 
fs/buffer.c::__getblk_slow()).  Certainly I would prefer to keep the 
checks in NTFS, please.  They may only be good for catching bugs but I 
like catching bugs rather than segfaulting due to a NULL dereference.

Best regards,

	Anton
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 21:20 ` Anton Altaparmakov
@ 2005-10-10 21:46   ` Glauber de Oliveira Costa
  2005-10-10 21:58     ` Mikulas Patocka
  0 siblings, 1 reply; 43+ messages in thread
From: Glauber de Oliveira Costa @ 2005-10-10 21:46 UTC (permalink / raw)
  To: Anton Altaparmakov
  Cc: glommer, linux-kernel, linux-fsdevel, ext2-devel, hirofumi,
	linux-ntfs-dev, aia21, hch, viro, mikulas, akpm

On Mon, Oct 10, 2005 at 10:20:07PM +0100, Anton Altaparmakov wrote:
> Hi,
> 
> On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
> > I've just noticed that the use of sb_getblk differs between locations
> > inside the kernel. To be precise, in some locations there are tests
> > against its return value, and in some places there are not.
> > 
> > According to the comments in __getblk definition, the tests are not
> > necessary, as the function always return a buffer_head (maybe a wrong
> > one),
> 
> If you had read the source code rather than just the comments you would 
> have seen that this is not true.  It can return NULL (see 
> fs/buffer.c::__getblk_slow()).  Certainly I would prefer to keep the 
> checks in NTFS, please.  They may only be good for catching bugs but I 
> like catching bugs rather than segfaulting due to a NULL dereference.

I did. But I did not see this specifically, for sure. What takes us to
the opposite problem: A lot of places do not check for the return value
of getblk (Almost half of them, I'd say), and may thus lead to a 
dereferencing  of a NULL pointer.

Does anyone else have any comments on that?

> Best regards,
Thanks,
> 	Anton
Glauber

> -- 
> Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
> Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
> Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
> WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/
> 

-- 
=====================================
Glauber de Oliveira Costa
IBM Linux Technology Center - Brazil
glommer@br.ibm.com
=====================================

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 21:46   ` Glauber de Oliveira Costa
@ 2005-10-10 21:58     ` Mikulas Patocka
  2005-10-10 22:25       ` Anton Altaparmakov
  2005-10-10 22:36       ` Glauber de Oliveira Costa
  0 siblings, 2 replies; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-10 21:58 UTC (permalink / raw)
  To: Glauber de Oliveira Costa
  Cc: Anton Altaparmakov, linux-kernel, linux-fsdevel, ext2-devel,
	hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm



On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:

> On Mon, Oct 10, 2005 at 10:20:07PM +0100, Anton Altaparmakov wrote:
>> Hi,
>>
>> On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
>>> I've just noticed that the use of sb_getblk differs between locations
>>> inside the kernel. To be precise, in some locations there are tests
>>> against its return value, and in some places there are not.
>>>
>>> According to the comments in __getblk definition, the tests are not
>>> necessary, as the function always return a buffer_head (maybe a wrong
>>> one),
>>
>> If you had read the source code rather than just the comments you would
>> have seen that this is not true.  It can return NULL (see
>> fs/buffer.c::__getblk_slow()).  Certainly I would prefer to keep the
>> checks in NTFS, please.  They may only be good for catching bugs but I
>> like catching bugs rather than segfaulting due to a NULL dereference.

The check should be rather a BUG() than dump_stack() and return NULL --- I 
think it's not right to write code to recover from programming errors. 
Filesystem drivers are supposed to pass correct blocksize to getblk(). --- 
even for users it's better to crash, because user whose machine has locked 
up on BUG() will report bug more likely than user whose machine has 
written stack dump into log and corrupted filesystem --- by the time he 
discovers the corruption and mesage he might not even remember what 
triggered it.

As comment in buffer.c says, getblk will deadlock if the machine is out of 
memory. It is questionable whether to deadlock or return NULL and corrupt 
filesystem in this case --- deadlock is probably better.

Mikulas

>
> I did. But I did not see this specifically, for sure. What takes us to
> the opposite problem: A lot of places do not check for the return value
> of getblk (Almost half of them, I'd say), and may thus lead to a
> dereferencing  of a NULL pointer.
>
> Does anyone else have any comments on that?
>
>> Best regards,
> Thanks,
>> 	Anton
> Glauber
>
>> --
>> Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
>> Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
>> Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
>> WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/
>>
>
> -- 
> =====================================
> Glauber de Oliveira Costa
> IBM Linux Technology Center - Brazil
> glommer@br.ibm.com
> =====================================
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 21:58     ` Mikulas Patocka
@ 2005-10-10 22:25       ` Anton Altaparmakov
  2005-10-10 22:49         ` Mikulas Patocka
  2005-10-10 22:36       ` Glauber de Oliveira Costa
  1 sibling, 1 reply; 43+ messages in thread
From: Anton Altaparmakov @ 2005-10-10 22:25 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Glauber de Oliveira Costa, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm

On Mon, 10 Oct 2005, Mikulas Patocka wrote:
> On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
> > On Mon, Oct 10, 2005 at 10:20:07PM +0100, Anton Altaparmakov wrote:
> > > On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
> > > > I've just noticed that the use of sb_getblk differs between locations
> > > > inside the kernel. To be precise, in some locations there are tests
> > > > against its return value, and in some places there are not.
> > > > 
> > > > According to the comments in __getblk definition, the tests are not
> > > > necessary, as the function always return a buffer_head (maybe a wrong
> > > > one),
> > > 
> > > If you had read the source code rather than just the comments you would
> > > have seen that this is not true.  It can return NULL (see
> > > fs/buffer.c::__getblk_slow()).  Certainly I would prefer to keep the
> > > checks in NTFS, please.  They may only be good for catching bugs but I
> > > like catching bugs rather than segfaulting due to a NULL dereference.
> 
> The check should be rather a BUG() than dump_stack() and return NULL --- I
> think it's not right to write code to recover from programming errors.

Why programming errors?  It could be faulty memory or other corruption, 
perhaps even caused by a different driver altogether (e.g. I found a bug 
in ntfs last week which caused it to memset() to zero a random location in 
memory of a random size and it caused a lot of strange effects like my 
shell suddenly exiting and me being left on the login prompt...).  Also it 
could be that the function one day changes and it can return NULL.  It is 
far safer to do checking than to make assumptions about not being able to 
return NULL.

> Filesystem drivers are supposed to pass correct blocksize to getblk(). ---
> even for users it's better to crash, because user whose machine has locked up
> on BUG() will report bug more likely than user whose machine has written stack
> dump into log and corrupted filesystem --- by the time he discovers the
> corruption and mesage he might not even remember what triggered it.
> 
> As comment in buffer.c says, getblk will deadlock if the machine is out of
> memory. It is questionable whether to deadlock or return NULL and corrupt
> filesystem in this case --- deadlock is probably better.

What do you mean corrupt filesystem?  If a filesystem is written so badly 
that it will cause corruption when a NULL is returned somewhere, I 
certainly don't want to have anything to do with it.

Going BUG() is generally a bad thing if the error can be recovered from.  
Certainly all my code attempts to recover from all error conditions it can 
possibly encounter.

I would much rather see NULL and then handle the error gracefully with an 
error message than go BUG().  You can then still umount and remove the fs 
module and everything works fine (you may need an fsck you may not depends 
on how good your error handling is).  If you do a BUG() you are guaranteed 
to cause corruption...  I only use BUG() when something really cannot 
happen unless there is a bug in which case I want to know it...

Best regards,

	Anton
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 22:36       ` Glauber de Oliveira Costa
@ 2005-10-10 22:28         ` Anton Altaparmakov
  2005-10-10 23:36           ` Andrew Morton
  0 siblings, 1 reply; 43+ messages in thread
From: Anton Altaparmakov @ 2005-10-10 22:28 UTC (permalink / raw)
  To: Glauber de Oliveira Costa
  Cc: Mikulas Patocka, linux-kernel, linux-fsdevel, ext2-devel,
	hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm

On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
> > >>If you had read the source code rather than just the comments you would
> > >>have seen that this is not true.  It can return NULL (see
> > >>fs/buffer.c::__getblk_slow()).  Certainly I would prefer to keep the
> > >>checks in NTFS, please.  They may only be good for catching bugs but I
> > >>like catching bugs rather than segfaulting due to a NULL dereference.
> > 
> > The check should be rather a BUG() than dump_stack() and return NULL --- I 
> > think it's not right to write code to recover from programming errors. 
> > Filesystem drivers are supposed to pass correct blocksize to getblk(). --- 
> > even for users it's better to crash, because user whose machine has locked 
> > up on BUG() will report bug more likely than user whose machine has 
> > written stack dump into log and corrupted filesystem --- by the time he 
> > discovers the corruption and mesage he might not even remember what 
> > triggered it.
> 
> That was what I meant by having the opposite problem here. I think
> dumping the stack and returning NULL is okay, as long as all programmers
> test its return value, and decide to fail in an alternative way, just
> like Anton does, for example. But unfortunately, that's not what happen. 
> 
> In a lot of cases, we see uses like these: (This one from affs.h)
> 
> 	bh = sb_getblk(sb, block);
> 	lock_buffer(bh);
> 	memset(bh->b_data, 0 , sb->s_blocksize);
> 	set_buffer_uptodate(bh);
> 	unlock_buffer(bh);
> 
> Which does not seem to be the right usage for it.
> 
> As I said, I took away the checks because I missed that return
> statement. I usually don't think that hanging is the preferred solution
> in the cases in which you can stop gracefully - But in case you do stop
> gracefully, not dereference a NULL pointer.
> 
> 																																								
> > 
> > As comment in buffer.c says, getblk will deadlock if the machine is out of 
> > memory. It is questionable whether to deadlock or return NULL and corrupt 
> > filesystem in this case --- deadlock is probably better.
> > 
> > Mikulas
> 
> Maybe the best solution is neither one nor another. Testing and failing
> gracefully seems better.
> 
> What do you think?

I certainly agree with you there.  I neither want a deadlock nor 
corruption.  (-:

Best regards,

	Anton
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 21:58     ` Mikulas Patocka
  2005-10-10 22:25       ` Anton Altaparmakov
@ 2005-10-10 22:36       ` Glauber de Oliveira Costa
  2005-10-10 22:28         ` Anton Altaparmakov
  1 sibling, 1 reply; 43+ messages in thread
From: Glauber de Oliveira Costa @ 2005-10-10 22:36 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: glommer, Anton Altaparmakov, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm

> >>If you had read the source code rather than just the comments you would
> >>have seen that this is not true.  It can return NULL (see
> >>fs/buffer.c::__getblk_slow()).  Certainly I would prefer to keep the
> >>checks in NTFS, please.  They may only be good for catching bugs but I
> >>like catching bugs rather than segfaulting due to a NULL dereference.
> 
> The check should be rather a BUG() than dump_stack() and return NULL --- I 
> think it's not right to write code to recover from programming errors. 
> Filesystem drivers are supposed to pass correct blocksize to getblk(). --- 
> even for users it's better to crash, because user whose machine has locked 
> up on BUG() will report bug more likely than user whose machine has 
> written stack dump into log and corrupted filesystem --- by the time he 
> discovers the corruption and mesage he might not even remember what 
> triggered it.

That was what I meant by having the opposite problem here. I think
dumping the stack and returning NULL is okay, as long as all programmers
test its return value, and decide to fail in an alternative way, just
like Anton does, for example. But unfortunately, that's not what happen. 

In a lot of cases, we see uses like these: (This one from affs.h)

	bh = sb_getblk(sb, block);
	lock_buffer(bh);
	memset(bh->b_data, 0 , sb->s_blocksize);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

Which does not seem to be the right usage for it.

As I said, I took away the checks because I missed that return
statement. I usually don't think that hanging is the preferred solution
in the cases in which you can stop gracefully - But in case you do stop
gracefully, not dereference a NULL pointer.

																																								
> 
> As comment in buffer.c says, getblk will deadlock if the machine is out of 
> memory. It is questionable whether to deadlock or return NULL and corrupt 
> filesystem in this case --- deadlock is probably better.
> 
> Mikulas

Maybe the best solution is neither one nor another. Testing and failing
gracefully seems better.

What do you think?

Glauber


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 22:25       ` Anton Altaparmakov
@ 2005-10-10 22:49         ` Mikulas Patocka
  2005-10-10 23:12           ` Glauber de Oliveira Costa
  2005-10-11  7:52           ` Anton Altaparmakov
  0 siblings, 2 replies; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-10 22:49 UTC (permalink / raw)
  To: Anton Altaparmakov
  Cc: Glauber de Oliveira Costa, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm



On Mon, 10 Oct 2005, Anton Altaparmakov wrote:

> On Mon, 10 Oct 2005, Mikulas Patocka wrote:
>> On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
>>> On Mon, Oct 10, 2005 at 10:20:07PM +0100, Anton Altaparmakov wrote:
>>>> On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
>>>>> I've just noticed that the use of sb_getblk differs between locations
>>>>> inside the kernel. To be precise, in some locations there are tests
>>>>> against its return value, and in some places there are not.
>>>>>
>>>>> According to the comments in __getblk definition, the tests are not
>>>>> necessary, as the function always return a buffer_head (maybe a wrong
>>>>> one),
>>>>
>>>> If you had read the source code rather than just the comments you would
>>>> have seen that this is not true.  It can return NULL (see
>>>> fs/buffer.c::__getblk_slow()).  Certainly I would prefer to keep the
>>>> checks in NTFS, please.  They may only be good for catching bugs but I
>>>> like catching bugs rather than segfaulting due to a NULL dereference.
>>
>> The check should be rather a BUG() than dump_stack() and return NULL --- I
>> think it's not right to write code to recover from programming errors.
>
> Why programming errors?  It could be faulty memory or other corruption,
> perhaps even caused by a different driver altogether (e.g. I found a bug
> in ntfs last week which caused it to memset() to zero a random location in
> memory of a random size and it caused a lot of strange effects like my

You are lucky that it didn't hit buffers with inode table or something 
like that.

> shell suddenly exiting and me being left on the login prompt...).  Also it
> could be that the function one day changes and it can return NULL.  It is
> far safer to do checking than to make assumptions about not being able to
> return NULL.

It is safest to panic() in case of overwriting random blocks of memory --- 
not even oops or BUG() but panic... --- you lose your running processes in 
that case but at least you won't lose anything on disk. I got inode table 
corrupted because of a completely unrelated bug (luckily it hit the part 
with /dev/ nodes that were easy to recreate --- if it hit some real 
important directory, I would have to reinstall).

>> Filesystem drivers are supposed to pass correct blocksize to getblk(). ---
>> even for users it's better to crash, because user whose machine has locked up
>> on BUG() will report bug more likely than user whose machine has written stack
>> dump into log and corrupted filesystem --- by the time he discovers the
>> corruption and mesage he might not even remember what triggered it.
>>
>> As comment in buffer.c says, getblk will deadlock if the machine is out of
>> memory. It is questionable whether to deadlock or return NULL and corrupt
>> filesystem in this case --- deadlock is probably better.
>
> What do you mean corrupt filesystem?  If a filesystem is written so badly
> that it will cause corruption when a NULL is returned somewhere, I
> certainly don't want to have anything to do with it.

What should a filesystem driver do if it can't suddenly read or write any 
blocks on media?

> Going BUG() is generally a bad thing if the error can be recovered from.
> Certainly all my code attempts to recover from all error conditions it can
> possibly encounter.
>
> I would much rather see NULL and then handle the error gracefully with an
> error message than go BUG().  You can then still umount and remove the fs
> module and everything works fine (you may need an fsck you may not depends
> on how good your error handling is).  If you do a BUG() you are guaranteed
> to cause corruption...
> 
> I only use BUG() when something really cannot happen unless there is a 
> bug in which case I want to know it...

Of course, this "can't happen unless there is a bug" is exactly the case 
of __getblk_slow().

Mikulas

> Best regards,
>
> 	Anton
> -- 
> Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
> Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
> Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
> WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 22:49         ` Mikulas Patocka
@ 2005-10-10 23:12           ` Glauber de Oliveira Costa
  2005-10-10 23:16             ` Mikulas Patocka
  2005-10-11  7:52           ` Anton Altaparmakov
  1 sibling, 1 reply; 43+ messages in thread
From: Glauber de Oliveira Costa @ 2005-10-10 23:12 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Anton Altaparmakov, glommer, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm


> What should a filesystem driver do if it can't suddenly read or write any 
> blocks on media?

Maybe stopping gracefully, warn about what happened, and let the system
keep going. You may be right about your main filesystem, but in the case
I'm running, for example, my system in an ext3 filesystem, and have a
vfat from a usb key. Should my system really hang because I'm not able
to read/write to the device?

> >Going BUG() is generally a bad thing if the error can be recovered from.
> >Certainly all my code attempts to recover from all error conditions it can
> >possibly encounter.
> >
> >I would much rather see NULL and then handle the error gracefully with an
> >error message than go BUG().  You can then still umount and remove the fs
> >module and everything works fine (you may need an fsck you may not depends
> >on how good your error handling is).  If you do a BUG() you are guaranteed
> >to cause corruption...
> >
> >I only use BUG() when something really cannot happen unless there is a 
> >bug in which case I want to know it...
> 
> Of course, this "can't happen unless there is a bug" is exactly the case 
> of __getblk_slow().
> 
> Mikulas
> 
> >Best regards,
> >
> >	Anton
> >-- 
> >Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
> >Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
> >Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
> >WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/
> >
> 

-- 
=====================================
Glauber de Oliveira Costa
IBM Linux Technology Center - Brazil
glommer@br.ibm.com
=====================================

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 23:12           ` Glauber de Oliveira Costa
@ 2005-10-10 23:16             ` Mikulas Patocka
  2005-10-10 23:33               ` Glauber de Oliveira Costa
  0 siblings, 1 reply; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-10 23:16 UTC (permalink / raw)
  To: Glauber de Oliveira Costa
  Cc: Anton Altaparmakov, linux-kernel, linux-fsdevel, ext2-devel,
	hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm



On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:

>
>> What should a filesystem driver do if it can't suddenly read or write any
>> blocks on media?
>
> Maybe stopping gracefully, warn about what happened, and let the system
> keep going. You may be right about your main filesystem, but in the case
> I'm running, for example, my system in an ext3 filesystem, and have a
> vfat from a usb key. Should my system really hang because I'm not able
> to read/write to the device?

getblk won't fail because of I/O error --- it can fail only because of 
memory management bugs. I think it's right to stop the system in that case 
--- it's better than silently corrupting data on any device.

Mikulas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 23:16             ` Mikulas Patocka
@ 2005-10-10 23:33               ` Glauber de Oliveira Costa
  2005-10-10 23:34                 ` Mikulas Patocka
  0 siblings, 1 reply; 43+ messages in thread
From: Glauber de Oliveira Costa @ 2005-10-10 23:33 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: glommer, Anton Altaparmakov, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm

On Tue, Oct 11, 2005 at 01:16:59AM +0200, Mikulas Patocka wrote:
> 
> 
> On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
> 
> >
> >>What should a filesystem driver do if it can't suddenly read or write any
> >>blocks on media?
> >
> >Maybe stopping gracefully, warn about what happened, and let the system
> >keep going. You may be right about your main filesystem, but in the case
> >I'm running, for example, my system in an ext3 filesystem, and have a
> >vfat from a usb key. Should my system really hang because I'm not able
> >to read/write to the device?
> 
> getblk won't fail because of I/O error --- it can fail only because of 
> memory management bugs. I think it's right to stop the system in that case 
> --- it's better than silently corrupting data on any device.
> 
> Mikulas
> 
In the code, we see:

if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
                        (size < 512 || size > PAGE_SIZE))) {

This is where __getblk_slow, and thus, __getblk fails, and it does not
seem to be due to any memory management bug.

-- 
=====================================
Glauber de Oliveira Costa
IBM Linux Technology Center - Brazil
glommer@br.ibm.com
=====================================


-------------------------------------------------------
This SF.Net email is sponsored by:
Power Architecture Resource Center: Free content, downloads, discussions,
and more. http://solutions.newsforge.com/ibmarch.tmpl

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 23:33               ` Glauber de Oliveira Costa
@ 2005-10-10 23:34                 ` Mikulas Patocka
  2005-10-10 23:49                   ` Glauber de Oliveira Costa
  0 siblings, 1 reply; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-10 23:34 UTC (permalink / raw)
  To: Glauber de Oliveira Costa
  Cc: Anton Altaparmakov, linux-kernel, linux-fsdevel, ext2-devel,
	hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm

>>>> What should a filesystem driver do if it can't suddenly read or write any
>>>> blocks on media?
>>>
>>> Maybe stopping gracefully, warn about what happened, and let the system
>>> keep going. You may be right about your main filesystem, but in the case
>>> I'm running, for example, my system in an ext3 filesystem, and have a
>>> vfat from a usb key. Should my system really hang because I'm not able
>>> to read/write to the device?
>>
>> getblk won't fail because of I/O error --- it can fail only because of
>> memory management bugs. I think it's right to stop the system in that case
>> --- it's better than silently corrupting data on any device.
>>
>> Mikulas
>>
> In the code, we see:
>
> if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
>                        (size < 512 || size > PAGE_SIZE))) {
>
> This is where __getblk_slow, and thus, __getblk fails, and it does not
> seem to be due to any memory management bug.

This is a filesystem bug --- filesystem should set it's blocksize with 
sb_set_blocksize (and refuse to mount if the device doesn't support it) 
before using it in requests.

Mikulas


-------------------------------------------------------
This SF.Net email is sponsored by:
Power Architecture Resource Center: Free content, downloads, discussions,
and more. http://solutions.newsforge.com/ibmarch.tmpl

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 22:28         ` Anton Altaparmakov
@ 2005-10-10 23:36           ` Andrew Morton
  2005-10-11  0:07             ` Glauber de Oliveira Costa
  2005-10-11  0:09             ` Mikulas Patocka
  0 siblings, 2 replies; 43+ messages in thread
From: Andrew Morton @ 2005-10-10 23:36 UTC (permalink / raw)
  To: Anton Altaparmakov
  Cc: glommer, mikulas, linux-kernel, linux-fsdevel, ext2-devel,
	hirofumi, linux-ntfs-dev, aia21, hch, viro

Anton Altaparmakov <aia21@cam.ac.uk> wrote:
>
>  > Maybe the best solution is neither one nor another. Testing and failing
>  > gracefully seems better.
>  > 
>  > What do you think?
> 
>  I certainly agree with you there.  I neither want a deadlock nor 
>  corruption.  (-:

Yup.  In the present implementation __getblk_slow() "cannot fail".  It's
conceivable that at some future stage we'll change __getblk_slow() so that
it returns NULL on an out-of-memory condition.  Anyone making such a change
would have to audit all callers to make sure that they handle the NULL
correctly.

It is appropriate at this time to fix the callers so that they correctly
handle the NULL return.  However, it is non-trivial to actually _test_ such
changes, and such changes should be tested.  Or at least, they should be
done with considerable care and knowledge of the specific filesystems.  

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 23:34                 ` Mikulas Patocka
@ 2005-10-10 23:49                   ` Glauber de Oliveira Costa
  0 siblings, 0 replies; 43+ messages in thread
From: Glauber de Oliveira Costa @ 2005-10-10 23:49 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: glommer, Anton Altaparmakov, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm

> >In the code, we see:
> >
> >if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
> >                       (size < 512 || size > PAGE_SIZE))) {
> >
> >This is where __getblk_slow, and thus, __getblk fails, and it does not
> >seem to be due to any memory management bug.
> 
> This is a filesystem bug --- filesystem should set it's blocksize with 
> sb_set_blocksize (and refuse to mount if the device doesn't support it) 
> before using it in requests.
> 
> Mikulas
> 
No doubt about it. 
But in case it does not, or in the case the value gets corrupted after
the check but before the call, it will lead some code to dereferencing a 
NULL pointer, and making the whole system crash for a silly thing.

So, for me, checking for the value after the call to __getblk does seem
the right approach. 

-- 
=====================================
Glauber de Oliveira Costa
IBM Linux Technology Center - Brazil
glommer@br.ibm.com
=====================================

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-11  0:07             ` Glauber de Oliveira Costa
@ 2005-10-11  0:05               ` Al Viro
  2005-10-11  0:40                 ` Glauber de Oliveira Costa
  0 siblings, 1 reply; 43+ messages in thread
From: Al Viro @ 2005-10-11  0:05 UTC (permalink / raw)
  To: Glauber de Oliveira Costa
  Cc: Andrew Morton, Anton Altaparmakov, mikulas, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro

On Mon, Oct 10, 2005 at 09:07:34PM -0300, Glauber de Oliveira Costa wrote:
>  	if (!bh)
>  		return -EIO;
>  	new = sb_getblk(sb, to);
> +	if (!new)
> +		return -EIO;

You've just introduced a leak here, obviously.

Please, read the code before "fixing" that stuff; slapping returns at random
and hoping that it will help is not a good way to deal with that - the only
thing you achieve is hiding the problem.

The same goes for the rest of patch - in each case it's not obvious that your
changes are correct.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 23:36           ` Andrew Morton
@ 2005-10-11  0:07             ` Glauber de Oliveira Costa
  2005-10-11  0:05               ` Al Viro
  2005-10-11  0:09             ` Mikulas Patocka
  1 sibling, 1 reply; 43+ messages in thread
From: Glauber de Oliveira Costa @ 2005-10-11  0:07 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Anton Altaparmakov, glommer, mikulas, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro

[-- Attachment #1: Type: text/plain, Size: 1425 bytes --]

Andrew,

I'm providing a patch that puts the test in some more or less trivial
cases.
Maybe putting them in -mm tree could give them the test environment they
need.
But even if the patch gets in, there are still some cases that I don't
feel comfortable with right now to fix.


On Mon, Oct 10, 2005 at 04:36:48PM -0700, Andrew Morton wrote:
> Anton Altaparmakov <aia21@cam.ac.uk> wrote:
> >
> >  > Maybe the best solution is neither one nor another. Testing and failing
> >  > gracefully seems better.
> >  > 
> >  > What do you think?
> > 
> >  I certainly agree with you there.  I neither want a deadlock nor 
> >  corruption.  (-:
> 
> Yup.  In the present implementation __getblk_slow() "cannot fail".  It's
> conceivable that at some future stage we'll change __getblk_slow() so that
> it returns NULL on an out-of-memory condition.  Anyone making such a change
> would have to audit all callers to make sure that they handle the NULL
> correctly.
> 
> It is appropriate at this time to fix the callers so that they correctly
> handle the NULL return.  However, it is non-trivial to actually _test_ such
> changes, and such changes should be tested.  Or at least, they should be
> done with considerable care and knowledge of the specific filesystems.  
> 

-- 
=====================================
Glauber de Oliveira Costa
IBM Linux Technology Center - Brazil
glommer@br.ibm.com
=====================================

[-- Attachment #2: patch_test_getblk --]
[-- Type: text/plain, Size: 5116 bytes --]

diff -Naurp linux-2.6.14-rc2-orig/fs/affs/affs.h linux-2.6.14-rc2-cleanp/fs/affs/affs.h
--- linux-2.6.14-rc2-orig/fs/affs/affs.h	2005-06-17 19:48:29.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/affs/affs.h	2005-10-10 22:28:30.000000000 +0000
@@ -230,6 +230,8 @@ affs_getzeroblk(struct super_block *sb, 
 	pr_debug("affs_getzeroblk: %d\n", block);
 	if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
 		bh = sb_getblk(sb, block);
+		if (!bh)
+			return NULL;
 		lock_buffer(bh);
 		memset(bh->b_data, 0 , sb->s_blocksize);
 		set_buffer_uptodate(bh);
@@ -245,6 +247,8 @@ affs_getemptyblk(struct super_block *sb,
 	pr_debug("affs_getemptyblk: %d\n", block);
 	if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
 		bh = sb_getblk(sb, block);
+		if (!bh)
+			return NULL;
 		wait_on_buffer(bh);
 		set_buffer_uptodate(bh);
 		return bh;
diff -Naurp linux-2.6.14-rc2-orig/fs/bfs/file.c linux-2.6.14-rc2-cleanp/fs/bfs/file.c
--- linux-2.6.14-rc2-orig/fs/bfs/file.c	2005-09-26 13:58:15.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/bfs/file.c	2005-10-10 21:58:23.000000000 +0000
@@ -33,6 +33,8 @@ static int bfs_move_block(unsigned long 
 	if (!bh)
 		return -EIO;
 	new = sb_getblk(sb, to);
+	if (!new)
+		return -EIO;
 	memcpy(new->b_data, bh->b_data, bh->b_size);
 	mark_buffer_dirty(new);
 	bforget(bh);
diff -Naurp linux-2.6.14-rc2-orig/fs/ext2/inode.c linux-2.6.14-rc2-cleanp/fs/ext2/inode.c
--- linux-2.6.14-rc2-orig/fs/ext2/inode.c	2005-09-26 13:58:15.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/ext2/inode.c	2005-10-10 22:31:12.000000000 +0000
@@ -440,6 +440,8 @@ static int ext2_alloc_branch(struct inod
 		 * the pointer to new one, then send parent to disk.
 		 */
 		bh = sb_getblk(inode->i_sb, parent);
+		if (!bh)
+			break;
 		lock_buffer(bh);
 		memset(bh->b_data, 0, blocksize);
 		branch[n].bh = bh;
diff -Naurp linux-2.6.14-rc2-orig/fs/ext3/inode.c linux-2.6.14-rc2-cleanp/fs/ext3/inode.c
--- linux-2.6.14-rc2-orig/fs/ext3/inode.c	2005-10-09 20:26:54.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/ext3/inode.c	2005-10-10 22:38:05.000000000 +0000
@@ -532,6 +532,8 @@ static int ext3_alloc_branch(handle_t *h
 			 */
 			bh = sb_getblk(inode->i_sb, parent);
 			branch[n].bh = bh;
+			if (!bh)
+				break;
 			lock_buffer(bh);
 			BUFFER_TRACE(bh, "call get_create_access");
 			err = ext3_journal_get_create_access(handle, bh);
@@ -864,7 +866,7 @@ struct buffer_head *ext3_getblk(handle_t
 	if (!*errp && buffer_mapped(&dummy)) {
 		struct buffer_head *bh;
 		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
-		if (buffer_new(&dummy)) {
+		if (bh && buffer_new(&dummy)) {
 			J_ASSERT(create != 0);
 			J_ASSERT(handle != 0);
 
diff -Naurp linux-2.6.14-rc2-orig/fs/minix/itree_common.c linux-2.6.14-rc2-cleanp/fs/minix/itree_common.c
--- linux-2.6.14-rc2-orig/fs/minix/itree_common.c	2005-06-17 19:48:29.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/minix/itree_common.c	2005-10-10 22:59:31.000000000 +0000
@@ -84,6 +84,8 @@ static int alloc_branch(struct inode *in
 			break;
 		branch[n].key = cpu_to_block(nr);
 		bh = sb_getblk(inode->i_sb, parent);
+		if (!bh)
+			break;
 		lock_buffer(bh);
 		memset(bh->b_data, 0, BLOCK_SIZE);
 		branch[n].bh = bh;
diff -Naurp linux-2.6.14-rc2-orig/fs/reiserfs/journal.c linux-2.6.14-rc2-cleanp/fs/reiserfs/journal.c
--- linux-2.6.14-rc2-orig/fs/reiserfs/journal.c	2005-09-26 13:58:16.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/reiserfs/journal.c	2005-10-10 22:44:31.000000000 +0000
@@ -2120,7 +2120,7 @@ static int journal_read_transaction(stru
 				      le32_to_cpu(commit->
 						  j_realblock[i - trans_half]));
 		}
-		if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
+		if (real_blocks[i] && real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
 			reiserfs_warning(p_s_sb,
 					 "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem");
 			goto abort_replay;
diff -Naurp linux-2.6.14-rc2-orig/fs/reiserfs/stree.c linux-2.6.14-rc2-cleanp/fs/reiserfs/stree.c
--- linux-2.6.14-rc2-orig/fs/reiserfs/stree.c	2005-09-01 14:26:04.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/reiserfs/stree.c	2005-10-10 22:56:14.000000000 +0000
@@ -664,7 +664,7 @@ int search_by_key(struct super_block *p_
 		   have a pointer to it. */
 		if ((p_s_bh = p_s_last_element->pe_buffer =
 		     sb_getblk(p_s_sb, n_block_number))) {
-			if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
+			if (p_s_sb && !buffer_uptodate(p_s_bh) && reada_count > 1) {
 				search_by_key_reada(p_s_sb, reada_bh,
 						    reada_blocks, reada_count);
 			}
diff -Naurp linux-2.6.14-rc2-orig/fs/sysv/itree.c linux-2.6.14-rc2-cleanp/fs/sysv/itree.c
--- linux-2.6.14-rc2-orig/fs/sysv/itree.c	2005-06-17 19:48:29.000000000 +0000
+++ linux-2.6.14-rc2-cleanp/fs/sysv/itree.c	2005-10-10 22:41:35.000000000 +0000
@@ -144,6 +144,8 @@ static int alloc_branch(struct inode *in
 		 */
 		parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key);
 		bh = sb_getblk(inode->i_sb, parent);
+		if (!bh)
+			break;
 		lock_buffer(bh);
 		memset(bh->b_data, 0, blocksize);
 		branch[n].bh = bh;

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 23:36           ` Andrew Morton
  2005-10-11  0:07             ` Glauber de Oliveira Costa
@ 2005-10-11  0:09             ` Mikulas Patocka
  2005-10-11  1:07               ` Andrew Morton
  1 sibling, 1 reply; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-11  0:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Anton Altaparmakov, glommer, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro

On Mon, 10 Oct 2005, Andrew Morton wrote:

> Anton Altaparmakov <aia21@cam.ac.uk> wrote:
>>
>> > Maybe the best solution is neither one nor another. Testing and failing
>> > gracefully seems better.
>> >
>> > What do you think?
>>
>>  I certainly agree with you there.  I neither want a deadlock nor
>>  corruption.  (-:
>
> Yup.  In the present implementation __getblk_slow() "cannot fail".  It's
> conceivable that at some future stage we'll change __getblk_slow() so that
> it returns NULL on an out-of-memory condition.

The question is if it is desired --- it will make bread return NULL on 
out-of-memory condition, callers will treat it like an IO error, skipping 
access to the affected block, causing damage on perfectly healthy 
filesystem.

I liked what linux-2.0 did in this case --- if the kernel was out of 
memory, getblk just took another buffer, wrote it if it was dirty and used 
it. Except for writeable loopback device (where writing one buffer 
generates more dirty buffers), it couldn't deadlock.

Mikukas

> Anyone making such a change
> would have to audit all callers to make sure that they handle the NULL
> correctly.
>
> It is appropriate at this time to fix the callers so that they correctly
> handle the NULL return.  However, it is non-trivial to actually _test_ such
> changes, and such changes should be tested.  Or at least, they should be
> done with considerable care and knowledge of the specific filesystems.
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-11  0:05               ` Al Viro
@ 2005-10-11  0:40                 ` Glauber de Oliveira Costa
  2005-10-11 12:35                   ` Jan Hudec
  0 siblings, 1 reply; 43+ messages in thread
From: Glauber de Oliveira Costa @ 2005-10-11  0:40 UTC (permalink / raw)
  To: Al Viro
  Cc: glommer, Andrew Morton, Anton Altaparmakov, mikulas, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro

Some of them can be wrong, but they're not that random.

Let's discuss each one of them

In the changes made in affs.h, I returned NULL because it seemed to be
the some-went-wrong behaviour on these functions

affs_getzeroblk, for example, tests some conditions. The buffer being
NULL is just another test.

In the code you commented, I thought that we get the same case testing
from or to conditions, and thus, it would be correct to threat them in
the same way.

in minix, ext2 and ext3 code, I changed the alloc_branch functions ,
because they already has some code that is prepared to recover from a
failed allocation

In the others, I've just put my hands on code that would execute inside
conditionals using struct members, to prevent dereferencing a NULL pointer. 
As I said, there is a lot of code that has the problem, and are untouched. 
If I was being at random, they would also figuring out here.

I don't think it will necessarily hide the problems (although it can be doing 
it in some of them), because __getblk_slow will produce a lot of noise
in the case something did get wrong.

glauber

On Tue, Oct 11, 2005 at 01:05:03AM +0100, Al Viro wrote:
> On Mon, Oct 10, 2005 at 09:07:34PM -0300, Glauber de Oliveira Costa wrote:
> >  	if (!bh)
> >  		return -EIO;
> >  	new = sb_getblk(sb, to);
> > +	if (!new)
> > +		return -EIO;
> 
> You've just introduced a leak here, obviously.
> 
> Please, read the code before "fixing" that stuff; slapping returns at random
> and hoping that it will help is not a good way to deal with that - the only
> thing you achieve is hiding the problem.
> 
> The same goes for the rest of patch - in each case it's not obvious that your
> changes are correct.
> 

-- 
=====================================
Glauber de Oliveira Costa
IBM Linux Technology Center - Brazil
glommer@br.ibm.com
=====================================

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-11  0:09             ` Mikulas Patocka
@ 2005-10-11  1:07               ` Andrew Morton
  2005-10-11  1:20                 ` Mikulas Patocka
  2005-10-11  8:01                 ` Anton Altaparmakov
  0 siblings, 2 replies; 43+ messages in thread
From: Andrew Morton @ 2005-10-11  1:07 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: aia21, glommer, linux-kernel, linux-fsdevel, ext2-devel, hirofumi,
	linux-ntfs-dev, aia21, hch, viro

Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz> wrote:
>
>  On Mon, 10 Oct 2005, Andrew Morton wrote:
> 
>  > Anton Altaparmakov <aia21@cam.ac.uk> wrote:
>  >>
>  >> > Maybe the best solution is neither one nor another. Testing and failing
>  >> > gracefully seems better.
>  >> >
>  >> > What do you think?
>  >>
>  >>  I certainly agree with you there.  I neither want a deadlock nor
>  >>  corruption.  (-:
>  >
>  > Yup.  In the present implementation __getblk_slow() "cannot fail".  It's
>  > conceivable that at some future stage we'll change __getblk_slow() so that
>  > it returns NULL on an out-of-memory condition.
> 
>  The question is if it is desired --- it will make bread return NULL on 
>  out-of-memory condition, callers will treat it like an IO error, skipping 
>  access to the affected block, causing damage on perfectly healthy 
>  filesystem.

Yes, that is a bit dumb.  A filesystem might indeed want to take different
action for ENOMEM versus EIO.

>  I liked what linux-2.0 did in this case --- if the kernel was out of 
>  memory, getblk just took another buffer, wrote it if it was dirty and used 
>  it. Except for writeable loopback device (where writing one buffer 
>  generates more dirty buffers), it couldn't deadlock.

Wouldn't it be better if bread() were to return ERR_PTR(-EIO) or
ERR_PTR(-ENOMEM)?    Big change.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-11  1:07               ` Andrew Morton
@ 2005-10-11  1:20                 ` Mikulas Patocka
  2005-10-11  5:02                   ` Andrew Morton
  2005-10-11  8:07                   ` Anton Altaparmakov
  2005-10-11  8:01                 ` Anton Altaparmakov
  1 sibling, 2 replies; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-11  1:20 UTC (permalink / raw)
  To: Andrew Morton
  Cc: aia21, glommer, linux-kernel, linux-fsdevel, ext2-devel, hirofumi,
	linux-ntfs-dev, aia21, hch, viro

>>  I liked what linux-2.0 did in this case --- if the kernel was out of
>>  memory, getblk just took another buffer, wrote it if it was dirty and used
>>  it. Except for writeable loopback device (where writing one buffer
>>  generates more dirty buffers), it couldn't deadlock.
>
> Wouldn't it be better if bread() were to return ERR_PTR(-EIO) or
> ERR_PTR(-ENOMEM)?    Big change.

No. Out of memory condition can happen even under normal circumstances 
under lightly loaded system. Think of a situation when dirty file-mapped 
pages fill up the whole memory, now a burst of packets from network comes 
that fills up kernel atomic reserve, you have zero pages free --- and what 
now? --- returning ENOMEM and dropping dirty pages without writing them is 
wrong, deadlocking (filesystem waits until memory manager frees some pages 
and memory manager waits until filesystem writes the dirty pages) is wrong 
too.

The filesystem must make sure that it doesn't need any memory to do block 
allocation and data write. Linux-2.0 got this right, it could do getblk 
and bread even if get_free_pages constantly failed.

Mikulas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-11  1:20                 ` Mikulas Patocka
@ 2005-10-11  5:02                   ` Andrew Morton
  2005-10-11  8:07                   ` Anton Altaparmakov
  1 sibling, 0 replies; 43+ messages in thread
From: Andrew Morton @ 2005-10-11  5:02 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: aia21, glommer, linux-kernel, linux-fsdevel, ext2-devel, hirofumi,
	linux-ntfs-dev, aia21, hch, viro

Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz> wrote:
>
> >>  I liked what linux-2.0 did in this case --- if the kernel was out of
> >>  memory, getblk just took another buffer, wrote it if it was dirty and used
> >>  it. Except for writeable loopback device (where writing one buffer
> >>  generates more dirty buffers), it couldn't deadlock.
> >
> > Wouldn't it be better if bread() were to return ERR_PTR(-EIO) or
> > ERR_PTR(-ENOMEM)?    Big change.
> 
> No. Out of memory condition can happen even under normal circumstances 
> under lightly loaded system. Think of a situation when dirty file-mapped 
> pages fill up the whole memory, now a burst of packets from network comes 
> that fills up kernel atomic reserve, you have zero pages free --- and what 
> now? --- returning ENOMEM and dropping dirty pages without writing them is 
> wrong, deadlocking (filesystem waits until memory manager frees some pages 
> and memory manager waits until filesystem writes the dirty pages) is wrong 
> too.

Well.  The filesystem could just redirty the page and skip the writepage().
That's still deadlockable but I bet the kernel would recover in the vast
majority of cases.

Still, this is all very theoretical - there are no plans to make getblk
bail out on oom - AFAIK nobody has been able to demonstrate a testcase...


-------------------------------------------------------
This SF.Net email is sponsored by:
Power Architecture Resource Center: Free content, downloads, discussions,
and more. http://solutions.newsforge.com/ibmarch.tmpl

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-10 22:49         ` Mikulas Patocka
  2005-10-10 23:12           ` Glauber de Oliveira Costa
@ 2005-10-11  7:52           ` Anton Altaparmakov
  2005-10-12 19:51             ` Jeff Mahoney
  1 sibling, 1 reply; 43+ messages in thread
From: Anton Altaparmakov @ 2005-10-11  7:52 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Glauber de Oliveira Costa, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm

On Tue, 2005-10-11 at 00:49 +0200, Mikulas Patocka wrote:
> On Mon, 10 Oct 2005, Anton Altaparmakov wrote:
> > On Mon, 10 Oct 2005, Mikulas Patocka wrote:
> >> On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
> >> As comment in buffer.c says, getblk will deadlock if the machine is out of
> >> memory. It is questionable whether to deadlock or return NULL and corrupt
> >> filesystem in this case --- deadlock is probably better.
> >
> > What do you mean corrupt filesystem?  If a filesystem is written so badly
> > that it will cause corruption when a NULL is returned somewhere, I
> > certainly don't want to have anything to do with it.
> 
> What should a filesystem driver do if it can't suddenly read or write any 
> blocks on media?

Two clear choices:

1) Switch to read-only and use the cached data to fulfil requests and
fail all others.

2) Ask the user to insert the media/plug the device back in (this is by
far the most likely cause of all requests suddenly failing) and then
continue where they left off.

It is unfortunate that Linux does not allow for 2) so you need to do 1).

I completely disagree with people who want the system to panic() or even
BUG() in such case.  I don't want "me accidentally knocking the
flashdrive attached to my keyboard's usb ports" to panic() my system
thank you very much!  And I don't want it to go BUG() either!

Best regards,

        Anton
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-11  1:07               ` Andrew Morton
  2005-10-11  1:20                 ` Mikulas Patocka
@ 2005-10-11  8:01                 ` Anton Altaparmakov
  2005-10-13  0:58                   ` Mike Christie
  1 sibling, 1 reply; 43+ messages in thread
From: Anton Altaparmakov @ 2005-10-11  8:01 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Mikulas Patocka, glommer, linux-kernel, linux-fsdevel, ext2-devel,
	hirofumi, linux-ntfs-dev, aia21, hch, viro

On Mon, 2005-10-10 at 18:07 -0700, Andrew Morton wrote:
> Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz> wrote:
> >
> >  On Mon, 10 Oct 2005, Andrew Morton wrote:
> > 
> >  > Anton Altaparmakov <aia21@cam.ac.uk> wrote:
> >  >>
> >  >> > Maybe the best solution is neither one nor another. Testing and failing
> >  >> > gracefully seems better.
> >  >> >
> >  >> > What do you think?
> >  >>
> >  >>  I certainly agree with you there.  I neither want a deadlock nor
> >  >>  corruption.  (-:
> >  >
> >  > Yup.  In the present implementation __getblk_slow() "cannot fail".  It's
> >  > conceivable that at some future stage we'll change __getblk_slow() so that
> >  > it returns NULL on an out-of-memory condition.
> > 
> >  The question is if it is desired --- it will make bread return NULL on 
> >  out-of-memory condition, callers will treat it like an IO error, skipping 
> >  access to the affected block, causing damage on perfectly healthy 
> >  filesystem.
> 
> Yes, that is a bit dumb.  A filesystem might indeed want to take different
> action for ENOMEM versus EIO.
> 
> >  I liked what linux-2.0 did in this case --- if the kernel was out of 
> >  memory, getblk just took another buffer, wrote it if it was dirty and used 
> >  it. Except for writeable loopback device (where writing one buffer 
> >  generates more dirty buffers), it couldn't deadlock.
> 
> Wouldn't it be better if bread() were to return ERR_PTR(-EIO) or
> ERR_PTR(-ENOMEM)?    Big change.

It would indeed.  Much better.  And whilst at it, it would be even
better if we had a lot more error codes like "ERR_PTR(-EDEVUNPLUGGED)"
for example...  But that would be an even better change.  Anyone feeling
like touching every block driver in the kernel?  (-;

Best regards,

        Anton
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-11  1:20                 ` Mikulas Patocka
  2005-10-11  5:02                   ` Andrew Morton
@ 2005-10-11  8:07                   ` Anton Altaparmakov
  1 sibling, 0 replies; 43+ messages in thread
From: Anton Altaparmakov @ 2005-10-11  8:07 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Andrew Morton, glommer, linux-kernel, linux-fsdevel, ext2-devel,
	hirofumi, linux-ntfs-dev, aia21, hch, viro

On Tue, 2005-10-11 at 03:20 +0200, Mikulas Patocka wrote:
> >>  I liked what linux-2.0 did in this case --- if the kernel was out of
> >>  memory, getblk just took another buffer, wrote it if it was dirty and used
> >>  it. Except for writeable loopback device (where writing one buffer
> >>  generates more dirty buffers), it couldn't deadlock.
> >
> > Wouldn't it be better if bread() were to return ERR_PTR(-EIO) or
> > ERR_PTR(-ENOMEM)?    Big change.
> 
> No. Out of memory condition can happen even under normal circumstances 
> under lightly loaded system. Think of a situation when dirty file-mapped 
> pages fill up the whole memory, now a burst of packets from network comes 
> that fills up kernel atomic reserve, you have zero pages free --- and what 
> now? --- returning ENOMEM and dropping dirty pages without writing them is 
> wrong,

Oh, don't be stupid.  You would just redirty the page and return.  See
for example the writepage implementation in fs/ntfs/aops.c...

>  deadlocking (filesystem waits until memory manager frees some pages 
> and memory manager waits until filesystem writes the dirty pages) is wrong 
> too.

That would never really happen.  The probability of every single dirty
page on the system being tied into needing a memory allocation is very
close to zero...  It is just a theoretical deadlock.

> The filesystem must make sure that it doesn't need any memory to do block 
> allocation and data write. Linux-2.0 got this right, it could do getblk 
> and bread even if get_free_pages constantly failed.

That is impossible to do for complex filesystems.  Every filesystem
would have to pre-allocate masses of memory from all sorts of places
(you would need pages that can be plugged into the page cache and/or you
would need buffers and/or you would need bios which you are not allowed
to hold onto so you would already have a problem here, etc...) to be
able to do that and that would impact system performance greatly unless
you had ridiculous amount of ram to start with in which case you would
not need to bother with this complexity as you would never oom anyway.

Best regards,

        Anton
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-11  0:40                 ` Glauber de Oliveira Costa
@ 2005-10-11 12:35                   ` Jan Hudec
  0 siblings, 0 replies; 43+ messages in thread
From: Jan Hudec @ 2005-10-11 12:35 UTC (permalink / raw)
  To: Glauber de Oliveira Costa
  Cc: Al Viro, Andrew Morton, Anton Altaparmakov, mikulas, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro

[-- Attachment #1: Type: text/plain, Size: 1150 bytes --]

On Mon, Oct 10, 2005 at 21:40:43 -0300, Glauber de Oliveira Costa wrote:
> [...]
> In the code you commented, I thought that we get the same case testing
> from or to conditions, and thus, it would be correct to threat them in
> the same way.

In that code (below), the first test can safely just return. But the
second has to undo the first call before returning. When you test new,
the bh is already non-null. So you must release it.

> [...]
> 
> On Tue, Oct 11, 2005 at 01:05:03AM +0100, Al Viro wrote:
> > On Mon, Oct 10, 2005 at 09:07:34PM -0300, Glauber de Oliveira Costa wrote:
> > >  	if (!bh)
> > >  		return -EIO;
> > >  	new = sb_getblk(sb, to);
> > > +	if (!new)
> > > +		return -EIO;
> > 
> > You've just introduced a leak here, obviously.
> > 
> > Please, read the code before "fixing" that stuff; slapping returns at random
> > and hoping that it will help is not a good way to deal with that - the only
> > thing you achieve is hiding the problem.
> > 
> > The same goes for the rest of patch - in each case it's not obvious that your
> > changes are correct.

--
						 Jan 'Bulb' Hudec <bulb@ucw.cz>

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-11  7:52           ` Anton Altaparmakov
@ 2005-10-12 19:51             ` Jeff Mahoney
  2005-10-12 19:59               ` Mikulas Patocka
  2005-10-12 20:08               ` Anton Altaparmakov
  0 siblings, 2 replies; 43+ messages in thread
From: Jeff Mahoney @ 2005-10-12 19:51 UTC (permalink / raw)
  To: Anton Altaparmakov
  Cc: Mikulas Patocka, Glauber de Oliveira Costa, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro, akpm

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Anton Altaparmakov wrote:
> On Tue, 2005-10-11 at 00:49 +0200, Mikulas Patocka wrote:
>> On Mon, 10 Oct 2005, Anton Altaparmakov wrote:
>>> On Mon, 10 Oct 2005, Mikulas Patocka wrote:
>>>> On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
>>>> As comment in buffer.c says, getblk will deadlock if the machine is out of
>>>> memory. It is questionable whether to deadlock or return NULL and corrupt
>>>> filesystem in this case --- deadlock is probably better.
>>> What do you mean corrupt filesystem?  If a filesystem is written so badly
>>> that it will cause corruption when a NULL is returned somewhere, I
>>> certainly don't want to have anything to do with it.
>> What should a filesystem driver do if it can't suddenly read or write any 
>> blocks on media?
> 
> Two clear choices:
> 
> 1) Switch to read-only and use the cached data to fulfil requests and
> fail all others.
> 
> 2) Ask the user to insert the media/plug the device back in (this is by
> far the most likely cause of all requests suddenly failing) and then
> continue where they left off.
> 
> It is unfortunate that Linux does not allow for 2) so you need to do 1).

I recently looked into 2) a bit and the dm multipath code is almost
enough to do exactly this.

If you configure your block device as an mpath device that queues on
path failure, and change the table to the new device location on device
re-attach, the queued up i/o will be flushed out. Almost. Right now,
when you change the table and resume the dm mapping, it does a suspend
which attempts to write out the data to a device which is no longer
there, causing it to just be dropped on the floor. If this were changed
not to do that, and perhaps set a timer so that the dirty data wouldn't
be left around forever if the device wasn't reattached, 2) would
definitely be possible.

I realize that the userspace intervention required may involve a bit of
dark magic, but my point is most of the code required on the kernel side
is already implemented.

- -Jeff

- --
Jeff Mahoney
SUSE Labs
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.2 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFDTWkyLPWxlyuTD7IRAg0lAJ4yoTfJPgBFPgrT7LaIOdKfN7hFCACfXVSC
685SGzxL2pGMnAySooeNaxk=
=9Wfm
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 19:51             ` Jeff Mahoney
@ 2005-10-12 19:59               ` Mikulas Patocka
  2005-10-12 20:07                 ` Jeff Mahoney
  2005-10-12 20:08               ` Anton Altaparmakov
  1 sibling, 1 reply; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-12 19:59 UTC (permalink / raw)
  To: Jeff Mahoney
  Cc: Anton Altaparmakov, Glauber de Oliveira Costa, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro, akpm

> Anton Altaparmakov wrote:
>> On Tue, 2005-10-11 at 00:49 +0200, Mikulas Patocka wrote:
>>> On Mon, 10 Oct 2005, Anton Altaparmakov wrote:
>>> What should a filesystem driver do if it can't suddenly read or write any
>>> blocks on media?
>>
>> Two clear choices:
>>
>> 1) Switch to read-only and use the cached data to fulfil requests and
>> fail all others.
>>
>> 2) Ask the user to insert the media/plug the device back in (this is by
>> far the most likely cause of all requests suddenly failing) and then
>> continue where they left off.
>>
>> It is unfortunate that Linux does not allow for 2) so you need to do 1).
>
> I recently looked into 2) a bit and the dm multipath code is almost
> enough to do exactly this.
>
> If you configure your block device as an mpath device that queues on
> path failure, and change the table to the new device location on device
> re-attach, the queued up i/o will be flushed out. Almost. Right now,
> when you change the table and resume the dm mapping, it does a suspend
> which attempts to write out the data to a device which is no longer
> there, causing it to just be dropped on the floor. If this were changed
> not to do that, and perhaps set a timer so that the dirty data wouldn't
> be left around forever if the device wasn't reattached, 2) would
> definitely be possible.
>
> I realize that the userspace intervention required may involve a bit of
> dark magic, but my point is most of the code required on the kernel side
> is already implemented.
>
> - -Jeff

Is memory management ready for this? Can't deadlock like this happen?
- displaying dialog window needs memory, so it waits until memory will be 
available
- system decides to write some write-back cached data in order to free 
memory
- the write of these data waits until the dialog window is displayed, 
user inserts the device and clicks 'OK'

Mikulas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 19:59               ` Mikulas Patocka
@ 2005-10-12 20:07                 ` Jeff Mahoney
  2005-10-12 20:12                   ` Mikulas Patocka
  2005-10-13  0:05                   ` Jamie Lokier
  0 siblings, 2 replies; 43+ messages in thread
From: Jeff Mahoney @ 2005-10-12 20:07 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Anton Altaparmakov, Glauber de Oliveira Costa, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro, akpm

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Mikulas Patocka wrote:
>> Anton Altaparmakov wrote:
>>> On Tue, 2005-10-11 at 00:49 +0200, Mikulas Patocka wrote:
>>>> On Mon, 10 Oct 2005, Anton Altaparmakov wrote:
>>>> What should a filesystem driver do if it can't suddenly read or
>>>> write any
>>>> blocks on media?
>>>
>>> Two clear choices:
>>>
>>> 1) Switch to read-only and use the cached data to fulfil requests and
>>> fail all others.
>>>
>>> 2) Ask the user to insert the media/plug the device back in (this is by
>>> far the most likely cause of all requests suddenly failing) and then
>>> continue where they left off.
>>>
>>> It is unfortunate that Linux does not allow for 2) so you need to do 1).
>>
>> I recently looked into 2) a bit and the dm multipath code is almost
>> enough to do exactly this.
>>
>> If you configure your block device as an mpath device that queues on
>> path failure, and change the table to the new device location on device
>> re-attach, the queued up i/o will be flushed out. Almost. Right now,
>> when you change the table and resume the dm mapping, it does a suspend
>> which attempts to write out the data to a device which is no longer
>> there, causing it to just be dropped on the floor. If this were changed
>> not to do that, and perhaps set a timer so that the dirty data wouldn't
>> be left around forever if the device wasn't reattached, 2) would
>> definitely be possible.
>>
>> I realize that the userspace intervention required may involve a bit of
>> dark magic, but my point is most of the code required on the kernel side
>> is already implemented.
>>
>> - -Jeff
> 
> Is memory management ready for this? Can't deadlock like this happen?
> - displaying dialog window needs memory, so it waits until memory will
> be available
> - system decides to write some write-back cached data in order to free
> memory
> - the write of these data waits until the dialog window is displayed,
> user inserts the device and clicks 'OK'

No, it's not, and deadlock is definitely possible. However, if we're at
the point where memory is tight enough that it's an issue, the timer can
expire and all the pending i/o is dropped just as it would be without
the multipath code enabled.

I'm not saying it's a solution ready for production, just a good
starting point.

- -Jeff

- --
Jeff Mahoney
SUSE Labs
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.2 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFDTWz5LPWxlyuTD7IRAiXRAJ4oRz7NpSrhMxp1ODlJWFsDaBcMsgCfbB8q
3XoPFrF0XHA1NFInVSjRicw=
=Do4z
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 19:51             ` Jeff Mahoney
  2005-10-12 19:59               ` Mikulas Patocka
@ 2005-10-12 20:08               ` Anton Altaparmakov
  1 sibling, 0 replies; 43+ messages in thread
From: Anton Altaparmakov @ 2005-10-12 20:08 UTC (permalink / raw)
  To: Jeff Mahoney
  Cc: Mikulas Patocka, Glauber de Oliveira Costa, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro, akpm

On Wed, 12 Oct 2005, Jeff Mahoney wrote:
> Anton Altaparmakov wrote:
> > On Tue, 2005-10-11 at 00:49 +0200, Mikulas Patocka wrote:
> >> On Mon, 10 Oct 2005, Anton Altaparmakov wrote:
> >>> On Mon, 10 Oct 2005, Mikulas Patocka wrote:
> >>>> On Mon, 10 Oct 2005, Glauber de Oliveira Costa wrote:
> >>>> As comment in buffer.c says, getblk will deadlock if the machine is out of
> >>>> memory. It is questionable whether to deadlock or return NULL and corrupt
> >>>> filesystem in this case --- deadlock is probably better.
> >>> What do you mean corrupt filesystem?  If a filesystem is written so badly
> >>> that it will cause corruption when a NULL is returned somewhere, I
> >>> certainly don't want to have anything to do with it.
> >> What should a filesystem driver do if it can't suddenly read or write any 
> >> blocks on media?
> > 
> > Two clear choices:
> > 
> > 1) Switch to read-only and use the cached data to fulfil requests and
> > fail all others.
> > 
> > 2) Ask the user to insert the media/plug the device back in (this is by
> > far the most likely cause of all requests suddenly failing) and then
> > continue where they left off.
> > 
> > It is unfortunate that Linux does not allow for 2) so you need to do 1).
> 
> I recently looked into 2) a bit and the dm multipath code is almost
> enough to do exactly this.
> 
> If you configure your block device as an mpath device that queues on
> path failure, and change the table to the new device location on device
> re-attach, the queued up i/o will be flushed out. Almost. Right now,
> when you change the table and resume the dm mapping, it does a suspend
> which attempts to write out the data to a device which is no longer
> there, causing it to just be dropped on the floor. If this were changed
> not to do that, and perhaps set a timer so that the dirty data wouldn't
> be left around forever if the device wasn't reattached, 2) would
> definitely be possible.
> 
> I realize that the userspace intervention required may involve a bit of
> dark magic, but my point is most of the code required on the kernel side
> is already implemented.

Cool.  I didn't know that.  On Linux as you say the userspace intervention 
is the real problem due to non-X vs X and gnome vs KDE vs whatnot...  But 
I would imagine a simple userspace helper a-la /sbin/modprobe and things 
like that would be enough.  And that could then be system specific to 
display a message to the local user...

Best regards,

	Anton
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 20:07                 ` Jeff Mahoney
@ 2005-10-12 20:12                   ` Mikulas Patocka
  2005-10-12 20:14                     ` Anton Altaparmakov
                                       ` (2 more replies)
  2005-10-13  0:05                   ` Jamie Lokier
  1 sibling, 3 replies; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-12 20:12 UTC (permalink / raw)
  To: Jeff Mahoney
  Cc: Anton Altaparmakov, Glauber de Oliveira Costa, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro, akpm

>> Is memory management ready for this? Can't deadlock like this happen?
>> - displaying dialog window needs memory, so it waits until memory will
>> be available
>> - system decides to write some write-back cached data in order to free
>> memory
>> - the write of these data waits until the dialog window is displayed,
>> user inserts the device and clicks 'OK'
>
> No, it's not, and deadlock is definitely possible. However, if we're at
> the point where memory is tight enough that it's an issue, the timer can
> expire and all the pending i/o is dropped just as it would be without
> the multipath code enabled.
>
> I'm not saying it's a solution ready for production, just a good
> starting point.

But discarding data sometimes on USB unplug is even worse than discarding 
data always --- users will by experimenting learn that linux doesn't 
discard write-cached data and reminds them to replug the device --- and 
one day, randomly, they lose their data because of some memory management 
condition...

Mikulas

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 20:12                   ` Mikulas Patocka
@ 2005-10-12 20:14                     ` Anton Altaparmakov
  2005-10-12 20:31                       ` Mikulas Patocka
  2005-10-13  0:09                     ` Jamie Lokier
  2005-10-13 11:17                     ` Pavel Machek
  2 siblings, 1 reply; 43+ messages in thread
From: Anton Altaparmakov @ 2005-10-12 20:14 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Jeff Mahoney, Glauber de Oliveira Costa, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro, akpm

On Wed, 12 Oct 2005, Mikulas Patocka wrote:
> > > Is memory management ready for this? Can't deadlock like this happen?
> > > - displaying dialog window needs memory, so it waits until memory will
> > > be available
> > > - system decides to write some write-back cached data in order to free
> > > memory
> > > - the write of these data waits until the dialog window is displayed,
> > > user inserts the device and clicks 'OK'
> > 
> > No, it's not, and deadlock is definitely possible. However, if we're at
> > the point where memory is tight enough that it's an issue, the timer can
> > expire and all the pending i/o is dropped just as it would be without
> > the multipath code enabled.
> > 
> > I'm not saying it's a solution ready for production, just a good
> > starting point.
> 
> But discarding data sometimes on USB unplug is even worse than discarding data
> always --- users will by experimenting learn that linux doesn't discard
> write-cached data and reminds them to replug the device --- and one day,
> randomly, they lose their data because of some memory management condition...

And how exactly is that worse than discarding the data every time?!?!?!?

Best regards,

	Anton
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 20:14                     ` Anton Altaparmakov
@ 2005-10-12 20:31                       ` Mikulas Patocka
  2005-10-12 21:19                         ` Jeff Mahoney
  2005-10-12 21:35                         ` Anton Altaparmakov
  0 siblings, 2 replies; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-12 20:31 UTC (permalink / raw)
  To: Anton Altaparmakov
  Cc: Jeff Mahoney, Glauber de Oliveira Costa, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro, akpm

>> But discarding data sometimes on USB unplug is even worse than discarding data
>> always --- users will by experimenting learn that linux doesn't discard
>> write-cached data and reminds them to replug the device --- and one day,
>> randomly, they lose their data because of some memory management condition...
>
> And how exactly is that worse than discarding the data every time?!?!?!?

Undeterministic behaviour is worse than deterministic. You can learn 
the system that behaves deterministically.

If you know that unplug damages filesystem on your USB disk, you replug 
it, recheck filesystem and copy the important data again --- you have 0% 
probability of data damage.
However, if damage on unplug happens only with 1/100 probability, will 
you still check filesystem and copy all recently created files on it? You 
forget it (or you wouldn't even know that damage might occur) and you have 
1% probability of data damage.

Mikulas

> Best regards,
>
> 	Anton

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 20:31                       ` Mikulas Patocka
@ 2005-10-12 21:19                         ` Jeff Mahoney
  2005-10-12 21:35                         ` Anton Altaparmakov
  1 sibling, 0 replies; 43+ messages in thread
From: Jeff Mahoney @ 2005-10-12 21:19 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Anton Altaparmakov, Glauber de Oliveira Costa, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro, akpm

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Mikulas Patocka wrote:
>>> But discarding data sometimes on USB unplug is even worse than
>>> discarding data
>>> always --- users will by experimenting learn that linux doesn't discard
>>> write-cached data and reminds them to replug the device --- and one day,
>>> randomly, they lose their data because of some memory management
>>> condition...
>>
>> And how exactly is that worse than discarding the data every time?!?!?!?
> 
> Undeterministic behaviour is worse than deterministic. You can learn the
> system that behaves deterministically.
> 
> If you know that unplug damages filesystem on your USB disk, you replug
> it, recheck filesystem and copy the important data again --- you have 0%
> probability of data damage.
> However, if damage on unplug happens only with 1/100 probability, will
> you still check filesystem and copy all recently created files on it?
> You forget it (or you wouldn't even know that damage might occur) and
> you have 1% probability of data damage.

I agree that dependability is important, but so important that we keep
the absolute worst case scenario for all cases because it could happen
occasionally? We can warn the user that removing media without umounting
/ejecting it may cause data loss and prompt them to reinsert the media.
If they don't reinsert the media soon enough (for any reason, including
the availabilty of memory) we can inform them that data loss may have
occured and that they should attempt to recover the file system.

- -Jeff

- --
Jeff Mahoney
SUSE Labs
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.2 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFDTX3TLPWxlyuTD7IRApliAJ4+8+OGhzKXlfkgx67lfDJioBeqngCgmF66
HOTsI39yyNUTL4H7KP9ZM38=
=WTcy
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 20:31                       ` Mikulas Patocka
  2005-10-12 21:19                         ` Jeff Mahoney
@ 2005-10-12 21:35                         ` Anton Altaparmakov
  2005-10-14 11:32                           ` Al Boldi
  1 sibling, 1 reply; 43+ messages in thread
From: Anton Altaparmakov @ 2005-10-12 21:35 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Jeff Mahoney, Glauber de Oliveira Costa, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro, akpm

On Wed, 12 Oct 2005, Mikulas Patocka wrote:
> > > But discarding data sometimes on USB unplug is even worse than discarding
> > > data
> > > always --- users will by experimenting learn that linux doesn't discard
> > > write-cached data and reminds them to replug the device --- and one day,
> > > randomly, they lose their data because of some memory management
> > > condition...
> > 
> > And how exactly is that worse than discarding the data every time?!?!?!?
> 
> Undeterministic behaviour is worse than deterministic. You can learn the
> system that behaves deterministically.
> 
> If you know that unplug damages filesystem on your USB disk, you replug it,
> recheck filesystem and copy the important data again --- you have 0%
> probability of data damage.
> However, if damage on unplug happens only with 1/100 probability, will you
> still check filesystem and copy all recently created files on it? You forget
> it (or you wouldn't even know that damage might occur) and you have 1%
> probability of data damage.

So display a warning that data may be lost (or may already have been 
lost), just like Windows does.

Also, your probabilities are ridiculous.  You are not telling me that 
there is a 1% probability of OOM every time I unplud a usb device?!?  I 
have used Linux for almost 10 years now and I have only ever have seen 
OOMs once when I had a bad memory leak.  To me that counts under 
"practically never happens" so I don't care too much about that case.  I 
would rather see it working correctly in majority of cases.  Getting 
everything right is _impossible_.  You cannot designd a sane system with 
sane performance that cannot OOM.  And when that happens applications will 
get killed so you will loose all your data anyway.  So who cares that 
some data on the usb stick may then be lost?

And anyway you are completely wrong/unknowledgeable about usb sticks and 
what filesystems are used on them if you think that "nothing is lost" at 
present since user just knows to do an fsck!  At the moment whenever I get 
a usb stick unplugged without properly "sync, umount, sync several times", 
I usually find that the fs on it is 100% destroyed the moment I try to use 
it on one of the "other" OS...  Fsck, ha!  Total data loss everytime for 
me!  And I would much rather have it just work on replugging every time 
except in the < 0.000000000000000000000000000001% chance of OOM than 
_never_ as it is now.

Think practical, not theoretical.  Theory is all nice except that it never 
meets with practice.  Just ask Linus what he thinks of 
Specifications which are another form of Theoretically Correct entity...  
(-;

Best regards,

	Anton
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 20:07                 ` Jeff Mahoney
  2005-10-12 20:12                   ` Mikulas Patocka
@ 2005-10-13  0:05                   ` Jamie Lokier
  1 sibling, 0 replies; 43+ messages in thread
From: Jamie Lokier @ 2005-10-13  0:05 UTC (permalink / raw)
  To: Jeff Mahoney
  Cc: Mikulas Patocka, Anton Altaparmakov, Glauber de Oliveira Costa,
	linux-kernel, linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev,
	aia21, hch, viro, akpm

Jeff Mahoney wrote:
> No, it's not, and deadlock is definitely possible. However, if we're at
> the point where memory is tight enough that it's an issue, the timer can
> expire and all the pending i/o is dropped just as it would be without
> the multipath code enabled.

Followed by requesting a dialog to tell the user that their dirty
data/metadata has been dropped :)

-- Jamie

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 20:12                   ` Mikulas Patocka
  2005-10-12 20:14                     ` Anton Altaparmakov
@ 2005-10-13  0:09                     ` Jamie Lokier
  2005-10-13  0:21                       ` Mikulas Patocka
  2005-10-13 11:17                     ` Pavel Machek
  2 siblings, 1 reply; 43+ messages in thread
From: Jamie Lokier @ 2005-10-13  0:09 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Jeff Mahoney, Anton Altaparmakov, Glauber de Oliveira Costa,
	linux-kernel, linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev,
	aia21, hch, viro, akpm

Mikulas Patocka wrote:
> But discarding data sometimes on USB unplug is even worse than discarding 
> data always --- users will by experimenting learn that linux doesn't 
> discard write-cached data and reminds them to replug the device --- and 
> one day, randomly, they lose their data because of some memory management 
> condition...

It should not happen provided the total amount of dirty data for
detachable devices is restricted to allow enough room for opening a
dialog.

That's no different, in principle, than the restrictions that are used
to ensure some types of kernel memory allocation always succeed.

There's no exact calculation, just a notion of "this many megabytes
should be enough for a dialog".

-- Jamie

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-13  0:09                     ` Jamie Lokier
@ 2005-10-13  0:21                       ` Mikulas Patocka
  2005-10-13  0:27                         ` Jamie Lokier
  0 siblings, 1 reply; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-13  0:21 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Jeff Mahoney, Anton Altaparmakov, Glauber de Oliveira Costa,
	linux-kernel, linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev,
	aia21, hch, viro, akpm



On Thu, 13 Oct 2005, Jamie Lokier wrote:

> Mikulas Patocka wrote:
>> But discarding data sometimes on USB unplug is even worse than discarding
>> data always --- users will by experimenting learn that linux doesn't
>> discard write-cached data and reminds them to replug the device --- and
>> one day, randomly, they lose their data because of some memory management
>> condition...
>
> It should not happen provided the total amount of dirty data for
> detachable devices is restricted to allow enough room for opening a
> dialog.

That is possible ... you must also make sure that you do not hold an 
important semaphore while waiting for some removable device (auditing VFS 
for this will be a bit harder...)

Mikulas

> That's no different, in principle, than the restrictions that are used
> to ensure some types of kernel memory allocation always succeed.
>
> There's no exact calculation, just a notion of "this many megabytes
> should be enough for a dialog".
>
> -- Jamie
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-13  0:21                       ` Mikulas Patocka
@ 2005-10-13  0:27                         ` Jamie Lokier
  0 siblings, 0 replies; 43+ messages in thread
From: Jamie Lokier @ 2005-10-13  0:27 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Jeff Mahoney, Anton Altaparmakov, Glauber de Oliveira Costa,
	linux-kernel, linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev,
	aia21, hch, viro, akpm

Mikulas Patocka wrote:
> That is possible ... you must also make sure that you do not hold an 
> important semaphore while waiting for some removable device (auditing VFS 
> for this will be a bit harder...)

If any filesystem is holding any _global_ semaphores while waiting for
an I/O to complete - that's a major bug already.

Activity which may take a long time due to slow I/O on one filesystem
shouldn't block activity on other, unrelated filesystems, apart from
global resource competition such as numbers of dirty pages...

-- Jamie

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-11  8:01                 ` Anton Altaparmakov
@ 2005-10-13  0:58                   ` Mike Christie
  0 siblings, 0 replies; 43+ messages in thread
From: Mike Christie @ 2005-10-13  0:58 UTC (permalink / raw)
  To: Anton Altaparmakov
  Cc: Andrew Morton, Mikulas Patocka, glommer, linux-kernel,
	linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch,
	viro

Anton Altaparmakov wrote:
> On Mon, 2005-10-10 at 18:07 -0700, Andrew Morton wrote:
> 
>>Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz> wrote:
>>
>>> On Mon, 10 Oct 2005, Andrew Morton wrote:
>>>
>>> > Anton Altaparmakov <aia21@cam.ac.uk> wrote:
>>> >>
>>> >> > Maybe the best solution is neither one nor another. Testing and failing
>>> >> > gracefully seems better.
>>> >> >
>>> >> > What do you think?
>>> >>
>>> >>  I certainly agree with you there.  I neither want a deadlock nor
>>> >>  corruption.  (-:
>>> >
>>> > Yup.  In the present implementation __getblk_slow() "cannot fail".  It's
>>> > conceivable that at some future stage we'll change __getblk_slow() so that
>>> > it returns NULL on an out-of-memory condition.
>>>
>>> The question is if it is desired --- it will make bread return NULL on 
>>> out-of-memory condition, callers will treat it like an IO error, skipping 
>>> access to the affected block, causing damage on perfectly healthy 
>>> filesystem.
>>
>>Yes, that is a bit dumb.  A filesystem might indeed want to take different
>>action for ENOMEM versus EIO.
>>
>>
>>> I liked what linux-2.0 did in this case --- if the kernel was out of 
>>> memory, getblk just took another buffer, wrote it if it was dirty and used 
>>> it. Except for writeable loopback device (where writing one buffer 
>>> generates more dirty buffers), it couldn't deadlock.
>>
>>Wouldn't it be better if bread() were to return ERR_PTR(-EIO) or
>>ERR_PTR(-ENOMEM)?    Big change.
> 
> 
> It would indeed.  Much better.  And whilst at it, it would be even
> better if we had a lot more error codes like "ERR_PTR(-EDEVUNPLUGGED)"
> for example...  But that would be an even better change.  Anyone feeling
> like touching every block driver in the kernel?  (-;
> 

I have actually done this
http://marc.theaimsgroup.com/?l=linux-scsi&m=112487427230642&w=2
(this is just the bio users, the end_that_request_first/chunk users are 
in another patch).

I am just trying to figure out how to support some wierd scsi HW before 
reposting. If you have suggestions about how to implement the bitmap 
suggestion in that thread I am listening too (I implemented it like 
scsi's scsi_cmnd result field).

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 20:12                   ` Mikulas Patocka
  2005-10-12 20:14                     ` Anton Altaparmakov
  2005-10-13  0:09                     ` Jamie Lokier
@ 2005-10-13 11:17                     ` Pavel Machek
  2005-10-14 16:52                       ` Jamie Lokier
  2 siblings, 1 reply; 43+ messages in thread
From: Pavel Machek @ 2005-10-13 11:17 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Jeff Mahoney, Anton Altaparmakov, Glauber de Oliveira Costa,
	linux-kernel, linux-fsdevel, ext2-devel, hirofumi, linux-ntfs-dev,
	aia21, hch, viro, akpm

Hi!

> >>- the write of these data waits until the dialog window is 
> >>displayed,
> >>user inserts the device and clicks 'OK'
> >
> >No, it's not, and deadlock is definitely possible. However, if we're 
> >at
> >the point where memory is tight enough that it's an issue, the timer 
> >can
> >expire and all the pending i/o is dropped just as it would be without
> >the multipath code enabled.
> >
> >I'm not saying it's a solution ready for production, just a good
> >starting point.
> 
> But discarding data sometimes on USB unplug is even worse than 
> discarding data always --- users will by experimenting learn that 

*Good starting point*.

Anyway, one solution would be to simply mlockall() on that
replugitd and/or make dirty data hdd based
(not ram based) and/or
restricting dirty buffers to 10MB for removable media.

				Pavel
-- 
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms         

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-12 21:35                         ` Anton Altaparmakov
@ 2005-10-14 11:32                           ` Al Boldi
  0 siblings, 0 replies; 43+ messages in thread
From: Al Boldi @ 2005-10-14 11:32 UTC (permalink / raw)
  To: linux-fsdevel

Anton Altaparmakov wrote:
> You cannot design a sane system with sane performance that cannot OOM.
> And when that happens applications will get killed so you will loose all
> your data anyway.
> So who cares that some data on the usb stick may then be lost?

Memory is a resource.  When the system runs out of a resource it should be 
able to recover in a sane manner, w/o killing anything.
OOM, i.e. overcommit, should be an option for those who like to live on the 
edge.  It should not be the norm, which is best for performance and 
stability and reliability.

--
Al

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-13 11:17                     ` Pavel Machek
@ 2005-10-14 16:52                       ` Jamie Lokier
  2005-10-14 18:26                         ` Mikulas Patocka
  0 siblings, 1 reply; 43+ messages in thread
From: Jamie Lokier @ 2005-10-14 16:52 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Mikulas Patocka, Jeff Mahoney, Anton Altaparmakov,
	Glauber de Oliveira Costa, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm

Pavel Machek wrote:
> *Good starting point*.
> 
> Anyway, one solution would be to simply mlockall() on that replugitd

Unfortunately, mlockall() isn't effective for an X application (for
the dialog) because the X server needs resources too, and that's not
usually mlock'd because it's too big.  (It would be nice to have a
general solution to allow mlock'd GUI applications).

> and/or make dirty data hdd based (not ram based)

Ooh, swappable dirty data... nice idea :)

> and/or restricting dirty buffers to 10MB for removable media.

That seems like the simplest effective solution.

-- Jamie


-------------------------------------------------------
This SF.Net email is sponsored by:
Power Architecture Resource Center: Free content, downloads, discussions,
and more. http://solutions.newsforge.com/ibmarch.tmpl

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Use of getblk differs between locations
  2005-10-14 16:52                       ` Jamie Lokier
@ 2005-10-14 18:26                         ` Mikulas Patocka
  0 siblings, 0 replies; 43+ messages in thread
From: Mikulas Patocka @ 2005-10-14 18:26 UTC (permalink / raw)
  To: Jamie Lokier
  Cc: Pavel Machek, Jeff Mahoney, Anton Altaparmakov,
	Glauber de Oliveira Costa, linux-kernel, linux-fsdevel,
	ext2-devel, hirofumi, linux-ntfs-dev, aia21, hch, viro, akpm



On Fri, 14 Oct 2005, Jamie Lokier wrote:

> Pavel Machek wrote:
>
>> and/or make dirty data hdd based (not ram based)
>
> Ooh, swappable dirty data... nice idea :)

Multics had something like this :-) They had fast small drum and slow big 
disk --- so they wrote dirty file pages first to drum and then moved them 
to disk. Needless to say that it very too complex and they ripped it out 
of the system after years.

http://www.multicians.org/mgp.html#pagemultilevel

Mikulas

>> and/or restricting dirty buffers to 10MB for removable media.
>
> That seems like the simplest effective solution.
>
> -- Jamie
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2005-10-14 18:26 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-10-10 20:45 [PATCH] Use of getblk differs between locations Glauber de Oliveira Costa
2005-10-10 21:20 ` Anton Altaparmakov
2005-10-10 21:46   ` Glauber de Oliveira Costa
2005-10-10 21:58     ` Mikulas Patocka
2005-10-10 22:25       ` Anton Altaparmakov
2005-10-10 22:49         ` Mikulas Patocka
2005-10-10 23:12           ` Glauber de Oliveira Costa
2005-10-10 23:16             ` Mikulas Patocka
2005-10-10 23:33               ` Glauber de Oliveira Costa
2005-10-10 23:34                 ` Mikulas Patocka
2005-10-10 23:49                   ` Glauber de Oliveira Costa
2005-10-11  7:52           ` Anton Altaparmakov
2005-10-12 19:51             ` Jeff Mahoney
2005-10-12 19:59               ` Mikulas Patocka
2005-10-12 20:07                 ` Jeff Mahoney
2005-10-12 20:12                   ` Mikulas Patocka
2005-10-12 20:14                     ` Anton Altaparmakov
2005-10-12 20:31                       ` Mikulas Patocka
2005-10-12 21:19                         ` Jeff Mahoney
2005-10-12 21:35                         ` Anton Altaparmakov
2005-10-14 11:32                           ` Al Boldi
2005-10-13  0:09                     ` Jamie Lokier
2005-10-13  0:21                       ` Mikulas Patocka
2005-10-13  0:27                         ` Jamie Lokier
2005-10-13 11:17                     ` Pavel Machek
2005-10-14 16:52                       ` Jamie Lokier
2005-10-14 18:26                         ` Mikulas Patocka
2005-10-13  0:05                   ` Jamie Lokier
2005-10-12 20:08               ` Anton Altaparmakov
2005-10-10 22:36       ` Glauber de Oliveira Costa
2005-10-10 22:28         ` Anton Altaparmakov
2005-10-10 23:36           ` Andrew Morton
2005-10-11  0:07             ` Glauber de Oliveira Costa
2005-10-11  0:05               ` Al Viro
2005-10-11  0:40                 ` Glauber de Oliveira Costa
2005-10-11 12:35                   ` Jan Hudec
2005-10-11  0:09             ` Mikulas Patocka
2005-10-11  1:07               ` Andrew Morton
2005-10-11  1:20                 ` Mikulas Patocka
2005-10-11  5:02                   ` Andrew Morton
2005-10-11  8:07                   ` Anton Altaparmakov
2005-10-11  8:01                 ` Anton Altaparmakov
2005-10-13  0:58                   ` Mike Christie

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).