public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] xfs: Check the return value of xfs_trans_get_buf()
@ 2011-09-20 13:56 Chandra Seetharaman
  2011-09-20 15:00 ` Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Chandra Seetharaman @ 2011-09-20 13:56 UTC (permalink / raw)
  To: XFS Mailing List; +Cc: Alex Elder

Ran the xfstests (auto) overnight and didn't see any new issues.

--
Check the return value of xfs_trans_get_buf() and fail appropriately.

Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>

---
 fs/xfs/xfs_attr_leaf.c |    2 ++
 fs/xfs/xfs_btree.c     |    3 ++-
 fs/xfs/xfs_dquot.c     |    5 ++++-
 fs/xfs/xfs_ialloc.c    |   13 ++++++++-----
 fs/xfs/xfs_inode.c     |    9 ++++++---
 fs/xfs/xfs_vnodeops.c  |    9 ++++++++-
 6 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 8fad960..58c3add 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -2948,6 +2948,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
 			bp = xfs_trans_get_buf(*trans,
 					dp->i_mount->m_ddev_targp,
 					dblkno, dblkcnt, XBF_LOCK);
+			if (!bp)
+				return ENOMEM;
 			xfs_trans_binval(*trans, bp);
 			/*
 			 * Roll to next transaction.
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 2b9fd38..28cc019 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -970,7 +970,8 @@ xfs_btree_get_buf_block(
 	*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
 				 mp->m_bsize, flags);
 
-	ASSERT(!xfs_buf_geterror(*bpp));
+	if (!*bpp)
+		return ENOMEM;
 
 	*block = XFS_BUF_TO_BLOCK(*bpp);
 	return 0;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 3e2ccae..0c5fe66 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -402,8 +402,11 @@ xfs_qm_dqalloc(
 			       dqp->q_blkno,
 			       mp->m_quotainfo->qi_dqchunklen,
 			       0);
-	if (!bp || (error = xfs_buf_geterror(bp)))
+
+	error = xfs_buf_geterror(bp);
+	if (error)
 		goto error1;
+
 	/*
 	 * Make a chunk of dquots out of this buffer and log
 	 * the entire thing.
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9f24ec2..207e0b0 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -150,7 +150,7 @@ xfs_check_agi_freecount(
 /*
  * Initialise a new set of inodes.
  */
-STATIC void
+STATIC int
 xfs_ialloc_inode_init(
 	struct xfs_mount	*mp,
 	struct xfs_trans	*tp,
@@ -202,8 +202,8 @@ xfs_ialloc_inode_init(
 		fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
 					 mp->m_bsize * blks_per_cluster,
 					 XBF_LOCK);
-		ASSERT(!xfs_buf_geterror(fbuf));
-
+		if (!fbuf)
+			return ENOMEM;
 		/*
 		 * Initialize all inodes in this buffer and then log them.
 		 *
@@ -225,6 +225,7 @@ xfs_ialloc_inode_init(
 		}
 		xfs_trans_inode_alloc_buf(tp, fbuf);
 	}
+	return 0;
 }
 
 /*
@@ -369,9 +370,11 @@ xfs_ialloc_ag_alloc(
 	 * rather than a linear progression to prevent the next generation
 	 * number from being easily guessable.
 	 */
-	xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len,
-			      random32());
+	error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
+			args.len, random32());
 
+	if (error)
+		return error;
 	/*
 	 * Convert the results.
 	 */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7f237ba..d689253 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1644,7 +1644,7 @@ xfs_iunlink_remove(
  * inodes that are in memory - they all must be marked stale and attached to
  * the cluster buffer.
  */
-STATIC void
+STATIC int
 xfs_ifree_cluster(
 	xfs_inode_t	*free_ip,
 	xfs_trans_t	*tp,
@@ -1690,6 +1690,8 @@ xfs_ifree_cluster(
 					mp->m_bsize * blks_per_cluster,
 					XBF_LOCK);
 
+		if (!bp)
+			return ENOMEM;
 		/*
 		 * Walk the inodes already attached to the buffer and mark them
 		 * stale. These will all have the flush locks held, so an
@@ -1799,6 +1801,7 @@ retry:
 	}
 
 	xfs_perag_put(pag);
+	return 0;
 }
 
 /*
@@ -1878,10 +1881,10 @@ xfs_ifree(
 	dip->di_mode = 0;
 
 	if (delete) {
-		xfs_ifree_cluster(ip, tp, first_ino);
+		error = xfs_ifree_cluster(ip, tp, first_ino);
 	}
 
-	return 0;
+	return error;
 }
 
 /*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c2ff0fc..0d1caec 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -308,6 +308,10 @@ xfs_inactive_symlink_rmt(
 		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
 			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
+		if (!bp) {
+			error = ENOMEM;
+			goto error1;
+		}
 		xfs_trans_binval(tp, bp);
 	}
 	/*
@@ -1648,7 +1652,10 @@ xfs_symlink(
 			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
 			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
 					       BTOBB(byte_cnt), 0);
-			ASSERT(!xfs_buf_geterror(bp));
+			if (!bp) {
+				error = ENOMEM;
+				goto error2;
+			}
 			if (pathlen < byte_cnt) {
 				byte_cnt = pathlen;
 			}
-- 
1.7.1



_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] xfs: Check the return value of xfs_trans_get_buf()
  2011-09-20 13:56 [PATCH v2] xfs: Check the return value of xfs_trans_get_buf() Chandra Seetharaman
@ 2011-09-20 15:00 ` Christoph Hellwig
  2011-09-20 18:05   ` Chandra Seetharaman
  2011-09-20 15:37 ` Alex Elder
  2011-09-21  0:56 ` Dave Chinner
  2 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2011-09-20 15:00 UTC (permalink / raw)
  To: Chandra Seetharaman; +Cc: Alex Elder, XFS Mailing List

On Tue, Sep 20, 2011 at 08:56:55AM -0500, Chandra Seetharaman wrote:
> --- a/fs/xfs/xfs_attr_leaf.c
> +++ b/fs/xfs/xfs_attr_leaf.c
> @@ -2948,6 +2948,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
>  			bp = xfs_trans_get_buf(*trans,
>  					dp->i_mount->m_ddev_targp,
>  					dblkno, dblkcnt, XBF_LOCK);
> +			if (!bp)
> +				return ENOMEM;
>  			xfs_trans_binval(*trans, bp);

xfs_trans_binval only really does anything if the buffer was in memory.

We have a few callers using that patterm, and I think they should simply
switch to not reading the buffer in if it's not there yet, e.g.
using something like an xfs_trans_incore.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] xfs: Check the return value of xfs_trans_get_buf()
  2011-09-20 13:56 [PATCH v2] xfs: Check the return value of xfs_trans_get_buf() Chandra Seetharaman
  2011-09-20 15:00 ` Christoph Hellwig
@ 2011-09-20 15:37 ` Alex Elder
  2011-09-21  0:56 ` Dave Chinner
  2 siblings, 0 replies; 10+ messages in thread
From: Alex Elder @ 2011-09-20 15:37 UTC (permalink / raw)
  To: sekharan; +Cc: XFS Mailing List

On Tue, 2011-09-20 at 08:56 -0500, Chandra Seetharaman wrote:
> Ran the xfstests (auto) overnight and didn't see any new issues.
> 
> --
> Check the return value of xfs_trans_get_buf() and fail appropriately.
> 
> Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>

Looks good.

Reviewed-by: Alex Elder <aelder@sgi.com>


_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] xfs: Check the return value of xfs_trans_get_buf()
  2011-09-20 15:00 ` Christoph Hellwig
@ 2011-09-20 18:05   ` Chandra Seetharaman
  2011-09-20 18:09     ` Christoph Hellwig
  0 siblings, 1 reply; 10+ messages in thread
From: Chandra Seetharaman @ 2011-09-20 18:05 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Alex Elder, XFS Mailing List

On Tue, 2011-09-20 at 11:00 -0400, Christoph Hellwig wrote:
> On Tue, Sep 20, 2011 at 08:56:55AM -0500, Chandra Seetharaman wrote:
> > --- a/fs/xfs/xfs_attr_leaf.c
> > +++ b/fs/xfs/xfs_attr_leaf.c
> > @@ -2948,6 +2948,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
> >  			bp = xfs_trans_get_buf(*trans,
> >  					dp->i_mount->m_ddev_targp,
> >  					dblkno, dblkcnt, XBF_LOCK);
> > +			if (!bp)
> > +				return ENOMEM;
> >  			xfs_trans_binval(*trans, bp);
> 
> xfs_trans_binval only really does anything if the buffer was in memory.
> 
> We have a few callers using that patterm, and I think they should simply
> switch to not reading the buffer in if it's not there yet, e.g.
> using something like an xfs_trans_incore.

Hi Christoph,

I do not understand. Can you elaborate on what needs to be done.

Thanks

Chandra
> 


_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] xfs: Check the return value of xfs_trans_get_buf()
  2011-09-20 18:05   ` Chandra Seetharaman
@ 2011-09-20 18:09     ` Christoph Hellwig
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2011-09-20 18:09 UTC (permalink / raw)
  To: Chandra Seetharaman; +Cc: Christoph Hellwig, Alex Elder, XFS Mailing List

On Tue, Sep 20, 2011 at 01:05:54PM -0500, Chandra Seetharaman wrote:
> 
> Hi Christoph,
> 
> I do not understand. Can you elaborate on what needs to be done.

All callers that do the xfs_trans_binval only really care about buffers
that already exist.  So we don't even have to allocate a new buffer
if it isn't in memory yet - note that it generally will be given that
the buffer is attached to the transaction.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] xfs: Check the return value of xfs_trans_get_buf()
  2011-09-20 13:56 [PATCH v2] xfs: Check the return value of xfs_trans_get_buf() Chandra Seetharaman
  2011-09-20 15:00 ` Christoph Hellwig
  2011-09-20 15:37 ` Alex Elder
@ 2011-09-21  0:56 ` Dave Chinner
  2011-09-21 14:28   ` Chandra Seetharaman
  2011-09-21 20:38   ` Alex Elder
  2 siblings, 2 replies; 10+ messages in thread
From: Dave Chinner @ 2011-09-21  0:56 UTC (permalink / raw)
  To: Chandra Seetharaman; +Cc: Alex Elder, XFS Mailing List

On Tue, Sep 20, 2011 at 08:56:55AM -0500, Chandra Seetharaman wrote:
> Ran the xfstests (auto) overnight and didn't see any new issues.

Sure, but xfstests won't be triggering the new failure paths.

It looks to me like any failure to get a buffer will now result in a
cancelled transaction and a filesystem shutdown - the new failure
paths really need to be tested to ensure that failures are handled
gracefully and don't result in filesystem corruption.

As it is, I'm not sure we want to do this. The only reason we can
fail to get a buffer is allocation failures in extremely low memory
conditions. However, the last thing we want is for filesystem
shutdowns to be triggered by transient low memory conditions.

The current state of the code is that the xfs_buf_get() code tries
really, really hard to allocate memory, and we don't have any
evidence to point to the fact that is it failing to allocate memory.
We'd be seeing asserts firing and/or NULL pointer deref panics if
xfs_buf_get() was failing, and neither of these are happening.

As it is, before we can gracefully handle memory allocation failures
in the xfs_buf layer, we need to be able to roll back dirty
transactions so that memory allocation failure does not result
filesystem shutdowns. That's actually possible to do now with the
delayed logging infrastructure (because the CIL keeps a copy of the
previous in memory modifications prior to the bad transaction), so
we should look towards implementing transaction rollback first
before allowing memory allocation to fail inside transaction
contexts....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] xfs: Check the return value of xfs_trans_get_buf()
  2011-09-21  0:56 ` Dave Chinner
@ 2011-09-21 14:28   ` Chandra Seetharaman
  2011-09-21 22:28     ` Dave Chinner
  2011-09-21 20:38   ` Alex Elder
  1 sibling, 1 reply; 10+ messages in thread
From: Chandra Seetharaman @ 2011-09-21 14:28 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Alex Elder, XFS Mailing List

On Wed, 2011-09-21 at 10:56 +1000, Dave Chinner wrote:
> On Tue, Sep 20, 2011 at 08:56:55AM -0500, Chandra Seetharaman wrote:
> > Ran the xfstests (auto) overnight and didn't see any new issues.
>
> Sure, but xfstests won't be triggering the new failure paths.

I meant to convey that I did not introduce any regressions.

> 
> It looks to me like any failure to get a buffer will now result in a
> cancelled transaction and a filesystem shutdown - the new failure
> paths really need to be tested to ensure that failures are handled
> gracefully and don't result in filesystem corruption.
> 
> As it is, I'm not sure we want to do this. The only reason we can
> fail to get a buffer is allocation failures in extremely low memory
> conditions. However, the last thing we want is for filesystem
> shutdowns to be triggered by transient low memory conditions.
> 
> The current state of the code is that the xfs_buf_get() code tries
> really, really hard to allocate memory, and we don't have any
> evidence to point to the fact that is it failing to allocate memory.
> We'd be seeing asserts firing and/or NULL pointer deref panics if
> xfs_buf_get() was failing, and neither of these are happening.

I am really confused. Are you saying we should rather let the kernel
reference a null pointer and panic than making the file system shutdown
gracefully ?!

I do agree that we may not be seeing any buffer allocation failures (I
was very surprised to see allocations not being checked in such a mature
code). Under the very same token, we will not exercise the failure paths
these patches introduce) and hence will not get into file system
shutdown state. No ?

Just in case when the buffer allocation do fail, the changes in these
patches would prevent a kernel panic and lead to a graceful file system
shutdown. Isn't that a better option ?
 
> 
> As it is, before we can gracefully handle memory allocation failures
> in the xfs_buf layer, we need to be able to roll back dirty
> transactions so that memory allocation failure does not result
> filesystem shutdowns. That's actually possible to do now with the
> delayed logging infrastructure (because the CIL keeps a copy of the
> previous in memory modifications prior to the bad transaction), so
> we should look towards implementing transaction rollback first
> before allowing memory allocation to fail inside transaction
> contexts....
> 
> Cheers,
> 
> Dave.


_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] xfs: Check the return value of xfs_trans_get_buf()
  2011-09-21  0:56 ` Dave Chinner
  2011-09-21 14:28   ` Chandra Seetharaman
@ 2011-09-21 20:38   ` Alex Elder
  2011-09-21 22:43     ` Dave Chinner
  1 sibling, 1 reply; 10+ messages in thread
From: Alex Elder @ 2011-09-21 20:38 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Chandra Seetharaman, XFS Mailing List

On Wed, 2011-09-21 at 10:56 +1000, Dave Chinner wrote:
> On Tue, Sep 20, 2011 at 08:56:55AM -0500, Chandra Seetharaman wrote:
> > Ran the xfstests (auto) overnight and didn't see any new issues.
> 
> Sure, but xfstests won't be triggering the new failure paths.
> 
> It looks to me like any failure to get a buffer will now result in a
> cancelled transaction and a filesystem shutdown - the new failure
> paths really need to be tested to ensure that failures are handled
> gracefully and don't result in filesystem corruption.

This is true.

> As it is, I'm not sure we want to do this. The only reason we can
> fail to get a buffer is allocation failures in extremely low memory
> conditions. However, the last thing we want is for filesystem
> shutdowns to be triggered by transient low memory conditions.

But a failure to get a buffer, not checked, can't be good
can it?  In other words, the patch now adds handling for
a xfs_buf_get() failure, which avoids a kernel-mode null
pointer dereference in the off chance that would happen.
That's worse than a filesystem shutdown.

Now I grant you your earlier statement, namely that it's
conceivable that the new error return could lead to a
previously un-exercised path that leads to file system
corruption.

But I do believe that all these places handle *an* error
(not the specific ENOMEM error), so those spots already
are generally prepared for something to go wrong.

> The current state of the code is that the xfs_buf_get() code tries
> really, really hard to allocate memory, and we don't have any
> evidence to point to the fact that is it failing to allocate memory.
> We'd be seeing asserts firing and/or NULL pointer deref panics if
> xfs_buf_get() was failing, and neither of these are happening.
> 
> As it is, before we can gracefully handle memory allocation failures
> in the xfs_buf layer, we need to be able to roll back dirty
> transactions so that memory allocation failure does not result
> filesystem shutdowns. That's actually possible to do now with the
> delayed logging infrastructure (because the CIL keeps a copy of the
> previous in memory modifications prior to the bad transaction), so
> we should look towards implementing transaction rollback first
> before allowing memory allocation to fail inside transaction
> contexts....

Now that's a great thing to do--use the CIL to facilitate
rolling back dirty transactions.  Very cool.

But this patch doesn't "allow" memory allocation to fail,
it simply avoids a sudden panic if it ever did (which you
point out is only slightly less likely than impossible).

I didn't anticipate this, and the patch has already been
committed.  I need to know whether people think this is
critical enough for me to revert the patch.

					-Alex



_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] xfs: Check the return value of xfs_trans_get_buf()
  2011-09-21 14:28   ` Chandra Seetharaman
@ 2011-09-21 22:28     ` Dave Chinner
  0 siblings, 0 replies; 10+ messages in thread
From: Dave Chinner @ 2011-09-21 22:28 UTC (permalink / raw)
  To: Chandra Seetharaman; +Cc: Alex Elder, XFS Mailing List

On Wed, Sep 21, 2011 at 09:28:50AM -0500, Chandra Seetharaman wrote:
> On Wed, 2011-09-21 at 10:56 +1000, Dave Chinner wrote:
> > On Tue, Sep 20, 2011 at 08:56:55AM -0500, Chandra Seetharaman wrote:
> > > Ran the xfstests (auto) overnight and didn't see any new issues.
> >
> > Sure, but xfstests won't be triggering the new failure paths.
> 
> I meant to convey that I did not introduce any regressions.

That's fine, but part of the review process is ensuring that
modifications were tested properly. i.e. answering the question "has
this code had adequate test coverage?"

> > It looks to me like any failure to get a buffer will now result in a
> > cancelled transaction and a filesystem shutdown - the new failure
> > paths really need to be tested to ensure that failures are handled
> > gracefully and don't result in filesystem corruption.
> > 
> > As it is, I'm not sure we want to do this. The only reason we can
> > fail to get a buffer is allocation failures in extremely low memory
> > conditions. However, the last thing we want is for filesystem
> > shutdowns to be triggered by transient low memory conditions.
> > 
> > The current state of the code is that the xfs_buf_get() code tries
> > really, really hard to allocate memory, and we don't have any
> > evidence to point to the fact that is it failing to allocate memory.
> > We'd be seeing asserts firing and/or NULL pointer deref panics if
> > xfs_buf_get() was failing, and neither of these are happening.
> 
> I am really confused. Are you saying we should rather let the kernel
> reference a null pointer and panic than making the file system shutdown
> gracefully ?!

What I'm saying is that adding the error handling may cause
worse problems that we currently already have. Shutdowns are only
supposed to happen when the filesystem has detected a condition that
means it cannot continue operation. i.e. a permanent failure
condition that requires administrator intervention to fix.

Memory allocation failures are usually transient failures, and in
general there is nothing an admin can do to prevent them. hence
triggering shutdowns on memory allocation failures is the wrong
direction to be headed. The correct direction to be going is to
handle memory allocation failures gracefully, and a filesystem
shutdown is anything but graceful.

> I do agree that we may not be seeing any buffer allocation failures (I
> was very surprised to see allocations not being checked in such a mature
> code).

The Irix kernel, where all this code and it's underlying assumptions
came from, guaranteed that kernel memory allocations would *never*
fail. The assumption that xfs_buf allocation (and that allocations
during transactions) will never fail come from this heritage.

IOWs, this change breaks an architectural assumption that XFS was
originally built around. Yes, we need to be able to handle memory
allocation errors but I don't think this is the right solution.

> Under the very same token, we will not exercise the failure paths
> these patches introduce) and hence will not get into file system
> shutdown state. No ?

Well, that's what I'm worried about. There may be people out there
that are seeing these failures, and just not reporting them (happens
all the time). In that case we better make sure that the shutdowns
occur appropriatey if we are adding error checking.

> Just in case when the buffer allocation do fail, the changes in these
> patches would prevent a kernel panic and lead to a graceful file system
> shutdown. Isn't that a better option ?

Only if the shutdown is done correctly. For example, we can now
about inode allocation half way through writing the new inode
templates - that's a brand new failure path that we've never had to
deal with before.

So, does the error handling path ensure that we don't get partial
inode clusters written to disk (e.g. allocation fails 3 buffers into
an 8 buffer-long initialisation loop)? inode allocation buffers are
handled specially by both transaction commit and log recovery, so
the blanket statement of "the higher level path has error handling"
doesn't really fill me with confidence that this has been fully
considered.

A panic will stop the transaction dead with all it's modified
objects locked and unable to be flushed to disk. If the error
handling is not cancelling everything properly and preventing the
partial state from getting to disk or out to userspace, then a panic
is far more preferable than trying to handle the allocation
error....

The other thing to consider is that an oops will only stop the
current transaction - if critical buffers are not used in the
transaction, then the filesystem can still continue to operate (e.g.
writeback will be able to clean pages and move out of the OOM
conditions). In this case, the users don't lose all the dirty data
that is cached in memory, whereas a shutdown will simply discard it
all.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] xfs: Check the return value of xfs_trans_get_buf()
  2011-09-21 20:38   ` Alex Elder
@ 2011-09-21 22:43     ` Dave Chinner
  0 siblings, 0 replies; 10+ messages in thread
From: Dave Chinner @ 2011-09-21 22:43 UTC (permalink / raw)
  To: Alex Elder; +Cc: Chandra Seetharaman, XFS Mailing List

On Wed, Sep 21, 2011 at 03:38:38PM -0500, Alex Elder wrote:
> On Wed, 2011-09-21 at 10:56 +1000, Dave Chinner wrote:
> > On Tue, Sep 20, 2011 at 08:56:55AM -0500, Chandra Seetharaman wrote:
> > > Ran the xfstests (auto) overnight and didn't see any new issues.
> > 
> > Sure, but xfstests won't be triggering the new failure paths.
> > 
> > It looks to me like any failure to get a buffer will now result in a
> > cancelled transaction and a filesystem shutdown - the new failure
> > paths really need to be tested to ensure that failures are handled
> > gracefully and don't result in filesystem corruption.
> 
> This is true.
> 
> > As it is, I'm not sure we want to do this. The only reason we can
> > fail to get a buffer is allocation failures in extremely low memory
> > conditions. However, the last thing we want is for filesystem
> > shutdowns to be triggered by transient low memory conditions.
> 
> But a failure to get a buffer, not checked, can't be good
> can it?  In other words, the patch now adds handling for
> a xfs_buf_get() failure, which avoids a kernel-mode null
> pointer dereference in the off chance that would happen.
> That's worse than a filesystem shutdown.

See my previous post about how the behaviour of the FS after an oops
is very different to a shutdown.

> Now I grant you your earlier statement, namely that it's
> conceivable that the new error return could lead to a
> previously un-exercised path that leads to file system
> corruption.
> 
> But I do believe that all these places handle *an* error
> (not the specific ENOMEM error), so those spots already
> are generally prepared for something to go wrong.

Yes, but that's making the assumption that those error handling
routines handle the new failure cases correctly. The inode
allocation case is one I'm particularly concerned about...

> I didn't anticipate this, and the patch has already been
> committed.  I need to know whether people think this is
> critical enough for me to revert the patch.

In the long run, it doesn't matter. All I'm concerned about is that
everyone is aware of the potential downsides of this change, and why
not handling the error might be preferrable in the short term until
we get transaction rollbacks sorted out....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2011-09-21 22:43 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-09-20 13:56 [PATCH v2] xfs: Check the return value of xfs_trans_get_buf() Chandra Seetharaman
2011-09-20 15:00 ` Christoph Hellwig
2011-09-20 18:05   ` Chandra Seetharaman
2011-09-20 18:09     ` Christoph Hellwig
2011-09-20 15:37 ` Alex Elder
2011-09-21  0:56 ` Dave Chinner
2011-09-21 14:28   ` Chandra Seetharaman
2011-09-21 22:28     ` Dave Chinner
2011-09-21 20:38   ` Alex Elder
2011-09-21 22:43     ` Dave Chinner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox