Review: Be smarter about handling ENOSPC during writeback

public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed

* Review: Be smarter about handling ENOSPC during writeback
@ 2007-06-04  4:52 David Chinner
  2007-06-04  6:13 ` Timothy Shimmin
  2007-06-04  9:41 ` Timothy Shimmin
  0 siblings, 2 replies; 8+ messages in thread
From: David Chinner @ 2007-06-04  4:52 UTC (permalink / raw)
  To: xfs-dev; +Cc: xfs-oss


During delayed allocation extent conversion or unwritten extent
conversion, we need to reserve some blocks for transactions
reservations. We need to reserve these blocks in case a btree
split occurs and we need to allocate some blocks.

Unfortunately, we've only ever reserved the number of data blocks we
are allocating, so in both the unwritten and delalloc case we can
get ENOSPC to the transaction reservation. This is bad because in
both cases we cannot report the failure to the writing application.

The fix is two-fold:

	1 - leverage the reserved block infrastructure XFS already
	has to reserve a small pool of blocks by default to allow
	specially marked transactions to dip into when we are at
	ENOSPC.

	    Default setting is min(5%, 1024 blocks).

	2 - convert critical transaction reservations to be allowed
	to dip into this pool. Spots changed are delalloc
	conversion, unwritten extent conversion and growing a
	filesystem at ENOSPC.

Comments?

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

---
 fs/xfs/xfs_fsops.c |   10 +++++++---
 fs/xfs/xfs_iomap.c |   22 ++++++++--------------
 fs/xfs/xfs_mount.c |   37 +++++++++++++++++++++++++++++++++++--
 3 files changed, 50 insertions(+), 19 deletions(-)

Index: 2.6.x-xfs-new/fs/xfs/xfs_fsops.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_fsops.c	2007-05-11 10:35:29.288847149 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfs_fsops.c	2007-05-11 11:13:34.195363437 +1000
@@ -179,6 +179,7 @@ xfs_growfs_data_private(
 		up_write(&mp->m_peraglock);
 	}
 	tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
+	tp->t_flags |= XFS_TRANS_RESERVE;
 	if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
 			XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
 		xfs_trans_cancel(tp, 0);
@@ -500,8 +501,9 @@ xfs_reserve_blocks(
 	unsigned long		s;
 
 	/* If inval is null, report current values and return */
-
 	if (inval == (__uint64_t *)NULL) {
+		if (!outval)
+			return EINVAL;
 		outval->resblks = mp->m_resblks;
 		outval->resblks_avail = mp->m_resblks_avail;
 		return 0;
@@ -564,8 +566,10 @@ retry:
 		}
 	}
 out:
-	outval->resblks = mp->m_resblks;
-	outval->resblks_avail = mp->m_resblks_avail;
+	if (outval) {
+		outval->resblks = mp->m_resblks;
+		outval->resblks_avail = mp->m_resblks_avail;
+	}
 	XFS_SB_UNLOCK(mp, s);
 
 	if (fdblks_delta) {
Index: 2.6.x-xfs-new/fs/xfs/xfs_mount.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_mount.c	2007-05-11 10:35:29.292846630 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfs_mount.c	2007-05-11 11:13:47.229662318 +1000
@@ -718,7 +718,7 @@ xfs_mountfs(
 	bhv_vnode_t	*rvp = NULL;
 	int		readio_log, writeio_log;
 	xfs_daddr_t	d;
-	__uint64_t	ret64;
+	__uint64_t	resblks;
 	__int64_t	update_flags;
 	uint		quotamount, quotaflags;
 	int		agno;
@@ -835,6 +835,7 @@ xfs_mountfs(
 	 */
 	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
 	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
+		__uint64_t	ret64;
 		if (xfs_uuid_mount(mp)) {
 			error = XFS_ERROR(EINVAL);
 			goto error1;
@@ -1127,13 +1128,27 @@ xfs_mountfs(
 		goto error4;
 	}
 
-
 	/*
 	 * Complete the quota initialisation, post-log-replay component.
 	 */
 	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
 		goto error4;
 
+	/*
+	 * Now we are mounted, reserve a small amount of unused space for
+	 * privileged transactions. This is needed so that transaction
+	 * space required for critical operations can dip into this pool
+	 * when at ENOSPC. This is needed for operations like create with
+	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
+	 * are not allowed to use this reserved space.
+	 *
+	 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
+	 * This may drive us straight to ENOSPC on mount, but that implies
+	 * we were already there on the last unmount.
+	 */
+	resblks = min_t(__uint64_t, mp->m_sb.sb_dblocks / 20, 1024);
+	xfs_reserve_blocks(mp, &resblks, NULL);
+
 	return 0;
 
  error4:
@@ -1172,6 +1187,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cr
 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
 	int64_t		fsid;
 #endif
+	__uint64_t	resblks;
 
 	/*
 	 * We can potentially deadlock here if we have an inode cluster
@@ -1200,6 +1216,23 @@ xfs_unmountfs(xfs_mount_t *mp, struct cr
 		xfs_binval(mp->m_rtdev_targp);
 	}
 
+	/*
+	 * Unreserve any blocks we have so that when we unmount we don't account
+	 * the reserved free space as used. This is really only necessary for
+	 * lazy superblock counting because it trusts the incore superblock
+	 * counters to be aboslutely correct on clean unmount.
+	 *
+	 * We don't bother correcting this elsewhere for lazy superblock
+	 * counting because on mount of an unclean filesystem we reconstruct the
+	 * correct counter value and this is irrelevant.
+	 *
+	 * For non-lazy counter filesystems, this doesn't matter at all because
+	 * we only every apply deltas to the superblock and hence the incore
+	 * value does not matter....
+	 */
+	resblks = 0;
+	xfs_reserve_blocks(mp, &resblks, NULL);
+
 	xfs_log_sbcount(mp, 1);
 	xfs_unmountfs_writesb(mp);
 	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
Index: 2.6.x-xfs-new/fs/xfs/xfs_iomap.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_iomap.c	2007-05-11 11:13:13.862017149 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfs_iomap.c	2007-05-11 11:13:34.199362915 +1000
@@ -489,13 +489,13 @@ xfs_iomap_write_direct(
 	if (unlikely(rt)) {
 		resrtextents = qblocks = resaligned;
 		resrtextents /= mp->m_sb.sb_rextsize;
-  		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-  		quota_flag = XFS_QMOPT_RES_RTBLKS;
-  	} else {
-  		resrtextents = 0;
+		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+		quota_flag = XFS_QMOPT_RES_RTBLKS;
+	} else {
+		resrtextents = 0;
 		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
-  		quota_flag = XFS_QMOPT_RES_REGBLKS;
-  	}
+		quota_flag = XFS_QMOPT_RES_REGBLKS;
+	}
 
 	/*
 	 * Allocate and setup the transaction
@@ -788,18 +788,12 @@ xfs_iomap_write_allocate(
 		nimaps = 0;
 		while (nimaps == 0) {
 			tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+			tp->t_flags |= XFS_TRANS_RESERVE;
 			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
 			error = xfs_trans_reserve(tp, nres,
 					XFS_WRITE_LOG_RES(mp),
 					0, XFS_TRANS_PERM_LOG_RES,
 					XFS_WRITE_LOG_COUNT);
-			if (error == ENOSPC) {
-				error = xfs_trans_reserve(tp, 0,
-						XFS_WRITE_LOG_RES(mp),
-						0,
-						XFS_TRANS_PERM_LOG_RES,
-						XFS_WRITE_LOG_COUNT);
-			}
 			if (error) {
 				xfs_trans_cancel(tp, 0);
 				return XFS_ERROR(error);
@@ -917,8 +911,8 @@ xfs_iomap_write_unwritten(
 		 * from unwritten to real. Do allocations in a loop until
 		 * we have covered the range passed in.
 		 */
-
 		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+		tp->t_flags |= XFS_TRANS_RESERVE;
 		error = xfs_trans_reserve(tp, resblks,
 				XFS_WRITE_LOG_RES(mp), 0,
 				XFS_TRANS_PERM_LOG_RES,

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Review: Be smarter about handling ENOSPC during writeback
  2007-06-04  4:52 Review: Be smarter about handling ENOSPC during writeback David Chinner
@ 2007-06-04  6:13 ` Timothy Shimmin
  2007-06-08  5:28   ` Timothy Shimmin
  2007-06-04  9:41 ` Timothy Shimmin
  1 sibling, 1 reply; 8+ messages in thread
From: Timothy Shimmin @ 2007-06-04  6:13 UTC (permalink / raw)
  To: David Chinner, xfs-dev; +Cc: xfs-oss

As previously discussed, the idea sounds reasonable to me.
I'll look at the patch shortly.

--Tim

--On 4 June 2007 2:52:19 PM +1000 David Chinner <dgc@sgi.com> wrote:

>
> During delayed allocation extent conversion or unwritten extent
> conversion, we need to reserve some blocks for transactions
> reservations. We need to reserve these blocks in case a btree
> split occurs and we need to allocate some blocks.
>
> Unfortunately, we've only ever reserved the number of data blocks we
> are allocating, so in both the unwritten and delalloc case we can
> get ENOSPC to the transaction reservation. This is bad because in
> both cases we cannot report the failure to the writing application.
>
> The fix is two-fold:
>
> 	1 - leverage the reserved block infrastructure XFS already
> 	has to reserve a small pool of blocks by default to allow
> 	specially marked transactions to dip into when we are at
> 	ENOSPC.
>
> 	    Default setting is min(5%, 1024 blocks).
>
> 	2 - convert critical transaction reservations to be allowed
> 	to dip into this pool. Spots changed are delalloc
> 	conversion, unwritten extent conversion and growing a
> 	filesystem at ENOSPC.
>
> Comments?
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> Principal Engineer
> SGI Australian Software Group
>
> ---
>  fs/xfs/xfs_fsops.c |   10 +++++++---
>  fs/xfs/xfs_iomap.c |   22 ++++++++--------------
>  fs/xfs/xfs_mount.c |   37 +++++++++++++++++++++++++++++++++++--
>  3 files changed, 50 insertions(+), 19 deletions(-)
>
> Index: 2.6.x-xfs-new/fs/xfs/xfs_fsops.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_fsops.c	2007-05-11 10:35:29.288847149 +1000
> +++ 2.6.x-xfs-new/fs/xfs/xfs_fsops.c	2007-05-11 11:13:34.195363437 +1000
> @@ -179,6 +179,7 @@ xfs_growfs_data_private(
>  		up_write(&mp->m_peraglock);
>  	}
>  	tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
> +	tp->t_flags |= XFS_TRANS_RESERVE;
>  	if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
>  			XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
>  		xfs_trans_cancel(tp, 0);
> @@ -500,8 +501,9 @@ xfs_reserve_blocks(
>  	unsigned long		s;
>
>  	/* If inval is null, report current values and return */
> -
>  	if (inval == (__uint64_t *)NULL) {
> +		if (!outval)
> +			return EINVAL;
>  		outval->resblks = mp->m_resblks;
>  		outval->resblks_avail = mp->m_resblks_avail;
>  		return 0;
> @@ -564,8 +566,10 @@ retry:
>  		}
>  	}
>  out:
> -	outval->resblks = mp->m_resblks;
> -	outval->resblks_avail = mp->m_resblks_avail;
> +	if (outval) {
> +		outval->resblks = mp->m_resblks;
> +		outval->resblks_avail = mp->m_resblks_avail;
> +	}
>  	XFS_SB_UNLOCK(mp, s);
>
>  	if (fdblks_delta) {
> Index: 2.6.x-xfs-new/fs/xfs/xfs_mount.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_mount.c	2007-05-11 10:35:29.292846630 +1000
> +++ 2.6.x-xfs-new/fs/xfs/xfs_mount.c	2007-05-11 11:13:47.229662318 +1000
> @@ -718,7 +718,7 @@ xfs_mountfs(
>  	bhv_vnode_t	*rvp = NULL;
>  	int		readio_log, writeio_log;
>  	xfs_daddr_t	d;
> -	__uint64_t	ret64;
> +	__uint64_t	resblks;
>  	__int64_t	update_flags;
>  	uint		quotamount, quotaflags;
>  	int		agno;
> @@ -835,6 +835,7 @@ xfs_mountfs(
>  	 */
>  	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
>  	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
> +		__uint64_t	ret64;
>  		if (xfs_uuid_mount(mp)) {
>  			error = XFS_ERROR(EINVAL);
>  			goto error1;
> @@ -1127,13 +1128,27 @@ xfs_mountfs(
>  		goto error4;
>  	}
>
> -
>  	/*
>  	 * Complete the quota initialisation, post-log-replay component.
>  	 */
>  	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
>  		goto error4;
>
> +	/*
> +	 * Now we are mounted, reserve a small amount of unused space for
> +	 * privileged transactions. This is needed so that transaction
> +	 * space required for critical operations can dip into this pool
> +	 * when at ENOSPC. This is needed for operations like create with
> +	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
> +	 * are not allowed to use this reserved space.
> +	 *
> +	 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
> +	 * This may drive us straight to ENOSPC on mount, but that implies
> +	 * we were already there on the last unmount.
> +	 */
> +	resblks = min_t(__uint64_t, mp->m_sb.sb_dblocks / 20, 1024);
> +	xfs_reserve_blocks(mp, &resblks, NULL);
> +
>  	return 0;
>
>   error4:
> @@ -1172,6 +1187,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cr
>  #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
>  	int64_t		fsid;
>  #endif
> +	__uint64_t	resblks;
>
>  	/*
>  	 * We can potentially deadlock here if we have an inode cluster
> @@ -1200,6 +1216,23 @@ xfs_unmountfs(xfs_mount_t *mp, struct cr
>  		xfs_binval(mp->m_rtdev_targp);
>  	}
>
> +	/*
> +	 * Unreserve any blocks we have so that when we unmount we don't account
> +	 * the reserved free space as used. This is really only necessary for
> +	 * lazy superblock counting because it trusts the incore superblock
> +	 * counters to be aboslutely correct on clean unmount.
> +	 *
> +	 * We don't bother correcting this elsewhere for lazy superblock
> +	 * counting because on mount of an unclean filesystem we reconstruct the
> +	 * correct counter value and this is irrelevant.
> +	 *
> +	 * For non-lazy counter filesystems, this doesn't matter at all because
> +	 * we only every apply deltas to the superblock and hence the incore
> +	 * value does not matter....
> +	 */
> +	resblks = 0;
> +	xfs_reserve_blocks(mp, &resblks, NULL);
> +
>  	xfs_log_sbcount(mp, 1);
>  	xfs_unmountfs_writesb(mp);
>  	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
> Index: 2.6.x-xfs-new/fs/xfs/xfs_iomap.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_iomap.c	2007-05-11 11:13:13.862017149 +1000
> +++ 2.6.x-xfs-new/fs/xfs/xfs_iomap.c	2007-05-11 11:13:34.199362915 +1000
> @@ -489,13 +489,13 @@ xfs_iomap_write_direct(
>  	if (unlikely(rt)) {
>  		resrtextents = qblocks = resaligned;
>  		resrtextents /= mp->m_sb.sb_rextsize;
> -  		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
> -  		quota_flag = XFS_QMOPT_RES_RTBLKS;
> -  	} else {
> -  		resrtextents = 0;
> +		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
> +		quota_flag = XFS_QMOPT_RES_RTBLKS;
> +	} else {
> +		resrtextents = 0;
>  		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
> -  		quota_flag = XFS_QMOPT_RES_REGBLKS;
> -  	}
> +		quota_flag = XFS_QMOPT_RES_REGBLKS;
> +	}
>
>  	/*
>  	 * Allocate and setup the transaction
> @@ -788,18 +788,12 @@ xfs_iomap_write_allocate(
>  		nimaps = 0;
>  		while (nimaps == 0) {
>  			tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
> +			tp->t_flags |= XFS_TRANS_RESERVE;
>  			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
>  			error = xfs_trans_reserve(tp, nres,
>  					XFS_WRITE_LOG_RES(mp),
>  					0, XFS_TRANS_PERM_LOG_RES,
>  					XFS_WRITE_LOG_COUNT);
> -			if (error == ENOSPC) {
> -				error = xfs_trans_reserve(tp, 0,
> -						XFS_WRITE_LOG_RES(mp),
> -						0,
> -						XFS_TRANS_PERM_LOG_RES,
> -						XFS_WRITE_LOG_COUNT);
> -			}
>  			if (error) {
>  				xfs_trans_cancel(tp, 0);
>  				return XFS_ERROR(error);
> @@ -917,8 +911,8 @@ xfs_iomap_write_unwritten(
>  		 * from unwritten to real. Do allocations in a loop until
>  		 * we have covered the range passed in.
>  		 */
> -
>  		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
> +		tp->t_flags |= XFS_TRANS_RESERVE;
>  		error = xfs_trans_reserve(tp, resblks,
>  				XFS_WRITE_LOG_RES(mp), 0,
>  				XFS_TRANS_PERM_LOG_RES,

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Review: Be smarter about handling ENOSPC during writeback
  2007-06-04  4:52 Review: Be smarter about handling ENOSPC during writeback David Chinner
  2007-06-04  6:13 ` Timothy Shimmin
@ 2007-06-04  9:41 ` Timothy Shimmin
  2007-06-04 14:11   ` David Chinner
  1 sibling, 1 reply; 8+ messages in thread
From: Timothy Shimmin @ 2007-06-04  9:41 UTC (permalink / raw)
  To: David Chinner, xfs-dev; +Cc: xfs-oss

Hi Dave,

As an aside,
I don't understand the following bit of code in xfs_reserve_blocks():
        /*
         * If our previous reservation was larger than the current value,
         * then move any unused blocks back to the free pool.
         */
        fdblks_delta = 0;
        if (mp->m_resblks > request) {
                lcounter = mp->m_resblks_avail - request;
                if (lcounter  > 0) {            /* release unused blocks */
                        fdblks_delta = lcounter;
                        mp->m_resblks_avail -= lcounter;
                }

I can't see why it is not calculating a delta for:
  delta = m_resblks - request
and using that to update the m_resblks_avail and fdblks_delta;
like we do in the case below where the request is the larger one.
Instead it is affectively doing:
m_resblks_avail = m_resblks_avail - m_resblks_avail + request
                = request
It looks wrong to me.
What am I missing?
And why doesn't sb_fdblocks need to be updated in this case.

Thanks,
--Tim

--On 4 June 2007 2:52:19 PM +1000 David Chinner <dgc@sgi.com> wrote:

>
> During delayed allocation extent conversion or unwritten extent
> conversion, we need to reserve some blocks for transactions
> reservations. We need to reserve these blocks in case a btree
> split occurs and we need to allocate some blocks.
>
> Unfortunately, we've only ever reserved the number of data blocks we
> are allocating, so in both the unwritten and delalloc case we can
> get ENOSPC to the transaction reservation. This is bad because in
> both cases we cannot report the failure to the writing application.
>
> The fix is two-fold:
>
> 	1 - leverage the reserved block infrastructure XFS already
> 	has to reserve a small pool of blocks by default to allow
> 	specially marked transactions to dip into when we are at
> 	ENOSPC.
>
> 	    Default setting is min(5%, 1024 blocks).
>
> 	2 - convert critical transaction reservations to be allowed
> 	to dip into this pool. Spots changed are delalloc
> 	conversion, unwritten extent conversion and growing a
> 	filesystem at ENOSPC.
>
> Comments?
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> Principal Engineer
> SGI Australian Software Group
>
> ---
>  fs/xfs/xfs_fsops.c |   10 +++++++---
>  fs/xfs/xfs_iomap.c |   22 ++++++++--------------
>  fs/xfs/xfs_mount.c |   37 +++++++++++++++++++++++++++++++++++--
>  3 files changed, 50 insertions(+), 19 deletions(-)
>
> Index: 2.6.x-xfs-new/fs/xfs/xfs_fsops.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_fsops.c	2007-05-11 10:35:29.288847149 +1000
> +++ 2.6.x-xfs-new/fs/xfs/xfs_fsops.c	2007-05-11 11:13:34.195363437 +1000
> @@ -179,6 +179,7 @@ xfs_growfs_data_private(
>  		up_write(&mp->m_peraglock);
>  	}
>  	tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
> +	tp->t_flags |= XFS_TRANS_RESERVE;
>  	if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
>  			XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
>  		xfs_trans_cancel(tp, 0);
> @@ -500,8 +501,9 @@ xfs_reserve_blocks(
>  	unsigned long		s;
>
>  	/* If inval is null, report current values and return */
> -
>  	if (inval == (__uint64_t *)NULL) {
> +		if (!outval)
> +			return EINVAL;
>  		outval->resblks = mp->m_resblks;
>  		outval->resblks_avail = mp->m_resblks_avail;
>  		return 0;
> @@ -564,8 +566,10 @@ retry:
>  		}
>  	}
>  out:
> -	outval->resblks = mp->m_resblks;
> -	outval->resblks_avail = mp->m_resblks_avail;
> +	if (outval) {
> +		outval->resblks = mp->m_resblks;
> +		outval->resblks_avail = mp->m_resblks_avail;
> +	}
>  	XFS_SB_UNLOCK(mp, s);
>
>  	if (fdblks_delta) {
> Index: 2.6.x-xfs-new/fs/xfs/xfs_mount.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_mount.c	2007-05-11 10:35:29.292846630 +1000
> +++ 2.6.x-xfs-new/fs/xfs/xfs_mount.c	2007-05-11 11:13:47.229662318 +1000
> @@ -718,7 +718,7 @@ xfs_mountfs(
>  	bhv_vnode_t	*rvp = NULL;
>  	int		readio_log, writeio_log;
>  	xfs_daddr_t	d;
> -	__uint64_t	ret64;
> +	__uint64_t	resblks;
>  	__int64_t	update_flags;
>  	uint		quotamount, quotaflags;
>  	int		agno;
> @@ -835,6 +835,7 @@ xfs_mountfs(
>  	 */
>  	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
>  	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
> +		__uint64_t	ret64;
>  		if (xfs_uuid_mount(mp)) {
>  			error = XFS_ERROR(EINVAL);
>  			goto error1;
> @@ -1127,13 +1128,27 @@ xfs_mountfs(
>  		goto error4;
>  	}
>
> -
>  	/*
>  	 * Complete the quota initialisation, post-log-replay component.
>  	 */
>  	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
>  		goto error4;
>
> +	/*
> +	 * Now we are mounted, reserve a small amount of unused space for
> +	 * privileged transactions. This is needed so that transaction
> +	 * space required for critical operations can dip into this pool
> +	 * when at ENOSPC. This is needed for operations like create with
> +	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
> +	 * are not allowed to use this reserved space.
> +	 *
> +	 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
> +	 * This may drive us straight to ENOSPC on mount, but that implies
> +	 * we were already there on the last unmount.
> +	 */
> +	resblks = min_t(__uint64_t, mp->m_sb.sb_dblocks / 20, 1024);
> +	xfs_reserve_blocks(mp, &resblks, NULL);
> +
>  	return 0;
>
>   error4:
> @@ -1172,6 +1187,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cr
>  #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
>  	int64_t		fsid;
>  #endif
> +	__uint64_t	resblks;
>
>  	/*
>  	 * We can potentially deadlock here if we have an inode cluster
> @@ -1200,6 +1216,23 @@ xfs_unmountfs(xfs_mount_t *mp, struct cr
>  		xfs_binval(mp->m_rtdev_targp);
>  	}
>
> +	/*
> +	 * Unreserve any blocks we have so that when we unmount we don't account
> +	 * the reserved free space as used. This is really only necessary for
> +	 * lazy superblock counting because it trusts the incore superblock
> +	 * counters to be aboslutely correct on clean unmount.
> +	 *
> +	 * We don't bother correcting this elsewhere for lazy superblock
> +	 * counting because on mount of an unclean filesystem we reconstruct the
> +	 * correct counter value and this is irrelevant.
> +	 *
> +	 * For non-lazy counter filesystems, this doesn't matter at all because
> +	 * we only every apply deltas to the superblock and hence the incore
> +	 * value does not matter....
> +	 */
> +	resblks = 0;
> +	xfs_reserve_blocks(mp, &resblks, NULL);
> +
>  	xfs_log_sbcount(mp, 1);
>  	xfs_unmountfs_writesb(mp);
>  	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
> Index: 2.6.x-xfs-new/fs/xfs/xfs_iomap.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_iomap.c	2007-05-11 11:13:13.862017149 +1000
> +++ 2.6.x-xfs-new/fs/xfs/xfs_iomap.c	2007-05-11 11:13:34.199362915 +1000
> @@ -489,13 +489,13 @@ xfs_iomap_write_direct(
>  	if (unlikely(rt)) {
>  		resrtextents = qblocks = resaligned;
>  		resrtextents /= mp->m_sb.sb_rextsize;
> -  		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
> -  		quota_flag = XFS_QMOPT_RES_RTBLKS;
> -  	} else {
> -  		resrtextents = 0;
> +		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
> +		quota_flag = XFS_QMOPT_RES_RTBLKS;
> +	} else {
> +		resrtextents = 0;
>  		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
> -  		quota_flag = XFS_QMOPT_RES_REGBLKS;
> -  	}
> +		quota_flag = XFS_QMOPT_RES_REGBLKS;
> +	}
>
>  	/*
>  	 * Allocate and setup the transaction
> @@ -788,18 +788,12 @@ xfs_iomap_write_allocate(
>  		nimaps = 0;
>  		while (nimaps == 0) {
>  			tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
> +			tp->t_flags |= XFS_TRANS_RESERVE;
>  			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
>  			error = xfs_trans_reserve(tp, nres,
>  					XFS_WRITE_LOG_RES(mp),
>  					0, XFS_TRANS_PERM_LOG_RES,
>  					XFS_WRITE_LOG_COUNT);
> -			if (error == ENOSPC) {
> -				error = xfs_trans_reserve(tp, 0,
> -						XFS_WRITE_LOG_RES(mp),
> -						0,
> -						XFS_TRANS_PERM_LOG_RES,
> -						XFS_WRITE_LOG_COUNT);
> -			}
>  			if (error) {
>  				xfs_trans_cancel(tp, 0);
>  				return XFS_ERROR(error);
> @@ -917,8 +911,8 @@ xfs_iomap_write_unwritten(
>  		 * from unwritten to real. Do allocations in a loop until
>  		 * we have covered the range passed in.
>  		 */
> -
>  		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
> +		tp->t_flags |= XFS_TRANS_RESERVE;
>  		error = xfs_trans_reserve(tp, resblks,
>  				XFS_WRITE_LOG_RES(mp), 0,
>  				XFS_TRANS_PERM_LOG_RES,

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Review: Be smarter about handling ENOSPC during writeback
  2007-06-04  9:41 ` Timothy Shimmin
@ 2007-06-04 14:11   ` David Chinner
  0 siblings, 0 replies; 8+ messages in thread
From: David Chinner @ 2007-06-04 14:11 UTC (permalink / raw)
  To: Timothy Shimmin; +Cc: David Chinner, xfs-dev, xfs-oss

On Mon, Jun 04, 2007 at 07:41:19PM +1000, Timothy Shimmin wrote:
> Hi Dave,
> 
> As an aside,
> I don't understand the following bit of code in xfs_reserve_blocks():
>        /*
>         * If our previous reservation was larger than the current value,
>         * then move any unused blocks back to the free pool.
>         */
>        fdblks_delta = 0;
>        if (mp->m_resblks > request) {
>                lcounter = mp->m_resblks_avail - request;
>                if (lcounter  > 0) {            /* release unused blocks */
>                        fdblks_delta = lcounter;
>                        mp->m_resblks_avail -= lcounter;
>                }
	>>>>	 mp->m_resblks = request;

> 
> I can't see why it is not calculating a delta for:
>  delta = m_resblks - request

Because mp->m_resblks_avail is the amount of reservation space
we have *unallocated*. i.e. 0 <= mp->m_resblks_avail <= mp->m_resblks

IOWs, when we have used some blocks and then change mp->m_resblks, the
amount we can can return is limited by the the available blocks,
not the total reservation.

> and using that to update the m_resblks_avail and fdblks_delta;
> like we do in the case below where the request is the larger one.
> Instead it is affectively doing:
> m_resblks_avail = m_resblks_avail - m_resblks_avail + request
>                = request

Surprising, but correct. When we reduce mp->m_resblks,
mp->m_resblks_avail must be <= mp->m_resblks. IOWs we reduce the
available blocks to that of the limit, so any allocated reserved
blocks will now immediately be considered as unreserved and when
they are freed the space will be immediately returned to the normal
pool.

Example: resblks = 1000, avail = 750. Set new resblks = 500.
avail must be reduced to 500 and 250 must be freed.

	fdblks_delta = 0
	if (1000 > 500) {
		lcounter = 750 - 500 = 250
		if (250 > 0) {
			fdblks_delta = 250
			resblks_avail = 500
		}
		m_resblks = 500;
	} else {
.....
	}
.....
	if (fdblks_delta) {
.....
		error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0);
.....

That "frees" 250 blocks. This is correct behaviour.

> It looks wrong to me.
> What am I missing?
> And why doesn't sb_fdblocks need to be updated in this case.

Because if we update mp->m_sb.sb_fdblocks, the value minus the reservation
gets written to disk, and that means it is incorrect (xfs-check
would fail) as the reservation is purely an in-memory construct...

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Review: Be smarter about handling ENOSPC during writeback
  2007-06-04  6:13 ` Timothy Shimmin
@ 2007-06-08  5:28   ` Timothy Shimmin
  2007-06-08  7:33     ` David Chinner
  0 siblings, 1 reply; 8+ messages in thread
From: Timothy Shimmin @ 2007-06-08  5:28 UTC (permalink / raw)
  To: David Chinner, xfs-dev; +Cc: xfs-oss

Hi Dave,

Putting the xfs_reserve_blocks discussion to the side....
(discussed separately)


Back to the review, looking at the changes:

--On 4 June 2007 2:52:19 PM +1000 David Chinner <dgc@sgi.com> wrote:

>
> During delayed allocation extent conversion or unwritten extent
> conversion, we need to reserve some blocks for transactions
> reservations. We need to reserve these blocks in case a btree
> split occurs and we need to allocate some blocks.
>
> Unfortunately, we've only ever reserved the number of data blocks we
> are allocating, so in both the unwritten and delalloc case we can
> get ENOSPC to the transaction reservation. This is bad because in
> both cases we cannot report the failure to the writing application.
>
> The fix is two-fold:
>
> 	1 - leverage the reserved block infrastructure XFS already
> 	has to reserve a small pool of blocks by default to allow
> 	specially marked transactions to dip into when we are at
> 	ENOSPC.
>
> 	    Default setting is min(5%, 1024 blocks).
>
> 	2 - convert critical transaction reservations to be allowed
> 	to dip into this pool. Spots changed are delalloc
> 	conversion, unwritten extent conversion and growing a
> 	filesystem at ENOSPC.
>
> Comments?
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> Principal Engineer
> SGI Australian Software Group
>
> ---


>  fs/xfs/xfs_fsops.c |   10 +++++++---

* allow xfs_reserve_blocks() to handle a null outval so that
  we can call xfs_reserve_blocks other than thru ioctl,
  where we don't care about outval
* xfs_growfs_data_private() or's in XFS_TRANS_RESERVE like we do for root EAs
  -> allow growfs transaction to dip in to reserve space

>  fs/xfs/xfs_mount.c |   37 +++++++++++++++++++++++++++++++++++--

* xfs_mountfs(): cleanup - restrict a variable (ret64) to the block its used in
* xfs_mountfs(): do our xfs_reserve_blocks() for what we think we'll need
  - pass NULL for 2nd param to it as we don't care (why we changed xfs_fsops.c)
  - defaults to min(1024 FSBs, 5% dblocks)
    -> not sure how one would choose this but it sounds big enough
* xfs_unmountfs(): xfs_reserve_blocks of zero and so restoring the sb free counter

Q: so I guess, for DMF systems which presumably turn this stuff on using the ioctl;
   we should tell them to stop doing this - they could stuff us up by overriding it
   maybe and they don't need to.

>  fs/xfs/xfs_iomap.c |   22 ++++++++--------------

* some whitespace cleanup
xfs_iomap_write_allocate():
* delalloc extent conversion - mark transaction for reserved blocks space
* don't handle ENOSPC here, as we shouldn't get it now I presume
xfs_iomap_write_unwritten
* unwritten extent conversion - mark trans for reserved blocks

Seems simple enough :)
Will we get questions from people about reduced space from df? :)

--Tim



>  3 files changed, 50 insertions(+), 19 deletions(-)
>
> Index: 2.6.x-xfs-new/fs/xfs/xfs_fsops.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_fsops.c	2007-05-11 10:35:29.288847149 +1000
> +++ 2.6.x-xfs-new/fs/xfs/xfs_fsops.c	2007-05-11 11:13:34.195363437 +1000
> @@ -179,6 +179,7 @@ xfs_growfs_data_private(
>  		up_write(&mp->m_peraglock);
>  	}
>  	tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
> +	tp->t_flags |= XFS_TRANS_RESERVE;
>  	if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
>  			XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
>  		xfs_trans_cancel(tp, 0);
> @@ -500,8 +501,9 @@ xfs_reserve_blocks(
>  	unsigned long		s;
>
>  	/* If inval is null, report current values and return */
> -
>  	if (inval == (__uint64_t *)NULL) {
> +		if (!outval)
> +			return EINVAL;
>  		outval->resblks = mp->m_resblks;
>  		outval->resblks_avail = mp->m_resblks_avail;
>  		return 0;
> @@ -564,8 +566,10 @@ retry:
>  		}
>  	}
>  out:
> -	outval->resblks = mp->m_resblks;
> -	outval->resblks_avail = mp->m_resblks_avail;
> +	if (outval) {
> +		outval->resblks = mp->m_resblks;
> +		outval->resblks_avail = mp->m_resblks_avail;
> +	}
>  	XFS_SB_UNLOCK(mp, s);
>
>  	if (fdblks_delta) {
> Index: 2.6.x-xfs-new/fs/xfs/xfs_mount.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_mount.c	2007-05-11 10:35:29.292846630 +1000
> +++ 2.6.x-xfs-new/fs/xfs/xfs_mount.c	2007-05-11 11:13:47.229662318 +1000
> @@ -718,7 +718,7 @@ xfs_mountfs(
>  	bhv_vnode_t	*rvp = NULL;
>  	int		readio_log, writeio_log;
>  	xfs_daddr_t	d;
> -	__uint64_t	ret64;
> +	__uint64_t	resblks;
>  	__int64_t	update_flags;
>  	uint		quotamount, quotaflags;
>  	int		agno;
> @@ -835,6 +835,7 @@ xfs_mountfs(
>  	 */
>  	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
>  	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
> +		__uint64_t	ret64;
>  		if (xfs_uuid_mount(mp)) {
>  			error = XFS_ERROR(EINVAL);
>  			goto error1;
> @@ -1127,13 +1128,27 @@ xfs_mountfs(
>  		goto error4;
>  	}
>
> -
>  	/*
>  	 * Complete the quota initialisation, post-log-replay component.
>  	 */
>  	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
>  		goto error4;
>
> +	/*
> +	 * Now we are mounted, reserve a small amount of unused space for
> +	 * privileged transactions. This is needed so that transaction
> +	 * space required for critical operations can dip into this pool
> +	 * when at ENOSPC. This is needed for operations like create with
> +	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
> +	 * are not allowed to use this reserved space.
> +	 *
> +	 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
> +	 * This may drive us straight to ENOSPC on mount, but that implies
> +	 * we were already there on the last unmount.
> +	 */
> +	resblks = min_t(__uint64_t, mp->m_sb.sb_dblocks / 20, 1024);
> +	xfs_reserve_blocks(mp, &resblks, NULL);
> +
>  	return 0;
>
>   error4:
> @@ -1172,6 +1187,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cr
>  #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
>  	int64_t		fsid;
>  #endif
> +	__uint64_t	resblks;
>
>  	/*
>  	 * We can potentially deadlock here if we have an inode cluster
> @@ -1200,6 +1216,23 @@ xfs_unmountfs(xfs_mount_t *mp, struct cr
>  		xfs_binval(mp->m_rtdev_targp);
>  	}
>
> +	/*
> +	 * Unreserve any blocks we have so that when we unmount we don't account
> +	 * the reserved free space as used. This is really only necessary for
> +	 * lazy superblock counting because it trusts the incore superblock
> +	 * counters to be aboslutely correct on clean unmount.
> +	 *
> +	 * We don't bother correcting this elsewhere for lazy superblock
> +	 * counting because on mount of an unclean filesystem we reconstruct the
> +	 * correct counter value and this is irrelevant.
> +	 *
> +	 * For non-lazy counter filesystems, this doesn't matter at all because
> +	 * we only every apply deltas to the superblock and hence the incore
> +	 * value does not matter....
> +	 */
> +	resblks = 0;
> +	xfs_reserve_blocks(mp, &resblks, NULL);
> +
>  	xfs_log_sbcount(mp, 1);
>  	xfs_unmountfs_writesb(mp);
>  	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
> Index: 2.6.x-xfs-new/fs/xfs/xfs_iomap.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_iomap.c	2007-05-11 11:13:13.862017149 +1000
> +++ 2.6.x-xfs-new/fs/xfs/xfs_iomap.c	2007-05-11 11:13:34.199362915 +1000
> @@ -489,13 +489,13 @@ xfs_iomap_write_direct(
>  	if (unlikely(rt)) {
>  		resrtextents = qblocks = resaligned;
>  		resrtextents /= mp->m_sb.sb_rextsize;
> -  		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
> -  		quota_flag = XFS_QMOPT_RES_RTBLKS;
> -  	} else {
> -  		resrtextents = 0;
> +		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
> +		quota_flag = XFS_QMOPT_RES_RTBLKS;
> +	} else {
> +		resrtextents = 0;
>  		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
> -  		quota_flag = XFS_QMOPT_RES_REGBLKS;
> -  	}
> +		quota_flag = XFS_QMOPT_RES_REGBLKS;
> +	}
>
>  	/*
>  	 * Allocate and setup the transaction
> @@ -788,18 +788,12 @@ xfs_iomap_write_allocate(
>  		nimaps = 0;
>  		while (nimaps == 0) {
>  			tp = xfs_trans_alloc(mp, XFS_TRANS_START_WRITE);
> +			tp->t_flags |= XFS_TRANS_RESERVE;
>  			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
>  			error = xfs_trans_reserve(tp, nres,
>  					XFS_WRITE_LOG_RES(mp),
>  					0, XFS_TRANS_PERM_LOG_RES,
>  					XFS_WRITE_LOG_COUNT);
> -			if (error == ENOSPC) {
> -				error = xfs_trans_reserve(tp, 0,
> -						XFS_WRITE_LOG_RES(mp),
> -						0,
> -						XFS_TRANS_PERM_LOG_RES,
> -						XFS_WRITE_LOG_COUNT);
> -			}
>  			if (error) {
>  				xfs_trans_cancel(tp, 0);
>  				return XFS_ERROR(error);
> @@ -917,8 +911,8 @@ xfs_iomap_write_unwritten(
>  		 * from unwritten to real. Do allocations in a loop until
>  		 * we have covered the range passed in.
>  		 */
> -
>  		tp = xfs_trans_alloc(mp, XFS_TRANS_START_WRITE);
> +		tp->t_flags |= XFS_TRANS_RESERVE;
>  		error = xfs_trans_reserve(tp, resblks,
>  				XFS_WRITE_LOG_RES(mp), 0,
>  				XFS_TRANS_PERM_LOG_RES,

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Review: Be smarter about handling ENOSPC during writeback
  2007-06-08  5:28   ` Timothy Shimmin
@ 2007-06-08  7:33     ` David Chinner
  2007-06-11 23:13       ` Nathan Scott
  0 siblings, 1 reply; 8+ messages in thread
From: David Chinner @ 2007-06-08  7:33 UTC (permalink / raw)
  To: Timothy Shimmin; +Cc: David Chinner, xfs-dev, xfs-oss

On Fri, Jun 08, 2007 at 03:28:14PM +1000, Timothy Shimmin wrote:
> Hi Dave,
> 
> Putting the xfs_reserve_blocks discussion to the side....
> (discussed separately)

*nod*

BTW, did you try that patch I sent?

> > fs/xfs/xfs_fsops.c |   10 +++++++---
> 
> * allow xfs_reserve_blocks() to handle a null outval so that
>  we can call xfs_reserve_blocks other than thru ioctl,
>  where we don't care about outval
> * xfs_growfs_data_private() or's in XFS_TRANS_RESERVE like we do for root 
> EAs
>  -> allow growfs transaction to dip in to reserve space

Yes, and so now you can grow a completely full filesystem :)

> > fs/xfs/xfs_mount.c |   37 +++++++++++++++++++++++++++++++++++--
> 
> * xfs_mountfs(): cleanup - restrict a variable (ret64) to the block its 
> used in
> * xfs_mountfs(): do our xfs_reserve_blocks() for what we think we'll need
>  - pass NULL for 2nd param to it as we don't care (why we changed 
>  xfs_fsops.c)
>  - defaults to min(1024 FSBs, 5% dblocks)
>    -> not sure how one would choose this but it sounds big enough

It's a SWAG. I think it's sufficient to begin with. If it proves to
be a problem, then we can change it later....

> * xfs_unmountfs(): xfs_reserve_blocks of zero and so restoring the sb free 
> counter
> 
> Q: so I guess, for DMF systems which presumably turn this stuff on using 
> the ioctl;

yeah - the rope is long enough ;)

>   we should tell them to stop doing this - they could stuff us up by 
>   overriding it
>   maybe and they don't need to.

All they need to do is check first before setting a new value...

> > fs/xfs/xfs_iomap.c |   22 ++++++++--------------
> 
> * some whitespace cleanup
> xfs_iomap_write_allocate():
> * delalloc extent conversion - mark transaction for reserved blocks space
> * don't handle ENOSPC here, as we shouldn't get it now I presume

We still can, just much more unlikely. I need to do another set of
patches for ENOSPC notification but I haven't had a chance yet.

> xfs_iomap_write_unwritten
> * unwritten extent conversion - mark trans for reserved blocks

Ditto.

> Seems simple enough :)

It's just one of a few to begin with?

> Will we get questions from people about reduced space from df? :)

If we do, I think you just volunteered to write the FAQ entry ;)

Thanks for the review.

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Review: Be smarter about handling ENOSPC during writeback
  2007-06-08  7:33     ` David Chinner
@ 2007-06-11 23:13       ` Nathan Scott
  2007-06-12  3:09         ` David Chinner
  0 siblings, 1 reply; 8+ messages in thread
From: Nathan Scott @ 2007-06-11 23:13 UTC (permalink / raw)
  To: David Chinner; +Cc: Timothy Shimmin, xfs-dev, xfs-oss

On Fri, 2007-06-08 at 17:33 +1000, David Chinner wrote:
> On Fri, Jun 08, 2007 at 03:28:14PM +1000, Timothy Shimmin wrote:

> > Will we get questions from people about reduced space from df? :)
> 
> If we do, I think you just volunteered to write the FAQ entry ;)

It would be more correct of XFS to start doing the right thing by
reporting different values for b_free and b_avail in statfs(2) -
this code in xfs_mount.c::xfs_statvfs() ...

    statp->f_bfree = statp->f_bavail =
                            sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);

I know this wasn't done for the original per-mount space reserving
ioctls (that was for one user though - dmapi, so I can see why there
may have been a shortcut done there) ... but if it affects everyone
now, there will be questions asked, and there is a standard interface
for reporting this space discrepency that tools like df(1) already
use.

cheers.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Review: Be smarter about handling ENOSPC during writeback
  2007-06-11 23:13       ` Nathan Scott
@ 2007-06-12  3:09         ` David Chinner
  0 siblings, 0 replies; 8+ messages in thread
From: David Chinner @ 2007-06-12  3:09 UTC (permalink / raw)
  To: Nathan Scott; +Cc: David Chinner, Timothy Shimmin, xfs-dev, xfs-oss

On Tue, Jun 12, 2007 at 09:13:45AM +1000, Nathan Scott wrote:
> On Fri, 2007-06-08 at 17:33 +1000, David Chinner wrote:
> > On Fri, Jun 08, 2007 at 03:28:14PM +1000, Timothy Shimmin wrote:
> 
> > > Will we get questions from people about reduced space from df? :)
> > 
> > If we do, I think you just volunteered to write the FAQ entry ;)
> 
> It would be more correct of XFS to start doing the right thing by
> reporting different values for b_free and b_avail in statfs(2) -
> this code in xfs_mount.c::xfs_statvfs() ...
> 
>     statp->f_bfree = statp->f_bavail =
>                             sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);

Ok, yeah, that'd work.

Something like:

---
 fs/xfs/xfs_vfsops.c |    1 +
 1 file changed, 1 insertion(+)

Index: 2.6.x-xfs-new/fs/xfs/xfs_vfsops.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_vfsops.c	2007-06-08 21:46:29.000000000 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfs_vfsops.c	2007-06-12 13:08:49.933837815 +1000
@@ -876,6 +876,7 @@ xfs_statvfs(
 	statp->f_blocks = sbp->sb_dblocks - lsize;
 	statp->f_bfree = statp->f_bavail =
 				sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+	statp->f_bfree += mp->m_resblks_avail;
 	fakeinos = statp->f_bfree << sbp->sb_inopblog;
 #if XFS_BIG_INUMS
 	fakeinos += mp->m_inoadd;

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2007-06-12  3:09 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-06-04  4:52 Review: Be smarter about handling ENOSPC during writeback David Chinner
2007-06-04  6:13 ` Timothy Shimmin
2007-06-08  5:28   ` Timothy Shimmin
2007-06-08  7:33     ` David Chinner
2007-06-11 23:13       ` Nathan Scott
2007-06-12  3:09         ` David Chinner
2007-06-04  9:41 ` Timothy Shimmin
2007-06-04 14:11   ` David Chinner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox