public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 4/4] xfs: implement parallism quota check
@ 2013-11-12  9:30 Jeff Liu
  2013-11-15 17:26 ` Christoph Hellwig
  0 siblings, 1 reply; 5+ messages in thread
From: Jeff Liu @ 2013-11-12  9:30 UTC (permalink / raw)
  To: xfs@oss.sgi.com

From: Jie Liu <jeff.liu@oracle.com>

XFS does quota check at mount time with a single thread if required,
and this process must be done before the file system can mount
successfully.  That is fine if the desired quota options were already
enabled while users were creating/removing files; however, it needs to
traverse the whole file system to figure out the quota usage if those
options were not previously enabled.  Hence, the mount procedure can
stall for a long time, depending on how many inodes reside on the
storage as well as the disk IO speed.

This patch implements parallel quota check based on allocation
groups: the quota check is performed for each AG via work queues
combined with a completion.  In this way, I have observed a
significant speedup on faster devices.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>

---
 fs/xfs/xfs_qm.c |  357 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 fs/xfs/xfs_qm.h |   18 +++
 2 files changed, 359 insertions(+), 16 deletions(-)

diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 14a4996..110df7b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -35,8 +35,11 @@
 #include "xfs_trans.h"
 #include "xfs_trans_space.h"
 #include "xfs_qm.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc_btree.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_inum.h"
 #include "xfs_cksum.h"
 #include "xfs_dinode.h"
 
@@ -51,6 +54,9 @@ STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
 
 
 STATIC void	xfs_qm_dqfree_one(struct xfs_dquot *dqp);
+STATIC int	xfs_qm_dqusage_adjust(struct xfs_mount *mp, xfs_ino_t ino,
+				      int *res);
+
 /*
  * We use the batch lookup interface to iterate over the dquots as it
  * currently is the only interface into the radix tree code that allows
@@ -1349,9 +1355,6 @@ STATIC int
 xfs_qm_dqusage_adjust(
 	xfs_mount_t	*mp,		/* mount point for filesystem */
 	xfs_ino_t	ino,		/* inode number to get data for */
-	void		__user *buffer,	/* not used */
-	int		ubsize,		/* not used */
-	int		*ubused,	/* not used */
 	int		*res)		/* result code value */
 {
 	xfs_inode_t	*ip;
@@ -1439,6 +1442,337 @@ error0:
 	return error;
 }
 
+static int
+xfs_qm_dqusage_adjust_ichunk(
+	struct xfs_mount		*mp,
+	xfs_agnumber_t			agno,
+	struct xfs_inobt_rec_incore	*irbp,
+	xfs_ino_t			*lastinop)
+{
+	xfs_ino_t			lastino = *lastinop;
+	int				chunkidx, clustidx;
+	int				error = 0;
+	xfs_agino_t			agino;
+
+	for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
+	     irbp->ir_freecount < XFS_INODES_PER_CHUNK;
+	     chunkidx++, clustidx++, agino++) {
+		xfs_ino_t	ino = XFS_AGINO_TO_INO(mp, agno, agino);
+		int		stat;
+
+		ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
+
+		/* Skip if this inode is free */
+		if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
+			lastino = ino;
+			continue;
+		}
+
+		/*
+		 * Count used inodes as free so we can tell when the
+		 * chunk is used up.
+		 */
+		irbp->ir_freecount++;
+
+		error = xfs_qm_dqusage_adjust(mp, ino, &stat);
+		if (stat == BULKSTAT_RV_NOTHING) {
+			if (error && error != ENOENT && error != EINVAL)
+				break;
+
+			lastino = ino;
+			continue;
+		}
+		if (stat == BULKSTAT_RV_GIVEUP) {
+			ASSERT(error);
+			break;
+		}
+		lastino = ino;
+	}
+
+	*lastinop = lastino;
+	return error;
+}
+
+static int
+xfs_qm_dqusage_adjust_perag(
+	struct xfs_dq_adjuster	*qa)
+{
+	struct xfs_mount	*mp = qa->qa_mp;
+	xfs_agnumber_t		agno = qa->qa_agno;
+	xfs_inobt_rec_incore_t	*irbp;	/* current irec buffer pointer */
+	xfs_inobt_rec_incore_t	*irbuf;	/* start of irec buffer */
+	xfs_inobt_rec_incore_t	*irbufend; /* end of good irec buffer entries */
+	xfs_btree_cur_t		*cur;	/* btree cursor for ialloc btree */
+	xfs_ino_t		lastino;/* last inode # in question */
+	xfs_agino_t		agino;	/* inode # in allocation group */
+	size_t			irbsize; /* size of irec buffer in bytes */
+	int			nirbuf;	/* size of irbuf */
+	int			rval;	/* return value error code */
+	int			error;	/* error code */
+
+	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
+	if (!irbuf)
+		return ENOMEM;
+	nirbuf = irbsize / sizeof(*irbuf);
+
+	rval = 0;
+	agino = 0;
+	lastino = 0;
+
+	/*
+	 * Loop over the allocation groups, starting from the last
+	 * inode returned; 0 means start of the allocation group.
+	 */
+	do {
+		xfs_buf_t	*agbp;	/* agi header buffer */
+		xfs_agi_t	*agi;	/* agi header data */
+		int		stat;	/* result value from btree calls */
+		bool		end_of_ag = false;
+
+		cond_resched();
+
+		irbp = irbuf;
+		irbufend = irbuf + nirbuf;
+
+		error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
+		if (error) {
+			rval = error;
+			break;
+		}
+		agi = XFS_BUF_TO_AGI(agbp);
+
+		/* Allocate and initialize a btree cursor for ialloc btree */
+		cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
+		error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, &stat);
+
+		/*
+		 * Loop through inode btree records in this ag until we run out
+		 * of inodes or space in the buffer.
+		 */
+		while (irbp < irbufend) {
+			xfs_inobt_rec_incore_t r;
+
+			/* Loop as long as we're unable to read the inode btree */
+			while (error) {
+				agino += XFS_INODES_PER_CHUNK;
+				if (XFS_AGINO_TO_AGBNO(mp, agino) >=
+				    be32_to_cpu(agi->agi_length))
+					break;
+
+				error = xfs_inobt_lookup(cur, agino,
+							 XFS_LOOKUP_GE, &stat);
+				cond_resched();
+			}
+
+			/*
+			 * If ran off the end of the ag either with an error,
+			 * or the normal way, set end and stop collecting.
+			 */
+			if (error) {
+				end_of_ag = true;
+				break;
+			}
+
+			error = xfs_inobt_get_rec(cur, &r, &stat);
+			if (error || stat == 0) {
+				end_of_ag = true;
+				break;
+			}
+
+			/*
+			 * If this chunk has any allocated inodes, save it.
+			 * Also start read-ahead now for this chunk.
+			 */
+			if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
+				struct blk_plug plug;
+
+				blk_start_plug(&plug);
+				xfs_inobt_reada_chunk(mp, agno, &r);
+				blk_finish_plug(&plug);
+
+				irbp->ir_startino = r.ir_startino;
+				irbp->ir_freecount = r.ir_freecount;
+				irbp->ir_free = r.ir_free;
+				irbp++;
+			}
+
+			/* Set agino to after this chunk and bump the cursor */
+			agino = r.ir_startino + XFS_INODES_PER_CHUNK;
+			error = xfs_btree_increment(cur, 0, &stat);
+			cond_resched();
+		}
+
+		/*
+		 * Drop the btree buffers and the agi buffer.  We can't hold
+		 * any of the locks these represent when calling iget.
+		 */
+		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		xfs_buf_relse(agbp);
+
+		irbufend = irbp;
+		for (irbp = irbuf; irbp < irbufend; irbp++) {
+			error = xfs_qm_dqusage_adjust_ichunk(mp, agno, irbp, &lastino);
+			if (error)
+				rval = error;
+			cond_resched();
+		}
+
+		if (end_of_ag)
+			break;
+
+		/* Set up for the next loop iteration */
+		agino = XFS_INO_TO_AGINO(mp, lastino);
+	} while (1);
+
+	/* Done, we're either out of filesystem or space to put the data */
+	kmem_free(irbuf);
+
+	return rval;
+}
+
+/*
+ * Iterate thru the file system to fetch all the inodes in the given
+ * inode range and adjusting the corresponding dquot counters in core.
+ */
+STATIC void
+xfs_qm_dq_adjust_worker(
+	struct work_struct	*work)
+{
+	struct xfs_dq_adjuster	*qa = container_of(work,
+				      struct xfs_dq_adjuster, qa_work);
+	int			error;
+
+	error = xfs_qm_dqusage_adjust_perag(qa);
+	complete(&qa->qa_complete);
+}
+
+STATIC int
+xfs_qm_init_quotacheck(
+	struct xfs_mount	*mp,
+	struct xfs_quotacheck	*qc)
+{
+	memset(qc, 0, sizeof(*qc));
+
+	INIT_LIST_HEAD(&qc->qc_adjusters);
+	spin_lock_init(&qc->qc_lock);
+	qc->qc_mp = mp;
+	qc->qc_wq = alloc_workqueue("xfs-dqcheck/%s", WQ_NON_REENTRANT,
+				    0, mp->m_fsname);
+	if (!qc->qc_wq) {
+		list_del(&qc->qc_adjusters);
+		return ENOMEM;
+	}
+
+	return 0;
+}
+
+STATIC void
+xfs_qm_destroy_quotacheck(
+	struct xfs_quotacheck	*qc)
+{
+	destroy_workqueue(qc->qc_wq);
+	spinlock_destroy(&qc->qc_lock);
+	list_del(&qc->qc_adjusters);
+}
+
+STATIC void
+xfs_qm_destroy_adjusters(
+	struct xfs_quotacheck	*qc)
+{
+	struct xfs_dq_adjuster	*qa, *tmp;
+
+	list_for_each_entry_safe(qa, tmp, &qc->qc_adjusters, qa_node) {
+		list_del(&qa->qa_node);
+		kfree(qa);
+	}
+}
+
+STATIC struct xfs_dq_adjuster *
+xfs_qm_alloc_adjuster(
+	struct xfs_quotacheck	*qc,
+	xfs_agnumber_t		agno)
+{
+	struct xfs_dq_adjuster	*qa;
+
+	qa = kzalloc(sizeof(*qa), GFP_NOFS);
+	if (!qa)
+		return NULL;
+
+	qa->qa_qc = qc;
+	qa->qa_mp = qc->qc_mp;
+	qa->qa_agno = agno;
+	INIT_LIST_HEAD(&qa->qa_node);
+	INIT_WORK(&qa->qa_work, xfs_qm_dq_adjust_worker);
+	init_completion(&qa->qa_complete);
+	list_add_tail(&qa->qa_node, &qc->qc_adjusters);
+
+	return qa;
+}
+
+STATIC int
+xfs_qm_alloc_queue_adjusters(
+	struct xfs_quotacheck	*qc)
+{
+	xfs_agnumber_t		agcount = qc->qc_mp->m_sb.sb_agcount;
+	int			i, error = 0;
+
+	for (i = 0; i < agcount; i++) {
+		struct xfs_dq_adjuster	*qa;
+
+		spin_lock(&qc->qc_lock);
+		qa = xfs_qm_alloc_adjuster(qc, i);
+		if (!qa) {
+			error = ENOMEM;
+			spin_unlock(&qc->qc_lock);
+			goto out_destroy_adjusters;
+		}
+		queue_work(qc->qc_wq, &qa->qa_work);
+		spin_unlock(&qc->qc_lock);
+	}
+
+	return error;
+
+out_destroy_adjusters:
+	xfs_qm_destroy_adjusters(qc);
+	return error;
+}
+
+STATIC void
+xfs_qm_wait_for_adjusters(
+	struct xfs_quotacheck	*qc)
+{
+	struct xfs_dq_adjuster	*qa;
+
+	list_for_each_entry(qa, &qc->qc_adjusters, qa_node)
+		wait_for_completion(&qa->qa_complete);
+}
+
+STATIC int
+xfs_qm_do_quotacheck(
+	struct xfs_mount	*mp)
+{
+	struct xfs_quotacheck	qc;
+	int			error;
+
+	error = xfs_qm_init_quotacheck(mp, &qc);
+	if (error)
+		return error;
+
+	/* Allocate and queue adjusters */
+	error = xfs_qm_alloc_queue_adjusters(&qc);
+	if (error)
+		goto out_destroy_quotacheck;
+
+	xfs_qm_wait_for_adjusters(&qc);
+
+	xfs_qm_destroy_adjusters(&qc);
+
+out_destroy_quotacheck:
+	xfs_qm_destroy_quotacheck(&qc);
+
+	return error;
+}
+
 STATIC int
 xfs_qm_flush_one(
 	struct xfs_dquot	*dqp,
@@ -1474,7 +1808,7 @@ int
 xfs_qm_quotacheck(
 	xfs_mount_t	*mp)
 {
-	int			done, count, error, error2;
+	int			count, error, error2;
 	xfs_ino_t		lastino;
 	size_t			structsz;
 	uint			flags;
@@ -1522,18 +1856,9 @@ xfs_qm_quotacheck(
 		flags |= XFS_PQUOTA_CHKD;
 	}
 
-	do {
-		/*
-		 * Iterate thru all the inodes in the file system,
-		 * adjusting the corresponding dquot counters in core.
-		 */
-		error = xfs_bulkstat(mp, &lastino, &count,
-				     xfs_qm_dqusage_adjust,
-				     structsz, NULL, &done);
-		if (error)
-			break;
-
-	} while (!done);
+	error = xfs_qm_do_quotacheck(mp);
+	if (error)
+		goto error_return;
 
 	/*
 	 * We've made all the changes that we need to make incore.  Flush them
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index a788b66..c7e2e6d 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -26,6 +26,24 @@ struct xfs_inode;
 
 extern struct kmem_zone	*xfs_qm_dqtrxzone;
 
+struct xfs_dq_adjuster {
+	struct list_head	qa_node;
+	struct xfs_mount	*qa_mp;
+	struct xfs_quotacheck	*qa_qc;
+	xfs_agnumber_t		qa_agno;
+	int			qa_error;
+	struct work_struct	qa_work;
+	struct completion	qa_complete;
+};
+
+struct xfs_quotacheck {
+	struct list_head	qc_adjusters;
+	spinlock_t		qc_lock;
+	struct xfs_mount	*qc_mp;
+	int			qc_done;
+	struct workqueue_struct	*qc_wq;
+};
+
 /*
  * This defines the unit of allocation of dquots.
  * Currently, it is just one file system block, and a 4K blk contains 30
-- 
1.7.9.5

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH 4/4] xfs: implement parallism quota check
  2013-11-12  9:30 [RFC PATCH 4/4] xfs: implement parallism quota check Jeff Liu
@ 2013-11-15 17:26 ` Christoph Hellwig
  2013-11-17 13:01   ` Jeff Liu
  0 siblings, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2013-11-15 17:26 UTC (permalink / raw)
  To: Jeff Liu; +Cc: xfs@oss.sgi.com

As Dave pointed out this really should be xfs_bulkstat_ag.  But looking
at the code you're almost 90% there anyway.

The actual workqueue code should probably stay in the quota files as
other users of the per-ag bulkstat would drive the parallelism by
themselves.  

> +STATIC void
> +xfs_qm_dq_adjust_worker(
> +	struct work_struct	*work)
> +{
> +	struct xfs_dq_adjuster	*qa = container_of(work,
> +				      struct xfs_dq_adjuster, qa_work);
> +	int			error;
> +
> +	error = xfs_qm_dqusage_adjust_perag(qa);
> +	complete(&qa->qa_complete);

Seems like xfs_quotacheck should just have a nr_inprogress counter
and a single waitqueue, that way we'd only wake the waiter once 
the whole quotacheck is done.

Actually you even just do a flush_workqueue on the workqueue as it's
per-quotacheck, which simplifies this even more.

> +STATIC int
> +xfs_qm_init_quotacheck(
> +	struct xfs_mount	*mp,
> +	struct xfs_quotacheck	*qc)
> +{
> +	memset(qc, 0, sizeof(*qc));
> +
> +	INIT_LIST_HEAD(&qc->qc_adjusters);
> +	spin_lock_init(&qc->qc_lock);
> +	qc->qc_mp = mp;
> +	qc->qc_wq = alloc_workqueue("xfs-dqcheck/%s", WQ_NON_REENTRANT,

The WQ_NON_REENTRANT behaviour is now the default, no need to use the
flag.

> +				    0, mp->m_fsname);
> +	if (!qc->qc_wq) {
> +		list_del(&qc->qc_adjusters);

I don't see why you'd do a list_del here given that we never added
anything to the list.  either way no need for the list once we use
the flush_workqueue trick above :)

In fact once that is implemented the xfs_quotacheck structure will
contain nothing but the workqueue and can go away entirely.

> +STATIC struct xfs_dq_adjuster *
> +xfs_qm_alloc_adjuster(
> +	struct xfs_quotacheck	*qc,
> +	xfs_agnumber_t		agno)
> +{
> +	struct xfs_dq_adjuster	*qa;
> +
> +	qa = kzalloc(sizeof(*qa), GFP_NOFS);
> +	if (!qa)
> +		return NULL;

> +		spin_lock(&qc->qc_lock);
> +		qa = xfs_qm_alloc_adjuster(qc, i);
> +		if (!qa) {
> +			error = ENOMEM;
> +			spin_unlock(&qc->qc_lock);
> +			goto out_destroy_adjusters;
> +		}
> +		queue_work(qc->qc_wq, &qa->qa_work);
> +		spin_unlock(&qc->qc_lock);

This gives you a sleeping allocation under a spinlock.  But I can't
really find any reason for the lock to be taken here anyway, nor
anywhere else.

But to catch this please test the code with lockdep enabled for the
next version.

Thanks for looking into this!

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH 4/4] xfs: implement parallism quota check
  2013-11-15 17:26 ` Christoph Hellwig
@ 2013-11-17 13:01   ` Jeff Liu
  2013-11-18 11:04     ` Christoph Hellwig
  0 siblings, 1 reply; 5+ messages in thread
From: Jeff Liu @ 2013-11-17 13:01 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs@oss.sgi.com

On 11/16 2013 01:26 AM, Christoph Hellwig wrote:
> As Dave pointed out this really should be xfs_bulkstat_ag.  But looking
> at the code you're almost 90% there anyway.
One main reason I did not make a per-AG bulkstat is that bulkstat() will
skip an allocation group if reading the agi buffer fails, i.e.,

while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
	cond_resched();
	error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
	if (error) {
		/*
		 * Skip this allocation group and go to the next one.
		 */
		agno++;
                agino = 0;
		continue;
	}
	....
}

Should it capture this issue and drop a warning in this case?

> The actual workqueue code should probably stay in the quota files as
> other users of the per-ag bulkstate would drive the parallelsim by
> themselves.  
> 
>> +STATIC void
>> +xfs_qm_dq_adjust_worker(
>> +	struct work_struct	*work)
>> +{
>> +	struct xfs_dq_adjuster	*qa = container_of(work,
>> +				      struct xfs_dq_adjuster, qa_work);
>> +	int			error;
>> +
>> +	error = xfs_qm_dqusage_adjust_perag(qa);
>> +	complete(&qa->qa_complete);
> 
> Seems like xfs_quotacheck should just have a nr_inprogress counter
> and a single waitqueue, that way we'd only wake the waiter once 
> the whole quotacheck is done.
> 
> Actually you even just do a flush_workqueue on the workqueue as it's
> per-quotacheck, which simplifies this even more.
It looks like flush_work() should just work fine; I see less coding
effort in this way, although the revised version is not yet ready to post. 
>> +STATIC int
>> +xfs_qm_init_quotacheck(
>> +	struct xfs_mount	*mp,
>> +	struct xfs_quotacheck	*qc)
>> +{
>> +	memset(qc, 0, sizeof(*qc));
>> +
>> +	INIT_LIST_HEAD(&qc->qc_adjusters);
>> +	spin_lock_init(&qc->qc_lock);
>> +	qc->qc_mp = mp;
>> +	qc->qc_wq = alloc_workqueue("xfs-dqcheck/%s", WQ_NON_REENTRANT,
> 
> The WQ_NON_REENTRANT behaviour is now the default, no need to use the
> flag.
Well, I noticed this change before, but forgot to get rid of this flag
when rebasing the code, as it had been written several months earlier.
> 
>> +				    0, mp->m_fsname);
>> +	if (!qc->qc_wq) {
>> +		list_del(&qc->qc_adjusters);
> 
> I don't see why you'd do a list_del here given that we never added
> anything to the list.  either way no need for the list once we use
> the flush_workqueue trick above :)
Ah, a stupid mistake! :-P.
> In fact once that is implemented the xfs_quotacheck structure will
> contain nothing but the workqueue and can go away entirely.
> 
>> +STATIC struct xfs_dq_adjuster *
>> +xfs_qm_alloc_adjuster(
>> +	struct xfs_quotacheck	*qc,
>> +	xfs_agnumber_t		agno)
>> +{
>> +	struct xfs_dq_adjuster	*qa;
>> +
>> +	qa = kzalloc(sizeof(*qa), GFP_NOFS);
>> +	if (!qa)
>> +		return NULL;
> 
>> +		spin_lock(&qc->qc_lock);
>> +		qa = xfs_qm_alloc_adjuster(qc, i);
>> +		if (!qa) {
>> +			error = ENOMEM;
>> +			spin_unlock(&qc->qc_lock);
>> +			goto out_destroy_adjusters;
>> +		}
>> +		queue_work(qc->qc_wq, &qa->qa_work);
>> +		spin_unlock(&qc->qc_lock);
> 
> This gives you a sleeping allocation under a spinlock.  But I can't
> really find any reason for the lock to be taken here anyway, nor
> anywhere else.
Sorry, I cannot recall exactly why I needed to introduce a spinlock here,
but yes, that is not a normal approach and in particular it causes a sleeping allocation.
> 
> But to catch this please test the code with lockdep enabled for the
> next version.
Sure.


Thanks,
-Jeff

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH 4/4] xfs: implement parallism quota check
  2013-11-17 13:01   ` Jeff Liu
@ 2013-11-18 11:04     ` Christoph Hellwig
  2013-11-18 12:41       ` Jeff Liu
  0 siblings, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2013-11-18 11:04 UTC (permalink / raw)
  To: Jeff Liu; +Cc: Christoph Hellwig, xfs@oss.sgi.com

On Sun, Nov 17, 2013 at 09:01:08PM +0800, Jeff Liu wrote:
> On 11/16 2013 01:26 AM, Christoph Hellwig wrote:
> > As Dave pointed out this really should be xfs_bulkstat_ag.  But looking
> > at the code you're almost 90% there anyway.
> One main reason I did not make a per ag bulkstat is because bulkstat() will
> skip an allocation group if read agi buffer failed, i.e,
> 
> while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
> 	cond_resched();
> 	error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
> 	if (error) {
> 		/*
> 		 * Skip this allocation group and go to the next one.
> 		 */
> 		agno++;
>                 agino = 0;
> 		continue;
> 	}
> 	....
> }
>
> Should it capture this issue and drop a warning in this case?

I've been thinking hard about this, but I can't really see any reason
why we would skip an AG instead of propagating the error.  The only
error xfs_ialloc_read_agi can return is an I/O error from reading
the buffer from disk, and we'd really want to propagate that sort
of I/O error.  I'd suggest a patch at the beginning of the series
to just change that behavior for the two places in bulkstat that
call xfs_ialloc_read_agi.  None of the other callers seem to behave
this way either.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH 4/4] xfs: implement parallism quota check
  2013-11-18 11:04     ` Christoph Hellwig
@ 2013-11-18 12:41       ` Jeff Liu
  0 siblings, 0 replies; 5+ messages in thread
From: Jeff Liu @ 2013-11-18 12:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs@oss.sgi.com

On 11/18 2013 07:04 PM, Christoph Hellwig wrote:

> On Sun, Nov 17, 2013 at 09:01:08PM +0800, Jeff Liu wrote:
>> On 11/16 2013 01:26 AM, Christoph Hellwig wrote:
>>> As Dave pointed out this really should be xfs_bulkstat_ag.  But looking
>>> at the code you're almost 90% there anyway.
>> One main reason I did not make a per ag bulkstat is because bulkstat() will
>> skip an allocation group if read agi buffer failed, i.e,
>>
>> while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
>> 	cond_resched();
>> 	error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
>> 	if (error) {
>> 		/*
>> 		 * Skip this allocation group and go to the next one.
>> 		 */
>> 		agno++;
>>                 agino = 0;
>> 		continue;
>> 	}
>> 	....
>> }
>>
>> Should it capture this issue and drop a warning in this case?
> 
> I've been thinking hard about this, but I can't really see any reason
> why we would skip an AG instead of propagating the error.  The only
> error xfs_ialloc_read_agi can return is an I/O error from reading
> the buffer from disk, and we'd really want to propagate that sort
> of I/O errror.  I'd suggest a patch at the beginning of the series
> to just change that behavior for all the two places in bulkstat that
> call xfs_ialloc_read_agi.  None of the other callers seem to behave
> this way either.

Ok, thanks for clearing up my confusion.

Thanks,
-Jeff

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2013-11-18 12:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-11-12  9:30 [RFC PATCH 4/4] xfs: implement parallism quota check Jeff Liu
2013-11-15 17:26 ` Christoph Hellwig
2013-11-17 13:01   ` Jeff Liu
2013-11-18 11:04     ` Christoph Hellwig
2013-11-18 12:41       ` Jeff Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox