public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 07/10] repair: prefetch runs too far ahead
Date: Mon, 24 Feb 2014 17:29:26 +1100	[thread overview]
Message-ID: <1393223369-4696-8-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1393223369-4696-1-git-send-email-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

When trying to work out why a non-crc filesystem took 1m57 to repair
and the same CRC enabled filesystem took 11m35 to repair, I noticed
that there was way too much CRC checking going on. Prefetched buffers
should not be being CRCed, yet shortly after starting this began
to happen. perf profiling also showed up an awful lot of time doing
buffer cache lookups, and the cache profile output indicated that
the hit rate was way below 3%. IOWs, the readahead was getting so
far ahead of the processing that it was thrashing the cache.

That there is a difference in processing rate between CRC and
non-CRC filesystems is not surprising. What is surprising is the
readahead behaviour - it basically just keeps reading ahead until it
has read everything on an AG, and then it goes on to the next AG,
and reads everything on it, and then goes on to the next AG,....

This goes on until it pushes all the buffers the processing threads
need out of the cache, and suddenly they start re-reading from disk
with the various CRC checking verifiers enabled, and we end up going
-really- slow. Yes, threading made up for it a bit, but it's just
wrong.

Basically, the code assumes that IO is going to be slower than
processing, so it doesn't throttle prefetch across AGs to slow
down prefetch to match the processing rate.

So, to fix this, don't let a prefetch thread get more than a single
AG ahead of its processing thread, just as occurs for single
threaded (i.e. -o ag_stride=-1) operation.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 repair/prefetch.c | 81 ++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 62 insertions(+), 19 deletions(-)

diff --git a/repair/prefetch.c b/repair/prefetch.c
index e573e35..7135d67 100644
--- a/repair/prefetch.c
+++ b/repair/prefetch.c
@@ -842,7 +842,7 @@ start_inode_prefetch(
 	 * and not any other associated metadata like directories
 	 */
 
-	max_queue = libxfs_bcache->c_maxcount / thread_count / 8;
+	max_queue = libxfs_bcache->c_maxcount / thread_count / 32;
 	if (XFS_INODE_CLUSTER_SIZE(mp) > mp->m_sb.sb_blocksize)
 		max_queue = max_queue * (XFS_INODE_CLUSTER_SIZE(mp) >>
 				mp->m_sb.sb_blocklog) / XFS_IALLOC_BLOCKS(mp);
@@ -865,6 +865,48 @@ start_inode_prefetch(
 	return args;
 }
 
+void
+prefetch_ag_range(
+	struct work_queue	*work,
+	xfs_agnumber_t		start_ag,
+	xfs_agnumber_t		end_ag,
+	bool			dirs_only,
+	void			(*func)(struct work_queue *,
+					xfs_agnumber_t, void *))
+{
+	int			i;
+	struct prefetch_args	*pf_args[2];
+
+	pf_args[start_ag & 1] = start_inode_prefetch(start_ag, dirs_only, NULL);
+	for (i = start_ag; i < end_ag; i++) {
+		/* Don't prefetch end_ag */
+		if (i + 1 < end_ag)
+			pf_args[(~i) & 1] = start_inode_prefetch(i + 1,
+						dirs_only, pf_args[i & 1]);
+		func(work, i, pf_args[i & 1]);
+	}
+}
+
+struct pf_work_args {
+	xfs_agnumber_t	start_ag;
+	xfs_agnumber_t	end_ag;
+	bool		dirs_only;
+	void		(*func)(struct work_queue *, xfs_agnumber_t, void *);
+};
+
+static void
+prefetch_ag_range_work(
+	struct work_queue	*work,
+	xfs_agnumber_t		unused,
+	void			*args)
+{
+	struct pf_work_args *wargs = args;
+
+	prefetch_ag_range(work, wargs->start_ag, wargs->end_ag, 
+			  wargs->dirs_only, wargs->func);
+	free(args);
+}
+
 /*
  * Do inode prefetch in the most optimal way for the context under which repair
  * has been run.
@@ -878,11 +920,9 @@ do_inode_prefetch(
 	bool			check_cache,
 	bool			dirs_only)
 {
-	int			i, j;
-	xfs_agnumber_t		agno;
+	int			i;
 	struct work_queue	queue;
 	struct work_queue	*queues;
-	struct prefetch_args	*pf_args[2];
 
 	/*
 	 * If the previous phases of repair have not overflowed the buffer
@@ -905,12 +945,8 @@ do_inode_prefetch(
 	 */
 	if (!stride) {
 		queue.mp = mp;
-		pf_args[0] = start_inode_prefetch(0, dirs_only, NULL);
-		for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-			pf_args[(~i) & 1] = start_inode_prefetch(i + 1,
-					dirs_only, pf_args[i & 1]);
-			func(&queue, i, pf_args[i & 1]);
-		}
+		prefetch_ag_range(&queue, 0, mp->m_sb.sb_agcount,
+				  dirs_only, func);
 		return;
 	}
 
@@ -918,20 +954,27 @@ do_inode_prefetch(
 	 * create one worker thread for each segment of the volume
 	 */
 	queues = malloc(thread_count * sizeof(work_queue_t));
-	for (i = 0, agno = 0; i < thread_count; i++) {
+	for (i = 0; i < thread_count; i++) {
+		struct pf_work_args *wargs;
+
+		wargs = malloc(sizeof(struct pf_work_args));
+		wargs->start_ag = i * stride;
+		wargs->end_ag = min((i + 1) * stride,
+				    mp->m_sb.sb_agcount);
+		wargs->dirs_only = dirs_only;
+		wargs->func = func;
+
 		create_work_queue(&queues[i], mp, 1);
-		pf_args[0] = NULL;
-		for (j = 0; j < stride && agno < mp->m_sb.sb_agcount;
-				j++, agno++) {
-			pf_args[0] = start_inode_prefetch(agno, dirs_only,
-							  pf_args[0]);
-			queue_work(&queues[i], func, agno, pf_args[0]);
-		}
+		queue_work(&queues[i], prefetch_ag_range_work, 0, wargs);
+
+		if (wargs->end_ag >= mp->m_sb.sb_agcount)
+			break;
 	}
+
 	/*
 	 * wait for workers to complete
 	 */
-	for (i = 0; i < thread_count; i++)
+	for (; i >= 0; i--)
 		destroy_work_queue(&queues[i]);
 	free(queues);
 }
-- 
1.8.4.rc3

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2014-02-24  6:29 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-02-24  6:29 [PATCH 00/10, v2] repair: scalability and prefetch fixes Dave Chinner
2014-02-24  6:29 ` [PATCH 01/10] repair: translation lookups limit scalability Dave Chinner
2014-02-24 20:42   ` Brian Foster
2014-02-25 20:01   ` Christoph Hellwig
2014-02-24  6:29 ` [PATCH 02/10] repair: per AG locks contend for cachelines Dave Chinner
2014-02-24  6:29 ` [PATCH 03/10] libxfs: buffer cache hashing is suboptimal Dave Chinner
2014-02-24  6:29 ` [PATCH 04/10] repair: limit auto-striding concurrency apprpriately Dave Chinner
2014-02-24  6:29 ` [PATCH 05/10] repair: factor out threading setup code Dave Chinner
2014-02-24 20:43   ` Brian Foster
2014-02-24 23:16     ` Dave Chinner
2014-02-24 23:30       ` Brian Foster
2014-02-24  6:29 ` [PATCH 06/10] repair: use a listhead for the dotdot list Dave Chinner
2014-02-25 20:03   ` Christoph Hellwig
2014-02-27  2:06     ` Dave Chinner
2014-02-24  6:29 ` Dave Chinner [this message]
2014-02-26  1:52   ` [PATCH 07/10] repair: prefetch runs too far ahead Christoph Hellwig
2014-02-26  5:51     ` Dave Chinner
2014-02-24  6:29 ` [PATCH 08/10] libxfs: remove a couple of locks Dave Chinner
2014-02-25 20:05   ` Christoph Hellwig
2014-02-25 23:43     ` Dave Chinner
2014-02-26  1:54       ` Christoph Hellwig
2014-02-26  5:53         ` Dave Chinner
2014-02-24  6:29 ` [PATCH 09/10] repair: fix prefetch queue limiting Dave Chinner
2014-02-25 20:08   ` Christoph Hellwig
2014-02-24  6:29 ` [PATCH 10/10] repair: BMBT prefetch needs to be CRC aware Dave Chinner
2014-02-25 17:25   ` Christoph Hellwig
2014-02-25 23:51     ` Dave Chinner
2014-02-26  1:40       ` Christoph Hellwig
2014-02-26  1:44   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1393223369-4696-8-git-send-email-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox