* [PATCH 1/2] xfs: dynamic speculative EOF preallocation
2010-10-04 10:13 [RFC, PATCH 0/2] xfs: dynamic speculative preallocation for delalloc Dave Chinner
@ 2010-10-04 10:13 ` Dave Chinner
2010-10-14 17:22 ` Alex Elder
0 siblings, 1 reply; 14+ messages in thread
From: Dave Chinner @ 2010-10-04 10:13 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
Currently the size of the speculative preallocation during delayed
allocation is fixed by either the allocsize mount option or a
default size. We are seeing a lot of cases where we need to
recommend using the allocsize mount option to prevent fragmentation
when buffered writes land in the same AG.
Rather than using a fixed preallocation size by default (up to 64k),
make it dynamic by exponentially increasing it on each subsequent
preallocation. This will result in the preallocation size increasing
as the file increases, so for streaming writes we are much more
likely to get large preallocations exactly when we need it to reduce
fragmentation. It should also prevent the need for using the
allocsize mount option for most workloads involving concurrent
streaming writes.
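The growth policy can be sketched as a small standalone model (a sketch only,
not the kernel code; the 4x factor and the MAXEXTLEN cap mirror the patch
below, but the names and the default size are illustrative):

```c
#include <assert.h>

/* Illustrative stand-in for the real XFS maximum extent length. */
#define MODEL_MAXEXTLEN 2097151U	/* 2^21 - 1 blocks */

/*
 * Each successful preallocation quadruples the size used for the next
 * one, starting from the mount's default writeio size and capped at a
 * single maximum-length extent.
 */
unsigned int next_prealloc(unsigned int last_prealloc,
			   unsigned int writeio_blocks)
{
	unsigned int want = last_prealloc * 4;

	if (want < writeio_blocks)
		return writeio_blocks;	/* first time: use the default */
	return want < MODEL_MAXEXTLEN ? want : MODEL_MAXEXTLEN;
}
```

Feeding the result back in as last_prealloc gives the exponential
progression described above, saturating at a single maximum-length extent.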
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_inode.h | 1 +
fs/xfs/xfs_iomap.c | 39 +++++++++++++++++++++++++++++++++++++--
2 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 39f8c78..1594190 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -248,6 +248,7 @@ typedef struct xfs_inode {
mrlock_t i_iolock; /* inode IO lock */
struct completion i_flush; /* inode flush completion q */
atomic_t i_pincount; /* inode pin count */
+ unsigned int i_last_prealloc; /* last EOF prealloc size */
wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
spinlock_t i_flags_lock; /* inode i_flags lock */
/* Miscellaneous state. */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2057614..b2e4782 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -389,6 +389,9 @@ error_out:
* If the caller is doing a write at the end of the file, then extend the
* allocation out to the file system's write iosize. We clean up any extra
* space left over when the file is closed in xfs_inactive().
+ *
+ * If we find we already have delalloc preallocation out to alloc_blocks
+ * beyond EOF, don't do more preallocation as it is not needed.
*/
STATIC int
xfs_iomap_eof_want_preallocate(
@@ -405,6 +408,7 @@ xfs_iomap_eof_want_preallocate(
xfs_filblks_t count_fsb;
xfs_fsblock_t firstblock;
int n, error, imaps;
+ int found_delalloc = 0;
*prealloc = 0;
if ((offset + count) <= ip->i_size)
@@ -427,11 +431,25 @@ xfs_iomap_eof_want_preallocate(
if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
(imap[n].br_startblock != DELAYSTARTBLOCK))
return 0;
+
start_fsb += imap[n].br_blockcount;
count_fsb -= imap[n].br_blockcount;
+
+ /* count delalloc blocks beyond EOF */
+ if (imap[n].br_startblock == DELAYSTARTBLOCK)
+ found_delalloc += imap[n].br_blockcount;
}
}
- *prealloc = 1;
+ if (!found_delalloc) {
+ /* haven't got any prealloc, so need some */
+ *prealloc = 1;
+ } else if (found_delalloc <= count_fsb) {
+ /* almost run out of prealloc */
+ *prealloc = 1;
+ } else {
+ /* still lots of prealloc left */
+ *prealloc = 0;
+ }
return 0;
}
@@ -469,6 +487,7 @@ xfs_iomap_write_delay(
extsz = xfs_get_extsz_hint(ip);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
if (error)
@@ -476,9 +495,25 @@ xfs_iomap_write_delay(
retry:
if (prealloc) {
+ xfs_fileoff_t alloc_blocks = 0;
+ /*
+ * If we don't have a user specified preallocation size, dynamically
+ * increase the preallocation size as we do more preallocation.
+ * Cap the maximum size at a single extent.
+ */
+ if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+ alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
+ (ip->i_last_prealloc * 4));
+ }
+ if (alloc_blocks == 0)
+ alloc_blocks = mp->m_writeio_blocks;
+ ip->i_last_prealloc = alloc_blocks;
+
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
- last_fsb = ioalign + mp->m_writeio_blocks;
+ last_fsb = ioalign + alloc_blocks;
+ printk("ino %lld, ioalign 0x%llx, alloc_blocks 0x%llx\n",
+ ip->i_ino, ioalign, alloc_blocks);
} else {
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
}
--
1.7.1
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
* Re: [PATCH 1/2] xfs: dynamic speculative EOF preallocation
2010-10-04 10:13 ` [PATCH 1/2] xfs: dynamic speculative EOF preallocation Dave Chinner
@ 2010-10-14 17:22 ` Alex Elder
2010-10-14 21:33 ` Dave Chinner
0 siblings, 1 reply; 14+ messages in thread
From: Alex Elder @ 2010-10-14 17:22 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Mon, 2010-10-04 at 21:13 +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> Currently the size of the speculative preallocation during delayed
> allocation is fixed by either the allocsize mount option or a
> default size. We are seeing a lot of cases where we need to
> recommend using the allocsize mount option to prevent fragmentation
> when buffered writes land in the same AG.
>
> Rather than using a fixed preallocation size by default (up to 64k),
> make it dynamic by exponentially increasing it on each subsequent
> preallocation. This will result in the preallocation size increasing
> as the file increases, so for streaming writes we are much more
> likely to get large preallocations exactly when we need it to reduce
> fragmentation. It should also prevent the need for using the
> allocsize mount option for most workloads involving concurrent
> streaming writes.
I have some comments, below.
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
. . .
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 2057614..b2e4782 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
. . .
> @@ -427,11 +431,25 @@ xfs_iomap_eof_want_preallocate(
> if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
> (imap[n].br_startblock != DELAYSTARTBLOCK))
> return 0;
> +
> start_fsb += imap[n].br_blockcount;
> count_fsb -= imap[n].br_blockcount;
> +
> + /* count delalloc blocks beyond EOF */
> + if (imap[n].br_startblock == DELAYSTARTBLOCK)
> + found_delalloc += imap[n].br_blockcount;
> }
> }
> - *prealloc = 1;
At this point, count_fsb will be 0 (a necessary condition
for loop termination, since count_fsb is unsigned). Since
found_delalloc is initially 0 (and is also unsigned), we
can safely assume that found_delalloc >= count_fsb. The
only case in which found_delalloc <= count_fsb is if
found_delalloc is also 0 (a case you cover separately,
first, below).
Furthermore, *prealloc was assigned the value 0 at the top.
So I think this bottom section can be simplified to:
if (!found_delalloc)
*prealloc = 1;
But if that's the case, then maybe the loop can simply
return as soon as it finds a delayed allocation entry.
In other words, the net effect of this is that you
only tell the caller we want preallocation if *no*
preallocated blocks beyond EOF exist. That may be
fine, but it doesn't seem to match your understanding
based on your code, so I just wanted to call your
attention to it.
> + if (!found_delalloc) {
> + /* haven't got any prealloc, so need some */
> + *prealloc = 1;
> + } else if (found_delalloc <= count_fsb) {
> + /* almost run out of prealloc */
> + *prealloc = 1;
> + } else {
> + /* still lots of prealloc left */
> + *prealloc = 0;
> + }
> return 0;
> }
>
> @@ -469,6 +487,7 @@ xfs_iomap_write_delay(
> extsz = xfs_get_extsz_hint(ip);
> offset_fsb = XFS_B_TO_FSBT(mp, offset);
>
> +
This hunk should be killed. It just adds an unwanted
blank line.
> error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
> ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
> if (error)
> @@ -476,9 +495,25 @@ xfs_iomap_write_delay(
>
> retry:
> if (prealloc) {
> + xfs_fileoff_t alloc_blocks = 0;
> + /*
> + * If we don't have a user specified preallocation size, dynamically
> + * increase the preallocation size as we do more preallocation.
> + * Cap the maximum size at a single extent.
> + */
> + if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
I note that this is circumventing the special code in
xfs_set_rw_sizes() that tries to set up a different (smaller)
size in the event the "sync" (generic) mount option was supplied
(indicated by XFS_MOUNT_SYNC). If that is a good thing, then I
suggest the code in xfs_set_rw_sizes() go away. But it would be
good to have the case for making that change stated.
> + alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
> + (ip->i_last_prealloc * 4));
> + }
So this is the spot that begs the question of whether
the default I/O size mount option is needed any more. The
net effect of your change (assuming no "allocsize" mount
option is in effect) is:
- Initially, ip->i_last_prealloc will be 0. Therefore the
first time through, the preallocated blocks beyond the
end will be based on m_writeio_blocks (either 16KB or
64KB, dependent on whether XFS_MOUNT_WSYNC was specified).
- Thereafter, whenever more preallocated-at-EOF blocks
are needed, the number allocated will be 4 times more
than the last time (growing exponentially), limited by
the maximum extent size.
I guess the reason one might want the "allocsize" mount
option now becomes the opposite of why one might have
wanted it before. I.e., it would be used to *reduce*
the size of the preallocated range beyond EOF, which I
could envision might be reasonable in some circumstances.
> + if (alloc_blocks == 0)
> + alloc_blocks = mp->m_writeio_blocks;
> + ip->i_last_prealloc = alloc_blocks;
> +
> aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
> ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
> - last_fsb = ioalign + mp->m_writeio_blocks;
> + last_fsb = ioalign + alloc_blocks;
> + printk("ino %lld, ioalign 0x%llx, alloc_blocks 0x%llx\n",
> + ip->i_ino, ioalign, alloc_blocks);
Kill the debug printk() call.
> } else {
> last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
> }
* Re: [PATCH 1/2] xfs: dynamic speculative EOF preallocation
2010-10-14 17:22 ` Alex Elder
@ 2010-10-14 21:33 ` Dave Chinner
0 siblings, 0 replies; 14+ messages in thread
From: Dave Chinner @ 2010-10-14 21:33 UTC (permalink / raw)
To: Alex Elder; +Cc: xfs
On Thu, Oct 14, 2010 at 12:22:45PM -0500, Alex Elder wrote:
> On Mon, 2010-10-04 at 21:13 +1100, Dave Chinner wrote:
> > @@ -427,11 +431,25 @@ xfs_iomap_eof_want_preallocate(
> > if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
> > (imap[n].br_startblock != DELAYSTARTBLOCK))
> > return 0;
> > +
> > start_fsb += imap[n].br_blockcount;
> > count_fsb -= imap[n].br_blockcount;
> > +
> > + /* count delalloc blocks beyond EOF */
> > + if (imap[n].br_startblock == DELAYSTARTBLOCK)
> > + found_delalloc += imap[n].br_blockcount;
> > }
> > }
> > - *prealloc = 1;
>
> At this point, count_fsb will be 0 (a necessary condition
> for loop termination, since count_fsb is unsigned). Since
> found_delalloc is initially 0 (and is also unsigned), we
> can safely assume that found_delalloc >= count_fsb. The
> only case in which found_delalloc <= count_fsb is if
> found_delalloc is also 0 (a case you cover separately,
> first, below).
>
> Furthermore, *prealloc was assigned the value 0 at the top.
> So I think this bottom section can be simplified to:
> if (!found_delalloc)
> *prealloc = 1;
I'll have a look at this - there was some reason for the second
case, but the code has changed since I needed it and, as you
suggest, it might not be needed anymore.
> > error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
> > ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
> > if (error)
> > @@ -476,9 +495,25 @@ xfs_iomap_write_delay(
> >
> > retry:
> > if (prealloc) {
> > + xfs_fileoff_t alloc_blocks = 0;
> > + /*
> > + * If we don't have a user specified preallocation size, dynamically
> > + * increase the preallocation size as we do more preallocation.
> > + * Cap the maximum size at a single extent.
> > + */
> > + if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
>
> I note that this is circumventing the special code in
> xfs_set_rw_sizes() that tries to set up a different (smaller)
> size in the event the "sync" (generic) mount option was supplied
> (indicated by XFS_MOUNT_SYNC). If that is a good thing, then I
> suggest the code in xfs_set_rw_sizes() go away. But it would be
> good to have the case for making that change stated.
The new code based on file size is a bit different. It still
triggers off the absence of this flag, but it now uses the default
sizes as the minimum speculative allocation size.
> I guess the reason one might want the "allocsize" mount
> option now becomes the opposite of why one might have
> wanted it before. I.e., it would be used to *reduce*
> the size of the preallocated range beyond EOF, which I
> could envision might be reasonable in some circumstances.
It now becomes the minimum preallocation size, rather than both the
minimum and the maximum....
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
* [PATCH 0/2] xfs: dynamic speculative allocation beyond EOF V3
@ 2010-11-29 0:43 Dave Chinner
2010-11-29 0:43 ` [PATCH 1/2] xfs: dynamic speculative EOF preallocation Dave Chinner
2010-11-29 0:43 ` [PATCH 2/2] xfs: don't truncate prealloc from frequently accessed inodes Dave Chinner
0 siblings, 2 replies; 14+ messages in thread
From: Dave Chinner @ 2010-11-29 0:43 UTC (permalink / raw)
To: xfs
This is the latest version of the dynamic speculative allocation
beyond EOF patch set. The description of the patchset can be found
here:
http://oss.sgi.com/archives/xfs/2010-10/msg00040.html
Version 3:
- allocsize mount option returned to fixed preallocation size only.
- reduces maximum dynamic prealloc size as the filesystem gets near
full.
- split i_delayed_blks bug fixes into new patch (posted in 2.6.37-rc
bug fix series)
Version 2:
- base speculative preallocation size on current inode size, not the
number of previous speculative allocations.
* [PATCH 1/2] xfs: dynamic speculative EOF preallocation
2010-11-29 0:43 [PATCH 0/2] xfs: dynamic speculative allocation beyond EOF V3 Dave Chinner
@ 2010-11-29 0:43 ` Dave Chinner
2010-12-07 10:17 ` Christoph Hellwig
2010-11-29 0:43 ` [PATCH 2/2] xfs: don't truncate prealloc from frequently accessed inodes Dave Chinner
1 sibling, 1 reply; 14+ messages in thread
From: Dave Chinner @ 2010-11-29 0:43 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
Currently the size of the speculative preallocation during delayed
allocation is fixed by either the allocsize mount option or a
default size. We are seeing a lot of cases where we need to
recommend using the allocsize mount option to prevent fragmentation
when buffered writes land in the same AG.
Rather than using a fixed preallocation size by default (up to 64k),
make it dynamic by basing it on the current inode size. That way the
EOF preallocation will increase as the file size increases. Hence
for streaming writes we are much more likely to get large
preallocations exactly when we need it to reduce fragmentation.
For default settings, the size of the initial extents is determined
by the number of parallel writers and the amount of memory in the
machine. For 4GB RAM and 4 concurrent 32GB file writes:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL
0: [0..1048575]: 1048672..2097247 0 (1048672..2097247) 1048576
1: [1048576..2097151]: 5242976..6291551 0 (5242976..6291551) 1048576
2: [2097152..4194303]: 12583008..14680159 0 (12583008..14680159) 2097152
3: [4194304..8388607]: 25165920..29360223 0 (25165920..29360223) 4194304
4: [8388608..16777215]: 58720352..67108959 0 (58720352..67108959) 8388608
5: [16777216..33554423]: 117440584..134217791 0 (117440584..134217791) 16777208
6: [33554424..50331511]: 184549056..201326143 0 (184549056..201326143) 16777088
7: [50331512..67108599]: 251657408..268434495 0 (251657408..268434495) 16777088
and for 16 concurrent 16GB file writes:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL
0: [0..262143]: 2490472..2752615 0 (2490472..2752615) 262144
1: [262144..524287]: 6291560..6553703 0 (6291560..6553703) 262144
2: [524288..1048575]: 13631592..14155879 0 (13631592..14155879) 524288
3: [1048576..2097151]: 30408808..31457383 0 (30408808..31457383) 1048576
4: [2097152..4194303]: 52428904..54526055 0 (52428904..54526055) 2097152
5: [4194304..8388607]: 104857704..109052007 0 (104857704..109052007) 4194304
6: [8388608..16777215]: 209715304..218103911 0 (209715304..218103911) 8388608
7: [16777216..33554423]: 452984848..469762055 0 (452984848..469762055) 16777208
Because it is hard to take back speculative preallocation, cases
where there are large slow growing log files on a nearly full
filesystem may cause premature ENOSPC. Hence as the filesystem nears
full, the maximum dynamic prealloc size is reduced according to this
table (based on 4k block size):
freespace max prealloc size
>5% full extent (8GB)
4-5% 2GB (8GB >> 2)
3-4% 1GB (8GB >> 3)
2-3% 512MB (8GB >> 4)
1-2% 256MB (8GB >> 5)
<1% 128MB (8GB >> 6)
This should reduce the amount of space held in speculative
preallocation for such cases.
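The table above can be modelled as a small standalone function (a sketch
only; the threshold array mirrors xfs_set_low_space_thresholds() in the
patch below, but the enum and function names here are illustrative):

```c
#include <assert.h>
#include <stdint.h>

/* Free space thresholds, 1% up to 5% of the filesystem. */
enum { LOWSP_1_PCNT, LOWSP_2_PCNT, LOWSP_3_PCNT, LOWSP_4_PCNT,
       LOWSP_5_PCNT, LOWSP_MAX };

/*
 * Return the right shift to apply to the maximum prealloc size:
 * 0 above 5% free space, 2 below 5%, and one more for each
 * further percent of free space lost.
 */
int prealloc_shift(int64_t freesp, int64_t dblocks)
{
	int64_t low_space[LOWSP_MAX];
	int shift = 0;
	int i;

	for (i = 0; i < LOWSP_MAX; i++)
		low_space[i] = dblocks / 100 * (i + 1);

	if (freesp < low_space[LOWSP_5_PCNT]) {
		shift = 2;
		for (i = LOWSP_4_PCNT; i >= LOWSP_1_PCNT; i--)
			if (freesp < low_space[i])
				shift++;
	}
	return shift;
}
```

With a 2^21-block (8GB at 4k) maximum extent, shifts of 2 through 6 give
exactly the 2GB down to 128MB caps in the table.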
The allocsize mount option turns off the dynamic behaviour and fixes
the prealloc size to whatever the mount option specifies. i.e. the
behaviour is unchanged.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_fsops.c | 1 +
fs/xfs/xfs_iomap.c | 74 +++++++++++++++++++++++++++++++++++++++++++++-------
fs/xfs/xfs_mount.c | 26 ++++++++++++-----
fs/xfs/xfs_mount.h | 26 +++++++++++++++++-
4 files changed, 107 insertions(+), 20 deletions(-)
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index be34ff2..6d17206 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -374,6 +374,7 @@ xfs_growfs_data_private(
mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
} else
mp->m_maxicount = 0;
+ xfs_set_low_space_thresholds(mp);
/* update secondary superblocks. */
for (agno = 1; agno < nagcount; agno++) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2057614..d7c2f05 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -389,6 +389,9 @@ error_out:
* If the caller is doing a write at the end of the file, then extend the
* allocation out to the file system's write iosize. We clean up any extra
* space left over when the file is closed in xfs_inactive().
+ *
+ * If we find we already have delalloc preallocation beyond EOF, don't do more
+ * preallocation as it is not needed.
*/
STATIC int
xfs_iomap_eof_want_preallocate(
@@ -405,6 +408,7 @@ xfs_iomap_eof_want_preallocate(
xfs_filblks_t count_fsb;
xfs_fsblock_t firstblock;
int n, error, imaps;
+ int found_delalloc = 0;
*prealloc = 0;
if ((offset + count) <= ip->i_size)
@@ -427,11 +431,16 @@ xfs_iomap_eof_want_preallocate(
if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
(imap[n].br_startblock != DELAYSTARTBLOCK))
return 0;
+
start_fsb += imap[n].br_blockcount;
count_fsb -= imap[n].br_blockcount;
+
+ if (imap[n].br_startblock == DELAYSTARTBLOCK)
+ found_delalloc = 1;
}
}
- *prealloc = 1;
+ if (!found_delalloc)
+ *prealloc = 1;
return 0;
}
@@ -469,6 +478,7 @@ xfs_iomap_write_delay(
extsz = xfs_get_extsz_hint(ip);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
if (error)
@@ -476,9 +486,44 @@ xfs_iomap_write_delay(
retry:
if (prealloc) {
+ xfs_fileoff_t alloc_blocks = 0;
+ /*
+ * If we don't have a user specified preallocation size, dynamically
+ * increase the preallocation size as the size of the file
+ * grows. Cap the maximum size at a single extent or less if
+ * the filesystem is near full. The closer the filesystem is to
+ * full, the smaller the maximum preallocation.
+ */
+ if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+ int shift = 0;
+ int64_t freesp;
+
+ alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size);
+ alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
+ rounddown_pow_of_two(alloc_blocks));
+
+ freesp = xfs_icsb_read(mp, XFS_ICSB_FDBLOCKS);
+ if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
+ shift = 2;
+ if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
+ shift++;
+ }
+ if (shift)
+ alloc_blocks >>= shift;
+ }
+
+ if (alloc_blocks < mp->m_writeio_blocks)
+ alloc_blocks = mp->m_writeio_blocks;
+
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
- last_fsb = ioalign + mp->m_writeio_blocks;
+ last_fsb = ioalign + alloc_blocks;
} else {
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
}
@@ -496,22 +541,31 @@ retry:
XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
&nimaps, NULL);
- if (error && (error != ENOSPC))
+ switch (error) {
+ case 0:
+ case ENOSPC:
+ case EDQUOT:
+ break;
+ default:
return XFS_ERROR(error);
+ }
/*
- * If bmapi returned us nothing, and if we didn't get back EDQUOT,
- * then we must have run out of space - flush all other inodes with
- * delalloc blocks and retry without EOF preallocation.
+ * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For
+ * ENOSPC, flush all other inodes with delalloc blocks to free up
+ * some of the excess reserved metadata space. For both cases, retry
+ * without EOF preallocation.
*/
if (nimaps == 0) {
trace_xfs_delalloc_enospc(ip, offset, count);
if (flushed)
- return XFS_ERROR(ENOSPC);
+ return XFS_ERROR(error ? error : ENOSPC);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_flush_inodes(ip);
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (error == ENOSPC) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_flush_inodes(ip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ }
flushed = 1;
error = 0;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 3905fc3..98cb266 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -315,14 +315,6 @@ xfs_icsb_sum(
return percpu_counter_sum_positive(&mp->m_icsb[counter]);
}
-static inline int64_t
-xfs_icsb_read(
- struct xfs_mount *mp,
- int counter)
-{
- return percpu_counter_read_positive(&mp->m_icsb[counter]);
-}
-
void
xfs_icsb_reinit_counters(
struct xfs_mount *mp)
@@ -1145,6 +1137,21 @@ xfs_set_rw_sizes(xfs_mount_t *mp)
}
/*
+ * precalculate the low space thresholds for dynamic speculative preallocation.
+ */
+void
+xfs_set_low_space_thresholds(
+ struct xfs_mount *mp)
+{
+ int i;
+
+ for (i = 0; i < XFS_LOWSP_MAX; i++) {
+ mp->m_low_space[i] = mp->m_sb.sb_dblocks / 100 * (i + 1);
+ }
+}
+
+
+/*
* Set whether we're using inode alignment.
*/
STATIC void
@@ -1366,6 +1373,9 @@ xfs_mountfs(
*/
xfs_set_rw_sizes(mp);
+ /* set the low space thresholds for dynamic preallocation */
+ xfs_set_low_space_thresholds(mp);
+
/*
* Set the inode cluster size.
* This may still be overridden by the file system
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index c8ff435..9f99a62 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -75,8 +75,15 @@ enum {
XFS_ICSB_MAX,
};
-extern int xfs_icsb_modify_inodes(struct xfs_mount *, int, int64_t, int);
-extern int xfs_icsb_modify_free_blocks(struct xfs_mount *, int64_t, int);
+/* dynamic preallocation free space thresholds, 5% down to 1% */
+enum {
+ XFS_LOWSP_1_PCNT = 0,
+ XFS_LOWSP_2_PCNT,
+ XFS_LOWSP_3_PCNT,
+ XFS_LOWSP_4_PCNT,
+ XFS_LOWSP_5_PCNT,
+ XFS_LOWSP_MAX,
+};
typedef struct xfs_mount {
struct super_block *m_super;
@@ -172,6 +179,8 @@ typedef struct xfs_mount {
on the next remount,rw */
struct shrinker m_inode_shrink; /* inode reclaim shrinker */
struct percpu_counter m_icsb[XFS_ICSB_MAX];
+ int64_t m_low_space[XFS_LOWSP_MAX];
+ /* low free space thresholds */
} xfs_mount_t;
/*
@@ -333,6 +342,19 @@ extern int xfs_icsb_init_counters(struct xfs_mount *);
extern void xfs_icsb_reinit_counters(struct xfs_mount *);
extern void xfs_icsb_destroy_counters(struct xfs_mount *);
extern void xfs_icsb_sync_counters(struct xfs_mount *);
+extern int xfs_icsb_modify_inodes(struct xfs_mount *, int, int64_t, int);
+extern int xfs_icsb_modify_free_blocks(struct xfs_mount *, int64_t, int);
+
+static inline int64_t
+xfs_icsb_read(
+ struct xfs_mount *mp,
+ int counter)
+{
+ return percpu_counter_read_positive(&mp->m_icsb[counter]);
+}
+
+extern void xfs_set_low_space_thresholds(struct xfs_mount *);
+
#endif /* __KERNEL__ */
--
1.7.2.3
* [PATCH 2/2] xfs: don't truncate prealloc from frequently accessed inodes
2010-11-29 0:43 [PATCH 0/2] xfs: dynamic speculative allocation beyond EOF V3 Dave Chinner
2010-11-29 0:43 ` [PATCH 1/2] xfs: dynamic speculative EOF preallocation Dave Chinner
@ 2010-11-29 0:43 ` Dave Chinner
2010-11-29 9:42 ` Andi Kleen
2010-11-30 17:03 ` Christoph Hellwig
1 sibling, 2 replies; 14+ messages in thread
From: Dave Chinner @ 2010-11-29 0:43 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
A long standing problem for streaming writes through the NFS server
has been that the NFS server opens and closes file descriptors on an
inode for every write. The result of this behaviour is that the
->release() function is called on every close and that results in
XFS truncating speculative preallocation beyond the EOF. This has
an adverse effect on file layout when multiple files are being
written at the same time - they interleave their extents and can
result in severe fragmentation.
To avoid this problem, keep a count of the number of ->release calls
made on an inode. For most cases, an inode is only going to be opened
once for writing and then closed again during its lifetime in
cache. Hence if there are multiple ->release calls, there is a good
chance that the inode is being accessed by the NFS server. Hence
count up every time ->release is called while there are delalloc
blocks still outstanding on the inode.
If this count is non-zero when ->release is next called, then do not
truncate away the speculative preallocation - leave it there so that
subsequent writes do not need to reallocate the delalloc space. This
will prevent interleaving of extents of different inodes written
concurrently to the same AG.
If we get this wrong, it is not a big deal as we truncate
speculative allocation beyond EOF anyway in xfs_inactive() when the
inode is thrown out of the cache.
The new counter in the struct xfs_inode fits into a hole in the
structure on 64 bit machines, so does not grow the size of the inode
at all.
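The heuristic can be sketched as a standalone model (a sketch only; the
struct is a stand-in for the real xfs_inode fields, and it simplifies one
detail: the real code re-checks i_delayed_blks after the truncate, not
before):

```c
#include <assert.h>

/* Illustrative stand-in for the relevant xfs_inode state. */
struct model_inode {
	unsigned int delayed_blks;	/* delalloc blocks outstanding */
	int dirty_releases;		/* dirty ->release calls so far */
};

/*
 * Return nonzero if ->release should trim speculative EOF
 * preallocation. After a couple of closes that still had delalloc
 * blocks outstanding (e.g. the NFS server's open-write-close
 * pattern), leave the preallocation in place.
 */
int release_should_trim(struct model_inode *ip)
{
	if (ip->dirty_releases > 1)
		return 0;

	/* Trim now; delalloc blocks remaining mean a dirty close. */
	if (ip->delayed_blks)
		ip->dirty_releases++;
	return 1;
}
```

Clean closes never bump the counter, so a file written once and closed
behaves exactly as before; only repeated dirty closes flip the inode into
the keep-preallocation state.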
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_iget.c | 1 +
fs/xfs/xfs_inode.h | 1 +
fs/xfs/xfs_vnodeops.c | 61 ++++++++++++++++++++++++++++++++-----------------
3 files changed, 42 insertions(+), 21 deletions(-)
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0cdd269..18991a9 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -84,6 +84,7 @@ xfs_inode_alloc(
memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
ip->i_size = 0;
ip->i_new_size = 0;
+ ip->i_dirty_releases = 0;
/* prevent anyone from using this yet */
VFS_I(ip)->i_state = I_NEW;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fb2ca2e..ea2f34e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -260,6 +260,7 @@ typedef struct xfs_inode {
xfs_fsize_t i_size; /* in-memory size */
xfs_fsize_t i_new_size; /* size when write completes */
atomic_t i_iocount; /* outstanding I/O count */
+ int i_dirty_releases; /* dirty ->release calls */
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8e4a63c..49f3a5a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -964,29 +964,48 @@ xfs_release(
xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
}
- if (ip->i_d.di_nlink != 0) {
- if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
- ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
- ip->i_delayed_blks > 0)) &&
- (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
- (!(ip->i_d.di_flags &
- (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
+ if (ip->i_d.di_nlink == 0)
+ return 0;
- /*
- * If we can't get the iolock just skip truncating
- * the blocks past EOF because we could deadlock
- * with the mmap_sem otherwise. We'll get another
- * chance to drop them once the last reference to
- * the inode is dropped, so we'll never leak blocks
- * permanently.
- */
- error = xfs_free_eofblocks(mp, ip,
- XFS_FREE_EOF_TRYLOCK);
- if (error)
- return error;
- }
- }
+ if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+ ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
+ ip->i_delayed_blks > 0)) &&
+ (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
+ (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
+ /*
+ * If we can't get the iolock just skip truncating the blocks
+ * past EOF because we could deadlock with the mmap_sem
+ * otherwise. We'll get another chance to drop them once the
+ * last reference to the inode is dropped, so we'll never leak
+ * blocks permanently.
+ *
+ * Further, count the number of times we get here in the life
+ * of this inode. If the inode is being opened, written and
+ * closed frequently and we have delayed allocation blocks
+ * outstanding (e.g. streaming writes from the NFS server),
+ * truncating the blocks past EOF will cause fragmentation to
+ * occur.
+ *
+ * In this case don't do the truncation, either, but we have to
+ * be careful how we detect this case. Blocks beyond EOF show
+ * up as i_delayed_blks even when the inode is clean, so we
+ * need to truncate them away first before checking for a dirty
+ * release. Hence on the first couple of dirty closes, we will
+ * still remove the speculative allocation, but then we will
+ * leave it in place.
+ */
+ if (ip->i_dirty_releases > 1)
+ return 0;
+ error = xfs_free_eofblocks(mp, ip,
+ XFS_FREE_EOF_TRYLOCK);
+ if (error)
+ return error;
+
+ /* delalloc blocks after truncation means it really is dirty */
+ if (ip->i_delayed_blks)
+ ip->i_dirty_releases++;
+ }
return 0;
}
--
1.7.2.3
* Re: [PATCH 2/2] xfs: don't truncate prealloc from frequently accessed inodes
2010-11-29 0:43 ` [PATCH 2/2] xfs: don't truncate prealloc from frequently accessed inodes Dave Chinner
@ 2010-11-29 9:42 ` Andi Kleen
2010-11-30 1:00 ` Dave Chinner
2010-11-30 17:03 ` Christoph Hellwig
1 sibling, 1 reply; 14+ messages in thread
From: Andi Kleen @ 2010-11-29 9:42 UTC (permalink / raw)
To: Dave Chinner; +Cc: linux-fsdevel, xfs
Dave Chinner <david@fromorbit.com> writes:
>
> To avoid this problem, keep a count of the number of ->release calls
> made on an inode. For most cases, an inode is only going to be opened
> once for writing and then closed again during its lifetime in
> cache. Hence if there are multiple ->release calls, there is a good
> chance that the inode is being accessed by the NFS server. Hence
> count up every time ->release is called while there are delalloc
> blocks still outstanding on the inode.
Seems like a hack. It would be cleaner and less fragile to add a
explicit VFS hint that is passed down from the nfs server, similar
to the existing open intents.
-Andi
--
ak@linux.intel.com -- Speaking for myself only.
* Re: [PATCH 2/2] xfs: don't truncate prealloc from frequently accessed inodes
2010-11-29 9:42 ` Andi Kleen
@ 2010-11-30 1:00 ` Dave Chinner
0 siblings, 0 replies; 14+ messages in thread
From: Dave Chinner @ 2010-11-30 1:00 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-fsdevel, xfs
On Mon, Nov 29, 2010 at 10:42:29AM +0100, Andi Kleen wrote:
> Dave Chinner <david@fromorbit.com> writes:
> >
> > To avoid this problem, keep a count of the number of ->release calls
> > made on an inode. For most cases, an inode is only going to be opened
> once for writing and then closed again during its lifetime in
> > cache. Hence if there are multiple ->release calls, there is a good
> > chance that the inode is being accessed by the NFS server. Hence
> > count up every time ->release is called while there are delalloc
> > blocks still outstanding on the inode.
>
> Seems like a hack. It would be cleaner and less fragile to add an
> explicit VFS hint that is passed down from the nfs server, similar
> to the existing open intents.
Agreed.
However, we've been asking for the nfsd to change its behaviour for
various operations for quite some time (i.e. years) to help
filesystems behave better, and we're no closer to having it
fixed now than we were 3 or 4 years ago. What the nfsd really needs
is an open file cache so that IO looks like normal file IO rather
than every write being an "open-write-close" operation....
While we wait for nfsd to be fixed, we've still got people reporting
excessive fragmentation during concurrent sequential writes to nfs
servers running XFS, so we really need some kind of fix for the
problem...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
* Re: [PATCH 2/2] xfs: don't truncate prealloc from frequently accessed inodes
2010-11-29 0:43 ` [PATCH 2/2] xfs: don't truncate prealloc from frequently accessed inodes Dave Chinner
2010-11-29 9:42 ` Andi Kleen
@ 2010-11-30 17:03 ` Christoph Hellwig
2010-11-30 22:00 ` Dave Chinner
1 sibling, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2010-11-30 17:03 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Did any problems show up with just trying to use an inode flag instead
of the counter? I'd really hate to bloat the inode without reason.
* Re: [PATCH 2/2] xfs: don't truncate prealloc from frequently accessed inodes
2010-11-30 17:03 ` Christoph Hellwig
@ 2010-11-30 22:00 ` Dave Chinner
0 siblings, 0 replies; 14+ messages in thread
From: Dave Chinner @ 2010-11-30 22:00 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Tue, Nov 30, 2010 at 12:03:01PM -0500, Christoph Hellwig wrote:
> Did any problems show up with just trying to use an inode flag instead
> of the counter? I'd really hate to bloat the inode without reason.
None that I've noticed in local testing, but I haven't been
focussing on this aspect so I hadn't changed it. I'll change it to a
flag, and we can go back to a counter if necessary.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
* Re: [PATCH 1/2] xfs: dynamic speculative EOF preallocation
2010-11-29 0:43 ` [PATCH 1/2] xfs: dynamic speculative EOF preallocation Dave Chinner
@ 2010-12-07 10:17 ` Christoph Hellwig
2010-12-07 10:49 ` Dave Chinner
0 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2010-12-07 10:17 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
I need the patch below to make the new code link on 32-bit systems:
Index: xfs/fs/xfs/xfs_mount.c
===================================================================
--- xfs.orig/fs/xfs/xfs_mount.c 2010-12-07 11:01:50.394021585 +0100
+++ xfs/fs/xfs/xfs_mount.c 2010-12-07 11:02:46.231256257 +0100
@@ -1111,7 +1111,10 @@ xfs_set_low_space_thresholds(
int i;
for (i = 0; i < XFS_LOWSP_MAX; i++) {
- mp->m_low_space[i] = mp->m_sb.sb_dblocks / 100 * (i + 1);
+ __uint64_t space = mp->m_sb.sb_dblocks;
+ do_div(space, 100);
+
+ mp->m_low_space[i] = space * (i + 1);
}
}
* Re: [PATCH 1/2] xfs: dynamic speculative EOF preallocation
2010-12-07 10:17 ` Christoph Hellwig
@ 2010-12-07 10:49 ` Dave Chinner
0 siblings, 0 replies; 14+ messages in thread
From: Dave Chinner @ 2010-12-07 10:49 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Tue, Dec 07, 2010 at 05:17:46AM -0500, Christoph Hellwig wrote:
> I need the patch below to make the new code link on 32-bit systems:
>
> Index: xfs/fs/xfs/xfs_mount.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_mount.c 2010-12-07 11:01:50.394021585 +0100
> +++ xfs/fs/xfs/xfs_mount.c 2010-12-07 11:02:46.231256257 +0100
> @@ -1111,7 +1111,10 @@ xfs_set_low_space_thresholds(
> int i;
>
> for (i = 0; i < XFS_LOWSP_MAX; i++) {
> - mp->m_low_space[i] = mp->m_sb.sb_dblocks / 100 * (i + 1);
> + __uint64_t space = mp->m_sb.sb_dblocks;
> + do_div(space, 100);
> +
> + mp->m_low_space[i] = space * (i + 1);
> }
> }
Yes, makes sense. I'll fold that into the latest version of the
patch. Thanks.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
* [PATCH 1/2] xfs: dynamic speculative EOF preallocation
2010-12-13 1:25 [PATCH 0/2] xfs: dynamic speculative allocation beyond EOF V4 Dave Chinner
@ 2010-12-13 1:25 ` Dave Chinner
2010-12-15 18:57 ` Christoph Hellwig
0 siblings, 1 reply; 14+ messages in thread
From: Dave Chinner @ 2010-12-13 1:25 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
Currently the size of the speculative preallocation during delayed
allocation is fixed by either the allocsize mount option or a
default size. We are seeing a lot of cases where we need to
recommend using the allocsize mount option to prevent fragmentation
when buffered writes land in the same AG.
Rather than using a fixed preallocation size by default (up to 64k),
make it dynamic by basing it on the current inode size. That way the
EOF preallocation will increase as the file size increases. Hence
for streaming writes we are much more likely to get large
preallocations exactly when we need it to reduce fragmentation.
For default settings, the size of the initial extents is determined
by the number of parallel writers and the amount of memory in the
machine. For 4GB RAM and 4 concurrent 32GB file writes:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL
0: [0..1048575]: 1048672..2097247 0 (1048672..2097247) 1048576
1: [1048576..2097151]: 5242976..6291551 0 (5242976..6291551) 1048576
2: [2097152..4194303]: 12583008..14680159 0 (12583008..14680159) 2097152
3: [4194304..8388607]: 25165920..29360223 0 (25165920..29360223) 4194304
4: [8388608..16777215]: 58720352..67108959 0 (58720352..67108959) 8388608
5: [16777216..33554423]: 117440584..134217791 0 (117440584..134217791) 16777208
6: [33554424..50331511]: 184549056..201326143 0 (184549056..201326143) 16777088
7: [50331512..67108599]: 251657408..268434495 0 (251657408..268434495) 16777088
and for 16 concurrent 16GB file writes:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL
0: [0..262143]: 2490472..2752615 0 (2490472..2752615) 262144
1: [262144..524287]: 6291560..6553703 0 (6291560..6553703) 262144
2: [524288..1048575]: 13631592..14155879 0 (13631592..14155879) 524288
3: [1048576..2097151]: 30408808..31457383 0 (30408808..31457383) 1048576
4: [2097152..4194303]: 52428904..54526055 0 (52428904..54526055) 2097152
5: [4194304..8388607]: 104857704..109052007 0 (104857704..109052007) 4194304
6: [8388608..16777215]: 209715304..218103911 0 (209715304..218103911) 8388608
7: [16777216..33554423]: 452984848..469762055 0 (452984848..469762055) 16777208
Because it is hard to take back speculative preallocation, cases
where there are large slow growing log files on a nearly full
filesystem may cause premature ENOSPC. Hence as the filesystem nears
full, the maximum dynamic prealloc size is reduced according to this
table (based on 4k block size):
freespace max prealloc size
>5% full extent (8GB)
4-5% 2GB (8GB >> 2)
3-4% 1GB (8GB >> 3)
2-3% 512MB (8GB >> 4)
1-2% 256MB (8GB >> 5)
<1% 128MB (8GB >> 6)
This should reduce the amount of space held in speculative
preallocation for such cases.
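[Editorial note: the table above can be expressed directly. This is an illustrative userspace sketch of the scaling logic only; the names are made up, and the real patch additionally caps the size at MAXEXTLEN, rounds it down to a power of two, and reads free space from a per-cpu counter:]

```c
#include <assert.h>
#include <stdint.h>

#define DEMO_LOWSP_MAX 5

/*
 * Precompute the 1%..5% free-space thresholds from the filesystem's
 * data block count, then derive the right-shift applied to the
 * maximum speculative preallocation size: no shift above 5% free,
 * then >>2 at 4-5% free, growing by one per percent down to >>6
 * below 1% free.
 */
static int demo_prealloc_shift(uint64_t dblocks, uint64_t freesp)
{
	uint64_t low_space[DEMO_LOWSP_MAX];
	int i, shift = 0;

	for (i = 0; i < DEMO_LOWSP_MAX; i++)
		low_space[i] = dblocks / 100 * (uint64_t)(i + 1);

	if (freesp < low_space[4]) {		/* below 5% free */
		shift = 2;
		for (i = 3; i >= 0; i--)	/* 4%, 3%, 2%, 1% */
			if (freesp < low_space[i])
				shift++;
	}
	return shift;
}
```

With 4k blocks and an 8GB maximum extent, a shift of 2 yields the 2GB cap from the table, and a shift of 6 yields 128MB.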
The allocsize mount option turns off the dynamic behaviour and fixes
the prealloc size to whatever the mount option specifies. i.e. the
behaviour is unchanged.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_fsops.c | 1 +
fs/xfs/xfs_iomap.c | 85 +++++++++++++++++++++++++++++++++++++++++++++------
fs/xfs/xfs_mount.c | 21 +++++++++++++
fs/xfs/xfs_mount.h | 14 ++++++++
4 files changed, 111 insertions(+), 10 deletions(-)
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index be34ff2..6d17206 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -374,6 +374,7 @@ xfs_growfs_data_private(
mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
} else
mp->m_maxicount = 0;
+ xfs_set_low_space_thresholds(mp);
/* update secondary superblocks. */
for (agno = 1; agno < nagcount; agno++) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2057614..40f9612 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -389,6 +389,9 @@ error_out:
* If the caller is doing a write at the end of the file, then extend the
* allocation out to the file system's write iosize. We clean up any extra
* space left over when the file is closed in xfs_inactive().
+ *
+ * If we find we already have delalloc preallocation beyond EOF, don't do more
+ * preallocation as it is not needed.
*/
STATIC int
xfs_iomap_eof_want_preallocate(
@@ -405,6 +408,7 @@ xfs_iomap_eof_want_preallocate(
xfs_filblks_t count_fsb;
xfs_fsblock_t firstblock;
int n, error, imaps;
+ int found_delalloc = 0;
*prealloc = 0;
if ((offset + count) <= ip->i_size)
@@ -427,14 +431,63 @@ xfs_iomap_eof_want_preallocate(
if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
(imap[n].br_startblock != DELAYSTARTBLOCK))
return 0;
+
start_fsb += imap[n].br_blockcount;
count_fsb -= imap[n].br_blockcount;
+
+ if (imap[n].br_startblock == DELAYSTARTBLOCK)
+ found_delalloc = 1;
}
}
- *prealloc = 1;
+ if (!found_delalloc)
+ *prealloc = 1;
return 0;
}
+/*
+ * If we don't have a user specified preallocation size, dynamically increase
+ * the preallocation size as the size of the file grows. Cap the maximum size
+ * at a single extent or less if the filesystem is near full. The closer the
+ * filesystem is to full, the smaller the maximum preallocation.
+ */
+STATIC xfs_fsblock_t
+xfs_iomap_prealloc_size(
+ struct xfs_mount *mp,
+ struct xfs_inode *ip)
+{
+ xfs_fsblock_t alloc_blocks = 0;
+
+ if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+ int shift = 0;
+ int64_t freesp;
+
+ alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size);
+ alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
+ rounddown_pow_of_two(alloc_blocks));
+
+ freesp = percpu_counter_read_positive(
+ &mp->m_icsb[XFS_ICSB_FDBLOCKS]);
+ if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
+ shift = 2;
+ if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
+ shift++;
+ }
+ if (shift)
+ alloc_blocks >>= shift;
+ }
+
+ if (alloc_blocks < mp->m_writeio_blocks)
+ alloc_blocks = mp->m_writeio_blocks;
+
+ return alloc_blocks;
+}
+
STATIC int
xfs_iomap_write_delay(
xfs_inode_t *ip,
@@ -469,6 +522,7 @@ xfs_iomap_write_delay(
extsz = xfs_get_extsz_hint(ip);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
if (error)
@@ -476,9 +530,11 @@ xfs_iomap_write_delay(
retry:
if (prealloc) {
+ xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
+
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
- last_fsb = ioalign + mp->m_writeio_blocks;
+ last_fsb = ioalign + alloc_blocks;
} else {
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
}
@@ -496,22 +552,31 @@ retry:
XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
&nimaps, NULL);
- if (error && (error != ENOSPC))
+ switch (error) {
+ case 0:
+ case ENOSPC:
+ case EDQUOT:
+ break;
+ default:
return XFS_ERROR(error);
+ }
/*
- * If bmapi returned us nothing, and if we didn't get back EDQUOT,
- * then we must have run out of space - flush all other inodes with
- * delalloc blocks and retry without EOF preallocation.
+ * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For
+ * ENOSPC, flush all other inodes with delalloc blocks to free up
+ * some of the excess reserved metadata space. For both cases, retry
+ * without EOF preallocation.
*/
if (nimaps == 0) {
trace_xfs_delalloc_enospc(ip, offset, count);
if (flushed)
- return XFS_ERROR(ENOSPC);
+ return XFS_ERROR(error ? error : ENOSPC);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_flush_inodes(ip);
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (error == ENOSPC) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_flush_inodes(ip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ }
flushed = 1;
error = 0;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 5e41ef3..fe27338 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1101,6 +1101,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp)
}
/*
+ * precalculate the low space thresholds for dynamic speculative preallocation.
+ */
+void
+xfs_set_low_space_thresholds(
+ struct xfs_mount *mp)
+{
+ int i;
+
+ for (i = 0; i < XFS_LOWSP_MAX; i++) {
+ __uint64_t space = mp->m_sb.sb_dblocks;
+
+ do_div(space, 100);
+ mp->m_low_space[i] = space * (i + 1);
+ }
+}
+
+
+/*
* Set whether we're using inode alignment.
*/
STATIC void
@@ -1322,6 +1340,9 @@ xfs_mountfs(
*/
xfs_set_rw_sizes(mp);
+ /* set the low space thresholds for dynamic preallocation */
+ xfs_set_low_space_thresholds(mp);
+
/*
* Set the inode cluster size.
* This may still be overridden by the file system
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 03ad25c6..7b42e04 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -75,6 +75,16 @@ enum {
XFS_ICSB_MAX,
};
+/* dynamic preallocation free space thresholds, 5% down to 1% */
+enum {
+ XFS_LOWSP_1_PCNT = 0,
+ XFS_LOWSP_2_PCNT,
+ XFS_LOWSP_3_PCNT,
+ XFS_LOWSP_4_PCNT,
+ XFS_LOWSP_5_PCNT,
+ XFS_LOWSP_MAX,
+};
+
typedef struct xfs_mount {
struct super_block *m_super;
xfs_tid_t m_tid; /* next unused tid for fs */
@@ -169,6 +179,8 @@ typedef struct xfs_mount {
on the next remount,rw */
struct shrinker m_inode_shrink; /* inode reclaim shrinker */
struct percpu_counter m_icsb[XFS_ICSB_MAX];
+ int64_t m_low_space[XFS_LOWSP_MAX];
+ /* low free space thresholds */
} xfs_mount_t;
/*
@@ -333,6 +345,8 @@ extern void xfs_icsb_sync_counters(struct xfs_mount *);
extern int xfs_icsb_modify_inodes(struct xfs_mount *, int, int64_t);
extern int xfs_icsb_modify_free_blocks(struct xfs_mount *, int64_t, int);
+extern void xfs_set_low_space_thresholds(struct xfs_mount *);
+
#endif /* __KERNEL__ */
extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
--
1.7.2.3
* Re: [PATCH 1/2] xfs: dynamic speculative EOF preallocation
2010-12-13 1:25 ` [PATCH 1/2] xfs: dynamic speculative EOF preallocation Dave Chinner
@ 2010-12-15 18:57 ` Christoph Hellwig
0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2010-12-15 18:57 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
With this patch my 32-bit x86 VM hangs when running test 014, although
I can interrupt it. With the full patchset applied I get softlockup
warnings in this test for xfsaild, which seems related. Also test 012
fails, but that might be intentional (?).
Thread overview: 14+ messages
2010-11-29 0:43 [PATCH 0/2] xfs: dynamic speculative allocation beyond EOF V3 Dave Chinner
2010-11-29 0:43 ` [PATCH 1/2] xfs: dynamic speculative EOF preallocation Dave Chinner
2010-12-07 10:17 ` Christoph Hellwig
2010-12-07 10:49 ` Dave Chinner
2010-11-29 0:43 ` [PATCH 2/2] xfs: don't truncate prealloc from frequently accessed inodes Dave Chinner
2010-11-29 9:42 ` Andi Kleen
2010-11-30 1:00 ` Dave Chinner
2010-11-30 17:03 ` Christoph Hellwig
2010-11-30 22:00 ` Dave Chinner
-- strict thread matches above, loose matches on Subject: below --
2010-12-13 1:25 [PATCH 0/2] xfs: dynamic speculative allocation beyond EOF V4 Dave Chinner
2010-12-13 1:25 ` [PATCH 1/2] xfs: dynamic speculative EOF preallocation Dave Chinner
2010-12-15 18:57 ` Christoph Hellwig
2010-10-04 10:13 [RFC, PATCH 0/2] xfs: dynamic speculative preallocation for delalloc Dave Chinner
2010-10-04 10:13 ` [PATCH 1/2] xfs: dynamic speculative EOF preallocation Dave Chinner
2010-10-14 17:22 ` Alex Elder
2010-10-14 21:33 ` Dave Chinner