linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* small fixes and optimizations for delalloc and reflink
@ 2019-02-11 12:54 Christoph Hellwig
  2019-02-11 12:54 ` [PATCH 01/10] xfs: remove the io_type field from the writeback context and ioend Christoph Hellwig
                   ` (10 more replies)
  0 siblings, 11 replies; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

Hi all,

this series contains my changes to fix up the delalloc and reflink
code to prepare for the always COW mode.  This sits on top of
the 'xfs: properly invalidate cached writeback mapping' series from
Brian.

To make reviewing easier I also have a git tree available here:

    git://git.infradead.org/users/hch/xfs.git xfs-mapping-validation.4

Gitweb:

    http://git.infradead.org/users/hch/xfs.git/shortlog/refs/heads/xfs-mapping-validation.4

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 01/10] xfs: remove the io_type field from the writeback context and ioend
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 12:54 ` [PATCH 02/10] xfs: remove the s_maxbytes checks in xfs_map_blocks Christoph Hellwig
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster, Darrick J . Wong

The io_type field contains what is basically a summary of information
from the inode fork and the imap.  But we can just as easily use that
information directly, simplifying a few bits here and there and
improving the trace points.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c    | 93 ++++++++++++++++++++------------------------
 fs/xfs/xfs_aops.h    | 24 +-----------
 fs/xfs/xfs_iomap.c   |  8 ++--
 fs/xfs/xfs_reflink.c |  2 +-
 fs/xfs/xfs_trace.h   | 34 +++++++---------
 5 files changed, 63 insertions(+), 98 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 515532f45beb..a3fa60d1d2df 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -28,7 +28,7 @@
  */
 struct xfs_writepage_ctx {
 	struct xfs_bmbt_irec    imap;
-	unsigned int		io_type;
+	int			fork;
 	unsigned int		data_seq;
 	unsigned int		cow_seq;
 	struct xfs_ioend	*ioend;
@@ -256,30 +256,20 @@ xfs_end_io(
 	 */
 	error = blk_status_to_errno(ioend->io_bio->bi_status);
 	if (unlikely(error)) {
-		switch (ioend->io_type) {
-		case XFS_IO_COW:
+		if (ioend->io_fork == XFS_COW_FORK)
 			xfs_reflink_cancel_cow_range(ip, offset, size, true);
-			break;
-		}
-
 		goto done;
 	}
 
 	/*
-	 * Success:  commit the COW or unwritten blocks if needed.
+	 * Success: commit the COW or unwritten blocks if needed.
 	 */
-	switch (ioend->io_type) {
-	case XFS_IO_COW:
+	if (ioend->io_fork == XFS_COW_FORK)
 		error = xfs_reflink_end_cow(ip, offset, size);
-		break;
-	case XFS_IO_UNWRITTEN:
-		/* writeback should never update isize */
+	else if (ioend->io_state == XFS_EXT_UNWRITTEN)
 		error = xfs_iomap_write_unwritten(ip, offset, size, false);
-		break;
-	default:
+	else
 		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
-		break;
-	}
 
 done:
 	if (ioend->io_append_trans)
@@ -294,7 +284,8 @@ xfs_end_bio(
 	struct xfs_ioend	*ioend = bio->bi_private;
 	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
 
-	if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
+	if (ioend->io_fork == XFS_COW_FORK ||
+	    ioend->io_state == XFS_EXT_UNWRITTEN)
 		queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
 	else if (ioend->io_append_trans)
 		queue_work(mp->m_data_workqueue, &ioend->io_work);
@@ -320,7 +311,7 @@ xfs_imap_valid(
 	 * covers the offset. Be careful to check this first because the caller
 	 * can revalidate a COW mapping without updating the data seqno.
 	 */
-	if (wpc->io_type == XFS_IO_COW)
+	if (wpc->fork == XFS_COW_FORK)
 		return true;
 
 	/*
@@ -350,7 +341,6 @@ xfs_map_blocks(
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
 	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
 	struct xfs_bmbt_irec	imap;
-	int			whichfork = XFS_DATA_FORK;
 	struct xfs_iext_cursor	icur;
 	int			error = 0;
 
@@ -400,6 +390,9 @@ xfs_map_blocks(
 	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
 		wpc->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+		wpc->fork = XFS_COW_FORK;
+
 		/*
 		 * Truncate can race with writeback since writeback doesn't
 		 * take the iolock and truncate decreases the file size before
@@ -412,11 +405,13 @@ xfs_map_blocks(
 		 * will kill the contents anyway.
 		 */
 		if (offset > i_size_read(inode)) {
-			wpc->io_type = XFS_IO_HOLE;
+			wpc->imap.br_blockcount = end_fsb - offset_fsb;
+			wpc->imap.br_startoff = offset_fsb;
+			wpc->imap.br_startblock = HOLESTARTBLOCK;
+			wpc->imap.br_state = XFS_EXT_NORM;
 			return 0;
 		}
-		whichfork = XFS_COW_FORK;
-		wpc->io_type = XFS_IO_COW;
+
 		goto allocate_blocks;
 	}
 
@@ -439,12 +434,14 @@ xfs_map_blocks(
 	wpc->data_seq = READ_ONCE(ip->i_df.if_seq);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
+	wpc->fork = XFS_DATA_FORK;
+
 	if (imap.br_startoff > offset_fsb) {
 		/* landed in a hole or beyond EOF */
 		imap.br_blockcount = imap.br_startoff - offset_fsb;
 		imap.br_startoff = offset_fsb;
 		imap.br_startblock = HOLESTARTBLOCK;
-		wpc->io_type = XFS_IO_HOLE;
+		imap.br_state = XFS_EXT_NORM;
 	} else {
 		/*
 		 * Truncate to the next COW extent if there is one.  This is the
@@ -456,31 +453,24 @@ xfs_map_blocks(
 		    cow_fsb < imap.br_startoff + imap.br_blockcount)
 			imap.br_blockcount = cow_fsb - imap.br_startoff;
 
-		if (isnullstartblock(imap.br_startblock)) {
-			/* got a delalloc extent */
-			wpc->io_type = XFS_IO_DELALLOC;
+		/* got a delalloc extent? */
+		if (isnullstartblock(imap.br_startblock))
 			goto allocate_blocks;
-		}
-
-		if (imap.br_state == XFS_EXT_UNWRITTEN)
-			wpc->io_type = XFS_IO_UNWRITTEN;
-		else
-			wpc->io_type = XFS_IO_OVERWRITE;
 	}
 
 	wpc->imap = imap;
-	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
+	trace_xfs_map_blocks_found(ip, offset, count, wpc->fork, &imap);
 	return 0;
 allocate_blocks:
-	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
-			whichfork == XFS_COW_FORK ?
+	error = xfs_iomap_write_allocate(ip, wpc->fork, offset, &imap,
+			wpc->fork == XFS_COW_FORK ?
 					 &wpc->cow_seq : &wpc->data_seq);
 	if (error)
 		return error;
-	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
+	ASSERT(wpc->fork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
 	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
 	wpc->imap = imap;
-	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
+	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->fork, &imap);
 	return 0;
 }
 
@@ -505,7 +495,7 @@ xfs_submit_ioend(
 	int			status)
 {
 	/* Convert CoW extents to regular */
-	if (!status && ioend->io_type == XFS_IO_COW) {
+	if (!status && ioend->io_fork == XFS_COW_FORK) {
 		/*
 		 * Yuk. This can do memory allocation, but is not a
 		 * transactional operation so everything is done in GFP_KERNEL
@@ -523,7 +513,8 @@ xfs_submit_ioend(
 
 	/* Reserve log space if we might write beyond the on-disk inode size. */
 	if (!status &&
-	    ioend->io_type != XFS_IO_UNWRITTEN &&
+	    (ioend->io_fork == XFS_COW_FORK ||
+	     ioend->io_state != XFS_EXT_UNWRITTEN) &&
 	    xfs_ioend_is_append(ioend) &&
 	    !ioend->io_append_trans)
 		status = xfs_setfilesize_trans_alloc(ioend);
@@ -552,7 +543,8 @@ xfs_submit_ioend(
 static struct xfs_ioend *
 xfs_alloc_ioend(
 	struct inode		*inode,
-	unsigned int		type,
+	int			fork,
+	xfs_exntst_t		state,
 	xfs_off_t		offset,
 	struct block_device	*bdev,
 	sector_t		sector)
@@ -566,7 +558,8 @@ xfs_alloc_ioend(
 
 	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
 	INIT_LIST_HEAD(&ioend->io_list);
-	ioend->io_type = type;
+	ioend->io_fork = fork;
+	ioend->io_state = state;
 	ioend->io_inode = inode;
 	ioend->io_size = 0;
 	ioend->io_offset = offset;
@@ -627,13 +620,15 @@ xfs_add_to_ioend(
 	sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) +
 		((offset - XFS_FSB_TO_B(mp, wpc->imap.br_startoff)) >> 9);
 
-	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
+	if (!wpc->ioend ||
+	    wpc->fork != wpc->ioend->io_fork ||
+	    wpc->imap.br_state != wpc->ioend->io_state ||
 	    sector != bio_end_sector(wpc->ioend->io_bio) ||
 	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
 		if (wpc->ioend)
 			list_add(&wpc->ioend->io_list, iolist);
-		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset,
-				bdev, sector);
+		wpc->ioend = xfs_alloc_ioend(inode, wpc->fork,
+				wpc->imap.br_state, offset, bdev, sector);
 	}
 
 	if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
@@ -742,7 +737,7 @@ xfs_writepage_map(
 		error = xfs_map_blocks(wpc, inode, file_offset);
 		if (error)
 			break;
-		if (wpc->io_type == XFS_IO_HOLE)
+		if (wpc->imap.br_startblock == HOLESTARTBLOCK)
 			continue;
 		xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
 				 &submit_list);
@@ -937,9 +932,7 @@ xfs_vm_writepage(
 	struct page		*page,
 	struct writeback_control *wbc)
 {
-	struct xfs_writepage_ctx wpc = {
-		.io_type = XFS_IO_HOLE,
-	};
+	struct xfs_writepage_ctx wpc = { };
 	int			ret;
 
 	ret = xfs_do_writepage(page, wbc, &wpc);
@@ -953,9 +946,7 @@ xfs_vm_writepages(
 	struct address_space	*mapping,
 	struct writeback_control *wbc)
 {
-	struct xfs_writepage_ctx wpc = {
-		.io_type = XFS_IO_HOLE,
-	};
+	struct xfs_writepage_ctx wpc = { };
 	int			ret;
 
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index e5c23948a8ab..6c2615b83c5d 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -8,33 +8,13 @@
 
 extern struct bio_set xfs_ioend_bioset;
 
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- *
- * This enum is used in string mapping in xfs_trace.h; please keep the
- * TRACE_DEFINE_ENUMs for it up to date.
- */
-enum {
-	XFS_IO_HOLE,		/* covers region without any block allocation */
-	XFS_IO_DELALLOC,	/* covers delalloc region */
-	XFS_IO_UNWRITTEN,	/* covers allocated but uninitialized data */
-	XFS_IO_OVERWRITE,	/* covers already allocated extent */
-	XFS_IO_COW,		/* covers copy-on-write extent */
-};
-
-#define XFS_IO_TYPES \
-	{ XFS_IO_HOLE,			"hole" },	\
-	{ XFS_IO_DELALLOC,		"delalloc" },	\
-	{ XFS_IO_UNWRITTEN,		"unwritten" },	\
-	{ XFS_IO_OVERWRITE,		"overwrite" },	\
-	{ XFS_IO_COW,			"CoW" }
-
 /*
  * Structure for buffered I/O completions.
  */
 struct xfs_ioend {
 	struct list_head	io_list;	/* next ioend in chain */
-	unsigned int		io_type;	/* delalloc / unwritten */
+	int			io_fork;	/* inode fork written back */
+	xfs_exntst_t		io_state;	/* extent state */
 	struct inode		*io_inode;	/* file being written to */
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 6af1d3ec0a9c..fd3aacd4bf02 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -575,7 +575,7 @@ xfs_file_iomap_begin_delay(
 				goto out_unlock;
 		}
 
-		trace_xfs_iomap_found(ip, offset, count, 0, &got);
+		trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, &got);
 		goto done;
 	}
 
@@ -647,7 +647,7 @@ xfs_file_iomap_begin_delay(
 	 * them out if the write happens to fail.
 	 */
 	iomap->flags |= IOMAP_F_NEW;
-	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
+	trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
 		got.br_startblock = DELAYSTARTBLOCK;
@@ -1082,7 +1082,7 @@ xfs_file_iomap_begin(
 		return error;
 
 	iomap->flags |= IOMAP_F_NEW;
-	trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
 
 out_finish:
 	if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields
@@ -1098,7 +1098,7 @@ xfs_file_iomap_begin(
 out_found:
 	ASSERT(nimaps);
 	xfs_iunlock(ip, lockmode);
-	trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
 	goto out_finish;
 
 out_unlock:
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index c5b4fa004ca4..2babc2cbe103 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1192,7 +1192,7 @@ xfs_reflink_remap_blocks(
 			break;
 		ASSERT(nimaps == 1);
 
-		trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE,
+		trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_DATA_FORK,
 				&imap);
 
 		/* Translate imap into the destination file. */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6fcc893dfc91..f75c6d380543 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1218,23 +1218,17 @@ DEFINE_EVENT(xfs_readpage_class, name,	\
 DEFINE_READPAGE_EVENT(xfs_vm_readpage);
 DEFINE_READPAGE_EVENT(xfs_vm_readpages);
 
-TRACE_DEFINE_ENUM(XFS_IO_HOLE);
-TRACE_DEFINE_ENUM(XFS_IO_DELALLOC);
-TRACE_DEFINE_ENUM(XFS_IO_UNWRITTEN);
-TRACE_DEFINE_ENUM(XFS_IO_OVERWRITE);
-TRACE_DEFINE_ENUM(XFS_IO_COW);
-
 DECLARE_EVENT_CLASS(xfs_imap_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
-		 int type, struct xfs_bmbt_irec *irec),
-	TP_ARGS(ip, offset, count, type, irec),
+		 int whichfork, struct xfs_bmbt_irec *irec),
+	TP_ARGS(ip, offset, count, whichfork, irec),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
 		__field(loff_t, size)
 		__field(loff_t, offset)
 		__field(size_t, count)
-		__field(int, type)
+		__field(int, whichfork)
 		__field(xfs_fileoff_t, startoff)
 		__field(xfs_fsblock_t, startblock)
 		__field(xfs_filblks_t, blockcount)
@@ -1245,33 +1239,33 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
 		__entry->size = ip->i_d.di_size;
 		__entry->offset = offset;
 		__entry->count = count;
-		__entry->type = type;
+		__entry->whichfork = whichfork;
 		__entry->startoff = irec ? irec->br_startoff : 0;
 		__entry->startblock = irec ? irec->br_startblock : 0;
 		__entry->blockcount = irec ? irec->br_blockcount : 0;
 	),
 	TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
-		  "type %s startoff 0x%llx startblock %lld blockcount 0x%llx",
+		  "fork %s startoff 0x%llx startblock %lld blockcount 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->size,
 		  __entry->offset,
 		  __entry->count,
-		  __print_symbolic(__entry->type, XFS_IO_TYPES),
+		  __entry->whichfork == XFS_COW_FORK ? "cow" : "data",
 		  __entry->startoff,
 		  (int64_t)__entry->startblock,
 		  __entry->blockcount)
 )
 
-#define DEFINE_IOMAP_EVENT(name)	\
+#define DEFINE_IMAP_EVENT(name)	\
 DEFINE_EVENT(xfs_imap_class, name,	\
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,	\
-		 int type, struct xfs_bmbt_irec *irec),		\
-	TP_ARGS(ip, offset, count, type, irec))
-DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
-DEFINE_IOMAP_EVENT(xfs_iomap_found);
+		 int whichfork, struct xfs_bmbt_irec *irec),		\
+	TP_ARGS(ip, offset, count, whichfork, irec))
+DEFINE_IMAP_EVENT(xfs_map_blocks_found);
+DEFINE_IMAP_EVENT(xfs_map_blocks_alloc);
+DEFINE_IMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IMAP_EVENT(xfs_iomap_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -3078,7 +3072,7 @@ DEFINE_EVENT(xfs_inode_irec_class, name, \
 DEFINE_INODE_EVENT(xfs_reflink_set_inode_flag);
 DEFINE_INODE_EVENT(xfs_reflink_unset_inode_flag);
 DEFINE_ITRUNC_EVENT(xfs_reflink_update_inode_size);
-DEFINE_IOMAP_EVENT(xfs_reflink_remap_imap);
+DEFINE_IMAP_EVENT(xfs_reflink_remap_imap);
 TRACE_EVENT(xfs_reflink_remap_blocks_loop,
 	TP_PROTO(struct xfs_inode *src, xfs_fileoff_t soffset,
 		 xfs_filblks_t len, struct xfs_inode *dest,
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 02/10] xfs: remove the s_maxbytes checks in xfs_map_blocks
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
  2019-02-11 12:54 ` [PATCH 01/10] xfs: remove the io_type field from the writeback context and ioend Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 12:54 ` [PATCH 03/10] xfs: simplify the xfs_bmap_btree_to_extents calling conventions Christoph Hellwig
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster, Darrick J . Wong

We already ensure all data fits into s_maxbytes in the write / fault
path.  The only reason we have them here is that they were copy and
pasted from xfs_bmapi_read when we stopped using that function.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a3fa60d1d2df..8bfb62d8776f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -338,7 +338,8 @@ xfs_map_blocks(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			count = i_blocksize(inode);
-	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
+	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
 	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
 	struct xfs_bmbt_irec	imap;
 	struct xfs_iext_cursor	icur;
@@ -374,11 +375,6 @@ xfs_map_blocks(
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
-	ASSERT(offset <= mp->m_super->s_maxbytes);
-
-	if (offset > mp->m_super->s_maxbytes - count)
-		count = mp->m_super->s_maxbytes - offset;
-	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 
 	/*
 	 * Check if this is offset is covered by a COW extents, and if yes use
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 03/10] xfs: simplify the xfs_bmap_btree_to_extents calling conventions
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
  2019-02-11 12:54 ` [PATCH 01/10] xfs: remove the io_type field from the writeback context and ioend Christoph Hellwig
  2019-02-11 12:54 ` [PATCH 02/10] xfs: remove the s_maxbytes checks in xfs_map_blocks Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 12:54 ` [PATCH 04/10] xfs: factor out two helpers from xfs_bmapi_write Christoph Hellwig
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

Move boilerplate code from the callers into xfs_bmap_btree_to_extents:

 - exit early without failure if we don't need to convert to the
   extent format
 - assert that we have a btree cursor
 - don't reinitialize the passed in logflags argument

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 78 ++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 52 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f4a65330a2a9..7fa454f71f46 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -577,42 +577,44 @@ __xfs_bmap_add_free(
  */
 
 /*
- * Transform a btree format file with only one leaf node, where the
- * extents list will fit in the inode, into an extents format file.
- * Since the file extents are already in-core, all we have to do is
- * give up the space for the btree root and pitch the leaf block.
+ * Convert the inode format to extent format if it currently is in btree format,
+ * but the extent list is small enough that it fits into the extent format.
+ 8
+ * Since the extents are already in-core, all we have to do is give up the space
+ * for the btree root and pitch the leaf block.
  */
 STATIC int				/* error */
 xfs_bmap_btree_to_extents(
-	xfs_trans_t		*tp,	/* transaction pointer */
-	xfs_inode_t		*ip,	/* incore inode pointer */
-	xfs_btree_cur_t		*cur,	/* btree cursor */
+	struct xfs_trans	*tp,	/* transaction pointer */
+	struct xfs_inode	*ip,	/* incore inode pointer */
+	struct xfs_btree_cur	*cur,	/* btree cursor */
 	int			*logflagsp, /* inode logging flags */
 	int			whichfork)  /* data or attr fork */
 {
-	/* REFERENCED */
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_btree_block	*rblock = ifp->if_broot;
 	struct xfs_btree_block	*cblock;/* child btree block */
 	xfs_fsblock_t		cbno;	/* child block number */
 	xfs_buf_t		*cbp;	/* child block's buffer */
 	int			error;	/* error return value */
-	struct xfs_ifork	*ifp;	/* inode fork data */
-	xfs_mount_t		*mp;	/* mount point structure */
 	__be64			*pp;	/* ptr to block address */
-	struct xfs_btree_block	*rblock;/* root btree block */
 	struct xfs_owner_info	oinfo;
 
-	mp = ip->i_mount;
-	ifp = XFS_IFORK_PTR(ip, whichfork);
+	/* check if we actually need the extent format first: */
+	if (!xfs_bmap_wants_extents(ip, whichfork))
+		return 0;
+
+	ASSERT(cur);
 	ASSERT(whichfork != XFS_COW_FORK);
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
-	rblock = ifp->if_broot;
 	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
+
 	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 	cbno = be64_to_cpu(*pp);
-	*logflagsp = 0;
 #ifdef DEBUG
 	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
 			xfs_btree_check_lptr(cur, cbno, 1));
@@ -635,7 +637,7 @@ xfs_bmap_btree_to_extents(
 	ASSERT(ifp->if_broot == NULL);
 	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
-	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
+	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 	return 0;
 }
 
@@ -4424,19 +4426,10 @@ xfs_bmapi_write(
 	}
 	*nmap = n;
 
-	/*
-	 * Transform from btree to extents, give it cur.
-	 */
-	if (xfs_bmap_wants_extents(ip, whichfork)) {
-		int		tmp_logflags = 0;
-
-		ASSERT(bma.cur);
-		error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
-			&tmp_logflags, whichfork);
-		bma.logflags |= tmp_logflags;
-		if (error)
-			goto error0;
-	}
+	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
+			whichfork);
+	if (error)
+		goto error0;
 
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
 	       XFS_IFORK_NEXTENTS(ip, whichfork) >
@@ -4574,13 +4567,7 @@ xfs_bmapi_remap(
 	if (error)
 		goto error0;
 
-	if (xfs_bmap_wants_extents(ip, whichfork)) {
-		int		tmp_logflags = 0;
-
-		error = xfs_bmap_btree_to_extents(tp, ip, cur,
-			&tmp_logflags, whichfork);
-		logflags |= tmp_logflags;
-	}
+	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
 
 error0:
 	if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS)
@@ -5444,24 +5431,11 @@ __xfs_bunmapi(
 		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
 				&tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
-		if (error)
-			goto error0;
-	}
-	/*
-	 * transform from btree to extents, give it cur
-	 */
-	else if (xfs_bmap_wants_extents(ip, whichfork)) {
-		ASSERT(cur != NULL);
-		error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
+	} else {
+		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
 			whichfork);
-		logflags |= tmp_logflags;
-		if (error)
-			goto error0;
 	}
-	/*
-	 * transform from extents to local?
-	 */
-	error = 0;
+
 error0:
 	/*
 	 * Log everything.  Do this after conversion, there's no point in
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 04/10] xfs: factor out two helpers from xfs_bmapi_write
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
                   ` (2 preceding siblings ...)
  2019-02-11 12:54 ` [PATCH 03/10] xfs: simplify the xfs_bmap_btree_to_extents calling conventions Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 12:54 ` [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling " Christoph Hellwig
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

We want to be able to reuse them for the upcoming dedidcated delalloc
convert routine.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 78 ++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 34 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7fa454f71f46..a9c9bd39d822 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4194,6 +4194,44 @@ xfs_bmapi_convert_unwritten(
 	return 0;
 }
 
+static inline xfs_extlen_t
+xfs_bmapi_minleft(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	int			fork)
+{
+	if (tp && tp->t_firstblock != NULLFSBLOCK)
+		return 0;
+	if (XFS_IFORK_FORMAT(ip, fork) != XFS_DINODE_FMT_BTREE)
+		return 1;
+	return be16_to_cpu(XFS_IFORK_PTR(ip, fork)->if_broot->bb_level) + 1;
+}
+
+/*
+ * Log whatever the flags say, even if error.  Otherwise we might miss detecting
+ * a case where the data is changed, there's an error, and it's not logged so we
+ * don't shutdown when we should.  Don't bother logging extents/btree changes if
+ * we converted to the other format.
+ */
+static void
+xfs_bmapi_finish(
+	struct xfs_bmalloca	*bma,
+	int			whichfork,
+	int			error)
+{
+	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
+	    XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+		bma->logflags &= ~xfs_ilog_fext(whichfork);
+	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
+		 XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
+
+	if (bma->logflags)
+		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
+	if (bma->cur)
+		xfs_btree_del_cursor(bma->cur, error);
+}
+
 /*
  * Map file blocks to filesystem blocks, and allocate blocks or convert the
  * extent state if necessary.  Details behaviour is controlled by the flags
@@ -4273,15 +4311,6 @@ xfs_bmapi_write(
 
 	XFS_STATS_INC(mp, xs_blk_mapw);
 
-	if (!tp || tp->t_firstblock == NULLFSBLOCK) {
-		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
-			bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
-		else
-			bma.minleft = 1;
-	} else {
-		bma.minleft = 0;
-	}
-
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
 		error = xfs_iread_extents(tp, ip, whichfork);
 		if (error)
@@ -4296,6 +4325,7 @@ xfs_bmapi_write(
 	bma.ip = ip;
 	bma.total = total;
 	bma.datatype = 0;
+	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
 
 	/*
 	 * The delalloc flag means the caller wants to allocate the entire
@@ -4434,32 +4464,12 @@ xfs_bmapi_write(
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
 	       XFS_IFORK_NEXTENTS(ip, whichfork) >
 		XFS_IFORK_MAXEXT(ip, whichfork));
-	error = 0;
+	xfs_bmapi_finish(&bma, whichfork, 0);
+	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
+		orig_nmap, *nmap);
+	return 0;
 error0:
-	/*
-	 * Log everything.  Do this after conversion, there's no point in
-	 * logging the extent records if we've converted to btree format.
-	 */
-	if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
-	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
-		bma.logflags &= ~xfs_ilog_fext(whichfork);
-	else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
-		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
-		bma.logflags &= ~xfs_ilog_fbroot(whichfork);
-	/*
-	 * Log whatever the flags say, even if error.  Otherwise we might miss
-	 * detecting a case where the data is changed, there's an error,
-	 * and it's not logged so we don't shutdown when we should.
-	 */
-	if (bma.logflags)
-		xfs_trans_log_inode(tp, ip, bma.logflags);
-
-	if (bma.cur) {
-		xfs_btree_del_cursor(bma.cur, error);
-	}
-	if (!error)
-		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
-			orig_nmap, *nmap);
+	xfs_bmapi_finish(&bma, whichfork, error);
 	return error;
 }
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling from xfs_bmapi_write
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
                   ` (3 preceding siblings ...)
  2019-02-11 12:54 ` [PATCH 04/10] xfs: factor out two helpers from xfs_bmapi_write Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 15:21   ` Brian Foster
  2019-02-11 12:54 ` [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc Christoph Hellwig
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

Delalloc conversion has traditionally been part of our function to
allocate blocks on disk (first xfs_bmapi, then xfs_bmapi_write), but
delalloc conversion is a little special as we really do not want
to allocate blocks over holes, for which we don't have reservations.

Split the delalloc conversions into a separate helper to keep the
code simple and structured.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c | 104 +++++++++++++++++++++------------------
 fs/xfs/libxfs/xfs_bmap.h |   4 --
 2 files changed, 56 insertions(+), 52 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a9c9bd39d822..be2cb5800e02 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4327,22 +4327,6 @@ xfs_bmapi_write(
 	bma.datatype = 0;
 	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
 
-	/*
-	 * The delalloc flag means the caller wants to allocate the entire
-	 * delalloc extent backing bno where bno may not necessarily match the
-	 * startoff. Now that we've looked up the extent, reset the range to
-	 * map based on the extent in the file. If we're in a hole, this may be
-	 * an error so don't adjust anything.
-	 */
-	if ((flags & XFS_BMAPI_DELALLOC) &&
-	    !eof && bno >= bma.got.br_startoff) {
-		bno = bma.got.br_startoff;
-		len = bma.got.br_blockcount;
-#ifdef DEBUG
-		orig_bno = bno;
-		orig_len = len;
-#endif
-	}
 	n = 0;
 	end = bno + len;
 	obno = bno;
@@ -4359,26 +4343,7 @@ xfs_bmapi_write(
 			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
 			         (flags & XFS_BMAPI_COWFORK)));
 
-			if (flags & XFS_BMAPI_DELALLOC) {
-				/*
-				 * For the COW fork we can reasonably get a
-				 * request for converting an extent that races
-				 * with other threads already having converted
-				 * part of it, as there converting COW to
-				 * regular blocks is not protected using the
-				 * IOLOCK.
-				 */
-				ASSERT(flags & XFS_BMAPI_COWFORK);
-				if (!(flags & XFS_BMAPI_COWFORK)) {
-					error = -EIO;
-					goto error0;
-				}
-
-				if (eof || bno >= end)
-					break;
-			} else {
-				need_alloc = true;
-			}
+			need_alloc = true;
 		} else if (isnullstartblock(bma.got.br_startblock)) {
 			wasdelay = true;
 		}
@@ -4487,23 +4452,66 @@ xfs_bmapi_convert_delalloc(
 	int			whichfork,
 	struct xfs_bmbt_irec	*imap)
 {
-	int			flags = XFS_BMAPI_DELALLOC;
-	int			nimaps = 1;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_bmalloca	bma = { NULL };
 	int			error;
-	int			total = XFS_EXTENTADD_SPACE_RES(ip->i_mount,
-								XFS_DATA_FORK);
 
-	if (whichfork == XFS_COW_FORK)
-		flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
+	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
+	    bma.got.br_startoff > offset_fsb) {
+		/*
+		 * No extent found in the range we are trying to convert.  This
+		 * should only happen for the COW fork, where another thread
+		 * might have moved the extent to the data fork in the meantime.
+		 */
+		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
+		return -EAGAIN;
+	}
 
 	/*
-	 * The delalloc flag means to allocate the entire extent; pass a dummy
-	 * length of 1.
+	 * If we find a real extent here we raced with another thread converting
+	 * the extent.  Just return the real extent at this offset.
 	 */
-	error = xfs_bmapi_write(tp, ip, offset_fsb, 1, flags, total, imap,
-				&nimaps);
-	if (!error && !nimaps)
-		error = -EFSCORRUPTED;
+	if (!isnullstartblock(bma.got.br_startblock)) {
+		*imap = bma.got;
+		return 0;
+	}
+
+	bma.tp = tp;
+	bma.ip = ip;
+	bma.wasdel = true;
+	bma.offset = bma.got.br_startoff;
+	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
+	bma.total = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
+	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
+	if (whichfork == XFS_COW_FORK)
+		bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
+
+	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
+		bma.prev.br_startoff = NULLFILEOFF;
+
+	error = xfs_bmapi_allocate(&bma);
+	if (error)
+		goto out_finish;
+	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK)) {
+		error = -ENOSPC;
+		goto out_finish;
+	}
+
+	ASSERT(!isnullstartblock(bma.got.br_startblock));
+	ASSERT(bma.got.br_startblock || XFS_IS_REALTIME_INODE(ip));
+	*imap = bma.got;
+
+	if (whichfork == XFS_COW_FORK) {
+		error = xfs_refcount_alloc_cow_extent(tp, bma.blkno,
+				bma.length);
+		if (error)
+			goto out_finish;
+	}
+
+	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
+			whichfork);
+out_finish:
+	xfs_bmapi_finish(&bma, whichfork, error);
 	return error;
 }
 
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 4dc7d1a02b35..b5eca7a26949 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -95,9 +95,6 @@ struct xfs_extent_free_item
 /* Map something in the CoW fork. */
 #define XFS_BMAPI_COWFORK	0x200
 
-/* Only convert delalloc space, don't allocate entirely new extents */
-#define XFS_BMAPI_DELALLOC	0x400
-
 /* Only convert unwritten extents, don't allocate new blocks */
 #define XFS_BMAPI_CONVERT_ONLY	0x800
 
@@ -117,7 +114,6 @@ struct xfs_extent_free_item
 	{ XFS_BMAPI_ZERO,	"ZERO" }, \
 	{ XFS_BMAPI_REMAP,	"REMAP" }, \
 	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
-	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }, \
 	{ XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" }, \
 	{ XFS_BMAPI_NODISCARD,	"NODISCARD" }, \
 	{ XFS_BMAPI_NORMAP,	"NORMAP" }
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
                   ` (4 preceding siblings ...)
  2019-02-11 12:54 ` [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling " Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 15:21   ` Brian Foster
  2019-02-14 19:38   ` Darrick J. Wong
  2019-02-11 12:54 ` [PATCH 07/10] xfs: move stat accounting " Christoph Hellwig
                   ` (4 subsequent siblings)
  10 siblings, 2 replies; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

No need to deal with the transaction and the inode locking in the
caller.  Also move to automatic unlocking on transaction commit or
cancel to simplify the code a little more.

Note that we also switch to passing whichfork as the second paramters,
matching what most related functions do.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c | 35 ++++++++++++++++++++++++++++++-----
 fs/xfs/libxfs/xfs_bmap.h |  5 +++--
 fs/xfs/xfs_iomap.c       | 32 ++++----------------------------
 3 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index be2cb5800e02..d9d66e1856d7 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4446,16 +4446,30 @@ xfs_bmapi_write(
  */
 int
 xfs_bmapi_convert_delalloc(
-	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
-	xfs_fileoff_t		offset_fsb,
 	int			whichfork,
-	struct xfs_bmbt_irec	*imap)
+	xfs_fileoff_t		offset_fsb,
+	struct xfs_bmbt_irec	*imap,
+	unsigned int		*seq)
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_bmalloca	bma = { NULL };
+	struct xfs_trans	*tp;
 	int			error;
 
+	/*
+	 * Space for the extent and indirect blocks was reserved when the
+	 * delalloc extent was created so there's no need to do so here.
+	 */
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
+				XFS_TRANS_RESERVE, &tp);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
 	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
 	    bma.got.br_startoff > offset_fsb) {
 		/*
@@ -4464,7 +4478,8 @@ xfs_bmapi_convert_delalloc(
 		 * might have moved the extent to the data fork in the meantime.
 		 */
 		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
-		return -EAGAIN;
+		error = -EAGAIN;
+		goto out_trans_cancel;
 	}
 
 	/*
@@ -4473,7 +4488,8 @@ xfs_bmapi_convert_delalloc(
 	 */
 	if (!isnullstartblock(bma.got.br_startblock)) {
 		*imap = bma.got;
-		return 0;
+		*seq = READ_ONCE(ifp->if_seq);
+		goto out_trans_cancel;
 	}
 
 	bma.tp = tp;
@@ -4500,6 +4516,7 @@ xfs_bmapi_convert_delalloc(
 	ASSERT(!isnullstartblock(bma.got.br_startblock));
 	ASSERT(bma.got.br_startblock || XFS_IS_REALTIME_INODE(ip));
 	*imap = bma.got;
+	*seq = READ_ONCE(ifp->if_seq);
 
 	if (whichfork == XFS_COW_FORK) {
 		error = xfs_refcount_alloc_cow_extent(tp, bma.blkno,
@@ -4510,8 +4527,16 @@ xfs_bmapi_convert_delalloc(
 
 	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
 			whichfork);
+	if (error)
+		goto out_finish;
+
+	xfs_bmapi_finish(&bma, whichfork, 0);
+	return xfs_trans_commit(tp);
+
 out_finish:
 	xfs_bmapi_finish(&bma, whichfork, error);
+out_trans_cancel:
+	xfs_trans_cancel(tp);
 	return error;
 }
 
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b5eca7a26949..78b190b6e908 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -223,8 +223,9 @@ int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
 		xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
 		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
 		int eof);
-int	xfs_bmapi_convert_delalloc(struct xfs_trans *, struct xfs_inode *,
-		xfs_fileoff_t, int, struct xfs_bmbt_irec *);
+int	xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork,
+		xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap,
+		unsigned int *seq);
 
 static inline void
 xfs_bmap_add_free(
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fd3aacd4bf02..39be741cac5a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -684,11 +684,9 @@ xfs_iomap_write_allocate(
 	unsigned int		*seq)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 	xfs_fileoff_t		offset_fsb;
 	xfs_fileoff_t		map_start_fsb;
 	xfs_extlen_t		map_count_fsb;
-	struct xfs_trans	*tp;
 	int			error = 0;
 
 	/*
@@ -716,17 +714,8 @@ xfs_iomap_write_allocate(
 		/*
 		 * Allocate in a loop because it may take several attempts to
 		 * allocate real blocks for a contiguous delalloc extent if free
-		 * space is sufficiently fragmented. Note that space for the
-		 * extent and indirect blocks was reserved when the delalloc
-		 * extent was created so there's no need to do so here.
+		 * space is sufficiently fragmented.
 		 */
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
-					XFS_TRANS_RESERVE, &tp);
-		if (error)
-			return error;
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_trans_ijoin(tp, ip, 0);
 
 		/*
 		 * ilock was dropped since imap was populated which means it
@@ -737,17 +726,10 @@ xfs_iomap_write_allocate(
 		 * caller. We'll trim it down to the caller's most recently
 		 * validated range before we return.
 		 */
-		error = xfs_bmapi_convert_delalloc(tp, ip, offset_fsb,
-						   whichfork, imap);
-		if (error)
-			goto trans_cancel;
-
-		error = xfs_trans_commit(tp);
+		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
+				imap, seq);
 		if (error)
-			goto error0;
-
-		*seq = READ_ONCE(ifp->if_seq);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			return error;
 
 		/*
 		 * See if we were able to allocate an extent that covers at
@@ -766,12 +748,6 @@ xfs_iomap_write_allocate(
 			return 0;
 		}
 	}
-
-trans_cancel:
-	xfs_trans_cancel(tp);
-error0:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return error;
 }
 
 int
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 07/10] xfs: move stat accounting to xfs_bmapi_convert_delalloc
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
                   ` (5 preceding siblings ...)
  2019-02-11 12:54 ` [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 15:21   ` Brian Foster
  2019-02-11 12:54 ` [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c Christoph Hellwig
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

This way we can actually count how many bytes got converted and how many
calls we need, unlike in the caller which doesn't have the detailed
view.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c | 3 +++
 fs/xfs/xfs_iomap.c       | 4 ----
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index d9d66e1856d7..dc3f0608377d 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4513,6 +4513,9 @@ xfs_bmapi_convert_delalloc(
 		goto out_finish;
 	}
 
+	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
+	XFS_STATS_INC(mp, xs_xstrat_quick);
+
 	ASSERT(!isnullstartblock(bma.got.br_startblock));
 	ASSERT(bma.got.br_startblock || XFS_IS_REALTIME_INODE(ip));
 	*imap = bma.got;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 39be741cac5a..15da53b5fb53 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -707,9 +707,6 @@ xfs_iomap_write_allocate(
 	map_start_fsb = imap->br_startoff;
 	map_count_fsb = imap->br_blockcount;
 
-	XFS_STATS_ADD(mp, xs_xstrat_bytes,
-		      XFS_FSB_TO_B(mp, imap->br_blockcount));
-
 	while (true) {
 		/*
 		 * Allocate in a loop because it may take several attempts to
@@ -741,7 +738,6 @@ xfs_iomap_write_allocate(
 		if ((offset_fsb >= imap->br_startoff) &&
 		    (offset_fsb < (imap->br_startoff +
 				   imap->br_blockcount))) {
-			XFS_STATS_INC(mp, xs_xstrat_quick);
 			xfs_trim_extent(imap, map_start_fsb, map_count_fsb);
 			ASSERT(offset_fsb >= imap->br_startoff &&
 			       offset_fsb < imap->br_startoff + imap->br_blockcount);
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
                   ` (6 preceding siblings ...)
  2019-02-11 12:54 ` [PATCH 07/10] xfs: move stat accounting " Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 15:22   ` Brian Foster
  2019-02-11 12:54 ` [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks Christoph Hellwig
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

This function is a small wrapper only used by the writeback code, so
move it together with the writeback code and simplify it down to the
glorified do { } while loop that is now is.

A few bits intentionally got lost here: no need to call xfs_qm_dqattach
because quotas are always attached when we create the delalloc
reservation, and no need for the imap->br_startblock == 0 check given
that xfs_bmapi_convert_delalloc already has a WARN_ON_ONCE for exactly
that condition.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c  | 47 ++++++++++++++++++++++++---
 fs/xfs/xfs_iomap.c | 81 ----------------------------------------------
 fs/xfs/xfs_iomap.h |  2 --
 3 files changed, 42 insertions(+), 88 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 8bfb62d8776f..403df647c0e4 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -329,6 +329,44 @@ xfs_imap_valid(
 	return true;
 }
 
+/*
+ * Pass in a dellalloc extent and convert it to real extents, return the real
+ * extent that maps offset_fsb in wpc->imap.
+ *
+ * Given that ilock was dropped since got was populated it might no longer be
+ * valid, and we only use it to trim the return extent to this range to maintain
+ * consistency with what the caller expects.
+ *
+ * The current page is held locked so nothing could have removed the block
+ * backing offset_fsb.
+ */
+static int
+xfs_convert_blocks(
+	struct xfs_writepage_ctx *wpc,
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		offset_fsb,
+	struct xfs_bmbt_irec	*got)
+{
+	int			error;
+
+	/*
+	 * Attempt to allocate whatever delalloc extent currently backs
+	 * offset_fsb and put the result into wpc->imap.  Allocate in a loop
+	 * because it may take several attempts to allocate real blocks for a
+	 * contiguous delalloc extent if free space is sufficiently fragmented.
+	 */
+	do {
+		error = xfs_bmapi_convert_delalloc(ip, wpc->fork, offset_fsb,
+				&wpc->imap, wpc->fork == XFS_COW_FORK ?
+					&wpc->cow_seq : &wpc->data_seq);
+		if (error)
+			return error;
+	} while (wpc->imap.br_startoff + wpc->imap.br_blockcount <= offset_fsb);
+
+	xfs_trim_extent(&wpc->imap, got->br_startoff, got->br_blockcount);
+	return 0;
+}
+
 STATIC int
 xfs_map_blocks(
 	struct xfs_writepage_ctx *wpc,
@@ -458,14 +496,13 @@ xfs_map_blocks(
 	trace_xfs_map_blocks_found(ip, offset, count, wpc->fork, &imap);
 	return 0;
 allocate_blocks:
-	error = xfs_iomap_write_allocate(ip, wpc->fork, offset, &imap,
-			wpc->fork == XFS_COW_FORK ?
-					 &wpc->cow_seq : &wpc->data_seq);
+	error = xfs_convert_blocks(wpc, ip, offset_fsb, &imap);
 	if (error)
 		return error;
+	ASSERT(wpc->imap.br_startoff <= offset_fsb);
+	ASSERT(wpc->imap.br_startoff + wpc->imap.br_blockcount >= offset_fsb);
 	ASSERT(wpc->fork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
-	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
-	wpc->imap = imap;
+	       wpc->imap.br_startoff + wpc->imap.br_blockcount <= cow_fsb);
 	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->fork, &imap);
 	return 0;
 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 15da53b5fb53..361dfe7af783 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -665,87 +665,6 @@ xfs_file_iomap_begin_delay(
 	return error;
 }
 
-/*
- * Pass in a delayed allocate extent, convert it to real extents;
- * return to the caller the extent we create which maps on top of
- * the originating callers request.
- *
- * Called without a lock on the inode.
- *
- * We no longer bother to look at the incoming map - all we have to
- * guarantee is that whatever we allocate fills the required range.
- */
-int
-xfs_iomap_write_allocate(
-	struct xfs_inode	*ip,
-	int			whichfork,
-	xfs_off_t		offset,
-	struct xfs_bmbt_irec	*imap,
-	unsigned int		*seq)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		offset_fsb;
-	xfs_fileoff_t		map_start_fsb;
-	xfs_extlen_t		map_count_fsb;
-	int			error = 0;
-
-	/*
-	 * Make sure that the dquots are there.
-	 */
-	error = xfs_qm_dqattach(ip);
-	if (error)
-		return error;
-
-	/*
-	 * Store the file range the caller is interested in because it encodes
-	 * state such as potential overlap with COW fork blocks. We must trim
-	 * the allocated extent down to this range to maintain consistency with
-	 * what the caller expects. Revalidation of the range itself is the
-	 * responsibility of the caller.
-	 */
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	map_start_fsb = imap->br_startoff;
-	map_count_fsb = imap->br_blockcount;
-
-	while (true) {
-		/*
-		 * Allocate in a loop because it may take several attempts to
-		 * allocate real blocks for a contiguous delalloc extent if free
-		 * space is sufficiently fragmented.
-		 */
-
-		/*
-		 * ilock was dropped since imap was populated which means it
-		 * might no longer be valid. The current page is held locked so
-		 * nothing could have removed the block backing offset_fsb.
-		 * Attempt to allocate whatever delalloc extent currently backs
-		 * offset_fsb and put the result in the imap pointer from the
-		 * caller. We'll trim it down to the caller's most recently
-		 * validated range before we return.
-		 */
-		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
-				imap, seq);
-		if (error)
-			return error;
-
-		/*
-		 * See if we were able to allocate an extent that covers at
-		 * least part of the callers request.
-		 */
-		if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
-			return xfs_alert_fsblock_zero(ip, imap);
-
-		if ((offset_fsb >= imap->br_startoff) &&
-		    (offset_fsb < (imap->br_startoff +
-				   imap->br_blockcount))) {
-			xfs_trim_extent(imap, map_start_fsb, map_count_fsb);
-			ASSERT(offset_fsb >= imap->br_startoff &&
-			       offset_fsb < imap->br_startoff + imap->br_blockcount);
-			return 0;
-		}
-	}
-}
-
 int
 xfs_iomap_write_unwritten(
 	xfs_inode_t	*ip,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index c6170548831b..6b16243db0b7 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -13,8 +13,6 @@ struct xfs_bmbt_irec;
 
 int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 			struct xfs_bmbt_irec *, int);
-int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
-			struct xfs_bmbt_irec *, unsigned int *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
                   ` (7 preceding siblings ...)
  2019-02-11 12:54 ` [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 15:22   ` Brian Foster
  2019-02-11 12:54 ` [PATCH 10/10] xfs: retry COW fork delalloc conversion when no extent was found Christoph Hellwig
  2019-02-12  0:03 ` small fixes and optimizations for delalloc and reflink Darrick J. Wong
  10 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

Now that we properly handle the race with truncate in the delalloc
allocator there is no need to short cut this exceptional case earlier
on.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 403df647c0e4..6a8937a833ad 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -426,26 +426,6 @@ xfs_map_blocks(
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 		wpc->fork = XFS_COW_FORK;
-
-		/*
-		 * Truncate can race with writeback since writeback doesn't
-		 * take the iolock and truncate decreases the file size before
-		 * it starts truncating the pages between new_size and old_size.
-		 * Therefore, we can end up in the situation where writeback
-		 * gets a CoW fork mapping but the truncate makes the mapping
-		 * invalid and we end up in here trying to get a new mapping.
-		 * bail out here so that we simply never get a valid mapping
-		 * and so we drop the write altogether.  The page truncation
-		 * will kill the contents anyway.
-		 */
-		if (offset > i_size_read(inode)) {
-			wpc->imap.br_blockcount = end_fsb - offset_fsb;
-			wpc->imap.br_startoff = offset_fsb;
-			wpc->imap.br_startblock = HOLESTARTBLOCK;
-			wpc->imap.br_state = XFS_EXT_NORM;
-			return 0;
-		}
-
 		goto allocate_blocks;
 	}
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 10/10] xfs: retry COW fork delalloc conversion when no extent was found
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
                   ` (8 preceding siblings ...)
  2019-02-11 12:54 ` [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 15:23   ` Brian Foster
  2019-02-12  0:03 ` small fixes and optimizations for delalloc and reflink Darrick J. Wong
  10 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

While we can only truncate a block under the page lock for the current
page, there is no high-level synchronization for moving extents from the
COW to the data fork.  Because of that there is a chance that a
delalloc conversion for the COW fork might not find any extents to
convert.  In that case we should retry the whole block lookup and now
find the blocks in the data fork.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6a8937a833ad..e1723ac6c533 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -338,7 +338,8 @@ xfs_imap_valid(
  * consistency with what the caller expects.
  *
  * The current page is held locked so nothing could have removed the block
- * backing offset_fsb.
+ * backing offset_fsb, although it could have moved from the COW to the data
+ * fork by another thread.
  */
 static int
 xfs_convert_blocks(
@@ -381,6 +382,7 @@ xfs_map_blocks(
 	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
 	struct xfs_bmbt_irec	imap;
 	struct xfs_iext_cursor	icur;
+	int			retries = 0;
 	int			error = 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -410,6 +412,7 @@ xfs_map_blocks(
 	 * into real extents.  If we return without a valid map, it means we
 	 * landed in a hole and we skip the block.
 	 */
+retry:
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
@@ -477,8 +480,19 @@ xfs_map_blocks(
 	return 0;
 allocate_blocks:
 	error = xfs_convert_blocks(wpc, ip, offset_fsb, &imap);
-	if (error)
+	if (error) {
+		/*
+		 * If we failed to find the extent in the COW fork we might have
+		 * raced with a COW to data fork conversion or truncate.
+		 * Restart the lookup to catch the extent in the data fork for
+		 * the former case, but prevent additional retries to avoid
+		 * looping forever for the latter case.
+		 */
+		if (error == -EAGAIN && wpc->fork == XFS_COW_FORK && !retries++)
+			goto retry;
+		ASSERT(error != -EAGAIN);
 		return error;
+	}
 	ASSERT(wpc->imap.br_startoff <= offset_fsb);
 	ASSERT(wpc->imap.br_startoff + wpc->imap.br_blockcount >= offset_fsb);
 	ASSERT(wpc->fork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling from xfs_bmapi_write
  2019-02-11 12:54 ` [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling " Christoph Hellwig
@ 2019-02-11 15:21   ` Brian Foster
  0 siblings, 0 replies; 26+ messages in thread
From: Brian Foster @ 2019-02-11 15:21 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Feb 11, 2019 at 01:54:22PM +0100, Christoph Hellwig wrote:
> Delalloc conversion has traditionally been part of our function to
> allocate blocks on disk (first xfs_bmapi, then xfs_bmapi_write), but
> delalloc conversion is a little special as we really do not want
> to allocate blocks over holes, for which we don't have reservations.
> 
> Split the delalloc conversions into a separate helper to keep the
> code simple and structured.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  fs/xfs/libxfs/xfs_bmap.c | 104 +++++++++++++++++++++------------------
>  fs/xfs/libxfs/xfs_bmap.h |   4 --
>  2 files changed, 56 insertions(+), 52 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index a9c9bd39d822..be2cb5800e02 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -4327,22 +4327,6 @@ xfs_bmapi_write(
>  	bma.datatype = 0;
>  	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
>  
> -	/*
> -	 * The delalloc flag means the caller wants to allocate the entire
> -	 * delalloc extent backing bno where bno may not necessarily match the
> -	 * startoff. Now that we've looked up the extent, reset the range to
> -	 * map based on the extent in the file. If we're in a hole, this may be
> -	 * an error so don't adjust anything.
> -	 */
> -	if ((flags & XFS_BMAPI_DELALLOC) &&
> -	    !eof && bno >= bma.got.br_startoff) {
> -		bno = bma.got.br_startoff;
> -		len = bma.got.br_blockcount;
> -#ifdef DEBUG
> -		orig_bno = bno;
> -		orig_len = len;
> -#endif
> -	}
>  	n = 0;
>  	end = bno + len;
>  	obno = bno;
> @@ -4359,26 +4343,7 @@ xfs_bmapi_write(
>  			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
>  			         (flags & XFS_BMAPI_COWFORK)));
>  
> -			if (flags & XFS_BMAPI_DELALLOC) {
> -				/*
> -				 * For the COW fork we can reasonably get a
> -				 * request for converting an extent that races
> -				 * with other threads already having converted
> -				 * part of it, as there converting COW to
> -				 * regular blocks is not protected using the
> -				 * IOLOCK.
> -				 */
> -				ASSERT(flags & XFS_BMAPI_COWFORK);
> -				if (!(flags & XFS_BMAPI_COWFORK)) {
> -					error = -EIO;
> -					goto error0;
> -				}
> -
> -				if (eof || bno >= end)
> -					break;
> -			} else {
> -				need_alloc = true;
> -			}
> +			need_alloc = true;
>  		} else if (isnullstartblock(bma.got.br_startblock)) {
>  			wasdelay = true;
>  		}
> @@ -4487,23 +4452,66 @@ xfs_bmapi_convert_delalloc(
>  	int			whichfork,
>  	struct xfs_bmbt_irec	*imap)
>  {
> -	int			flags = XFS_BMAPI_DELALLOC;
> -	int			nimaps = 1;
> +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
> +	struct xfs_bmalloca	bma = { NULL };
>  	int			error;
> -	int			total = XFS_EXTENTADD_SPACE_RES(ip->i_mount,
> -								XFS_DATA_FORK);
>  
> -	if (whichfork == XFS_COW_FORK)
> -		flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
> +	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
> +	    bma.got.br_startoff > offset_fsb) {
> +		/*
> +		 * No extent found in the range we are trying to convert.  This
> +		 * should only happen for the COW fork, where another thread
> +		 * might have moved the extent to the data fork in the meantime.
> +		 */
> +		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
> +		return -EAGAIN;
> +	}
>  
>  	/*
> -	 * The delalloc flag means to allocate the entire extent; pass a dummy
> -	 * length of 1.
> +	 * If we find a real extent here we raced with another thread converting
> +	 * the extent.  Just return the real extent at this offset.
>  	 */
> -	error = xfs_bmapi_write(tp, ip, offset_fsb, 1, flags, total, imap,
> -				&nimaps);
> -	if (!error && !nimaps)
> -		error = -EFSCORRUPTED;
> +	if (!isnullstartblock(bma.got.br_startblock)) {
> +		*imap = bma.got;
> +		return 0;
> +	}
> +
> +	bma.tp = tp;
> +	bma.ip = ip;
> +	bma.wasdel = true;
> +	bma.offset = bma.got.br_startoff;
> +	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
> +	bma.total = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
> +	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
> +	if (whichfork == XFS_COW_FORK)
> +		bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
> +
> +	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
> +		bma.prev.br_startoff = NULLFILEOFF;
> +
> +	error = xfs_bmapi_allocate(&bma);
> +	if (error)
> +		goto out_finish;
> +	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK)) {
> +		error = -ENOSPC;
> +		goto out_finish;
> +	}
> +
> +	ASSERT(!isnullstartblock(bma.got.br_startblock));
> +	ASSERT(bma.got.br_startblock || XFS_IS_REALTIME_INODE(ip));
> +	*imap = bma.got;
> +
> +	if (whichfork == XFS_COW_FORK) {
> +		error = xfs_refcount_alloc_cow_extent(tp, bma.blkno,
> +				bma.length);
> +		if (error)
> +			goto out_finish;
> +	}
> +
> +	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
> +			whichfork);
> +out_finish:
> +	xfs_bmapi_finish(&bma, whichfork, error);
>  	return error;
>  }
>  
> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> index 4dc7d1a02b35..b5eca7a26949 100644
> --- a/fs/xfs/libxfs/xfs_bmap.h
> +++ b/fs/xfs/libxfs/xfs_bmap.h
> @@ -95,9 +95,6 @@ struct xfs_extent_free_item
>  /* Map something in the CoW fork. */
>  #define XFS_BMAPI_COWFORK	0x200
>  
> -/* Only convert delalloc space, don't allocate entirely new extents */
> -#define XFS_BMAPI_DELALLOC	0x400
> -
>  /* Only convert unwritten extents, don't allocate new blocks */
>  #define XFS_BMAPI_CONVERT_ONLY	0x800
>  
> @@ -117,7 +114,6 @@ struct xfs_extent_free_item
>  	{ XFS_BMAPI_ZERO,	"ZERO" }, \
>  	{ XFS_BMAPI_REMAP,	"REMAP" }, \
>  	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
> -	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }, \
>  	{ XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" }, \
>  	{ XFS_BMAPI_NODISCARD,	"NODISCARD" }, \
>  	{ XFS_BMAPI_NORMAP,	"NORMAP" }
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc
  2019-02-11 12:54 ` [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc Christoph Hellwig
@ 2019-02-11 15:21   ` Brian Foster
  2019-02-14 19:38   ` Darrick J. Wong
  1 sibling, 0 replies; 26+ messages in thread
From: Brian Foster @ 2019-02-11 15:21 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Feb 11, 2019 at 01:54:23PM +0100, Christoph Hellwig wrote:
> No need to deal with the transaction and the inode locking in the
> caller.  Also move to automatic unlocking on transaction commit or
> cancel to simplify the code a little more.
> 
> Note that we also switch to passing whichfork as the second paramters,
> matching what most related functions do.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  fs/xfs/libxfs/xfs_bmap.c | 35 ++++++++++++++++++++++++++++++-----
>  fs/xfs/libxfs/xfs_bmap.h |  5 +++--
>  fs/xfs/xfs_iomap.c       | 32 ++++----------------------------
>  3 files changed, 37 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index be2cb5800e02..d9d66e1856d7 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -4446,16 +4446,30 @@ xfs_bmapi_write(
>   */
>  int
>  xfs_bmapi_convert_delalloc(
> -	struct xfs_trans	*tp,
>  	struct xfs_inode	*ip,
> -	xfs_fileoff_t		offset_fsb,
>  	int			whichfork,
> -	struct xfs_bmbt_irec	*imap)
> +	xfs_fileoff_t		offset_fsb,
> +	struct xfs_bmbt_irec	*imap,
> +	unsigned int		*seq)
>  {
>  	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
> +	struct xfs_mount	*mp = ip->i_mount;
>  	struct xfs_bmalloca	bma = { NULL };
> +	struct xfs_trans	*tp;
>  	int			error;
>  
> +	/*
> +	 * Space for the extent and indirect blocks was reserved when the
> +	 * delalloc extent was created so there's no need to do so here.
> +	 */
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
> +				XFS_TRANS_RESERVE, &tp);
> +	if (error)
> +		return error;
> +
> +	xfs_ilock(ip, XFS_ILOCK_EXCL);
> +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> +
>  	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
>  	    bma.got.br_startoff > offset_fsb) {
>  		/*
> @@ -4464,7 +4478,8 @@ xfs_bmapi_convert_delalloc(
>  		 * might have moved the extent to the data fork in the meantime.
>  		 */
>  		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
> -		return -EAGAIN;
> +		error = -EAGAIN;
> +		goto out_trans_cancel;
>  	}
>  
>  	/*
> @@ -4473,7 +4488,8 @@ xfs_bmapi_convert_delalloc(
>  	 */
>  	if (!isnullstartblock(bma.got.br_startblock)) {
>  		*imap = bma.got;
> -		return 0;
> +		*seq = READ_ONCE(ifp->if_seq);
> +		goto out_trans_cancel;
>  	}
>  
>  	bma.tp = tp;
> @@ -4500,6 +4516,7 @@ xfs_bmapi_convert_delalloc(
>  	ASSERT(!isnullstartblock(bma.got.br_startblock));
>  	ASSERT(bma.got.br_startblock || XFS_IS_REALTIME_INODE(ip));
>  	*imap = bma.got;
> +	*seq = READ_ONCE(ifp->if_seq);
>  
>  	if (whichfork == XFS_COW_FORK) {
>  		error = xfs_refcount_alloc_cow_extent(tp, bma.blkno,
> @@ -4510,8 +4527,16 @@ xfs_bmapi_convert_delalloc(
>  
>  	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
>  			whichfork);
> +	if (error)
> +		goto out_finish;
> +
> +	xfs_bmapi_finish(&bma, whichfork, 0);
> +	return xfs_trans_commit(tp);
> +
>  out_finish:
>  	xfs_bmapi_finish(&bma, whichfork, error);
> +out_trans_cancel:
> +	xfs_trans_cancel(tp);
>  	return error;
>  }
>  
> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> index b5eca7a26949..78b190b6e908 100644
> --- a/fs/xfs/libxfs/xfs_bmap.h
> +++ b/fs/xfs/libxfs/xfs_bmap.h
> @@ -223,8 +223,9 @@ int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
>  		xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
>  		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
>  		int eof);
> -int	xfs_bmapi_convert_delalloc(struct xfs_trans *, struct xfs_inode *,
> -		xfs_fileoff_t, int, struct xfs_bmbt_irec *);
> +int	xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork,
> +		xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap,
> +		unsigned int *seq);
>  
>  static inline void
>  xfs_bmap_add_free(
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index fd3aacd4bf02..39be741cac5a 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -684,11 +684,9 @@ xfs_iomap_write_allocate(
>  	unsigned int		*seq)
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
> -	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
>  	xfs_fileoff_t		offset_fsb;
>  	xfs_fileoff_t		map_start_fsb;
>  	xfs_extlen_t		map_count_fsb;
> -	struct xfs_trans	*tp;
>  	int			error = 0;
>  
>  	/*
> @@ -716,17 +714,8 @@ xfs_iomap_write_allocate(
>  		/*
>  		 * Allocate in a loop because it may take several attempts to
>  		 * allocate real blocks for a contiguous delalloc extent if free
> -		 * space is sufficiently fragmented. Note that space for the
> -		 * extent and indirect blocks was reserved when the delalloc
> -		 * extent was created so there's no need to do so here.
> +		 * space is sufficiently fragmented.
>  		 */
> -		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
> -					XFS_TRANS_RESERVE, &tp);
> -		if (error)
> -			return error;
> -
> -		xfs_ilock(ip, XFS_ILOCK_EXCL);
> -		xfs_trans_ijoin(tp, ip, 0);
>  
>  		/*
>  		 * ilock was dropped since imap was populated which means it
> @@ -737,17 +726,10 @@ xfs_iomap_write_allocate(
>  		 * caller. We'll trim it down to the caller's most recently
>  		 * validated range before we return.
>  		 */
> -		error = xfs_bmapi_convert_delalloc(tp, ip, offset_fsb,
> -						   whichfork, imap);
> -		if (error)
> -			goto trans_cancel;
> -
> -		error = xfs_trans_commit(tp);
> +		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
> +				imap, seq);
>  		if (error)
> -			goto error0;
> -
> -		*seq = READ_ONCE(ifp->if_seq);
> -		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +			return error;
>  
>  		/*
>  		 * See if we were able to allocate an extent that covers at
> @@ -766,12 +748,6 @@ xfs_iomap_write_allocate(
>  			return 0;
>  		}
>  	}
> -
> -trans_cancel:
> -	xfs_trans_cancel(tp);
> -error0:
> -	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> -	return error;
>  }
>  
>  int
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 07/10] xfs: move stat accounting to xfs_bmapi_convert_delalloc
  2019-02-11 12:54 ` [PATCH 07/10] xfs: move stat accounting " Christoph Hellwig
@ 2019-02-11 15:21   ` Brian Foster
  0 siblings, 0 replies; 26+ messages in thread
From: Brian Foster @ 2019-02-11 15:21 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Feb 11, 2019 at 01:54:24PM +0100, Christoph Hellwig wrote:
> This way we can actually count how many bytes got converted and how many
> calls we need, unlike in the caller which doesn't have the detailed
> view.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---

I'm a little confused over what the original xs_xstrat_quick was
supposed to be (and the counter names aren't very helpful). We'd only
bump it once we completed an allocation that covered the associated
writeback. This changes that behavior to bump it for every allocation,
which seems more useful to me:

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  fs/xfs/libxfs/xfs_bmap.c | 3 +++
>  fs/xfs/xfs_iomap.c       | 4 ----
>  2 files changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index d9d66e1856d7..dc3f0608377d 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -4513,6 +4513,9 @@ xfs_bmapi_convert_delalloc(
>  		goto out_finish;
>  	}
>  
> +	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
> +	XFS_STATS_INC(mp, xs_xstrat_quick);
> +
>  	ASSERT(!isnullstartblock(bma.got.br_startblock));
>  	ASSERT(bma.got.br_startblock || XFS_IS_REALTIME_INODE(ip));
>  	*imap = bma.got;
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 39be741cac5a..15da53b5fb53 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -707,9 +707,6 @@ xfs_iomap_write_allocate(
>  	map_start_fsb = imap->br_startoff;
>  	map_count_fsb = imap->br_blockcount;
>  
> -	XFS_STATS_ADD(mp, xs_xstrat_bytes,
> -		      XFS_FSB_TO_B(mp, imap->br_blockcount));
> -
>  	while (true) {
>  		/*
>  		 * Allocate in a loop because it may take several attempts to
> @@ -741,7 +738,6 @@ xfs_iomap_write_allocate(
>  		if ((offset_fsb >= imap->br_startoff) &&
>  		    (offset_fsb < (imap->br_startoff +
>  				   imap->br_blockcount))) {
> -			XFS_STATS_INC(mp, xs_xstrat_quick);
>  			xfs_trim_extent(imap, map_start_fsb, map_count_fsb);
>  			ASSERT(offset_fsb >= imap->br_startoff &&
>  			       offset_fsb < imap->br_startoff + imap->br_blockcount);
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c
  2019-02-11 12:54 ` [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c Christoph Hellwig
@ 2019-02-11 15:22   ` Brian Foster
  2019-02-15  6:56     ` Christoph Hellwig
  0 siblings, 1 reply; 26+ messages in thread
From: Brian Foster @ 2019-02-11 15:22 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Feb 11, 2019 at 01:54:25PM +0100, Christoph Hellwig wrote:
> This function is a small wrapper only used by the writeback code, so
> move it together with the writeback code and simplify it down to the
> glorified do { } while loop that is now is.
> 
> A few bits intentionally got lost here: no need to call xfs_qm_dqattach
> because quotas are always attached when we create the delalloc
> reservation, and no need for the imap->br_startblock == 0 check given
> that xfs_bmapi_convert_delalloc already has a WARN_ON_ONCE for exactly
> that condition.
> 

It's an assert in xfs_bmapi_convert_delalloc() FWIW. That also doesn't
account for the fact that this changes an explicit error into a debug
mode notification. I'm not familiar with the history of this check and
the whole xfs_alert_fsblock_zero() thing, but AFAICT it's the only thing
that prevents an in-core record corruption from constructing a
superblock overwrite in this path.

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_aops.c  | 47 ++++++++++++++++++++++++---
>  fs/xfs/xfs_iomap.c | 81 ----------------------------------------------
>  fs/xfs/xfs_iomap.h |  2 --
>  3 files changed, 42 insertions(+), 88 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 8bfb62d8776f..403df647c0e4 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -329,6 +329,44 @@ xfs_imap_valid(
>  	return true;
>  }
>  
> +/*
> + * Pass in a dellalloc extent and convert it to real extents, return the real
> + * extent that maps offset_fsb in wpc->imap.
> + *
> + * Given that ilock was dropped since got was populated it might no longer be
> + * valid, and we only use it to trim the return extent to this range to maintain
> + * consistency with what the caller expects.
> + *

Can we please fix this comment to explain what "what the caller expects"
means? This could be as simple as appending "(i.e., the caller has
already trimmed against overlapping COW fork blocks)" to the last
sentence above.

> + * The current page is held locked so nothing could have removed the block
> + * backing offset_fsb.
> + */
...
> @@ -458,14 +496,13 @@ xfs_map_blocks(
>  	trace_xfs_map_blocks_found(ip, offset, count, wpc->fork, &imap);
>  	return 0;
>  allocate_blocks:
> -	error = xfs_iomap_write_allocate(ip, wpc->fork, offset, &imap,
> -			wpc->fork == XFS_COW_FORK ?
> -					 &wpc->cow_seq : &wpc->data_seq);
> +	error = xfs_convert_blocks(wpc, ip, offset_fsb, &imap);
>  	if (error)
>  		return error;
> +	ASSERT(wpc->imap.br_startoff <= offset_fsb);
> +	ASSERT(wpc->imap.br_startoff + wpc->imap.br_blockcount >= offset_fsb);

Looks like this one should be >.

Brian

>  	ASSERT(wpc->fork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
> -	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
> -	wpc->imap = imap;
> +	       wpc->imap.br_startoff + wpc->imap.br_blockcount <= cow_fsb);
>  	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->fork, &imap);
>  	return 0;
>  }
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 15da53b5fb53..361dfe7af783 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -665,87 +665,6 @@ xfs_file_iomap_begin_delay(
>  	return error;
>  }
>  
> -/*
> - * Pass in a delayed allocate extent, convert it to real extents;
> - * return to the caller the extent we create which maps on top of
> - * the originating callers request.
> - *
> - * Called without a lock on the inode.
> - *
> - * We no longer bother to look at the incoming map - all we have to
> - * guarantee is that whatever we allocate fills the required range.
> - */
> -int
> -xfs_iomap_write_allocate(
> -	struct xfs_inode	*ip,
> -	int			whichfork,
> -	xfs_off_t		offset,
> -	struct xfs_bmbt_irec	*imap,
> -	unsigned int		*seq)
> -{
> -	struct xfs_mount	*mp = ip->i_mount;
> -	xfs_fileoff_t		offset_fsb;
> -	xfs_fileoff_t		map_start_fsb;
> -	xfs_extlen_t		map_count_fsb;
> -	int			error = 0;
> -
> -	/*
> -	 * Make sure that the dquots are there.
> -	 */
> -	error = xfs_qm_dqattach(ip);
> -	if (error)
> -		return error;
> -
> -	/*
> -	 * Store the file range the caller is interested in because it encodes
> -	 * state such as potential overlap with COW fork blocks. We must trim
> -	 * the allocated extent down to this range to maintain consistency with
> -	 * what the caller expects. Revalidation of the range itself is the
> -	 * responsibility of the caller.
> -	 */
> -	offset_fsb = XFS_B_TO_FSBT(mp, offset);
> -	map_start_fsb = imap->br_startoff;
> -	map_count_fsb = imap->br_blockcount;
> -
> -	while (true) {
> -		/*
> -		 * Allocate in a loop because it may take several attempts to
> -		 * allocate real blocks for a contiguous delalloc extent if free
> -		 * space is sufficiently fragmented.
> -		 */
> -
> -		/*
> -		 * ilock was dropped since imap was populated which means it
> -		 * might no longer be valid. The current page is held locked so
> -		 * nothing could have removed the block backing offset_fsb.
> -		 * Attempt to allocate whatever delalloc extent currently backs
> -		 * offset_fsb and put the result in the imap pointer from the
> -		 * caller. We'll trim it down to the caller's most recently
> -		 * validated range before we return.
> -		 */
> -		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
> -				imap, seq);
> -		if (error)
> -			return error;
> -
> -		/*
> -		 * See if we were able to allocate an extent that covers at
> -		 * least part of the callers request.
> -		 */
> -		if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
> -			return xfs_alert_fsblock_zero(ip, imap);
> -
> -		if ((offset_fsb >= imap->br_startoff) &&
> -		    (offset_fsb < (imap->br_startoff +
> -				   imap->br_blockcount))) {
> -			xfs_trim_extent(imap, map_start_fsb, map_count_fsb);
> -			ASSERT(offset_fsb >= imap->br_startoff &&
> -			       offset_fsb < imap->br_startoff + imap->br_blockcount);
> -			return 0;
> -		}
> -	}
> -}
> -
>  int
>  xfs_iomap_write_unwritten(
>  	xfs_inode_t	*ip,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index c6170548831b..6b16243db0b7 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -13,8 +13,6 @@ struct xfs_bmbt_irec;
>  
>  int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
>  			struct xfs_bmbt_irec *, int);
> -int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
> -			struct xfs_bmbt_irec *, unsigned int *);
>  int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
>  
>  void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks
  2019-02-11 12:54 ` [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks Christoph Hellwig
@ 2019-02-11 15:22   ` Brian Foster
  0 siblings, 0 replies; 26+ messages in thread
From: Brian Foster @ 2019-02-11 15:22 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Feb 11, 2019 at 01:54:26PM +0100, Christoph Hellwig wrote:
> Now that we properly handle the race with truncate in the delalloc
> allocator there is no need to short cut this exceptional case earlier
> on.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  fs/xfs/xfs_aops.c | 20 --------------------
>  1 file changed, 20 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 403df647c0e4..6a8937a833ad 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -426,26 +426,6 @@ xfs_map_blocks(
>  		xfs_iunlock(ip, XFS_ILOCK_SHARED);
>  
>  		wpc->fork = XFS_COW_FORK;
> -
> -		/*
> -		 * Truncate can race with writeback since writeback doesn't
> -		 * take the iolock and truncate decreases the file size before
> -		 * it starts truncating the pages between new_size and old_size.
> -		 * Therefore, we can end up in the situation where writeback
> -		 * gets a CoW fork mapping but the truncate makes the mapping
> -		 * invalid and we end up in here trying to get a new mapping.
> -		 * bail out here so that we simply never get a valid mapping
> -		 * and so we drop the write altogether.  The page truncation
> -		 * will kill the contents anyway.
> -		 */
> -		if (offset > i_size_read(inode)) {
> -			wpc->imap.br_blockcount = end_fsb - offset_fsb;
> -			wpc->imap.br_startoff = offset_fsb;
> -			wpc->imap.br_startblock = HOLESTARTBLOCK;
> -			wpc->imap.br_state = XFS_EXT_NORM;
> -			return 0;
> -		}
> -
>  		goto allocate_blocks;
>  	}
>  
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 10/10] xfs: retry COW fork delalloc conversion when no extent was found
  2019-02-11 12:54 ` [PATCH 10/10] xfs: retry COW fork delalloc conversion when no extent was found Christoph Hellwig
@ 2019-02-11 15:23   ` Brian Foster
  0 siblings, 0 replies; 26+ messages in thread
From: Brian Foster @ 2019-02-11 15:23 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Feb 11, 2019 at 01:54:27PM +0100, Christoph Hellwig wrote:
> While we can only truncate a block under the page lock for the current
> page, there is no high-level synchronization for moving extents from the
> COW to the data fork.  Because of that there is a chance that a
> delalloc conversion for the COW fork might not find any extents to
> convert.  In that case we should retry the whole block lookup and now
> find the blocks in the data fork.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---

Can't say I fully understand the race that motivates this, but the code
seems fine:

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  fs/xfs/xfs_aops.c | 18 ++++++++++++++++--
>  1 file changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 6a8937a833ad..e1723ac6c533 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -338,7 +338,8 @@ xfs_imap_valid(
>   * consistency with what the caller expects.
>   *
>   * The current page is held locked so nothing could have removed the block
> - * backing offset_fsb.
> + * backing offset_fsb, although it could have moved from the COW to the data
> + * fork by another thread.
>   */
>  static int
>  xfs_convert_blocks(
> @@ -381,6 +382,7 @@ xfs_map_blocks(
>  	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
>  	struct xfs_bmbt_irec	imap;
>  	struct xfs_iext_cursor	icur;
> +	int			retries = 0;
>  	int			error = 0;
>  
>  	if (XFS_FORCED_SHUTDOWN(mp))
> @@ -410,6 +412,7 @@ xfs_map_blocks(
>  	 * into real extents.  If we return without a valid map, it means we
>  	 * landed in a hole and we skip the block.
>  	 */
> +retry:
>  	xfs_ilock(ip, XFS_ILOCK_SHARED);
>  	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
>  	       (ip->i_df.if_flags & XFS_IFEXTENTS));
> @@ -477,8 +480,19 @@ xfs_map_blocks(
>  	return 0;
>  allocate_blocks:
>  	error = xfs_convert_blocks(wpc, ip, offset_fsb, &imap);
> -	if (error)
> +	if (error) {
> +		/*
> +		 * If we failed to find the extent in the COW fork we might have
> +		 * raced with a COW to data fork conversion or truncate.
> +		 * Restart the lookup to catch the extent in the data fork for
> +		 * the former case, but prevent additional retries to avoid
> +		 * looping forever for the latter case.
> +		 */
> +		if (error == -EAGAIN && wpc->fork == XFS_COW_FORK && !retries++)
> +			goto retry;
> +		ASSERT(error != -EAGAIN);
>  		return error;
> +	}
>  	ASSERT(wpc->imap.br_startoff <= offset_fsb);
>  	ASSERT(wpc->imap.br_startoff + wpc->imap.br_blockcount >= offset_fsb);
>  	ASSERT(wpc->fork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: small fixes and optimizations for delalloc and reflink
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
                   ` (9 preceding siblings ...)
  2019-02-11 12:54 ` [PATCH 10/10] xfs: retry COW fork delalloc conversion when no extent was found Christoph Hellwig
@ 2019-02-12  0:03 ` Darrick J. Wong
  2019-02-13 18:02   ` Christoph Hellwig
  10 siblings, 1 reply; 26+ messages in thread
From: Darrick J. Wong @ 2019-02-12  0:03 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Mon, Feb 11, 2019 at 01:54:17PM +0100, Christoph Hellwig wrote:
> Hi all,
> 
> this series contains my changes to fix up the delalloc and reflink
> code to prepare for the always COW mode.  This sits on top of
> the 'xfs: properly invalidate cached writeback mapping' series from
> Brian.
> 
> To make reviewing easier I also have a git tree available here:
> 
>     git://git.infradead.org/users/hch/xfs.git xfs-mapping-validation.4
> 
> Gitweb:
> 
>     http://git.infradead.org/users/hch/xfs.git/shortlog/refs/heads/xfs-mapping-validation.4

I pulled this series in and saw a bunch of new regressions in
generic/127 generic/269 generic/476 generic/521 generic/522 xfs/442 with
quota, rmap, and reflink enabled.  The regressions were all the
XFS_WANT_CORRUPTED_GOTO near the top of xfs_rmap_unmap_shared.

--D

The generic/127 crash produced the attached stacktrace; the other tests crashed
with similar stack traces.

[ 1695.579535] XFS: Assertion failed: fs_is_ok, file: fs/xfs/libxfs/xfs_rmap.c, line: 1776
[ 1695.584880] WARNING: CPU: 3 PID: 5240 at fs/xfs/xfs_message.c:104 assfail+0x27/0x2a [xfs]
[ 1695.586419] Modules linked in: mq_deadline dm_snapshot dm_bufio ext4 mbcache jbd2 dm_flakey xfs libcrc32c xt_REDIRECT xt_set ip_set_hash_net ip_set nfnetlink iptable_nat nf_nat_ipv4 nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 bfq dax_pmem device_dax nd_pmem sch_fq_codel ip_tables x_tables nfsv4 af_packet [last unloaded: scsi_debug]
[ 1695.591749] CPU: 3 PID: 5240 Comm: kworker/3:11 Not tainted 5.0.0-rc5-djw #rc5
[ 1695.593119] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.10.2-1ubuntu1 04/01/2014
[ 1695.594774] Workqueue: xfs-conv/sdc xfs_end_io [xfs]
[ 1695.595805] RIP: 0010:assfail+0x27/0x2a [xfs]
[ 1695.596667] Code: 0f 0b c3 66 66 66 66 90 48 89 f1 41 89 d0 48 c7 c6 40 e2 39 a0 48 89 fa 31 ff e8 6d f9 ff ff 80 3d 4c 84 0a 00 00 74 02 0f 0b <0f> 0b c3 48 8b b3 a8 02 00 00 48 c7 c7 b8 e6 39 a0 c6 05 b4 7f 0a
[ 1695.600041] RSP: 0018:ffffc900024b7b18 EFLAGS: 00010246
[ 1695.601045] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[ 1695.602387] RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffffa039033f
[ 1695.603725] RBP: ffff888077d5f1c0 R08: 0000000000000000 R09: 0000000000000000
[ 1695.605060] R10: 000000000000000a R11: f000000000000000 R12: ffffc900024b7bd8
[ 1695.606391] R13: 0000000000000000 R14: 0000000000002f9e R15: ffff8880277d3000
[ 1695.607718] FS:  0000000000000000(0000) GS:ffff88807e000000(0000) knlGS:0000000000000000
[ 1695.609224] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1695.610308] CR2: 000055db35944008 CR3: 000000003c7fd002 CR4: 00000000000606a0
[ 1695.611730] Call Trace:
[ 1695.612334]  xfs_rmap_unmap_shared+0x4e8/0x750 [xfs]
[ 1695.613362]  xfs_rmap_finish_one+0x2c3/0x3e0 [xfs]
[ 1695.614392]  xfs_trans_log_finish_rmap_update+0x2f/0x40 [xfs]
[ 1695.615620]  xfs_rmap_update_finish_item+0x2c/0x40 [xfs]
[ 1695.616742]  xfs_defer_finish_noroll+0x1c6/0x770 [xfs]
[ 1695.617813]  ? xfs_reflink_end_cow_extent+0x281/0x3a0 [xfs]
[ 1695.619058]  __xfs_trans_commit+0x16f/0x420 [xfs]
[ 1695.620121]  xfs_reflink_end_cow_extent+0x281/0x3a0 [xfs]
[ 1695.621330]  xfs_reflink_end_cow+0x7f/0x280 [xfs]
[ 1695.622420]  xfs_end_io+0xd0/0x120 [xfs]
[ 1695.623296]  process_one_work+0x258/0x610
[ 1695.624190]  worker_thread+0x3d/0x390
[ 1695.624980]  ? wq_calc_node_cpumask+0x80/0x80
[ 1695.625896]  kthread+0x121/0x140
[ 1695.626650]  ? kthread_create_on_node+0x60/0x60
[ 1695.627625]  ret_from_fork+0x3a/0x50
[ 1695.628427] irq event stamp: 651462
[ 1695.629229] hardirqs last  enabled at (651461): [<ffffffff810cb74d>] console_unlock+0x43d/0x5e0
[ 1695.631054] hardirqs last disabled at (651462): [<ffffffff81001ba0>] trace_hardirqs_off_thunk+0x1a/0x1c
[ 1695.632988] softirqs last  enabled at (651458): [<ffffffff81a003a8>] __do_softirq+0x3a8/0x4bd
[ 1695.634637] softirqs last disabled at (651449): [<ffffffff810616bc>] irq_exit+0xbc/0xe0
[ 1695.636165] ---[ end trace bb01f9243a156636 ]---

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: small fixes and optimizations for delalloc and reflink
  2019-02-12  0:03 ` small fixes and optimizations for delalloc and reflink Darrick J. Wong
@ 2019-02-13 18:02   ` Christoph Hellwig
  2019-02-14  8:11     ` Christoph Hellwig
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-13 18:02 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs, Brian Foster

On Mon, Feb 11, 2019 at 04:03:37PM -0800, Darrick J. Wong wrote:
> I pulled this series in and saw a bunch of new regressions in
> generic/127 generic/269 generic/476 generic/521 generic/522 xfs/442 with
> quota, rmap, and reflink enabled.  The regressions were all the
> XFS_WANT_CORRUPTED_GOTO near the top of xfs_rmap_unmap_shared.

Quick status update:  I managed to reproduce this ones on xfs/442 only
yesterday.  But as soon as I started bisecting it it disappeared again..

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: small fixes and optimizations for delalloc and reflink
  2019-02-13 18:02   ` Christoph Hellwig
@ 2019-02-14  8:11     ` Christoph Hellwig
  2019-02-14 15:57       ` Darrick J. Wong
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-14  8:11 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs, Brian Foster

On Wed, Feb 13, 2019 at 07:02:53PM +0100, Christoph Hellwig wrote:
> On Mon, Feb 11, 2019 at 04:03:37PM -0800, Darrick J. Wong wrote:
> > I pulled this series in and saw a bunch of new regressions in
> > generic/127 generic/269 generic/476 generic/521 generic/522 xfs/442 with
> > quota, rmap, and reflink enabled.  The regressions were all the
> > XFS_WANT_CORRUPTED_GOTO near the top of xfs_rmap_unmap_shared.
> 
> Quick status update:  I managed to reproduce this ones on xfs/442 only
> yesterday.  But as soon as I started bisecting it it disappeared again..

And another full overnight run couldn't report this even once.

How reproducible is the issue for you?   Are you sure it never happens
with the baseline, given that this series really doesn't touch anything
interacting with the rmap?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: small fixes and optimizations for delalloc and reflink
  2019-02-14  8:11     ` Christoph Hellwig
@ 2019-02-14 15:57       ` Darrick J. Wong
  0 siblings, 0 replies; 26+ messages in thread
From: Darrick J. Wong @ 2019-02-14 15:57 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Thu, Feb 14, 2019 at 09:11:20AM +0100, Christoph Hellwig wrote:
> On Wed, Feb 13, 2019 at 07:02:53PM +0100, Christoph Hellwig wrote:
> > On Mon, Feb 11, 2019 at 04:03:37PM -0800, Darrick J. Wong wrote:
> > > I pulled this series in and saw a bunch of new regressions in
> > > generic/127 generic/269 generic/476 generic/521 generic/522 xfs/442 with
> > > quota, rmap, and reflink enabled.  The regressions were all the
> > > XFS_WANT_CORRUPTED_GOTO near the top of xfs_rmap_unmap_shared.
> > 
> > Quick status update:  I managed to reproduce this ones on xfs/442 only
> > yesterday.  But as soon as I started bisecting it it disappeared again..
> 
> And another full overnight run couldn't report this even once.
> 
> How reproducible is the issue for you?   Are you sure it never happens
> with the baseline, given that this series really doesn't touch anything
> interacting with the rmap?

Nearly every time, so I'll go digging and report back what I find.

(It's odd, because I didn't see anything obvious in the patches that
would cause this...)

--D

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc
  2019-02-11 12:54 ` [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc Christoph Hellwig
  2019-02-11 15:21   ` Brian Foster
@ 2019-02-14 19:38   ` Darrick J. Wong
  2019-02-14 21:39     ` Christoph Hellwig
  1 sibling, 1 reply; 26+ messages in thread
From: Darrick J. Wong @ 2019-02-14 19:38 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Mon, Feb 11, 2019 at 01:54:23PM +0100, Christoph Hellwig wrote:
> No need to deal with the transaction and the inode locking in the
> caller.  Also move to automatic unlocking on transaction commit or
> cancel to simplify the code a little more.
> 
> Note that we also switch to passing whichfork as the second paramters,
> matching what most related functions do.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/libxfs/xfs_bmap.c | 35 ++++++++++++++++++++++++++++++-----
>  fs/xfs/libxfs/xfs_bmap.h |  5 +++--
>  fs/xfs/xfs_iomap.c       | 32 ++++----------------------------
>  3 files changed, 37 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index be2cb5800e02..d9d66e1856d7 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -4446,16 +4446,30 @@ xfs_bmapi_write(
>   */
>  int
>  xfs_bmapi_convert_delalloc(
> -	struct xfs_trans	*tp,
>  	struct xfs_inode	*ip,
> -	xfs_fileoff_t		offset_fsb,
>  	int			whichfork,
> -	struct xfs_bmbt_irec	*imap)
> +	xfs_fileoff_t		offset_fsb,
> +	struct xfs_bmbt_irec	*imap,
> +	unsigned int		*seq)
>  {
>  	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
> +	struct xfs_mount	*mp = ip->i_mount;
>  	struct xfs_bmalloca	bma = { NULL };
> +	struct xfs_trans	*tp;
>  	int			error;
>  
> +	/*
> +	 * Space for the extent and indirect blocks was reserved when the
> +	 * delalloc extent was created so there's no need to do so here.
> +	 */
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
> +				XFS_TRANS_RESERVE, &tp);
> +	if (error)
> +		return error;
> +
> +	xfs_ilock(ip, XFS_ILOCK_EXCL);
> +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

Aha, this is the problem -- this operation involves deferred rmap
updates, which means that we have to retain the ILOCK until we've
finished processing the rmap updates so that another thread cannot jump
in and start modifying the file's bmap while we're still trying to
finish the rmap updates.

The patch below fixes generic/127 for me, for this configuration:

FSTYP=xfs
MKFS_OPTIONS="-m reflink=1,rmapbt=1 -i sparse=1"
MOUNT_OPTIONS="-o usrquota,grpquota,prjquota"

--D

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index d9d66e1856d7..fd7757b205a6 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4468,7 +4468,7 @@ xfs_bmapi_convert_delalloc(
 		return error;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
 
 	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
 	    bma.got.br_startoff > offset_fsb) {
@@ -4531,12 +4531,15 @@ xfs_bmapi_convert_delalloc(
 		goto out_finish;
 
 	xfs_bmapi_finish(&bma, whichfork, 0);
-	return xfs_trans_commit(tp);
+	error = xfs_trans_commit(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
 
 out_finish:
 	xfs_bmapi_finish(&bma, whichfork, error);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
 
> +
>  	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
>  	    bma.got.br_startoff > offset_fsb) {
>  		/*
> @@ -4464,7 +4478,8 @@ xfs_bmapi_convert_delalloc(
>  		 * might have moved the extent to the data fork in the meantime.
>  		 */
>  		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
> -		return -EAGAIN;
> +		error = -EAGAIN;
> +		goto out_trans_cancel;
>  	}
>  
>  	/*
> @@ -4473,7 +4488,8 @@ xfs_bmapi_convert_delalloc(
>  	 */
>  	if (!isnullstartblock(bma.got.br_startblock)) {
>  		*imap = bma.got;
> -		return 0;
> +		*seq = READ_ONCE(ifp->if_seq);
> +		goto out_trans_cancel;
>  	}
>  
>  	bma.tp = tp;
> @@ -4500,6 +4516,7 @@ xfs_bmapi_convert_delalloc(
>  	ASSERT(!isnullstartblock(bma.got.br_startblock));
>  	ASSERT(bma.got.br_startblock || XFS_IS_REALTIME_INODE(ip));
>  	*imap = bma.got;
> +	*seq = READ_ONCE(ifp->if_seq);
>  
>  	if (whichfork == XFS_COW_FORK) {
>  		error = xfs_refcount_alloc_cow_extent(tp, bma.blkno,
> @@ -4510,8 +4527,16 @@ xfs_bmapi_convert_delalloc(
>  
>  	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
>  			whichfork);
> +	if (error)
> +		goto out_finish;
> +
> +	xfs_bmapi_finish(&bma, whichfork, 0);
> +	return xfs_trans_commit(tp);
> +
>  out_finish:
>  	xfs_bmapi_finish(&bma, whichfork, error);
> +out_trans_cancel:
> +	xfs_trans_cancel(tp);
>  	return error;
>  }
>  
> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> index b5eca7a26949..78b190b6e908 100644
> --- a/fs/xfs/libxfs/xfs_bmap.h
> +++ b/fs/xfs/libxfs/xfs_bmap.h
> @@ -223,8 +223,9 @@ int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
>  		xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
>  		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
>  		int eof);
> -int	xfs_bmapi_convert_delalloc(struct xfs_trans *, struct xfs_inode *,
> -		xfs_fileoff_t, int, struct xfs_bmbt_irec *);
> +int	xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork,
> +		xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap,
> +		unsigned int *seq);
>  
>  static inline void
>  xfs_bmap_add_free(
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index fd3aacd4bf02..39be741cac5a 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -684,11 +684,9 @@ xfs_iomap_write_allocate(
>  	unsigned int		*seq)
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
> -	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
>  	xfs_fileoff_t		offset_fsb;
>  	xfs_fileoff_t		map_start_fsb;
>  	xfs_extlen_t		map_count_fsb;
> -	struct xfs_trans	*tp;
>  	int			error = 0;
>  
>  	/*
> @@ -716,17 +714,8 @@ xfs_iomap_write_allocate(
>  		/*
>  		 * Allocate in a loop because it may take several attempts to
>  		 * allocate real blocks for a contiguous delalloc extent if free
> -		 * space is sufficiently fragmented. Note that space for the
> -		 * extent and indirect blocks was reserved when the delalloc
> -		 * extent was created so there's no need to do so here.
> +		 * space is sufficiently fragmented.
>  		 */
> -		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
> -					XFS_TRANS_RESERVE, &tp);
> -		if (error)
> -			return error;
> -
> -		xfs_ilock(ip, XFS_ILOCK_EXCL);
> -		xfs_trans_ijoin(tp, ip, 0);
>  
>  		/*
>  		 * ilock was dropped since imap was populated which means it
> @@ -737,17 +726,10 @@ xfs_iomap_write_allocate(
>  		 * caller. We'll trim it down to the caller's most recently
>  		 * validated range before we return.
>  		 */
> -		error = xfs_bmapi_convert_delalloc(tp, ip, offset_fsb,
> -						   whichfork, imap);
> -		if (error)
> -			goto trans_cancel;
> -
> -		error = xfs_trans_commit(tp);
> +		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
> +				imap, seq);
>  		if (error)
> -			goto error0;
> -
> -		*seq = READ_ONCE(ifp->if_seq);
> -		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +			return error;
>  
>  		/*
>  		 * See if we were able to allocate an extent that covers at
> @@ -766,12 +748,6 @@ xfs_iomap_write_allocate(
>  			return 0;
>  		}
>  	}
> -
> -trans_cancel:
> -	xfs_trans_cancel(tp);
> -error0:
> -	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> -	return error;
>  }
>  
>  int
> -- 
> 2.20.1
> 

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc
  2019-02-14 19:38   ` Darrick J. Wong
@ 2019-02-14 21:39     ` Christoph Hellwig
  0 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-14 21:39 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs, Brian Foster

On Thu, Feb 14, 2019 at 11:38:16AM -0800, Darrick J. Wong wrote:
> > +	xfs_ilock(ip, XFS_ILOCK_EXCL);
> > +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> 
> Aha, this is the problem -- this operation involves deferred rmap
> updates, which means that we have to retain the ILOCK until we've
> finished processing the rmap updates so that another thread cannot jump
> in and start modifying the file's bmap while we're still trying to
> finish the rmap updates.

Hmm, if our automatic unlock doesn't work with deferred operations
we should probably just get rid of that mechanism entirely, as it is
highly unsafe..

> 
> The patch below fixes generic/127 for me, for this configuration:
> 
> FSTYP=xfs
> MKFS_OPTIONS="-m reflink=1,rmapbt=1 -i sparse=1"
> MOUNT_OPTIONS="-o usrquota,grpquota,prjquota"

I didn't have -i sparc in my test config, but I doubt it makes
difference.  Let me check if I could reproduce the original issue
with it.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c
  2019-02-11 15:22   ` Brian Foster
@ 2019-02-15  6:56     ` Christoph Hellwig
  0 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-15  6:56 UTC (permalink / raw)
  To: Brian Foster; +Cc: Christoph Hellwig, linux-xfs

On Mon, Feb 11, 2019 at 10:22:45AM -0500, Brian Foster wrote:
> It's an assert in xfs_bmapi_convert_delalloc() FWIW. That also doesn't
> account for the fact that this changes an explicit error into a debug
> mode notification. I'm not familiar with the history of this check and
> the whole xfs_alert_fsblock_zero() thing, but AFAICT it's the only thing
> that prevents an in-core record corruption from constructing a
> superblock overwrite in this path.

Ok, I'll change the earlier patch to return an error.

> Can we please fix this comment to explain what "what the caller expects"
> means? This could be as simple as appending "(i.e., the caller has
> already trimmed against overlapping COW fork blocks)" to the last
> sentence above.

Actually, I think the better idea is to just do the explicit trim
to cow_fsb in the caller, as that is a lot more obvious.

> > +	ASSERT(wpc->imap.br_startoff <= offset_fsb);
> > +	ASSERT(wpc->imap.br_startoff + wpc->imap.br_blockcount >= offset_fsb);
> 
> Looks like this one should be >.

Indeed.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 23:40   ` Darrick J. Wong
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

This function is a small wrapper only used by the writeback code, so
move it together with the writeback code and simplify it down to the
glorified do { } while loop that is now is.

A few bits intentionally got lost here: no need to call xfs_qm_dqattach
because quotas are always attached when we create the delalloc
reservation, and no need for the imap->br_startblock == 0 check given
that xfs_bmapi_convert_delalloc already has a WARN_ON_ONCE for exactly
that condition.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c  | 51 +++++++++++++++++++++++++----
 fs/xfs/xfs_iomap.c | 81 ----------------------------------------------
 fs/xfs/xfs_iomap.h |  2 --
 3 files changed, 45 insertions(+), 89 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 8bfb62d8776f..42017ecf78ed 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -329,6 +329,38 @@ xfs_imap_valid(
 	return true;
 }
 
+/*
+ * Pass in a dellalloc extent and convert it to real extents, return the real
+ * extent that maps offset_fsb in wpc->imap.
+ *
+ * The current page is held locked so nothing could have removed the block
+ * backing offset_fsb.
+ */
+static int
+xfs_convert_blocks(
+	struct xfs_writepage_ctx *wpc,
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		offset_fsb)
+{
+	int			error;
+
+	/*
+	 * Attempt to allocate whatever delalloc extent currently backs
+	 * offset_fsb and put the result into wpc->imap.  Allocate in a loop
+	 * because it may take several attempts to allocate real blocks for a
+	 * contiguous delalloc extent if free space is sufficiently fragmented.
+	 */
+	do {
+		error = xfs_bmapi_convert_delalloc(ip, wpc->fork, offset_fsb,
+				&wpc->imap, wpc->fork == XFS_COW_FORK ?
+					&wpc->cow_seq : &wpc->data_seq);
+		if (error)
+			return error;
+	} while (wpc->imap.br_startoff + wpc->imap.br_blockcount <= offset_fsb);
+
+	return 0;
+}
+
 STATIC int
 xfs_map_blocks(
 	struct xfs_writepage_ctx *wpc,
@@ -458,14 +490,21 @@ xfs_map_blocks(
 	trace_xfs_map_blocks_found(ip, offset, count, wpc->fork, &imap);
 	return 0;
 allocate_blocks:
-	error = xfs_iomap_write_allocate(ip, wpc->fork, offset, &imap,
-			wpc->fork == XFS_COW_FORK ?
-					 &wpc->cow_seq : &wpc->data_seq);
+	error = xfs_convert_blocks(wpc, ip, offset_fsb);
 	if (error)
 		return error;
-	ASSERT(wpc->fork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
-	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
-	wpc->imap = imap;
+
+	/*
+	 * Due to merging the return real extent might be larger than the
+	 * original delalloc one.  Trim the return extent to the next COW
+	 * boundary again to force a re-lookup.
+	 */
+	if (wpc->fork != XFS_COW_FORK && cow_fsb != NULLFILEOFF &&
+	    cow_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount)
+		wpc->imap.br_blockcount = cow_fsb - wpc->imap.br_startoff;
+
+	ASSERT(wpc->imap.br_startoff <= offset_fsb);
+	ASSERT(wpc->imap.br_startoff + wpc->imap.br_blockcount > offset_fsb);
 	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->fork, &imap);
 	return 0;
 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 15da53b5fb53..361dfe7af783 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -665,87 +665,6 @@ xfs_file_iomap_begin_delay(
 	return error;
 }
 
-/*
- * Pass in a delayed allocate extent, convert it to real extents;
- * return to the caller the extent we create which maps on top of
- * the originating callers request.
- *
- * Called without a lock on the inode.
- *
- * We no longer bother to look at the incoming map - all we have to
- * guarantee is that whatever we allocate fills the required range.
- */
-int
-xfs_iomap_write_allocate(
-	struct xfs_inode	*ip,
-	int			whichfork,
-	xfs_off_t		offset,
-	struct xfs_bmbt_irec	*imap,
-	unsigned int		*seq)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		offset_fsb;
-	xfs_fileoff_t		map_start_fsb;
-	xfs_extlen_t		map_count_fsb;
-	int			error = 0;
-
-	/*
-	 * Make sure that the dquots are there.
-	 */
-	error = xfs_qm_dqattach(ip);
-	if (error)
-		return error;
-
-	/*
-	 * Store the file range the caller is interested in because it encodes
-	 * state such as potential overlap with COW fork blocks. We must trim
-	 * the allocated extent down to this range to maintain consistency with
-	 * what the caller expects. Revalidation of the range itself is the
-	 * responsibility of the caller.
-	 */
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	map_start_fsb = imap->br_startoff;
-	map_count_fsb = imap->br_blockcount;
-
-	while (true) {
-		/*
-		 * Allocate in a loop because it may take several attempts to
-		 * allocate real blocks for a contiguous delalloc extent if free
-		 * space is sufficiently fragmented.
-		 */
-
-		/*
-		 * ilock was dropped since imap was populated which means it
-		 * might no longer be valid. The current page is held locked so
-		 * nothing could have removed the block backing offset_fsb.
-		 * Attempt to allocate whatever delalloc extent currently backs
-		 * offset_fsb and put the result in the imap pointer from the
-		 * caller. We'll trim it down to the caller's most recently
-		 * validated range before we return.
-		 */
-		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
-				imap, seq);
-		if (error)
-			return error;
-
-		/*
-		 * See if we were able to allocate an extent that covers at
-		 * least part of the callers request.
-		 */
-		if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
-			return xfs_alert_fsblock_zero(ip, imap);
-
-		if ((offset_fsb >= imap->br_startoff) &&
-		    (offset_fsb < (imap->br_startoff +
-				   imap->br_blockcount))) {
-			xfs_trim_extent(imap, map_start_fsb, map_count_fsb);
-			ASSERT(offset_fsb >= imap->br_startoff &&
-			       offset_fsb < imap->br_startoff + imap->br_blockcount);
-			return 0;
-		}
-	}
-}
-
 int
 xfs_iomap_write_unwritten(
 	xfs_inode_t	*ip,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index c6170548831b..6b16243db0b7 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -13,8 +13,6 @@ struct xfs_bmbt_irec;
 
 int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 			struct xfs_bmbt_irec *, int);
-int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
-			struct xfs_bmbt_irec *, unsigned int *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c
  2019-02-15 14:47 ` [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c Christoph Hellwig
@ 2019-02-15 23:40   ` Darrick J. Wong
  0 siblings, 0 replies; 26+ messages in thread
From: Darrick J. Wong @ 2019-02-15 23:40 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Fri, Feb 15, 2019 at 03:47:23PM +0100, Christoph Hellwig wrote:
> This function is a small wrapper only used by the writeback code, so
> move it together with the writeback code and simplify it down to the
> glorified do { } while loop that is now is.
> 
> A few bits intentionally got lost here: no need to call xfs_qm_dqattach
> because quotas are always attached when we create the delalloc
> reservation, and no need for the imap->br_startblock == 0 check given
> that xfs_bmapi_convert_delalloc already has a WARN_ON_ONCE for exactly
> that condition.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_aops.c  | 51 +++++++++++++++++++++++++----
>  fs/xfs/xfs_iomap.c | 81 ----------------------------------------------
>  fs/xfs/xfs_iomap.h |  2 --
>  3 files changed, 45 insertions(+), 89 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 8bfb62d8776f..42017ecf78ed 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -329,6 +329,38 @@ xfs_imap_valid(
>  	return true;
>  }
>  
> +/*
> + * Pass in a dellalloc extent and convert it to real extents, return the real
> + * extent that maps offset_fsb in wpc->imap.
> + *
> + * The current page is held locked so nothing could have removed the block
> + * backing offset_fsb.
> + */
> +static int
> +xfs_convert_blocks(
> +	struct xfs_writepage_ctx *wpc,
> +	struct xfs_inode	*ip,
> +	xfs_fileoff_t		offset_fsb)
> +{
> +	int			error;
> +
> +	/*
> +	 * Attempt to allocate whatever delalloc extent currently backs
> +	 * offset_fsb and put the result into wpc->imap.  Allocate in a loop
> +	 * because it may take several attempts to allocate real blocks for a
> +	 * contiguous delalloc extent if free space is sufficiently fragmented.
> +	 */
> +	do {
> +		error = xfs_bmapi_convert_delalloc(ip, wpc->fork, offset_fsb,
> +				&wpc->imap, wpc->fork == XFS_COW_FORK ?
> +					&wpc->cow_seq : &wpc->data_seq);
> +		if (error)
> +			return error;
> +	} while (wpc->imap.br_startoff + wpc->imap.br_blockcount <= offset_fsb);
> +
> +	return 0;
> +}
> +
>  STATIC int
>  xfs_map_blocks(
>  	struct xfs_writepage_ctx *wpc,
> @@ -458,14 +490,21 @@ xfs_map_blocks(
>  	trace_xfs_map_blocks_found(ip, offset, count, wpc->fork, &imap);
>  	return 0;
>  allocate_blocks:
> -	error = xfs_iomap_write_allocate(ip, wpc->fork, offset, &imap,
> -			wpc->fork == XFS_COW_FORK ?
> -					 &wpc->cow_seq : &wpc->data_seq);
> +	error = xfs_convert_blocks(wpc, ip, offset_fsb);
>  	if (error)
>  		return error;
> -	ASSERT(wpc->fork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
> -	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
> -	wpc->imap = imap;
> +
> +	/*
> +	 * Due to merging the return real extent might be larger than the
> +	 * original delalloc one.  Trim the return extent to the next COW
> +	 * boundary again to force a re-lookup.
> +	 */
> +	if (wpc->fork != XFS_COW_FORK && cow_fsb != NULLFILEOFF &&
> +	    cow_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount)
> +		wpc->imap.br_blockcount = cow_fsb - wpc->imap.br_startoff;
> +
> +	ASSERT(wpc->imap.br_startoff <= offset_fsb);
> +	ASSERT(wpc->imap.br_startoff + wpc->imap.br_blockcount > offset_fsb);
>  	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->fork, &imap);
>  	return 0;
>  }
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 15da53b5fb53..361dfe7af783 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -665,87 +665,6 @@ xfs_file_iomap_begin_delay(
>  	return error;
>  }
>  
> -/*
> - * Pass in a delayed allocate extent, convert it to real extents;
> - * return to the caller the extent we create which maps on top of
> - * the originating callers request.
> - *
> - * Called without a lock on the inode.
> - *
> - * We no longer bother to look at the incoming map - all we have to
> - * guarantee is that whatever we allocate fills the required range.
> - */
> -int
> -xfs_iomap_write_allocate(
> -	struct xfs_inode	*ip,
> -	int			whichfork,
> -	xfs_off_t		offset,
> -	struct xfs_bmbt_irec	*imap,
> -	unsigned int		*seq)
> -{
> -	struct xfs_mount	*mp = ip->i_mount;
> -	xfs_fileoff_t		offset_fsb;
> -	xfs_fileoff_t		map_start_fsb;
> -	xfs_extlen_t		map_count_fsb;
> -	int			error = 0;
> -
> -	/*
> -	 * Make sure that the dquots are there.
> -	 */
> -	error = xfs_qm_dqattach(ip);
> -	if (error)
> -		return error;
> -
> -	/*
> -	 * Store the file range the caller is interested in because it encodes
> -	 * state such as potential overlap with COW fork blocks. We must trim
> -	 * the allocated extent down to this range to maintain consistency with
> -	 * what the caller expects. Revalidation of the range itself is the
> -	 * responsibility of the caller.
> -	 */
> -	offset_fsb = XFS_B_TO_FSBT(mp, offset);
> -	map_start_fsb = imap->br_startoff;
> -	map_count_fsb = imap->br_blockcount;
> -
> -	while (true) {
> -		/*
> -		 * Allocate in a loop because it may take several attempts to
> -		 * allocate real blocks for a contiguous delalloc extent if free
> -		 * space is sufficiently fragmented.
> -		 */
> -
> -		/*
> -		 * ilock was dropped since imap was populated which means it
> -		 * might no longer be valid. The current page is held locked so
> -		 * nothing could have removed the block backing offset_fsb.
> -		 * Attempt to allocate whatever delalloc extent currently backs
> -		 * offset_fsb and put the result in the imap pointer from the
> -		 * caller. We'll trim it down to the caller's most recently
> -		 * validated range before we return.
> -		 */
> -		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
> -				imap, seq);
> -		if (error)
> -			return error;
> -
> -		/*
> -		 * See if we were able to allocate an extent that covers at
> -		 * least part of the callers request.
> -		 */
> -		if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
> -			return xfs_alert_fsblock_zero(ip, imap);
> -
> -		if ((offset_fsb >= imap->br_startoff) &&
> -		    (offset_fsb < (imap->br_startoff +
> -				   imap->br_blockcount))) {
> -			xfs_trim_extent(imap, map_start_fsb, map_count_fsb);
> -			ASSERT(offset_fsb >= imap->br_startoff &&
> -			       offset_fsb < imap->br_startoff + imap->br_blockcount);
> -			return 0;
> -		}
> -	}
> -}
> -
>  int
>  xfs_iomap_write_unwritten(
>  	xfs_inode_t	*ip,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index c6170548831b..6b16243db0b7 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -13,8 +13,6 @@ struct xfs_bmbt_irec;
>  
>  int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
>  			struct xfs_bmbt_irec *, int);
> -int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
> -			struct xfs_bmbt_irec *, unsigned int *);
>  int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
>  
>  void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2019-02-15 23:40 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
2019-02-11 12:54 ` [PATCH 01/10] xfs: remove the io_type field from the writeback context and ioend Christoph Hellwig
2019-02-11 12:54 ` [PATCH 02/10] xfs: remove the s_maxbytes checks in xfs_map_blocks Christoph Hellwig
2019-02-11 12:54 ` [PATCH 03/10] xfs: simplify the xfs_bmap_btree_to_extents calling conventions Christoph Hellwig
2019-02-11 12:54 ` [PATCH 04/10] xfs: factor out two helpers from xfs_bmapi_write Christoph Hellwig
2019-02-11 12:54 ` [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling " Christoph Hellwig
2019-02-11 15:21   ` Brian Foster
2019-02-11 12:54 ` [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc Christoph Hellwig
2019-02-11 15:21   ` Brian Foster
2019-02-14 19:38   ` Darrick J. Wong
2019-02-14 21:39     ` Christoph Hellwig
2019-02-11 12:54 ` [PATCH 07/10] xfs: move stat accounting " Christoph Hellwig
2019-02-11 15:21   ` Brian Foster
2019-02-11 12:54 ` [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c Christoph Hellwig
2019-02-11 15:22   ` Brian Foster
2019-02-15  6:56     ` Christoph Hellwig
2019-02-11 12:54 ` [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks Christoph Hellwig
2019-02-11 15:22   ` Brian Foster
2019-02-11 12:54 ` [PATCH 10/10] xfs: retry COW fork delalloc conversion when no extent was found Christoph Hellwig
2019-02-11 15:23   ` Brian Foster
2019-02-12  0:03 ` small fixes and optimizations for delalloc and reflink Darrick J. Wong
2019-02-13 18:02   ` Christoph Hellwig
2019-02-14  8:11     ` Christoph Hellwig
2019-02-14 15:57       ` Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
2019-02-15 14:47 ` [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c Christoph Hellwig
2019-02-15 23:40   ` Darrick J. Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).