public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/8] log all inode updates V2
@ 2012-02-21  0:38 Christoph Hellwig
  2012-02-21  0:38 ` [PATCH 1/8] xfs: use per-filesystem I/O completion workqueues Christoph Hellwig
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Christoph Hellwig @ 2012-02-21  0:38 UTC (permalink / raw)
  To: xfs

Changes since V1:
  - address all review feedback
  - reimplement an optimized fdatasync

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/8] xfs: use per-filesystem I/O completion workqueues
  2012-02-21  0:38 [PATCH 0/8] log all inode updates V2 Christoph Hellwig
@ 2012-02-21  0:38 ` Christoph Hellwig
  2012-02-21  4:58   ` Dave Chinner
  2012-02-28 16:13   ` Mark Tinguely
  2012-02-21  0:38 ` [PATCH 2/8] xfs: do not require an ioend for new EOF calculation Christoph Hellwig
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 15+ messages in thread
From: Christoph Hellwig @ 2012-02-21  0:38 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-split-workqueues --]
[-- Type: text/plain, Size: 7435 bytes --]

The new concurrency-managed workqueues are cheap enough that we can create
per-filesystem workqueues instead of global ones.  This allows us to remove
the trylock or defer scheme on the ilock, which is not helpful once we have
outstanding log reservations until finishing a size update.

Also allow the default concurrency on these workqueues so that I/O completions
blocking on the ilock for one inode do not block processing for another inode.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_aops.c  |   39 ++++++++++-----------------------------
 fs/xfs/xfs_aops.h  |    2 --
 fs/xfs/xfs_buf.c   |   17 -----------------
 fs/xfs/xfs_mount.h |    3 +++
 fs/xfs/xfs_super.c |   39 ++++++++++++++++++++++++++++++++++++++-
 5 files changed, 51 insertions(+), 49 deletions(-)

Index: xfs/fs/xfs/xfs_aops.c
===================================================================
--- xfs.orig/fs/xfs/xfs_aops.c	2012-02-13 15:59:57.997159607 -0800
+++ xfs/fs/xfs/xfs_aops.c	2012-02-19 13:21:45.653291774 -0800
@@ -126,21 +126,15 @@ static inline bool xfs_ioend_is_append(s
 
 /*
  * Update on-disk file size now that data has been written to disk.
- *
- * This function does not block as blocking on the inode lock in IO completion
- * can lead to IO completion order dependency deadlocks.. If it can't get the
- * inode ilock it will return EAGAIN. Callers must handle this.
  */
-STATIC int
+STATIC void
 xfs_setfilesize(
-	xfs_ioend_t		*ioend)
+	struct xfs_ioend	*ioend)
 {
-	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	xfs_fsize_t		isize;
 
-	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
-		return EAGAIN;
-
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	isize = xfs_ioend_new_eof(ioend);
 	if (isize) {
 		trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
@@ -149,7 +143,6 @@ xfs_setfilesize(
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return 0;
 }
 
 /*
@@ -163,10 +156,12 @@ xfs_finish_ioend(
 	struct xfs_ioend	*ioend)
 {
 	if (atomic_dec_and_test(&ioend->io_remaining)) {
+		struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
+
 		if (ioend->io_type == IO_UNWRITTEN)
-			queue_work(xfsconvertd_workqueue, &ioend->io_work);
+			queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
 		else if (xfs_ioend_is_append(ioend))
-			queue_work(xfsdatad_workqueue, &ioend->io_work);
+			queue_work(mp->m_data_workqueue, &ioend->io_work);
 		else
 			xfs_destroy_ioend(ioend);
 	}
@@ -207,23 +202,9 @@ xfs_end_io(
 	 * We might have to update the on-disk file size after extending
 	 * writes.
 	 */
-	error = xfs_setfilesize(ioend);
-	ASSERT(!error || error == EAGAIN);
-
+	xfs_setfilesize(ioend);
 done:
-	/*
-	 * If we didn't complete processing of the ioend, requeue it to the
-	 * tail of the workqueue for another attempt later. Otherwise destroy
-	 * it.
-	 */
-	if (error == EAGAIN) {
-		atomic_inc(&ioend->io_remaining);
-		xfs_finish_ioend(ioend);
-		/* ensure we don't spin on blocked ioends */
-		delay(1);
-	} else {
-		xfs_destroy_ioend(ioend);
-	}
+	xfs_destroy_ioend(ioend);
 }
 
 /*
Index: xfs/fs/xfs/xfs_aops.h
===================================================================
--- xfs.orig/fs/xfs/xfs_aops.h	2012-02-13 13:48:02.390346163 -0800
+++ xfs/fs/xfs/xfs_aops.h	2012-02-19 13:21:45.653291774 -0800
@@ -18,8 +18,6 @@
 #ifndef __XFS_AOPS_H__
 #define __XFS_AOPS_H__
 
-extern struct workqueue_struct *xfsdatad_workqueue;
-extern struct workqueue_struct *xfsconvertd_workqueue;
 extern mempool_t *xfs_ioend_pool;
 
 /*
Index: xfs/fs/xfs/xfs_buf.c
===================================================================
--- xfs.orig/fs/xfs/xfs_buf.c	2012-02-13 13:48:02.393679497 -0800
+++ xfs/fs/xfs/xfs_buf.c	2012-02-19 13:21:45.653291774 -0800
@@ -45,8 +45,6 @@ static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
 
 static struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-struct workqueue_struct *xfsconvertd_workqueue;
 
 #ifdef XFS_BUF_LOCK_TRACKING
 # define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
@@ -1793,21 +1791,8 @@ xfs_buf_init(void)
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 
-	xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
-	if (!xfsdatad_workqueue)
-		goto out_destroy_xfslogd_workqueue;
-
-	xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
-						WQ_MEM_RECLAIM, 1);
-	if (!xfsconvertd_workqueue)
-		goto out_destroy_xfsdatad_workqueue;
-
 	return 0;
 
- out_destroy_xfsdatad_workqueue:
-	destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
-	destroy_workqueue(xfslogd_workqueue);
  out_free_buf_zone:
 	kmem_zone_destroy(xfs_buf_zone);
  out:
@@ -1817,8 +1802,6 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
-	destroy_workqueue(xfsconvertd_workqueue);
-	destroy_workqueue(xfsdatad_workqueue);
 	destroy_workqueue(xfslogd_workqueue);
 	kmem_zone_destroy(xfs_buf_zone);
 }
Index: xfs/fs/xfs/xfs_super.c
===================================================================
--- xfs.orig/fs/xfs/xfs_super.c	2012-02-19 12:58:45.823295509 -0800
+++ xfs/fs/xfs/xfs_super.c	2012-02-19 13:23:48.836624774 -0800
@@ -759,6 +759,36 @@ xfs_setup_devices(
 	return 0;
 }
 
+STATIC int
+xfs_init_mount_workqueues(
+	struct xfs_mount	*mp)
+{
+	mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
+			WQ_MEM_RECLAIM, 0, mp->m_fsname);
+	if (!mp->m_data_workqueue)
+		goto out;
+
+	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
+			WQ_MEM_RECLAIM, 0, mp->m_fsname);
+	if (!mp->m_unwritten_workqueue)
+		goto out_destroy_data_iodone_queue;
+
+	return 0;
+
+out_destroy_data_iodone_queue:
+	destroy_workqueue(mp->m_data_workqueue);
+out:
+	return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_mount_workqueues(
+	struct xfs_mount	*mp)
+{
+	destroy_workqueue(mp->m_data_workqueue);
+	destroy_workqueue(mp->m_unwritten_workqueue);
+}
+
 /* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
@@ -982,6 +1012,7 @@ xfs_fs_put_super(
 	xfs_unmountfs(mp);
 	xfs_freesb(mp);
 	xfs_icsb_destroy_counters(mp);
+	xfs_destroy_mount_workqueues(mp);
 	xfs_close_devices(mp);
 	xfs_free_fsname(mp);
 	kfree(mp);
@@ -1308,10 +1339,14 @@ xfs_fs_fill_super(
 	if (error)
 		goto out_free_fsname;
 
-	error = xfs_icsb_init_counters(mp);
+	error = xfs_init_mount_workqueues(mp);
 	if (error)
 		goto out_close_devices;
 
+	error = xfs_icsb_init_counters(mp);
+	if (error)
+		goto out_destroy_workqueues;
+
 	error = xfs_readsb(mp, flags);
 	if (error)
 		goto out_destroy_counters;
@@ -1374,6 +1409,8 @@ xfs_fs_fill_super(
 	xfs_freesb(mp);
  out_destroy_counters:
 	xfs_icsb_destroy_counters(mp);
+out_destroy_workqueues:
+	xfs_destroy_mount_workqueues(mp);
  out_close_devices:
 	xfs_close_devices(mp);
  out_free_fsname:
Index: xfs/fs/xfs/xfs_mount.h
===================================================================
--- xfs.orig/fs/xfs/xfs_mount.h	2012-02-13 13:48:02.410346163 -0800
+++ xfs/fs/xfs/xfs_mount.h	2012-02-19 13:21:45.656625108 -0800
@@ -211,6 +211,9 @@ typedef struct xfs_mount {
 	struct shrinker		m_inode_shrink;	/* inode reclaim shrinker */
 	int64_t			m_low_space[XFS_LOWSP_MAX];
 						/* low free space thresholds */
+
+	struct workqueue_struct	*m_data_workqueue;
+	struct workqueue_struct	*m_unwritten_workqueue;
 } xfs_mount_t;
 
 /*

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 2/8] xfs: do not require an ioend for new EOF calculation
  2012-02-21  0:38 [PATCH 0/8] log all inode updates V2 Christoph Hellwig
  2012-02-21  0:38 ` [PATCH 1/8] xfs: use per-filesystem I/O completion workqueues Christoph Hellwig
@ 2012-02-21  0:38 ` Christoph Hellwig
  2012-02-28 16:14   ` Mark Tinguely
  2012-02-21  0:38 ` [PATCH 6/8] xfs: make xfs_inode_item_size idempotent Christoph Hellwig
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2012-02-21  0:38 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-simplify-eof-calculation --]
[-- Type: text/plain, Size: 3123 bytes --]

Replace xfs_ioend_new_eof with a new inline xfs_new_eof helper that
doesn't require an ioend, and is also available outside of xfs_aops.c.

Also make the code a bit more clear by using a normal if statement
instead of a slightly misleading MIN().

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_aops.c  |   24 ++++--------------------
 fs/xfs/xfs_inode.h |   14 ++++++++++++++
 2 files changed, 18 insertions(+), 20 deletions(-)

Index: xfs/fs/xfs/xfs_aops.c
===================================================================
--- xfs.orig/fs/xfs/xfs_aops.c	2011-11-30 11:05:19.260046232 +0100
+++ xfs/fs/xfs/xfs_aops.c	2011-11-30 11:06:07.983115611 +0100
@@ -99,23 +99,6 @@ xfs_destroy_ioend(
 }
 
 /*
- * If the end of the current ioend is beyond the current EOF,
- * return the new EOF value, otherwise zero.
- */
-STATIC xfs_fsize_t
-xfs_ioend_new_eof(
-	xfs_ioend_t		*ioend)
-{
-	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
-	xfs_fsize_t		isize;
-	xfs_fsize_t		bsize;
-
-	bsize = ioend->io_offset + ioend->io_size;
-	isize = MIN(i_size_read(VFS_I(ip)), bsize);
-	return isize > ip->i_d.di_size ? isize : 0;
-}
-
-/*
  * Fast and loose check if this write could update the on-disk inode size.
  */
 static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
@@ -135,7 +118,7 @@ xfs_setfilesize(
 	xfs_fsize_t		isize;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	isize = xfs_ioend_new_eof(ioend);
+	isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
 	if (isize) {
 		trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
 		ip->i_d.di_size = isize;
@@ -357,6 +340,7 @@ xfs_submit_ioend_bio(
 	xfs_ioend_t		*ioend,
 	struct bio		*bio)
 {
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	atomic_inc(&ioend->io_remaining);
 	bio->bi_private = ioend;
 	bio->bi_end_io = xfs_end_bio;
@@ -365,8 +349,8 @@ xfs_submit_ioend_bio(
 	 * If the I/O is beyond EOF we mark the inode dirty immediately
 	 * but don't update the inode size until I/O completion.
 	 */
-	if (xfs_ioend_new_eof(ioend))
-		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
+	if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size))
+		xfs_mark_inode_dirty(ip);
 
 	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
 }
Index: xfs/fs/xfs/xfs_inode.h
===================================================================
--- xfs.orig/fs/xfs/xfs_inode.h	2011-11-30 11:05:16.670060264 +0100
+++ xfs/fs/xfs/xfs_inode.h	2011-11-30 11:06:03.769805103 +0100
@@ -275,6 +275,20 @@ static inline xfs_fsize_t XFS_ISIZE(stru
 }
 
 /*
+ * If this I/O goes past the on-disk inode size update it unless it would
+ * be past the current in-core inode size.
+ */
+static inline xfs_fsize_t
+xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
+{
+	xfs_fsize_t i_size = i_size_read(VFS_I(ip));
+
+	if (new_size > i_size)
+		new_size = i_size;
+	return new_size > ip->i_d.di_size ? new_size : 0;
+}
+
+/*
  * i_flags helper functions
  */
 static inline void

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 6/8] xfs: make xfs_inode_item_size idempotent
  2012-02-21  0:38 [PATCH 0/8] log all inode updates V2 Christoph Hellwig
  2012-02-21  0:38 ` [PATCH 1/8] xfs: use per-filesystem I/O completion workqueues Christoph Hellwig
  2012-02-21  0:38 ` [PATCH 2/8] xfs: do not require an ioend for new EOF calculation Christoph Hellwig
@ 2012-02-21  0:38 ` Christoph Hellwig
  2012-02-21  5:14   ` Dave Chinner
  2012-02-21  0:38 ` [PATCH 7/8] xfs: split in-core and on-disk inode log item fields Christoph Hellwig
  2012-02-21  0:38 ` [PATCH 8/8] xfs: add back fdatasync support Christoph Hellwig
  4 siblings, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2012-02-21  0:38 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-idempotent-inode_item_size --]
[-- Type: text/plain, Size: 11477 bytes --]

Move all code messing with the inode log item flags into xfs_inode_item_format
to make sure xfs_inode_item_size really only calculates the number of
vectors, but doesn't modify any state of the inode item.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_inode_item.c |  214 ++++++++++++++++++------------------------------
 1 file changed, 83 insertions(+), 131 deletions(-)

Index: xfs/fs/xfs/xfs_inode_item.c
===================================================================
--- xfs.orig/fs/xfs/xfs_inode_item.c	2012-02-20 12:08:36.976655590 -0800
+++ xfs/fs/xfs/xfs_inode_item.c	2012-02-20 12:08:39.456655583 -0800
@@ -57,77 +57,28 @@ xfs_inode_item_size(
 	struct xfs_inode	*ip = iip->ili_inode;
 	uint			nvecs = 2;
 
-	/*
-	 * Only log the data/extents/b-tree root if there is something
-	 * left to log.
-	 */
-	iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
-
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
-		iip->ili_format.ilf_fields &=
-			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-			  XFS_ILOG_DEV | XFS_ILOG_UUID);
 		if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) &&
-		    (ip->i_d.di_nextents > 0) &&
-		    (ip->i_df.if_bytes > 0)) {
-			ASSERT(ip->i_df.if_u1.if_extents != NULL);
+		    ip->i_d.di_nextents > 0 &&
+		    ip->i_df.if_bytes > 0)
 			nvecs++;
-		} else {
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT;
-		}
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
-		iip->ili_format.ilf_fields &=
-			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
-			  XFS_ILOG_DEV | XFS_ILOG_UUID);
 		if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) &&
-		    (ip->i_df.if_broot_bytes > 0)) {
-			ASSERT(ip->i_df.if_broot != NULL);
+		    ip->i_df.if_broot_bytes > 0)
 			nvecs++;
-		} else {
-			ASSERT(!(iip->ili_format.ilf_fields &
-				 XFS_ILOG_DBROOT));
-#ifdef XFS_TRANS_DEBUG
-			if (iip->ili_root_size > 0) {
-				ASSERT(iip->ili_root_size ==
-				       ip->i_df.if_broot_bytes);
-				ASSERT(memcmp(iip->ili_orig_root,
-					    ip->i_df.if_broot,
-					    iip->ili_root_size) == 0);
-			} else {
-				ASSERT(ip->i_df.if_broot_bytes == 0);
-			}
-#endif
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT;
-		}
 		break;
 
 	case XFS_DINODE_FMT_LOCAL:
-		iip->ili_format.ilf_fields &=
-			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
-			  XFS_ILOG_DEV | XFS_ILOG_UUID);
 		if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) &&
-		    (ip->i_df.if_bytes > 0)) {
-			ASSERT(ip->i_df.if_u1.if_data != NULL);
-			ASSERT(ip->i_d.di_size > 0);
+		    ip->i_df.if_bytes > 0)
 			nvecs++;
-		} else {
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA;
-		}
 		break;
 
 	case XFS_DINODE_FMT_DEV:
-		iip->ili_format.ilf_fields &=
-			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-			  XFS_ILOG_DEXT | XFS_ILOG_UUID);
-		break;
-
 	case XFS_DINODE_FMT_UUID:
-		iip->ili_format.ilf_fields &=
-			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-			  XFS_ILOG_DEXT | XFS_ILOG_DEV);
 		break;
 
 	default:
@@ -135,56 +86,31 @@ xfs_inode_item_size(
 		break;
 	}
 
-	/*
-	 * If there are no attributes associated with this file,
-	 * then there cannot be anything more to log.
-	 * Clear all attribute-related log flags.
-	 */
-	if (!XFS_IFORK_Q(ip)) {
-		iip->ili_format.ilf_fields &=
-			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
+	if (!XFS_IFORK_Q(ip))
 		return nvecs;
-	}
+
 
 	/*
 	 * Log any necessary attribute data.
 	 */
 	switch (ip->i_d.di_aformat) {
 	case XFS_DINODE_FMT_EXTENTS:
-		iip->ili_format.ilf_fields &=
-			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
 		if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) &&
-		    (ip->i_d.di_anextents > 0) &&
-		    (ip->i_afp->if_bytes > 0)) {
-			ASSERT(ip->i_afp->if_u1.if_extents != NULL);
+		    ip->i_d.di_anextents > 0 &&
+		    ip->i_afp->if_bytes > 0)
 			nvecs++;
-		} else {
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT;
-		}
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
-		iip->ili_format.ilf_fields &=
-			~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
 		if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) &&
-		    (ip->i_afp->if_broot_bytes > 0)) {
-			ASSERT(ip->i_afp->if_broot != NULL);
+		    ip->i_afp->if_broot_bytes > 0)
 			nvecs++;
-		} else {
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT;
-		}
 		break;
 
 	case XFS_DINODE_FMT_LOCAL:
-		iip->ili_format.ilf_fields &=
-			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
 		if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
-		    (ip->i_afp->if_bytes > 0)) {
-			ASSERT(ip->i_afp->if_u1.if_data != NULL);
+		    ip->i_afp->if_bytes > 0)
 			nvecs++;
-		} else {
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA;
-		}
 		break;
 
 	default:
@@ -292,16 +218,17 @@ xfs_inode_item_format(
 
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
-		ASSERT(!(iip->ili_format.ilf_fields &
-			 (XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-			  XFS_ILOG_DEV | XFS_ILOG_UUID)));
-		if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) {
-			ASSERT(ip->i_df.if_bytes > 0);
+		iip->ili_format.ilf_fields &=
+			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
+			  XFS_ILOG_DEV | XFS_ILOG_UUID);
+
+		if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) &&
+		    ip->i_d.di_nextents > 0 &&
+		    ip->i_df.if_bytes > 0) {
 			ASSERT(ip->i_df.if_u1.if_extents != NULL);
-			ASSERT(ip->i_d.di_nextents > 0);
+			ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
 			ASSERT(iip->ili_extents_buf == NULL);
-			ASSERT((ip->i_df.if_bytes /
-				(uint)sizeof(xfs_bmbt_rec_t)) > 0);
+
 #ifdef XFS_NATIVE_HOST
                        if (ip->i_d.di_nextents == ip->i_df.if_bytes /
                                                (uint)sizeof(xfs_bmbt_rec_t)) {
@@ -323,15 +250,18 @@ xfs_inode_item_format(
 			iip->ili_format.ilf_dsize = vecp->i_len;
 			vecp++;
 			nvecs++;
+		} else {
+			iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT;
 		}
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
-		ASSERT(!(iip->ili_format.ilf_fields &
-			 (XFS_ILOG_DDATA | XFS_ILOG_DEXT |
-			  XFS_ILOG_DEV | XFS_ILOG_UUID)));
-		if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) {
-			ASSERT(ip->i_df.if_broot_bytes > 0);
+		iip->ili_format.ilf_fields &=
+			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
+			  XFS_ILOG_DEV | XFS_ILOG_UUID);
+
+		if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) &&
+		    ip->i_df.if_broot_bytes > 0) {
 			ASSERT(ip->i_df.if_broot != NULL);
 			vecp->i_addr = ip->i_df.if_broot;
 			vecp->i_len = ip->i_df.if_broot_bytes;
@@ -339,15 +269,30 @@ xfs_inode_item_format(
 			vecp++;
 			nvecs++;
 			iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
+		} else {
+			ASSERT(!(iip->ili_format.ilf_fields &
+				 XFS_ILOG_DBROOT));
+#ifdef XFS_TRANS_DEBUG
+			if (iip->ili_root_size > 0) {
+				ASSERT(iip->ili_root_size ==
+				       ip->i_df.if_broot_bytes);
+				ASSERT(memcmp(iip->ili_orig_root,
+					    ip->i_df.if_broot,
+					    iip->ili_root_size) == 0);
+			} else {
+				ASSERT(ip->i_df.if_broot_bytes == 0);
+			}
+#endif
+			iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT;
 		}
 		break;
 
 	case XFS_DINODE_FMT_LOCAL:
-		ASSERT(!(iip->ili_format.ilf_fields &
-			 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
-			  XFS_ILOG_DEV | XFS_ILOG_UUID)));
-		if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) {
-			ASSERT(ip->i_df.if_bytes > 0);
+		iip->ili_format.ilf_fields &=
+			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
+			  XFS_ILOG_DEV | XFS_ILOG_UUID);
+		if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) &&
+		    ip->i_df.if_bytes > 0) {
 			ASSERT(ip->i_df.if_u1.if_data != NULL);
 			ASSERT(ip->i_d.di_size > 0);
 
@@ -365,13 +310,15 @@ xfs_inode_item_format(
 			vecp++;
 			nvecs++;
 			iip->ili_format.ilf_dsize = (unsigned)data_bytes;
+		} else {
+			iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA;
 		}
 		break;
 
 	case XFS_DINODE_FMT_DEV:
-		ASSERT(!(iip->ili_format.ilf_fields &
-			 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
-			  XFS_ILOG_DDATA | XFS_ILOG_UUID)));
+		iip->ili_format.ilf_fields &=
+			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
+			  XFS_ILOG_DEXT | XFS_ILOG_UUID);
 		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
 			iip->ili_format.ilf_u.ilfu_rdev =
 				ip->i_df.if_u2.if_rdev;
@@ -379,9 +326,9 @@ xfs_inode_item_format(
 		break;
 
 	case XFS_DINODE_FMT_UUID:
-		ASSERT(!(iip->ili_format.ilf_fields &
-			 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
-			  XFS_ILOG_DDATA | XFS_ILOG_DEV)));
+		iip->ili_format.ilf_fields &=
+			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
+			  XFS_ILOG_DEXT | XFS_ILOG_DEV);
 		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
 			iip->ili_format.ilf_u.ilfu_uuid =
 				ip->i_df.if_u2.if_uuid;
@@ -394,31 +341,26 @@ xfs_inode_item_format(
 	}
 
 	/*
-	 * If there are no attributes associated with the file,
-	 * then we're done.
-	 * Assert that no attribute-related log flags are set.
+	 * If there are no attributes associated with the file, then we're done.
 	 */
 	if (!XFS_IFORK_Q(ip)) {
 		iip->ili_format.ilf_size = nvecs;
-		ASSERT(!(iip->ili_format.ilf_fields &
-			 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
+		iip->ili_format.ilf_fields &=
+			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
 		return;
 	}
 
 	switch (ip->i_d.di_aformat) {
 	case XFS_DINODE_FMT_EXTENTS:
-		ASSERT(!(iip->ili_format.ilf_fields &
-			 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
-		if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
-#ifdef DEBUG
-			int nrecs = ip->i_afp->if_bytes /
-				(uint)sizeof(xfs_bmbt_rec_t);
-			ASSERT(nrecs > 0);
-			ASSERT(nrecs == ip->i_d.di_anextents);
-			ASSERT(ip->i_afp->if_bytes > 0);
+		iip->ili_format.ilf_fields &=
+			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
+
+		if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) &&
+		    ip->i_d.di_anextents > 0 &&
+		    ip->i_afp->if_bytes > 0) {
+			ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
+				ip->i_d.di_anextents);
 			ASSERT(ip->i_afp->if_u1.if_extents != NULL);
-			ASSERT(ip->i_d.di_anextents > 0);
-#endif
 #ifdef XFS_NATIVE_HOST
 			/*
 			 * There are not delayed allocation extents
@@ -435,28 +377,36 @@ xfs_inode_item_format(
 			iip->ili_format.ilf_asize = vecp->i_len;
 			vecp++;
 			nvecs++;
+		} else {
+			iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT;
 		}
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
-		ASSERT(!(iip->ili_format.ilf_fields &
-			 (XFS_ILOG_ADATA | XFS_ILOG_AEXT)));
-		if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) {
-			ASSERT(ip->i_afp->if_broot_bytes > 0);
+		iip->ili_format.ilf_fields &=
+			~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
+
+		if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) &&
+		    ip->i_afp->if_broot_bytes > 0) {
 			ASSERT(ip->i_afp->if_broot != NULL);
+
 			vecp->i_addr = ip->i_afp->if_broot;
 			vecp->i_len = ip->i_afp->if_broot_bytes;
 			vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
 			vecp++;
 			nvecs++;
 			iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
+		} else {
+			iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT;
 		}
 		break;
 
 	case XFS_DINODE_FMT_LOCAL:
-		ASSERT(!(iip->ili_format.ilf_fields &
-			 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
-		if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) {
+		iip->ili_format.ilf_fields &=
+			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
+
+		if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
+		    ip->i_afp->if_bytes > 0) {
 			ASSERT(ip->i_afp->if_bytes > 0);
 			ASSERT(ip->i_afp->if_u1.if_data != NULL);
 
@@ -474,6 +424,8 @@ xfs_inode_item_format(
 			vecp++;
 			nvecs++;
 			iip->ili_format.ilf_asize = (unsigned)data_bytes;
+		} else {
+			iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA;
 		}
 		break;
 

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 7/8] xfs: split in-core and on-disk inode log item fields
  2012-02-21  0:38 [PATCH 0/8] log all inode updates V2 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2012-02-21  0:38 ` [PATCH 6/8] xfs: make xfs_inode_item_size idempotent Christoph Hellwig
@ 2012-02-21  0:38 ` Christoph Hellwig
  2012-02-21  5:18   ` Dave Chinner
  2012-02-21  0:38 ` [PATCH 8/8] xfs: add back fdatasync support Christoph Hellwig
  4 siblings, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2012-02-21  0:38 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-ilf_fields-cleanup --]
[-- Type: text/plain, Size: 19230 bytes --]

Add a new ili_fields member to the inode log item to isolate the in-memory
flags from the ones that actually go to the log.  This will allow tracking
timestamp-only updates for fdatasync and O_DSYNC in the next patch and
prepares for divorcing the on-disk log format from the in-memory log item
a little further down the road.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_dfrag.c       |   24 ++++++-------
 fs/xfs/xfs_inode.c       |   71 +++++++++++++++++++---------------------
 fs/xfs/xfs_inode_item.c  |   83 ++++++++++++++++++++++++-----------------------
 fs/xfs/xfs_inode_item.h  |    4 +-
 fs/xfs/xfs_trans_inode.c |    4 +-
 5 files changed, 93 insertions(+), 93 deletions(-)

Index: xfs/fs/xfs/xfs_dfrag.c
===================================================================
--- xfs.orig/fs/xfs/xfs_dfrag.c	2012-02-20 12:08:36.489988926 -0800
+++ xfs/fs/xfs/xfs_dfrag.c	2012-02-20 12:08:44.379988903 -0800
@@ -215,7 +215,7 @@ xfs_swap_extents(
 	xfs_trans_t	*tp;
 	xfs_bstat_t	*sbp = &sxp->sx_stat;
 	xfs_ifork_t	*tempifp, *ifp, *tifp;
-	int		ilf_fields, tilf_fields;
+	int		src_log_flags, target_log_flags;
 	int		error = 0;
 	int		aforkblks = 0;
 	int		taforkblks = 0;
@@ -385,9 +385,8 @@ xfs_swap_extents(
 	tip->i_delayed_blks = ip->i_delayed_blks;
 	ip->i_delayed_blks = 0;
 
-	ilf_fields = XFS_ILOG_CORE;
-
-	switch(ip->i_d.di_format) {
+	src_log_flags = XFS_ILOG_CORE;
+	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
 		/* If the extents fit in the inode, fix the
 		 * pointer.  Otherwise it's already NULL or
@@ -397,16 +396,15 @@ xfs_swap_extents(
 			ifp->if_u1.if_extents =
 				ifp->if_u2.if_inline_ext;
 		}
-		ilf_fields |= XFS_ILOG_DEXT;
+		src_log_flags |= XFS_ILOG_DEXT;
 		break;
 	case XFS_DINODE_FMT_BTREE:
-		ilf_fields |= XFS_ILOG_DBROOT;
+		src_log_flags |= XFS_ILOG_DBROOT;
 		break;
 	}
 
-	tilf_fields = XFS_ILOG_CORE;
-
-	switch(tip->i_d.di_format) {
+	target_log_flags = XFS_ILOG_CORE;
+	switch (tip->i_d.di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
 		/* If the extents fit in the inode, fix the
 		 * pointer.  Otherwise it's already NULL or
@@ -416,10 +414,10 @@ xfs_swap_extents(
 			tifp->if_u1.if_extents =
 				tifp->if_u2.if_inline_ext;
 		}
-		tilf_fields |= XFS_ILOG_DEXT;
+		target_log_flags |= XFS_ILOG_DEXT;
 		break;
 	case XFS_DINODE_FMT_BTREE:
-		tilf_fields |= XFS_ILOG_DBROOT;
+		target_log_flags |= XFS_ILOG_DBROOT;
 		break;
 	}
 
@@ -427,8 +425,8 @@ xfs_swap_extents(
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 	xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 
-	xfs_trans_log_inode(tp, ip,  ilf_fields);
-	xfs_trans_log_inode(tp, tip, tilf_fields);
+	xfs_trans_log_inode(tp, ip,  src_log_flags);
+	xfs_trans_log_inode(tp, tip, target_log_flags);
 
 	/*
 	 * If this is a synchronous mount, make sure that the
Index: xfs/fs/xfs/xfs_inode.c
===================================================================
--- xfs.orig/fs/xfs/xfs_inode.c	2012-02-20 12:08:36.499988925 -0800
+++ xfs/fs/xfs/xfs_inode.c	2012-02-20 12:08:44.379988903 -0800
@@ -1661,8 +1661,8 @@ retry:
 				continue;
 			}
 
-			iip->ili_last_fields = iip->ili_format.ilf_fields;
-			iip->ili_format.ilf_fields = 0;
+			iip->ili_last_fields = iip->ili_fields;
+			iip->ili_fields = 0;
 			iip->ili_logged = 1;
 			xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
 						&iip->ili_item.li_lsn);
@@ -2176,7 +2176,7 @@ xfs_iflush_fork(
 	mp = ip->i_mount;
 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
 	case XFS_DINODE_FMT_LOCAL:
-		if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
+		if ((iip->ili_fields & dataflag[whichfork]) &&
 		    (ifp->if_bytes > 0)) {
 			ASSERT(ifp->if_u1.if_data != NULL);
 			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
@@ -2186,8 +2186,8 @@ xfs_iflush_fork(
 
 	case XFS_DINODE_FMT_EXTENTS:
 		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
-		       !(iip->ili_format.ilf_fields & extflag[whichfork]));
-		if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
+		       !(iip->ili_fields & extflag[whichfork]));
+		if ((iip->ili_fields & extflag[whichfork]) &&
 		    (ifp->if_bytes > 0)) {
 			ASSERT(xfs_iext_get_ext(ifp, 0));
 			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
@@ -2197,7 +2197,7 @@ xfs_iflush_fork(
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
-		if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
+		if ((iip->ili_fields & brootflag[whichfork]) &&
 		    (ifp->if_broot_bytes > 0)) {
 			ASSERT(ifp->if_broot != NULL);
 			ASSERT(ifp->if_broot_bytes <=
@@ -2210,14 +2210,14 @@ xfs_iflush_fork(
 		break;
 
 	case XFS_DINODE_FMT_DEV:
-		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
+		if (iip->ili_fields & XFS_ILOG_DEV) {
 			ASSERT(whichfork == XFS_DATA_FORK);
 			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
 		}
 		break;
 
 	case XFS_DINODE_FMT_UUID:
-		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
+		if (iip->ili_fields & XFS_ILOG_UUID) {
 			ASSERT(whichfork == XFS_DATA_FORK);
 			memcpy(XFS_DFORK_DPTR(dip),
 			       &ip->i_df.if_u2.if_uuid,
@@ -2451,7 +2451,7 @@ xfs_iflush(
 	 */
 	if (XFS_FORCED_SHUTDOWN(mp)) {
 		if (iip)
-			iip->ili_format.ilf_fields = 0;
+			iip->ili_fields = 0;
 		xfs_ifunlock(ip);
 		return XFS_ERROR(EIO);
 	}
@@ -2641,36 +2641,33 @@ xfs_iflush_int(
 	xfs_inobp_check(mp, bp);
 
 	/*
-	 * We've recorded everything logged in the inode, so we'd
-	 * like to clear the ilf_fields bits so we don't log and
-	 * flush things unnecessarily.  However, we can't stop
-	 * logging all this information until the data we've copied
-	 * into the disk buffer is written to disk.  If we did we might
-	 * overwrite the copy of the inode in the log with all the
-	 * data after re-logging only part of it, and in the face of
-	 * a crash we wouldn't have all the data we need to recover.
+	 * We've recorded everything logged in the inode, so we'd like to clear
+	 * the ili_fields bits so we don't log and flush things unnecessarily.
+	 * However, we can't stop logging all this information until the data
+	 * we've copied into the disk buffer is written to disk.  If we did we
+	 * might overwrite the copy of the inode in the log with all the data
+	 * after re-logging only part of it, and in the face of a crash we
+	 * wouldn't have all the data we need to recover.
 	 *
-	 * What we do is move the bits to the ili_last_fields field.
-	 * When logging the inode, these bits are moved back to the
-	 * ilf_fields field.  In the xfs_iflush_done() routine we
-	 * clear ili_last_fields, since we know that the information
-	 * those bits represent is permanently on disk.  As long as
-	 * the flush completes before the inode is logged again, then
-	 * both ilf_fields and ili_last_fields will be cleared.
+	 * What we do is move the bits to the ili_last_fields field.  When
+	 * logging the inode, these bits are moved back to the ili_fields field.
+	 * In the xfs_iflush_done() routine we clear ili_last_fields, since we
+	 * know that the information those bits represent is permanently on
+	 * disk.  As long as the flush completes before the inode is logged
+	 * again, then both ili_fields and ili_last_fields will be cleared.
 	 *
-	 * We can play with the ilf_fields bits here, because the inode
-	 * lock must be held exclusively in order to set bits there
-	 * and the flush lock protects the ili_last_fields bits.
-	 * Set ili_logged so the flush done
-	 * routine can tell whether or not to look in the AIL.
-	 * Also, store the current LSN of the inode so that we can tell
-	 * whether the item has moved in the AIL from xfs_iflush_done().
-	 * In order to read the lsn we need the AIL lock, because
-	 * it is a 64 bit value that cannot be read atomically.
-	 */
-	if (iip != NULL && iip->ili_format.ilf_fields != 0) {
-		iip->ili_last_fields = iip->ili_format.ilf_fields;
-		iip->ili_format.ilf_fields = 0;
+	 * We can play with the ili_fields bits here, because the inode lock
+	 * must be held exclusively in order to set bits there and the flush
+	 * lock protects the ili_last_fields bits.  Set ili_logged so the flush
+	 * done routine can tell whether or not to look in the AIL.  Also, store
+	 * the current LSN of the inode so that we can tell whether the item has
+	 * moved in the AIL from xfs_iflush_done().  In order to read the lsn we
+	 * need the AIL lock, because it is a 64 bit value that cannot be read
+	 * atomically.
+	 */
+	if (iip != NULL && iip->ili_fields != 0) {
+		iip->ili_last_fields = iip->ili_fields;
+		iip->ili_fields = 0;
 		iip->ili_logged = 1;
 
 		xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
Index: xfs/fs/xfs/xfs_inode_item.c
===================================================================
--- xfs.orig/fs/xfs/xfs_inode_item.c	2012-02-20 12:08:39.456655583 -0800
+++ xfs/fs/xfs/xfs_inode_item.c	2012-02-20 12:08:44.379988903 -0800
@@ -59,20 +59,20 @@ xfs_inode_item_size(
 
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) &&
+		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
 		    ip->i_d.di_nextents > 0 &&
 		    ip->i_df.if_bytes > 0)
 			nvecs++;
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) &&
+		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
 		    ip->i_df.if_broot_bytes > 0)
 			nvecs++;
 		break;
 
 	case XFS_DINODE_FMT_LOCAL:
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) &&
+		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
 		    ip->i_df.if_bytes > 0)
 			nvecs++;
 		break;
@@ -95,20 +95,20 @@ xfs_inode_item_size(
 	 */
 	switch (ip->i_d.di_aformat) {
 	case XFS_DINODE_FMT_EXTENTS:
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) &&
+		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
 		    ip->i_d.di_anextents > 0 &&
 		    ip->i_afp->if_bytes > 0)
 			nvecs++;
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) &&
+		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
 		    ip->i_afp->if_broot_bytes > 0)
 			nvecs++;
 		break;
 
 	case XFS_DINODE_FMT_LOCAL:
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
+		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
 		    ip->i_afp->if_bytes > 0)
 			nvecs++;
 		break;
@@ -185,7 +185,6 @@ xfs_inode_item_format(
 	vecp->i_type = XLOG_REG_TYPE_ICORE;
 	vecp++;
 	nvecs++;
-	iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
 
 	/*
 	 * If this is really an old format inode, then we need to
@@ -218,11 +217,11 @@ xfs_inode_item_format(
 
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
-		iip->ili_format.ilf_fields &=
+		iip->ili_fields &=
 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
 			  XFS_ILOG_DEV | XFS_ILOG_UUID);
 
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) &&
+		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
 		    ip->i_d.di_nextents > 0 &&
 		    ip->i_df.if_bytes > 0) {
 			ASSERT(ip->i_df.if_u1.if_extents != NULL);
@@ -251,16 +250,16 @@ xfs_inode_item_format(
 			vecp++;
 			nvecs++;
 		} else {
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT;
+			iip->ili_fields &= ~XFS_ILOG_DEXT;
 		}
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
-		iip->ili_format.ilf_fields &=
+		iip->ili_fields &=
 			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
 			  XFS_ILOG_DEV | XFS_ILOG_UUID);
 
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) &&
+		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
 		    ip->i_df.if_broot_bytes > 0) {
 			ASSERT(ip->i_df.if_broot != NULL);
 			vecp->i_addr = ip->i_df.if_broot;
@@ -270,7 +269,7 @@ xfs_inode_item_format(
 			nvecs++;
 			iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
 		} else {
-			ASSERT(!(iip->ili_format.ilf_fields &
+			ASSERT(!(iip->ili_fields &
 				 XFS_ILOG_DBROOT));
 #ifdef XFS_TRANS_DEBUG
 			if (iip->ili_root_size > 0) {
@@ -283,15 +282,15 @@ xfs_inode_item_format(
 				ASSERT(ip->i_df.if_broot_bytes == 0);
 			}
 #endif
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT;
+			iip->ili_fields &= ~XFS_ILOG_DBROOT;
 		}
 		break;
 
 	case XFS_DINODE_FMT_LOCAL:
-		iip->ili_format.ilf_fields &=
+		iip->ili_fields &=
 			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
 			  XFS_ILOG_DEV | XFS_ILOG_UUID);
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) &&
+		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
 		    ip->i_df.if_bytes > 0) {
 			ASSERT(ip->i_df.if_u1.if_data != NULL);
 			ASSERT(ip->i_d.di_size > 0);
@@ -311,25 +310,25 @@ xfs_inode_item_format(
 			nvecs++;
 			iip->ili_format.ilf_dsize = (unsigned)data_bytes;
 		} else {
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA;
+			iip->ili_fields &= ~XFS_ILOG_DDATA;
 		}
 		break;
 
 	case XFS_DINODE_FMT_DEV:
-		iip->ili_format.ilf_fields &=
+		iip->ili_fields &=
 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
 			  XFS_ILOG_DEXT | XFS_ILOG_UUID);
-		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
+		if (iip->ili_fields & XFS_ILOG_DEV) {
 			iip->ili_format.ilf_u.ilfu_rdev =
 				ip->i_df.if_u2.if_rdev;
 		}
 		break;
 
 	case XFS_DINODE_FMT_UUID:
-		iip->ili_format.ilf_fields &=
+		iip->ili_fields &=
 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
 			  XFS_ILOG_DEXT | XFS_ILOG_DEV);
-		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
+		if (iip->ili_fields & XFS_ILOG_UUID) {
 			iip->ili_format.ilf_u.ilfu_uuid =
 				ip->i_df.if_u2.if_uuid;
 		}
@@ -344,18 +343,17 @@ xfs_inode_item_format(
 	 * If there are no attributes associated with the file, then we're done.
 	 */
 	if (!XFS_IFORK_Q(ip)) {
-		iip->ili_format.ilf_size = nvecs;
-		iip->ili_format.ilf_fields &=
+		iip->ili_fields &=
 			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
-		return;
+		goto out;
 	}
 
 	switch (ip->i_d.di_aformat) {
 	case XFS_DINODE_FMT_EXTENTS:
-		iip->ili_format.ilf_fields &=
+		iip->ili_fields &=
 			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
 
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) &&
+		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
 		    ip->i_d.di_anextents > 0 &&
 		    ip->i_afp->if_bytes > 0) {
 			ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
@@ -378,15 +376,15 @@ xfs_inode_item_format(
 			vecp++;
 			nvecs++;
 		} else {
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT;
+			iip->ili_fields &= ~XFS_ILOG_AEXT;
 		}
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
-		iip->ili_format.ilf_fields &=
+		iip->ili_fields &=
 			~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
 
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) &&
+		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
 		    ip->i_afp->if_broot_bytes > 0) {
 			ASSERT(ip->i_afp->if_broot != NULL);
 
@@ -397,15 +395,15 @@ xfs_inode_item_format(
 			nvecs++;
 			iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
 		} else {
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT;
+			iip->ili_fields &= ~XFS_ILOG_ABROOT;
 		}
 		break;
 
 	case XFS_DINODE_FMT_LOCAL:
-		iip->ili_format.ilf_fields &=
+		iip->ili_fields &=
 			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
 
-		if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
+		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
 		    ip->i_afp->if_bytes > 0) {
 			ASSERT(ip->i_afp->if_bytes > 0);
 			ASSERT(ip->i_afp->if_u1.if_data != NULL);
@@ -425,7 +423,7 @@ xfs_inode_item_format(
 			nvecs++;
 			iip->ili_format.ilf_asize = (unsigned)data_bytes;
 		} else {
-			iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA;
+			iip->ili_fields &= ~XFS_ILOG_ADATA;
 		}
 		break;
 
@@ -434,6 +432,14 @@ xfs_inode_item_format(
 		break;
 	}
 
+out:
+	/*
+	 * Now update the log format that goes out to disk from the in-core
+	 * values.  We always write the inode core to make the arithmetic
+	 * games in recovery easier, which isn't a big deal as just about any
+	 * transaction would dirty it anyway.
+	 */
+	iip->ili_format.ilf_fields = XFS_ILOG_CORE | iip->ili_fields;
 	iip->ili_format.ilf_size = nvecs;
 }
 
@@ -518,7 +524,7 @@ xfs_inode_item_trylock(
 
 #ifdef DEBUG
 	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		ASSERT(iip->ili_format.ilf_fields != 0);
+		ASSERT(iip->ili_fields != 0);
 		ASSERT(iip->ili_logged == 0);
 		ASSERT(lip->li_flags & XFS_LI_IN_AIL);
 	}
@@ -550,7 +556,7 @@ xfs_inode_item_unlock(
 	if (iip->ili_extents_buf != NULL) {
 		ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
 		ASSERT(ip->i_d.di_nextents > 0);
-		ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
+		ASSERT(iip->ili_fields & XFS_ILOG_DEXT);
 		ASSERT(ip->i_df.if_bytes > 0);
 		kmem_free(iip->ili_extents_buf);
 		iip->ili_extents_buf = NULL;
@@ -558,7 +564,7 @@ xfs_inode_item_unlock(
 	if (iip->ili_aextents_buf != NULL) {
 		ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
 		ASSERT(ip->i_d.di_anextents > 0);
-		ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
+		ASSERT(iip->ili_fields & XFS_ILOG_AEXT);
 		ASSERT(ip->i_afp->if_bytes > 0);
 		kmem_free(iip->ili_aextents_buf);
 		iip->ili_aextents_buf = NULL;
@@ -673,8 +679,7 @@ xfs_inode_item_push(
 	 * lock without sleeping, then there must not have been
 	 * anyone in the process of flushing the inode.
 	 */
-	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) ||
-	       iip->ili_format.ilf_fields != 0);
+	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0);
 
 	/*
 	 * Push the inode to it's backing buffer. This will not remove the
@@ -897,7 +902,7 @@ xfs_iflush_abort(
 		 * Clear the inode logging fields so no more flushes are
 		 * attempted.
 		 */
-		iip->ili_format.ilf_fields = 0;
+		iip->ili_fields = 0;
 	}
 	/*
 	 * Release the inode's flush lock since we're done with it.
Index: xfs/fs/xfs/xfs_inode_item.h
===================================================================
--- xfs.orig/fs/xfs/xfs_inode_item.h	2012-02-20 12:08:36.523322258 -0800
+++ xfs/fs/xfs/xfs_inode_item.h	2012-02-20 12:08:44.383322236 -0800
@@ -134,6 +134,7 @@ typedef struct xfs_inode_log_item {
 	unsigned short		ili_lock_flags;	   /* lock flags */
 	unsigned short		ili_logged;	   /* flushed logged data */
 	unsigned int		ili_last_fields;   /* fields when flushed */
+	unsigned int		ili_fields;	   /* fields to be logged */
 	struct xfs_bmbt_rec	*ili_extents_buf;  /* array of logged
 						      data exts */
 	struct xfs_bmbt_rec	*ili_aextents_buf; /* array of logged
@@ -148,8 +149,7 @@ typedef struct xfs_inode_log_item {
 
 static inline int xfs_inode_clean(xfs_inode_t *ip)
 {
-	return !ip->i_itemp ||
-		!(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL);
+	return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL);
 }
 
 extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
Index: xfs/fs/xfs/xfs_trans_inode.c
===================================================================
--- xfs.orig/fs/xfs/xfs_trans_inode.c	2012-02-20 12:08:36.539988924 -0800
+++ xfs/fs/xfs/xfs_trans_inode.c	2012-02-20 12:08:44.383322236 -0800
@@ -130,12 +130,12 @@ xfs_trans_log_inode(
 	/*
 	 * Always OR in the bits from the ili_last_fields field.
 	 * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
-	 * routines in the eventual clearing of the ilf_fields bits.
+	 * routines in the eventual clearing of the ili_fields bits.
 	 * See the big comment in xfs_iflush() for an explanation of
 	 * this coordination mechanism.
 	 */
 	flags |= ip->i_itemp->ili_last_fields;
-	ip->i_itemp->ili_format.ilf_fields |= flags;
+	ip->i_itemp->ili_fields |= flags;
 }
 
 #ifdef XFS_TRANS_DEBUG

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 8/8] xfs: add back fdatasync support
  2012-02-21  0:38 [PATCH 0/8] log all inode updates V2 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2012-02-21  0:38 ` [PATCH 7/8] xfs: split in-core and on-disk inode log item fields Christoph Hellwig
@ 2012-02-21  0:38 ` Christoph Hellwig
  2012-02-21  5:23   ` Dave Chinner
  2012-03-08 20:04   ` Ben Myers
  4 siblings, 2 replies; 15+ messages in thread
From: Christoph Hellwig @ 2012-02-21  0:38 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-implement-fdatasync --]
[-- Type: text/plain, Size: 3374 bytes --]

Add an in-memory only flag to say we logged timestamps only, and use it to
check if fdatasync can optimize away the log force.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_file.c       |    7 +++++--
 fs/xfs/xfs_inode_item.c |    3 ++-
 fs/xfs/xfs_inode_item.h |   12 +++++++++++-
 fs/xfs/xfs_super.c      |    2 +-
 4 files changed, 19 insertions(+), 5 deletions(-)

Index: xfs/fs/xfs/xfs_inode_item.c
===================================================================
--- xfs.orig/fs/xfs/xfs_inode_item.c	2012-02-20 12:08:44.379988903 -0800
+++ xfs/fs/xfs/xfs_inode_item.c	2012-02-20 12:10:27.239988625 -0800
@@ -439,7 +439,8 @@ out:
 	 * games in recovery easier, which isn't a big deal as just about any
 	 * transaction would dirty it anyway.
 	 */
-	iip->ili_format.ilf_fields = XFS_ILOG_CORE | iip->ili_fields;
+	iip->ili_format.ilf_fields = XFS_ILOG_CORE |
+		(iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
 	iip->ili_format.ilf_size = nvecs;
 }
 
Index: xfs/fs/xfs/xfs_inode_item.h
===================================================================
--- xfs.orig/fs/xfs/xfs_inode_item.h	2012-02-20 12:08:44.383322236 -0800
+++ xfs/fs/xfs/xfs_inode_item.h	2012-02-20 12:10:27.239988625 -0800
@@ -86,6 +86,16 @@ typedef struct xfs_inode_log_format_64 {
 #define	XFS_ILOG_AEXT	0x080	/* log i_af.if_extents */
 #define	XFS_ILOG_ABROOT	0x100	/* log i_af.i_broot */
 
+
+/*
+ * The timestamps in the core are dirty, but not necessarily anything
+ * else.
+ *
+ * This is an incore-only flag stored in ili_fields which, unlike the
+ * flags above, must never make it to disk in ilf_fields.
+ */
+#define XFS_ILOG_TIMESTAMP	0x4000
+
 #define	XFS_ILOG_NONCORE	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
 				 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
 				 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
@@ -101,7 +111,7 @@ typedef struct xfs_inode_log_format_64 {
 				 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
 				 XFS_ILOG_DEV | XFS_ILOG_UUID | \
 				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
-				 XFS_ILOG_ABROOT)
+				 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
 
 static inline int xfs_ilog_fbroot(int w)
 {
Index: xfs/fs/xfs/xfs_file.c
===================================================================
--- xfs.orig/fs/xfs/xfs_file.c	2012-02-20 12:08:35.513322261 -0800
+++ xfs/fs/xfs/xfs_file.c	2012-02-20 12:11:40.246655094 -0800
@@ -197,8 +197,11 @@ xfs_file_fsync(
 	 * to flush the log up to the latest LSN that touched the inode.
 	 */
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	if (xfs_ipincount(ip))
-		lsn = ip->i_itemp->ili_last_lsn;
+	if (xfs_ipincount(ip)) {
+		if (!datasync ||
+		    (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
+			lsn = ip->i_itemp->ili_last_lsn;
+	}
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 	if (lsn)
Index: xfs/fs/xfs/xfs_super.c
===================================================================
--- xfs.orig/fs/xfs/xfs_super.c	2012-02-20 12:08:35.526655594 -0800
+++ xfs/fs/xfs/xfs_super.c	2012-02-20 12:10:27.239988625 -0800
@@ -907,7 +907,7 @@ xfs_fs_dirty_inode(
 	ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
 
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
 	error = xfs_trans_commit(tp, 0);
 	if (error)
 		goto trouble;

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/8] xfs: use per-filesystem I/O completion workqueues
  2012-02-21  0:38 ` [PATCH 1/8] xfs: use per-filesystem I/O completion workqueues Christoph Hellwig
@ 2012-02-21  4:58   ` Dave Chinner
  2012-02-28 16:13   ` Mark Tinguely
  1 sibling, 0 replies; 15+ messages in thread
From: Dave Chinner @ 2012-02-21  4:58 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Mon, Feb 20, 2012 at 07:38:25PM -0500, Christoph Hellwig wrote:
> The new concurrency managed workqueues are cheap enough that we can create
> per-filesystem instead of global workqueues.  This allows us to remove the
> trylock or defer scheme on the ilock, which is not helpful once we have
> outstanding log reservations until finishing a size update.
> 
> Also allow the default concurrency on this workqueues so that I/O completions
> blocking on the ilock for one inode do not block process for another inode.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 6/8] xfs: make xfs_inode_item_size idempotent
  2012-02-21  0:38 ` [PATCH 6/8] xfs: make xfs_inode_item_size idempotent Christoph Hellwig
@ 2012-02-21  5:14   ` Dave Chinner
  2012-02-28 11:08     ` Christoph Hellwig
  0 siblings, 1 reply; 15+ messages in thread
From: Dave Chinner @ 2012-02-21  5:14 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Mon, Feb 20, 2012 at 07:38:30PM -0500, Christoph Hellwig wrote:
> Move all code messing with the inode log item flags into xfs_inode_item_format
> to make sure xfs_inode_item_size really only calculates the the number of
> vectors, but doesn't modify any state of the inode item.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok so:

Reviewed-by: Dave Chinner <dchinner@redhat.com>

One minor cleanup:

>  	case XFS_DINODE_FMT_LOCAL:
> -		ASSERT(!(iip->ili_format.ilf_fields &
> -			 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
> -		if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) {
> +		iip->ili_format.ilf_fields &=
> +			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
> +
> +		if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
> +		    ip->i_afp->if_bytes > 0) {
>  			ASSERT(ip->i_afp->if_bytes > 0);

That assert can go - it's checked in the if statement now.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 7/8] xfs: split in-core and on-disk inode log item fields
  2012-02-21  0:38 ` [PATCH 7/8] xfs: split in-core and on-disk inode log item fields Christoph Hellwig
@ 2012-02-21  5:18   ` Dave Chinner
  0 siblings, 0 replies; 15+ messages in thread
From: Dave Chinner @ 2012-02-21  5:18 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Mon, Feb 20, 2012 at 07:38:31PM -0500, Christoph Hellwig wrote:
> Add a new ili_fields member to the inode log item to isolate the in-memory
> flags from the ones that actually go to the log.  This will allow tracking
> timestamp-only updates for fdatasync and O_DSYNC in the next patch and
> prepares for divorcing the on-disk log format from the in-memory log item
> a little further down the road.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 8/8] xfs: add back fdatasync support
  2012-02-21  0:38 ` [PATCH 8/8] xfs: add back fdatasync support Christoph Hellwig
@ 2012-02-21  5:23   ` Dave Chinner
  2012-03-08 20:04   ` Ben Myers
  1 sibling, 0 replies; 15+ messages in thread
From: Dave Chinner @ 2012-02-21  5:23 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Mon, Feb 20, 2012 at 07:38:32PM -0500, Christoph Hellwig wrote:
> Add an in-memory only flag to say we logged timestamps only, and use it to
> check if fdatasync can optimize away the log force.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good. Couple of minor things below.

Reviewed-by: Dave Chinner <dchinner@redhat.com>

> +/*
> + * The timestamps in the core are dirty, but not nessecarily anything
                                                    necessarily
> + * else.
> + *
> + * This is an incore only value store in ilf_fields & co, which must
> + * never make it to disk, unlike the other fields above.

That comment is a bit incoherent. Perhaps:

 * this is an incore-only flag stored in ili_fields which, unlike
 * the above flags, must never make it to disk in the ilf_fields.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 6/8] xfs: make xfs_inode_item_size idempotent
  2012-02-21  5:14   ` Dave Chinner
@ 2012-02-28 11:08     ` Christoph Hellwig
  0 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2012-02-28 11:08 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, xfs

On Tue, Feb 21, 2012 at 04:14:04PM +1100, Dave Chinner wrote:
> One minor cleanup:
> 
> >  	case XFS_DINODE_FMT_LOCAL:
> > -		ASSERT(!(iip->ili_format.ilf_fields &
> > -			 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
> > -		if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) {
> > +		iip->ili_format.ilf_fields &=
> > +			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
> > +
> > +		if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
> > +		    ip->i_afp->if_bytes > 0) {
> >  			ASSERT(ip->i_afp->if_bytes > 0);
> 
> That assert can go - it's checked in the if statement now.

Fixed.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/8] xfs: use per-filesystem I/O completion workqueues
  2012-02-21  0:38 ` [PATCH 1/8] xfs: use per-filesystem I/O completion workqueues Christoph Hellwig
  2012-02-21  4:58   ` Dave Chinner
@ 2012-02-28 16:13   ` Mark Tinguely
  1 sibling, 0 replies; 15+ messages in thread
From: Mark Tinguely @ 2012-02-28 16:13 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On 02/20/12 18:38, Christoph Hellwig wrote:
> The new concurrency managed workqueues are cheap enough that we can create
> per-filesystem instead of global workqueues.  This allows us to remove the
> trylock or defer scheme on the ilock, which is not helpful once we have
> outstanding log reservations until finishing a size update.
>
> Also allow the default concurrency on this workqueues so that I/O completions
> blocking on the ilock for one inode do not block process for another inode.
>
> Signed-off-by: Christoph Hellwig<hch@lst.de>

Looks good.

Reviewed-by: Mark Tinguely <tinguely@sgi.com>

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/8] xfs: do not require an ioend for new EOF calculation
  2012-02-21  0:38 ` [PATCH 2/8] xfs: do not require an ioend for new EOF calculation Christoph Hellwig
@ 2012-02-28 16:14   ` Mark Tinguely
  0 siblings, 0 replies; 15+ messages in thread
From: Mark Tinguely @ 2012-02-28 16:14 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On 02/20/12 18:38, Christoph Hellwig wrote:
> Replace xfs_ioend_new_eof with a new inline xfs_new_eof helper that
> doesn't require and ioend, and is available also outside of xfs_aops.c.
>
> Also make the code a bit more clear by using a normal if statement
> instead of a slightly misleading MIN().
>
> Reviewed-by: Dave Chinner<dchinner@redhat.com>
> Signed-off-by: Christoph Hellwig<hch@lst.de>

Looks good.

Reviewed-by: Mark Tinguely <tinguely@sgi.com>

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 2/8] xfs: do not require an ioend for new EOF calculation
  2012-02-29  9:53 [PATCH 0/8] log all inode updates V3 Christoph Hellwig
@ 2012-02-29  9:53 ` Christoph Hellwig
  0 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2012-02-29  9:53 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-simplify-eof-calculation --]
[-- Type: text/plain, Size: 3169 bytes --]

Replace xfs_ioend_new_eof with a new inline xfs_new_eof helper that
doesn't require an ioend, and is available also outside of xfs_aops.c.

Also make the code a bit more clear by using a normal if statement
instead of a slightly misleading MIN().

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_aops.c  |   24 ++++--------------------
 fs/xfs/xfs_inode.h |   14 ++++++++++++++
 2 files changed, 18 insertions(+), 20 deletions(-)

Index: xfs/fs/xfs/xfs_aops.c
===================================================================
--- xfs.orig/fs/xfs/xfs_aops.c	2011-11-30 11:05:19.260046232 +0100
+++ xfs/fs/xfs/xfs_aops.c	2011-11-30 11:06:07.983115611 +0100
@@ -99,23 +99,6 @@ xfs_destroy_ioend(
 }
 
 /*
- * If the end of the current ioend is beyond the current EOF,
- * return the new EOF value, otherwise zero.
- */
-STATIC xfs_fsize_t
-xfs_ioend_new_eof(
-	xfs_ioend_t		*ioend)
-{
-	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
-	xfs_fsize_t		isize;
-	xfs_fsize_t		bsize;
-
-	bsize = ioend->io_offset + ioend->io_size;
-	isize = MIN(i_size_read(VFS_I(ip)), bsize);
-	return isize > ip->i_d.di_size ? isize : 0;
-}
-
-/*
  * Fast and loose check if this write could update the on-disk inode size.
  */
 static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
@@ -135,7 +118,7 @@ xfs_setfilesize(
 	xfs_fsize_t		isize;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	isize = xfs_ioend_new_eof(ioend);
+	isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
 	if (isize) {
 		trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
 		ip->i_d.di_size = isize;
@@ -357,6 +340,7 @@ xfs_submit_ioend_bio(
 	xfs_ioend_t		*ioend,
 	struct bio		*bio)
 {
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	atomic_inc(&ioend->io_remaining);
 	bio->bi_private = ioend;
 	bio->bi_end_io = xfs_end_bio;
@@ -365,8 +349,8 @@ xfs_submit_ioend_bio(
 	 * If the I/O is beyond EOF we mark the inode dirty immediately
 	 * but don't update the inode size until I/O completion.
 	 */
-	if (xfs_ioend_new_eof(ioend))
-		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
+	if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size))
+		xfs_mark_inode_dirty(ip);
 
 	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
 }
Index: xfs/fs/xfs/xfs_inode.h
===================================================================
--- xfs.orig/fs/xfs/xfs_inode.h	2011-11-30 11:05:16.670060264 +0100
+++ xfs/fs/xfs/xfs_inode.h	2011-11-30 11:06:03.769805103 +0100
@@ -275,6 +275,20 @@ static inline xfs_fsize_t XFS_ISIZE(stru
 }
 
 /*
+ * If this I/O goes past the on-disk inode size update it unless it would
+ * be past the current in-core inode size.
+ */
+static inline xfs_fsize_t
+xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
+{
+	xfs_fsize_t i_size = i_size_read(VFS_I(ip));
+
+	if (new_size > i_size)
+		new_size = i_size;
+	return new_size > ip->i_d.di_size ? new_size : 0;
+}
+
+/*
  * i_flags helper functions
  */
 static inline void

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 8/8] xfs: add back fdatasync support
  2012-02-21  0:38 ` [PATCH 8/8] xfs: add back fdatasync support Christoph Hellwig
  2012-02-21  5:23   ` Dave Chinner
@ 2012-03-08 20:04   ` Ben Myers
  1 sibling, 0 replies; 15+ messages in thread
From: Ben Myers @ 2012-03-08 20:04 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Mon, Feb 20, 2012 at 07:38:32PM -0500, Christoph Hellwig wrote:
> Add an in-memory only flag to say we logged timestamps only, and use it to
> check if fdatasync can optimize away the log force.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok.  I was a little concerned that xfs_inode_item_format doesn't
know what an XFS_ILOG_TIMESTAMP is, until it dawned on me that we always
log the inode core.

Reviewed-by: Ben Myers <bpm@sgi.com>
> 
> ---
>  fs/xfs/xfs_file.c       |    7 +++++--
>  fs/xfs/xfs_inode_item.c |    3 ++-
>  fs/xfs/xfs_inode_item.h |   12 +++++++++++-
>  fs/xfs/xfs_super.c      |    2 +-
>  4 files changed, 19 insertions(+), 5 deletions(-)
> 
> Index: xfs/fs/xfs/xfs_inode_item.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_inode_item.c	2012-02-20 12:08:44.379988903 -0800
> +++ xfs/fs/xfs/xfs_inode_item.c	2012-02-20 12:10:27.239988625 -0800
> @@ -439,7 +439,8 @@ out:
>  	 * games in recovery easier, which isn't a big deal as just about any
>  	 * transaction would dirty it anyway.
>  	 */
> -	iip->ili_format.ilf_fields = XFS_ILOG_CORE | iip->ili_fields;
> +	iip->ili_format.ilf_fields = XFS_ILOG_CORE |
> +		(iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
>  	iip->ili_format.ilf_size = nvecs;
>  }
>  
> Index: xfs/fs/xfs/xfs_inode_item.h
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_inode_item.h	2012-02-20 12:08:44.383322236 -0800
> +++ xfs/fs/xfs/xfs_inode_item.h	2012-02-20 12:10:27.239988625 -0800
> @@ -86,6 +86,16 @@ typedef struct xfs_inode_log_format_64 {
>  #define	XFS_ILOG_AEXT	0x080	/* log i_af.if_extents */
>  #define	XFS_ILOG_ABROOT	0x100	/* log i_af.i_broot */
>  
> +
> +/*
> + * The timestamps in the core are dirty, but not necessarily anything
> + * else.
> + *
> + * This is an incore only value stored in ilf_fields & co, which must
> + * never make it to disk, unlike the other fields above.
> + */
> +#define XFS_ILOG_TIMESTAMP	0x4000
> +
>  #define	XFS_ILOG_NONCORE	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
>  				 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
>  				 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
> @@ -101,7 +111,7 @@ typedef struct xfs_inode_log_format_64 {
>  				 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
>  				 XFS_ILOG_DEV | XFS_ILOG_UUID | \
>  				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
> -				 XFS_ILOG_ABROOT)
> +				 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
>  
>  static inline int xfs_ilog_fbroot(int w)
>  {
> Index: xfs/fs/xfs/xfs_file.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_file.c	2012-02-20 12:08:35.513322261 -0800
> +++ xfs/fs/xfs/xfs_file.c	2012-02-20 12:11:40.246655094 -0800
> @@ -197,8 +197,11 @@ xfs_file_fsync(
>  	 * to flush the log up to the latest LSN that touched the inode.
>  	 */
>  	xfs_ilock(ip, XFS_ILOCK_SHARED);
> -	if (xfs_ipincount(ip))
> -		lsn = ip->i_itemp->ili_last_lsn;
> +	if (xfs_ipincount(ip)) {
> +		if (!datasync ||
> +		    (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
> +			lsn = ip->i_itemp->ili_last_lsn;
> +	}
>  	xfs_iunlock(ip, XFS_ILOCK_SHARED);
>  
>  	if (lsn)
> Index: xfs/fs/xfs/xfs_super.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_super.c	2012-02-20 12:08:35.526655594 -0800
> +++ xfs/fs/xfs/xfs_super.c	2012-02-20 12:10:27.239988625 -0800
> @@ -907,7 +907,7 @@ xfs_fs_dirty_inode(
>  	ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
>  
>  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> -	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> +	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
>  	error = xfs_trans_commit(tp, 0);
>  	if (error)
>  		goto trouble;
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2012-03-08 20:04 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-21  0:38 [PATCH 0/8] log all inode updates V2 Christoph Hellwig
2012-02-21  0:38 ` [PATCH 1/8] xfs: use per-filesystem I/O completion workqueues Christoph Hellwig
2012-02-21  4:58   ` Dave Chinner
2012-02-28 16:13   ` Mark Tinguely
2012-02-21  0:38 ` [PATCH 2/8] xfs: do not require an ioend for new EOF calculation Christoph Hellwig
2012-02-28 16:14   ` Mark Tinguely
2012-02-21  0:38 ` [PATCH 6/8] xfs: make xfs_inode_item_size idempotent Christoph Hellwig
2012-02-21  5:14   ` Dave Chinner
2012-02-28 11:08     ` Christoph Hellwig
2012-02-21  0:38 ` [PATCH 7/8] xfs: split in-core and on-disk inode log item fields Christoph Hellwig
2012-02-21  5:18   ` Dave Chinner
2012-02-21  0:38 ` [PATCH 8/8] xfs: add back fdatasync support Christoph Hellwig
2012-02-21  5:23   ` Dave Chinner
2012-03-08 20:04   ` Ben Myers
  -- strict thread matches above, loose matches on Subject: below --
2012-02-29  9:53 [PATCH 0/8] log all inode updates V3 Christoph Hellwig
2012-02-29  9:53 ` [PATCH 2/8] xfs: do not require an ioend for new EOF calculation Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox