Proof of concept for swapping the contents of two inodes.
This is desirable for moving inode locations without changing
anything else, such as when renumbering inodes or shrinking
the filesystem. Untested.

Signed-off-by: Dave Chinner <dgc@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c |    4 
 fs/xfs/xfs_dfrag.c           |  295 ++++++++++++++++++++++++++++++++++++++-----
 fs/xfs/xfs_dfrag.h           |   24 ++-
 fs/xfs/xfs_fs.h              |    1 
 fs/xfs/xfs_trans.h           |    3 
 fs/xfs/xfsidbg.c             |    9 -
 6 files changed, 294 insertions(+), 42 deletions(-)

Index: 2.6.x-xfs-new/fs/xfs/linux-2.6/xfs_ioctl.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/linux-2.6/xfs_ioctl.c	2008-05-16 12:00:30.000000000 +1000
+++ 2.6.x-xfs-new/fs/xfs/linux-2.6/xfs_ioctl.c	2008-05-16 23:36:22.032365218 +1000
@@ -1139,6 +1139,10 @@ xfs_ioctl(
 		error = xfs_swapext((struct xfs_swapext __user *)arg);
 		return -error;
 	}
+	case XFS_IOC_SWAPINO: {
+		error = xfs_swapino((struct xfs_swapino __user *)arg);
+		return -error;
+	}
 
 	case XFS_IOC_FSCOUNTS: {
 		xfs_fsop_counts_t out;
Index: 2.6.x-xfs-new/fs/xfs/xfs_dfrag.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_dfrag.c	2008-05-09 11:57:29.000000000 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfs_dfrag.c	2008-05-17 00:05:46.100658000 +1000
@@ -44,6 +44,20 @@
 #include "xfs_rw.h"
 #include "xfs_vnodeops.h"
 
+/* Map fd to its file and XFS inode; no file reference is held on failure. */
+STATIC int
+xfs_swap_fd_to_inode(int fd, struct file **filp, xfs_inode_t **ip)
+{
+	*filp = fget(fd);
+	if (!*filp)
+		return EINVAL;
+	*ip = XFS_I((*filp)->f_path.dentry->d_inode);
+	if (*ip)
+		return 0;
+	fput(*filp);	/* callers skip fput() when this helper fails */
+	return EBADF;
+}
+
 /*
  * Syssgi interface for swapext
  */
@@ -54,60 +68,46 @@ xfs_swapext(
 	xfs_swapext_t	*sxp;
 	xfs_inode_t     *ip, *tip;
 	struct file	*file, *target_file;
-	int		error = 0;
+	int		error;
 
+	error = ENOMEM;
 	sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL);
-	if (!sxp) {
-		error = XFS_ERROR(ENOMEM);
+	if (!sxp)
 		goto out;
-	}
-
-	if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) {
-		error = XFS_ERROR(EFAULT);
+	error = EFAULT;
+	if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t)))
 		goto out_free_sxp;
-	}
 
-	/* Pull information for the target fd */
-	file = fget((int)sxp->sx_fdtarget);
-	if (!file) {
-		error = XFS_ERROR(EINVAL);
+	error = xfs_swap_fd_to_inode((int)sxp->sx_fdtarget, &file, &ip);
+	if (error)
 		goto out_free_sxp;
-	}
 
-	if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) {
-		error = XFS_ERROR(EBADF);
+	error = EBADF;
+	if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
 		goto out_put_file;
-	}
 
-	target_file = fget((int)sxp->sx_fdtmp);
-	if (!target_file) {
-		error = XFS_ERROR(EINVAL);
+	error = xfs_swap_fd_to_inode((int)sxp->sx_fdtmp, &target_file, &tip);
+	if (error)
 		goto out_put_file;
-	}
 
+	error = EBADF;
 	if (!(target_file->f_mode & FMODE_WRITE) ||
-	    (target_file->f_flags & O_APPEND)) {
-		error = XFS_ERROR(EBADF);
+	    (target_file->f_flags & O_APPEND))
 		goto out_put_target_file;
-	}
 
 	ip = XFS_I(file->f_path.dentry->d_inode);
 	tip = XFS_I(target_file->f_path.dentry->d_inode);
 
-	if (ip->i_mount != tip->i_mount) {
-		error = XFS_ERROR(EINVAL);
+	error = EINVAL;
+	if (ip->i_mount != tip->i_mount)
 		goto out_put_target_file;
-	}
 
-	if (ip->i_ino == tip->i_ino) {
-		error = XFS_ERROR(EINVAL);
+	if (ip->i_ino == tip->i_ino)
 		goto out_put_target_file;
-	}
 
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		error = XFS_ERROR(EIO);
+	error = EIO;
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		goto out_put_target_file;
-	}
 
 	error = xfs_swap_extents(ip, tip, sxp);
 
@@ -122,6 +122,52 @@ xfs_swapext(
 }
 
 int
+xfs_swapino(
+	xfs_swapino_t	__user *siu)
+{
+	xfs_swapino_t	*sino;
+	xfs_inode_t     *ip, *tip;
+	struct file	*file, *target_file;
+	int		error;
+
+	error = ENOMEM;
+	sino = kmem_alloc(sizeof(xfs_swapino_t), KM_MAYFAIL);
+	if (!sino)
+		goto out;
+
+	error = EFAULT;
+	if (copy_from_user(sino, siu, sizeof(xfs_swapino_t)))
+		goto out_free_sino;
+
+	error = xfs_swap_fd_to_inode((int)sino->sx_fdtarget, &file, &ip);
+	if (error)
+		goto out_free_sino;
+	error = xfs_swap_fd_to_inode((int)sino->sx_fdtmp, &target_file, &tip);
+	if (error)
+		goto out_put_file;
+
+	error = EINVAL;
+	if (ip->i_mount != tip->i_mount)
+		goto out_put_target_file;
+	if (ip->i_ino == tip->i_ino)
+		goto out_put_target_file;
+
+	error = EIO;
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		goto out_put_target_file;
+
+	error = xfs_swap_inodes(ip, tip, sino);
+
+ out_put_target_file:
+	fput(target_file);
+ out_put_file:
+	fput(file);
+ out_free_sino:
+	kmem_free(sino);
+ out:
+	return error;
+}
+int
 xfs_swap_extents(
 	xfs_inode_t	*ip,
 	xfs_inode_t	*tip,
@@ -384,3 +430,188 @@ xfs_swap_extents(
 		kmem_free(tempifp);
 	return error;
 }
+
+/*
+ * Log the inode core plus whichever data and attribute fork fields match
+ * the fork formats.  For an extent-format fork small enough to be inline,
+ * if_u1.if_extents is first pointed back at the fork's own inline buffer.
+ */
+STATIC void
+xfs_swapino_log_fields(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip)
+{
+	int		ilf_fields = XFS_ILOG_CORE;
+	xfs_ifork_t	*ifp;
+
+	/* NOTE(review): a local-format data fork gets no XFS_ILOG_DDATA here,
+	 * unlike the attr fork below - confirm that is intended. */
+	switch(ip->i_d.di_format) {
+	case XFS_DINODE_FMT_EXTENTS:
+		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
+			ifp = &ip->i_df;
+			ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+		}
+		ilf_fields |= XFS_ILOG_DEXT;
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		ilf_fields |= XFS_ILOG_DBROOT;
+		break;
+	}
+
+	switch(ip->i_d.di_aformat) {
+	case XFS_DINODE_FMT_LOCAL:
+		ilf_fields |= XFS_ILOG_ADATA;
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		/* Use the attr fork's extent count; skip if there is no attr fork. */
+		ifp = ip->i_afp;
+		if (ifp && ip->i_d.di_anextents <= XFS_INLINE_EXTS)
+			ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+		ilf_fields |= XFS_ILOG_AEXT;
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		ilf_fields |= XFS_ILOG_ABROOT;
+		break;
+	}
+	xfs_trans_log_inode(tp, ip, ilf_fields);
+}
+
+/*
+ * Swap the in-core inode core, data fork and attribute fork of ip and tip
+ * inside one XFS_TRANS_SWAPINO transaction and log both inodes.  The two
+ * inodes must share a file type and realtime setting, else EINVAL.  sino
+ * is not read here.  Returns 0 or the error code of the step that failed.
+ */
+int
+xfs_swap_inodes(
+	xfs_inode_t	*ip,
+	xfs_inode_t	*tip,
+	xfs_swapino_t	*sino)
+{
+	xfs_mount_t	*mp;
+	xfs_inode_t	*ips[2];
+	xfs_trans_t	*tp;
+	xfs_icdinode_t	*dic = NULL;
+	xfs_ifork_t	*tempifp, *ifp, *tifp, *i_afp;
+	static uint	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
+	int		error;
+	char		locked = 0;
+
+	mp = ip->i_mount;
+	error = ENOMEM;
+	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
+	if (!tempifp)
+		goto error0;
+	/* dic holds an xfs_icdinode_t, so size it from the pointer itself. */
+	dic = kmem_alloc(sizeof(*dic), KM_MAYFAIL);
+	if (!dic)
+		goto error0;
+
+	/* Lock in i_ino order */
+	if (ip->i_ino < tip->i_ino) {
+		ips[0] = ip;
+		ips[1] = tip;
+	} else {
+		ips[0] = tip;
+		ips[1] = ip;
+	}
+
+	xfs_lock_inodes(ips, 2, lock_flags);
+	locked = 1;
+
+	/* Both inodes must be the same file type (S_IFMT bits of di_mode) */
+	error = EINVAL;
+	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT))
+		goto error0;
+
+	/* Verify both files are either real-time or non-realtime */
+	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip))
+		goto error0;
+
+	if (VN_CACHED(tip->i_vnode) != 0) {
+		xfs_inval_cached_trace(tip, 0, -1, 0, -1);
+		error = xfs_flushinval_pages(tip, 0, -1,
+				FI_REMAPF_LOCKED);
+		if (error)
+			goto error0;
+	}
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_iunlock(tip, XFS_ILOCK_EXCL);
+
+	/*
+	 * There is a race condition here since we gave up the
+	 * ilock.  However, the data fork will not change since
+	 * we have the iolock (locked for truncation too) so we
+	 * are safe.  We don't really care if non-io related
+	 * fields change.
+	 */
+
+	xfs_tosspages(ip, 0, -1, FI_REMAPF);
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPINO);
+	error = xfs_trans_reserve(tp, 0, 2 * XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+	if (error) {
+		/* Only the iolocks are still held at this point. */
+		xfs_iunlock(ip,  XFS_IOLOCK_EXCL);
+		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
+		xfs_trans_cancel(tp, 0);
+		locked = 0;
+		goto error0;
+	}
+	xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
+
+	/* Swap the inode cores - structure copies. */
+	*dic = ip->i_d;
+	ip->i_d = tip->i_d;
+	tip->i_d = *dic;
+
+	/* Swap the data forks of the inodes - structure copies */
+	ifp = &ip->i_df;
+	tifp = &tip->i_df;
+	*tempifp = *ifp;
+	*ifp = *tifp;
+	*tifp = *tempifp;
+
+	/* Swap the attribute forks - only the pointers change hands */
+	i_afp = ip->i_afp;
+	ip->i_afp = tip->i_afp;
+	tip->i_afp = i_afp;
+
+	/*
+	 * Increment vnode ref counts since xfs_trans_commit &
+	 * xfs_trans_cancel will both unlock the inodes and
+	 * decrement the associated ref counts.
+	 */
+	VN_HOLD(ip->i_vnode);
+	VN_HOLD(tip->i_vnode);
+	xfs_trans_ijoin(tp, ip, lock_flags);
+	xfs_trans_ijoin(tp, tip, lock_flags);
+
+	/* log both entire inodes */
+	xfs_swapino_log_fields(tp, ip);
+	xfs_swapino_log_fields(tp, tip);
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * transaction goes to disk before returning to the user.
+	 */
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_trans_set_sync(tp);
+
+	error = xfs_trans_commit(tp, XFS_TRANS_SWAPINO);
+	locked = 0;
+
+ error0:
+	/* Reached on success as well as on every failure above. */
+	if (locked) {
+		xfs_iunlock(ip,  lock_flags);
+		xfs_iunlock(tip, lock_flags);
+	}
+	vn_revalidate(ip->i_vnode);
+	vn_revalidate(tip->i_vnode);
+	kmem_free(dic);
+	kmem_free(tempifp);
+	return error;
+}
Index: 2.6.x-xfs-new/fs/xfs/xfs_dfrag.h
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_dfrag.h	2007-11-20 18:38:49.000000000 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_dfrag.h	2008-05-16 23:36:22.036364701 +1000
@@ -21,7 +21,6 @@
 /*
  * Structure passed to xfs_swapext
  */
-
 typedef struct xfs_swapext
 {
 	__int64_t	sx_version;	/* version */
@@ -38,19 +37,34 @@ typedef struct xfs_swapext
  */
 #define XFS_SX_VERSION		0
 
-#ifdef __KERNEL__
 /*
- * Prototypes for visible xfs_dfrag.c routines.
+ * Structure passed to xfs_swapino
  */
+typedef struct xfs_swapino
+{
+	__int64_t	sx_version;	/* version; presumably XFS_SI_VERSION */
+	__int64_t	sx_fdtarget;	/* fd of target file (used as an int) */
+	__int64_t	sx_fdtmp;	/* fd of tmp file (used as an int) */
+	char		sx_pad[16];	/* pad space, unused */
+} xfs_swapino_t;
 
 /*
- * Syscall interface for xfs_swapext
+ * Version flag
+ */
+#define XFS_SI_VERSION		0
+
+#ifdef __KERNEL__
+/*
+ * Prototypes for visible xfs_dfrag.c routines.
  */
-int	xfs_swapext(struct xfs_swapext __user *sx);
 
+int	xfs_swapext(struct xfs_swapext __user *sx);
 int	xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
 		struct xfs_swapext *sxp);
 
+int	xfs_swapino(struct xfs_swapino __user *si);
+int	xfs_swap_inodes(struct xfs_inode *ip, struct xfs_inode *tip,
+		struct xfs_swapino *sino);
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_DFRAG_H__ */
Index: 2.6.x-xfs-new/fs/xfs/xfs_fs.h
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_fs.h	2008-05-16 12:00:30.000000000 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfs_fs.h	2008-05-16 23:36:22.036364701 +1000
@@ -480,6 +480,7 @@ typedef struct xfs_handle {
 #define XFS_IOC_ATTRMULTI_BY_HANDLE  _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
 #define XFS_IOC_FSGEOMETRY	     _IOR ('X', 124, struct xfs_fsop_geom)
 #define XFS_IOC_GOINGDOWN	     _IOR ('X', 125, __uint32_t)
+#define XFS_IOC_SWAPINO		     _IOWR('X', 126, struct xfs_swapino)
 /*	XFS_IOC_GETFSUUID ---------- deprecated 140	 */
 
 
Index: 2.6.x-xfs-new/fs/xfs/xfs_trans.h
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_trans.h	2008-05-16 23:13:48.000000000 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfs_trans.h	2008-05-16 23:36:22.036364701 +1000
@@ -95,7 +95,8 @@ typedef struct xfs_trans_header {
 #define	XFS_TRANS_GROWFSRT_FREE		39
 #define	XFS_TRANS_SWAPEXT		40
 #define	XFS_TRANS_SB_COUNT		41
-#define	XFS_TRANS_TYPE_MAX		41
+#define	XFS_TRANS_SWAPINO		42
+#define	XFS_TRANS_TYPE_MAX		42
 /* new transaction types need to be reflected in xfs_logprint(8) */
 
 
Index: 2.6.x-xfs-new/fs/xfs/xfsidbg.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfsidbg.c	2008-05-16 23:13:50.000000000 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfsidbg.c	2008-05-16 23:36:22.040364184 +1000
@@ -6004,11 +6004,12 @@ xfsidbg_print_trans_type(unsigned int t_
 	case XFS_TRANS_GROWFSRT_ALLOC:	kdb_printf("GROWFSRT_ALLOC");	break;
 	case XFS_TRANS_GROWFSRT_ZERO:	kdb_printf("GROWFSRT_ZERO");	break;
 	case XFS_TRANS_GROWFSRT_FREE:	kdb_printf("GROWFSRT_FREE");	break;
-  	case XFS_TRANS_SWAPEXT:		kdb_printf("SWAPEXT");		break;
+	case XFS_TRANS_SWAPEXT:		kdb_printf("SWAPEXT");		break;
 	case XFS_TRANS_SB_COUNT:	kdb_printf("SB_COUNT");		break;
- 	case XFS_TRANS_DUMMY1:		kdb_printf("DUMMY1");		break;
- 	case XFS_TRANS_DUMMY2:		kdb_printf("DUMMY2");		break;
- 	case XLOG_UNMOUNT_REC_TYPE:	kdb_printf("UNMOUNT");		break;
+	case XFS_TRANS_SWAPINO:		kdb_printf("SWAPINO");		break;
+	case XFS_TRANS_DUMMY1:		kdb_printf("DUMMY1");		break;
+	case XFS_TRANS_DUMMY2:		kdb_printf("DUMMY2");		break;
+	case XLOG_UNMOUNT_REC_TYPE:	kdb_printf("UNMOUNT");		break;
 	default:			kdb_printf("unknown(0x%x)", t_type); break;
 	}
 }
