public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* REVIEW: xfs_reno
@ 2007-10-02  7:08 Barry Naujok
  2007-10-02  7:20 ` Nathan Scott
  2007-10-02  9:02 ` Christoph Hellwig
  0 siblings, 2 replies; 10+ messages in thread
From: Barry Naujok @ 2007-10-02  7:08 UTC (permalink / raw)
  To: xfs@oss.sgi.com, xfs-dev

[-- Attachment #1: Type: text/plain, Size: 218 bytes --]


The attached tool allows an inode64 filesystem to be converted to inode32.
For this to work, the filesystem has to be mounted inode32 before it's run.

I'm not sure whether any packaging changes are required.

Barry.

[-- Attachment #2: xfs_reno.patch --]
[-- Type: application/octet-stream, Size: 42171 bytes --]


===========================================================================
xfsdump/Makefile
===========================================================================

--- a/xfsdump/Makefile	2007-10-02 17:06:24.000000000 +1000
+++ b/xfsdump/Makefile	2007-09-14 17:31:31.916437140 +1000
@@ -16,7 +16,7 @@
 	Logs/* built .census install.* install-dev.* *.gz
 
 SUBDIRS = include librmt \
-	common estimate fsr inventory invutil dump restore \
+	common estimate fsr inventory invutil dump reno restore \
 	m4 man doc po debian build
 
 default: $(CONFIGURE)

===========================================================================
xfsdump/reno/Makefile
===========================================================================

--- a/xfsdump/reno/Makefile	2006-06-17 00:58:24.000000000 +1000
+++ b/xfsdump/reno/Makefile	2007-10-02 17:06:18.658320738 +1000
@@ -0,0 +1,19 @@
+#
+# Copyright (c) 2007 Silicon Graphics, Inc.  All Rights Reserved.
+#
+# Build and install rules for the xfs_reno command.
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+# xfs_reno is built from a single source file and links against $(LIBATTR).
+LTCOMMAND = xfs_reno
+CFILES = xfs_reno.c
+LLDLIBS = $(LIBATTR)
+
+default: $(LTCOMMAND)
+
+include $(BUILDRULES)
+
+# Create $(PKG_BIN_DIR) if needed and install the command into it.
+install: default
+	$(INSTALL) -m 755 -d $(PKG_BIN_DIR)
+	$(LTINSTALL) -m 755 $(LTCOMMAND) $(PKG_BIN_DIR)
+# No recipe: the install-dev target does nothing in this directory.
+install-dev:

===========================================================================
xfsdump/reno/xfs_reno.c
===========================================================================

--- a/xfsdump/reno/xfs_reno.c	2006-06-17 00:58:24.000000000 +1000
+++ b/xfsdump/reno/xfs_reno.c	2007-10-02 17:05:38.403556260 +1000
@@ -0,0 +1,1841 @@
+/*
+ * Copyright (c) 2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/*
+ * xfs_reno - renumber 64-bit inodes
+ *
+ * xfs_reno [-f] [-n] [-p] [-q] [-v] [-P seconds] path ...
+ * xfs_reno [-r] path ...
+ *
+ * Renumbers all inodes > 32 bits into 32 bit space. Requires the filesystem
+ * to be mounted with inode32.
+ *
+ *	-f		force conversion on all inodes rather than just
+ *			those with a 64bit inode number.
+ *	-n		nothing, do not renumber inodes
+ *	-p		show progress status.
+ *	-q		quiet, do not report progress, only errors.
+ *	-v		verbose, more -v's more verbose.
+ *	-P seconds	set the interval for the progress status in seconds.
+ *	-r		recover from an interrupted run.
+ */
+
+#include <xfs/xfs.h>
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <ftw.h>
+#include <libgen.h>
+#include <malloc.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/ioctl.h>
+#include <attr/attributes.h>
+#include <xfs/xfs_dfrag.h>
+#include <xfs/xfs_inum.h>
+
+#define ATTRBUFSIZE	1024	/* size of the attr_listf() name-list buffer */
+
+/*
+ * Processing phases.  The current phase is written to the recovery file
+ * every time it changes, so the values below are part of that file's
+ * format.  Each comment names the last step known to have completed.
+ */
+#define SCAN_PHASE	0x00
+#define DIR_PHASE	0x10	/* nothing done or all done */
+#define DIR_PHASE_1	0x11	/* target dir created */
+#define DIR_PHASE_2	0x12	/* temp dir created */
+#define DIR_PHASE_3	0x13	/* attributes backed up to temp */
+#define DIR_PHASE_4	0x14	/* dirents moved to target dir */
+#define DIR_PHASE_5	0x15	/* attributes applied to target dir */
+#define DIR_PHASE_6	0x16	/* src dir removed */
+#define DIR_PHASE_7	0x17	/* temp dir removed */
+#define DIR_PHASE_MAX	0x17
+#define FILE_PHASE	0x20	/* nothing done or all done */
+#define FILE_PHASE_1	0x21	/* temp file created */
+#define FILE_PHASE_2	0x22	/* swapped extents */
+#define FILE_PHASE_3	0x23	/* unlinked source */
+#define FILE_PHASE_4	0x24	/* renamed temp to source name */
+#define FILE_PHASE_MAX	0x24
+
+static void update_recoverfile(void);
+/*
+ * Set the current phase and persist it straight away.  The argument is
+ * parenthesised so that any expression can be passed safely.
+ */
+#define SET_PHASE(x)	(cur_phase = (x), update_recoverfile())
+
+/* Message verbosity levels, compared against log_level. */
+#define LOG_ERR		0
+#define LOG_NORMAL	1
+#define LOG_INFO	2
+#define LOG_DEBUG	3
+#define LOG_NITTY	4
+
+/* The inode hash table has NH_BUCKETS buckets, indexed by ino modulo that. */
+#define NH_BUCKETS	65536
+#define NH_HASH(ino)	(nodehash + ((ino) % NH_BUCKETS))
+
+/*
+ * One inode selected for renumbering, with every path under which the
+ * tree walk (or the recovery file) named it.
+ */
+typedef struct {
+	xfs_ino_t	ino;		/* inode number when it was recorded */
+	int		ftw_flags;	/* nftw type flag (FTW_F or FTW_D) */
+	nlink_t		numpaths;	/* number of entries in paths[] */
+	char		**paths;	/* strdup'ed names; paths[0] was seen first */
+} bignode_t;
+
+/* One hash bucket: a growable array of nodes. */
+typedef struct {
+	bignode_t	*nodes;		/* array with room for listlen entries */
+	uint64_t	listlen;	/* allocated capacity of nodes[] */
+	uint64_t	lastnode;	/* number of entries currently in use */
+} nodelist_t;
+
+/* Name prefix of the temporary files and directories created next to a source. */
+static const char	*cmd_prefix = "xfs_reno_";
+
+static char		*progname;	/* prefix for every message */
+static int		log_level = LOG_NORMAL;	/* messages above this level are dropped */
+static int		force_all;	/* non-zero: process inodes that already fit in 32 bits */
+static nodelist_t	*nodehash;	/* NH_BUCKETS buckets, see init_nodehash() */
+static int		realuid;	/* compared with the file owner in process_file(); set outside this view */
+static uint64_t		numdirnodes;	/* directories found by the scan */
+static uint64_t		numfilenodes;	/* file paths found by the scan */
+static uint64_t		numdirsdone;	/* directories processed so far */
+static uint64_t		numfilesdone;	/* file paths processed so far */
+static int		poll_interval;	/* seconds between progress updates, 0 disables re-arming */
+static time_t		starttime;	/* used to report elapsed seconds */
+static bignode_t	*cur_node;	/* node being processed, NULL in between */
+static char		*cur_target;	/* name of the replacement file/dir being built */
+static char		*cur_temp;	/* name of the attribute-holding temp dir (directories only) */
+static int		cur_phase;	/* one of the *_PHASE values */
+static int		highest_numpaths; /* largest numpaths seen; sizes the recovery buffer */
+static char		*recover_file;	/* path of the recovery file */
+static int		recover_fd;	/* its descriptor; <= 0 means "not open" */
+static volatile int	poll_output;	/* set after a progress line is drawn, cleared by messages */
+static int		global_rval;	/* bit 2 is set whenever an item had to be skipped */
+
+/*
+ * Message handling.
+ *
+ * log_message() prints one formatted line on stdout, prefixed with the
+ * program name, provided `level' does not exceed the current log level.
+ */
+static void
+log_message(
+	int		level,
+	char		*fmt, ...)
+{
+	va_list		args;
+	char		text[1024];
+	char		lead;
+
+	if (level > log_level)
+		return;
+
+	va_start(args, fmt);
+	vsnprintf(text, sizeof(text), fmt, args);
+	va_end(args);
+
+	/* move to a fresh line if a progress indicator was just drawn */
+	lead = poll_output ? '\n' : '\r';
+	printf("%c%s: %s\n", lead, progname, text);
+	poll_output = 0;
+}
+
+/*
+ * Print one formatted line on stderr, prefixed with the program name.
+ * Unlike log_message() this is never filtered by the log level.
+ */
+static void
+err_message(
+	char		*fmt, ...)
+{
+	va_list		args;
+	char		text[1024];
+	char		lead;
+
+	va_start(args, fmt);
+	vsnprintf(text, sizeof(text), fmt, args);
+	va_end(args);
+
+	/* move to a fresh line if a progress indicator was just drawn */
+	lead = poll_output ? '\n' : '\r';
+	fprintf(stderr, "%c%s: %s\n", lead, progname, text);
+	poll_output = 0;
+}
+
+/* Report an allocation failure. */
+static void
+err_nomem(void)
+{
+	char		*text = _("Out of memory");
+
+	err_message(text);
+}
+
+/* Report that `s' could not be opened, using the current errno. */
+static void
+err_open(
+	const char	*s)
+{
+	const char	*reason = strerror(errno);
+
+	err_message(_("Cannot open %s: %s"), s, reason);
+}
+
+/* Report that `s' does not live on an XFS filesystem. */
+static void
+err_not_xfs(
+	const char 	*s)
+{
+	char		*fmt = _("%s is not on an XFS filesystem");
+
+	err_message(fmt, s);
+}
+
+/*
+ * Report that `s' could not be stat'ed, using the current errno.
+ * err_message() terminates the line itself, so the format carries no
+ * newline of its own.
+ */
+static void
+err_stat(
+	const char	*s)
+{
+	err_message(_("Cannot stat %s: %s"), s, strerror(errno));
+}
+
+/*
+ * Print the command synopsis on stderr and exit with status 1.
+ */
+static void
+usage(void)
+{
+	const char	*synopsis = _("%s [-fnpqv] [-P <interval>] [-r] <path>\n");
+
+	fprintf(stderr, synopsis, progname);
+	exit(1);
+}
+
+
+/*
+ * XFS interface functions
+ */
+
+/*
+ * Issue XFS_IOC_FSBULKSTAT_SINGLE on `fd' for the inode number stored in
+ * *lastip, with the result going to *ubuffer.  Returns what ioctl() returns.
+ */
+static int
+xfs_bulkstat_single(int fd, xfs_ino_t *lastip, xfs_bstat_t *ubuffer)
+{
+	xfs_fsop_bulkreq_t  req;
+	int		    error;
+
+	req.ocount = NULL;
+	req.ubuffer = ubuffer;
+	req.icount = 1;
+	req.lastip = (__u64 *)lastip;
+
+	error = ioctl(fd, XFS_IOC_FSBULKSTAT_SINGLE, &req);
+	return error;
+}
+
+/* Issue XFS_IOC_SWAPEXT on `fd' with the request `sx'. */
+static int
+xfs_swapext(int fd, xfs_swapext_t *sx)
+{
+	int	error = ioctl(fd, XFS_IOC_SWAPEXT, sx);
+
+	return error;
+}
+
+/* Read the fsxattr of the file open on `fd' into *attr. */
+static int
+xfs_getxattr(int fd, struct fsxattr *attr)
+{
+	int	error = ioctl(fd, XFS_IOC_FSGETXATTR, attr);
+
+	return error;
+}
+
+/* Apply the fsxattr in *attr to the file open on `fd'. */
+static int
+xfs_setxattr(int fd, struct fsxattr *attr)
+{
+	int	error = ioctl(fd, XFS_IOC_FSSETXATTR, attr);
+
+	return error;
+}
+
+/*
+ * A hash table of inode numbers and associated paths.
+ *
+ * init_nodehash() allocates the NH_BUCKETS buckets and leaves each one
+ * empty.  Returns the table, or NULL after reporting an allocation failure.
+ */
+static nodelist_t *
+init_nodehash(void)
+{
+	nodelist_t	*bucket;
+	int		left;
+
+	nodehash = calloc(NH_BUCKETS, sizeof(nodelist_t));
+	if (!nodehash) {
+		err_nomem();
+		return NULL;
+	}
+
+	bucket = nodehash;
+	for (left = NH_BUCKETS; left > 0; left--, bucket++) {
+		bucket->listlen = 0;
+		bucket->lastnode = 0;
+		bucket->nodes = NULL;
+	}
+
+	return nodehash;
+}
+
+/*
+ * Release the hash table, every node in it and every path string.
+ *
+ * Safe to call when the table was never allocated: cleanup() runs from the
+ * signal handler and may get here before init_nodehash() has been called.
+ * The global pointer is cleared so that a second call is harmless too.
+ */
+static void
+free_nodehash(void)
+{
+	int		i;
+	uint64_t	j;
+	nlink_t		k;
+
+	if (nodehash == NULL)
+		return;
+
+	for (i = 0; i < NH_BUCKETS; i++) {
+		bignode_t *nodes = nodehash[i].nodes;
+
+		/* index types match the counters they are compared with */
+		for (j = 0; j < nodehash[i].lastnode; j++) {
+			for (k = 0; k < nodes[j].numpaths; k++)
+				free(nodes[j].paths[k]);
+			free(nodes[j].paths);
+		}
+
+		free(nodes);
+	}
+	free(nodehash);
+	nodehash = NULL;
+}
+
+/*
+ * Append a copy of `path' to the node's path list and keep track of the
+ * longest list seen so far.  Exits the program if memory runs out.
+ * Returns the new number of paths.
+ */
+static nlink_t
+add_path(
+	bignode_t	*node,
+	const char	*path)
+{
+	nlink_t		slot = node->numpaths;
+	char		*copy;
+
+	node->paths = realloc(node->paths, sizeof(char *) * (slot + 1));
+	if (!node->paths) {
+		err_nomem();
+		exit(1);
+	}
+
+	copy = strdup(path);
+	node->paths[slot] = copy;
+	if (!copy) {
+		err_nomem();
+		exit(1);
+	}
+
+	node->numpaths = slot + 1;
+	if (node->numpaths > highest_numpaths)
+		highest_numpaths = node->numpaths;
+
+	return node->numpaths;
+}
+
+/*
+ * Append a new node for `ino' to the bucket `list', with `path' as its
+ * first path.  The bucket grows 500 entries at a time.
+ *
+ * Returns the new node, or NULL after reporting an allocation failure.
+ * On failure the bucket is left exactly as it was: the old array and its
+ * recorded capacity are only replaced once realloc() has succeeded, so
+ * later lookups and free_nodehash() still see a consistent bucket.
+ */
+static bignode_t *
+add_node(
+	nodelist_t	*list,
+	xfs_ino_t	ino,
+	int		ftw_flags,
+	const char	*path)
+{
+	bignode_t	*node;
+
+	if (list->lastnode >= list->listlen) {
+		uint64_t	newlen = list->listlen + 500;
+		bignode_t	*grown;
+
+		grown = realloc(list->nodes, sizeof(bignode_t) * newlen);
+		if (grown == NULL) {
+			err_nomem();
+			return NULL;
+		}
+		list->nodes = grown;
+		list->listlen = newlen;
+	}
+
+	node = list->nodes + list->lastnode;
+
+	node->ino = ino;
+	node->ftw_flags = ftw_flags;
+	node->paths = NULL;
+	node->numpaths = 0;
+	add_path(node, path);
+
+	list->lastnode++;
+
+	return node;
+}
+
+/*
+ * Look up `ino' in the hash table.  Returns its node, or NULL if the
+ * inode has not been recorded.
+ */
+static bignode_t *
+find_node(
+	xfs_ino_t	ino)
+{
+	nodelist_t	*bucket = NH_HASH(ino);
+	bignode_t	*entries = bucket->nodes;
+	int		idx = 0;
+
+	while (idx < bucket->lastnode) {
+		if (entries[idx].ino == ino)
+			return &entries[idx];
+		idx++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Record that `path' names inode `ino'.  A node is created for the inode
+ * the first time it is seen; later calls add another path to that node.
+ * Returns the node, or NULL if a new node could not be created.
+ */
+static bignode_t *
+add_node_path(
+	xfs_ino_t	ino,
+	int		ftw_flags,
+	const char	*path)
+{
+	bignode_t	*known;
+
+	log_message(LOG_NITTY, "add_node_path: ino %llu, path %s", ino, path);
+
+	known = find_node(ino);
+	if (known != NULL) {
+		add_path(known, path);
+		return known;
+	}
+
+	return add_node(NH_HASH(ino), ino, ftw_flags, path);
+}
+
+/*
+ * Debug helper: print a node's inode number, path count and paths at
+ * LOG_DEBUG level, each line introduced by `msg'.
+ *
+ * A NULL node is tolerated because recover() calls this before checking
+ * whether it was given one.  Both numbers are cast to the type the format
+ * expects; nlink_t is not unsigned long long everywhere, and a mismatched
+ * variadic argument would also corrupt the string argument that follows.
+ */
+static void
+dump_node(
+	char		*msg,
+	bignode_t	*node)
+{
+	nlink_t		k;
+
+	if (log_level < LOG_DEBUG)
+		return;
+
+	if (node == NULL) {
+		log_message(LOG_DEBUG, "%s: (no node)", msg);
+		return;
+	}
+
+	log_message(LOG_DEBUG, "%s: %llu %llu %s", msg,
+			(unsigned long long)node->ino,
+			(unsigned long long)node->numpaths, node->paths[0]);
+
+	for (k = 1; k < node->numpaths; k++)
+		log_message(LOG_DEBUG, "\t%s", node->paths[k]);
+}
+
+/* Debug helper: dump every node in the table when running at LOG_NITTY. */
+static void
+dump_nodehash(void)
+{
+	int		bucket;
+	int		idx;
+	bignode_t	*node;
+
+	if (log_level < LOG_NITTY)
+		return;
+
+	for (bucket = 0; bucket < NH_BUCKETS; bucket++) {
+		node = nodehash[bucket].nodes;
+		idx = 0;
+		while (idx < nodehash[bucket].lastnode) {
+			dump_node("nodehash", node);
+			idx++;
+			node++;
+		}
+	}
+}
+
+/*
+ * Call fn() on every node whose type flag equals `ftw_flags', in bucket
+ * order.  With quit_on_error set, the walk stops at the first non-zero
+ * result.  Returns the result of the last fn() call made (0 if none).
+ */
+static int
+for_all_nodes(
+	int		(*fn)(bignode_t *node),
+	int		ftw_flags,
+	int		quit_on_error)
+{
+	int		bucket;
+	int		idx;
+	int		rval = 0;
+	bignode_t	*node;
+
+	for (bucket = 0; bucket < NH_BUCKETS; bucket++) {
+		node = nodehash[bucket].nodes;
+
+		for (idx = 0; idx < nodehash[bucket].lastnode; idx++, node++) {
+			if (node->ftw_flags != ftw_flags)
+				continue;
+
+			rval = fn(node);
+			if (quit_on_error && rval)
+				return rval;
+		}
+	}
+
+	return rval;
+}
+
+/*
+ * Tree-walk callback: adds appropriate files to the inode hash table.
+ *
+ * Only regular files (FTW_F) and directories (FTW_D) are recorded, and,
+ * unless force_all is set, only those whose inode number does not fit in
+ * 32 bits.  Always returns 0.
+ */
+static int
+nftw_addnodes(
+	const char	*path,
+	const struct stat64 *st,
+	int		flags,
+	struct FTW	*sntfw)
+{
+	if (!force_all && st->st_ino <= XFS_MAXINUMBER_32)
+		return 0;
+
+	switch (flags) {
+	case FTW_F:
+		numfilenodes++;
+		break;
+	case FTW_D:
+		numdirnodes++;
+		break;
+	default:
+		return 0;
+	}
+
+	add_node_path(st->st_ino, flags, path);
+
+	return 0;
+}
+
+/*
+ * Attribute cloning code - most of this is here because attr_copy does not
+ * let us pick and choose which attributes we want to copy.
+ */
+
+/* Operation table shared by attr_clone_copy() and attr_replicate(). */
+attr_multiop_t	attr_ops[ATTR_MAX_MULTIOPS];
+
+/*
+ * Grab the first `count' attributes specified in attr_ops from the source
+ * file and write them out on the destination file.
+ *
+ * If reading an attribute fails, only the ones before it are written.
+ * Returns 0 only when every requested attribute was both read and
+ * written; any shortfall is reported and yields -1, so a caller can tell
+ * that the destination is missing attributes.
+ */
+
+static int
+attr_replicate(int src_fd, int dst_fd, int count)
+{
+	int	j, k;
+	int	rval = 0;
+
+	if (attr_multif(src_fd, attr_ops, count, 0) < 0)
+		return -1;
+
+	for (k = 0; k < count; k++) {
+		if (attr_ops[k].am_error) {
+			err_message(_("Error %d getting attribute"),
+					attr_ops[k].am_error);
+			/* entries k..count-1 will not be copied */
+			rval = -1;
+			break;
+		}
+		attr_ops[k].am_opcode = ATTR_OP_SET;
+	}
+
+	/* nothing was read successfully, so there is nothing to write */
+	if (k == 0)
+		return rval;
+
+	if (attr_multif(dst_fd, attr_ops, k, 0) < 0) {
+		err_message("on attr_multif set");
+		rval = -1;
+	}
+	for (j = 0; j < k; j++) {
+		if (attr_ops[j].am_error) {
+			err_message(_("Error %d setting attribute"),
+					attr_ops[j].am_error);
+			return -1;
+		}
+	}
+
+	return rval;
+}
+
+/*
+ * Copy all the attributes in the namespace selected by `flags' from src
+ * to dst.
+ *
+ * Names are listed into list_buf (ATTRBUFSIZE bytes) one chunk at a time.
+ * Values are staged in attr_buf (buf_len bytes) and handed to
+ * attr_replicate() in batches; a batch is flushed when the next value
+ * would not fit or when the attr_ops[] table is full.
+ *
+ * Returns 0 if everything was copied, 1 if listing failed or any batch
+ * reported an error.  Copying continues after a failed batch so that as
+ * many attributes as possible reach the destination.
+ */
+
+static int
+attr_clone_copy(
+	int		src_fd,
+	int		dst_fd,
+	char		*list_buf,
+	char		*attr_buf,
+	int		buf_len,
+	int		flags)
+{
+	attrlist_t 	*alist;
+	attrlist_ent_t	*attr;
+	attrlist_cursor_t cursor;
+	int		space, i, j;
+	int		rval = 0;
+	char		*ptr;
+
+	memset(&cursor, 0, sizeof(cursor));
+	do {
+		if (attr_listf(src_fd, list_buf, ATTRBUFSIZE, flags,
+				&cursor) < 0) {
+			err_message("on attr_listf");
+			return 1;
+		}
+
+		alist = (attrlist_t *)list_buf;
+
+		space = buf_len;
+		ptr = attr_buf;
+		for (j = 0, i = 0; i < alist->al_count; i++) {
+			attr = ATTR_ENTRY(list_buf, i);
+			if (space < attr->a_valuelen ||
+					j == ATTR_MAX_MULTIOPS) {
+				/* batch is full: flush it and start afresh */
+				if (attr_replicate(src_fd, dst_fd, j) < 0)
+					rval = 1;
+				j = 0;
+				space = buf_len;
+				ptr = attr_buf;
+			}
+			attr_ops[j].am_opcode = ATTR_OP_GET;
+			attr_ops[j].am_attrname = attr->a_name;
+			attr_ops[j].am_attrvalue = ptr;
+			attr_ops[j].am_length = (int) attr->a_valuelen;
+			attr_ops[j].am_flags = flags;
+			attr_ops[j].am_error = 0;
+			j++;
+			ptr += attr->a_valuelen;
+			space -= attr->a_valuelen;
+		}
+
+		log_message(LOG_NITTY, "copying attribute %d", i);
+
+		/* flush whatever is left from this chunk of names */
+		if (j && attr_replicate(src_fd, dst_fd, j) < 0)
+			rval = 1;
+
+	} while (alist->al_more);
+
+	return rval;
+}
+
+/*
+ * Copy the extended attributes of in_fd to out_fd for three namespaces in
+ * turn: the default one (flags 0), ATTR_ROOT and ATTR_SECURE.
+ *
+ * Returns 0 on success and 1 if the value buffer cannot be allocated or
+ * any namespace fails to copy.  All namespaces are attempted even after a
+ * failure.
+ */
+static int
+clone_attribs(int in_fd, int out_fd)
+{
+	static const int namespaces[] = { 0, ATTR_ROOT, ATTR_SECURE };
+	char	list_buf[ATTRBUFSIZE];
+	char	*attr_buf;
+	int	rval = 0;
+	int	i;
+
+	attr_buf = malloc(ATTR_MAX_VALUELEN * 2);
+	if (attr_buf == NULL) {
+		err_nomem();
+		return 1;
+	}
+
+	for (i = 0; i < 3; i++) {
+		if (attr_clone_copy(in_fd, out_fd, list_buf, attr_buf,
+				ATTR_MAX_VALUELEN * 2, namespaces[i]) != 0)
+			rval = 1;
+	}
+
+	free(attr_buf);
+	return rval;
+}
+
+/*
+ * Make `target' (open on tfd) look like `source' (open on sfd): access
+ * and modification times, ownership, mode, fsxattr settings and extended
+ * attributes.
+ *
+ * Failing to read the source's stat or fsxattr data returns 1 at once.
+ * Failures to apply times, ownership, mode or fsxattr are reported but do
+ * not stop the copy.  If ownership cannot be changed, the set-uid and
+ * set-gid bits are left out of the mode.  The return value is otherwise
+ * that of clone_attribs().
+ */
+static int
+dup_attributes(
+	char		*source,
+	int		sfd,
+	char		*target,
+	int		tfd)
+{
+	struct stat64	st;
+	struct timeval	tv[2];
+	struct fsxattr	fsx;
+
+	if (lstat64(source, &st) < 0) {
+		err_stat(source);
+		return 1;
+	}
+
+	if (xfs_getxattr(sfd, &fsx) < 0) {
+		/* this is an ioctl failure, not a stat failure */
+		err_message(_("failed to get inode attrs: %s"), source);
+		return 1;
+	}
+
+	tv[0].tv_sec = st.st_atim.tv_sec;
+	tv[0].tv_usec = st.st_atim.tv_nsec / 1000;
+	tv[1].tv_sec = st.st_mtim.tv_sec;
+	tv[1].tv_usec = st.st_mtim.tv_nsec / 1000;
+
+	if (utimes(target, tv) < 0)
+		err_message(_("%s: Cannot update target times"), target);
+
+	if (chown(target, st.st_uid, st.st_gid) < 0) {
+		err_message(_("%s: Cannot change target ownership to "
+			"uid(%d) gid(%d)"), target, st.st_uid, st.st_gid);
+
+		/* never hand set-id bits to an owner we did not intend */
+		if (chmod(target, st.st_mode & ~(S_ISUID | S_ISGID)) < 0)
+			err_message(_("%s: Cannot change target mode "
+					"to (%o)"), target, st.st_mode);
+	} else if (chmod(target, st.st_mode) < 0)
+		err_message(_("%s: Cannot change target mode to (%o)"),
+				target, st.st_mode);
+
+	if (xfs_setxattr(tfd, &fsx) < 0)
+		err_message(_("%s: Cannot set target extended attributes"),
+				target);
+
+	return clone_attribs(sfd, tfd);
+}
+
+/*
+ * Rename every entry of directory `srcpath' (other than "." and "..")
+ * into directory `targetpath'.
+ *
+ * *move_count is set to the number of entries moved, including when the
+ * function fails part way through, so that the caller can decide whether
+ * anything has to be moved back.  Returns 0 on success, non-zero if the
+ * directory cannot be opened, a resulting name is too long, or a rename
+ * fails.
+ *
+ * Both path names are built with bounded formatting.  The target
+ * directory name can be longer than the source's, so checking the source
+ * length alone does not protect the target buffer.
+ */
+static int
+move_dirents(
+	char		*srcpath,
+	char		*targetpath,
+	int		*move_count)
+{
+	int		rval = 0;
+	int		len;
+	DIR		*srcd;
+	struct dirent64	*dp;
+	char		srcname[PATH_MAX];
+	char		targetname[PATH_MAX];
+
+	*move_count = 0;
+
+	srcd = opendir(srcpath);
+	if (srcd == NULL) {
+		err_open(srcpath);
+		return 1;
+	}
+
+	while ((dp = readdir64(srcd)) != NULL) {
+		if (dp->d_ino == 0 || !strcmp(dp->d_name, ".") ||
+				!strcmp(dp->d_name, ".."))
+			continue;
+
+		len = snprintf(srcname, sizeof(srcname), "%s/%s",
+				srcpath, dp->d_name);
+		if (len < 0 || (size_t)len >= sizeof(srcname)) {
+			err_message(_("%s/%s: Name too long"), srcpath,
+					dp->d_name);
+			rval = 1;
+			goto quit;
+		}
+
+		len = snprintf(targetname, sizeof(targetname), "%s/%s",
+				targetpath, dp->d_name);
+		if (len < 0 || (size_t)len >= sizeof(targetname)) {
+			err_message(_("%s/%s: Name too long"), targetpath,
+					dp->d_name);
+			rval = 1;
+			goto quit;
+		}
+
+		rval = rename(srcname, targetname);
+		if (rval != 0) {
+			err_message(_("failed to rename: \'%s\' to \'%s\'"),
+					srcname, targetname);
+			goto quit;
+		}
+
+		log_message(LOG_DEBUG, "rename %s -> %s", srcname, targetname);
+
+		(*move_count)++;
+	}
+
+quit:
+	closedir(srcd);
+	return rval;
+}
+
+/*
+ * Give the directory node->paths[0] a new inode number.
+ *
+ * Two scratch directories are created beside the source: a "target" that
+ * will replace it and a "temp" that only holds a copy of its attributes
+ * (moving entries out of the source changes the source's timestamps).
+ * The sequence is:
+ *
+ *	1. create target		2. create temp
+ *	3. copy attributes src -> temp	4. move entries src -> target
+ *	5. copy attributes temp -> target
+ *	6. rmdir src			7. rmdir temp
+ *	then rename target to the source name.
+ *
+ * The phase is written to the recovery file after each step.  The
+ * scratch names are stored in cur_target/cur_temp before the matching
+ * phase is recorded, so the recovery file never names a step whose
+ * directory is unknown.
+ *
+ * A directory that has vanished, no longer needs renumbering, or is
+ * immutable/append-only is skipped and 0 is returned.  Any other failure
+ * returns non-zero.  Once entries have been moved, a failure moves them
+ * back before the scratch directories are removed.
+ */
+static int
+process_dir(
+	bignode_t	*node)
+{
+	int		sfd = -1;
+	int		tfd = -1;
+	int		targetfd = -1;
+	int		rval = 0;
+	int		len;
+	int		move_count = 0;
+	char		*srcname = NULL;
+	char		*pname = NULL;
+	char		*parent;
+	struct stat64	s1;
+	struct fsxattr  fsx;
+	char		target[PATH_MAX] = "";
+
+	SET_PHASE(DIR_PHASE);
+
+	dump_node("directory", node);
+
+	cur_node = node;
+	srcname = node->paths[0];
+
+	if (stat64(srcname, &s1) < 0) {
+		if (errno != ENOENT) {
+			err_stat(srcname);
+			global_rval |= 2;
+		}
+		goto quit;
+	}
+	if (s1.st_ino <= XFS_MAXINUMBER_32 && !force_all) {
+		/*
+		 * This directory has already changed ino's, probably due
+		 * to being moved during processing of a parent directory.
+		 */
+		log_message(LOG_DEBUG, "process_dir: skipping %s", srcname);
+		goto quit;
+	}
+
+	rval = 1;
+
+	sfd = open(srcname, O_RDONLY);
+	if (sfd < 0) {
+		err_open(srcname);
+		goto quit;
+	}
+
+	if (!platform_test_xfs_fd(sfd)) {
+		err_not_xfs(srcname);
+		goto quit;
+	}
+
+	if (xfs_getxattr(sfd, &fsx) < 0) {
+		err_message(_("failed to get inode attrs: %s"), srcname);
+		goto quit;
+	}
+	if (fsx.fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND)) {
+		err_message(_("%s: immutable/append, ignoring"), srcname);
+		global_rval |= 2;
+		rval = 0;
+		goto quit;
+	}
+
+	/* mkdir parent/target */
+	pname = strdup(srcname);
+	if (pname == NULL) {
+		err_nomem();
+		goto quit;
+	}
+	/*
+	 * Use dirname()'s return value: for a name without a slash it
+	 * returns "." and does not put the parent into pname.
+	 */
+	parent = dirname(pname);
+	len = snprintf(target, sizeof(target), "%s/%sXXXXXX", parent,
+			cmd_prefix);
+	if (len < 0 || (size_t)len >= sizeof(target)) {
+		err_message(_("%s: Name too long"), srcname);
+		goto quit;
+	}
+	if (mkdtemp(target) == NULL) {
+		err_message(_("Unable to create directory copy: %s"), srcname);
+		goto quit;
+	}
+
+	cur_target = strdup(target);
+	if (!cur_target) {
+		err_nomem();
+		rmdir(target);
+		goto quit;
+	}
+	SET_PHASE(DIR_PHASE_1);
+
+	/* same template again; its length was validated above */
+	snprintf(target, sizeof(target), "%s/%sXXXXXX", parent, cmd_prefix);
+	if (mkdtemp(target) == NULL) {
+		err_message(_("unable to create tmp directory copy"));
+		goto quit_unlink;
+	}
+
+	cur_temp = strdup(target);
+	if (!cur_temp) {
+		err_nomem();
+		rmdir(target);
+		goto quit_unlink;
+	}
+	SET_PHASE(DIR_PHASE_2);
+
+	tfd = open(cur_temp, O_RDONLY);
+	if (tfd < 0) {
+		err_open(cur_temp);
+		goto quit_unlink;
+	}
+
+	targetfd = open(cur_target, O_RDONLY);
+	if (targetfd < 0) {
+		err_open(cur_target);
+		goto quit_unlink;
+	}
+
+	/* copy timestamps, attribs and EAs, to cur_temp */
+	rval = dup_attributes(srcname, sfd, cur_temp, tfd);
+	if (rval != 0) {
+		err_message(_("unable to duplicate directory attributes: %s"),
+			    srcname);
+		goto quit_unlink;
+	}
+
+	SET_PHASE(DIR_PHASE_3);
+
+	/* move src dirents to cur_target (this changes timestamps on src) */
+	rval = move_dirents(srcname, cur_target, &move_count);
+	if (rval != 0) {
+		err_message(_("unable to move directory contents: %s to %s"),
+				srcname, cur_target);
+		/* uh oh, move everything back... */
+		if (move_count > 0)
+			goto quit_undo;
+		/* nothing was moved: just drop the scratch directories */
+		goto quit_unlink;
+	}
+
+	SET_PHASE(DIR_PHASE_4);
+
+	/* copy timestamps, attribs and EAs from cur_temp to cur_target */
+	rval = dup_attributes(cur_temp, tfd, cur_target, targetfd);
+	if (rval != 0) {
+		err_message(_("unable to duplicate directory attributes: %s"),
+				cur_temp);
+		/* the entries now live in cur_target: put them back first */
+		goto quit_undo;
+	}
+
+	SET_PHASE(DIR_PHASE_5);
+
+	/* rmdir src */
+	rval = rmdir(srcname);
+	if (rval != 0) {
+		err_message(_("unable to remove directory: %s"), srcname);
+		goto quit_undo;
+	}
+
+	SET_PHASE(DIR_PHASE_6);
+
+	rval = rmdir(cur_temp);
+	if (rval != 0)
+		err_message(_("unable to remove tmp directory: %s"), cur_temp);
+
+	SET_PHASE(DIR_PHASE_7);
+
+	/* rename cur_target src */
+	rval = rename(cur_target, srcname);
+	if (rval != 0) {
+		/*
+		 * we can't abort since the src dir is now gone.
+		 * let the admin clean this one up
+		 */
+		err_message(_("unable to rename directory: %s to %s"),
+				cur_target, srcname);
+	}
+	goto quit;
+
+ quit_undo:
+	if (move_dirents(cur_target, srcname, &move_count) != 0) {
+		/* oh, dear lord... let the admin clean this one up */
+		err_message(_("unable to move directory contents back: %s to %s"),
+				cur_target, srcname);
+		goto quit;
+	}
+	SET_PHASE(DIR_PHASE_3);
+
+ quit_unlink:
+	/* either name may be unset if we failed while creating them */
+	if (cur_target)
+		rmdir(cur_target);
+	if (cur_temp)
+		rmdir(cur_temp);
+
+ quit:
+
+	SET_PHASE(DIR_PHASE);
+
+	if (sfd >= 0)
+		close(sfd);
+	if (tfd >= 0)
+		close(tfd);
+	if (targetfd >= 0)
+		close(targetfd);
+
+	free(pname);
+	free(cur_target);
+	free(cur_temp);
+
+	cur_target = NULL;
+	cur_temp = NULL;
+	cur_node = NULL;
+	numdirsdone++;
+	return rval;
+}
+
+/*
+ * Give the regular file node->paths[0] a new inode number.
+ *
+ * A temporary file is created beside the source and given the source's
+ * attributes and size.  The data extents of the two are then swapped, the
+ * source is unlinked, the temporary file is renamed to the source name
+ * and every further hard link in node->paths[] is re-created against it.
+ * The phase is written to the recovery file after each step.
+ *
+ * A file that has vanished, no longer needs renumbering, is
+ * immutable/append-only or holds a mandatory lock is skipped and 0 is
+ * returned.  Any other failure returns non-zero.
+ *
+ * Every failure before the extent swap removes the temporary file.  After
+ * a successful swap the temporary file holds the data, so it is never
+ * removed on a later error; the administrator has to sort that case out.
+ */
+int
+process_file(bignode_t *node)
+{
+	int		sfd = -1;
+	int		tfd = -1;
+	int		i = 0;
+	int		len;
+	int		rval = 0;
+	struct stat64	s1;
+	char		*srcname = NULL;
+	char		*pname = NULL;
+	char		*parent;
+	xfs_swapext_t	sx;
+	xfs_bstat_t	bstatbuf;
+	struct fsxattr  fsx;
+	char		target[PATH_MAX] = "";
+
+	SET_PHASE(FILE_PHASE);
+
+	dump_node("file", node);
+
+	cur_node = node;
+	srcname = node->paths[0];
+
+	memset(&s1, 0, sizeof(s1));
+	memset(&bstatbuf, 0, sizeof(bstatbuf));
+	memset(&sx, 0, sizeof(sx));
+
+	if (stat64(srcname, &s1) < 0) {
+		if (errno != ENOENT) {
+			err_stat(srcname);
+			global_rval |= 2;
+		}
+		goto quit;
+	}
+	if (s1.st_ino <= XFS_MAXINUMBER_32 && !force_all)
+		/* this file has changed, and no longer needs processing */
+		goto quit;
+
+	/* open and sync source */
+	sfd = open(srcname, O_RDWR | O_DIRECT);
+	if (sfd < 0) {
+		err_open(srcname);
+		rval = 1;
+		goto quit;
+	}
+	if (!platform_test_xfs_fd(sfd)) {
+		err_not_xfs(srcname);
+		rval = 1;
+		goto quit;
+	}
+	if (fsync(sfd) < 0) {
+		err_message(_("sync failed: %s: %s"),
+				srcname, strerror(errno));
+		rval = 1;
+		goto quit;
+	}
+
+
+	/*
+	 * Check if a mandatory lock is set on the file to try and
+	 * avoid blocking indefinitely on the reads later. Note that
+	 * someone could still set a mandatory lock after this check
+	 * but before all reads have completed to block xfs_reno reads.
+	 * This change just closes the window a bit.
+	 */
+	if ((s1.st_mode & S_ISGID) && !(s1.st_mode & S_IXGRP)) {
+		struct flock fl;
+
+		fl.l_type = F_RDLCK;
+		fl.l_whence = SEEK_SET;
+		fl.l_start = (off_t)0;
+		fl.l_len = 0;
+		if (fcntl(sfd, F_GETLK, &fl) < 0 ) {
+			if (log_level >= LOG_DEBUG)
+				err_message("locking check failed: %s",
+						srcname);
+			global_rval |= 2;
+			goto quit;
+		}
+		if (fl.l_type != F_UNLCK) {
+			if (log_level >= LOG_DEBUG)
+				err_message("mandatory lock: %s: ignoring",
+						srcname);
+			global_rval |= 2;
+			goto quit;
+		}
+	}
+
+	if (xfs_getxattr(sfd, &fsx) < 0) {
+		err_message(_("failed to get inode attrs: %s"), srcname);
+		rval = 1;
+		goto quit;
+	}
+	if (fsx.fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND)) {
+		err_message(_("%s: immutable/append, ignoring"), srcname);
+		global_rval |= 2;
+		goto quit;
+	}
+
+	rval = 1;
+
+	if (realuid != 0 && realuid != s1.st_uid) {
+		errno = EACCES;
+		err_open(srcname);
+		goto quit;
+	}
+
+	/* creat target */
+	pname = strdup(srcname);
+	if (pname == NULL) {
+		err_nomem();
+		goto quit;
+	}
+	/*
+	 * Use dirname()'s return value: for a name without a slash it
+	 * returns "." and does not put the parent into pname.
+	 */
+	parent = dirname(pname);
+	len = snprintf(target, sizeof(target), "%s/%sXXXXXX", parent,
+			cmd_prefix);
+	if (len < 0 || (size_t)len >= sizeof(target)) {
+		err_message(_("%s: Name too long"), srcname);
+		goto quit;
+	}
+	tfd = mkstemp(target);
+	if (tfd < 0) {
+		err_message("unable to create file copy");
+		goto quit;
+	}
+	cur_target = strdup(target);
+	if (cur_target == NULL) {
+		err_nomem();
+		unlink(target);
+		goto quit;
+	}
+
+	SET_PHASE(FILE_PHASE_1);
+
+	/* Setup direct I/O */
+	if (fcntl(tfd, F_SETFL, O_DIRECT) < 0 ) {
+		err_message(_("could not set O_DIRECT for %s on tmp: %s"),
+				srcname, target);
+		unlink(target);
+		goto quit;
+	}
+
+	/* copy attribs & EAs to target */
+	if (dup_attributes(srcname, sfd, target, tfd) != 0) {
+		err_message(_("unable to duplicate file attributes: %s"),
+				srcname);
+		unlink(target);
+		goto quit;
+	}
+
+	if (xfs_bulkstat_single(sfd, &s1.st_ino, &bstatbuf) < 0) {
+		err_message(_("unable to bulkstat source file: %s"),
+				srcname);
+		unlink(target);
+		goto quit;
+	}
+
+	if (bstatbuf.bs_ino != s1.st_ino) {
+		err_message(_("bulkstat of source file returned wrong inode: %s"),
+				srcname);
+		unlink(target);
+		goto quit;
+	}
+
+	/* the temp file must be as large as the source before the swap */
+	if (ftruncate64(tfd, bstatbuf.bs_size) < 0) {
+		err_message(_("unable to size file copy: %s: %s"),
+				target, strerror(errno));
+		unlink(target);
+		goto quit;
+	}
+
+	/* swapextents src target */
+	sx.sx_stat     = bstatbuf; /* struct copy */
+	sx.sx_version  = XFS_SX_VERSION;
+	sx.sx_fdtarget = sfd;
+	sx.sx_fdtmp    = tfd;
+	sx.sx_offset   = 0;
+	sx.sx_length   = bstatbuf.bs_size;
+
+	/* Swap the extents */
+	rval = xfs_swapext(sfd, &sx);
+	if (rval < 0) {
+		if (log_level >= LOG_DEBUG) {
+			switch (errno) {
+			case ENOTSUP:
+				err_message("%s: file type not supported",
+					srcname);
+				break;
+			case EFAULT:
+				/* The file has changed since we started the copy */
+				err_message("%s: file modified, "
+					 "inode renumber aborted: %lld",
+					 srcname,
+					 (long long)bstatbuf.bs_size);
+				break;
+			case EBUSY:
+				/* Timestamp has changed or mmap'ed file */
+				err_message("%s: file busy", srcname);
+				break;
+			default:
+				err_message(_("Swap extents failed: %s: %s"),
+					srcname, strerror(errno));
+				break;
+			}
+		} else
+			err_message(_("Swap extents failed: %s: %s"),
+					srcname, strerror(errno));
+		/* nothing was swapped, the temp file holds no data */
+		unlink(target);
+		goto quit;
+	}
+
+	if (bstatbuf.bs_dmevmask | bstatbuf.bs_dmstate) {
+		struct fsdmidata fssetdm;
+
+		/* Set the DMAPI Fields. */
+		fssetdm.fsd_dmevmask = bstatbuf.bs_dmevmask;
+		fssetdm.fsd_padding = 0;
+		fssetdm.fsd_dmstate = bstatbuf.bs_dmstate;
+
+		if (ioctl(tfd, XFS_IOC_FSSETDM, (void *)&fssetdm ) < 0)
+			err_message(_("attempt to set DMI attributes "
+					"of %s failed"), target);
+	}
+
+	SET_PHASE(FILE_PHASE_2);
+
+	/* unlink src; from here on the data lives in the temp file */
+	rval = unlink(srcname);
+	if (rval != 0) {
+		err_message(_("unable to remove file: %s"), srcname);
+		goto quit;
+	}
+
+	SET_PHASE(FILE_PHASE_3);
+
+	/* rename target src */
+	rval = rename(target, srcname);
+	if (rval != 0) {
+		/*
+		 * we can't abort since the src file is now gone.
+		 * let the admin clean this one up
+		 */
+		err_message(_("unable to rename file: %s to %s"),
+				target, srcname);
+		goto quit;
+	}
+
+	SET_PHASE(FILE_PHASE_4);
+
+	/* for each hardlink, unlink and creat pointing to target */
+	for (i = 1; i < node->numpaths; i++) {
+		/* unlink src */
+		rval = unlink(node->paths[i]);
+		if (rval != 0) {
+			err_message(_("unable to remove file: %s"),
+				       node->paths[i]);
+			goto quit;
+		}
+
+		rval = link(srcname, node->paths[i]);
+		if (rval != 0) {
+			err_message("unable to link to file: %s", srcname);
+			goto quit;
+		}
+		numfilesdone++;
+	}
+
+ quit:
+	cur_node = NULL;
+
+	SET_PHASE(FILE_PHASE);
+
+	if (sfd >= 0)
+		close(sfd);
+	if (tfd >= 0)
+		close(tfd);
+
+	free(pname);
+	free(cur_target);
+
+	cur_target = NULL;
+
+	numfilesdone++;
+	return rval;
+}
+
+/*
+ * Create the recovery file and keep it open in recover_fd.
+ *
+ * The file must not exist yet: an existing one means an earlier run was
+ * interrupted, and the user is told how to proceed.  The file also has to
+ * be on an XFS filesystem.  Returns 0 on success, 1 on failure.
+ *
+ * If the location is rejected, the file this call has just created is
+ * removed again and recover_fd is invalidated.  Otherwise the leftover
+ * file would make the next run report a stale recovery file, and
+ * update_recoverfile() would write to a closed descriptor.
+ */
+static int
+open_recoverfile(void)
+{
+	recover_fd = open(recover_file, O_RDWR | O_SYNC | O_CREAT | O_EXCL,
+			0600);
+	if (recover_fd < 0) {
+		if (errno == EEXIST)
+			err_message(_("Recovery file already exists, either "
+				"run '%s -r %s' or remove the file."),
+				progname, recover_file);
+		else
+			err_open(recover_file);
+		return 1;
+	}
+
+	if (!platform_test_xfs_fd(recover_fd)) {
+		err_not_xfs(recover_file);
+		close(recover_fd);
+		recover_fd = -1;
+		unlink(recover_file);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Rewrite the recovery file so that it describes the current state:
+ *
+ *	<phase>\n<ino>\n<ftw flags>\n<path>...\ntarget: <name>\ntemp: <name>\nend\n
+ *
+ * While scanning, or between two nodes, a fixed "nothing in progress"
+ * record is written instead.  Does nothing if the recovery file is not
+ * open.
+ *
+ * The file is truncated before every rewrite, and only the text itself
+ * is written, without a string terminator, so no bytes of a longer
+ * earlier record survive.  Names that are not set are written as empty
+ * strings.  A failed write is reported but is not fatal.
+ */
+static void
+update_recoverfile(void)
+{
+	static const char null_file[] = "0\n0\n0\n\ntarget: \ntemp: \nend\n";
+	static size_t	buf_size = 0;
+	static char	*buf = NULL;
+	const char	*out;
+	int 		i, len;
+
+	if (recover_fd <= 0)
+		return;
+
+	if (cur_node == NULL || cur_phase == 0) {
+		/* inbetween processing or still scanning */
+		out = null_file;
+		len = sizeof(null_file) - 1;
+	} else {
+		ASSERT(highest_numpaths > 0);
+		if (buf == NULL) {
+			/* room for every path plus the target and temp names */
+			buf_size = (highest_numpaths + 3) * PATH_MAX;
+			buf = malloc(buf_size);
+			if (buf == NULL) {
+				err_nomem();
+				exit(1);
+			}
+		}
+
+		len = sprintf(buf, "%d\n%llu\n%d\n", cur_phase,
+				(unsigned long long)cur_node->ino,
+				cur_node->ftw_flags);
+
+		for (i = 0; i < cur_node->numpaths; i++)
+			len += sprintf(buf + len, "%s\n", cur_node->paths[i]);
+
+		len += sprintf(buf + len, "target: %s\ntemp: %s\nend\n",
+				cur_target ? cur_target : "",
+				cur_temp ? cur_temp : "");
+
+		ASSERT((size_t)len < buf_size);
+		out = buf;
+	}
+
+	if (lseek(recover_fd, 0, SEEK_SET) < 0 ||
+	    ftruncate(recover_fd, 0) < 0 ||
+	    write(recover_fd, out, len) != len)
+		err_message(_("Cannot update recovery file %s: %s"),
+				recover_file, strerror(errno));
+}
+
+/*
+ * Called when the run is interrupted: announce it, release the inode
+ * hash table and confirm completion.
+ */
+static void
+cleanup(void)
+{
+	char		*start_msg = _("Interrupted -- cleaning up...");
+
+	log_message(LOG_NORMAL, start_msg);
+	free_nodehash();
+	log_message(LOG_NORMAL, _("Done."));
+}
+
+/*
+ * Signal handler.
+ *
+ * Any signal other than SIGALRM cleans up and exits with status 1.
+ * SIGALRM draws a progress indicator on stderr that depends on the
+ * current phase: a spinner while scanning, a percentage while processing
+ * directories or files (with counts and elapsed time at LOG_INFO and
+ * above).  The alarm is re-armed if a poll interval is set.
+ */
+static void
+sighandler(int sig)
+{
+	static char	cycle[4] = "-\\|/";
+	static uint64_t	cur_cycle = 0;
+	double		percent;
+	int		verbose;
+
+	alarm(0);
+
+	if (sig != SIGALRM) {
+		cleanup();
+		exit(1);
+	}
+
+	verbose = (log_level >= LOG_INFO);
+
+	if (cur_phase == SCAN_PHASE) {
+		char	spinner = cycle[cur_cycle % 4];
+
+		if (!verbose)
+			fprintf(stderr, "\r%c", spinner);
+		else
+			fprintf(stderr, _("\r%llu files and %llu dirs "
+				"to renumber found... %c"),
+				(long long)numfilenodes,
+				(long long)numdirnodes,
+				spinner);
+		cur_cycle++;
+	} else if (cur_phase >= DIR_PHASE && cur_phase <= DIR_PHASE_MAX) {
+		percent = ((double)numdirsdone / (double)numdirnodes) * 100.0;
+		if (percent > 100.0)
+			percent = 100.0;
+		if (!verbose)
+			fprintf(stderr, "\r%.1f%%", percent);
+		else
+			fprintf(stderr, _("\r%.1f%%, %llu of %llu "
+				"dirs, %u seconds elapsed"), percent,
+				(long long)numdirsdone,
+				(long long)numdirnodes,
+				(int)(time(0) - starttime));
+	} else if (cur_phase >= FILE_PHASE && cur_phase <= FILE_PHASE_MAX) {
+		percent = ((double)numfilesdone / (double)numfilenodes) * 100.0;
+		if (percent > 100.0)
+			percent = 100.0;
+		if (!verbose)
+			fprintf(stderr, "\r%.1f%%", percent);
+		else
+			fprintf(stderr, _("\r%.1f%%, %llu of %llu "
+				"files, %u seconds elapsed"),
+				percent, (long long)numfilesdone,
+				(long long)numfilenodes,
+				(int)(time(0) - starttime));
+	}
+
+	/* tell the message functions that a progress line is on screen */
+	poll_output = 1;
+	signal(SIGALRM, sighandler);
+
+	if (poll_interval)
+		alarm(poll_interval);
+}
+
+/*
+ * Parse a recovery file.
+ *
+ * A recovery file should look like:
+ *
+ *	<phase>
+ *	<ino number>
+ *	<ftw flags>
+ *	<first path to inode>
+ *	<hardlinks to inode>
+ *	target: <path to target dir or file>
+ *	temp: <path to temp dir if dir phase>
+ *	end
+ *
+ * On success 0 is returned with *phase set and, unless the phase is
+ * SCAN_PHASE (nothing was in progress), *node, *target and *temp
+ * describing the interrupted operation.  Every listed path must still
+ * refer to the recorded inode and the target must exist.  On failure a
+ * message is printed and 1 is returned.
+ *
+ * All outputs are reset before anything is read, so every error path,
+ * and the caller, sees defined values.  The inode number is held in an
+ * xfs_ino_t because ino_t may be too narrow for the 64-bit numbers this
+ * tool exists to handle.
+ */
+static int
+read_recover_file(
+	char		*recover_file,
+	bignode_t	**node,
+	char		**target,
+	char		**temp,
+	int		*phase)
+{
+	FILE		*file;
+	int		rval = 1;
+	xfs_ino_t	ino;
+	int		ftw_flags;
+	char		buf[PATH_MAX + 10]; /* path + "target: " */
+	struct stat64	s;
+	int		first_path;
+
+	*phase = 0;
+	*node = NULL;
+	*target = NULL;
+	*temp = NULL;
+
+	file = fopen(recover_file, "r");
+	if (file == NULL) {
+		err_open(recover_file);
+		return 1;
+	}
+
+	/* read phase */
+	if (fgets(buf, sizeof(buf), file) == NULL) {
+		err_message("Recovery failed: unable to read phase");
+		goto quit;
+	}
+	/* strip the newline, if there is one, without touching other text */
+	buf[strcspn(buf, "\n")] = '\0';
+	*phase = atoi(buf);
+	if (*phase == SCAN_PHASE) {
+		fclose(file);
+		return 0;
+	}
+	if ((*phase < DIR_PHASE || *phase > DIR_PHASE_MAX) &&
+			(*phase < FILE_PHASE || *phase > FILE_PHASE_MAX)) {
+		err_message("Recovery failed: failed to read valid recovery phase");
+		goto quit;
+	}
+
+	/* read inode number */
+	if (fgets(buf, sizeof(buf), file) == NULL) {
+		err_message("Recovery failed: unable to read inode number");
+		goto quit;
+	}
+	buf[strcspn(buf, "\n")] = '\0';
+	ino = strtoull(buf, NULL, 10);
+	if (ino == 0) {
+		err_message("Recovery failed: unable to read inode number");
+		goto quit;
+	}
+
+	/* read ftw_flags */
+	if (fgets(buf, sizeof(buf), file) == NULL) {
+		err_message("Recovery failed: unable to read flags");
+		goto quit;
+	}
+	buf[strcspn(buf, "\n")] = '\0';
+	if (buf[1] != '\0' || (buf[0] != '0' && buf[0] != '1')) {
+		err_message("Recovery failed: unable to read flags: '%s'", buf);
+		goto quit;
+	}
+	ftw_flags = atoi(buf);
+
+	/* read paths and target path */
+	first_path = 1;
+	while (fgets(buf, sizeof(buf), file) != NULL) {
+		buf[strcspn(buf, "\n")] = '\0';
+
+		log_message(LOG_DEBUG, "path: '%s'", buf);
+
+		if (buf[0] == '/') {
+			if (stat64(buf, &s) < 0) {
+				err_message(_("Recovery failed: cannot "
+						"stat '%s'"), buf);
+				goto quit;
+			}
+			if ((xfs_ino_t)s.st_ino != ino) {
+				err_message(_("Recovery failed: inode "
+						"number for '%s' does not "
+						"match recorded number"), buf);
+				goto quit;
+			}
+
+			if (first_path) {
+				first_path = 0;
+				*node = add_node_path(ino, ftw_flags, buf);
+				/* out of memory, already reported */
+				if (*node == NULL)
+					goto quit;
+			}
+			else {
+				add_path(*node, buf);
+			}
+		}
+		else if (strncmp(buf, "target: ", 8) == 0) {
+			*target = strdup(buf + 8);
+			if (*target == NULL) {
+				err_nomem();
+				goto quit;
+			}
+			if (stat64(*target, &s) < 0) {
+				err_message(_("Recovery failed: cannot "
+						"stat '%s'"), *target);
+				goto quit;
+			}
+		}
+		else if (strncmp(buf, "temp: ", 6) == 0) {
+			*temp = strdup(buf + 6);
+			if (*temp == NULL) {
+				err_nomem();
+				goto quit;
+			}
+		}
+		else if (strcmp(buf, "end") == 0) {
+			rval = 0;
+			goto quit;
+		}
+		else {
+			err_message(_("Recovery failed: unrecognised "
+					"string: '%s'"), buf);
+			goto quit;
+		}
+	}
+
+	err_message(_("Recovery failed: end of recovery file not found"));
+
+ quit:
+	if (*node == NULL) {
+		err_message(_("Recovery failed: no valid inode or paths "
+				"specified"));
+		rval = 1;
+	}
+
+	if (*target == NULL) {
+		err_message(_("Recovery failed: no inode target specified"));
+		rval = 1;
+	}
+
+	fclose(file);
+
+	return rval;
+}
+
+/*
+ * Resume an interrupted operation described by a recovery file.
+ *
+ *   node   - node whose paths[0] is the original (source) path and whose
+ *            remaining paths are hardlinks to it; may be NULL, in which
+ *            case no source name is available
+ *   target - path of the new directory or file
+ *   tname  - path of the temporary directory (used by directory phases
+ *            2, 3, 4 and 6 only)
+ *   phase  - phase recorded in the recovery file; selects the entry point
+ *            into the switch below, after which the cases fall through
+ *            to finish the remaining steps
+ *
+ * When every step succeeds the recovery file is unlinked; otherwise it is
+ * left in place.  Returns 0 on success, non-zero on failure.
+ */
+int
+recover(
+	bignode_t	*node,
+	char		*target,
+	char		*tname,
+	int		phase)
+{
+	int		tfd = -1;
+	int		targetfd = -1;
+	char		*srcname = NULL;
+	int		rval = 0;
+	int		i;
+	int		move_count = 0;
+
+	dump_node("recover", node);
+	log_message(LOG_DEBUG, "target: %s, phase: %x", target, phase);
+
+	if (node)
+		srcname = node->paths[0];
+
+	/*
+	 * The phases below that touch the temporary directory cannot work
+	 * without its name.  Refuse up front rather than passing NULL to
+	 * rmdir()/open(), and, for phase 3, before any directory contents
+	 * have been moved.
+	 */
+	if (tname == NULL && (phase == DIR_PHASE_2 || phase == DIR_PHASE_3 ||
+			phase == DIR_PHASE_4 || phase == DIR_PHASE_6)) {
+		err_message(_("Recovery failed: no temporary directory "
+				"specified"));
+		rval = 1;
+		goto report;
+	}
+
+	switch (phase) {
+
+	case DIR_PHASE_2:
+rmtemps:
+		log_message(LOG_NORMAL, _("Removing temporary directory: '%s'"),
+				tname);
+		/* ENOENT means it is already gone, which is fine */
+		if (rmdir(tname) < 0 && errno != ENOENT) {
+			err_message(_("unable to remove directory: %s"), tname);
+			rval = 1;
+		}
+		/* FALL THRU */
+	case DIR_PHASE_1:
+		log_message(LOG_NORMAL, _("Removing target directory: '%s'"),
+				target);
+		if (rmdir(target) < 0 && errno != ENOENT) {
+			err_message(_("unable to remove directory: %s"),
+					target);
+			rval = 1;
+		}
+		break;
+
+	case DIR_PHASE_3:
+		log_message(LOG_NORMAL, _("Completing moving directory "
+				"contents: '%s' to '%s'"), srcname, target);
+		if (move_dirents(srcname, target, &move_count) != 0) {
+			err_message(_("unable to move directory contents: "
+					"%s to %s"), srcname, target);
+			/* uh oh, move everything back... */
+			if (move_count > 0) {
+				if (move_dirents(target, srcname,
+						&move_count) != 0) {
+					/* oh, dear lord... let the admin
+					 * clean this one up */
+					err_message(_("unable to move directory "
+						"contents back: %s to %s"),
+						target, srcname);
+					exit(1);
+				}
+			}
+			/* contents are back in place: drop temp and target */
+			goto rmtemps;
+		}
+		/* FALL THRU */
+	case DIR_PHASE_4:
+		log_message(LOG_NORMAL, _("Setting attributes for target "
+				"directory: \'%s\'"), target);
+		tfd = open(tname, O_RDONLY);
+		if (tfd < 0) {
+			err_open(tname);
+			rval = 1;
+			break;
+		}
+		targetfd = open(target, O_RDONLY);
+		if (targetfd < 0) {
+			err_open(target);
+			/* report first, then release the fd already held */
+			close(tfd);
+			rval = 1;
+			break;
+		}
+		rval = dup_attributes(tname, tfd, target, targetfd);
+		if (rval != 0) {
+			err_message(_("unable to duplicate directory "
+					"attributes: %s"), tname);
+			/* don't leak the descriptors on the failure path */
+			close(tfd);
+			close(targetfd);
+			break;
+		}
+		close(tfd);
+		close(targetfd);
+		/* FALL THRU */
+	case DIR_PHASE_6:
+		log_message(LOG_NORMAL, _("Removing temporary directory: \'%s\'"),
+				tname);
+		if (rmdir(tname) < 0 && errno != ENOENT) {
+			err_message(_("unable to remove directory: %s"),
+					tname);
+			rval = 1;
+			break;
+		}
+		/* FALL THRU */
+	case DIR_PHASE_5:
+		log_message(LOG_NORMAL, _("Removing old directory: \'%s\'"),
+				srcname);
+		if (rmdir(srcname) < 0 && errno != ENOENT) {
+			err_message(_("unable to remove directory: %s"),
+					srcname);
+			rval = 1;
+			break;
+		}
+		/* FALL THRU */
+	case DIR_PHASE_7:
+		log_message(LOG_NORMAL, _("Renaming new directory to old "
+			"directory: \'%s\' -> \'%s\'"), target, srcname);
+		rval = rename(target, srcname);
+		if (rval != 0) {
+			/* we can't abort since the src dir is now gone.
+			 * let the admin clean this one up
+			 */
+			err_message(_("unable to rename directory: %s to %s"),
+					target, srcname);
+			break;
+		}
+		break;
+
+
+	case FILE_PHASE_1:
+		log_message(LOG_NORMAL, _("Unlinking temporary file: \'%s\'"), target);
+		/* result deliberately not checked */
+		unlink(target);
+		break;
+
+	case FILE_PHASE_2:
+		log_message(LOG_NORMAL, _("Unlinking old file: \'%s\'"), srcname);
+		rval = unlink(srcname);
+		if (rval != 0) {
+			err_message(_("unable to remove file: %s"), srcname);
+			break;
+		}
+		/* FALL THRU */
+	case FILE_PHASE_3:
+		log_message(LOG_NORMAL, _("Renaming new file to old file: "
+				"\'%s\' -> \'%s\'"), target, srcname);
+		rval = rename(target, srcname);
+		if (rval != 0) {
+			/* we can't abort since the src file is now gone.
+			 * let the admin clean this one up
+			 */
+			err_message(_("unable to rename file: %s to %s"),
+					target, srcname);
+			break;
+		}
+		/* FALL THRU */
+	case FILE_PHASE_4:
+		/* for each hardlink, unlink and creat pointing to target */
+		for (i = 1; i < node->numpaths; i++) {
+			if (i == 1)
+				log_message(LOG_NORMAL, _("Resetting hardlinks to "
+						"new file"));
+
+			rval = unlink(node->paths[i]);
+			if (rval != 0) {
+				err_message(_("unable to remove file: %s"),
+						node->paths[i]);
+				break;
+			}
+			rval = link(srcname, node->paths[i]);
+			if (rval != 0) {
+				err_message(_("unable to link to file: %s"),
+						srcname);
+				break;
+			}
+		}
+		break;
+
+	default:
+		/* any other phase: nothing to redo, rval stays 0 */
+		break;
+	}
+
+report:
+	if (rval == 0) {
+		log_message(LOG_NORMAL, _("Removing recover file: \'%s\'"),
+				recover_file);
+		unlink(recover_file);
+		log_message(LOG_NORMAL, _("Recovery done."));
+	}
+	else {
+		log_message(LOG_NORMAL, _("Leaving recover file: \'%s\'"),
+				recover_file);
+		log_message(LOG_NORMAL, _("Recovery failed."));
+	}
+
+	return rval;
+}
+
+/*
+ * xfs_reno entry point.
+ *
+ * Options:
+ *   -f        set force_all
+ *   -n        no-op mode: scan and report what would be processed only
+ *   -p        enable polling output (interval defaults to 1 if -P unset)
+ *   -q        set log_level to 0 (incompatible with -v)
+ *   -v        increase log_level (incompatible with -q)
+ *   -P secs   poll interval passed to alarm()
+ *   -r file   recover from the given recovery file instead of scanning
+ *
+ * Without -r exactly one absolute pathname (regular file or directory)
+ * is required.  Returns rval | global_rval for a normal run, or the
+ * result of recover() for a -r run.
+ */
+int
+main(
+	int		argc,
+	char		*argv[])
+{
+	int		c = 0;
+	int		rval = 0;
+	int		q_opt = 0;
+	int		v_opt = 0;
+	int		p_opt = 0;
+	int		n_opt = 0;
+	int		len;
+	char		pathname[PATH_MAX];
+	char		dirbuf[PATH_MAX];
+	struct stat64	st;
+
+	progname = basename(argv[0]);
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+
+	while ((c = getopt(argc, argv, "fnpqvP:r:")) != -1) {
+		switch (c) {
+		case 'f':
+			force_all = 1;
+			break;
+		case 'n':
+			n_opt++;
+			break;
+		case 'p':
+			p_opt++;
+			break;
+		case 'q':
+			if (v_opt)
+				err_message(_("'q' option incompatible "
+						"with 'v' option"));
+			q_opt++;
+			log_level=0;
+			break;
+		case 'v':
+			if (q_opt)
+				err_message(_("'v' option incompatible "
+						"with 'q' option"));
+			v_opt++;
+			log_level++;
+			break;
+		case 'P':
+			poll_interval = atoi(optarg);
+			break;
+		case 'r':
+			recover_file = optarg;
+			break;
+		default:
+			/*
+			 * The format takes a string and a character; getopt()
+			 * returns '?' here and leaves the offending option
+			 * character in optopt.
+			 */
+			err_message(_("%s: illegal option -- %c\n"),
+					progname, optopt);
+			usage();
+			/* NOTREACHED */
+			break;
+		}
+	}
+
+	/* a pathname is mandatory unless we are recovering */
+	if (optind != argc - 1 && recover_file == NULL) {
+		usage();
+		exit(1);
+	}
+
+	realuid = getuid();
+	starttime = time(0);
+
+	init_nodehash();
+
+	signal(SIGALRM, sighandler);
+	signal(SIGABRT, sighandler);
+	signal(SIGHUP, sighandler);
+	signal(SIGINT, sighandler);
+	signal(SIGQUIT, sighandler);
+	signal(SIGTERM, sighandler);
+
+	if (p_opt && poll_interval == 0) {
+		poll_interval = 1;
+	}
+	if (poll_interval)
+		alarm(poll_interval);
+
+	if (recover_file) {
+		bignode_t	*node = NULL;
+		char		*target = NULL;
+		char		*tname = NULL;
+		int		phase = 0;
+
+		/* no-op mode: do not act on the recovery file at all */
+		if (n_opt)
+			goto quit;
+
+		/* read node info from recovery file */
+		if (read_recover_file(recover_file, &node, &target,
+				&tname, &phase) != 0)
+			exit(1);
+
+		rval = recover(node, target, tname, phase);
+
+		free(target);
+		free(tname);
+
+		return rval;
+	}
+
+	recover_file = malloc(PATH_MAX);
+	if (recover_file == NULL) {
+		err_nomem();
+		exit(1);
+	}
+	recover_file[0] = '\0';
+
+	/* pathname[] is fixed size: reject anything that would not fit */
+	if (strlen(argv[optind]) >= sizeof(pathname)) {
+		err_message(_("pathname too long"));
+		exit(1);
+	}
+	strcpy(pathname, argv[optind]);
+	if (pathname[0] != '/') {
+		err_message(_("pathname must begin with a slash ('/')"));
+		exit(1);
+	}
+
+	if (stat64(pathname, &st) < 0) {
+		err_stat(pathname);
+		exit(1);
+	}
+	if (S_ISREG(st.st_mode)) {
+		/* single file specified */
+		if (st.st_nlink > 1) {
+			err_message(_("cannot process single file with a "
+					"link count greater than 1"));
+			exit(1);
+		}
+
+		/*
+		 * The recovery file lives next to the file.  dirname() may
+		 * modify its argument and may return a pointer to other
+		 * storage, so work on a scratch copy and use the return
+		 * value.
+		 */
+		strcpy(dirbuf, pathname);
+		len = snprintf(recover_file, PATH_MAX, "%s/xfs_reno.recover",
+				dirname(dirbuf));
+		if (len < 0 || len >= PATH_MAX) {
+			err_message(_("pathname too long"));
+			exit(1);
+		}
+		if (!n_opt) {
+			if (open_recoverfile() != 0)
+				exit(1);
+		}
+		add_node_path(st.st_ino, FTW_F, pathname);
+	}
+	else if (S_ISDIR(st.st_mode)) {
+		/* directory tree specified: recovery file goes inside it */
+		len = snprintf(recover_file, PATH_MAX, "%s/xfs_reno.recover",
+				pathname);
+		if (len < 0 || len >= PATH_MAX) {
+			err_message(_("pathname too long"));
+			exit(1);
+		}
+		if (!n_opt) {
+			if (open_recoverfile() != 0)
+				exit(1);
+		}
+
+		/* directory scan */
+		log_message(LOG_INFO, _("\rScanning directory tree..."));
+		SET_PHASE(SCAN_PHASE);
+		nftw64(pathname, nftw_addnodes, 100, FTW_PHYS | FTW_MOUNT);
+	}
+	else {
+		err_message(_("pathname must be either a regular file "
+				"or directory"));
+		exit(1);
+	}
+
+	dump_nodehash();
+
+	if (n_opt) {
+		/* n flag set, don't do anything */
+		if (numdirnodes)
+			log_message(LOG_NORMAL, "\rWould process %d %s",
+					numdirnodes, numdirnodes == 1 ?
+						"directory" : "directories");
+		else
+			log_message(LOG_NORMAL, "\rNo directories to process");
+
+		if (numfilenodes)
+			/* process files */
+			log_message(LOG_NORMAL, "\rWould process %d %s",
+					numfilenodes, numfilenodes == 1 ?
+						"file" : "files");
+		else
+			log_message(LOG_NORMAL, "\rNo files to process");
+	} else {
+		/* process directories */
+		if (numdirnodes) {
+			log_message(LOG_INFO, _("\rProcessing %d %s..."),
+					numdirnodes, numdirnodes == 1 ?
+					    _("directory") : _("directories"));
+			cur_phase = DIR_PHASE;
+			rval = for_all_nodes(process_dir, FTW_D, 1);
+			if (rval != 0)
+				goto quit;
+		}
+		else
+			log_message(LOG_INFO, _("\rNo directories to process..."));
+
+		if (numfilenodes) {
+			/* process files */
+			log_message(LOG_INFO, _("\rProcessing %d %s..."),
+					numfilenodes, numfilenodes == 1 ?
+						_("file") : _("files"));
+			cur_phase = FILE_PHASE;
+			for_all_nodes(process_file, FTW_F, 0);
+		}
+		else
+			log_message(LOG_INFO, _("\rNo files to process..."));
+	}
+quit:
+	free_nodehash();
+
+	close(recover_fd);
+
+	/*
+	 * In no-op mode nothing was created here, so any file at this path
+	 * (a user-supplied -r file, or one left by an earlier interrupted
+	 * run) is not ours to delete.
+	 */
+	if (rval == 0 && !n_opt)
+		unlink(recover_file);
+
+	/* time_t is not necessarily unsigned int: convert to match %u */
+	log_message(LOG_DEBUG, "\r%u seconds elapsed",
+			(unsigned int)(time(0) - starttime));
+	log_message(LOG_INFO, _("\rDone."));
+
+	return rval | global_rval;
+}

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: REVIEW: xfs_reno
  2007-10-02  7:08 REVIEW: xfs_reno Barry Naujok
@ 2007-10-02  7:20 ` Nathan Scott
  2007-10-02  9:02 ` Christoph Hellwig
  1 sibling, 0 replies; 10+ messages in thread
From: Nathan Scott @ 2007-10-02  7:20 UTC (permalink / raw)
  To: Barry Naujok; +Cc: xfs@oss.sgi.com, xfs-dev

On Tue, 2007-10-02 at 17:08 +1000, Barry Naujok wrote:
> The attached tool allows an inode64 filesystem to be converted to inode32.
> For this to work, the filesystem has to be mounted inode32 before it's run.
> 
> I'm not sure if there is any packaging changes required.

I expect not, the Makefile handles that.  Is there a man page?

cheers.

--
Nathan

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: REVIEW: xfs_reno
  2007-10-02  7:08 REVIEW: xfs_reno Barry Naujok
  2007-10-02  7:20 ` Nathan Scott
@ 2007-10-02  9:02 ` Christoph Hellwig
  2007-10-02  9:19   ` David Chinner
  1 sibling, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2007-10-02  9:02 UTC (permalink / raw)
  To: Barry Naujok; +Cc: xfs@oss.sgi.com, xfs-dev

On Tue, Oct 02, 2007 at 05:08:59PM +1000, Barry Naujok wrote:
> 
> The attached tool allows an inode64 filesystem to be converted to inode32.
> For this to work, the filesystem has to be mounted inode32 before it's run.
> 
> I'm not sure if there is any packaging changes required.

Together with the stop allocating from specific AGs patch this should be
90% towards an xfs_shrinkfs, right?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: REVIEW: xfs_reno
  2007-10-02  9:02 ` Christoph Hellwig
@ 2007-10-02  9:19   ` David Chinner
  2007-10-02 16:41     ` Russell Cattelan
                       ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: David Chinner @ 2007-10-02  9:19 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Barry Naujok, xfs@oss.sgi.com, xfs-dev

On Tue, Oct 02, 2007 at 10:02:16AM +0100, Christoph Hellwig wrote:
> On Tue, Oct 02, 2007 at 05:08:59PM +1000, Barry Naujok wrote:
> > 
> > The attached tool allows an inode64 filesystem to be converted to inode32.
> > For this to work, the filesystem has to be mounted inode32 before it's run.
> > 
> > I'm not sure if there is any packaging changes required.
> 
> Together with the stop allocating from specific AGs patch this should be
> 90% towards an xfs_shrinkfs, right?

Well, this just moves the inodes - it's one piece of the puzzle.  We
still need to collide xfs_fsr with xfs_reno to move the data.

After that, we need to work out how to move the orphan metadata
blocks out of the AGs that are to be truncated off. That's not
simple....

After that, we need the transaction to shrink the fs.

At that point, we'll have a "working" shrink that will allow
shrinking to only 50% of the original size because the log will
get in the way. To fix that, we'll need to implement transactions
to move the log...

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: REVIEW: xfs_reno
  2007-10-02  9:19   ` David Chinner
@ 2007-10-02 16:41     ` Russell Cattelan
  2007-10-02 23:41       ` David Chinner
  2007-10-03  1:05     ` Barry Naujok
  2007-10-03  1:30     ` Timothy Shimmin
  2 siblings, 1 reply; 10+ messages in thread
From: Russell Cattelan @ 2007-10-02 16:41 UTC (permalink / raw)
  To: David Chinner; +Cc: Christoph Hellwig, Barry Naujok, xfs@oss.sgi.com, xfs-dev

[-- Attachment #1: Type: text/plain, Size: 1338 bytes --]

David Chinner wrote:
> On Tue, Oct 02, 2007 at 10:02:16AM +0100, Christoph Hellwig wrote:
>   
>> On Tue, Oct 02, 2007 at 05:08:59PM +1000, Barry Naujok wrote:
>>     
>>> The attached tool allows an inode64 filesystem to be converted to inode32.
>>> For this to work, the filesystem has to be mounted inode32 before it's run.
>>>
>>> I'm not sure if there is any packaging changes required.
>>>       
>> Together with the stop allocating from specific AGs patch this should be
>> 90% towards an xfs_shrinkfs, right?
>>     
>
> Well, this just moves the inodes - it's one piece of the puzzle.  We
> still need to collide xfs_fsr with xfs_reno to move the data.
>
> After that, we need to work out how to move the orphan metadata
> blocks out of the AGs that are to be truncated off. That's not
> simple....
>
> After that, we need the transaction to shrink the fs.
>
> At that point, we'll got a "working" shrink that will allow
> shrinking to only 50% of the original size because the log will
> get in the way. To fix that, we'll need to implement transactions
> to move the log...
>   
If we do that could be move to an inode based log?
Keep it contagious so recovery won't have to parse
up the file system to find the log.
The normal running case should be easier to deal with if
the log was just a file?

> Cheers,
>
> Dave.
>   


[-- Attachment #2: cattelan.vcf --]
[-- Type: text/x-vcard, Size: 131 bytes --]

begin:vcard
fn:Russell Cattelan
n:Cattelan;Russell
email;internet:cattelan@thebarn.com
x-mozilla-html:FALSE
version:2.1
end:vcard


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: REVIEW: xfs_reno
  2007-10-02 16:41     ` Russell Cattelan
@ 2007-10-02 23:41       ` David Chinner
  0 siblings, 0 replies; 10+ messages in thread
From: David Chinner @ 2007-10-02 23:41 UTC (permalink / raw)
  To: Russell Cattelan
  Cc: David Chinner, Christoph Hellwig, Barry Naujok, xfs@oss.sgi.com,
	xfs-dev

On Tue, Oct 02, 2007 at 11:41:35AM -0500, Russell Cattelan wrote:
> >At that point, we'll got a "working" shrink that will allow
> >shrinking to only 50% of the original size because the log will
> >get in the way. To fix that, we'll need to implement transactions
> >to move the log...
>
> If we do that could be move to an inode based log?

What do we gain from doing that? (I'm a bit slow today)

> Keep it contagious so recovery won't have to parse
> up the file system to find the log.

I'm worried by those contagious logs. What do you catch from them? :)

> The normal running case should be easier to deal with if
> the log was just a file?

I don't see how it changes anything - we address the log directly
by block number and device...

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: REVIEW: xfs_reno
  2007-10-02  9:19   ` David Chinner
  2007-10-02 16:41     ` Russell Cattelan
@ 2007-10-03  1:05     ` Barry Naujok
  2007-10-03  4:58       ` David Chinner
  2007-10-03  1:30     ` Timothy Shimmin
  2 siblings, 1 reply; 10+ messages in thread
From: Barry Naujok @ 2007-10-03  1:05 UTC (permalink / raw)
  To: David Chinner, Christoph Hellwig; +Cc: xfs@oss.sgi.com, xfs-dev

On Tue, 02 Oct 2007 19:19:51 +1000, David Chinner <dgc@sgi.com> wrote:

> On Tue, Oct 02, 2007 at 10:02:16AM +0100, Christoph Hellwig wrote:
>> On Tue, Oct 02, 2007 at 05:08:59PM +1000, Barry Naujok wrote:
>> >
>> > The attached tool allows an inode64 filesystem to be converted to  
>> inode32.
>> > For this to work, the filesystem has to be mounted inode32 before  
>> it's run.
>> >
>> > I'm not sure if there is any packaging changes required.
>>
>> Together with the stop allocating from specific AGs patch this should be
>> 90% towards an xfs_shrinkfs, right?
>
> Well, this just moves the inodes - it's one piece of the puzzle.  We
> still need to collide xfs_fsr with xfs_reno to move the data.
>
> After that, we need to work out how to move the orphan metadata
> blocks out of the AGs that are to be truncated off. That's not
> simple....

I believe xfs_bmap on all inodes can reveal extended attributes and
directory data in extra AGs. Copying those like xfs_reno does with
"blocked" AGs should perform the desired metadata moving.

> After that, we need the transaction to shrink the fs.
>
> At that point, we'll got a "working" shrink that will allow
> shrinking to only 50% of the original size because the log will
> get in the way. To fix that, we'll need to implement transactions
> to move the log...
>
> Cheers,
>
> Dave.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: REVIEW: xfs_reno
  2007-10-02  9:19   ` David Chinner
  2007-10-02 16:41     ` Russell Cattelan
  2007-10-03  1:05     ` Barry Naujok
@ 2007-10-03  1:30     ` Timothy Shimmin
  2007-10-03  5:50       ` David Chinner
  2 siblings, 1 reply; 10+ messages in thread
From: Timothy Shimmin @ 2007-10-03  1:30 UTC (permalink / raw)
  To: David Chinner; +Cc: Christoph Hellwig, Barry Naujok, xfs@oss.sgi.com, xfs-dev

David Chinner wrote:
> At that point, we'll got a "working" shrink that will allow
> shrinking to only 50% of the original size because the log will
> get in the way. To fix that, we'll need to implement transactions
> to move the log...
> 
Moving the log sounds pretty tricky.

Either we'd need to clean out the log (a la freeze) or
copy the active part (tail->head) to the new location and zero out the rest of
the new log space (or may even need to write sectors with
previous cycle#s at the start of each sector for the rest).
So how would one do that with the copying approach because
we'd need to be writing in to the new log and we'd need the log
pointer in the superblock to be logged somewhere ughhhh.
I think a type of freezing may be the way to go.
The trouble is we need to point the sb to the new log and the
only place to log that is in the old log.
So I guess before unfreezing you write the sb logptr change
using the old log and then after the unfreeze, everything uses the new log.
If you die before the sb change to disk then on mount you replay the sb change
using the old log and then start writing to the new log. If you die before writing the
sb change in the old log then you are stuck.
You need this log change and freespace change (for making room for the log)
in a transaction together and probably with other stuff.
Okay, I'm getting lost :)

--Tim

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: REVIEW: xfs_reno
  2007-10-03  1:05     ` Barry Naujok
@ 2007-10-03  4:58       ` David Chinner
  0 siblings, 0 replies; 10+ messages in thread
From: David Chinner @ 2007-10-03  4:58 UTC (permalink / raw)
  To: Barry Naujok; +Cc: David Chinner, Christoph Hellwig, xfs@oss.sgi.com, xfs-dev

On Wed, Oct 03, 2007 at 11:05:05AM +1000, Barry Naujok wrote:
> On Tue, 02 Oct 2007 19:19:51 +1000, David Chinner <dgc@sgi.com> wrote:
> 
> >On Tue, Oct 02, 2007 at 10:02:16AM +0100, Christoph Hellwig wrote:
> >>On Tue, Oct 02, 2007 at 05:08:59PM +1000, Barry Naujok wrote:
> >>>
> >>> The attached tool allows an inode64 filesystem to be converted to  
> >>inode32.
> >>> For this to work, the filesystem has to be mounted inode32 before  
> >>it's run.
> >>>
> >>> I'm not sure if there is any packaging changes required.
> >>
> >>Together with the stop allocating from specific AGs patch this should be
> >>90% towards an xfs_shrinkfs, right?
> >
> >Well, this just moves the inodes - it's one piece of the puzzle.  We
> >still need to collide xfs_fsr with xfs_reno to move the data.
> >
> >After that, we need to work out how to move the orphan metadata
> >blocks out of the AGs that are to be truncated off. That's not
> >simple....
> 
> I believe xfs_bmap on all inodes can reveal extended attributes and
> directory data in extra AGs. Copying those like xfs_reno does with
> "blocked" AGs should perform the desired metadata moving.

Sure. But I'm thinking of metadata like the blocks in an extent
btree that indexes the data or attribute fork of an inode. I don't
think xfs_bmap can tell us where those blocks are, and they could
be anywhere on the filesystem...

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: REVIEW: xfs_reno
  2007-10-03  1:30     ` Timothy Shimmin
@ 2007-10-03  5:50       ` David Chinner
  0 siblings, 0 replies; 10+ messages in thread
From: David Chinner @ 2007-10-03  5:50 UTC (permalink / raw)
  To: Timothy Shimmin
  Cc: David Chinner, Christoph Hellwig, Barry Naujok, xfs@oss.sgi.com,
	xfs-dev

On Wed, Oct 03, 2007 at 11:30:53AM +1000, Timothy Shimmin wrote:
> David Chinner wrote:
> >At that point, we'll got a "working" shrink that will allow
> >shrinking to only 50% of the original size because the log will
> >get in the way. To fix that, we'll need to implement transactions
> >to move the log...
> >
> Moving the log sounds pretty tricky.
> 
> Either we'd need to clean out the log (a la freeze) or
> copy the active part (tail->head) to the new location and zero out the rest 
> of
> the new log space (or may even need to write sectors with
> previous cycle#s at the start of each sector for the rest).
> So how would one do that with the copying approach because
> we'd need to be writing in to the new log and we'd need the log
> pointer in the superblock to be logged somewhere ughhhh.
> I think a type of freezing may be the way to go.

Yes.

> The trouble is we need to point the sb to the new log and the
> only place to log that is in the old log.

Sure.

> So I guess before unfreezing you write the sb logptr change
> using the old log and then after the unfreeze, everything uses the new log.

Yes.

> If you die before the sb change to disk then on mount you replay the sb 
> change
> using the old log and then start writing to the new log.

Yes.

> If you die before 
> writing the
> sb change in the old log then you are stuck.

No, the log swap never occurred. These would be sync transactions,
so if it never hit the disk no further transactions ever occurred.
Hence this doesn't really cause any problems.

> You need this log change and freespace change (for making room for the log)
> in a transaction together and probably with other stuff.

a new log involves:

	freeze
	allocate new log space
	do log swap xaction
	sync old log
	<now safe to use new log>
	free old log
	sync new log
	unfreeze

Recovery is only complex in terms of freeing the new log space if we
crash without the logswap transaction being on disk, or freeing the
old log if we crash before that transaction hits the disk....

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2007-10-03  5:50 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-10-02  7:08 REVIEW: xfs_reno Barry Naujok
2007-10-02  7:20 ` Nathan Scott
2007-10-02  9:02 ` Christoph Hellwig
2007-10-02  9:19   ` David Chinner
2007-10-02 16:41     ` Russell Cattelan
2007-10-02 23:41       ` David Chinner
2007-10-03  1:05     ` Barry Naujok
2007-10-03  4:58       ` David Chinner
2007-10-03  1:30     ` Timothy Shimmin
2007-10-03  5:50       ` David Chinner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox