public inbox for linux-bcachefs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/4] bcachefs: allow initializing a bcachefs filesystem from a source directory
@ 2024-06-14 18:46 Ariel Miculas
  2024-06-14 18:46 ` [PATCH v2 2/4] Fix performance regression of update_inode Ariel Miculas
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Ariel Miculas @ 2024-06-14 18:46 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Ariel Miculas

Add a new source command line argument which specifies the directory
tree that will be copied onto the newly formatted bcachefs filesystem.

This commit also fixes an issue in copy_link where uninitialized data is
copied into the symlink because of a round_up of the buffer size.

Signed-off-by: Ariel Miculas <ariel.miculas@gmail.com>
---
 c_src/cmd_format.c        |  29 +++
 c_src/cmd_migrate.c       | 472 +-------------------------------------
 c_src/libbcachefs.h       |   2 +
 c_src/posix_to_bcachefs.c | 460 +++++++++++++++++++++++++++++++++++++
 c_src/posix_to_bcachefs.h |  54 +++++
 libbcachefs/bkey.h        |   1 +
 libbcachefs/fs-common.h   |   1 +
 libbcachefs/inode.h       |   1 +
 8 files changed, 557 insertions(+), 463 deletions(-)
 create mode 100644 c_src/posix_to_bcachefs.c
 create mode 100644 c_src/posix_to_bcachefs.h

diff --git a/c_src/cmd_format.c b/c_src/cmd_format.c
index 4bafa171..d0c8e197 100644
--- a/c_src/cmd_format.c
+++ b/c_src/cmd_format.c
@@ -21,6 +21,7 @@
 #include <uuid/uuid.h>
 
 #include "cmds.h"
+#include "posix_to_bcachefs.h"
 #include "libbcachefs.h"
 #include "crypto.h"
 #include "libbcachefs/errcode.h"
@@ -45,6 +46,7 @@ x(0,	data_allowed,		required_argument)	\
 x(0,	durability,		required_argument)	\
 x(0,	version,		required_argument)	\
 x(0,	no_initialize,		no_argument)		\
+x(0,	source,			required_argument)	\
 x('f',	force,			no_argument)		\
 x('q',	quiet,			no_argument)		\
 x('v',	verbose,		no_argument)		\
@@ -66,6 +68,7 @@ static void usage(void)
 	     "  -L, --fs_label=label\n"
 	     "  -U, --uuid=uuid\n"
 	     "      --superblock_size=size\n"
+	     "      --source=path           Initialize the bcachefs filesystem from this root directory\n"
 	     "\n"
 	     "Device specific options:");
 
@@ -113,6 +116,18 @@ u64 read_flag_list_or_die(char *opt, const char * const list[],
 	return v;
 }
 
+void build_fs(struct bch_fs *c, const char *src_path)
+{
+	struct copy_fs_state s = {};
+	int src_fd = xopen(src_path, O_RDONLY|O_NOATIME);
+	struct stat stat = xfstat(src_fd);
+
+	if (!S_ISDIR(stat.st_mode))
+		die("%s is not a directory", src_path);
+
+	copy_fs(c, src_fd, src_path, &s);
+}
+
 int cmd_format(int argc, char *argv[])
 {
 	DARRAY(struct dev_opts) devices = { 0 };
@@ -145,6 +160,9 @@ int cmd_format(int argc, char *argv[])
 			opt_set(fs_opts, metadata_replicas, v);
 			opt_set(fs_opts, data_replicas, v);
 			break;
+		case O_source:
+			opts.source = optarg;
+			break;
 		case O_encrypted:
 			opts.encrypted = true;
 			break;
@@ -277,6 +295,12 @@ int cmd_format(int argc, char *argv[])
 
 	darray_exit(&devices);
 
+	/* don't skip initialization when we have to build an image from a source */
+	if (opts.source && !initialize) {
+		printf("Warning: Forcing the initialization because the source flag was supplied\n");
+		initialize = 1;
+	}
+
 	if (initialize) {
 		struct bch_opts mount_opts = bch2_opts_empty();
 
@@ -294,6 +318,11 @@ int cmd_format(int argc, char *argv[])
 			die("error opening %s: %s", device_paths.data[0],
 			    bch2_err_str(PTR_ERR(c)));
 
+		if (opts.source) {
+			build_fs(c, opts.source);
+		}
+
+
 		bch2_fs_stop(c);
 	}
 
diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c
index 24937822..a5b7786d 100644
--- a/c_src/cmd_migrate.c
+++ b/c_src/cmd_migrate.c
@@ -1,9 +1,7 @@
-#include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <getopt.h>
 #include <string.h>
-#include <sys/xattr.h>
 #include <sys/ioctl.h>
 #include <sys/stat.h>
 #include <sys/sysmacros.h>
@@ -20,24 +18,18 @@
 #include "cmds.h"
 #include "crypto.h"
 #include "libbcachefs.h"
+#include "posix_to_bcachefs.h"
 
 #include <linux/dcache.h>
 #include <linux/generic-radix-tree.h>
-#include <linux/xattr.h>
 #include "libbcachefs/bcachefs.h"
-#include "libbcachefs/alloc_background.h"
-#include "libbcachefs/alloc_foreground.h"
 #include "libbcachefs/btree_update.h"
 #include "libbcachefs/buckets.h"
 #include "libbcachefs/dirent.h"
 #include "libbcachefs/errcode.h"
-#include "libbcachefs/fs-common.h"
 #include "libbcachefs/inode.h"
-#include "libbcachefs/io_write.h"
 #include "libbcachefs/replicas.h"
-#include "libbcachefs/str_hash.h"
 #include "libbcachefs/super.h"
-#include "libbcachefs/xattr.h"
 
 /* XXX cut and pasted from fsck.c */
 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
@@ -117,403 +109,6 @@ static void mark_unreserved_space(struct bch_fs *c, ranges extents)
 	}
 }
 
-static void update_inode(struct bch_fs *c,
-			 struct bch_inode_unpacked *inode)
-{
-	struct bkey_inode_buf packed;
-	int ret;
-
-	bch2_inode_pack(&packed, inode);
-	packed.inode.k.p.snapshot = U32_MAX;
-	ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
-				NULL, 0, 0);
-	if (ret)
-		die("error updating inode: %s", bch2_err_str(ret));
-}
-
-static void create_link(struct bch_fs *c,
-			struct bch_inode_unpacked *parent,
-			const char *name, u64 inum, mode_t mode)
-{
-	struct qstr qstr = QSTR(name);
-	struct bch_inode_unpacked parent_u;
-	struct bch_inode_unpacked inode;
-
-	int ret = bch2_trans_do(c, NULL, NULL, 0,
-		bch2_link_trans(trans,
-				(subvol_inum) { 1, parent->bi_inum }, &parent_u,
-				(subvol_inum) { 1, inum }, &inode, &qstr));
-	if (ret)
-		die("error creating hardlink: %s", bch2_err_str(ret));
-}
-
-static struct bch_inode_unpacked create_file(struct bch_fs *c,
-					     struct bch_inode_unpacked *parent,
-					     const char *name,
-					     uid_t uid, gid_t gid,
-					     mode_t mode, dev_t rdev)
-{
-	struct qstr qstr = QSTR(name);
-	struct bch_inode_unpacked new_inode;
-
-	bch2_inode_init_early(c, &new_inode);
-
-	int ret = bch2_trans_do(c, NULL, NULL, 0,
-		bch2_create_trans(trans,
-				  (subvol_inum) { 1, parent->bi_inum }, parent,
-				  &new_inode, &qstr,
-				  uid, gid, mode, rdev, NULL, NULL,
-				  (subvol_inum) {}, 0));
-	if (ret)
-		die("error creating %s: %s", name, bch2_err_str(ret));
-
-	return new_inode;
-}
-
-#define for_each_xattr_handler(handlers, handler)		\
-	if (handlers)						\
-		for ((handler) = *(handlers)++;			\
-			(handler) != NULL;			\
-			(handler) = *(handlers)++)
-
-static const struct xattr_handler *xattr_resolve_name(char **name)
-{
-	const struct xattr_handler **handlers = bch2_xattr_handlers;
-	const struct xattr_handler *handler;
-
-	for_each_xattr_handler(handlers, handler) {
-		char *n;
-
-		n = strcmp_prefix(*name, xattr_prefix(handler));
-		if (n) {
-			if (!handler->prefix ^ !*n) {
-				if (*n)
-					continue;
-				return ERR_PTR(-EINVAL);
-			}
-			*name = n;
-			return handler;
-		}
-	}
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
-		       struct stat *src)
-{
-	dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
-	dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
-	dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
-}
-
-static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
-			char *src)
-{
-	struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
-
-	char attrs[XATTR_LIST_MAX];
-	ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
-	if (attrs_size < 0)
-		die("listxattr error: %m");
-
-	char *next, *attr;
-	for (attr = attrs;
-	     attr < attrs + attrs_size;
-	     attr = next) {
-		next = attr + strlen(attr) + 1;
-
-		char val[XATTR_SIZE_MAX];
-		ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
-
-		if (val_size < 0)
-			die("error getting xattr val: %m");
-
-		const struct xattr_handler *h = xattr_resolve_name(&attr);
-		struct bch_inode_unpacked inode_u;
-
-		int ret = bch2_trans_do(c, NULL, NULL, 0,
-				bch2_xattr_set(trans,
-					       (subvol_inum) { 1, dst->bi_inum },
-					       &inode_u, &hash_info, attr,
-					       val, val_size, h->flags, 0));
-		if (ret < 0)
-			die("error creating xattr: %s", bch2_err_str(ret));
-	}
-}
-
-#define WRITE_DATA_BUF	(1 << 20)
-
-static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
-
-static void write_data(struct bch_fs *c,
-		       struct bch_inode_unpacked *dst_inode,
-		       u64 dst_offset, void *buf, size_t len)
-{
-	struct bch_write_op op;
-	struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
-
-	BUG_ON(dst_offset	& (block_bytes(c) - 1));
-	BUG_ON(len		& (block_bytes(c) - 1));
-	BUG_ON(len > WRITE_DATA_BUF);
-
-	bio_init(&op.wbio.bio, NULL, bv, ARRAY_SIZE(bv), 0);
-	bch2_bio_map(&op.wbio.bio, buf, len);
-
-	bch2_write_op_init(&op, c, bch2_opts_to_inode_opts(c->opts));
-	op.write_point	= writepoint_hashed(0);
-	op.nr_replicas	= 1;
-	op.subvol	= 1;
-	op.pos		= SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
-	op.flags |= BCH_WRITE_SYNC;
-
-	int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
-					    c->opts.data_replicas, 0);
-	if (ret)
-		die("error reserving space in new filesystem: %s", bch2_err_str(ret));
-
-	closure_call(&op.cl, bch2_write, NULL, NULL);
-
-	BUG_ON(!(op.flags & BCH_WRITE_DONE));
-	dst_inode->bi_sectors += len >> 9;
-
-	if (op.error)
-		die("write error: %s", bch2_err_str(op.error));
-}
-
-static void copy_data(struct bch_fs *c,
-		      struct bch_inode_unpacked *dst_inode,
-		      int src_fd, u64 start, u64 end)
-{
-	while (start < end) {
-		unsigned len = min_t(u64, end - start, sizeof(buf));
-		unsigned pad = round_up(len, block_bytes(c)) - len;
-
-		xpread(src_fd, buf, len, start);
-		memset(buf + len, 0, pad);
-
-		write_data(c, dst_inode, start, buf, len + pad);
-		start += len;
-	}
-}
-
-static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
-		      u64 logical, u64 physical, u64 length)
-{
-	struct bch_dev *ca = c->devs[0];
-
-	BUG_ON(logical	& (block_bytes(c) - 1));
-	BUG_ON(physical & (block_bytes(c) - 1));
-	BUG_ON(length	& (block_bytes(c) - 1));
-
-	logical		>>= 9;
-	physical	>>= 9;
-	length		>>= 9;
-
-	BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
-
-	while (length) {
-		struct bkey_i_extent *e;
-		BKEY_PADDED_ONSTACK(k, BKEY_EXTENT_VAL_U64s_MAX) k;
-		u64 b = sector_to_bucket(ca, physical);
-		struct disk_reservation res;
-		unsigned sectors;
-		int ret;
-
-		sectors = min(ca->mi.bucket_size -
-			      (physical & (ca->mi.bucket_size - 1)),
-			      length);
-
-		e = bkey_extent_init(&k.k);
-		e->k.p.inode	= dst->bi_inum;
-		e->k.p.offset	= logical + sectors;
-		e->k.p.snapshot	= U32_MAX;
-		e->k.size	= sectors;
-		bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
-					.offset = physical,
-					.dev = 0,
-					.gen = *bucket_gen(ca, b),
-				  });
-
-		ret = bch2_disk_reservation_get(c, &res, sectors, 1,
-						BCH_DISK_RESERVATION_NOFAIL);
-		if (ret)
-			die("error reserving space in new filesystem: %s",
-			    bch2_err_str(ret));
-
-		ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0, 0);
-		if (ret)
-			die("btree insert error %s", bch2_err_str(ret));
-
-		bch2_disk_reservation_put(c, &res);
-
-		dst->bi_sectors	+= sectors;
-		logical		+= sectors;
-		physical	+= sectors;
-		length		-= sectors;
-	}
-}
-
-static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
-		      char *src)
-{
-	ssize_t ret = readlink(src, buf, sizeof(buf));
-	if (ret < 0)
-		die("readlink error: %m");
-
-	write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
-}
-
-static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
-		      int src_fd, u64 src_size,
-		      char *src_path, ranges *extents)
-{
-	struct fiemap_iter iter;
-	struct fiemap_extent e;
-
-	fiemap_for_each(src_fd, iter, e)
-		if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
-			fsync(src_fd);
-			break;
-		}
-	fiemap_iter_exit(&iter);
-
-	fiemap_for_each(src_fd, iter, e) {
-		u64 src_max = roundup(src_size, block_bytes(c));
-
-		e.fe_length = min(e.fe_length, src_max - e.fe_logical);
-
-		if ((e.fe_logical	& (block_bytes(c) - 1)) ||
-		    (e.fe_length	& (block_bytes(c) - 1)))
-			die("Unaligned extent in %s - can't handle", src_path);
-
-		if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
-				  FIEMAP_EXTENT_ENCODED|
-				  FIEMAP_EXTENT_NOT_ALIGNED|
-				  FIEMAP_EXTENT_DATA_INLINE)) {
-			copy_data(c, dst, src_fd, e.fe_logical,
-				  min(src_size - e.fe_logical,
-				      e.fe_length));
-			continue;
-		}
-
-		/*
-		 * if the data is below 1 MB, copy it so it doesn't conflict
-		 * with bcachefs's potentially larger superblock:
-		 */
-		if (e.fe_physical < 1 << 20) {
-			copy_data(c, dst, src_fd, e.fe_logical,
-				  min(src_size - e.fe_logical,
-				      e.fe_length));
-			continue;
-		}
-
-		if ((e.fe_physical	& (block_bytes(c) - 1)))
-			die("Unaligned extent in %s - can't handle", src_path);
-
-		range_add(extents, e.fe_physical, e.fe_length);
-		link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
-	}
-	fiemap_iter_exit(&iter);
-}
-
-struct copy_fs_state {
-	u64			bcachefs_inum;
-	dev_t			dev;
-
-	GENRADIX(u64)		hardlinks;
-	ranges			extents;
-};
-
-static void copy_dir(struct copy_fs_state *s,
-		     struct bch_fs *c,
-		     struct bch_inode_unpacked *dst,
-		     int src_fd, const char *src_path)
-{
-	DIR *dir = fdopendir(src_fd);
-	struct dirent *d;
-
-	while ((errno = 0), (d = readdir(dir))) {
-		struct bch_inode_unpacked inode;
-		int fd;
-
-		if (fchdir(src_fd))
-			die("chdir error: %m");
-
-		struct stat stat =
-			xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
-
-		if (!strcmp(d->d_name, ".") ||
-		    !strcmp(d->d_name, "..") ||
-		    !strcmp(d->d_name, "lost+found") ||
-		    stat.st_ino == s->bcachefs_inum)
-			continue;
-
-		char *child_path = mprintf("%s/%s", src_path, d->d_name);
-
-		if (stat.st_dev != s->dev)
-			die("%s does not have correct st_dev!", child_path);
-
-		u64 *dst_inum = S_ISREG(stat.st_mode)
-			? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
-			: NULL;
-
-		if (dst_inum && *dst_inum) {
-			create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
-			goto next;
-		}
-
-		inode = create_file(c, dst, d->d_name,
-				    stat.st_uid, stat.st_gid,
-				    stat.st_mode, stat.st_rdev);
-
-		if (dst_inum)
-			*dst_inum = inode.bi_inum;
-
-		copy_times(c, &inode, &stat);
-		copy_xattrs(c, &inode, d->d_name);
-
-		/* copy xattrs */
-
-		switch (mode_to_type(stat.st_mode)) {
-		case DT_DIR:
-			fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
-			copy_dir(s, c, &inode, fd, child_path);
-			close(fd);
-			break;
-		case DT_REG:
-			inode.bi_size = stat.st_size;
-
-			fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
-			copy_file(c, &inode, fd, stat.st_size,
-				  child_path, &s->extents);
-			close(fd);
-			break;
-		case DT_LNK:
-			inode.bi_size = stat.st_size;
-
-			copy_link(c, &inode, d->d_name);
-			break;
-		case DT_FIFO:
-		case DT_CHR:
-		case DT_BLK:
-		case DT_SOCK:
-		case DT_WHT:
-			/* nothing else to copy for these: */
-			break;
-		default:
-			BUG();
-		}
-
-		update_inode(c, &inode);
-next:
-		free(child_path);
-	}
-
-	if (errno)
-		die("readdir error: %m");
-	closedir(dir);
-}
-
 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
 				   u64 size, u64 *bcachefs_inum, dev_t dev,
 				   bool force)
@@ -561,62 +156,6 @@ static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
 	return extents;
 }
 
-static void reserve_old_fs_space(struct bch_fs *c,
-				 struct bch_inode_unpacked *root_inode,
-				 ranges *extents)
-{
-	struct bch_dev *ca = c->devs[0];
-	struct bch_inode_unpacked dst;
-	struct hole_iter iter;
-	struct range i;
-
-	dst = create_file(c, root_inode, "old_migrated_filesystem",
-			  0, 0, S_IFREG|0400, 0);
-	dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
-
-	ranges_sort_merge(extents);
-
-	for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
-		link_data(c, &dst, i.start, i.start, i.end - i.start);
-
-	update_inode(c, &dst);
-}
-
-static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
-		    u64 bcachefs_inum, ranges *extents)
-{
-	syncfs(src_fd);
-
-	struct bch_inode_unpacked root_inode;
-	int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
-					  &root_inode);
-	if (ret)
-		die("error looking up root directory: %s", bch2_err_str(ret));
-
-	if (fchdir(src_fd))
-		die("chdir error: %m");
-
-	struct stat stat = xfstat(src_fd);
-	copy_times(c, &root_inode, &stat);
-	copy_xattrs(c, &root_inode, ".");
-
-	struct copy_fs_state s = {
-		.bcachefs_inum	= bcachefs_inum,
-		.dev		= stat.st_dev,
-		.extents	= *extents,
-	};
-
-	/* now, copy: */
-	copy_dir(&s, c, &root_inode, src_fd, src_path);
-
-	reserve_old_fs_space(c, &root_inode, &s.extents);
-
-	update_inode(c, &root_inode);
-
-	darray_exit(&s.extents);
-	genradix_free(&s.hardlinks);
-}
-
 static void find_superblock_space(ranges extents,
 				  struct format_opts opts,
 				  struct dev_opts *dev)
@@ -739,7 +278,14 @@ static int migrate_fs(const char		*fs_path,
 	if (ret)
 		die("Error starting new filesystem: %s", bch2_err_str(ret));
 
-	copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
+	struct copy_fs_state s = {
+		.bcachefs_inum	= bcachefs_inum,
+		.dev		= stat.st_dev,
+		.extents	= extents,
+		.type		= BCH_MIGRATE_migrate,
+	};
+
+	copy_fs(c, fs_fd, fs_path, &s);
 
 	bch2_fs_stop(c);
 
diff --git a/c_src/libbcachefs.h b/c_src/libbcachefs.h
index 5c7ef6c7..60332bb8 100644
--- a/c_src/libbcachefs.h
+++ b/c_src/libbcachefs.h
@@ -6,6 +6,7 @@
 
 #include "libbcachefs/bcachefs_format.h"
 #include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/inode.h"
 #include "libbcachefs/opts.h"
 #include "libbcachefs/vstructs.h"
 #include "tools-util.h"
@@ -37,6 +38,7 @@ struct format_opts {
 	unsigned	superblock_size;
 	bool		encrypted;
 	char		*passphrase;
+	char		*source;
 };
 
 static inline struct format_opts format_opts_default()
diff --git a/c_src/posix_to_bcachefs.c b/c_src/posix_to_bcachefs.c
new file mode 100644
index 00000000..ee0e300f
--- /dev/null
+++ b/c_src/posix_to_bcachefs.c
@@ -0,0 +1,460 @@
+#include <dirent.h>
+#include <sys/xattr.h>
+#include <linux/xattr.h>
+
+#include "posix_to_bcachefs.h"
+#include "libbcachefs/alloc_foreground.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/fs-common.h"
+#include "libbcachefs/io_write.h"
+#include "libbcachefs/str_hash.h"
+#include "libbcachefs/xattr.h"
+
+void update_inode(struct bch_fs *c,
+			 struct bch_inode_unpacked *inode)
+{
+	struct bkey_inode_buf packed;
+	int ret;
+
+	bch2_inode_pack(&packed, inode);
+	packed.inode.k.p.snapshot = U32_MAX;
+	ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
+				NULL, 0, 0);
+	if (ret)
+		die("error updating inode: %s", bch2_err_str(ret));
+}
+
+void create_link(struct bch_fs *c,
+			struct bch_inode_unpacked *parent,
+			const char *name, u64 inum, mode_t mode)
+{
+	struct qstr qstr = QSTR(name);
+	struct bch_inode_unpacked parent_u;
+	struct bch_inode_unpacked inode;
+
+	int ret = bch2_trans_do(c, NULL, NULL, 0,
+		bch2_link_trans(trans,
+				(subvol_inum) { 1, parent->bi_inum }, &parent_u,
+				(subvol_inum) { 1, inum }, &inode, &qstr));
+	if (ret)
+		die("error creating hardlink: %s", bch2_err_str(ret));
+}
+
+struct bch_inode_unpacked create_file(struct bch_fs *c,
+					     struct bch_inode_unpacked *parent,
+					     const char *name,
+					     uid_t uid, gid_t gid,
+					     mode_t mode, dev_t rdev)
+{
+	struct qstr qstr = QSTR(name);
+	struct bch_inode_unpacked new_inode;
+
+	bch2_inode_init_early(c, &new_inode);
+
+	int ret = bch2_trans_do(c, NULL, NULL, 0,
+		bch2_create_trans(trans,
+				  (subvol_inum) { 1, parent->bi_inum }, parent,
+				  &new_inode, &qstr,
+				  uid, gid, mode, rdev, NULL, NULL,
+				  (subvol_inum) {}, 0));
+	if (ret)
+		die("error creating %s: %s", name, bch2_err_str(ret));
+
+	return new_inode;
+}
+
+#define for_each_xattr_handler(handlers, handler)		\
+	if (handlers)						\
+		for ((handler) = *(handlers)++;			\
+			(handler) != NULL;			\
+			(handler) = *(handlers)++)
+
+static const struct xattr_handler *xattr_resolve_name(char **name)
+{
+	const struct xattr_handler **handlers = bch2_xattr_handlers;
+	const struct xattr_handler *handler;
+
+	for_each_xattr_handler(handlers, handler) {
+		char *n;
+
+		n = strcmp_prefix(*name, xattr_prefix(handler));
+		if (n) {
+			if (!handler->prefix ^ !*n) {
+				if (*n)
+					continue;
+				return ERR_PTR(-EINVAL);
+			}
+			*name = n;
+			return handler;
+		}
+	}
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
+		       struct stat *src)
+{
+	dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
+	dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
+	dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
+}
+
+void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
+			char *src)
+{
+	struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
+
+	char attrs[XATTR_LIST_MAX];
+	ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
+	if (attrs_size < 0)
+		die("listxattr error: %m");
+
+	char *next, *attr;
+	for (attr = attrs;
+	     attr < attrs + attrs_size;
+	     attr = next) {
+		next = attr + strlen(attr) + 1;
+
+		char val[XATTR_SIZE_MAX];
+		ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
+
+		if (val_size < 0)
+			die("error getting xattr val: %m");
+
+		const struct xattr_handler *h = xattr_resolve_name(&attr);
+		struct bch_inode_unpacked inode_u;
+
+		int ret = bch2_trans_do(c, NULL, NULL, 0,
+				bch2_xattr_set(trans,
+					       (subvol_inum) { 1, dst->bi_inum },
+					       &inode_u, &hash_info, attr,
+					       val, val_size, h->flags, 0));
+		if (ret < 0)
+			die("error creating xattr: %s", bch2_err_str(ret));
+	}
+}
+
+#define WRITE_DATA_BUF	(1 << 20)
+
+static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
+
+static void write_data(struct bch_fs *c,
+		       struct bch_inode_unpacked *dst_inode,
+		       u64 dst_offset, void *buf, size_t len)
+{
+	struct bch_write_op op;
+	struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
+
+	BUG_ON(dst_offset	& (block_bytes(c) - 1));
+	BUG_ON(len		& (block_bytes(c) - 1));
+	BUG_ON(len > WRITE_DATA_BUF);
+
+	bio_init(&op.wbio.bio, NULL, bv, ARRAY_SIZE(bv), 0);
+	bch2_bio_map(&op.wbio.bio, buf, len);
+
+	bch2_write_op_init(&op, c, bch2_opts_to_inode_opts(c->opts));
+	op.write_point	= writepoint_hashed(0);
+	op.nr_replicas	= 1;
+	op.subvol	= 1;
+	op.pos		= SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
+	op.flags |= BCH_WRITE_SYNC;
+
+	int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
+					    c->opts.data_replicas, 0);
+	if (ret)
+		die("error reserving space in new filesystem: %s", bch2_err_str(ret));
+
+	closure_call(&op.cl, bch2_write, NULL, NULL);
+
+	BUG_ON(!(op.flags & BCH_WRITE_DONE));
+	dst_inode->bi_sectors += len >> 9;
+
+	if (op.error)
+		die("write error: %s", bch2_err_str(op.error));
+}
+
+void copy_data(struct bch_fs *c,
+		      struct bch_inode_unpacked *dst_inode,
+		      int src_fd, u64 start, u64 end)
+{
+	while (start < end) {
+		unsigned len = min_t(u64, end - start, sizeof(buf));
+		unsigned pad = round_up(len, block_bytes(c)) - len;
+
+		xpread(src_fd, buf, len, start);
+		memset(buf + len, 0, pad);
+
+		write_data(c, dst_inode, start, buf, len + pad);
+		start += len;
+	}
+}
+
+static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
+		      u64 logical, u64 physical, u64 length)
+{
+	struct bch_dev *ca = c->devs[0];
+
+	BUG_ON(logical	& (block_bytes(c) - 1));
+	BUG_ON(physical & (block_bytes(c) - 1));
+	BUG_ON(length	& (block_bytes(c) - 1));
+
+	logical		>>= 9;
+	physical	>>= 9;
+	length		>>= 9;
+
+	BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
+
+	while (length) {
+		struct bkey_i_extent *e;
+		BKEY_PADDED_ONSTACK(k, BKEY_EXTENT_VAL_U64s_MAX) k;
+		u64 b = sector_to_bucket(ca, physical);
+		struct disk_reservation res;
+		unsigned sectors;
+		int ret;
+
+		sectors = min(ca->mi.bucket_size -
+			      (physical & (ca->mi.bucket_size - 1)),
+			      length);
+
+		e = bkey_extent_init(&k.k);
+		e->k.p.inode	= dst->bi_inum;
+		e->k.p.offset	= logical + sectors;
+		e->k.p.snapshot	= U32_MAX;
+		e->k.size	= sectors;
+		bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
+					.offset = physical,
+					.dev = 0,
+					.gen = *bucket_gen(ca, b),
+				  });
+
+		ret = bch2_disk_reservation_get(c, &res, sectors, 1,
+						BCH_DISK_RESERVATION_NOFAIL);
+		if (ret)
+			die("error reserving space in new filesystem: %s",
+			    bch2_err_str(ret));
+
+		ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0, 0);
+		if (ret)
+			die("btree insert error %s", bch2_err_str(ret));
+
+		bch2_disk_reservation_put(c, &res);
+
+		dst->bi_sectors	+= sectors;
+		logical		+= sectors;
+		physical	+= sectors;
+		length		-= sectors;
+	}
+}
+
+void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
+		      char *src)
+{
+	ssize_t i;
+	ssize_t ret = readlink(src, buf, sizeof(buf));
+	if (ret < 0)
+		die("readlink error: %m");
+
+	for (i = ret; i < round_up(ret, block_bytes(c)); i++)
+		buf[i] = 0;
+
+	write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
+}
+
+static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
+		      int src_fd, u64 src_size,
+		      char *src_path, struct copy_fs_state *s)
+{
+	struct fiemap_iter iter;
+	struct fiemap_extent e;
+
+	fiemap_for_each(src_fd, iter, e)
+		if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
+			fsync(src_fd);
+			break;
+		}
+	fiemap_iter_exit(&iter);
+
+	fiemap_for_each(src_fd, iter, e) {
+		u64 src_max = roundup(src_size, block_bytes(c));
+
+		e.fe_length = min(e.fe_length, src_max - e.fe_logical);
+
+		if ((e.fe_logical	& (block_bytes(c) - 1)) ||
+		    (e.fe_length	& (block_bytes(c) - 1)))
+			die("Unaligned extent in %s - can't handle", src_path);
+
+		if (BCH_MIGRATE_copy == s->type || (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+				  FIEMAP_EXTENT_ENCODED|
+				  FIEMAP_EXTENT_NOT_ALIGNED|
+				  FIEMAP_EXTENT_DATA_INLINE))) {
+			copy_data(c, dst, src_fd, e.fe_logical,
+				  min(src_size - e.fe_logical,
+				      e.fe_length));
+			continue;
+		}
+
+		/*
+		 * if the data is below 1 MB, copy it so it doesn't conflict
+		 * with bcachefs's potentially larger superblock:
+		 */
+		if (e.fe_physical < 1 << 20) {
+			copy_data(c, dst, src_fd, e.fe_logical,
+				  min(src_size - e.fe_logical,
+				      e.fe_length));
+			continue;
+		}
+
+		if ((e.fe_physical	& (block_bytes(c) - 1)))
+			die("Unaligned extent in %s - can't handle", src_path);
+
+		range_add(&s->extents, e.fe_physical, e.fe_length);
+		link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
+	}
+	fiemap_iter_exit(&iter);
+}
+
+static void copy_dir(struct copy_fs_state *s,
+		     struct bch_fs *c,
+		     struct bch_inode_unpacked *dst,
+		     int src_fd, const char *src_path)
+{
+	DIR *dir = fdopendir(src_fd);
+	struct dirent *d;
+
+	while ((errno = 0), (d = readdir(dir))) {
+		struct bch_inode_unpacked inode;
+		int fd;
+
+		if (fchdir(src_fd))
+			die("chdir error: %m");
+
+		struct stat stat =
+			xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
+
+		if (!strcmp(d->d_name, ".") ||
+		    !strcmp(d->d_name, "..") ||
+		    !strcmp(d->d_name, "lost+found"))
+			continue;
+
+		if (BCH_MIGRATE_migrate == s->type && stat.st_ino == s->bcachefs_inum)
+			continue;
+
+		char *child_path = mprintf("%s/%s", src_path, d->d_name);
+
+		if (s->type == BCH_MIGRATE_migrate && stat.st_dev != s->dev)
+			die("%s does not have correct st_dev!", child_path);
+
+		u64 *dst_inum = S_ISREG(stat.st_mode)
+			? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
+			: NULL;
+
+		if (dst_inum && *dst_inum) {
+			create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
+			goto next;
+		}
+
+		inode = create_file(c, dst, d->d_name,
+				    stat.st_uid, stat.st_gid,
+				    stat.st_mode, stat.st_rdev);
+
+		if (dst_inum)
+			*dst_inum = inode.bi_inum;
+
+		copy_times(c, &inode, &stat);
+		copy_xattrs(c, &inode, d->d_name);
+
+		/* copy xattrs */
+
+		switch (mode_to_type(stat.st_mode)) {
+		case DT_DIR:
+			fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+			copy_dir(s, c, &inode, fd, child_path);
+			close(fd);
+			break;
+		case DT_REG:
+			inode.bi_size = stat.st_size;
+
+			fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+			copy_file(c, &inode, fd, stat.st_size,
+				  child_path, s);
+			close(fd);
+			break;
+		case DT_LNK:
+			inode.bi_size = stat.st_size;
+
+			copy_link(c, &inode, d->d_name);
+			break;
+		case DT_FIFO:
+		case DT_CHR:
+		case DT_BLK:
+		case DT_SOCK:
+		case DT_WHT:
+			/* nothing else to copy for these: */
+			break;
+		default:
+			BUG();
+		}
+
+		update_inode(c, &inode);
+next:
+		free(child_path);
+	}
+
+	if (errno)
+		die("readdir error: %m");
+	closedir(dir);
+}
+
+static void reserve_old_fs_space(struct bch_fs *c,
+				 struct bch_inode_unpacked *root_inode,
+				 ranges *extents)
+{
+	struct bch_dev *ca = c->devs[0];
+	struct bch_inode_unpacked dst;
+	struct hole_iter iter;
+	struct range i;
+
+	dst = create_file(c, root_inode, "old_migrated_filesystem",
+			  0, 0, S_IFREG|0400, 0);
+	dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
+
+	ranges_sort_merge(extents);
+
+	for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
+		link_data(c, &dst, i.start, i.start, i.end - i.start);
+
+	update_inode(c, &dst);
+}
+
+void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
+		    struct copy_fs_state *s)
+{
+	syncfs(src_fd);
+
+	struct bch_inode_unpacked root_inode;
+	int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
+					  &root_inode);
+	if (ret)
+		die("error looking up root directory: %s", bch2_err_str(ret));
+
+	if (fchdir(src_fd))
+		die("chdir error: %m");
+
+	struct stat stat = xfstat(src_fd);
+	copy_times(c, &root_inode, &stat);
+	copy_xattrs(c, &root_inode, ".");
+
+
+	/* now, copy: */
+	copy_dir(s, c, &root_inode, src_fd, src_path);
+
+	if (BCH_MIGRATE_migrate == s->type)
+		reserve_old_fs_space(c, &root_inode, &s->extents);
+
+	update_inode(c, &root_inode);
+
+	if (BCH_MIGRATE_migrate == s->type)
+		darray_exit(&s->extents);
+
+	genradix_free(&s->hardlinks);
+}
diff --git a/c_src/posix_to_bcachefs.h b/c_src/posix_to_bcachefs.h
new file mode 100644
index 00000000..facb75ed
--- /dev/null
+++ b/c_src/posix_to_bcachefs.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _POSIX_TO_BCACHEFS_H
+#define _POSIX_TO_BCACHEFS_H
+
+/*
+ * This header exports the functionality needed for copying data from existing
+ * posix compliant filesystems to bcachefs. There are two use cases:
+ * 1. Creating a new bcachefs filesystem using `bcachefs format`, we can
+ *    specify a source directory tree which will be copied over the new
+ *    bcachefs filesystem.
+ * 2. Migrating an existing filesystem in place, with `bcachefs migrate`.
+ *    This will allocate space for the bcachefs metadata, but the actual data
+ *    represented by the extents will not be duplicated. The bcachefs metadata
+ *    will simply point to the existing extents.
+ *
+ * To avoid code duplication, `copy_fs` deals with both cases. See the function
+ * documentation for more details.
+ */
+
+#include "libbcachefs.h"
+
+enum bch_migrate_type {
+	BCH_MIGRATE_copy,
+	BCH_MIGRATE_migrate
+};
+
+/*
+ * The migrate action uses all the fields in this struct.
+ * The copy action only reads the `hardlinks` and `type` fields. A zeroed
+ * struct selects BCH_MIGRATE_copy, so an empty `copy_fs_state` can be passed.
+ */
+struct copy_fs_state {
+	u64			bcachefs_inum;
+	dev_t			dev;
+
+	GENRADIX(u64)		hardlinks;
+	ranges			extents;
+	enum bch_migrate_type	type;
+};
+
+/*
+ * The `copy_fs` function is used for both copying a directory tree to a new
+ * bcachefs filesystem and migrating an existing one, depending on the value
+ * of the `type` field in the `copy_fs_state` struct.
+ *
+ * In case of copy, an empty `copy_fs_state` structure is passed to `copy_fs`
+ * (only the `hardlinks` field is used, and that is initialized with zeroes).
+ *
+ * In the migrate case, all the fields from `copy_fs_state` need to be
+ * initialized (`hardlinks` is initialized with zeroes).
+ */
+void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
+		    struct copy_fs_state *s);
+#endif /* _POSIX_TO_BCACHEFS_H */
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index fcd43915..3dc4cf69 100644
--- a/libbcachefs/bkey.h
+++ b/libbcachefs/bkey.h
@@ -8,6 +8,7 @@
 #include "btree_types.h"
 #include "util.h"
 #include "vstructs.h"
+#include "bcachefs.h"
 
 enum bch_validate_flags {
 	BCH_VALIDATE_write		= (1U << 0),
diff --git a/libbcachefs/fs-common.h b/libbcachefs/fs-common.h
index dde23785..2064ef5b 100644
--- a/libbcachefs/fs-common.h
+++ b/libbcachefs/fs-common.h
@@ -2,6 +2,7 @@
 #ifndef _BCACHEFS_FS_COMMON_H
 #define _BCACHEFS_FS_COMMON_H
 
+#include "libbcachefs/dirent.h"
 struct posix_acl;
 
 #define BCH_CREATE_TMPFILE		(1U << 0)
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 679f5f5e..b62111bf 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -5,6 +5,7 @@
 #include "bkey.h"
 #include "bkey_methods.h"
 #include "opts.h"
+#include "subvolume_types.h"
 
 enum bch_validate_flags;
 extern const char * const bch2_inode_opts[];
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-06-14 18:47 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-14 18:46 [PATCH v2 1/4] bcachefs: allow initializing a bcachefs filesystem from a source directory Ariel Miculas
2024-06-14 18:46 ` [PATCH v2 2/4] Fix performance regression of update_inode Ariel Miculas
2024-06-14 18:46 ` [PATCH v2 3/4] Fix incomplete file copy due to copy_data misuse Ariel Miculas
2024-06-14 18:46 ` [PATCH v2 4/4] Preserve directory {a,m,c} times by calling copy_times() after the recursion Ariel Miculas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox