All of lore.kernel.org
 help / color / mirror / Atom feed
* [f2fs-dev] [PATCH] Make sload.f2fs reproduce hard links
@ 2020-12-08 22:08 Jordan Webb
  2020-12-10  2:09 ` Jaegeuk Kim
  0 siblings, 1 reply; 3+ messages in thread
From: Jordan Webb @ 2020-12-08 22:08 UTC (permalink / raw)
  To: linux-f2fs-devel

If sload.f2fs encounters a regular file with st_nlink > 1, it will
mark it as a possible hard link by remembering the original device
and inode. When sload.f2fs creates the file, it will check if it has
already created a file for the same original device and inode. If
so, it will add a directory entry pointing at the previously created
inode and increment that inode's link count, instead of writing a
new inode.

This allows sload.f2fs to accurately reproduce a directory tree that
contains hard links, such as those created by ostree. Without this
patch, directory trees containing hard links result in the content of
the files being duplicated.
---
 fsck/dir.c   | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 fsck/f2fs.h  |  9 +++++++
 fsck/sload.c |  9 +++++++
 3 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/fsck/dir.c b/fsck/dir.c
index dc03c98..bdd8163 100644
--- a/fsck/dir.c
+++ b/fsck/dir.c
@@ -15,6 +15,7 @@
  */
 #include "fsck.h"
 #include "node.h"
+#include <search.h>
 
 static int room_for_filename(const u8 *bitmap, int slots, int max_slots)
 {
@@ -634,10 +635,19 @@ int convert_inline_dentry(struct f2fs_sb_info *sbi, struct f2fs_node *node,
 	return 0;
 }
 
+int cmp_from_devino(const void *a, const void *b) {
+	u64 devino_a = ((struct hardlink_cache_entry*) a)->from_devino;
+	u64 devino_b = ((struct hardlink_cache_entry*) b)->from_devino;
+
+	return (devino_a > devino_b) - (devino_a < devino_b);
+}
+
 int f2fs_create(struct f2fs_sb_info *sbi, struct dentry *de)
 {
 	struct f2fs_node *parent, *child;
-	struct node_info ni;
+	struct hardlink_cache_entry *find_hardlink = 0, *found_hardlink = 0;
+	void *search_result;
+	struct node_info ni, hardlink_ni;
 	struct f2fs_summary sum;
 	block_t blkaddr = NULL_ADDR;
 	int ret;
@@ -671,10 +681,41 @@ int f2fs_create(struct f2fs_sb_info *sbi, struct dentry *de)
 		goto free_parent_dir;
 	}
 
+	if (de->from_devino != 0) {
+		/* This might be a hardlink, try to find it in the cache */
+		find_hardlink = calloc(1, sizeof(struct hardlink_cache_entry));
+		find_hardlink->from_devino = de->from_devino;
+		find_hardlink->to_ino = 0;
+
+		search_result = tsearch(find_hardlink, &(sbi->hardlink_cache), cmp_from_devino);
+		ASSERT(search_result != 0);
+
+		found_hardlink = *(struct hardlink_cache_entry**) search_result;
+		ASSERT(find_hardlink->from_devino == found_hardlink->from_devino);
+
+		/* If it was already in the cache, free the entry we just created */
+		if (found_hardlink != find_hardlink)
+			free(find_hardlink);
+	}
+
 	child = calloc(BLOCK_SZ, 1);
 	ASSERT(child);
 
-	f2fs_alloc_nid(sbi, &de->ino);
+	if ((found_hardlink != 0) && (found_hardlink->to_ino != 0)) {
+		/* If we found this devino in the cache, we're creating a hard link */
+		get_node_info(sbi, found_hardlink->to_ino, &hardlink_ni);
+		if (hardlink_ni.blk_addr == NULL_ADDR) {
+			MSG(0, "No original inode for hard link to_ino=%x\n", found_hardlink->to_ino);
+			return -1;
+		}
+
+		/* Use previously-recorded inode */
+		de->ino = found_hardlink->to_ino;
+		blkaddr = hardlink_ni.blk_addr;
+		MSG(0, "Creating \"%s\" as hard link to inode %d\n", de->path, de->ino);
+	} else {
+		f2fs_alloc_nid(sbi, &de->ino);
+	}
 
 	init_inode_block(sbi, child, de);
 
@@ -689,6 +730,26 @@ int f2fs_create(struct f2fs_sb_info *sbi, struct dentry *de)
 		goto free_child_dir;
 	}
 
+	if (found_hardlink != 0) {
+		if (found_hardlink->to_ino == 0) {
+			MSG(0, "Adding inode %d from %s to hardlink cache\n", de->ino, de->path);
+			found_hardlink->to_ino = de->ino;
+		} else {
+			/* Replace child with original block */
+			free(child);
+			child = calloc(BLOCK_SZ, 1);
+			ASSERT(child);
+
+			ret = dev_read_block(child, blkaddr);
+			ASSERT(ret >= 0);
+
+			/* Increment links and skip to writing block */
+			child->i.i_links++;
+			MSG(0, "Number of links on inode %d is now %d\n", de->ino, child->i.i_links);
+			goto write_child_dir;
+		}
+	}
+
 	/* write child */
 	set_summary(&sum, de->ino, 0, ni.version);
 	ret = reserve_new_block(sbi, &blkaddr, &sum, CURSEG_HOT_NODE, 1);
@@ -697,6 +758,7 @@ int f2fs_create(struct f2fs_sb_info *sbi, struct dentry *de)
 	/* update nat info */
 	update_nat_blkaddr(sbi, de->ino, de->ino, blkaddr);
 
+write_child_dir:
 	ret = dev_write_block(child, blkaddr);
 	ASSERT(ret >= 0);
 
diff --git a/fsck/f2fs.h b/fsck/f2fs.h
index 76e8272..a182e8e 100644
--- a/fsck/f2fs.h
+++ b/fsck/f2fs.h
@@ -221,6 +221,7 @@ struct dentry {
 	uint64_t capabilities;
 	nid_t ino;
 	nid_t pino;
+	u64 from_devino;
 };
 
 /* different from dnode_of_data in kernel */
@@ -234,6 +235,11 @@ struct dnode_of_data {
 	int idirty, ndirty;
 };
 
+struct hardlink_cache_entry {
+	u64 from_devino;
+	nid_t to_ino;
+};
+
 struct f2fs_sb_info {
 	struct f2fs_fsck *fsck;
 
@@ -276,6 +282,9 @@ struct f2fs_sb_info {
 
 	/* true if late_build_segment_manger() is called */
 	bool seg_manager_done;
+
+	/* keep track of hardlinks so we can recreate them */
+	void *hardlink_cache;
 };
 
 static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
diff --git a/fsck/sload.c b/fsck/sload.c
index 14012fb..f3a6c12 100644
--- a/fsck/sload.c
+++ b/fsck/sload.c
@@ -148,6 +148,12 @@ static void set_inode_metadata(struct dentry *de)
 	}
 
 	if (S_ISREG(stat.st_mode)) {
+		if (stat.st_nlink > 1) {
+			/* This file might have multiple links to it, so remember device and inode */
+			de->from_devino = stat.st_dev;
+			de->from_devino <<= 32;
+			de->from_devino |= stat.st_ino;
+		}
 		de->file_type = F2FS_FT_REG_FILE;
 	} else if (S_ISDIR(stat.st_mode)) {
 		de->file_type = F2FS_FT_DIR;
@@ -333,6 +339,9 @@ int f2fs_sload(struct f2fs_sb_info *sbi)
 	/* flush NAT/SIT journal entries */
 	flush_journal_entries(sbi);
 
+	/* initialize empty hardlink cache */
+	sbi->hardlink_cache = 0;
+
 	ret = build_directory(sbi, c.from_dir, "/",
 					c.target_out_dir, F2FS_ROOT_INO(sbi));
 	if (ret) {
-- 
2.24.3 (Apple Git-128)




_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-12-10 15:29 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-08 22:08 [f2fs-dev] [PATCH] Make sload.f2fs reproduce hard links Jordan Webb
2020-12-10  2:09 ` Jaegeuk Kim
2020-12-10 15:05   ` Jordan Webb

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.