public inbox for linux-erofs@ozlabs.org
 help / color / mirror / Atom feed
From: Lucas Karpinski <lkarpinski@nvidia.com>
To: linux-erofs@lists.ozlabs.org
Cc: jcalmels@nvidia.com, Lucas Karpinski <lkarpinski@nvidia.com>
Subject: [PATCH v3 3/4] erofs-utils: mkfs: add rebuild FULLDATA for combined EROFS images
Date: Tue, 14 Apr 2026 15:10:41 -0400	[thread overview]
Message-ID: <20260414-merge-fs-v3-3-266bd1367fd2@nvidia.com> (raw)
In-Reply-To: <20260414-merge-fs-v3-0-266bd1367fd2@nvidia.com>

This patch introduces experimental support for merging multiple source
images in mkfs. Each regular file records the path of its source image and
the byte offset of its data within that image. When the file data is
written, mkfs opens the source image (blob) and pulls the payload in via
erofs_io_xcopy.

This does not yet support chunk-based files or compressed images.

Signed-off-by: Lucas Karpinski <lkarpinski@nvidia.com>
---
 include/erofs/internal.h |  3 +++
 lib/inode.c              | 31 ++++++++++++++++++---
 lib/rebuild.c            | 70 ++++++++++++++++++++++++++++++++++++++++++++++++
 mkfs/main.c              |  7 +++--
 4 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index c780228c..450e2647 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -208,6 +208,7 @@ struct erofs_diskbuf;
 #define EROFS_INODE_DATA_SOURCE_LOCALPATH	1
 #define EROFS_INODE_DATA_SOURCE_DISKBUF		2
 #define EROFS_INODE_DATA_SOURCE_RESVSP		3
+#define EROFS_INODE_DATA_SOURCE_REBUILD_BLOB	4
 
 #define EROFS_I_BLKADDR_DEV_ID_BIT		48
 
@@ -253,6 +254,8 @@ struct erofs_inode {
 		char *i_link;
 		struct erofs_diskbuf *i_diskbuf;
 	};
+	char *rebuild_blobpath;
+	erofs_off_t rebuild_src_dataoff;
 	unsigned char datalayout;
 	unsigned char inode_isize;
 	/* inline tail-end packing size */
diff --git a/lib/inode.c b/lib/inode.c
index 2f78d9b8..bd10e267 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -158,6 +158,8 @@ unsigned int erofs_iput(struct erofs_inode *inode)
 	if (inode->datasource == EROFS_INODE_DATA_SOURCE_DISKBUF) {
 		erofs_diskbuf_close(inode->i_diskbuf);
 		free(inode->i_diskbuf);
+	} else if (inode->datasource == EROFS_INODE_DATA_SOURCE_REBUILD_BLOB) {
+		free(inode->rebuild_blobpath);
 	} else {
 		free(inode->i_link);
 	}
@@ -697,7 +699,10 @@ static int erofs_write_unencoded_data(struct erofs_inode *inode,
 
 int erofs_write_unencoded_file(struct erofs_inode *inode, int fd, u64 fpos)
 {
-	if (cfg.c_chunkbits) {
+	struct erofs_vfile vf = { .fd = fd };
+
+	if (cfg.c_chunkbits &&
+	    inode->datasource != EROFS_INODE_DATA_SOURCE_REBUILD_BLOB) {
 		inode->u.chunkbits = cfg.c_chunkbits;
 		/* chunk indexes when explicitly specified */
 		inode->u.chunkformat = 0;
@@ -706,10 +711,15 @@ int erofs_write_unencoded_file(struct erofs_inode *inode, int fd, u64 fpos)
 		return erofs_blob_write_chunked_file(inode, fd, fpos);
 	}
 
+	if (inode->datasource == EROFS_INODE_DATA_SOURCE_REBUILD_BLOB) {
+		if (erofs_io_lseek(&vf, fpos, SEEK_SET) != (off_t)fpos)
+			return -EIO;
+		return erofs_write_unencoded_data(inode, &vf, fpos, true, false);
+	}
+
 	inode->datalayout = EROFS_INODE_FLAT_INLINE;
 	/* fallback to all data uncompressed */
-	return erofs_write_unencoded_data(inode,
-			&(struct erofs_vfile){ .fd = fd }, fpos,
+	return erofs_write_unencoded_data(inode, &vf, fpos,
 			inode->datasource == EROFS_INODE_DATA_SOURCE_DISKBUF, false);
 }
 
@@ -1508,6 +1518,12 @@ out:
 		free(inode->i_diskbuf);
 		inode->i_diskbuf = NULL;
 		inode->datasource = EROFS_INODE_DATA_SOURCE_NONE;
+	} else if (inode->datasource == EROFS_INODE_DATA_SOURCE_REBUILD_BLOB) {
+		free(inode->rebuild_blobpath);
+		inode->rebuild_blobpath = NULL;
+		inode->datasource = EROFS_INODE_DATA_SOURCE_NONE;
+		DBG_BUGON(ctx->fd < 0);
+		close(ctx->fd);
 	} else {
 		DBG_BUGON(ctx->fd < 0);
 		close(ctx->fd);
@@ -2014,6 +2030,12 @@ static int erofs_mkfs_begin_nondirectory(const struct erofs_mkfs_btctx *btctx,
 			if (ctx.fd < 0)
 				return -errno;
 			break;
+		case EROFS_INODE_DATA_SOURCE_REBUILD_BLOB:
+			ctx.fd = open(inode->rebuild_blobpath, O_RDONLY | O_BINARY);
+			if (ctx.fd < 0)
+				return -errno;
+			ctx.fpos = inode->rebuild_src_dataoff;
+			break;
 		default:
 			goto out;
 		}
@@ -2022,7 +2044,8 @@ static int erofs_mkfs_begin_nondirectory(const struct erofs_mkfs_btctx *btctx,
 		if (ret < 0)
 			return ret;
 
-		if (inode->sbi->available_compr_algs &&
+		if (inode->datasource != EROFS_INODE_DATA_SOURCE_REBUILD_BLOB &&
+		    inode->sbi->available_compr_algs &&
 		    erofs_file_is_compressible(im, inode)) {
 			ctx.ictx = erofs_prepare_compressed_file(im, inode);
 			if (IS_ERR(ctx.ictx))
diff --git a/lib/rebuild.c b/lib/rebuild.c
index 7ab2b499..3785afd0 100644
--- a/lib/rebuild.c
+++ b/lib/rebuild.c
@@ -14,8 +14,10 @@
 #include "erofs/xattr.h"
 #include "erofs/blobchunk.h"
 #include "erofs/internal.h"
+#include "erofs/io.h"
 #include "liberofs_rebuild.h"
 #include "liberofs_uuid.h"
+#include "liberofs_cache.h"
 
 #ifdef HAVE_LINUX_AUFS_TYPE_H
 #include <linux/aufs_type.h>
@@ -221,6 +223,71 @@ err:
 	return ret;
 }
 
+static int erofs_rebuild_write_full_data(struct erofs_inode *inode) /* FULLDATA rebuild: note where this inode's data lives in its source image */
+{ /* returns 0 on success or a negative errno value */
+	struct erofs_sb_info *src_sbi = inode->sbi; /* sbi the inode currently refers to; treated as the source image */
+	int err = 0;
+
+	if (inode->datalayout == EROFS_INODE_FLAT_PLAIN) {
+		if (inode->u.i_blkaddr == EROFS_NULL_ADDR) { /* no data blocks recorded */
+			if (inode->i_size) /* non-empty file without a block address */
+				return -EFSCORRUPTED;
+			return 0; /* empty file: nothing to copy */
+		}
+		inode->rebuild_blobpath = strdup(src_sbi->devname); /* private copy of the source device name */
+		if (!inode->rebuild_blobpath)
+			return -ENOMEM;
+		inode->rebuild_src_dataoff = /* presumably the byte position of the first data block in the source */
+			erofs_pos(src_sbi, erofs_inode_dev_baddr(inode));
+		inode->datasource = EROFS_INODE_DATA_SOURCE_REBUILD_BLOB;
+	} else if (inode->datalayout == EROFS_INODE_FLAT_INLINE) {
+		erofs_blk_t nblocks = erofs_blknr(src_sbi, inode->i_size); /* presumably the count of whole blocks in i_size */
+		unsigned int inline_size = inode->i_size % erofs_blksiz(src_sbi); /* bytes left over after the whole blocks */
+
+		if (nblocks > 0 && inode->u.i_blkaddr != EROFS_NULL_ADDR) { /* blob source is recorded only when both hold */
+			inode->rebuild_blobpath = strdup(src_sbi->devname);
+			if (!inode->rebuild_blobpath)
+				return -ENOMEM;
+			inode->rebuild_src_dataoff =
+				erofs_pos(src_sbi,
+					  erofs_inode_dev_baddr(inode));
+			inode->datasource = EROFS_INODE_DATA_SOURCE_REBUILD_BLOB;
+		}
+
+		inode->idata_size = inline_size; /* set even when it is 0 */
+		if (inline_size > 0) {
+			struct erofs_vfile vf;
+			erofs_off_t tail_offset = erofs_pos(src_sbi, nblocks); /* offset handed to erofs_pread() on the inode's vfile */
+
+			inode->idata = malloc(inline_size); /* buffer that receives the tail bytes */
+			if (!inode->idata)
+				return -ENOMEM;
+			err = erofs_iopen(&vf, inode);
+			if (err) {
+				free(inode->idata); /* do not leave a buffer behind on failure */
+				inode->idata = NULL;
+				return err;
+			}
+			err = erofs_pread(&vf, inode->idata, inline_size,
+					  tail_offset);
+			if (err) { /* any non-zero result is treated as failure */
+				free(inode->idata);
+				inode->idata = NULL;
+				return err;
+			}
+		}
+	} else if (inode->datalayout == EROFS_INODE_CHUNK_BASED) {
+		erofs_err("chunk-based files not yet supported: %s",
+			  inode->i_srcpath);
+		err = -EOPNOTSUPP;
+	} else if (is_inode_layout_compression(inode)) {
+		erofs_err("compressed files not yet supported: %s",
+			  inode->i_srcpath);
+		err = -EOPNOTSUPP;
+	}
+	return err; /* a layout matching none of the branches returns 0 with the inode untouched */
+}
+
 static int erofs_rebuild_update_inode(struct erofs_sb_info *dst_sb,
 				      struct erofs_inode *inode,
 				      enum erofs_rebuild_datamode datamode)
@@ -265,6 +332,8 @@ static int erofs_rebuild_update_inode(struct erofs_sb_info *dst_sb,
 			err = erofs_rebuild_write_blob_index(dst_sb, inode);
 		else if (datamode == EROFS_REBUILD_DATA_RESVSP)
 			inode->datasource = EROFS_INODE_DATA_SOURCE_RESVSP;
+		else if (datamode == EROFS_REBUILD_DATA_FULL)
+			err = erofs_rebuild_write_full_data(inode);
 		else
 			err = -EOPNOTSUPP;
 		break;
@@ -553,3 +622,4 @@ int erofs_rebuild_load_basedir(struct erofs_inode *dir, u64 *nr_subdirs,
 	};
 	return erofs_iterate_dir(&ctx.ctx, false);
 }
+
diff --git a/mkfs/main.c b/mkfs/main.c
index 6867478b..d75c97b2 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -1756,7 +1756,7 @@ static int erofs_mkfs_rebuild_load_trees(struct erofs_inode *root)
 		extra_devices += src->extra_devices;
 	}
 
-	if (datamode != EROFS_REBUILD_DATA_BLOB_INDEX)
+	if (datamode == EROFS_REBUILD_DATA_RESVSP)
 		return 0;
 
 	/* Each blob has either no extra device or only one device for TarFS */
@@ -1766,6 +1766,9 @@ static int erofs_mkfs_rebuild_load_trees(struct erofs_inode *root)
 		return -EOPNOTSUPP;
 	}
 
+	if (datamode == EROFS_REBUILD_DATA_FULL)
+		return 0;
+
 	ret = erofs_mkfs_init_devices(&g_sbi, rebuild_src_count);
 	if (ret)
 		return ret;
@@ -1788,7 +1791,7 @@ static int erofs_mkfs_rebuild_load_trees(struct erofs_inode *root)
 			memcpy(devs[idx].tag, tag, sizeof(devs[0].tag));
 		else
 			/* convert UUID of the source image to a hex string */
-			erofs_uuid_unparse_as_tag(src->uuid, (char *)g_sbi.devs[idx].tag);
+			erofs_uuid_unparse_as_tag(src->uuid, (char *)devs[idx].tag);
 	}
 	return 0;
 }

-- 
Git-155)


  parent reply	other threads:[~2026-04-14 19:11 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-14 19:10 [PATCH v3 0/4] erofs-utils: implement the FULLDATA rebuild mode Lucas Karpinski
2026-04-14 19:10 ` [PATCH v3 1/4] erofs-utils: lib: remove redundant if check Lucas Karpinski
2026-04-14 19:10 ` [PATCH v3 2/4] erofs-utils: lib: add helper function erofs_uuid_unparse_as_tag Lucas Karpinski
2026-04-14 19:10 ` Lucas Karpinski [this message]
2026-04-15  3:35   ` [PATCH v3 3/4] erofs-utils: mfks: add rebuild FULLDATA for combined EROFS images zhaoyifan (H)
2026-04-15  7:47     ` Gao Xiang
2026-04-15 13:30       ` Lucas Karpinski
2026-04-14 19:10 ` [PATCH v3 4/4] erofs-utils: manpages: update to reflect fulldata support Lucas Karpinski
2026-04-15  2:07 ` [PATCH v3 0/4] erofs-utils: implement the FULLDATA rebuild mode Gao Xiang
2026-04-15 14:09   ` Lucas Karpinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260414-merge-fs-v3-3-266bd1367fd2@nvidia.com \
    --to=lkarpinski@nvidia.com \
    --cc=jcalmels@nvidia.com \
    --cc=linux-erofs@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox