* [PATCH 1/3] erofs-utils: get rid of useless nr_dup
@ 2023-02-28 18:54 Gao Xiang
2023-02-28 18:54 ` [PATCH 2/3] erofs-utils: use compressed pclusters to mark fragments Gao Xiang
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Gao Xiang @ 2023-02-28 18:54 UTC (permalink / raw)
To: linux-erofs; +Cc: Gao Xiang
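nr_dup is only used as scratch state while looking up a single fragment,
so track the deduplicated length in a local variable instead.
Also refine the longest-match detection.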
Fixes: 990c7e383795 ("erofs-utils: mkfs: support fragment deduplication")
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
lib/fragments.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/lib/fragments.c b/lib/fragments.c
index c67c1bb..1e41485 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -26,7 +26,7 @@
struct erofs_fragment_dedupe_item {
struct list_head list;
- unsigned int length, nr_dup;
+ unsigned int length;
erofs_off_t pos;
u8 data[];
};
@@ -53,7 +53,7 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
struct erofs_fragment_dedupe_item *cur, *di = NULL;
struct list_head *head;
u8 *data;
- unsigned int length, e2;
+ unsigned int length, e2, deduped;
int ret;
head = &dupli_frags[FRAGMENT_HASH(crc)];
@@ -83,6 +83,7 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
DBG_BUGON(length <= EROFS_TOF_HASHLEN);
e2 = length - EROFS_TOF_HASHLEN;
+ deduped = 0;
list_for_each_entry(cur, head, list) {
unsigned int e1, mn, i = 0;
@@ -97,22 +98,22 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
while (i < mn && cur->data[e1 - i - 1] == data[e2 - i - 1])
++i;
- if (i && (!di || i + EROFS_TOF_HASHLEN > di->nr_dup)) {
- cur->nr_dup = i + EROFS_TOF_HASHLEN;
+ if (!di || i + EROFS_TOF_HASHLEN > deduped) {
+ deduped = i + EROFS_TOF_HASHLEN;
di = cur;
/* full match */
- if (i == mn)
+ if (i == e2)
break;
}
}
if (!di)
goto out;
- DBG_BUGON(di->length < di->nr_dup);
+ DBG_BUGON(di->length < deduped);
- inode->fragment_size = di->nr_dup;
- inode->fragmentoff = di->pos + di->length - di->nr_dup;
+ inode->fragment_size = deduped;
+ inode->fragmentoff = di->pos + di->length - deduped;
erofs_dbg("Dedupe %u tail data at %llu", inode->fragment_size,
inode->fragmentoff | 0ULL);
@@ -161,7 +162,6 @@ static int z_erofs_fragments_dedupe_insert(void *data, unsigned int len,
memcpy(di->data, data, len);
di->length = len;
di->pos = pos;
- di->nr_dup = 0;
list_add_tail(&di->list, &dupli_frags[FRAGMENT_HASH(crc)]);
return 0;
--
2.36.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/3] erofs-utils: use compressed pclusters to mark fragments
2023-02-28 18:54 [PATCH 1/3] erofs-utils: get rid of useless nr_dup Gao Xiang
@ 2023-02-28 18:54 ` Gao Xiang
2023-03-01 14:00 ` Yue Hu
2023-02-28 18:54 ` [PATCH 3/3] erofs-utils: add `-Eall-fragments` option Gao Xiang
2023-03-01 11:05 ` [PATCH 1/3] erofs-utils: get rid of useless nr_dup Yue Hu
2 siblings, 1 reply; 6+ messages in thread
From: Gao Xiang @ 2023-02-28 18:54 UTC (permalink / raw)
To: linux-erofs; +Cc: Gao Xiang
The decoded lengths of uncompressed pclusters should be
strictly no more than their encoded lengths, so mark fragments
as compressed (non-raw) pclusters instead.
Fixes: 9fa9b017f773 ("erofs-utils: mkfs: support fragments")
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
lib/compress.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/compress.c b/lib/compress.c
index 0aaec30..8169990 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -95,7 +95,7 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
* A lcluster cannot have three parts with the middle one which
* is well-compressed for !ztailpacking cases.
*/
- DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking);
+ DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking && !cfg.c_fragments);
DBG_BUGON(ctx->e.partial);
type = ctx->e.raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
@@ -457,7 +457,7 @@ frag_packing:
if (ret < 0)
return ret;
ctx->e.compressedblks = 0; /* indicate a fragment */
- ctx->e.raw = true;
+ ctx->e.raw = false;
ctx->fragemitted = true;
fix_dedupedfrag = false;
/* tailpcluster should be less than 1 block */
@@ -928,7 +928,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
z_erofs_write_indexes(&ctx);
ctx.e.length = inode->fragment_size;
ctx.e.compressedblks = 0;
- ctx.e.raw = true;
+ ctx.e.raw = false;
ctx.e.partial = false;
ctx.e.blkaddr = ctx.blkaddr;
}
--
2.36.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 3/3] erofs-utils: add `-Eall-fragments` option
2023-02-28 18:54 [PATCH 1/3] erofs-utils: get rid of useless nr_dup Gao Xiang
2023-02-28 18:54 ` [PATCH 2/3] erofs-utils: use compressed pclusters to mark fragments Gao Xiang
@ 2023-02-28 18:54 ` Gao Xiang
2023-03-01 11:21 ` Yue Hu
2023-03-01 11:05 ` [PATCH 1/3] erofs-utils: get rid of useless nr_dup Yue Hu
2 siblings, 1 reply; 6+ messages in thread
From: Gao Xiang @ 2023-02-28 18:54 UTC (permalink / raw)
To: linux-erofs; +Cc: Gao Xiang
It's almost the same as the `-Efragments` option, except that it will
explicitly pack the whole file data into the special inode instead.
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
configure.ac | 1 +
include/erofs/config.h | 1 +
include/erofs/fragments.h | 1 +
lib/compress.c | 32 ++++++++++++++++-------------
lib/fragments.c | 43 ++++++++++++++++++++++++++++++++++++++-
man/mkfs.erofs.1 | 25 ++++++++++++++---------
mkfs/main.c | 6 ++++++
7 files changed, 84 insertions(+), 25 deletions(-)
diff --git a/configure.ac b/configure.ac
index cdbeb33..4dbe86f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -134,6 +134,7 @@ AC_CHECK_HEADERS(m4_flatten([
stdlib.h
string.h
sys/ioctl.h
+ sys/mman.h
sys/stat.h
sys/sysmacros.h
sys/time.h
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 39a6162..648a3e8 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -48,6 +48,7 @@ struct erofs_configure {
bool c_noinline_data;
bool c_ztailpacking;
bool c_fragments;
+ bool c_all_fragments;
bool c_dedupe;
bool c_ignore_mtime;
bool c_showprogress;
diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h
index 4caaf6b..21753ec 100644
--- a/include/erofs/fragments.h
+++ b/include/erofs/fragments.h
@@ -16,6 +16,7 @@ extern const char *frags_packedname;
#define EROFS_PACKED_INODE frags_packedname
int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc);
+int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc);
int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
unsigned int len, u32 tofcrc);
void z_erofs_fragments_commit(struct erofs_inode *inode);
diff --git a/lib/compress.c b/lib/compress.c
index 8169990..65c6f90 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -899,22 +899,26 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
ctx.remaining = inode->i_size - inode->fragment_size;
ctx.fix_dedupedfrag = false;
ctx.fragemitted = false;
+ if (cfg.c_all_fragments && !erofs_is_packed_inode(inode) &&
+ !inode->fragment_size) {
+ ret = z_erofs_pack_file_from_fd(inode, fd, ctx.tof_chksum);
+ } else {
+ while (ctx.remaining) {
+ const u64 rx = min_t(u64, ctx.remaining,
+ sizeof(ctx.queue) - ctx.tail);
+
+ ret = read(fd, ctx.queue + ctx.tail, rx);
+ if (ret != rx) {
+ ret = -errno;
+ goto err_bdrop;
+ }
+ ctx.remaining -= rx;
+ ctx.tail += rx;
- while (ctx.remaining) {
- const u64 readcount = min_t(u64, ctx.remaining,
- sizeof(ctx.queue) - ctx.tail);
-
- ret = read(fd, ctx.queue + ctx.tail, readcount);
- if (ret != readcount) {
- ret = -errno;
- goto err_bdrop;
+ ret = vle_compress_one(&ctx);
+ if (ret)
+ goto err_free_idata;
}
- ctx.remaining -= readcount;
- ctx.tail += readcount;
-
- ret = vle_compress_one(&ctx);
- if (ret)
- goto err_free_idata;
}
DBG_BUGON(ctx.head != ctx.tail);
diff --git a/lib/fragments.c b/lib/fragments.c
index 1e41485..ebff4b5 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -17,6 +17,7 @@
#endif
#include <stdlib.h>
#include <unistd.h>
+#include <sys/mman.h>
#include "erofs/err.h"
#include "erofs/inode.h"
#include "erofs/compress.h"
@@ -154,7 +155,11 @@ static int z_erofs_fragments_dedupe_insert(void *data, unsigned int len,
if (len <= EROFS_TOF_HASHLEN)
return 0;
-
+ if (len > EROFS_CONFIG_COMPR_MAX_SZ) {
+ data += len - EROFS_CONFIG_COMPR_MAX_SZ;
+ pos += len - EROFS_CONFIG_COMPR_MAX_SZ;
+ len = EROFS_CONFIG_COMPR_MAX_SZ;
+ }
di = malloc(sizeof(*di) + len);
if (!di)
return -ENOMEM;
@@ -204,6 +209,42 @@ void z_erofs_fragments_commit(struct erofs_inode *inode)
erofs_sb_set_fragments();
}
+int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd,
+ u32 tofcrc)
+{
+#ifdef HAVE_FTELLO64
+ off64_t offset = ftello64(packedfile);
+#else
+ off_t offset = ftello(packedfile);
+#endif
+ char *memblock;
+ int rc;
+
+ if (offset < 0)
+ return -errno;
+
+ memblock = mmap(NULL, inode->i_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (memblock == MAP_FAILED)
+ return -EFAULT;
+
+ inode->fragmentoff = (erofs_off_t)offset;
+ inode->fragment_size = inode->i_size;
+
+ if (fwrite(memblock, inode->fragment_size, 1, packedfile) != 1) {
+ rc = -EIO;
+ goto out;
+ }
+
+ erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size,
+ inode->fragmentoff);
+
+ rc = z_erofs_fragments_dedupe_insert(memblock, inode->fragment_size,
+ inode->fragmentoff, tofcrc);
+out:
+ munmap(memblock, inode->i_size);
+ return rc;
+}
+
int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
unsigned int len, u32 tofcrc)
{
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index ba66a81..61ed24b 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -42,6 +42,21 @@ and may take an extra argument using the equals ('=') sign.
The following extended options are supported:
.RS 1.2i
.TP
+.BI all-fragments
+Forcibly record whole files into a special inode for better compression, and
+it may take an argument as the pcluster size of the packed inode in bytes.
+(Linux v6.1+)
+.TP
+.BI dedupe
+Enable global compressed data deduplication to minimize duplicated data in
+the filesystem. It may be used with \fI-Efragments\fR option together to
+further reduce image sizes. (Linux v6.1+)
+.TP
+.BI fragments
+Pack the tail part (pcluster) of compressed files or the whole files into a
+special inode for smaller image sizes, and it may take an argument as the
+pcluster size of the packed inode in bytes. (Linux v6.1+)
+.TP
.BI force-inode-compact
Forcely generate compact inodes (32-byte inodes) to output.
.TP
@@ -64,16 +79,6 @@ Don't inline regular files to enable FSDAX for these files (Linux v5.15+).
.BI ztailpacking
Pack the tail part (pcluster) of compressed files into its metadata to save
more space and the tail part I/O. (Linux v5.17+)
-.TP
-.BI fragments
-Pack the tail part (pcluster) of compressed files or the whole files into a
-special inode for smaller image sizes, and it may take an argument as the
-pcluster size of the packed inode in bytes. (Linux v6.1+)
-.TP
-.BI dedupe
-Enable global compressed data deduplication to minimize duplicated data in
-the filesystem. It may be used with \fI-Efragments\fR option together to
-further reduce image sizes. (Linux v6.1+)
.RE
.TP
.BI "\-L " volume-label
diff --git a/mkfs/main.c b/mkfs/main.c
index d055902..bc973e7 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -208,10 +208,16 @@ static int parse_extended_opts(const char *opts)
cfg.c_ztailpacking = true;
}
+ if (MATCH_EXTENTED_OPT("all-fragments", token, keylen)) {
+ cfg.c_all_fragments = true;
+ goto handle_fragment;
+ }
+
if (MATCH_EXTENTED_OPT("fragments", token, keylen)) {
char *endptr;
u64 i;
+handle_fragment:
cfg.c_fragments = true;
if (vallen) {
i = strtoull(value, &endptr, 0);
--
2.36.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH 1/3] erofs-utils: get rid of useless nr_dup
2023-02-28 18:54 [PATCH 1/3] erofs-utils: get rid of useless nr_dup Gao Xiang
2023-02-28 18:54 ` [PATCH 2/3] erofs-utils: use compressed pclusters to mark fragments Gao Xiang
2023-02-28 18:54 ` [PATCH 3/3] erofs-utils: add `-Eall-fragments` option Gao Xiang
@ 2023-03-01 11:05 ` Yue Hu
2 siblings, 0 replies; 6+ messages in thread
From: Yue Hu @ 2023-03-01 11:05 UTC (permalink / raw)
To: Gao Xiang; +Cc: huyue2, linux-erofs, zhangwen
On Wed, 1 Mar 2023 02:54:57 +0800
Gao Xiang <hsiangkao@linux.alibaba.com> wrote:
> Also refine the longest detection.
>
> Fixes: 990c7e383795 ("erofs-utils: mkfs: support fragment deduplication")
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Yue Hu <huyue2@coolpad.com>
> ---
> lib/fragments.c | 18 +++++++++---------
> 1 file changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/lib/fragments.c b/lib/fragments.c
> index c67c1bb..1e41485 100644
> --- a/lib/fragments.c
> +++ b/lib/fragments.c
> @@ -26,7 +26,7 @@
>
> struct erofs_fragment_dedupe_item {
> struct list_head list;
> - unsigned int length, nr_dup;
> + unsigned int length;
> erofs_off_t pos;
> u8 data[];
> };
> @@ -53,7 +53,7 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
> struct erofs_fragment_dedupe_item *cur, *di = NULL;
> struct list_head *head;
> u8 *data;
> - unsigned int length, e2;
> + unsigned int length, e2, deduped;
> int ret;
>
> head = &dupli_frags[FRAGMENT_HASH(crc)];
> @@ -83,6 +83,7 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
>
> DBG_BUGON(length <= EROFS_TOF_HASHLEN);
> e2 = length - EROFS_TOF_HASHLEN;
> + deduped = 0;
>
> list_for_each_entry(cur, head, list) {
> unsigned int e1, mn, i = 0;
> @@ -97,22 +98,22 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
> while (i < mn && cur->data[e1 - i - 1] == data[e2 - i - 1])
> ++i;
>
> - if (i && (!di || i + EROFS_TOF_HASHLEN > di->nr_dup)) {
> - cur->nr_dup = i + EROFS_TOF_HASHLEN;
> + if (!di || i + EROFS_TOF_HASHLEN > deduped) {
> + deduped = i + EROFS_TOF_HASHLEN;
> di = cur;
>
> /* full match */
> - if (i == mn)
> + if (i == e2)
> break;
> }
> }
> if (!di)
> goto out;
>
> - DBG_BUGON(di->length < di->nr_dup);
> + DBG_BUGON(di->length < deduped);
>
> - inode->fragment_size = di->nr_dup;
> - inode->fragmentoff = di->pos + di->length - di->nr_dup;
> + inode->fragment_size = deduped;
> + inode->fragmentoff = di->pos + di->length - deduped;
>
> erofs_dbg("Dedupe %u tail data at %llu", inode->fragment_size,
> inode->fragmentoff | 0ULL);
> @@ -161,7 +162,6 @@ static int z_erofs_fragments_dedupe_insert(void *data, unsigned int len,
> memcpy(di->data, data, len);
> di->length = len;
> di->pos = pos;
> - di->nr_dup = 0;
>
> list_add_tail(&di->list, &dupli_frags[FRAGMENT_HASH(crc)]);
> return 0;
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 3/3] erofs-utils: add `-Eall-fragments` option
2023-02-28 18:54 ` [PATCH 3/3] erofs-utils: add `-Eall-fragments` option Gao Xiang
@ 2023-03-01 11:21 ` Yue Hu
0 siblings, 0 replies; 6+ messages in thread
From: Yue Hu @ 2023-03-01 11:21 UTC (permalink / raw)
To: Gao Xiang; +Cc: linux-erofs, zhangwen
On Wed, 1 Mar 2023 02:54:59 +0800
Gao Xiang <hsiangkao@linux.alibaba.com> wrote:
> It's almost the same as `-Efragments` option, except that will
> explicitly pack the whole data into the special inode instead.
>
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Tested-by: Yue Hu <huyue2@coolpad.com>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 2/3] erofs-utils: use compressed pclusters to mark fragments
2023-02-28 18:54 ` [PATCH 2/3] erofs-utils: use compressed pclusters to mark fragments Gao Xiang
@ 2023-03-01 14:00 ` Yue Hu
0 siblings, 0 replies; 6+ messages in thread
From: Yue Hu @ 2023-03-01 14:00 UTC (permalink / raw)
To: Gao Xiang; +Cc: linux-erofs, zhangwen
On Wed, 1 Mar 2023 02:54:58 +0800
Gao Xiang <hsiangkao@linux.alibaba.com> wrote:
> The decoded lengths of uncompressed pclusters should be
> strictly no more than encoded lengths.
>
> Fixes: 9fa9b017f773 ("erofs-utils: mkfs: support fragments")
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Yue Hu <huyue2@coolpad.com>
> ---
> lib/compress.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/lib/compress.c b/lib/compress.c
> index 0aaec30..8169990 100644
> --- a/lib/compress.c
> +++ b/lib/compress.c
> @@ -95,7 +95,7 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
> * A lcluster cannot have three parts with the middle one which
> * is well-compressed for !ztailpacking cases.
> */
> - DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking);
> + DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking && !cfg.c_fragments);
> DBG_BUGON(ctx->e.partial);
> type = ctx->e.raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
> Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
> @@ -457,7 +457,7 @@ frag_packing:
> if (ret < 0)
> return ret;
> ctx->e.compressedblks = 0; /* indicate a fragment */
> - ctx->e.raw = true;
> + ctx->e.raw = false;
> ctx->fragemitted = true;
> fix_dedupedfrag = false;
> /* tailpcluster should be less than 1 block */
> @@ -928,7 +928,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
> z_erofs_write_indexes(&ctx);
> ctx.e.length = inode->fragment_size;
> ctx.e.compressedblks = 0;
> - ctx.e.raw = true;
> + ctx.e.raw = false;
> ctx.e.partial = false;
> ctx.e.blkaddr = ctx.blkaddr;
> }
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-03-01 13:54 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2023-02-28 18:54 [PATCH 1/3] erofs-utils: get rid of useless nr_dup Gao Xiang
2023-02-28 18:54 ` [PATCH 2/3] erofs-utils: use compressed pclusters to mark fragments Gao Xiang
2023-03-01 14:00 ` Yue Hu
2023-02-28 18:54 ` [PATCH 3/3] erofs-utils: add `-Eall-fragments` option Gao Xiang
2023-03-01 11:21 ` Yue Hu
2023-03-01 11:05 ` [PATCH 1/3] erofs-utils: get rid of useless nr_dup Yue Hu
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.