All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dhaval Giani <dgiani@mozilla.com>
To: linux-kernel@vger.kernel.org
Cc: tytso@mit.edu, tglek@mozilla.com, vdjeric@mozilla.com,
	glandium@mozilla.com, linux-ext4@vger.kernel.org,
	linux-fsdevel@vger.kernel.org
Subject: [RFC/PATCH 1/2] szip: Add seekable zip format
Date: Wed, 24 Jul 2013 17:03:41 -0400	[thread overview]
Message-ID: <1374699821.7083.0.camel@localhost> (raw)
In-Reply-To: <cover.1374693578.git.dgiani@mozilla.com>


Add support for inflating seekable zip format. This uses zlib
underneath. In order to create a seekable zip file, use the
szip utility which can be obtained from

https://github.com/glandium/faulty.lib

We shall use this to implement transparent decompression on
ext4. The usage would be very similar to that of the faulty.lib
linker.

Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Taras Glek <tglek@mozilla.com>
Cc: Vladan Djeric <vdjeric@mozilla.com>
Cc: linux-ext4 <linux-ext4@vger.kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: linux-fsdevel <linux-fsdevel@vger.kernel.org>
Cc: Mike Hommey <glandium@mozilla.com>
Signed-off-by: Dhaval Giani <dgiani@mozilla.com>
---
 include/linux/szip.h |  32 ++++++++
 lib/Kconfig          |   8 ++
 lib/Makefile         |   1 +
 lib/szip.c           | 217 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 258 insertions(+)
 create mode 100644 include/linux/szip.h
 create mode 100644 lib/szip.c

diff --git a/include/linux/szip.h b/include/linux/szip.h
new file mode 100644
index 0000000..1d4421e
--- /dev/null
+++ b/include/linux/szip.h
@@ -0,0 +1,32 @@
+#ifndef __SZIP_H
+#define __SZIP_H
+
+#include <linux/zlib.h>
+#include <linux/types.h>
+
+#define SZIP_HEADER_SIZE (20)
+
+/*
+ * In-memory state for one szip (seekable zip) stream.
+ *
+ * The fields from magic through filter are filled in by szip_init(), which
+ * copies them one at a time, in declaration order, from the start of the
+ * input buffer. The remaining fields are runtime state.
+ */
+struct szip_struct {
+	/* Must equal SZIP_MAGIC; szip_init() rejects anything else */
+	u32 magic;
+	/*
+	 * Used as the amount of input available when inflating the last
+	 * chunk. Presumably the total size of the compressed stream --
+	 * TODO confirm against the szip format.
+	 */
+	u32 total_size;
+	/*
+	 * Uncompressed size of every chunk except the last. szip_init()
+	 * requires a multiple of PAGE_SIZE no larger than 8 * PAGE_SIZE.
+	 */
+	u16 chunk_size;
+	/* szip_init() rejects a non-zero value (no dictionary support) */
+	u16 dict_size;
+	/* Number of chunks; szip_init() requires at least one */
+	u32 nr_chunks;
+	/* Uncompressed size of the last chunk: non-zero and <= chunk_size */
+	u16 last_chunk_size;
+	/* Passed to zlib_inflateInit2() for every chunk */
+	signed char window_bits;
+	/* szip_init() rejects a non-zero value */
+	signed char filter;
+	/* nr_chunks entries, allocated and filled by szip_init_offset_table() */
+	unsigned *offset_table;
+	/* Always set to NULL by szip_init(); dictionaries are not implemented */
+	unsigned *dictionary;
+	/* Input data; szip_init() points this at the buffer it parses */
+	char *buffer;
+	/* zlib inflate workspace, allocated when the first chunk is inflated */
+	void *workspace;
+};
+
+/* Entry points implemented in lib/szip.c */
+extern int szip_decompress(struct szip_struct *szip, char *output,
+						size_t length);
+extern int szip_seekable_decompress(struct szip_struct *szip, size_t start,
+				size_t end, char *output, size_t length);
+extern size_t szip_uncompressed_size(struct szip_struct *szip);
+extern int szip_init(struct szip_struct *szip, char *buf);
+extern void szip_init_offset_table(struct szip_struct *szip, char *buf);
+extern size_t szip_offset_table_size(struct szip_struct *szip);
+/* Defined non-static in lib/szip.c, so it needs a prototype here */
+extern void free_szip(struct szip_struct *szip);
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index fe01d41..0903693 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -213,6 +213,14 @@ config DECOMPRESS_LZO
 	select LZO_DECOMPRESS
 	tristate
 
+config SZIP
+	select ZLIB_INFLATE
+	tristate
+	help
+	  Use this to provide szip decompression support. szip is a seekable
+	  zlib format. Check https://github.com/glandium/faulty.lib for the
+	  szip tool. This is required for transparent ext4 decompression.
+
 #
 # Generic allocator support is selected if needed
 #
diff --git a/lib/Makefile b/lib/Makefile
index c55a037..86a5d4b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_LZO_COMPRESS) += lzo/
 obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
 obj-$(CONFIG_XZ_DEC) += xz/
 obj-$(CONFIG_RAID6_PQ) += raid6/
+obj-$(CONFIG_SZIP) += szip.o
 
 lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o
 lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
diff --git a/lib/szip.c b/lib/szip.c
new file mode 100644
index 0000000..d610e62
--- /dev/null
+++ b/lib/szip.c
@@ -0,0 +1,217 @@
+/*
+ * lib/szip.c
+ *
+ * Decompression support for the seekable zip (szip) format, which is
+ * based on code available at https://github.com/glandium/faulty.lib
+ *
+ * Copyright: Mozilla
+ * Author: Dhaval Giani <dgiani@mozilla.com>
+ *
+ * Based on code written by Mike Hommey <glandium@mozilla.com> as
+ * part of faulty.lib .
+ *
+ * This code is available under the MPL v2.0 which is explicitly
+ * compatible with GPL v2.
+ */
+
+#include <linux/zlib.h>
+#include <linux/szip.h>
+#include <linux/vmalloc.h>
+
+#include <linux/string.h>
+
+#define SZIP_MAGIC 0x7a5a6553
+
+/*
+ * Inflate a single chunk of the stream.
+ *
+ * @szip:   stream state; offset_table must already be populated
+ * @output: destination for the inflated bytes
+ * @offset: offset of the chunk's compressed data from szip->buffer
+ * @chunk:  index of the chunk in the offset table
+ * @length: number of bytes wanted; 0, or anything larger than the chunk's
+ *          uncompressed size, means the whole chunk
+ *
+ * Returns 0 on success, -ENOMEM if the zlib workspace cannot be allocated
+ * and -EMEDIUMTYPE if the offset table is inconsistent or zlib rejects the
+ * data.
+ */
+static int szip_decompress_seekable_chunk(struct szip_struct *szip,
+		char *output, size_t offset, size_t chunk, size_t length)
+{
+	int is_last_chunk = (chunk == szip->nr_chunks - 1);
+	size_t chunk_len = is_last_chunk ? szip->last_chunk_size
+						: szip->chunk_size;
+	unsigned chunk_start = szip->offset_table[chunk];
+	unsigned chunk_end;
+	z_stream zstream;
+	int inflate_failed;
+	int flush;
+	int success;
+	int ret;
+
+	memset(&zstream, 0, sizeof(zstream));
+
+	if (length == 0 || length > chunk_len)
+		length = chunk_len;
+
+	/*
+	 * A chunk's compressed data ends where the next chunk starts. The
+	 * last chunk ends at total_size, which is taken to be the end of the
+	 * compressed stream (as in faulty.lib's SeekableZStream), so the
+	 * chunk's own start offset has to be subtracted from it as well.
+	 */
+	if (is_last_chunk)
+		chunk_end = szip->total_size;
+	else
+		chunk_end = szip->offset_table[chunk + 1];
+
+	/* A malformed table must not turn into a huge unsigned avail_in */
+	if (chunk_end < chunk_start)
+		return -EMEDIUMTYPE;
+
+	zstream.avail_in = chunk_end - chunk_start;
+	zstream.next_in = szip->buffer + offset;
+	zstream.avail_out = length;
+	zstream.next_out = output;
+
+	/* The workspace is allocated once and reused for later chunks */
+	if (!szip->workspace)
+		szip->workspace = vzalloc(zlib_inflate_workspacesize());
+	zstream.workspace = szip->workspace;
+	if (!zstream.workspace)
+		return -ENOMEM;
+
+	/* Decompress Chunk */
+	/* **TODO: Correct return value for bad zlib format** */
+	if (zlib_inflateInit2(&zstream, (int) szip->window_bits) != Z_OK)
+		return -EMEDIUMTYPE;
+
+	/* We don't have dictionary logic yet */
+	if (length == chunk_len) {
+		/* Whole chunk requested: the stream must run to its end */
+		flush = Z_FINISH;
+		success = Z_STREAM_END;
+	} else {
+		/* Partial chunk: stop once the output buffer is full */
+		flush = Z_SYNC_FLUSH;
+		success = Z_OK;
+	}
+
+	ret = zlib_inflate(&zstream, flush);
+
+	/*
+	 * Ignore Z_BUF_ERROR for now. I am sure it will bite us
+	 * later on
+	 */
+	inflate_failed = (ret != success && ret != Z_BUF_ERROR);
+
+	/* Always pair zlib_inflateInit2() with zlib_inflateEnd() */
+	if (zlib_inflateEnd(&zstream) != Z_OK || inflate_failed)
+		return -EMEDIUMTYPE;
+
+	return 0;
+}
+
+/*
+ * Inflate chunks @start through @end (both inclusive) into @output.
+ *
+ * @szip:   stream state; szip->buffer must point at the compressed data of
+ *          chunk @start and the offset table must be populated
+ * @start:  index of the first chunk to inflate
+ * @end:    index of the last chunk to inflate; values at or beyond
+ *          nr_chunks are clamped to the last chunk so that a caller passing
+ *          the chunk count does not walk off the offset table
+ * @output: destination buffer
+ * @length: size of @output in bytes. Decompression stops once that many
+ *          bytes have been produced. 0 means "no limit": every chunk in the
+ *          range is inflated in full and @output must be large enough.
+ *
+ * Returns 0 on success (including an empty range, @start > @end), -EINVAL
+ * if the offset table is missing or @start is out of range, or the error
+ * returned for the first chunk that fails to inflate.
+ */
+int szip_seekable_decompress(struct szip_struct *szip, size_t start,
+				size_t end, char *output, size_t length)
+{
+	const int bounded = (length != 0);
+	size_t chunk_nr;
+	int ret = 0;
+
+	if (start > end)
+		return 0;
+
+	if (!szip->offset_table || start >= szip->nr_chunks)
+		return -EINVAL;
+
+	if (end >= szip->nr_chunks)
+		end = szip->nr_chunks - 1;
+
+	for (chunk_nr = start; chunk_nr <= end; chunk_nr++) {
+		size_t offset;
+		size_t len;
+
+		/*
+		 * Output space used up. Passing a zero length down would be
+		 * read as "whole chunk" and overrun the output buffer.
+		 */
+		if (bounded && !length)
+			break;
+
+		len = bounded ? min_t(size_t, length, szip->chunk_size) : 0;
+		/* szip->buffer starts at chunk @start, so offsets are relative */
+		offset = szip->offset_table[chunk_nr]
+					- szip->offset_table[start];
+		ret = szip_decompress_seekable_chunk(szip, output,
+						offset, chunk_nr, len);
+		if (ret)
+			break;
+
+		if (bounded) {
+			output += len;
+			length -= len;
+		} else {
+			output += szip->chunk_size;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Inflate the whole stream held in szip->buffer into @output.
+ *
+ * @szip:   state previously set up by szip_init()
+ * @output: destination buffer
+ * @length: size of @output in bytes
+ *
+ * The offset table is read from directly after the fixed-size header, and
+ * szip->buffer is then advanced to the first chunk's data. Because
+ * szip->buffer is modified, calling this twice on the same state would
+ * apply that adjustment twice.
+ *
+ * Returns 0 on success, -ENOMEM if no offset table is available, or the
+ * error returned by szip_seekable_decompress().
+ */
+int szip_decompress(struct szip_struct *szip, char *output, size_t length)
+{
+	szip_init_offset_table(szip, szip->buffer + SZIP_HEADER_SIZE);
+	/* Guard against a missing table before it is dereferenced */
+	if (!szip->offset_table)
+		return -ENOMEM;
+
+	szip->buffer += szip->offset_table[0];
+
+	/* The end index is inclusive, so the last chunk is nr_chunks - 1 */
+	return szip_seekable_decompress(szip, 0,
+			szip->nr_chunks - 1, output, length);
+}
+
+/*
+ * Total uncompressed size of the stream: every chunk but the last inflates
+ * to chunk_size bytes, the last one to last_chunk_size bytes.
+ */
+size_t szip_uncompressed_size(struct szip_struct *szip)
+{
+	/*
+	 * Widen before multiplying: u16 * u32 would otherwise be computed
+	 * in 32 bits and wrap before being converted to size_t.
+	 */
+	return ((size_t) szip->chunk_size * (szip->nr_chunks - 1))
+					+ szip->last_chunk_size;
+}
+
+/*
+ * Allocate szip->offset_table and fill it with nr_chunks entries copied
+ * from @buf.
+ *
+ * There is no return value: on allocation failure szip->offset_table is
+ * left NULL and nothing is copied, so callers must check it before use.
+ */
+void szip_init_offset_table(struct szip_struct *szip, char *buf)
+{
+	size_t size = sizeof(*szip->offset_table) * szip->nr_chunks;
+
+	szip->offset_table = vzalloc(size);
+	if (!szip->offset_table)
+		return;
+
+	memcpy(szip->offset_table, buf, size);
+}
+
+/* Size in bytes of the offset table: one unsigned entry per chunk. */
+size_t szip_offset_table_size(struct szip_struct *szip)
+{
+	size_t entries = szip->nr_chunks;
+
+	return entries * sizeof(unsigned);
+}
+
+/*
+ * Initialize a szip structure from the header at the start of @buf.
+ *
+ * The header fields are copied one by one, in order: magic, total_size,
+ * chunk_size, dict_size, nr_chunks, last_chunk_size, window_bits, filter.
+ * Parsing stops at the first field that fails validation.
+ *
+ * All pointer members are reset before parsing starts, so the structure
+ * can be handed to free_szip() whether or not this function succeeds.
+ *
+ * Returns 0 on success. On failure returns -1 (bad magic), -2 (bad chunk
+ * size), -3 (no chunks), -4 (bad last chunk size) or -EINVAL (a dictionary
+ * or filter is present; neither is implemented).
+ *
+ * XX: Fixup the return values. No magic numbers!
+ */
+int szip_init(struct szip_struct *szip, char *buf)
+{
+	char *ptr = buf;
+
+	szip->buffer = buf;
+	/* We don't implement it yet */
+	szip->dictionary = NULL;
+	/* Nothing allocated yet; keeps free_szip() from freeing garbage */
+	szip->offset_table = NULL;
+	szip->workspace = NULL;
+
+	memcpy(&szip->magic, ptr, sizeof(szip->magic));
+	/* No need to decode the structure if it's not an szip buffer */
+	if (szip->magic != SZIP_MAGIC)
+		return -1;
+
+	ptr += sizeof(szip->magic);
+	memcpy(&szip->total_size, ptr, sizeof(szip->total_size));
+
+	ptr += sizeof(szip->total_size);
+	memcpy(&szip->chunk_size, ptr, sizeof(szip->chunk_size));
+	/*
+	 * If chunk_size is not a multiple of PAGE_SIZE, or is larger than
+	 * 8 pages, it's malformed. No need to decode further.
+	 */
+	if ((szip->chunk_size % PAGE_SIZE) ||
+			(szip->chunk_size > 8 * PAGE_SIZE))
+		return -2;
+
+	ptr += sizeof(szip->chunk_size);
+	memcpy(&szip->dict_size, ptr, sizeof(szip->dict_size));
+	/* Dictionaries are not supported */
+	if (szip->dict_size)
+		return -EINVAL;
+
+	ptr += sizeof(szip->dict_size);
+	memcpy(&szip->nr_chunks, ptr, sizeof(szip->nr_chunks));
+	/* If there are no chunks, no need to decode further */
+	if (szip->nr_chunks < 1)
+		return -3;
+
+	ptr += sizeof(szip->nr_chunks);
+	memcpy(&szip->last_chunk_size, ptr, sizeof(szip->last_chunk_size));
+	/* Last chunk size is never 0 or greater than chunk size */
+	if (!szip->last_chunk_size || szip->last_chunk_size > szip->chunk_size)
+		return -4;
+
+	ptr += sizeof(szip->last_chunk_size);
+	memcpy(&szip->window_bits, ptr, sizeof(szip->window_bits));
+
+	ptr += sizeof(szip->window_bits);
+	memcpy(&szip->filter, ptr, sizeof(szip->filter));
+	/* Filters are not supported */
+	if (szip->filter)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Release the memory owned by @szip: the offset table and the zlib
+ * workspace. The structure itself and szip->buffer belong to the caller
+ * and are left alone.
+ *
+ * The pointers are cleared after freeing, so calling this again on the
+ * same structure is harmless.
+ */
+void free_szip(struct szip_struct *szip)
+{
+	vfree(szip->offset_table);
+	szip->offset_table = NULL;
+
+	vfree(szip->workspace);
+	szip->workspace = NULL;
+}
-- 
1.8.1.4

       reply	other threads:[~2013-07-24 21:03 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <cover.1374693578.git.dgiani@mozilla.com>
2013-07-24 21:03 ` Dhaval Giani [this message]
2013-07-24 21:03 ` [RFC/PATCH 2/2] Add rudimentary transparent decompression support to ext4 Dhaval Giani

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1374699821.7083.0.camel@localhost \
    --to=dgiani@mozilla.com \
    --cc=glandium@mozilla.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglek@mozilla.com \
    --cc=tytso@mit.edu \
    --cc=vdjeric@mozilla.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.