linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC 0/4] Squashfs decompression per-cpu
@ 2011-04-22 21:17 Stephen Hemminger
  2011-04-22 21:17 ` [RFC 1/4] squashfs: use percpu for zlib decompression Stephen Hemminger
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Stephen Hemminger @ 2011-04-22 21:17 UTC (permalink / raw)
  To: Phillip Lougher, kirk w; +Cc: linux-fsdevel, squashfs-devel

These patches are a quick stab at eliminating the single-threaded
decompression in the current squashfs.  They are against the latest kernel,
2.6.39-rc4.

They have only been lightly tested, i.e. by mounting a zlib squashfs filesystem
and doing some basic access. It is just a starting point for discussion.



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC 1/4] squashfs: use percpu for zlib decompression
  2011-04-22 21:17 [RFC 0/4] Squashfs decompression per-cpu Stephen Hemminger
@ 2011-04-22 21:17 ` Stephen Hemminger
  2011-04-22 21:17 ` [RFC 2/4] squashfs: use percpu for lzo decompression Stephen Hemminger
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Stephen Hemminger @ 2011-04-22 21:17 UTC (permalink / raw)
  To: Phillip Lougher, kirk w; +Cc: linux-fsdevel, squashfs-devel

[-- Attachment #1: squashfs-gzip-percpu.patch --]
[-- Type: text/plain, Size: 3520 bytes --]

Make zlib decompression multi-threaded.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/fs/squashfs/zlib_wrapper.c	2011-04-22 10:57:09.450024619 -0700
+++ b/fs/squashfs/zlib_wrapper.c	2011-04-22 13:15:52.509266395 -0700
@@ -26,6 +26,7 @@
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
 #include <linux/zlib.h>
+#include <linux/percpu.h>
 #include <linux/vmalloc.h>
 
 #include "squashfs_fs.h"
@@ -33,31 +34,53 @@
 #include "squashfs.h"
 #include "decompressor.h"
 
+/* don't use per superblock stream anymore. */
 static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
 {
-	z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
-	if (stream == NULL)
-		goto failed;
-	stream->workspace = vmalloc(zlib_inflate_workspacesize());
-	if (stream->workspace == NULL)
-		goto failed;
+	z_stream __percpu *percpu;
+	z_stream *stream;
+	int cpu, cpu0;
+
+	percpu = alloc_percpu(z_stream);
+	if (!percpu) {
+		ERROR("Failed to allocate per cpu stream\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_possible_cpu(cpu) {
+		stream = per_cpu_ptr(percpu, cpu);
+
+		stream->workspace = vmalloc(zlib_inflate_workspacesize());
+		if (stream->workspace == NULL)
+			goto failed;
+	}
 
-	return stream;
+	return (__force void *) percpu;
 
 failed:
-	ERROR("Failed to allocate zlib workspace\n");
-	kfree(stream);
+	for_each_possible_cpu(cpu0) {
+		if (cpu0 == cpu)
+			break;
+		stream = per_cpu_ptr(percpu, cpu);
+		vfree(stream->workspace);
+	}
+	free_percpu(percpu);
+
+	ERROR("Failed to allocate zlib workspaces\n");
 	return ERR_PTR(-ENOMEM);
 }
 
-
 static void zlib_free(void *strm)
 {
-	z_stream *stream = strm;
+	z_stream __percpu *percpu = (z_stream __percpu *) strm;
+	int cpu;
 
-	if (stream)
+	for_each_possible_cpu(cpu) {
+		z_stream *stream = per_cpu_ptr(percpu, cpu);
 		vfree(stream->workspace);
-	kfree(stream);
+
+	}
+	free_percpu(percpu);
 }
 
 
@@ -67,9 +90,8 @@ static int zlib_uncompress(struct squash
 {
 	int zlib_err, zlib_init = 0;
 	int k = 0, page = 0;
-	z_stream *stream = msblk->stream;
-
-	mutex_lock(&msblk->read_data_mutex);
+	z_stream __percpu *percpu = (z_stream __percpu *)msblk->stream;
+	z_stream *stream = get_cpu_ptr(percpu);
 
 	stream->avail_out = 0;
 	stream->avail_in = 0;
@@ -80,7 +102,7 @@ static int zlib_uncompress(struct squash
 			length -= avail;
 			wait_on_buffer(bh[k]);
 			if (!buffer_uptodate(bh[k]))
-				goto release_mutex;
+				goto put_per_cpu;
 
 			stream->next_in = bh[k]->b_data + offset;
 			stream->avail_in = avail;
@@ -98,7 +120,7 @@ static int zlib_uncompress(struct squash
 				ERROR("zlib_inflateInit returned unexpected "
 					"result 0x%x, srclength %d\n",
 					zlib_err, srclength);
-				goto release_mutex;
+				goto put_per_cpu;
 			}
 			zlib_init = 1;
 		}
@@ -111,26 +133,26 @@ static int zlib_uncompress(struct squash
 
 	if (zlib_err != Z_STREAM_END) {
 		ERROR("zlib_inflate error, data probably corrupt\n");
-		goto release_mutex;
+		goto put_per_cpu;
 	}
 
 	zlib_err = zlib_inflateEnd(stream);
 	if (zlib_err != Z_OK) {
 		ERROR("zlib_inflate error, data probably corrupt\n");
-		goto release_mutex;
+		goto put_per_cpu;
 	}
 
 	if (k < b) {
 		ERROR("zlib_uncompress error, data remaining\n");
-		goto release_mutex;
+		goto put_per_cpu;
 	}
 
 	length = stream->total_out;
-	mutex_unlock(&msblk->read_data_mutex);
+	put_cpu_ptr(stream);
 	return length;
 
-release_mutex:
-	mutex_unlock(&msblk->read_data_mutex);
+put_per_cpu:
+	put_cpu_ptr(stream);
 
 	for (; k < b; k++)
 		put_bh(bh[k]);



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC 2/4] squashfs: use percpu for lzo decompression
  2011-04-22 21:17 [RFC 0/4] Squashfs decompression per-cpu Stephen Hemminger
  2011-04-22 21:17 ` [RFC 1/4] squashfs: use percpu for zlib decompression Stephen Hemminger
@ 2011-04-22 21:17 ` Stephen Hemminger
  2011-04-22 21:17 ` [RFC 3/4] squashfs: use percpu for xz decompression Stephen Hemminger
  2011-04-22 21:17 ` [RFC 4/4] squashfs: eliminate read_data_mutex Stephen Hemminger
  3 siblings, 0 replies; 5+ messages in thread
From: Stephen Hemminger @ 2011-04-22 21:17 UTC (permalink / raw)
  To: Phillip Lougher, kirk w; +Cc: linux-fsdevel, squashfs-devel

[-- Attachment #1: squashfs-lz-percpu.patch --]
[-- Type: text/plain, Size: 3047 bytes --]

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/fs/squashfs/lzo_wrapper.c	2011-04-22 13:15:28.349003198 -0700
+++ b/fs/squashfs/lzo_wrapper.c	2011-04-22 13:21:03.320648456 -0700
@@ -22,6 +22,7 @@
  */
 
 #include <linux/mutex.h>
+#include <linux/percpu.h>
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -40,51 +41,71 @@ struct squashfs_lzo {
 static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
 {
 	int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
+	struct squashfs_lzo __percpu *percpu;
+	struct squashfs_lzo *stream;
+	int cpu, cpu0;
+
+	percpu = alloc_percpu(struct squashfs_lzo);
+	if (!percpu) {
+		ERROR("Failed to allocate per cpu stream\n");
+		return ERR_PTR(-ENOMEM);
+	}
 
-	struct squashfs_lzo *stream = kzalloc(sizeof(*stream), GFP_KERNEL);
-	if (stream == NULL)
-		goto failed;
-	stream->input = vmalloc(block_size);
-	if (stream->input == NULL)
-		goto failed;
-	stream->output = vmalloc(block_size);
-	if (stream->output == NULL)
-		goto failed2;
+	for_each_possible_cpu(cpu) {
+		stream = per_cpu_ptr(percpu, cpu);
+
+		stream->input = vmalloc(block_size);
+		if (stream->input == NULL)
+			goto failed;
+
+		stream->output = vmalloc(block_size);
+		if (stream->output == NULL)
+			goto failed2;
+	}
 
-	return stream;
+	return (__force void *) percpu;
 
 failed2:
 	vfree(stream->input);
 failed:
+	for_each_possible_cpu(cpu0) {
+		if (cpu0 == cpu)
+			break;
+		stream = per_cpu_ptr(percpu, cpu);
+		vfree(stream->output);
+		vfree(stream->input);
+	}
 	ERROR("Failed to allocate lzo workspace\n");
-	kfree(stream);
+	free_percpu(percpu);
 	return ERR_PTR(-ENOMEM);
 }
 
 
 static void lzo_free(void *strm)
 {
-	struct squashfs_lzo *stream = strm;
+	struct squashfs_lzo __percpu *percpu
+		= (struct squashfs_lzo __percpu *) strm;
+	int cpu;
 
-	if (stream) {
+	for_each_possible_cpu(cpu) {
+		struct squashfs_lzo *stream = per_cpu_ptr(percpu, cpu);
 		vfree(stream->input);
 		vfree(stream->output);
 	}
-	kfree(stream);
+	free_percpu(percpu);
 }
 
-
 static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
 	struct buffer_head **bh, int b, int offset, int length, int srclength,
 	int pages)
 {
-	struct squashfs_lzo *stream = msblk->stream;
+	struct squashfs_lzo __percpu *percpu
+		= (struct squashfs_lzo __percpu *)msblk->stream;
+	struct squashfs_lzo *stream = get_cpu_ptr(percpu);
 	void *buff = stream->input;
 	int avail, i, bytes = length, res;
 	size_t out_len = srclength;
 
-	mutex_lock(&msblk->read_data_mutex);
-
 	for (i = 0; i < b; i++) {
 		wait_on_buffer(bh[i]);
 		if (!buffer_uptodate(bh[i]))
@@ -111,7 +132,7 @@ static int lzo_uncompress(struct squashf
 		bytes -= avail;
 	}
 
-	mutex_unlock(&msblk->read_data_mutex);
+	put_cpu_ptr(stream);
 	return res;
 
 block_release:
@@ -119,7 +140,7 @@ block_release:
 		put_bh(bh[i]);
 
 failed:
-	mutex_unlock(&msblk->read_data_mutex);
+	put_cpu_ptr(stream);
 
 	ERROR("lzo decompression failed, data probably corrupt\n");
 	return -EIO;



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC 3/4] squashfs: use percpu for xz decompression
  2011-04-22 21:17 [RFC 0/4] Squashfs decompression per-cpu Stephen Hemminger
  2011-04-22 21:17 ` [RFC 1/4] squashfs: use percpu for zlib decompression Stephen Hemminger
  2011-04-22 21:17 ` [RFC 2/4] squashfs: use percpu for lzo decompression Stephen Hemminger
@ 2011-04-22 21:17 ` Stephen Hemminger
  2011-04-22 21:17 ` [RFC 4/4] squashfs: eliminate read_data_mutex Stephen Hemminger
  3 siblings, 0 replies; 5+ messages in thread
From: Stephen Hemminger @ 2011-04-22 21:17 UTC (permalink / raw)
  To: Phillip Lougher, kirk w; +Cc: linux-fsdevel, squashfs-devel

[-- Attachment #1: squashfs-xz-percpu.patch --]
[-- Type: text/plain, Size: 2689 bytes --]

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/fs/squashfs/xz_wrapper.c	2011-04-22 13:55:21.251194626 -0700
+++ b/fs/squashfs/xz_wrapper.c	2011-04-22 13:55:35.779665142 -0700
@@ -47,9 +47,11 @@ static void *squashfs_xz_init(struct squ
 	int len)
 {
 	struct comp_opts *comp_opts = buff;
+	struct squashfs_xz __percpu *percpu;
 	struct squashfs_xz *stream;
 	int dict_size = msblk->block_size;
 	int err, n;
+	int cpu, cpu0;
 
 	if (comp_opts) {
 		/* check compressor options are the expected length */
@@ -71,47 +73,58 @@ static void *squashfs_xz_init(struct squ
 
 	dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
 
-	stream = kmalloc(sizeof(*stream), GFP_KERNEL);
-	if (stream == NULL) {
-		err = -ENOMEM;
-		goto failed;
-	}
+	percpu = alloc_percpu(struct squashfs_xz);
+	if (!percpu)
+		goto nomem;
+
+	for_each_possible_cpu(cpu) {
+		stream = per_cpu_ptr(percpu, cpu);
 
-	stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
-	if (stream->state == NULL) {
-		kfree(stream);
-		err = -ENOMEM;
-		goto failed;
+		stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
+		if (stream->state == NULL)
+			goto cleanup_cpu;
 	}
 
-	return stream;
+	return (__force void *) percpu;
 
+cleanup_cpu:
+	for_each_possible_cpu(cpu0) {
+		if (cpu0 == cpu)
+			break;
+
+		stream = per_cpu_ptr(percpu, cpu);
+		xz_dec_end(stream->state);
+	}
+	free_percpu(percpu);
+nomem:
+	err = -ENOMEM;
 failed:
 	ERROR("Failed to initialise xz decompressor\n");
 	return ERR_PTR(err);
 }
 
-
 static void squashfs_xz_free(void *strm)
 {
-	struct squashfs_xz *stream = strm;
+	struct squashfs_xz __percpu *percpu
+		= (struct squashfs_xz __percpu *) strm;
+	int cpu;
 
-	if (stream) {
+	for_each_possible_cpu(cpu) {
+		struct squashfs_xz *stream = per_cpu_ptr(percpu, cpu);
 		xz_dec_end(stream->state);
-		kfree(stream);
 	}
+	free_percpu(percpu);
 }
 
-
 static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
 	struct buffer_head **bh, int b, int offset, int length, int srclength,
 	int pages)
 {
 	enum xz_ret xz_err;
 	int avail, total = 0, k = 0, page = 0;
-	struct squashfs_xz *stream = msblk->stream;
-
-	mutex_lock(&msblk->read_data_mutex);
+	struct squashfs_xz __percpu *percpu
+		= (struct squashfs_xz __percpu *)msblk->stream;
+	struct squashfs_xz *stream = get_cpu_ptr(percpu);
 
 	xz_dec_reset(stream->state);
 	stream->buf.in_pos = 0;
@@ -158,11 +171,11 @@ static int squashfs_xz_uncompress(struct
 	}
 
 	total += stream->buf.out_pos;
-	mutex_unlock(&msblk->read_data_mutex);
+	put_cpu_ptr(stream);
 	return total;
 
 release_mutex:
-	mutex_unlock(&msblk->read_data_mutex);
+	put_cpu_ptr(stream);
 
 	for (; k < b; k++)
 		put_bh(bh[k]);



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC 4/4] squashfs: eliminate read_data_mutex
  2011-04-22 21:17 [RFC 0/4] Squashfs decompression per-cpu Stephen Hemminger
                   ` (2 preceding siblings ...)
  2011-04-22 21:17 ` [RFC 3/4] squashfs: use percpu for xz decompression Stephen Hemminger
@ 2011-04-22 21:17 ` Stephen Hemminger
  3 siblings, 0 replies; 5+ messages in thread
From: Stephen Hemminger @ 2011-04-22 21:17 UTC (permalink / raw)
  To: Phillip Lougher, kirk w; +Cc: linux-fsdevel, squashfs-devel

[-- Attachment #1: squashfs-no-read-data-mutex.patch --]
[-- Type: text/plain, Size: 894 bytes --]

This is no longer used, now that decompression is per-cpu.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/fs/squashfs/squashfs_fs_sb.h	2011-04-22 14:01:19.090469622 -0700
+++ b/fs/squashfs/squashfs_fs_sb.h	2011-04-22 14:01:27.786735743 -0700
@@ -62,7 +62,6 @@ struct squashfs_sb_info {
 	__le64					*id_table;
 	__le64					*fragment_index;
 	__le64					*xattr_id_table;
-	struct mutex				read_data_mutex;
 	struct mutex				meta_index_mutex;
 	struct meta_index			*meta_index;
 	void					*stream;
--- a/fs/squashfs/super.c	2011-04-22 14:00:56.133765430 -0700
+++ b/fs/squashfs/super.c	2011-04-22 14:01:01.653934993 -0700
@@ -104,7 +104,6 @@ static int squashfs_fill_super(struct su
 	msblk->devblksize = sb_min_blocksize(sb, BLOCK_SIZE);
 	msblk->devblksize_log2 = ffz(~msblk->devblksize);
 
-	mutex_init(&msblk->read_data_mutex);
 	mutex_init(&msblk->meta_index_mutex);
 
 	/*



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-04-22 22:06 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-22 21:17 [RFC 0/4] Squashfs decompression per-cpu Stephen Hemminger
2011-04-22 21:17 ` [RFC 1/4] squashfs: use percpu for zlib decompression Stephen Hemminger
2011-04-22 21:17 ` [RFC 2/4] squashfs: use percpu for lzo decompression Stephen Hemminger
2011-04-22 21:17 ` [RFC 3/4] squashfs: use percpu for xz decompression Stephen Hemminger
2011-04-22 21:17 ` [RFC 4/4] squashfs: eliminate read_data_mutex Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).