* [RFC 0/4] Squashfs decompression per-cpu
@ 2011-04-22 21:17 Stephen Hemminger
2011-04-22 21:17 ` [RFC 1/4] squashfs: use percpu for zlib decompression Stephen Hemminger
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Stephen Hemminger @ 2011-04-22 21:17 UTC (permalink / raw)
To: Phillip Lougher, kirk w; +Cc: linux-fsdevel, squashfs-devel
These patches are a quick stab at eliminating the single threaded
decompression in current squashfs. They are against latest kernel
2.6.39-rc4.
They have been only lightly tested, i.e. by mounting a zlib squashfs filesystem and
doing some basic access. It is just a starting point for discussion.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [RFC 1/4] squashfs: use percpu for zlib decompression
2011-04-22 21:17 [RFC 0/4] Squashfs decompression per-cpu Stephen Hemminger
@ 2011-04-22 21:17 ` Stephen Hemminger
2011-04-22 21:17 ` [RFC 2/4] squashfs: use percpu for lzo decompression Stephen Hemminger
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Stephen Hemminger @ 2011-04-22 21:17 UTC (permalink / raw)
To: Phillip Lougher, kirk w; +Cc: linux-fsdevel, squashfs-devel
[-- Attachment #1: squashfs-gzip-percpu.patch --]
[-- Type: text/plain, Size: 3520 bytes --]
Make zlib decompression multi-threaded.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
--- a/fs/squashfs/zlib_wrapper.c 2011-04-22 10:57:09.450024619 -0700
+++ b/fs/squashfs/zlib_wrapper.c 2011-04-22 13:15:52.509266395 -0700
@@ -26,6 +26,7 @@
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/zlib.h>
+#include <linux/percpu.h>
#include <linux/vmalloc.h>
#include "squashfs_fs.h"
@@ -33,31 +34,63 @@
#include "squashfs.h"
#include "decompressor.h"
+/*
+ * Don't use a per superblock stream anymore: zlib_init() returns a
+ * per-cpu z_stream with one inflate workspace for every possible CPU,
+ * or ERR_PTR(-ENOMEM) if any of the allocations fails.
+ */
static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
{
- z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
- if (stream == NULL)
- goto failed;
- stream->workspace = vmalloc(zlib_inflate_workspacesize());
- if (stream->workspace == NULL)
- goto failed;
+ z_stream __percpu *percpu;
+ z_stream *stream;
+ int cpu, cpu0;
+
+ percpu = alloc_percpu(z_stream);
+ if (!percpu) {
+ ERROR("Failed to allocate per cpu stream\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for_each_possible_cpu(cpu) {
+ stream = per_cpu_ptr(percpu, cpu);
+
+ stream->workspace = vmalloc(zlib_inflate_workspacesize());
+ if (stream->workspace == NULL)
+ goto failed;
+ }
- return stream;
+ return (__force void *) percpu;
failed:
- ERROR("Failed to allocate zlib workspace\n");
- kfree(stream);
+ /* Free only the workspaces of the CPUs set up before the failing one. */
+ for_each_possible_cpu(cpu0) {
+ if (cpu0 == cpu)
+ break;
+ stream = per_cpu_ptr(percpu, cpu0);
+ vfree(stream->workspace);
+ }
+ free_percpu(percpu);
+
+ ERROR("Failed to allocate zlib workspaces\n");
return ERR_PTR(-ENOMEM);
}
-
+/* Free every possible CPU's workspace, then the per-cpu z_stream itself. */
static void zlib_free(void *strm)
{
- z_stream *stream = strm;
+ z_stream __percpu *percpu = (z_stream __percpu *) strm;
+ int cpu;
- if (stream)
+ /* Keep the old "if (stream)" behaviour: a NULL stream is a no-op. */
+ if (!percpu)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ z_stream *stream = per_cpu_ptr(percpu, cpu);
vfree(stream->workspace);
- kfree(stream);
+
+ }
+ free_percpu(percpu);
}
@@ -67,9 +90,8 @@ static int zlib_uncompress(struct squash
{
int zlib_err, zlib_init = 0;
int k = 0, page = 0;
- z_stream *stream = msblk->stream;
-
- mutex_lock(&msblk->read_data_mutex);
+ z_stream __percpu *percpu = (z_stream __percpu *)msblk->stream;
+ z_stream *stream = get_cpu_ptr(percpu);
stream->avail_out = 0;
stream->avail_in = 0;
@@ -80,7 +102,7 @@ static int zlib_uncompress(struct squash
length -= avail;
wait_on_buffer(bh[k]);
if (!buffer_uptodate(bh[k]))
- goto release_mutex;
+ goto put_per_cpu;
stream->next_in = bh[k]->b_data + offset;
stream->avail_in = avail;
@@ -98,7 +120,7 @@ static int zlib_uncompress(struct squash
ERROR("zlib_inflateInit returned unexpected "
"result 0x%x, srclength %d\n",
zlib_err, srclength);
- goto release_mutex;
+ goto put_per_cpu;
}
zlib_init = 1;
}
@@ -111,26 +133,26 @@ static int zlib_uncompress(struct squash
if (zlib_err != Z_STREAM_END) {
ERROR("zlib_inflate error, data probably corrupt\n");
- goto release_mutex;
+ goto put_per_cpu;
}
zlib_err = zlib_inflateEnd(stream);
if (zlib_err != Z_OK) {
ERROR("zlib_inflate error, data probably corrupt\n");
- goto release_mutex;
+ goto put_per_cpu;
}
if (k < b) {
ERROR("zlib_uncompress error, data remaining\n");
- goto release_mutex;
+ goto put_per_cpu;
}
length = stream->total_out;
- mutex_unlock(&msblk->read_data_mutex);
+ put_cpu_ptr(stream);
return length;
-release_mutex:
- mutex_unlock(&msblk->read_data_mutex);
+put_per_cpu:
+ put_cpu_ptr(stream);
for (; k < b; k++)
put_bh(bh[k]);
^ permalink raw reply [flat|nested] 5+ messages in thread
* [RFC 2/4] squashfs: use percpu for lzo decompression
2011-04-22 21:17 [RFC 0/4] Squashfs decompression per-cpu Stephen Hemminger
2011-04-22 21:17 ` [RFC 1/4] squashfs: use percpu for zlib decompression Stephen Hemminger
@ 2011-04-22 21:17 ` Stephen Hemminger
2011-04-22 21:17 ` [RFC 3/4] squashfs: use percpu for xz decompression Stephen Hemminger
2011-04-22 21:17 ` [RFC 4/4] squashfs: eliminate read_data_mutex Stephen Hemminger
3 siblings, 0 replies; 5+ messages in thread
From: Stephen Hemminger @ 2011-04-22 21:17 UTC (permalink / raw)
To: Phillip Lougher, kirk w; +Cc: linux-fsdevel, squashfs-devel
[-- Attachment #1: squashfs-lz-percpu.patch --]
[-- Type: text/plain, Size: 3047 bytes --]
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
--- a/fs/squashfs/lzo_wrapper.c 2011-04-22 13:15:28.349003198 -0700
+++ b/fs/squashfs/lzo_wrapper.c 2011-04-22 13:21:03.320648456 -0700
@@ -22,6 +22,7 @@
*/
#include <linux/mutex.h>
+#include <linux/percpu.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -40,51 +41,84 @@ struct squashfs_lzo {
static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
{
int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
+ struct squashfs_lzo __percpu *percpu;
+ struct squashfs_lzo *stream;
+ int cpu, cpu0;
+
+ percpu = alloc_percpu(struct squashfs_lzo);
+ if (!percpu) {
+ ERROR("Failed to allocate per cpu stream\n");
+ return ERR_PTR(-ENOMEM);
+ }
- struct squashfs_lzo *stream = kzalloc(sizeof(*stream), GFP_KERNEL);
- if (stream == NULL)
- goto failed;
- stream->input = vmalloc(block_size);
- if (stream->input == NULL)
- goto failed;
- stream->output = vmalloc(block_size);
- if (stream->output == NULL)
- goto failed2;
+ /* Every possible CPU gets its own input and output buffers of block_size bytes. */
+ for_each_possible_cpu(cpu) {
+ stream = per_cpu_ptr(percpu, cpu);
+
+ stream->input = vmalloc(block_size);
+ if (stream->input == NULL)
+ goto failed;
+
+ stream->output = vmalloc(block_size);
+ if (stream->output == NULL)
+ goto failed2;
+ }
- return stream;
+ return (__force void *) percpu;
failed2:
+ /* The failing CPU has an input buffer but no output buffer. */
vfree(stream->input);
failed:
+ /* Free only the buffers of the CPUs set up before the failing one. */
+ for_each_possible_cpu(cpu0) {
+ if (cpu0 == cpu)
+ break;
+ stream = per_cpu_ptr(percpu, cpu0);
+ vfree(stream->output);
+ vfree(stream->input);
+ }
ERROR("Failed to allocate lzo workspace\n");
- kfree(stream);
+ free_percpu(percpu);
return ERR_PTR(-ENOMEM);
}
+/* Free every possible CPU's buffers, then the per-cpu state itself. */
static void lzo_free(void *strm)
{
- struct squashfs_lzo *stream = strm;
+ struct squashfs_lzo __percpu *percpu
+ = (struct squashfs_lzo __percpu *) strm;
+ int cpu;
- if (stream) {
+ /* Keep the old "if (stream)" behaviour: a NULL stream is a no-op. */
+ if (!percpu)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct squashfs_lzo *stream = per_cpu_ptr(percpu, cpu);
vfree(stream->input);
vfree(stream->output);
}
- kfree(stream);
+ free_percpu(percpu);
}
-
static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
struct buffer_head **bh, int b, int offset, int length, int srclength,
int pages)
{
- struct squashfs_lzo *stream = msblk->stream;
+ /*
+ * NOTE(review): get_cpu_ptr() disables preemption until put_cpu_ptr(),
+ * yet wait_on_buffer() in the loop below may sleep - confirm this is
+ * safe, or wait for the buffers before taking the per-cpu stream.
+ */
+ struct squashfs_lzo __percpu *percpu
+ = (struct squashfs_lzo __percpu *)msblk->stream;
+ struct squashfs_lzo *stream = get_cpu_ptr(percpu);
void *buff = stream->input;
int avail, i, bytes = length, res;
size_t out_len = srclength;
- mutex_lock(&msblk->read_data_mutex);
-
for (i = 0; i < b; i++) {
wait_on_buffer(bh[i]);
if (!buffer_uptodate(bh[i]))
@@ -111,7 +132,7 @@ static int lzo_uncompress(struct squashf
bytes -= avail;
}
- mutex_unlock(&msblk->read_data_mutex);
+ put_cpu_ptr(stream);
return res;
block_release:
@@ -119,7 +140,7 @@ block_release:
put_bh(bh[i]);
failed:
- mutex_unlock(&msblk->read_data_mutex);
+ put_cpu_ptr(stream);
ERROR("lzo decompression failed, data probably corrupt\n");
return -EIO;
^ permalink raw reply [flat|nested] 5+ messages in thread
* [RFC 3/4] squashfs: use percpu for xz decompression
2011-04-22 21:17 [RFC 0/4] Squashfs decompression per-cpu Stephen Hemminger
2011-04-22 21:17 ` [RFC 1/4] squashfs: use percpu for zlib decompression Stephen Hemminger
2011-04-22 21:17 ` [RFC 2/4] squashfs: use percpu for lzo decompression Stephen Hemminger
@ 2011-04-22 21:17 ` Stephen Hemminger
2011-04-22 21:17 ` [RFC 4/4] squashfs: eliminate read_data_mutex Stephen Hemminger
3 siblings, 0 replies; 5+ messages in thread
From: Stephen Hemminger @ 2011-04-22 21:17 UTC (permalink / raw)
To: Phillip Lougher, kirk w; +Cc: linux-fsdevel, squashfs-devel
[-- Attachment #1: squashfs-xz-percpu.patch --]
[-- Type: text/plain, Size: 2689 bytes --]
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
--- a/fs/squashfs/xz_wrapper.c 2011-04-22 13:55:21.251194626 -0700
+++ b/fs/squashfs/xz_wrapper.c 2011-04-22 13:55:35.779665142 -0700
@@ -47,9 +47,11 @@ static void *squashfs_xz_init(struct squ
int len)
{
struct comp_opts *comp_opts = buff;
+ struct squashfs_xz __percpu *percpu;
struct squashfs_xz *stream;
int dict_size = msblk->block_size;
int err, n;
+ int cpu, cpu0;
if (comp_opts) {
/* check compressor options are the expected length */
@@ -71,47 +73,65 @@ static void *squashfs_xz_init(struct squ
dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
- stream = kmalloc(sizeof(*stream), GFP_KERNEL);
- if (stream == NULL) {
- err = -ENOMEM;
- goto failed;
- }
+ percpu = alloc_percpu(struct squashfs_xz);
+ if (!percpu)
+ goto nomem;
+
+ /* Every possible CPU gets its own preallocated decoder state. */
+ for_each_possible_cpu(cpu) {
+ stream = per_cpu_ptr(percpu, cpu);
- stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
- if (stream->state == NULL) {
- kfree(stream);
- err = -ENOMEM;
- goto failed;
+ stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
+ if (stream->state == NULL)
+ goto cleanup_cpu;
}
- return stream;
+ return (__force void *) percpu;
+cleanup_cpu:
+ /* End only the decoders of the CPUs set up before the failing one. */
+ for_each_possible_cpu(cpu0) {
+ if (cpu0 == cpu)
+ break;
+
+ stream = per_cpu_ptr(percpu, cpu0);
+ xz_dec_end(stream->state);
+ }
+ free_percpu(percpu);
+nomem:
+ err = -ENOMEM;
failed:
ERROR("Failed to initialise xz decompressor\n");
return ERR_PTR(err);
}
-
+/* End every possible CPU's decoder, then free the per-cpu state itself. */
static void squashfs_xz_free(void *strm)
{
- struct squashfs_xz *stream = strm;
+ struct squashfs_xz __percpu *percpu
+ = (struct squashfs_xz __percpu *) strm;
+ int cpu;
- if (stream) {
+ /* Keep the old "if (stream)" behaviour: a NULL stream is a no-op. */
+ if (!percpu)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct squashfs_xz *stream = per_cpu_ptr(percpu, cpu);
xz_dec_end(stream->state);
- kfree(stream);
}
+ free_percpu(percpu);
}
-
static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
struct buffer_head **bh, int b, int offset, int length, int srclength,
int pages)
{
enum xz_ret xz_err;
int avail, total = 0, k = 0, page = 0;
- struct squashfs_xz *stream = msblk->stream;
-
- mutex_lock(&msblk->read_data_mutex);
+ struct squashfs_xz __percpu *percpu
+ = (struct squashfs_xz __percpu *)msblk->stream;
+ struct squashfs_xz *stream = get_cpu_ptr(percpu);
xz_dec_reset(stream->state);
stream->buf.in_pos = 0;
@@ -158,11 +171,11 @@ static int squashfs_xz_uncompress(struct
}
total += stream->buf.out_pos;
- mutex_unlock(&msblk->read_data_mutex);
+ put_cpu_ptr(stream);
return total;
release_mutex:
- mutex_unlock(&msblk->read_data_mutex);
+ put_cpu_ptr(stream);
for (; k < b; k++)
put_bh(bh[k]);
^ permalink raw reply [flat|nested] 5+ messages in thread
* [RFC 4/4] squashfs: eliminate read_data_mutex
2011-04-22 21:17 [RFC 0/4] Squashfs decompression per-cpu Stephen Hemminger
` (2 preceding siblings ...)
2011-04-22 21:17 ` [RFC 3/4] squashfs: use percpu for xz decompression Stephen Hemminger
@ 2011-04-22 21:17 ` Stephen Hemminger
3 siblings, 0 replies; 5+ messages in thread
From: Stephen Hemminger @ 2011-04-22 21:17 UTC (permalink / raw)
To: Phillip Lougher, kirk w; +Cc: linux-fsdevel, squashfs-devel
[-- Attachment #1: squashfs-no-read-data-mutex.patch --]
[-- Type: text/plain, Size: 894 bytes --]
This is no longer used, now that decompression is per-cpu.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
--- a/fs/squashfs/squashfs_fs_sb.h 2011-04-22 14:01:19.090469622 -0700
+++ b/fs/squashfs/squashfs_fs_sb.h 2011-04-22 14:01:27.786735743 -0700
@@ -62,7 +62,6 @@ struct squashfs_sb_info {
__le64 *id_table;
__le64 *fragment_index;
__le64 *xattr_id_table;
- struct mutex read_data_mutex;
struct mutex meta_index_mutex;
struct meta_index *meta_index;
void *stream;
--- a/fs/squashfs/super.c 2011-04-22 14:00:56.133765430 -0700
+++ b/fs/squashfs/super.c 2011-04-22 14:01:01.653934993 -0700
@@ -104,7 +104,6 @@ static int squashfs_fill_super(struct su
msblk->devblksize = sb_min_blocksize(sb, BLOCK_SIZE);
msblk->devblksize_log2 = ffz(~msblk->devblksize);
- mutex_init(&msblk->read_data_mutex);
mutex_init(&msblk->meta_index_mutex);
/*
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2011-04-22 22:06 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-22 21:17 [RFC 0/4] Squashfs decompression per-cpu Stephen Hemminger
2011-04-22 21:17 ` [RFC 1/4] squashfs: use percpu for zlib decompression Stephen Hemminger
2011-04-22 21:17 ` [RFC 2/4] squashfs: use percpu for lzo decompression Stephen Hemminger
2011-04-22 21:17 ` [RFC 3/4] squashfs: use percpu for xz decompression Stephen Hemminger
2011-04-22 21:17 ` [RFC 4/4] squashfs: eliminate read_data_mutex Stephen Hemminger
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).