* [RFC 1/5] squashfs: clean up squashfs_read_data
From: Minchan Kim @ 2013-09-16 7:08 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee, Minchan Kim
squashfs_read_data reads a block from disk and decompresses it, and it
is used for both datablocks and metadata blocks.
This patch splits it into squashfs_read_datablock and
squashfs_read_metablock; moreover, the datablock path is divided into
two parts, one that just submits the I/O and one that decompresses it.
This clean-up helps code readability and prepares for readpages
support.
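For illustration only (not part of the patch), the datablock path now
composes roughly as follows: squashfs_read_submit() just issues the
buffer_head I/O, and squashfs_decompress_block() waits for the buffers
and decompresses them:

	/* sketch of squashfs_read_datablock(), error handling trimmed */
	compressed = SQUASHFS_COMPRESSED_BLOCK(length);
	length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);

	/* phase 1: submit read I/O for the on-disk block */
	ret = squashfs_read_submit(sb, index, length, srclength, bh, &b);

	/* phase 2: wait for the buffers and decompress into @buffer */
	length = squashfs_decompress_block(msblk, compressed, buffer, bh,
			b, offset, length, srclength, pages);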
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
fs/squashfs/block.c | 244 +++++++++++++++++++++++++++++---------------
fs/squashfs/cache.c | 16 ++-
fs/squashfs/decompressor.c | 2 +-
fs/squashfs/squashfs.h | 6 +-
4 files changed, 176 insertions(+), 92 deletions(-)
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 41d108e..f33c6ef 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -77,99 +77,25 @@ static struct buffer_head *get_block_length(struct super_block *sb,
}
-/*
- * Read and decompress a metadata block or datablock. Length is non-zero
- * if a datablock is being read (the size is stored elsewhere in the
- * filesystem), otherwise the length is obtained from the first two bytes of
- * the metadata block. A bit in the length field indicates if the block
- * is stored uncompressed in the filesystem (usually because compression
- * generated a larger block - this does occasionally happen with compression
- * algorithms).
- */
-int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
- int length, u64 *next_index, int srclength, int pages)
-{
- struct squashfs_sb_info *msblk = sb->s_fs_info;
- struct buffer_head **bh;
- int offset = index & ((1 << msblk->devblksize_log2) - 1);
- u64 cur_index = index >> msblk->devblksize_log2;
- int bytes, compressed, b = 0, k = 0, page = 0, avail;
-
- bh = kcalloc(((srclength + msblk->devblksize - 1)
- >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
- if (bh == NULL)
- return -ENOMEM;
-
- if (length) {
- /*
- * Datablock.
- */
- bytes = -offset;
- compressed = SQUASHFS_COMPRESSED_BLOCK(length);
- length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
- if (next_index)
- *next_index = index + length;
-
- TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
- index, compressed ? "" : "un", length, srclength);
-
- if (length < 0 || length > srclength ||
- (index + length) > msblk->bytes_used)
- goto read_failure;
-
- for (b = 0; bytes < length; b++, cur_index++) {
- bh[b] = sb_getblk(sb, cur_index);
- if (bh[b] == NULL)
- goto block_release;
- bytes += msblk->devblksize;
- }
- ll_rw_block(READ, b, bh);
- } else {
- /*
- * Metadata block.
- */
- if ((index + 2) > msblk->bytes_used)
- goto read_failure;
-
- bh[0] = get_block_length(sb, &cur_index, &offset, &length);
- if (bh[0] == NULL)
- goto read_failure;
- b = 1;
-
- bytes = msblk->devblksize - offset;
- compressed = SQUASHFS_COMPRESSED(length);
- length = SQUASHFS_COMPRESSED_SIZE(length);
- if (next_index)
- *next_index = index + length + 2;
-
- TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
- compressed ? "" : "un", length);
- if (length < 0 || length > srclength ||
- (index + length) > msblk->bytes_used)
- goto block_release;
-
- for (; bytes < length; b++) {
- bh[b] = sb_getblk(sb, ++cur_index);
- if (bh[b] == NULL)
- goto block_release;
- bytes += msblk->devblksize;
- }
- ll_rw_block(READ, b - 1, bh + 1);
- }
+int squashfs_decompress_block(struct squashfs_sb_info *msblk, int compressed,
+ void **buffer, struct buffer_head **bh, int nr_bh,
+ int offset, int length, int srclength, int pages)
+{
+ int k = 0;
if (compressed) {
- length = squashfs_decompress(msblk, buffer, bh, b, offset,
- length, srclength, pages);
+ length = squashfs_decompress(msblk, buffer, bh, nr_bh,
+ offset, length, srclength, pages);
if (length < 0)
- goto read_failure;
+ goto out;
} else {
/*
* Block is uncompressed.
*/
- int in, pg_offset = 0;
+ int bytes, in, avail, pg_offset = 0, page = 0;
- for (bytes = length; k < b; k++) {
+ for (bytes = length; k < nr_bh; k++) {
in = min(bytes, msblk->devblksize - offset);
bytes -= in;
wait_on_buffer(bh[k]);
@@ -193,6 +119,154 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
}
}
+ return length;
+
+block_release:
+ for (; k < nr_bh; k++)
+ put_bh(bh[k]);
+out:
+ return length;
+}
+
+int squashfs_read_submit(struct super_block *sb, u64 index, int length,
+ int srclength, struct buffer_head **bh, int *nr_bh)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ int offset = index & ((1 << msblk->devblksize_log2) - 1);
+ u64 cur_index = index >> msblk->devblksize_log2;
+ int bytes, b = 0, k = 0;
+
+ bytes = -offset;
+ if (length < 0 || length > srclength ||
+ (index + length) > msblk->bytes_used)
+ goto read_failure;
+
+ for (b = 0; bytes < length; b++, cur_index++) {
+ bh[b] = sb_getblk(sb, cur_index);
+ if (bh[b] == NULL)
+ goto block_release;
+ bytes += msblk->devblksize;
+ }
+
+ ll_rw_block(READ, b, bh);
+ *nr_bh = b;
+ return 0;
+
+block_release:
+ for (; k < b; k++)
+ put_bh(bh[k]);
+
+read_failure:
+ ERROR("squashfs_read_submit failed to read block 0x%llx\n",
+ (unsigned long long) index);
+ return -EIO;
+}
+
+/*
+ * Read and decompress a datablock. @length should be non-zero (the size is
+ * stored elsewhere in the filesystem). A bit in the length field indicates
+ * if the block is stored uncompressed in the filesystem (usually because
+ * compression generated a larger block - this does occasionally happen with
+ * compression algorithms).
+ */
+int squashfs_read_datablock(struct super_block *sb, void **buffer, u64 index,
+ int length, int srclength, int pages)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ struct buffer_head **bh;
+ int offset = index & ((1 << msblk->devblksize_log2) - 1);
+ int compressed, ret, b = 0;
+
+ BUG_ON(!length);
+
+ bh = kcalloc(((srclength + msblk->devblksize - 1)
+ >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
+ if (bh == NULL)
+ return -ENOMEM;
+
+ compressed = SQUASHFS_COMPRESSED_BLOCK(length);
+ length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
+
+ ret = squashfs_read_submit(sb, index, length, srclength, bh, &b);
+ if (ret < 0) {
+ kfree(bh);
+ return ret;
+ }
+
+
+ TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n",
+ index, compressed ? "" : "un", length, srclength);
+
+ length = squashfs_decompress_block(sb, compressed, buffer, bh,
+ b, offset, length, srclength, pages);
+ if (length < 0) {
+ ERROR("squashfs_read_datablock failed to read block 0x%llx\n",
+ (unsigned long long) index);
+ kfree(bh);
+ return -EIO;
+ }
+
+ kfree(bh);
+ return length;
+}
+
+/*
+ * Read and decompress a metadata block. @length is obtained from the first
+ * two bytes of the metadata block. A bit in the length field indicates if
+ * the block is stored uncompressed in the filesystem (usually because
+ * compression generated a larger block - this does occasionally happen with
+ * compression algorithms).
+ */
+int squashfs_read_metablock(struct super_block *sb, void **buffer, u64 index,
+ int length, u64 *next_index, int srclength, int pages)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ struct buffer_head **bh;
+ int offset = index & ((1 << msblk->devblksize_log2) - 1);
+ u64 cur_index = index >> msblk->devblksize_log2;
+ int bytes, compressed, b = 0, k = 0;
+
+ BUG_ON(length);
+
+ bh = kcalloc(((srclength + msblk->devblksize - 1)
+ >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
+ if (bh == NULL)
+ return -ENOMEM;
+
+ if ((index + 2) > msblk->bytes_used)
+ goto read_failure;
+
+ bh[0] = get_block_length(sb, &cur_index, &offset, &length);
+ if (bh[0] == NULL)
+ goto read_failure;
+ b = 1;
+
+ bytes = msblk->devblksize - offset;
+ compressed = SQUASHFS_COMPRESSED(length);
+ length = SQUASHFS_COMPRESSED_SIZE(length);
+ if (next_index)
+ *next_index = index + length + 2;
+
+ TRACE("Meta block @ 0x%llx, %scompressed size %d\n", index,
+ compressed ? "" : "un", length);
+
+ if (length < 0 || length > srclength ||
+ (index + length) > msblk->bytes_used)
+ goto block_release;
+
+ for (; bytes < length; b++) {
+ bh[b] = sb_getblk(sb, ++cur_index);
+ if (bh[b] == NULL)
+ goto block_release;
+ bytes += msblk->devblksize;
+ }
+ ll_rw_block(READ, b - 1, bh + 1);
+
+ length = squashfs_decompress_block(msblk, compressed, buffer, bh, b,
+ offset, length, srclength, pages);
+ if (length < 0)
+ goto read_failure;
+
kfree(bh);
return length;
@@ -201,7 +275,7 @@ block_release:
put_bh(bh[k]);
read_failure:
- ERROR("squashfs_read_data failed to read block 0x%llx\n",
+ ERROR("squashfs_read_metablock failed to read block 0x%llx\n",
(unsigned long long) index);
kfree(bh);
return -EIO;
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index af0b738..6e6a616 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -119,9 +119,15 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
entry->error = 0;
spin_unlock(&cache->lock);
- entry->length = squashfs_read_data(sb, entry->data,
- block, length, &entry->next_index,
- cache->block_size, cache->pages);
+ if (length)
+ entry->length = squashfs_read_datablock(sb,
+ entry->data, block, length,
+ cache->block_size, cache->pages);
+ else
+ entry->length = squashfs_read_metablock(sb,
+ entry->data, block, length,
+ &entry->next_index, cache->block_size,
+ cache->pages);
spin_lock(&cache->lock);
@@ -424,8 +430,8 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
data[i] = buffer;
- res = squashfs_read_data(sb, data, block, length |
- SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
+ res = squashfs_read_datablock(sb, data, block, length |
+ SQUASHFS_COMPRESSED_BIT_BLOCK, length, pages);
kfree(data);
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 3f6271d..e47453e 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -97,7 +97,7 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
if (buffer == NULL)
return ERR_PTR(-ENOMEM);
- length = squashfs_read_data(sb, &buffer,
+ length = squashfs_read_metablock(sb, &buffer,
sizeof(struct squashfs_super_block), 0, NULL,
PAGE_CACHE_SIZE, 1);
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index d126651..405baed 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -28,8 +28,12 @@
#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args)
/* block.c */
-extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
+extern int squashfs_read_datablock(struct super_block *, void **, u64, int,
int, int);
+extern int squashfs_read_metablock(struct super_block *, void **, u64, int,
+ u64 *, int, int);
+extern int squashfs_read_submit(struct super_block *, u64, int, int,
+ struct buffer_head **, int *);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
--
1.7.9.5
* [RFC 2/5] squashfs: clean up squashfs_readpage
From: Minchan Kim @ 2013-09-16 7:08 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee, Minchan Kim
squashfs_readpage currently handles regular data, fragmented data and
hole pages, so it's rather complex. This patch splits it up to make it
simpler to review and to prepare for the upcoming readahead support.
It shouldn't change any existing behavior.
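For review, the top-level dispatch after this patch boils down to the
following (simplified from the diff below; the zero-fill of pages
beyond EOF is omitted, and squashfs_regular_readpage() itself hands
sparse blocks to squashfs_hole_readpage()):

	/* sketch of the new squashfs_readpage() dispatch */
	if (index < file_end || squashfs_i(inode)->fragment_block ==
			SQUASHFS_INVALID_BLK)
		return squashfs_regular_readpage(file, page);
	else
		return squashfs_fragment_readpage(file, page);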
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
fs/squashfs/file.c | 237 ++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 184 insertions(+), 53 deletions(-)
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 8ca62c2..d4d472f 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -370,88 +370,189 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
return le32_to_cpu(size);
}
+static int squashfs_fragment_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int bytes, i, offset = 0;
+ struct squashfs_cache_entry *buffer = NULL;
+ void *pageaddr;
-static int squashfs_readpage(struct file *file, struct page *page)
+ int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int start_index = page->index & ~mask;
+
+ TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+ page->index, squashfs_i(inode)->start);
+
+ /*
+ * Datablock is stored inside a fragment (tail-end packed
+ * block).
+ */
+ buffer = squashfs_get_fragment(inode->i_sb,
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+
+ if (buffer->error) {
+ ERROR("Unable to read page, block %llx, size %x\n",
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+ squashfs_cache_put(buffer);
+ goto error_out;
+ }
+
+ bytes = i_size_read(inode) & (msblk->block_size - 1);
+ offset = squashfs_i(inode)->fragment_offset;
+
+ /*
+ * Loop copying datablock into pages. As the datablock likely covers
+ * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly
+ * grab the pages from the page cache, except for the page that we've
+ * been called to fill.
+ */
+ for (i = start_index; bytes > 0; i++,
+ bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+ struct page *push_page;
+ int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+
+ TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
+
+ push_page = (i == page->index) ? page :
+ grab_cache_page_nowait(page->mapping, i);
+
+ if (!push_page)
+ continue;
+
+ if (PageUptodate(push_page))
+ goto skip_page;
+
+ pageaddr = kmap_atomic(push_page);
+ squashfs_copy_data(pageaddr, buffer, offset, avail);
+ memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(push_page);
+ SetPageUptodate(push_page);
+skip_page:
+ unlock_page(push_page);
+ if (i != page->index)
+ page_cache_release(push_page);
+ }
+
+ squashfs_cache_put(buffer);
+
+ return 0;
+
+error_out:
+ SetPageError(page);
+ pageaddr = kmap_atomic(page);
+ memset(pageaddr, 0, PAGE_CACHE_SIZE);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(page);
+ if (!PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+
+ return 0;
+}
+
+static int squashfs_hole_readpage(struct file *file, struct inode *inode,
+ int index, struct page *page)
+{
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int bytes, i, offset = 0;
+ void *pageaddr;
+
+ int start_index = index << (msblk->block_log - PAGE_CACHE_SHIFT);
+ int file_end = i_size_read(inode) >> msblk->block_log;
+
+ bytes = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+
+ /*
+ * Loop zero-filling the hole's pages. As the hole block likely covers
+ * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly
+ * grab the pages from the page cache, except for the page that we've
+ * been called to fill.
+ */
+ for (i = start_index; bytes > 0; i++,
+ bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+ struct page *push_page;
+
+ push_page = (page && i == page->index) ? page :
+ grab_cache_page_nowait(page->mapping, i);
+
+ if (!push_page)
+ continue;
+
+ if (PageUptodate(push_page))
+ goto skip_page;
+
+ pageaddr = kmap_atomic(push_page);
+ memset(pageaddr, 0, PAGE_CACHE_SIZE);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(push_page);
+ SetPageUptodate(push_page);
+skip_page:
+ unlock_page(push_page);
+ if (page && i == page->index)
+ continue;
+ page_cache_release(push_page);
+ }
+
+ return 0;
+}
+
+static int squashfs_regular_readpage(struct file *file, struct page *page)
{
+ u64 block = 0;
+ int bsize;
struct inode *inode = page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int bytes, i, offset = 0, sparse = 0;
+ int bytes, i, offset = 0;
struct squashfs_cache_entry *buffer = NULL;
void *pageaddr;
int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
int start_index = page->index & ~mask;
- int end_index = start_index | mask;
- int file_end = i_size_read(inode) >> msblk->block_log;
TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
page->index, squashfs_i(inode)->start);
- if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT))
- goto out;
-
- if (index < file_end || squashfs_i(inode)->fragment_block ==
- SQUASHFS_INVALID_BLK) {
- /*
- * Reading a datablock from disk. Need to read block list
- * to get location and block size.
- */
- u64 block = 0;
- int bsize = read_blocklist(inode, index, &block);
- if (bsize < 0)
- goto error_out;
+ /*
+ * Reading a datablock from disk. Need to read block list
+ * to get location and block size.
+ */
+ bsize = read_blocklist(inode, index, &block);
+ if (bsize < 0)
+ goto error_out;
- if (bsize == 0) { /* hole */
- bytes = index == file_end ?
- (i_size_read(inode) & (msblk->block_size - 1)) :
- msblk->block_size;
- sparse = 1;
- } else {
- /*
- * Read and decompress datablock.
- */
- buffer = squashfs_get_datablock(inode->i_sb,
- block, bsize);
- if (buffer->error) {
- ERROR("Unable to read page, block %llx, size %x"
- "\n", block, bsize);
- squashfs_cache_put(buffer);
- goto error_out;
- }
- bytes = buffer->length;
- }
+ if (bsize == 0) { /* hole */
+ return squashfs_hole_readpage(file, inode, index, page);
} else {
/*
- * Datablock is stored inside a fragment (tail-end packed
- * block).
+ * Read and decompress datablock.
*/
- buffer = squashfs_get_fragment(inode->i_sb,
- squashfs_i(inode)->fragment_block,
- squashfs_i(inode)->fragment_size);
-
+ buffer = squashfs_get_datablock(inode->i_sb,
+ block, bsize);
if (buffer->error) {
ERROR("Unable to read page, block %llx, size %x\n",
- squashfs_i(inode)->fragment_block,
- squashfs_i(inode)->fragment_size);
+ block, bsize);
squashfs_cache_put(buffer);
goto error_out;
}
- bytes = i_size_read(inode) & (msblk->block_size - 1);
- offset = squashfs_i(inode)->fragment_offset;
+ bytes = buffer->length;
}
-
/*
* Loop copying datablock into pages. As the datablock likely covers
* many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly
* grab the pages from the page cache, except for the page that we've
* been called to fill.
*/
- for (i = start_index; i <= end_index && bytes > 0; i++,
+ for (i = start_index; bytes > 0; i++,
bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
struct page *push_page;
- int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE);
+ int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
@@ -476,14 +577,12 @@ skip_page:
page_cache_release(push_page);
}
- if (!sparse)
- squashfs_cache_put(buffer);
+ squashfs_cache_put(buffer);
return 0;
error_out:
SetPageError(page);
-out:
pageaddr = kmap_atomic(page);
memset(pageaddr, 0, PAGE_CACHE_SIZE);
kunmap_atomic(pageaddr);
@@ -495,6 +594,38 @@ out:
return 0;
}
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ void *pageaddr;
+
+ int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+ int file_end = i_size_read(inode) >> msblk->block_log;
+
+ TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+ page->index, squashfs_i(inode)->start);
+
+ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT))
+ goto out;
+
+ if (index < file_end || squashfs_i(inode)->fragment_block ==
+ SQUASHFS_INVALID_BLK)
+ return squashfs_regular_readpage(file, page);
+ else
+ return squashfs_fragment_readpage(file, page);
+out:
+ pageaddr = kmap_atomic(page);
+ memset(pageaddr, 0, PAGE_CACHE_SIZE);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(page);
+ if (!PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+
+ return 0;
+}
const struct address_space_operations squashfs_aops = {
.readpage = squashfs_readpage
--
1.7.9.5
* [RFC 3/5] squashfs: remove cache for normal data page
From: Minchan Kim @ 2013-09-16 7:08 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee, Minchan Kim
Squashfs has used its own cache even for normal data pages, but that
is pointless because the MM already has a caching layer: squashfs adds
the extra pages into the page cache anyway when it reads a page from a
compressed block.
This patch removes cache usage for normal data pages, which removes
one unnecessary copy (i.e. from the squashfs cache to the page cache)
and decouples the normal data page path from the squashfs cache layer.
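Condensed from the diff below (allocation failure and error paths
trimmed), the regular readpage path now decompresses straight into
freshly allocated pages and publishes them itself, instead of going
through a squashfs_cache_entry:

	/* map one page per PAGE_CACHE_SIZE chunk of the datablock */
	for (i = 0; i < pages; i++) {
		push_page = (page->index == start_index + i) ? page :
				__page_cache_alloc(gfp_mask);
		buffer[i] = kmap(push_page);
		page_array[i] = push_page;
	}

	/* decompress the whole block directly into those pages */
	data_len = squashfs_read_datablock(inode->i_sb, buffer,
			block, bsize, msblk->block_size, pages);

	/* then mark the pages uptodate and add the extra ones to the
	 * page cache via add_to_page_cache_lru() */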
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
fs/squashfs/file.c | 117 +++++++++++++++++++++++++++++++++-------------------
1 file changed, 75 insertions(+), 42 deletions(-)
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index d4d472f..36508c3 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -505,11 +505,14 @@ skip_page:
static int squashfs_regular_readpage(struct file *file, struct page *page)
{
u64 block = 0;
- int bsize;
- struct inode *inode = page->mapping->host;
+ int bsize, i, data_len, pages, nr_pages = 0;
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int bytes, i, offset = 0;
- struct squashfs_cache_entry *buffer = NULL;
+ gfp_t gfp_mask;
+ struct page *push_page;
+ struct page **page_array;
+ void **buffer = NULL;
void *pageaddr;
int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
@@ -519,6 +522,8 @@ static int squashfs_regular_readpage(struct file *file, struct page *page)
TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
page->index, squashfs_i(inode)->start);
+ pages = msblk->block_size >> PAGE_CACHE_SHIFT;
+ pages = pages ? pages : 1;
/*
* Reading a datablock from disk. Need to read block list
* to get location and block size.
@@ -527,60 +532,88 @@ static int squashfs_regular_readpage(struct file *file, struct page *page)
if (bsize < 0)
goto error_out;
- if (bsize == 0) { /* hole */
+ if (bsize == 0)
return squashfs_hole_readpage(file, inode, index, page);
- } else {
- /*
- * Read and decompress datablock.
- */
- buffer = squashfs_get_datablock(inode->i_sb,
- block, bsize);
- if (buffer->error) {
- ERROR("Unable to read page, block %llx, size %x\n",
- block, bsize);
- squashfs_cache_put(buffer);
- goto error_out;
- }
- bytes = buffer->length;
- }
+
/*
- * Loop copying datablock into pages. As the datablock likely covers
- * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly
- * grab the pages from the page cache, except for the page that we've
- * been called to fill.
+ * Read and decompress data block
*/
- for (i = start_index; bytes > 0; i++,
- bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
- struct page *push_page;
- int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+ gfp_mask = mapping_gfp_mask(mapping);
+ buffer = kcalloc(1 << (msblk->block_log - PAGE_CACHE_SHIFT),
+ sizeof(void *), GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
- TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
+ page_array = kcalloc(1 << (msblk->block_log - PAGE_CACHE_SHIFT),
+ sizeof(struct page *), GFP_KERNEL);
- push_page = (i == page->index) ? page :
- grab_cache_page_nowait(page->mapping, i);
+ if (!page_array)
+ goto release_buffer;
+ /* alloc buffer pages */
+ for (i = 0; i < pages; i++) {
+ if (page->index == start_index + i)
+ push_page = page;
+ else
+ push_page = __page_cache_alloc(gfp_mask);
if (!push_page)
- continue;
+ goto release_page_array;
+ nr_pages++;
+ buffer[i] = kmap(push_page);
+ page_array[i] = push_page;
+ }
- if (PageUptodate(push_page))
- goto skip_page;
+ data_len = squashfs_read_datablock(inode->i_sb, buffer,
+ block, bsize, msblk->block_size, pages);
- pageaddr = kmap_atomic(push_page);
- squashfs_copy_data(pageaddr, buffer, offset, avail);
- memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
- kunmap_atomic(pageaddr);
+ if (data_len < 0) {
+ ERROR("Unable to read page, block %llx, size %x\n",
+ block, bsize);
+ for (i = 0; i < nr_pages; i++) {
+ kunmap(page_array[i]);
+ page_cache_release(page_array[i]);
+ }
+ kfree(buffer);
+ kfree(page_array);
+ goto error_out;
+ }
+
+ for (i = 0; i < pages; i++) {
+ push_page = page_array[i];
flush_dcache_page(push_page);
SetPageUptodate(push_page);
-skip_page:
- unlock_page(push_page);
- if (i != page->index)
+ kunmap(page_array[i]);
+ if (page->index == start_index + i) {
+ unlock_page(push_page);
+ continue;
+ }
+
+ if (add_to_page_cache_lru(push_page, mapping,
+ start_index + i, gfp_mask)) {
page_cache_release(push_page);
- }
+ continue;
+ }
- squashfs_cache_put(buffer);
+ unlock_page(push_page);
+ page_cache_release(push_page);
+ }
+ kfree(page_array);
+ kfree(buffer);
return 0;
+release_page_array:
+ for (i = 0; i < nr_pages; i++) {
+ kunmap(page_array[i]);
+ page_cache_release(page_array[i]);
+ }
+
+ kfree(page_array);
+
+release_buffer:
+ kfree(buffer);
+ return -ENOMEM;
+
error_out:
SetPageError(page);
pageaddr = kmap_atomic(page);
--
1.7.9.5
* [RFC 4/5] squashfs: support multiple decompress stream buffer
From: Minchan Kim @ 2013-09-16 7:08 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee, Minchan Kim
Squashfs currently uses only one stream buffer for decompression, so
concurrent reads suffer from contention on the lock protecting that
single stream buffer.
At mount time the number of stream buffers starts at num_online_cpus()
and may grow up to num_online_cpus() * (2M / block_size) * 2.
The rationale is that the MM does readahead in 2M chunks to avoid
pinning too much memory, and while one request is waiting we should be
able to request another chunk; that's why I multiply by 2.
If this turns out to use too much memory, we can add a shrinker
routine.
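As a worked example with hypothetical numbers (4 online CPUs and the
default 128K block size), the pool may grow up to
4 * (2M / 128K) * 2 = 4 * 16 * 2 = 128 streams; full_decomp_strm()
below encodes exactly this cap:

	static bool full_decomp_strm(struct squashfs_sb_info *msblk)
	{
		/* MM readahead works in 2M chunks */
		int blocks = 2 * 1024 * 1024 / msblk->block_size;

		return msblk->nr_avail_decomp > (num_online_cpus() * blocks * 2);
	}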
I ran the following test, reading two 1G files concurrently with dd:
dd if=test/test1.dat of=/dev/null &
dd if=test/test2.dat of=/dev/null &
old : 60 sec -> new : 30 sec
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
fs/squashfs/block.c | 9 ++--
fs/squashfs/decompressor.c | 105 ++++++++++++++++++++++++++++++++++++++----
fs/squashfs/decompressor.h | 27 +++++------
fs/squashfs/lzo_wrapper.c | 12 ++---
fs/squashfs/squashfs.h | 3 +-
fs/squashfs/squashfs_fs_sb.h | 7 ++-
fs/squashfs/super.c | 40 ++++++++++++----
fs/squashfs/xz_wrapper.c | 20 ++++----
fs/squashfs/zlib_wrapper.c | 12 ++---
9 files changed, 168 insertions(+), 67 deletions(-)
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index f33c6ef..d41bac8 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -78,14 +78,14 @@ static struct buffer_head *get_block_length(struct super_block *sb,
-int squashfs_decompress_block(struct squashfs_sb_info *msblk, int compressed,
+int squashfs_decompress_block(struct super_block *sb, int compressed,
void **buffer, struct buffer_head **bh, int nr_bh,
int offset, int length, int srclength, int pages)
{
int k = 0;
if (compressed) {
- length = squashfs_decompress(msblk, buffer, bh, nr_bh,
+ length = squashfs_decompress(sb, buffer, bh, nr_bh,
offset, length, srclength, pages);
if (length < 0)
goto out;
@@ -93,6 +93,7 @@ int squashfs_decompress_block(struct squashfs_sb_info *msblk, int compressed,
/*
* Block is uncompressed.
*/
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
int bytes, in, avail, pg_offset = 0, page = 0;
for (bytes = length; k < nr_bh; k++) {
@@ -262,8 +263,8 @@ int squashfs_read_metablock(struct super_block *sb, void **buffer, u64 index,
}
ll_rw_block(READ, b - 1, bh + 1);
- length = squashfs_decompress_block(msblk, compressed, buffer, bh, b,
- offset, length, srclength, pages);
+ length = squashfs_decompress_block(sb, compressed, buffer, bh,
+ b, offset, length, srclength, pages);
if (length < 0)
goto read_failure;
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index e47453e..ed35b32 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -25,6 +25,8 @@
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -70,6 +72,80 @@ static const struct squashfs_decompressor *decompressor[] = {
&squashfs_unknown_comp_ops
};
+void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
+ struct squashfs_decomp_strm *stream)
+{
+ if (msblk->decompressor)
+ msblk->decompressor->free(stream->strm);
+ kfree(stream);
+}
+
+static void *squashfs_get_decomp_strm(struct squashfs_sb_info *msblk)
+{
+ struct squashfs_decomp_strm *strm = NULL;
+ mutex_lock(&msblk->comp_strm_mutex);
+ if (!list_empty(&msblk->strm_list)) {
+ strm = list_entry(msblk->strm_list.next,
+ struct squashfs_decomp_strm, list);
+ list_del(&strm->list);
+ msblk->nr_avail_decomp--;
+ WARN_ON(msblk->nr_avail_decomp < 0);
+ }
+ mutex_unlock(&msblk->comp_strm_mutex);
+ return strm;
+}
+
+static bool full_decomp_strm(struct squashfs_sb_info *msblk)
+{
+	/* MM does readahead in 2M units */
+ int blocks = 2 * 1024 * 1024 / msblk->block_size;
+ return msblk->nr_avail_decomp > (num_online_cpus() * blocks * 2);
+}
+
+static void squashfs_put_decomp_strm(struct squashfs_sb_info *msblk,
+ struct squashfs_decomp_strm *strm)
+{
+ mutex_lock(&msblk->comp_strm_mutex);
+ if (full_decomp_strm(msblk)) {
+ mutex_unlock(&msblk->comp_strm_mutex);
+ squashfs_decompressor_free(msblk, strm);
+ return;
+ }
+
+ list_add(&strm->list, &msblk->strm_list);
+ msblk->nr_avail_decomp++;
+ mutex_unlock(&msblk->comp_strm_mutex);
+ wake_up(&msblk->decomp_wait_queue);
+}
+
+int squashfs_decompress(struct super_block *sb, void **buffer,
+ struct buffer_head **bh, int b, int offset, int length,
+ int srclength, int pages)
+{
+ int ret;
+ struct squashfs_decomp_strm *strm;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ while (1) {
+ strm = squashfs_get_decomp_strm(msblk);
+ if (strm)
+ break;
+
+ if (!full_decomp_strm(msblk)) {
+ strm = squashfs_decompressor_init(sb);
+ if (strm)
+ break;
+ }
+
+ wait_event(msblk->decomp_wait_queue, msblk->nr_avail_decomp);
+ continue;
+ }
+
+ ret = msblk->decompressor->decompress(msblk, strm->strm, buffer, bh,
+ b, offset, length, srclength, pages);
+
+ squashfs_put_decomp_strm(msblk, strm);
+ return ret;
+}
const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
{
@@ -82,35 +158,48 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
return decompressor[i];
}
-
-void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
+struct squashfs_decomp_strm *squashfs_decompressor_init(struct super_block *sb)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
+ struct squashfs_decomp_strm *decomp_strm = NULL;
void *strm, *buffer = NULL;
int length = 0;
+ decomp_strm = kmalloc(sizeof(struct squashfs_decomp_strm), GFP_KERNEL);
+ if (!decomp_strm)
+ return ERR_PTR(-ENOMEM);
/*
* Read decompressor specific options from file system if present
*/
- if (SQUASHFS_COMP_OPTS(flags)) {
+ if (SQUASHFS_COMP_OPTS(msblk->flags)) {
buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
- if (buffer == NULL)
- return ERR_PTR(-ENOMEM);
+ if (buffer == NULL) {
+ decomp_strm = ERR_PTR(-ENOMEM);
+ goto finished;
+ }
length = squashfs_read_metablock(sb, &buffer,
sizeof(struct squashfs_super_block), 0, NULL,
PAGE_CACHE_SIZE, 1);
if (length < 0) {
- strm = ERR_PTR(length);
+ decomp_strm = ERR_PTR(length);
goto finished;
}
}
strm = msblk->decompressor->init(msblk, buffer, length);
+ if (IS_ERR(strm)) {
+ decomp_strm = strm;
+ goto finished;
+ }
-finished:
+ decomp_strm->strm = strm;
kfree(buffer);
+ return decomp_strm;
- return strm;
+finished:
+ kfree(decomp_strm);
+ kfree(buffer);
+ return decomp_strm;
}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 330073e..207c810 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -26,27 +26,24 @@
struct squashfs_decompressor {
void *(*init)(struct squashfs_sb_info *, void *, int);
void (*free)(void *);
- int (*decompress)(struct squashfs_sb_info *, void **,
+ int (*decompress)(struct squashfs_sb_info *, void*, void **,
struct buffer_head **, int, int, int, int, int);
int id;
char *name;
int supported;
};
-static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
- void *s)
-{
- if (msblk->decompressor)
- msblk->decompressor->free(s);
-}
-
-static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
- void **buffer, struct buffer_head **bh, int b, int offset, int length,
- int srclength, int pages)
-{
- return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
- length, srclength, pages);
-}
+struct squashfs_decomp_strm {
+ void *strm;
+ struct list_head list;
+};
+
+int squashfs_decompress(struct super_block *sb, void **buffer,
+ struct buffer_head **bh, int b, int offset, int length,
+ int srclength, int pages);
+
+void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
+ struct squashfs_decomp_strm *stream);
#ifdef CONFIG_SQUASHFS_XZ
extern const struct squashfs_decompressor squashfs_xz_comp_ops;
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 00f4dfc..e1bf135 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -74,17 +74,15 @@ static void lzo_free(void *strm)
}
-static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
- struct buffer_head **bh, int b, int offset, int length, int srclength,
- int pages)
+static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ void **buffer, struct buffer_head **bh, int b, int offset,
+ int length, int srclength, int pages)
{
- struct squashfs_lzo *stream = msblk->stream;
+ struct squashfs_lzo *stream = strm;
void *buff = stream->input;
int avail, i, bytes = length, res;
size_t out_len = srclength;
- mutex_lock(&msblk->read_data_mutex);
-
for (i = 0; i < b; i++) {
wait_on_buffer(bh[i]);
if (!buffer_uptodate(bh[i]))
@@ -111,7 +109,6 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
bytes -= avail;
}
- mutex_unlock(&msblk->read_data_mutex);
return res;
block_release:
@@ -119,7 +116,6 @@ block_release:
put_bh(bh[i]);
failed:
- mutex_unlock(&msblk->read_data_mutex);
ERROR("lzo decompression failed, data probably corrupt\n");
return -EIO;
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 405baed..4bb1f90 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -52,7 +52,8 @@ extern void *squashfs_read_table(struct super_block *, u64, int);
/* decompressor.c */
extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
-extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
+extern struct squashfs_decomp_strm *squashfs_decompressor_init(
+ struct super_block *);
/* export.c */
extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 52934a2..0a209e6 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -63,10 +63,10 @@ struct squashfs_sb_info {
__le64 *id_table;
__le64 *fragment_index;
__le64 *xattr_id_table;
- struct mutex read_data_mutex;
+ struct mutex comp_strm_mutex;
struct mutex meta_index_mutex;
struct meta_index *meta_index;
- void *stream;
+ struct list_head strm_list;
__le64 *inode_lookup_table;
u64 inode_table;
u64 directory_table;
@@ -76,5 +76,8 @@ struct squashfs_sb_info {
long long bytes_used;
unsigned int inodes;
int xattr_ids;
+ wait_queue_head_t decomp_wait_queue;
+ int nr_avail_decomp;
+ unsigned short flags;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 60553a9..70aa9c4 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -84,7 +84,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
unsigned short flags;
unsigned int fragments;
u64 lookup_table_start, xattr_id_table_start, next_table;
- int err;
+ int err, i;
TRACE("Entered squashfs_fill_superblock\n");
@@ -98,7 +98,9 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
msblk->devblksize_log2 = ffz(~msblk->devblksize);
- mutex_init(&msblk->read_data_mutex);
+ INIT_LIST_HEAD(&msblk->strm_list);
+ mutex_init(&msblk->comp_strm_mutex);
+ init_waitqueue_head(&msblk->decomp_wait_queue);
mutex_init(&msblk->meta_index_mutex);
/*
@@ -176,6 +178,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
msblk->inodes = le32_to_cpu(sblk->inodes);
flags = le16_to_cpu(sblk->flags);
+ msblk->flags = flags;
TRACE("Found valid superblock on %s\n", bdevname(sb->s_bdev, b));
TRACE("Inodes are %scompressed\n", SQUASHFS_UNCOMPRESSED_INODES(flags)
@@ -212,11 +215,16 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- msblk->stream = squashfs_decompressor_init(sb, flags);
- if (IS_ERR(msblk->stream)) {
- err = PTR_ERR(msblk->stream);
- msblk->stream = NULL;
- goto failed_mount;
+	/* Allocate multiple decompressor streams */
+ for (i = 0; i < num_online_cpus(); i++) {
+ struct squashfs_decomp_strm *decomp_strm;
+ decomp_strm = squashfs_decompressor_init(sb);
+ if (IS_ERR(decomp_strm)) {
+ err = PTR_ERR(decomp_strm);
+ goto failed_mount;
+ }
+ list_add(&decomp_strm->list, &msblk->strm_list);
+ msblk->nr_avail_decomp++;
}
/* Handle xattrs */
@@ -336,7 +344,14 @@ failed_mount:
squashfs_cache_delete(msblk->block_cache);
squashfs_cache_delete(msblk->fragment_cache);
squashfs_cache_delete(msblk->read_page);
- squashfs_decompressor_free(msblk, msblk->stream);
+ while (!list_empty(&msblk->strm_list)) {
+ struct squashfs_decomp_strm *decomp_strm =
+ list_entry(msblk->strm_list.prev,
+ struct squashfs_decomp_strm, list);
+ list_del(&decomp_strm->list);
+ squashfs_decompressor_free(msblk, decomp_strm);
+ }
+ msblk->nr_avail_decomp = 0;
kfree(msblk->inode_lookup_table);
kfree(msblk->fragment_index);
kfree(msblk->id_table);
@@ -383,7 +398,14 @@ static void squashfs_put_super(struct super_block *sb)
squashfs_cache_delete(sbi->block_cache);
squashfs_cache_delete(sbi->fragment_cache);
squashfs_cache_delete(sbi->read_page);
- squashfs_decompressor_free(sbi, sbi->stream);
+ while (!list_empty(&sbi->strm_list)) {
+ struct squashfs_decomp_strm *decomp_strm =
+ list_entry(sbi->strm_list.prev,
+ struct squashfs_decomp_strm, list);
+ list_del(&decomp_strm->list);
+ squashfs_decompressor_free(sbi, decomp_strm);
+ }
+ sbi->nr_avail_decomp = 0;
kfree(sbi->id_table);
kfree(sbi->fragment_index);
kfree(sbi->meta_index);
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 1760b7d1..98b8bb5 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -103,15 +103,13 @@ static void squashfs_xz_free(void *strm)
}
-static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
- struct buffer_head **bh, int b, int offset, int length, int srclength,
- int pages)
+static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ void **buffer, struct buffer_head **bh, int b, int offset,
+ int length, int srclength, int pages)
{
enum xz_ret xz_err;
int avail, total = 0, k = 0, page = 0;
- struct squashfs_xz *stream = msblk->stream;
-
- mutex_lock(&msblk->read_data_mutex);
+ struct squashfs_xz *stream = strm;
xz_dec_reset(stream->state);
stream->buf.in_pos = 0;
@@ -126,7 +124,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
length -= avail;
wait_on_buffer(bh[k]);
if (!buffer_uptodate(bh[k]))
- goto release_mutex;
+ goto out;
stream->buf.in = bh[k]->b_data + offset;
stream->buf.in_size = avail;
@@ -149,20 +147,18 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
if (xz_err != XZ_STREAM_END) {
ERROR("xz_dec_run error, data probably corrupt\n");
- goto release_mutex;
+ goto out;
}
if (k < b) {
ERROR("xz_uncompress error, input remaining\n");
- goto release_mutex;
+ goto out;
}
total += stream->buf.out_pos;
- mutex_unlock(&msblk->read_data_mutex);
return total;
-release_mutex:
- mutex_unlock(&msblk->read_data_mutex);
+out:
for (; k < b; k++)
put_bh(bh[k]);
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 55d918f..82c1a70 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -61,15 +61,13 @@ static void zlib_free(void *strm)
}
-static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
- struct buffer_head **bh, int b, int offset, int length, int srclength,
- int pages)
+static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ void **buffer, struct buffer_head **bh, int b, int offset,
+ int length, int srclength, int pages)
{
int zlib_err, zlib_init = 0;
int k = 0, page = 0;
- z_stream *stream = msblk->stream;
-
- mutex_lock(&msblk->read_data_mutex);
+ z_stream *stream = strm;
stream->avail_out = 0;
stream->avail_in = 0;
@@ -126,11 +124,9 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
}
length = stream->total_out;
- mutex_unlock(&msblk->read_data_mutex);
return length;
release_mutex:
- mutex_unlock(&msblk->read_data_mutex);
for (; k < b; k++)
put_bh(bh[k]);
--
1.7.9.5
* [RFC 5/5] squashfs: support readpages
From: Minchan Kim @ 2013-09-16 7:08 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee, Minchan Kim
This patch adds squashfs_readpages support so squashfs can read ahead
pages without unplugging the I/O scheduler. I confirmed the following
test with blktrace: sequential read of a 1G file with a 2:1 compression
ratio (i.e. 500M consumed on real storage), using an fadvise(SEQUENTIAL)
hint and with some block device knobs and read_ahead_kb tuned.
Old :
Reads Queued: 524289, 524289KiB Writes Queued: 0, 0KiB
Read Dispatches: 4096, 524289KiB Write Dispatches: 0, 0KiB
Reads Requeued: 0 Writes Requeued: 0
Reads Completed: 4096, 524289KiB Writes Completed: 0, 0KiB
Read Merges: 520193, 520193KiB Write Merges: 0, 0KiB
PC Reads Queued: 0, 0KiB PC Writes Queued: 0, 0KiB
PC Read Disp.: 10, 0KiB PC Write Disp.: 0, 0KiB
PC Reads Req.: 0 PC Writes Req.: 0
PC Reads Compl.: 0 PC Writes Compl.: 0
IO unplugs: 4096 Timer unplugs: 0
New :
Reads Queued: 524289, 524289KiB Writes Queued: 0, 0KiB
Read Dispatches: 633, 524289KiB Write Dispatches: 0, 0KiB
Reads Requeued: 0 Writes Requeued: 0
Reads Completed: 633, 524289KiB Writes Completed: 0, 0KiB
Read Merges: 523656, 523656KiB Write Merges: 0, 0KiB
PC Reads Queued: 0, 0KiB PC Writes Queued: 0, 0KiB
PC Read Disp.: 7, 0KiB PC Write Disp.: 0, 0KiB
PC Reads Req.: 0 PC Writes Req.: 0
PC Reads Compl.: 0 PC Writes Compl.: 0
IO unplugs: 31 Timer unplugs: 0
IOW, lots of I/O requests are merged before being issued, which the
blktrace event logs below also show.
old:
8,34 0 1933 0.014381550 4957 Q R 4197628 + 2 [seqread]
8,34 0 1934 0.014381629 4957 M R 4197628 + 2 [seqread]
8,32 0 1935 0.014381821 4957 A R 4197630 + 2 <- (8,34) 1278
8,34 0 1936 0.014381919 4957 Q R 4197630 + 2 [seqread]
8,34 0 1937 0.014381998 4957 M R 4197630 + 2 [seqread]
8,32 0 1938 0.014382131 4957 A R 4197632 + 2 <- (8,34) 1280
8,34 0 1939 0.014382203 4957 Q R 4197632 + 2 [seqread]
8,34 0 1940 0.014382281 4957 M R 4197632 + 2 [seqread]
8,34 0 1941 0.014382873 4957 I R 4197378 + 256 [seqread]
8,34 0 0 0.014383649 0 m N cfq4957S / insert_request
8,34 0 0 0.014384609 0 m N cfq4957S / dispatch_insert
8,34 0 0 0.014385132 0 m N cfq4957S / dispatched a request
8,34 0 0 0.014385460 0 m N cfq4957S / activate rq, drv=1
8,34 0 1942 0.014385581 4957 D R 4197378 + 256 [seqread]
new:
8,34 0 98321 0.352583517 5101 M R 4261888 + 2 [seqread]
8,34 0 98322 0.353008833 5101 I R 4230404 + 4096 [seqread]
8,34 0 0 0.353012357 0 m N cfq5101SN / insert_request
8,34 0 98323 0.353013872 5101 I R 4234500 + 4096 [seqread]
8,34 0 0 0.353014218 0 m N cfq5101SN / insert_request
8,34 0 98324 0.353014553 5101 I R 4238596 + 4096 [seqread]
8,34 0 0 0.353014802 0 m N cfq5101SN / insert_request
8,34 0 98325 0.353014992 5101 I R 4242692 + 4096 [seqread]
8,34 0 0 0.353015315 0 m N cfq5101SN / insert_request
Voila, here is the result of all that merging:
elapsed time, old : 17.5 sec -> new : 11.5 sec
One drawback in my mind is the magic value of 3ms used to wait for
more I/O, which can delay completion by at least 3ms even when the I/O
finished earlier. The reason I selected that value is that it is the
value kblockd used for a long time to plug/unplug the scheduler, until
we introduced explicit blk_[start|finish]_plug. If it turns out to be a
real problem, we can add a hook into the plug and flush the I/O when
someone is waiting on it.
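For reference, the 3ms is where the decompression work is queued in
squashfs_readpages() below, so that more read I/O can be merged before
the CPU-bound decompression starts:

	queue_delayed_work(system_unbound_wq, &msblk->delay_work,
			msecs_to_jiffies(3));

A possible refinement (not implemented here) would be to flush this
delayed work early from a plug callback when a reader is already
blocked on one of these pages.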
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
fs/squashfs/block.c | 2 -
fs/squashfs/file.c | 462 +++++++++++++++++++++++++++++++++++++++++-
fs/squashfs/squashfs.h | 3 +
fs/squashfs/squashfs_fs_sb.h | 4 +
fs/squashfs/super.c | 4 +-
5 files changed, 465 insertions(+), 10 deletions(-)
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index d41bac8..e8bf200 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -76,8 +76,6 @@ static struct buffer_head *get_block_length(struct super_block *sb,
return bh;
}
-
-
int squashfs_decompress_block(struct super_block *sb, int compressed,
void **buffer, struct buffer_head **bh, int nr_bh,
int offset, int length, int srclength, int pages)
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 36508c3..53ad207 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -47,6 +47,7 @@
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>
+#include <linux/swap.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -454,8 +455,55 @@ error_out:
return 0;
}
-static int squashfs_hole_readpage(struct file *file, struct inode *inode,
- int index, struct page *page)
+static int squashfs_hole_readpages(struct inode *inode, int index,
+ struct list_head *page_list)
+{
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int bytes, i, offset = 0;
+ void *pageaddr;
+ struct page *push_page;
+
+ int start_index = index << (msblk->block_log - PAGE_CACHE_SHIFT);
+ int file_end = i_size_read(inode) >> msblk->block_log;
+
+ bytes = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+
+ /*
+	 * Loop zero-filling the hole's pages. As the hole block likely covers
+	 * many PAGE_CACHE_SIZE pages (default block size is 128 KiB), take
+	 * each page from the readahead page list, mark it uptodate and add
+	 * it to the LRU.
+ */
+ for (i = start_index; bytes > 0; i++,
+ bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+
+ push_page = list_entry(page_list->prev, struct page, lru);
+ list_del(&push_page->lru);
+
+ pageaddr = kmap_atomic(push_page);
+ memset(pageaddr, 0, PAGE_CACHE_SIZE);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(push_page);
+ SetPageUptodate(push_page);
+
+ lru_cache_add_file(push_page);
+ unlock_page(push_page);
+ page_cache_release(push_page);
+ }
+
+ while (!list_empty(page_list)) {
+ push_page = list_entry(page_list->prev, struct page, lru);
+ list_del(&push_page->lru);
+ page_cache_release(push_page);
+ }
+
+ return 0;
+}
+
+static int squashfs_hole_readpage(struct inode *inode, int index,
+ struct page *page)
{
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
int bytes, i, offset = 0;
@@ -478,8 +526,8 @@ static int squashfs_hole_readpage(struct file *file, struct inode *inode,
bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
struct page *push_page;
- push_page = (page && i == page->index) ? page :
- grab_cache_page_nowait(page->mapping, i);
+ push_page = (i == page->index) ? page :
+ grab_cache_page_nowait(inode->i_mapping, i);
if (!push_page)
continue;
@@ -494,7 +542,7 @@ static int squashfs_hole_readpage(struct file *file, struct inode *inode,
SetPageUptodate(push_page);
skip_page:
unlock_page(push_page);
- if (page && i == page->index)
+ if (i == page->index)
continue;
page_cache_release(push_page);
}
@@ -533,7 +581,7 @@ static int squashfs_regular_readpage(struct file *file, struct page *page)
goto error_out;
if (bsize == 0)
- return squashfs_hole_readpage(file, inode, index, page);
+ return squashfs_hole_readpage(inode, index, page);
/*
* Read and decompress data block
@@ -660,6 +708,406 @@ out:
return 0;
}
+static int squashfs_ra_readblock(struct inode *inode, int index,
+ struct buffer_head **bh, int *nr_bh,
+ int *block_size, u64 *block)
+{
+ int bsize;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int ret, b;
+
+ /*
+ * Reading a datablock from disk. Need to read block list
+ * to get location and block size.
+ */
+ *block_size = read_blocklist(inode, index, block);
+ if (*block_size < 0)
+ return 1;
+
+ if (*block_size == 0)
+ return 0;
+
+ bsize = SQUASHFS_COMPRESSED_SIZE_BLOCK(*block_size);
+ ret = squashfs_read_submit(inode->i_sb, *block, bsize,
+ msblk->block_size, bh, &b);
+ if (ret < 0)
+ return ret;
+
+ *nr_bh = b;
+ return 0;
+}
+
+struct squashfs_ra_private {
+ struct buffer_head **bh;
+ int nr_bh;
+ int block_size;
+ u64 block;
+ void **buffer;
+ struct page **page_array;
+};
+
+/* Caller should free buffer head */
+static int squashfs_ra_read_submit(struct inode *inode, int bindex,
+ struct squashfs_ra_private *ra_priv)
+{
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int ret;
+ struct buffer_head **bh;
+ int nr_bh = -1, bsize;
+ u64 block;
+ bh = kcalloc(((msblk->block_size + msblk->devblksize - 1)
+ >> msblk->devblksize_log2) + 1,
+ sizeof(*bh), GFP_KERNEL);
+ if (!bh)
+ return -ENOMEM;
+
+ ra_priv->bh = bh;
+ ret = squashfs_ra_readblock(inode, bindex, bh, &nr_bh,
+ &bsize, &block);
+ if (ret != 0)
+ goto release_bh;
+
+ ra_priv->nr_bh = nr_bh;
+ ra_priv->block_size = bsize;
+ ra_priv->block = block;
+
+ return 0;
+
+release_bh:
+ kfree(ra_priv->bh);
+ return ret;
+}
+
+static int squashfs_ra_decompress(struct inode *inode, int bindex,
+ struct squashfs_ra_private *ra_priv, gfp_t gfp_mask,
+ struct list_head *page_list)
+{
+ int j;
+ int ret = -ENOMEM;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int pages_per_block;
+
+ void **buffer;
+ struct buffer_head **bh;
+ int block_size, nr_bh;
+ u64 block;
+ struct page **page_array, *page;
+
+ int compressed, offset, length;
+
+ pages_per_block = msblk->block_size >> PAGE_CACHE_SHIFT;
+ pages_per_block = pages_per_block ? pages_per_block : 1;
+
+ bh = ra_priv->bh;
+ block_size = ra_priv->block_size;
+ nr_bh = ra_priv->nr_bh;
+ block = ra_priv->block;
+
+ if (block_size == 0)
+ return squashfs_hole_readpages(inode, bindex, page_list);
+
+ buffer = kcalloc(1 << (msblk->block_log - PAGE_CACHE_SHIFT),
+ sizeof(void *), GFP_KERNEL);
+ if (!buffer)
+ goto out;
+ ra_priv->buffer = buffer;
+
+ page_array = kcalloc(1 << (msblk->block_log - PAGE_CACHE_SHIFT),
+ sizeof(struct page *), GFP_KERNEL);
+ if (!page_array)
+ goto release_buffer;
+ ra_priv->page_array = page_array;
+
+ /* alloc buffer pages */
+ for (j = 0; j < pages_per_block; j++) {
+ page = list_entry(page_list->prev, struct page, lru);
+ list_del(&page->lru);
+ buffer[j] = kmap(page);
+ page_array[j] = page;
+ }
+
+ compressed = SQUASHFS_COMPRESSED_BLOCK(block_size);
+ length = SQUASHFS_COMPRESSED_SIZE_BLOCK(block_size);
+
+ offset = block & ((1 << msblk->devblksize_log2) - 1);
+ length = squashfs_decompress_block(inode->i_sb, compressed, buffer,
+ bh, nr_bh, offset, length, msblk->block_size,
+ pages_per_block);
+
+ for (j = 0; j < pages_per_block; j++) {
+ page = page_array[j];
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ lru_cache_add_file(page);
+ unlock_page(page);
+ }
+
+ ret = 0;
+
+release_buffer:
+ if (ra_priv->buffer) {
+ for (j = 0; j < pages_per_block; j++) {
+ if (!ra_priv->buffer[j])
+ break;
+ kunmap(ra_priv->page_array[j]);
+ page_cache_release(ra_priv->page_array[j]);
+ }
+ kfree(ra_priv->page_array);
+ }
+
+ kfree(ra_priv->buffer);
+out:
+ return ret;
+}
+
+/*
+ * Fill hole pages between page_index and last_page_index
+ * Return 0 if function is successful, otherwise, return 1
+ * All pages are locked and added into page cache so that caller should
+ * add them into LRU and unlock.
+ */
+int hole_plugging(struct squashfs_sb_info *msblk, struct list_head *page_list,
+ int start_bindex, int last_bindex,
+ struct address_space *mapping)
+{
+ struct page *page, *hole_page;
+ int page_index, last_page_index;
+ gfp_t gfp_mask = mapping_gfp_mask(mapping);
+
+ page_index = start_bindex << (msblk->block_log - PAGE_CACHE_SHIFT);
+ last_page_index = ((last_bindex + 1) <<
+ (msblk->block_log - PAGE_CACHE_SHIFT));
+
+ /* Fill hole pages in start block */
+ list_for_each_entry_reverse(page, page_list, lru) {
+ if (page_index == page->index) {
+ if (add_to_page_cache(page, mapping, page->index,
+ gfp_mask))
+ goto rollback;
+ page_index++;
+ continue;
+ }
+
+ while (page_index != page->index) {
+ hole_page = page_cache_alloc_readahead(mapping);
+ if (!hole_page)
+ goto rollback;
+
+ hole_page->index = page_index;
+ if (add_to_page_cache(hole_page, mapping,
+ hole_page->index, gfp_mask))
+ goto rollback;
+
+ list_add(&hole_page->lru, &page->lru);
+ page_index++;
+ }
+
+ page_index++;
+ }
+
+ /* Fill hole pages in last block */
+ while (page_index < last_page_index) {
+ page = page_cache_alloc_readahead(mapping);
+ if (!page) {
+ page = list_entry(page_list->prev, struct page, lru);
+ goto rollback;
+ }
+
+ page->index = page_index;
+ if (add_to_page_cache(page, mapping, page->index,
+ gfp_mask))
+ goto rollback;
+
+ list_add(&page->lru, page_list);
+ page_index++;
+ }
+
+ return 0;
+
+rollback:
+ list_for_each_entry_reverse(hole_page, page_list, lru) {
+ if (hole_page == page)
+ break;
+ delete_from_page_cache(hole_page);
+ unlock_page(hole_page);
+ }
+ return 1;
+}
+
+struct decomp_work {
+ struct inode *inode;
+ int bindex;
+ struct list_head list;
+ struct list_head pages;
+ struct squashfs_ra_private *ra_priv;
+ gfp_t gfp_mask;
+};
+
+void squashfs_decomp_work(struct work_struct *work)
+{
+ struct inode *inode;
+ struct list_head *pages;
+ struct squashfs_ra_private *ra_priv;
+ struct decomp_work *decomp_work;
+ gfp_t gfp_mask;
+ int bindex;
+ struct squashfs_sb_info *msblk =
+ container_of(work, struct squashfs_sb_info, delay_work.work);
+
+ for (;;) {
+ decomp_work = NULL;
+ spin_lock(&msblk->decomp_lock);
+ if (!list_empty(&msblk->decomp_list)) {
+ decomp_work = list_entry(msblk->decomp_list.prev,
+ struct decomp_work, list);
+ list_del(&decomp_work->list);
+ }
+ spin_unlock(&msblk->decomp_lock);
+ if (!decomp_work)
+ return;
+
+ inode = decomp_work->inode;
+ bindex = decomp_work->bindex;
+ ra_priv = decomp_work->ra_priv;
+ gfp_mask = decomp_work->gfp_mask;
+ pages = &decomp_work->pages;
+
+ if (squashfs_ra_decompress(inode, bindex, ra_priv,
+ gfp_mask, pages)) {
+ struct page *page, *tmp;
+ list_for_each_entry_safe_reverse(page, tmp,
+ pages, lru) {
+ list_del(&page->lru);
+ delete_from_page_cache(page);
+ unlock_page(page);
+ page_cache_release(page);
+ }
+ }
+
+ kfree(ra_priv->bh);
+ kfree(ra_priv);
+ kfree(decomp_work);
+ }
+}
+
+static int move_pages(struct list_head *page_list, struct list_head *pages,
+ int nr)
+{
+ int moved_pages = 0;
+ struct page *page, *tmp_page;
+ list_for_each_entry_safe_reverse(page, tmp_page,
+ page_list, lru) {
+ list_move(&page->lru, pages);
+ moved_pages++;
+ if (moved_pages == nr)
+ break;
+ }
+
+ return moved_pages;
+}
+
+static int squashfs_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *page_list, unsigned nr_pages)
+{
+ struct inode *inode = mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ struct page *hpage, *tpage;
+ struct decomp_work **work;
+ int start_bindex, last_bindex;
+ struct squashfs_ra_private **ra_priv;
+ int pages_per_block, i, ret = -ENOMEM;
+ gfp_t gfp_mask;
+ int nr_blocks;
+
+ gfp_mask = mapping_gfp_mask(mapping);
+
+ hpage = list_entry(page_list->prev, struct page, lru);
+ tpage = list_entry(page_list->next, struct page, lru);
+
+ start_bindex = hpage->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+ last_bindex = tpage->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+
+ if (last_bindex >= (i_size_read(inode) >> msblk->block_log))
+ return 0;
+
+ /* fill with pages for readahead */
+ if (hole_plugging(msblk, page_list, start_bindex,
+ last_bindex, mapping)) {
+ ret = 0;
+ goto out;
+ }
+
+ nr_blocks = last_bindex - start_bindex + 1;
+ ra_priv = kcalloc(nr_blocks, sizeof(*ra_priv), GFP_KERNEL);
+ if (!ra_priv)
+ goto out;
+
+ for (i = 0; i < nr_blocks; i++) {
+ ra_priv[i] = kmalloc(sizeof(**ra_priv), GFP_KERNEL);
+ if (ra_priv[i] == NULL)
+ goto release_ra_priv;
+ }
+
+ work = kcalloc(nr_blocks, sizeof(*work), GFP_KERNEL);
+ if (!work)
+ goto release_ra_priv;
+
+ for (i = 0; i < nr_blocks; i++) {
+ work[i] = kmalloc(sizeof(**work), GFP_KERNEL);
+ if (!work[i])
+ goto release_work;
+ }
+
+ for (i = 0; i < nr_blocks; i++) {
+ ret = squashfs_ra_read_submit(inode, start_bindex + i ,
+ ra_priv[i]);
+ if (ret)
+ goto release_ra_priv;
+ }
+
+ ret = 0;
+
+ queue_delayed_work(system_unbound_wq, &msblk->delay_work,
+ msecs_to_jiffies(3));
+
+ pages_per_block = msblk->block_size >> PAGE_CACHE_SHIFT;
+ pages_per_block = pages_per_block ? pages_per_block : 1;
+
+ for (i = 0; i < nr_blocks; i++) {
+ struct decomp_work *decomp_work = work[i];
+
+ INIT_LIST_HEAD(&decomp_work->pages);
+ decomp_work->bindex = start_bindex + i;
+ decomp_work->ra_priv = ra_priv[i];
+ decomp_work->gfp_mask = gfp_mask;
+ decomp_work->inode = inode;
+
+ move_pages(page_list, &decomp_work->pages,
+ pages_per_block);
+
+ spin_lock(&msblk->decomp_lock);
+ list_add(&decomp_work->list, &msblk->decomp_list);
+ spin_unlock(&msblk->decomp_lock);
+ }
+
+ kfree(ra_priv);
+ kfree(work);
+
+ return ret;
+
+release_work:
+ for (i = 0; i < nr_blocks; i++)
+ kfree(work[i]);
+ kfree(work);
+release_ra_priv:
+ for (i = 0; i < nr_blocks; i++)
+ kfree(ra_priv[i]);
+ kfree(ra_priv);
+out:
+ return ret;
+}
+
const struct address_space_operations squashfs_aops = {
- .readpage = squashfs_readpage
+ .readpage = squashfs_readpage,
+ .readpages = squashfs_readpages,
};
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 4bb1f90..2de96a7 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -34,6 +34,8 @@ extern int squashfs_read_metablock(struct super_block *, void **, u64, int,
u64 *, int, int);
extern int squashfs_read_submit(struct super_block *, u64, int, int,
struct buffer_head **, int *);
+extern int squashfs_decompress_block(struct super_block *, int, void **,
+ struct buffer_head **, int, int, int, int, int);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -89,6 +91,7 @@ extern const struct export_operations squashfs_export_ops;
/* file.c */
extern const struct address_space_operations squashfs_aops;
+extern void squashfs_decomp_work(struct work_struct *work);
/* inode.c */
extern const struct inode_operations squashfs_inode_ops;
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 0a209e6..bdcc0f2 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -79,5 +79,9 @@ struct squashfs_sb_info {
wait_queue_head_t decomp_wait_queue;
int nr_avail_decomp;
unsigned short flags;
+
+ struct delayed_work delay_work;
+ spinlock_t decomp_lock;
+ struct list_head decomp_list;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 70aa9c4..78d12d6 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -73,7 +73,6 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem(short
return decompressor;
}
-
static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
{
struct squashfs_sb_info *msblk;
@@ -98,6 +97,9 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
msblk->devblksize_log2 = ffz(~msblk->devblksize);
+ spin_lock_init(&msblk->decomp_lock);
+ INIT_DELAYED_WORK(&msblk->delay_work, squashfs_decomp_work);
+ INIT_LIST_HEAD(&msblk->decomp_list);
INIT_LIST_HEAD(&msblk->strm_list);
mutex_init(&msblk->comp_strm_mutex);
init_waitqueue_head(&msblk->decomp_wait_queue);
--
1.7.9.5
* Re: [RFC 5/5] squashfs: support readpages
2013-09-16 7:08 ` [RFC 5/5] squashfs: support readpages Minchan Kim
@ 2013-09-17 1:52 ` Minchan Kim
2013-09-17 1:59 ` Minchan Kim
0 siblings, 1 reply; 12+ messages in thread
From: Minchan Kim @ 2013-09-17 1:52 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee
This patch had a bug, so I'm sending the fixed patch below.
Thanks.
>From ccbf1322b6322bc34bb3e6f75a27f4fecf84ed02 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Mon, 16 Sep 2013 14:28:49 +0900
Subject: [RFC 5/5] squashfs: support readpages
This patch adds squashfs_readpages so squashfs can read ahead pages
without unplugging the I/O scheduler. I confirmed the following test with blktrace.
Sequential read of a 1G file with a 2:1 compression ratio (i.e. 500M consumed by real
storage), using an fadvise(SEQUENTIAL) hint and with some block device and read_ahead_kb knobs tuned.
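The test program itself is not included in this mail; a minimal reader of the
kind described might look roughly like the sketch below (illustrative only;
read_ahead_kb and the block device knobs are tuned separately via sysfs):

#define _XOPEN_SOURCE 600	/* for posix_fadvise() */
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	char *buf = malloc(1 << 20);
	int fd;

	if (argc < 2 || buf == NULL)
		return 1;

	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	/* hint the kernel that this file will be read sequentially */
	posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);

	/* data is discarded; only the resulting I/O pattern matters
	 * for the blktrace numbers below */
	while (read(fd, buf, 1 << 20) > 0)
		;

	close(fd);
	free(buf);
	return 0;
}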
Old :
Reads Queued: 524289, 524289KiB Writes Queued: 0, 0KiB
Read Dispatches: 4096, 524289KiB Write Dispatches: 0, 0KiB
Reads Requeued: 0 Writes Requeued: 0
Reads Completed: 4096, 524289KiB Writes Completed: 0, 0KiB
Read Merges: 520193, 520193KiB Write Merges: 0, 0KiB
PC Reads Queued: 0, 0KiB PC Writes Queued: 0, 0KiB
PC Read Disp.: 10, 0KiB PC Write Disp.: 0, 0KiB
PC Reads Req.: 0 PC Writes Req.: 0
PC Reads Compl.: 0 PC Writes Compl.: 0
IO unplugs: 4096 Timer unplugs: 0
New :
Reads Queued: 524289, 524289KiB Writes Queued: 0, 0KiB
Read Dispatches: 633, 524289KiB Write Dispatches: 0, 0KiB
Reads Requeued: 0 Writes Requeued: 0
Reads Completed: 633, 524289KiB Writes Completed: 0, 0KiB
Read Merges: 523656, 523656KiB Write Merges: 0, 0KiB
PC Reads Queued: 0, 0KiB PC Writes Queued: 0, 0KiB
PC Read Disp.: 7, 0KiB PC Write Disp.: 0, 0KiB
PC Reads Req.: 0 PC Writes Req.: 0
PC Reads Compl.: 0 PC Writes Compl.: 0
IO unplugs: 31 Timer unplugs: 0
IOW, lots of I/O requests were merged before being issued. Of course, I can confirm
that with blktrace.
old:
8,34 0 1933 0.014381550 4957 Q R 4197628 + 2 [seqread]
8,34 0 1934 0.014381629 4957 M R 4197628 + 2 [seqread]
8,32 0 1935 0.014381821 4957 A R 4197630 + 2 <- (8,34) 1278
8,34 0 1936 0.014381919 4957 Q R 4197630 + 2 [seqread]
8,34 0 1937 0.014381998 4957 M R 4197630 + 2 [seqread]
8,32 0 1938 0.014382131 4957 A R 4197632 + 2 <- (8,34) 1280
8,34 0 1939 0.014382203 4957 Q R 4197632 + 2 [seqread]
8,34 0 1940 0.014382281 4957 M R 4197632 + 2 [seqread]
8,34 0 1941 0.014382873 4957 I R 4197378 + 256 [seqread]
8,34 0 0 0.014383649 0 m N cfq4957S / insert_request
8,34 0 0 0.014384609 0 m N cfq4957S / dispatch_insert
8,34 0 0 0.014385132 0 m N cfq4957S / dispatched a request
8,34 0 0 0.014385460 0 m N cfq4957S / activate rq, drv=1
8,34 0 1942 0.014385581 4957 D R 4197378 + 256 [seqread]
new:
8,34 0 98321 0.352583517 5101 M R 4261888 + 2 [seqread]
8,34 0 98322 0.353008833 5101 I R 4230404 + 4096 [seqread]
8,34 0 0 0.353012357 0 m N cfq5101SN / insert_request
8,34 0 98323 0.353013872 5101 I R 4234500 + 4096 [seqread]
8,34 0 0 0.353014218 0 m N cfq5101SN / insert_request
8,34 0 98324 0.353014553 5101 I R 4238596 + 4096 [seqread]
8,34 0 0 0.353014802 0 m N cfq5101SN / insert_request
8,34 0 98325 0.353014992 5101 I R 4242692 + 4096 [seqread]
8,34 0 0 0.353015315 0 m N cfq5101SN / insert_request
Voila, that is the result.
Elapsed time: old : 17.5 sec, new : 11.5 sec
A drawback in my mind is the magic value of 3ms used to wait for more I/O,
which can delay completion by at least 3ms even if the I/O finished earlier.
The reason I selected that value is that it is the value kblockd used for a
long time to plug/unplug the scheduler, until we introduced the explicit
blk_[start|finish]_plug interface. If it is really a problem, we can add a hook
into plug and flush the I/O if someone is waiting for it.
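For reference, the explicit plugging interface mentioned above is used roughly
like this (an illustrative fragment, not part of this patch;
submit_batched_reads() is a placeholder for whatever submits the reads):

	struct blk_plug plug;

	blk_start_plug(&plug);
	/* requests submitted here sit on the per-task plug list and can
	 * be merged instead of being dispatched one at a time */
	submit_batched_reads();
	/* flush the whole batch to the I/O scheduler */
	blk_finish_plug(&plug);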
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
fs/squashfs/block.c | 2 -
fs/squashfs/file.c | 447 ++++++++++++++++++++++++++++++++++++++++++-
fs/squashfs/squashfs.h | 3 +
fs/squashfs/squashfs_fs_sb.h | 4 +
fs/squashfs/super.c | 4 +-
5 files changed, 450 insertions(+), 10 deletions(-)
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index d41bac8..e8bf200 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -76,8 +76,6 @@ static struct buffer_head *get_block_length(struct super_block *sb,
return bh;
}
-
-
int squashfs_decompress_block(struct super_block *sb, int compressed,
void **buffer, struct buffer_head **bh, int nr_bh,
int offset, int length, int srclength, int pages)
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 36508c3..b98965c 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -47,6 +47,7 @@
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>
+#include <linux/swap.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -454,8 +455,55 @@ error_out:
return 0;
}
-static int squashfs_hole_readpage(struct file *file, struct inode *inode,
- int index, struct page *page)
+static int squashfs_hole_readpages(struct inode *inode, int index,
+ struct list_head *page_list)
+{
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int bytes, i, offset = 0;
+ void *pageaddr;
+ struct page *push_page;
+
+ int start_index = index << (msblk->block_log - PAGE_CACHE_SHIFT);
+ int file_end = i_size_read(inode) >> msblk->block_log;
+
+ bytes = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+
+ /*
+ * Loop copying datablock into pages. As the datablock likely covers
+ * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly
+ * grab the pages from the page cache, except for the page that we've
+ * been called to fill.
+ */
+ for (i = start_index; bytes > 0; i++,
+ bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+
+ push_page = list_entry(page_list->prev, struct page, lru);
+ list_del(&push_page->lru);
+
+ pageaddr = kmap_atomic(push_page);
+ memset(pageaddr, 0, PAGE_CACHE_SIZE);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(push_page);
+ SetPageUptodate(push_page);
+
+ lru_cache_add_file(push_page);
+ unlock_page(push_page);
+ page_cache_release(push_page);
+ }
+
+ while (!list_empty(page_list)) {
+ push_page = list_entry(page_list->prev, struct page, lru);
+ list_del(&push_page->lru);
+ page_cache_release(push_page);
+ }
+
+ return 0;
+}
+
+static int squashfs_hole_readpage(struct inode *inode, int index,
+ struct page *page)
{
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
int bytes, i, offset = 0;
@@ -478,8 +526,8 @@ static int squashfs_hole_readpage(struct file *file, struct inode *inode,
bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
struct page *push_page;
- push_page = (page && i == page->index) ? page :
- grab_cache_page_nowait(page->mapping, i);
+ push_page = (i == page->index) ? page :
+ grab_cache_page_nowait(inode->i_mapping, i);
if (!push_page)
continue;
@@ -494,7 +542,7 @@ static int squashfs_hole_readpage(struct file *file, struct inode *inode,
SetPageUptodate(push_page);
skip_page:
unlock_page(push_page);
- if (page && i == page->index)
+ if (i == page->index)
continue;
page_cache_release(push_page);
}
@@ -533,7 +581,7 @@ static int squashfs_regular_readpage(struct file *file, struct page *page)
goto error_out;
if (bsize == 0)
- return squashfs_hole_readpage(file, inode, index, page);
+ return squashfs_hole_readpage(inode, index, page);
/*
* Read and decompress data block
@@ -660,6 +708,391 @@ out:
return 0;
}
+static int squashfs_ra_readblock(struct inode *inode, int index,
+ struct buffer_head **bh, int *nr_bh,
+ int *block_size, u64 *block)
+{
+ int bsize;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int ret, b;
+
+ /*
+ * Reading a datablock from disk. Need to read block list
+ * to get location and block size.
+ */
+ *block_size = read_blocklist(inode, index, block);
+ if (*block_size < 0)
+ return 1;
+
+ if (*block_size == 0)
+ return 0;
+
+ bsize = SQUASHFS_COMPRESSED_SIZE_BLOCK(*block_size);
+ ret = squashfs_read_submit(inode->i_sb, *block, bsize,
+ msblk->block_size, bh, &b);
+ if (ret < 0)
+ return ret;
+
+ *nr_bh = b;
+ return 0;
+}
+
+struct squashfs_ra_private {
+ struct buffer_head **bh;
+ int nr_bh;
+ int block_size;
+ u64 block;
+ void **buffer;
+ struct page **page_array;
+};
+
+/* Caller should free buffer head */
+static int squashfs_ra_read_submit(struct inode *inode, int bindex,
+ struct squashfs_ra_private *ra_priv)
+{
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int ret;
+ struct buffer_head **bh;
+ int nr_bh = -1, bsize;
+ u64 block;
+ bh = kcalloc(((msblk->block_size + msblk->devblksize - 1)
+ >> msblk->devblksize_log2) + 1,
+ sizeof(*bh), GFP_KERNEL);
+ if (!bh)
+ return -ENOMEM;
+
+ ra_priv->bh = bh;
+ ret = squashfs_ra_readblock(inode, bindex, bh, &nr_bh,
+ &bsize, &block);
+ if (ret != 0)
+ goto release_bh;
+
+ ra_priv->nr_bh = nr_bh;
+ ra_priv->block_size = bsize;
+ ra_priv->block = block;
+
+ return 0;
+
+release_bh:
+ kfree(ra_priv->bh);
+ return ret;
+}
+
+static int squashfs_ra_decompress(struct inode *inode, int bindex,
+ struct squashfs_ra_private *ra_priv, gfp_t gfp_mask,
+ struct list_head *page_list)
+{
+ int j;
+ int ret = -ENOMEM;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int pages_per_block;
+
+ void **buffer;
+ struct buffer_head **bh;
+ int block_size, nr_bh;
+ u64 block;
+ struct page **page_array, *page;
+
+ int compressed, offset, length;
+
+ pages_per_block = msblk->block_size >> PAGE_CACHE_SHIFT;
+ pages_per_block = pages_per_block ? pages_per_block : 1;
+
+ bh = ra_priv->bh;
+ block_size = ra_priv->block_size;
+ nr_bh = ra_priv->nr_bh;
+ block = ra_priv->block;
+
+ if (block_size == 0)
+ return squashfs_hole_readpages(inode, bindex, page_list);
+
+ buffer = kcalloc(1 << (msblk->block_log - PAGE_CACHE_SHIFT),
+ sizeof(void *), GFP_KERNEL);
+ if (!buffer)
+ goto out;
+ ra_priv->buffer = buffer;
+
+ page_array = kcalloc(1 << (msblk->block_log - PAGE_CACHE_SHIFT),
+ sizeof(struct page *), GFP_KERNEL);
+ if (!page_array)
+ goto release_buffer;
+ ra_priv->page_array = page_array;
+
+ /* alloc buffer pages */
+ for (j = 0; j < pages_per_block; j++) {
+ page = list_entry(page_list->prev, struct page, lru);
+ list_del(&page->lru);
+ buffer[j] = kmap(page);
+ page_array[j] = page;
+ }
+
+ compressed = SQUASHFS_COMPRESSED_BLOCK(block_size);
+ length = SQUASHFS_COMPRESSED_SIZE_BLOCK(block_size);
+
+ offset = block & ((1 << msblk->devblksize_log2) - 1);
+ length = squashfs_decompress_block(inode->i_sb, compressed, buffer,
+ bh, nr_bh, offset, length, msblk->block_size,
+ pages_per_block);
+
+ for (j = 0; j < pages_per_block; j++) {
+ page = page_array[j];
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ lru_cache_add_file(page);
+ if (!PageLocked(page))
+ printk("unlock page %p j %d\n", page, j);
+ unlock_page(page);
+ }
+
+ ret = 0;
+
+release_buffer:
+ if (ra_priv->buffer) {
+ for (j = 0; j < pages_per_block; j++) {
+ if (!ra_priv->buffer[j])
+ break;
+ kunmap(ra_priv->page_array[j]);
+ page_cache_release(ra_priv->page_array[j]);
+ }
+ kfree(ra_priv->page_array);
+ }
+
+ kfree(ra_priv->buffer);
+out:
+ return ret;
+}
+
+/*
+ * Fill hole pages between page_index and last_page_index.
+ * Return 0 on success, otherwise return 1.
+ * All pages are locked and added to the page cache, so the caller should
+ * add them to the LRU and unlock them.
+ */
+int hole_plugging(struct squashfs_sb_info *msblk, struct list_head *page_list,
+ int start_bindex, int last_bindex,
+ struct address_space *mapping)
+{
+ struct page *page, *tmp_page, *hpage, *tpage;
+ int page_index, last_page_index;
+ gfp_t gfp_mask = mapping_gfp_mask(mapping);
+
+ page_index = start_bindex << (msblk->block_log - PAGE_CACHE_SHIFT);
+ last_page_index = ((last_bindex + 1) <<
+ (msblk->block_log - PAGE_CACHE_SHIFT));
+
+ hpage = list_entry(page_list->prev, struct page, lru);
+ for (; page_index < hpage->index; page_index++) {
+ struct page *new_page = page_cache_alloc_readahead(mapping);
+ if (!new_page)
+ return 1;
+ new_page->index = page_index;
+ list_add(&new_page->lru, &hpage->lru);
+ }
+
+ tpage = list_entry(page_list->next, struct page, lru);
+ page_index = tpage->index + 1;
+ for (; page_index < last_page_index; page_index++) {
+ struct page *new_page = page_cache_alloc_readahead(mapping);
+ if (!new_page)
+ return 1;
+ new_page->index = page_index;
+ list_add(&new_page->lru, page_list);
+ }
+
+ list_for_each_entry_reverse(page, page_list, lru)
+ if (add_to_page_cache(page, mapping, page->index, gfp_mask))
+ goto remove_pagecache;
+ return 0;
+
+remove_pagecache:
+ list_for_each_entry_reverse(tmp_page, page_list, lru) {
+ if (tmp_page == page)
+ break;
+ delete_from_page_cache(tmp_page);
+ unlock_page(tmp_page);
+ }
+ return 1;
+}
+
+struct decomp_work {
+ struct inode *inode;
+ int bindex;
+ struct list_head list;
+ struct list_head pages;
+ struct squashfs_ra_private *ra_priv;
+ gfp_t gfp_mask;
+};
+
+void squashfs_decomp_work(struct work_struct *work)
+{
+ struct inode *inode;
+ struct list_head *pages;
+ struct squashfs_ra_private *ra_priv;
+ struct decomp_work *decomp_work;
+ gfp_t gfp_mask;
+ int bindex;
+ struct squashfs_sb_info *msblk =
+ container_of(work, struct squashfs_sb_info, delay_work.work);
+
+ for (;;) {
+ decomp_work = NULL;
+ spin_lock(&msblk->decomp_lock);
+ if (!list_empty(&msblk->decomp_list)) {
+ decomp_work = list_entry(msblk->decomp_list.prev,
+ struct decomp_work, list);
+ list_del(&decomp_work->list);
+ }
+ spin_unlock(&msblk->decomp_lock);
+ if (!decomp_work)
+ return;
+
+ inode = decomp_work->inode;
+ bindex = decomp_work->bindex;
+ ra_priv = decomp_work->ra_priv;
+ gfp_mask = decomp_work->gfp_mask;
+ pages = &decomp_work->pages;
+
+ if (squashfs_ra_decompress(inode, bindex, ra_priv,
+ gfp_mask, pages)) {
+ struct page *page, *tmp;
+ list_for_each_entry_safe_reverse(page, tmp,
+ pages, lru) {
+ list_del(&page->lru);
+ delete_from_page_cache(page);
+ unlock_page(page);
+ page_cache_release(page);
+ }
+ }
+
+ kfree(ra_priv->bh);
+ kfree(ra_priv);
+ kfree(decomp_work);
+ }
+}
+
+static int move_pages(struct list_head *page_list, struct list_head *pages,
+ int nr)
+{
+ int moved_pages = 0;
+ struct page *page, *tmp_page;
+ list_for_each_entry_safe_reverse(page, tmp_page,
+ page_list, lru) {
+ list_move(&page->lru, pages);
+ moved_pages++;
+ if (moved_pages == nr)
+ break;
+ }
+
+ return moved_pages;
+}
+
+static int squashfs_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *page_list, unsigned nr_pages)
+{
+ struct inode *inode = mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ struct page *hpage, *tpage, *page;
+ struct decomp_work **work;
+ int start_bindex, last_bindex;
+ struct squashfs_ra_private **ra_priv;
+ int pages_per_block, i, ret = -ENOMEM;
+ gfp_t gfp_mask;
+ int nr_blocks;
+
+ gfp_mask = mapping_gfp_mask(mapping);
+
+ hpage = list_entry(page_list->prev, struct page, lru);
+ tpage = list_entry(page_list->next, struct page, lru);
+
+ start_bindex = hpage->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+ last_bindex = tpage->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+
+ if (last_bindex >= (i_size_read(inode) >> msblk->block_log))
+ return 0;
+
+ /* fill with pages for readahead */
+ if (hole_plugging(msblk, page_list, start_bindex,
+ last_bindex, mapping)) {
+ ret = 0;
+ goto out;
+ }
+
+ nr_blocks = last_bindex - start_bindex + 1;
+ ra_priv = kcalloc(nr_blocks, sizeof(*ra_priv), GFP_KERNEL);
+ if (!ra_priv)
+ goto remove_pagecache;
+
+ for (i = 0; i < nr_blocks; i++) {
+ ra_priv[i] = kmalloc(sizeof(**ra_priv), GFP_KERNEL);
+ if (ra_priv[i] == NULL)
+ goto release_ra_priv;
+ }
+
+ work = kcalloc(nr_blocks, sizeof(*work), GFP_KERNEL);
+ if (!work)
+ goto release_ra_priv;
+
+ for (i = 0; i < nr_blocks; i++) {
+ work[i] = kmalloc(sizeof(**work), GFP_KERNEL);
+ if (!work[i])
+ goto release_work;
+ }
+
+ for (i = 0; i < nr_blocks; i++) {
+ ret = squashfs_ra_read_submit(inode, start_bindex + i ,
+ ra_priv[i]);
+ if (ret)
+ goto release_ra_priv;
+ }
+
+ ret = 0;
+
+ queue_delayed_work(system_unbound_wq, &msblk->delay_work,
+ msecs_to_jiffies(3));
+
+ pages_per_block = msblk->block_size >> PAGE_CACHE_SHIFT;
+ pages_per_block = pages_per_block ? pages_per_block : 1;
+
+ for (i = 0; i < nr_blocks; i++) {
+ struct decomp_work *decomp_work = work[i];
+
+ INIT_LIST_HEAD(&decomp_work->pages);
+ decomp_work->bindex = start_bindex + i;
+ decomp_work->ra_priv = ra_priv[i];
+ decomp_work->gfp_mask = gfp_mask;
+ decomp_work->inode = inode;
+
+ move_pages(page_list, &decomp_work->pages,
+ pages_per_block);
+
+ spin_lock(&msblk->decomp_lock);
+ list_add(&decomp_work->list, &msblk->decomp_list);
+ spin_unlock(&msblk->decomp_lock);
+ }
+
+ kfree(ra_priv);
+ kfree(work);
+
+ return ret;
+
+release_work:
+ for (i = 0; i < nr_blocks; i++)
+ kfree(work[i]);
+ kfree(work);
+release_ra_priv:
+ for (i = 0; i < nr_blocks; i++)
+ kfree(ra_priv[i]);
+ kfree(ra_priv);
+remove_pagecache:
+ list_for_each_entry_reverse(page, page_list, lru) {
+ delete_from_page_cache(page);
+ unlock_page(page);
+ }
+out:
+ return ret;
+}
+
const struct address_space_operations squashfs_aops = {
- .readpage = squashfs_readpage
+ .readpage = squashfs_readpage,
+ .readpages = squashfs_readpages,
};
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 4bb1f90..2de96a7 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -34,6 +34,8 @@ extern int squashfs_read_metablock(struct super_block *, void **, u64, int,
u64 *, int, int);
extern int squashfs_read_submit(struct super_block *, u64, int, int,
struct buffer_head **, int *);
+extern int squashfs_decompress_block(struct super_block *, int, void **,
+ struct buffer_head **, int, int, int, int, int);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -89,6 +91,7 @@ extern const struct export_operations squashfs_export_ops;
/* file.c */
extern const struct address_space_operations squashfs_aops;
+extern void squashfs_decomp_work(struct work_struct *work);
/* inode.c */
extern const struct inode_operations squashfs_inode_ops;
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 0a209e6..bdcc0f2 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -79,5 +79,9 @@ struct squashfs_sb_info {
wait_queue_head_t decomp_wait_queue;
int nr_avail_decomp;
unsigned short flags;
+
+ struct delayed_work delay_work;
+ spinlock_t decomp_lock;
+ struct list_head decomp_list;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 70aa9c4..78d12d6 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -73,7 +73,6 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem(short
return decompressor;
}
-
static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
{
struct squashfs_sb_info *msblk;
@@ -98,6 +97,9 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
msblk->devblksize_log2 = ffz(~msblk->devblksize);
+ spin_lock_init(&msblk->decomp_lock);
+ INIT_DELAYED_WORK(&msblk->delay_work, squashfs_decomp_work);
+ INIT_LIST_HEAD(&msblk->decomp_list);
INIT_LIST_HEAD(&msblk->strm_list);
mutex_init(&msblk->comp_strm_mutex);
init_waitqueue_head(&msblk->decomp_wait_queue);
--
1.8.2.1
--
Kind regards,
Minchan Kim
* Re: [RFC 5/5] squashfs: support readpages
2013-09-17 1:52 ` Minchan Kim
@ 2013-09-17 1:59 ` Minchan Kim
0 siblings, 0 replies; 12+ messages in thread
From: Minchan Kim @ 2013-09-17 1:59 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee
And I found a regression in the iozone random read test.
The patch below fixes it.
>From 546a541c6e29b321e4908ebe0f8aac506eb5b3af Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 17 Sep 2013 10:20:56 +0900
Subject: [PATCH] squashfs: read synchronously when readahead window is small
Normally, MM does readahead with a small window size, and in that case
our plugging logic can hurt performance.
If the readahead window is smaller than the compressed block size, fall back
to the old synchronous read.
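As a worked example (the numbers are illustrative): with 128 KiB compressed
blocks (block_log = 17) and 4 KiB pages (PAGE_CACHE_SHIFT = 12), a readahead
batch covering page indexes 0..31 gives

	start_bindex = 0  >> (17 - 12) = 0
	last_bindex  = 31 >> (17 - 12) = 0

so the whole window falls inside one compressed block, start_bindex equals
last_bindex, and the patch below takes the synchronous squashfs_readpage() path.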
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
fs/squashfs/file.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index b98965c..2537eeb 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -1007,6 +1007,24 @@ static int squashfs_readpages(struct file *file, struct address_space *mapping,
start_bindex = hpage->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
last_bindex = tpage->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+ /*
+ * Normally, the MM readahead window is smaller than our compressed
+ * block size. In that case plugging could hurt performance, so
+ * fall back to the old synchronous read.
+ */
+ if (start_bindex == last_bindex) {
+ list_del(&hpage->lru);
+ if (add_to_page_cache_lru(hpage, mapping, hpage->index,
+ GFP_KERNEL)) {
+ page_cache_release(hpage);
+ return 0;
+ }
+
+ ret = squashfs_readpage(file, hpage);
+ page_cache_release(hpage);
+ return ret;
+ }
+
if (last_bindex >= (i_size_read(inode) >> msblk->block_log))
return 0;
--
1.7.9.5
--
Kind regards,
Minchan Kim
* Re: [RFC,4/5] squashfs: support multiple decompress stream buffer
2013-10-07 3:41 [RFC,4/5] squashfs: support multiple decompress stream buffer Phillip Lougher
@ 2013-10-07 6:09 ` Minchan Kim
2013-10-07 6:35 ` Minchan Kim
0 siblings, 1 reply; 12+ messages in thread
From: Minchan Kim @ 2013-10-07 6:09 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee
Hello Phillip,
On Mon, Oct 07, 2013 at 04:41:20AM +0100, Phillip Lougher wrote:
> Hi,
>
> This is a partial review, based on the stuff I've managed to review
> so far!
>
> 1. This is a substantial performance improvement, which is great
> stuff!
Thanks.
>
> But like the "squashfs: remove cache for normal data page" patch
> it needs to be optional, with the previous behaviour retained as
> default. Again, without wanting to sound like a broken (vinyl)
Just FYI, I plan to drop "squashfs: remove cache for normal data page"
in the next submission since, as you pointed out, it could cause a regression.
So my plan is that squashfs_readpage keeps using the cache but
squashfs_readpages does not.
If you have any concerns about this design, please tell me.
> record, this is because as maintainer I get to worry about breaking
> things for existing users of Squashfs when they upgrade their kernel.
>
> I know from consulting experience, many users of Squashfs are "on the
> edge" of memory and CPU performance, and are using Squashfs to squeeze
> a bit more performance out of a maxed out system.
>
> In these cases, changing Squashfs so it uses more memory and more
> CPU than previously (and in this patch a lot more memory and CPU, as
> it will try to kick off multiple decompressors per core) is a bit
> like robbing Peter to pay Paul: Squashfs may take CPU and memory
> that used to be available and are needed elsewhere.
>
> So, basically, users need to be able to explicitly select this.
Okay.
>
> 2. The patch breaks the decompressor interface. Compressor option
> parsing is implemented in the decompressor init() function, which
> means every time a new decompressor is dynamically instantiated, we
> need to read and parse the compression options again and again. This
> is an unnecessary performance degradation.
>
> Compressor option parsing and reading should be split out of init()
> and into a separate function.
Indeed.
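A rough sketch of that split, just to check I understand it (the names
comp_opts and parse_opts below are made up for illustration, not a final
interface):

/*
 * Hypothetical: read the compression options from disk once at mount
 * time and cache the parsed result in squashfs_sb_info, so dynamically
 * created decompressor streams never re-read the metablock.
 */
static int squashfs_init_comp_opts(struct super_block *sb)
{
	struct squashfs_sb_info *msblk = sb->s_fs_info;
	void *buffer = NULL;
	int length = 0;

	if (SQUASHFS_COMP_OPTS(msblk->flags)) {
		buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
		if (buffer == NULL)
			return -ENOMEM;

		length = squashfs_read_metablock(sb, &buffer,
			sizeof(struct squashfs_super_block), 0, NULL,
			PAGE_CACHE_SIZE, 1);
		if (length < 0) {
			kfree(buffer);
			return length;
		}
	}

	/* the decompressor keeps whatever parsed state it needs */
	msblk->comp_opts = msblk->decompressor->parse_opts(msblk, buffer,
							   length);
	kfree(buffer);
	return IS_ERR(msblk->comp_opts) ? PTR_ERR(msblk->comp_opts) : 0;
}

squashfs_decompressor_init() would then only allocate the per-stream state
and reuse msblk->comp_opts instead of reading the options from disk again.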
>
> Compression option parsing and reading is quite obscure; it is a
> late addition to the filesystem format, and had to be squeezed into
> the existing format. This means it can be difficult to get it right
> as the specification exists only in my head.
Hmm, I have a question. Please see below.
>
> I'll help you here.
>
> Specific comments follow in the patch.
>
> Phillip
>
>
>
> >Until now squashfs has used only one stream buffer for decompression,
> >which hurts concurrent read performance because of the lock taken to
> >get the stream buffer.
> >
> >At filesystem mount, the number of stream buffers starts at
> >num_online_cpus() and grows up to num_online_cpus() * (2M / block_size) * 2.
> >The rationale is that MM does readahead in 2M chunks to avoid pinning too
> >much memory, and while one request is waiting we should request another
> >chunk. That's why I multiply by 2.
> >
> >If it turns out to use too much memory, we can add a shrinker routine.
> >
> >I did test following as
> >
> >Two 1G file dd read
> >
> >dd if=test/test1.dat of=/dev/null &
> >dd if=test/test2.dat of=/dev/null &
> >
> >old : 60sec -> new : 30 sec
> >
> >Signed-off-by: Minchan Kim <minchan@kernel.org>
> >
> >---
> >fs/squashfs/block.c | 9 ++--
> >fs/squashfs/decompressor.c | 105 ++++++++++++++++++++++++++++++++++++++----
> >fs/squashfs/decompressor.h | 27 +++++------
> >fs/squashfs/lzo_wrapper.c | 12 ++---
> >fs/squashfs/squashfs.h | 3 +-
> >fs/squashfs/squashfs_fs_sb.h | 7 ++-
> >fs/squashfs/super.c | 40 ++++++++++++----
> >fs/squashfs/xz_wrapper.c | 20 ++++----
> >fs/squashfs/zlib_wrapper.c | 12 ++---
> >9 files changed, 168 insertions(+), 67 deletions(-)
> >
> >diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
> >index f33c6ef..d41bac8 100644
> >--- a/fs/squashfs/block.c
> >+++ b/fs/squashfs/block.c
> >@@ -78,14 +78,14 @@ static struct buffer_head *get_block_length(struct super_block *sb,
> >
> >
> >
> >-int squashfs_decompress_block(struct squashfs_sb_info *msblk, int compressed,
> >+int squashfs_decompress_block(struct super_block *sb, int compressed,
> > void **buffer, struct buffer_head **bh, int nr_bh,
> > int offset, int length, int srclength, int pages)
> >{
> > int k = 0;
> >
> > if (compressed) {
> >- length = squashfs_decompress(msblk, buffer, bh, nr_bh,
> >+ length = squashfs_decompress(sb, buffer, bh, nr_bh,
> > offset, length, srclength, pages);
> > if (length < 0)
> > goto out;
> >@@ -93,6 +93,7 @@ int squashfs_decompress_block(struct squashfs_sb_info *msblk, int compressed,
> > /*
> > * Block is uncompressed.
> > */
> >+ struct squashfs_sb_info *msblk = sb->s_fs_info;
> > int bytes, in, avail, pg_offset = 0, page = 0;
> >
> > for (bytes = length; k < nr_bh; k++) {
> >@@ -262,8 +263,8 @@ int squashfs_read_metablock(struct super_block *sb, void **buffer, u64 index,
> > }
> > ll_rw_block(READ, b - 1, bh + 1);
> >
> >- length = squashfs_decompress_block(msblk, compressed, buffer, bh, b,
> >- offset, length, srclength, pages);
> >+ length = squashfs_decompress_block(sb, compressed, buffer, bh,
> >+ b, offset, length, srclength, pages);
> > if (length < 0)
> > goto read_failure;
> >
> >diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
> >index e47453e..ed35b32 100644
> >--- a/fs/squashfs/decompressor.c
> >+++ b/fs/squashfs/decompressor.c
> >@@ -25,6 +25,8 @@
> >#include <linux/mutex.h>
> >#include <linux/slab.h>
> >#include <linux/buffer_head.h>
> >+#include <linux/sched.h>
> >+#include <linux/wait.h>
> >
> >#include "squashfs_fs.h"
> >#include "squashfs_fs_sb.h"
> >@@ -70,6 +72,80 @@ static const struct squashfs_decompressor *decompressor[] = {
> > &squashfs_unknown_comp_ops
> >};
> >
> >+void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
> >+ struct squashfs_decomp_strm *stream)
> >+{
> >+ if (msblk->decompressor)
> >+ msblk->decompressor->free(stream->strm);
> >+ kfree(stream);
> >+}
> >+
> >+static void *squashfs_get_decomp_strm(struct squashfs_sb_info *msblk)
> >+{
> >+ struct squashfs_decomp_strm *strm = NULL;
> >+ mutex_lock(&msblk->comp_strm_mutex);
> >+ if (!list_empty(&msblk->strm_list)) {
> >+ strm = list_entry(msblk->strm_list.next,
> >+ struct squashfs_decomp_strm, list);
> >+ list_del(&strm->list);
> >+ msblk->nr_avail_decomp--;
> >+ WARN_ON(msblk->nr_avail_decomp < 0);
> >+ }
> >+ mutex_unlock(&msblk->comp_strm_mutex);
> >+ return strm;
> >+}
> >+
> >+static bool full_decomp_strm(struct squashfs_sb_info *msblk)
> >+{
> >+	/* MM does readahead in 2M units */
> >+ int blocks = 2 * 1024 * 1024 / msblk->block_size;
> >+ return msblk->nr_avail_decomp > (num_online_cpus() * blocks * 2);
> >+}
> >+
> >+static void squashfs_put_decomp_strm(struct squashfs_sb_info *msblk,
> >+ struct squashfs_decomp_strm *strm)
> >+{
> >+ mutex_lock(&msblk->comp_strm_mutex);
> >+ if (full_decomp_strm(msblk)) {
> >+ mutex_unlock(&msblk->comp_strm_mutex);
> >+ squashfs_decompressor_free(msblk, strm);
> >+ return;
> >+ }
> >+
> >+ list_add(&strm->list, &msblk->strm_list);
> >+ msblk->nr_avail_decomp++;
> >+ mutex_unlock(&msblk->comp_strm_mutex);
> >+ wake_up(&msblk->decomp_wait_queue);
> >+}
> >+
> >+int squashfs_decompress(struct super_block *sb, void **buffer,
> >+ struct buffer_head **bh, int b, int offset, int length,
> >+ int srclength, int pages)
> >+{
> >+ int ret;
> >+ struct squashfs_decomp_strm *strm;
> >+ struct squashfs_sb_info *msblk = sb->s_fs_info;
> >+ while (1) {
> >+ strm = squashfs_get_decomp_strm(msblk);
> >+ if (strm)
> >+ break;
> >+
> >+ if (!full_decomp_strm(msblk)) {
> >+ strm = squashfs_decompressor_init(sb);
>
> here you call squashfs_decompressor_init() dynamically to instantiate a new
> decompressor; this needs to read and parse the compression options again.
>
> >+ if (strm)
> >+ break;
> >+ }
> >+
> >+ wait_event(msblk->decomp_wait_queue, msblk->nr_avail_decomp);
> >+ continue;
> >+ }
> >+
> >+ ret = msblk->decompressor->decompress(msblk, strm->strm, buffer, bh,
> >+ b, offset, length, srclength, pages);
> >+
> >+ squashfs_put_decomp_strm(msblk, strm);
> >+ return ret;
> >+}
> >
> >const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
> >{
> >@@ -82,35 +158,48 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
> > return decompressor[i];
> >}
> >
> >-
> >-void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
> >+struct squashfs_decomp_strm *squashfs_decompressor_init(struct super_block *sb)
> >{
> > struct squashfs_sb_info *msblk = sb->s_fs_info;
> >+ struct squashfs_decomp_strm *decomp_strm = NULL;
> > void *strm, *buffer = NULL;
> > int length = 0;
> >
> >+ decomp_strm = kmalloc(sizeof(struct squashfs_decomp_strm), GFP_KERNEL);
> >+ if (!decomp_strm)
> >+ return ERR_PTR(-ENOMEM);
> > /*
> > * Read decompressor specific options from file system if present
> > */
> >- if (SQUASHFS_COMP_OPTS(flags)) {
> >+ if (SQUASHFS_COMP_OPTS(msblk->flags)) {
> > buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
> >- if (buffer == NULL)
> >- return ERR_PTR(-ENOMEM);
> >+ if (buffer == NULL) {
> >+ decomp_strm = ERR_PTR(-ENOMEM);
> >+ goto finished;
> >+ }
> >
> > length = squashfs_read_metablock(sb, &buffer,
> > sizeof(struct squashfs_super_block), 0, NULL,
> > PAGE_CACHE_SIZE, 1);
> >
>
> This reads the compressor options on each decompressor init(). This should
> only be done once at mount time.
>
> > if (length < 0) {
> >- strm = ERR_PTR(length);
> >+ decomp_strm = ERR_PTR(length);
> > goto finished;
> > }
> > }
> >
>
>
> > strm = msblk->decompressor->init(msblk, buffer, length);
> >+ if (IS_ERR(strm)) {
> >+ decomp_strm = strm;
> >+ goto finished;
> >+ }
> >
> >-finished:
> >+ decomp_strm->strm = strm;
> > kfree(buffer);
> >+ return decomp_strm;
> >
> >- return strm;
> >+finished:
> >+ kfree(decomp_strm);
> >+ kfree(buffer);
> >+ return decomp_strm;
> >}
> >diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
> >index 330073e..207c810 100644
> >--- a/fs/squashfs/decompressor.h
> >+++ b/fs/squashfs/decompressor.h
> >@@ -26,27 +26,24 @@
> >struct squashfs_decompressor {
> > void *(*init)(struct squashfs_sb_info *, void *, int);
> > void (*free)(void *);
> >- int (*decompress)(struct squashfs_sb_info *, void **,
> >+ int (*decompress)(struct squashfs_sb_info *, void*, void **,
> > struct buffer_head **, int, int, int, int, int);
> > int id;
> > char *name;
> > int supported;
> >};
> >
> >-static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
> >- void *s)
> >-{
> >- if (msblk->decompressor)
> >- msblk->decompressor->free(s);
> >-}
> >-
> >-static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
> >- void **buffer, struct buffer_head **bh, int b, int offset, int length,
> >- int srclength, int pages)
> >-{
> >- return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
> >- length, srclength, pages);
> >-}
> >+struct squashfs_decomp_strm {
> >+ void *strm;
> >+ struct list_head list;
> >+};
> >+
> >+int squashfs_decompress(struct super_block *sb, void **buffer,
> >+ struct buffer_head **bh, int b, int offset, int length,
> >+ int srclength, int pages);
> >+
> >+void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
> >+ struct squashfs_decomp_strm *stream);
> >
> >#ifdef CONFIG_SQUASHFS_XZ
> >extern const struct squashfs_decompressor squashfs_xz_comp_ops;
> >diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
> >index 00f4dfc..e1bf135 100644
> >--- a/fs/squashfs/lzo_wrapper.c
> >+++ b/fs/squashfs/lzo_wrapper.c
> >@@ -74,17 +74,15 @@ static void lzo_free(void *strm)
> >}
> >
> >
> >-static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
> >- struct buffer_head **bh, int b, int offset, int length, int srclength,
> >- int pages)
> >+static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
> >+ void **buffer, struct buffer_head **bh, int b, int offset,
> >+ int length, int srclength, int pages)
> >{
> >- struct squashfs_lzo *stream = msblk->stream;
> >+ struct squashfs_lzo *stream = strm;
> > void *buff = stream->input;
> > int avail, i, bytes = length, res;
> > size_t out_len = srclength;
> >
> >- mutex_lock(&msblk->read_data_mutex);
> >-
> > for (i = 0; i < b; i++) {
> > wait_on_buffer(bh[i]);
> > if (!buffer_uptodate(bh[i]))
> >@@ -111,7 +109,6 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
> > bytes -= avail;
> > }
> >
> >- mutex_unlock(&msblk->read_data_mutex);
> > return res;
> >
> >block_release:
> >@@ -119,7 +116,6 @@ block_release:
> > put_bh(bh[i]);
> >
> >failed:
> >- mutex_unlock(&msblk->read_data_mutex);
> >
> > ERROR("lzo decompression failed, data probably corrupt\n");
> > return -EIO;
> >diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
> >index 405baed..4bb1f90 100644
> >--- a/fs/squashfs/squashfs.h
> >+++ b/fs/squashfs/squashfs.h
> >@@ -52,7 +52,8 @@ extern void *squashfs_read_table(struct super_block *, u64, int);
> >
> >/* decompressor.c */
> >extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
> >-extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
> >+extern struct squashfs_decomp_strm *squashfs_decompressor_init(
> >+ struct super_block *);
> >
> >/* export.c */
> >extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
> >diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
> >index 52934a2..0a209e6 100644
> >--- a/fs/squashfs/squashfs_fs_sb.h
> >+++ b/fs/squashfs/squashfs_fs_sb.h
> >@@ -63,10 +63,10 @@ struct squashfs_sb_info {
> > __le64 *id_table;
> > __le64 *fragment_index;
> > __le64 *xattr_id_table;
> >- struct mutex read_data_mutex;
> >+ struct mutex comp_strm_mutex;
> > struct mutex meta_index_mutex;
> > struct meta_index *meta_index;
> >- void *stream;
> >+ struct list_head strm_list;
> > __le64 *inode_lookup_table;
> > u64 inode_table;
> > u64 directory_table;
> >@@ -76,5 +76,8 @@ struct squashfs_sb_info {
> > long long bytes_used;
> > unsigned int inodes;
> > int xattr_ids;
> >+ wait_queue_head_t decomp_wait_queue;
> >+ int nr_avail_decomp;
> >+ unsigned short flags;
> >};
> >#endif
> >diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
> >index 60553a9..70aa9c4 100644
> >--- a/fs/squashfs/super.c
> >+++ b/fs/squashfs/super.c
> >@@ -84,7 +84,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
> > unsigned short flags;
> > unsigned int fragments;
> > u64 lookup_table_start, xattr_id_table_start, next_table;
> >- int err;
> >+ int err, i;
> >
> > TRACE("Entered squashfs_fill_superblock\n");
> >
> >@@ -98,7 +98,9 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
> > msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
> > msblk->devblksize_log2 = ffz(~msblk->devblksize);
> >
> >- mutex_init(&msblk->read_data_mutex);
> >+ INIT_LIST_HEAD(&msblk->strm_list);
> >+ mutex_init(&msblk->comp_strm_mutex);
> >+ init_waitqueue_head(&msblk->decomp_wait_queue);
>
> This should be done via a helper in decompressor.c, i.e. the implementation
> shouldn't be visible here.
>
> When I added the decompressor framework, I should have done this.
>
> > mutex_init(&msblk->meta_index_mutex);
> >
> > /*
> >@@ -176,6 +178,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
> > msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
> > msblk->inodes = le32_to_cpu(sblk->inodes);
> > flags = le16_to_cpu(sblk->flags);
> >+ msblk->flags = flags;
> >
>
> ha, the superblock flags should only be needed at mount time. After
> mount time there shouldn't be anything in flags we need to look at.
>
> You need to do this because flags is needed for the decompressor init
> function (COMP_OPTS(flags)) which is now called after mount time.
>
> Once the compressor options parsing is moved back to being only
> at mount time, you won't need to store the flags anymore.
Hmm, I'd like to clarify your point further.
I agree it's an unnecessary performance degradation to read the compressor
options from storage every time we create a new stream buffer.
But we still need those options to create a new stream buffer dynamically
(e.g. xz_dec_init), so they have to be kept in memory somewhere, even if we
remove the flags field from msblk.
Am I missing something?
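
Just to illustrate the dependency with a rough sketch (names here are made up
and it assumes <linux/xz.h>): creating a new xz stream needs the dictionary
size, and that value only comes from the on-disk compressor options, so the
parsed result has to live in memory somewhere:

struct xz_opts {
	int dict_size;	/* parsed once from the on-disk compressor options */
};

static void *new_xz_stream(const struct xz_opts *opts)
{
	/* without the cached dict_size we cannot size the decoder */
	return xz_dec_init(XZ_PREALLOC, opts->dict_size);
}
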
--
Kind regards,
Minchan Kim
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC,4/5] squashfs: support multiple decompress stream buffer
2013-10-07 6:09 ` Minchan Kim
@ 2013-10-07 6:35 ` Minchan Kim
2013-10-08 1:25 ` Minchan Kim
0 siblings, 1 reply; 12+ messages in thread
From: Minchan Kim @ 2013-10-07 6:35 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee
On Mon, Oct 07, 2013 at 03:09:51PM +0900, Minchan Kim wrote:
> Hello Phillip,
>
> On Mon, Oct 07, 2013 at 04:41:20AM +0100, Phillip Lougher wrote:
> > Hi,
> >
> > This is a partial review, based on the stuff I've managed to review
> > so far!
> >
> > 1. This is a substantial performance improvement, which is great
> > stuff!
>
> Thanks.
>
> >
> > But like the "squashfs: remove cache for normal data page" patch
> > it needs to be optional, with the previous behaviour retained as
> > default. Again, without wanting to sound like a broken (vinyl)
>
> Just FYI, I plan to drop "squashfs: remove cache for normal
> data page" in the next submission, since as you pointed out it could cause
> a regression. So my plan is that squashfs_readpage keeps using the cache but
> squashfs_readpages will not use the cache.
> If you have any concern in my design, please tell me.
>
> > record, this is because as maintainer I get to worry about breaking
> > things for existing users of Squashfs when they upgrade their kernel.
> >
> > I know from consulting experience, many users of Squashfs are "on the
> > edge" of memory and CPU performance, and are using Squashfs to squeeze
> > a bit more performance out of a maxed out system.
> >
> > In these cases, changing Squashfs so it uses more memory and more
> > CPU than previously (and in this patch a lot more memory and CPU as
> > it will try and kick off multiple decompressors per core) is a bit
> > like robbing Peter to pay Paul, Squashfs may take CPU and memory
> > that are needed elsewhere, and used to be available.
> >
> > So, basically, users need to be able to explicitly select this.
>
> Okay.
>
> >
> > 2. The patch breaks the decompressor interface. Compressor option
> > parsing is implemented in the decompressor init() function, which
> > means every time a new decompressor is dynamically instantiated, we
> > need to read and parse the compression options again and again. This
> > is an unnecessary performance degradation.
> >
> > Compressor option parsing and reading should be split out of init()
> > and into a separate function.
>
> Indeed.
>
> >
> > Compression option parsing and reading is quite obscure, it is a
> > late addition to the filesystem format, and had to be squeezed into
> > the existing format. This means it can be difficult to get it right
> > as the specification exists only in my head.
>
> Hmm, I had a question. Please see below.
>
> >
> > I'll help you here.
> >
> > Specific comments follow in the patch.
> >
> > Phillip
> >
> >
> >
> > >So far squashfs has used only one stream buffer for decompression,
> > >so it hurts concurrent read performance due to the locking needed to get
> > >the stream buffer.
> > >
> > >When the file system is mounted, the number of stream buffers starts at
> > >num_online_cpus() and grows up to num_online_cpus() * 2M / block_size * 2.
> > >The rationale is that MM does readahead in 2M chunks to avoid pinning too
> > >much memory, and while one request is waiting we should be able to request
> > >another chunk. That's why I multiply by 2.
> > >
> > >If it turns out to use too much memory, we can add a shrinker routine.
> > >
> > >I tested the following:
> > >
> > >Two 1G file dd read
> > >
> > >dd if=test/test1.dat of=/dev/null &
> > >dd if=test/test2.dat of=/dev/null &
> > >
> > >old : 60sec -> new : 30 sec
> > >
> > >Signed-off-by: Minchan Kim <minchan@kernel.org>
> > >
> > >---
> > >fs/squashfs/block.c | 9 ++--
> > >fs/squashfs/decompressor.c | 105 ++++++++++++++++++++++++++++++++++++++----
> > >fs/squashfs/decompressor.h | 27 +++++------
> > >fs/squashfs/lzo_wrapper.c | 12 ++---
> > >fs/squashfs/squashfs.h | 3 +-
> > >fs/squashfs/squashfs_fs_sb.h | 7 ++-
> > >fs/squashfs/super.c | 40 ++++++++++++----
> > >fs/squashfs/xz_wrapper.c | 20 ++++----
> > >fs/squashfs/zlib_wrapper.c | 12 ++---
> > >9 files changed, 168 insertions(+), 67 deletions(-)
> > >
> > >diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
> > >index f33c6ef..d41bac8 100644
> > >--- a/fs/squashfs/block.c
> > >+++ b/fs/squashfs/block.c
> > >@@ -78,14 +78,14 @@ static struct buffer_head *get_block_length(struct super_block *sb,
> > >
> > >
> > >
> > >-int squashfs_decompress_block(struct squashfs_sb_info *msblk, int compressed,
> > >+int squashfs_decompress_block(struct super_block *sb, int compressed,
> > > void **buffer, struct buffer_head **bh, int nr_bh,
> > > int offset, int length, int srclength, int pages)
> > >{
> > > int k = 0;
> > >
> > > if (compressed) {
> > >- length = squashfs_decompress(msblk, buffer, bh, nr_bh,
> > >+ length = squashfs_decompress(sb, buffer, bh, nr_bh,
> > > offset, length, srclength, pages);
> > > if (length < 0)
> > > goto out;
> > >@@ -93,6 +93,7 @@ int squashfs_decompress_block(struct squashfs_sb_info *msblk, int compressed,
> > > /*
> > > * Block is uncompressed.
> > > */
> > >+ struct squashfs_sb_info *msblk = sb->s_fs_info;
> > > int bytes, in, avail, pg_offset = 0, page = 0;
> > >
> > > for (bytes = length; k < nr_bh; k++) {
> > >@@ -262,8 +263,8 @@ int squashfs_read_metablock(struct super_block *sb, void **buffer, u64 index,
> > > }
> > > ll_rw_block(READ, b - 1, bh + 1);
> > >
> > >- length = squashfs_decompress_block(msblk, compressed, buffer, bh, b,
> > >- offset, length, srclength, pages);
> > >+ length = squashfs_decompress_block(sb, compressed, buffer, bh,
> > >+ b, offset, length, srclength, pages);
> > > if (length < 0)
> > > goto read_failure;
> > >
> > >diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
> > >index e47453e..ed35b32 100644
> > >--- a/fs/squashfs/decompressor.c
> > >+++ b/fs/squashfs/decompressor.c
> > >@@ -25,6 +25,8 @@
> > >#include <linux/mutex.h>
> > >#include <linux/slab.h>
> > >#include <linux/buffer_head.h>
> > >+#include <linux/sched.h>
> > >+#include <linux/wait.h>
> > >
> > >#include "squashfs_fs.h"
> > >#include "squashfs_fs_sb.h"
> > >@@ -70,6 +72,80 @@ static const struct squashfs_decompressor *decompressor[] = {
> > > &squashfs_unknown_comp_ops
> > >};
> > >
> > >+void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
> > >+ struct squashfs_decomp_strm *stream)
> > >+{
> > >+ if (msblk->decompressor)
> > >+ msblk->decompressor->free(stream->strm);
> > >+ kfree(stream);
> > >+}
> > >+
> > >+static void *squashfs_get_decomp_strm(struct squashfs_sb_info *msblk)
> > >+{
> > >+ struct squashfs_decomp_strm *strm = NULL;
> > >+ mutex_lock(&msblk->comp_strm_mutex);
> > >+ if (!list_empty(&msblk->strm_list)) {
> > >+ strm = list_entry(msblk->strm_list.next,
> > >+ struct squashfs_decomp_strm, list);
> > >+ list_del(&strm->list);
> > >+ msblk->nr_avail_decomp--;
> > >+ WARN_ON(msblk->nr_avail_decomp < 0);
> > >+ }
> > >+ mutex_unlock(&msblk->comp_strm_mutex);
> > >+ return strm;
> > >+}
> > >+
> > >+static bool full_decomp_strm(struct squashfs_sb_info *msblk)
> > >+{
> > >+ /* MM does readahead in 2M units */
> > >+ int blocks = 2 * 1024 * 1024 / msblk->block_size;
> > >+ return msblk->nr_avail_decomp > (num_online_cpus() * blocks * 2);
> > >+}
> > >+
> > >+static void squashfs_put_decomp_strm(struct squashfs_sb_info *msblk,
> > >+ struct squashfs_decomp_strm *strm)
> > >+{
> > >+ mutex_lock(&msblk->comp_strm_mutex);
> > >+ if (full_decomp_strm(msblk)) {
> > >+ mutex_unlock(&msblk->comp_strm_mutex);
> > >+ squashfs_decompressor_free(msblk, strm);
> > >+ return;
> > >+ }
> > >+
> > >+ list_add(&strm->list, &msblk->strm_list);
> > >+ msblk->nr_avail_decomp++;
> > >+ mutex_unlock(&msblk->comp_strm_mutex);
> > >+ wake_up(&msblk->decomp_wait_queue);
> > >+}
> > >+
> > >+int squashfs_decompress(struct super_block *sb, void **buffer,
> > >+ struct buffer_head **bh, int b, int offset, int length,
> > >+ int srclength, int pages)
> > >+{
> > >+ int ret;
> > >+ struct squashfs_decomp_strm *strm;
> > >+ struct squashfs_sb_info *msblk = sb->s_fs_info;
> > >+ while (1) {
> > >+ strm = squashfs_get_decomp_strm(msblk);
> > >+ if (strm)
> > >+ break;
> > >+
> > >+ if (!full_decomp_strm(msblk)) {
> > >+ strm = squashfs_decompressor_init(sb);
> >
> > here you call squashfs_decompressor_init() dynamically to instantiate a new
> > decompressor; this needs to read and parse the compression options again.
> >
> > >+ if (strm)
> > >+ break;
> > >+ }
> > >+
> > >+ wait_event(msblk->decomp_wait_queue, msblk->nr_avail_decomp);
> > >+ continue;
> > >+ }
> > >+
> > >+ ret = msblk->decompressor->decompress(msblk, strm->strm, buffer, bh,
> > >+ b, offset, length, srclength, pages);
> > >+
> > >+ squashfs_put_decomp_strm(msblk, strm);
> > >+ return ret;
> > >+}
> > >
> > >const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
> > >{
> > >@@ -82,35 +158,48 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
> > > return decompressor[i];
> > >}
> > >
> > >-
> > >-void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
> > >+struct squashfs_decomp_strm *squashfs_decompressor_init(struct super_block *sb)
> > >{
> > > struct squashfs_sb_info *msblk = sb->s_fs_info;
> > >+ struct squashfs_decomp_strm *decomp_strm = NULL;
> > > void *strm, *buffer = NULL;
> > > int length = 0;
> > >
> > >+ decomp_strm = kmalloc(sizeof(struct squashfs_decomp_strm), GFP_KERNEL);
> > >+ if (!decomp_strm)
> > >+ return ERR_PTR(-ENOMEM);
> > > /*
> > > * Read decompressor specific options from file system if present
> > > */
> > >- if (SQUASHFS_COMP_OPTS(flags)) {
> > >+ if (SQUASHFS_COMP_OPTS(msblk->flags)) {
> > > buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
> > >- if (buffer == NULL)
> > >- return ERR_PTR(-ENOMEM);
> > >+ if (buffer == NULL) {
> > >+ decomp_strm = ERR_PTR(-ENOMEM);
> > >+ goto finished;
> > >+ }
> > >
> > > length = squashfs_read_metablock(sb, &buffer,
> > > sizeof(struct squashfs_super_block), 0, NULL,
> > > PAGE_CACHE_SIZE, 1);
> > >
> >
> > This reads the compressor options each decompressor init(). This should
> > only be done once at mount time.
> >
> > > if (length < 0) {
> > >- strm = ERR_PTR(length);
> > >+ decomp_strm = ERR_PTR(length);
> > > goto finished;
> > > }
> > > }
> > >
> >
> >
> > > strm = msblk->decompressor->init(msblk, buffer, length);
> > >+ if (IS_ERR(strm)) {
> > >+ decomp_strm = strm;
> > >+ goto finished;
> > >+ }
> > >
> > >-finished:
> > >+ decomp_strm->strm = strm;
> > > kfree(buffer);
> > >+ return decomp_strm;
> > >
> > >- return strm;
> > >+finished:
> > >+ kfree(decomp_strm);
> > >+ kfree(buffer);
> > >+ return decomp_strm;
> > >}
> > >diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
> > >index 330073e..207c810 100644
> > >--- a/fs/squashfs/decompressor.h
> > >+++ b/fs/squashfs/decompressor.h
> > >@@ -26,27 +26,24 @@
> > >struct squashfs_decompressor {
> > > void *(*init)(struct squashfs_sb_info *, void *, int);
> > > void (*free)(void *);
> > >- int (*decompress)(struct squashfs_sb_info *, void **,
> > >+ int (*decompress)(struct squashfs_sb_info *, void*, void **,
> > > struct buffer_head **, int, int, int, int, int);
> > > int id;
> > > char *name;
> > > int supported;
> > >};
> > >
> > >-static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
> > >- void *s)
> > >-{
> > >- if (msblk->decompressor)
> > >- msblk->decompressor->free(s);
> > >-}
> > >-
> > >-static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
> > >- void **buffer, struct buffer_head **bh, int b, int offset, int length,
> > >- int srclength, int pages)
> > >-{
> > >- return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
> > >- length, srclength, pages);
> > >-}
> > >+struct squashfs_decomp_strm {
> > >+ void *strm;
> > >+ struct list_head list;
> > >+};
> > >+
> > >+int squashfs_decompress(struct super_block *sb, void **buffer,
> > >+ struct buffer_head **bh, int b, int offset, int length,
> > >+ int srclength, int pages);
> > >+
> > >+void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
> > >+ struct squashfs_decomp_strm *stream);
> > >
> > >#ifdef CONFIG_SQUASHFS_XZ
> > >extern const struct squashfs_decompressor squashfs_xz_comp_ops;
> > >diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
> > >index 00f4dfc..e1bf135 100644
> > >--- a/fs/squashfs/lzo_wrapper.c
> > >+++ b/fs/squashfs/lzo_wrapper.c
> > >@@ -74,17 +74,15 @@ static void lzo_free(void *strm)
> > >}
> > >
> > >
> > >-static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
> > >- struct buffer_head **bh, int b, int offset, int length, int srclength,
> > >- int pages)
> > >+static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
> > >+ void **buffer, struct buffer_head **bh, int b, int offset,
> > >+ int length, int srclength, int pages)
> > >{
> > >- struct squashfs_lzo *stream = msblk->stream;
> > >+ struct squashfs_lzo *stream = strm;
> > > void *buff = stream->input;
> > > int avail, i, bytes = length, res;
> > > size_t out_len = srclength;
> > >
> > >- mutex_lock(&msblk->read_data_mutex);
> > >-
> > > for (i = 0; i < b; i++) {
> > > wait_on_buffer(bh[i]);
> > > if (!buffer_uptodate(bh[i]))
> > >@@ -111,7 +109,6 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
> > > bytes -= avail;
> > > }
> > >
> > >- mutex_unlock(&msblk->read_data_mutex);
> > > return res;
> > >
> > >block_release:
> > >@@ -119,7 +116,6 @@ block_release:
> > > put_bh(bh[i]);
> > >
> > >failed:
> > >- mutex_unlock(&msblk->read_data_mutex);
> > >
> > > ERROR("lzo decompression failed, data probably corrupt\n");
> > > return -EIO;
> > >diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
> > >index 405baed..4bb1f90 100644
> > >--- a/fs/squashfs/squashfs.h
> > >+++ b/fs/squashfs/squashfs.h
> > >@@ -52,7 +52,8 @@ extern void *squashfs_read_table(struct super_block *, u64, int);
> > >
> > >/* decompressor.c */
> > >extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
> > >-extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
> > >+extern struct squashfs_decomp_strm *squashfs_decompressor_init(
> > >+ struct super_block *);
> > >
> > >/* export.c */
> > >extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
> > >diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
> > >index 52934a2..0a209e6 100644
> > >--- a/fs/squashfs/squashfs_fs_sb.h
> > >+++ b/fs/squashfs/squashfs_fs_sb.h
> > >@@ -63,10 +63,10 @@ struct squashfs_sb_info {
> > > __le64 *id_table;
> > > __le64 *fragment_index;
> > > __le64 *xattr_id_table;
> > >- struct mutex read_data_mutex;
> > >+ struct mutex comp_strm_mutex;
> > > struct mutex meta_index_mutex;
> > > struct meta_index *meta_index;
> > >- void *stream;
> > >+ struct list_head strm_list;
> > > __le64 *inode_lookup_table;
> > > u64 inode_table;
> > > u64 directory_table;
> > >@@ -76,5 +76,8 @@ struct squashfs_sb_info {
> > > long long bytes_used;
> > > unsigned int inodes;
> > > int xattr_ids;
> > >+ wait_queue_head_t decomp_wait_queue;
> > >+ int nr_avail_decomp;
> > >+ unsigned short flags;
> > >};
> > >#endif
> > >diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
> > >index 60553a9..70aa9c4 100644
> > >--- a/fs/squashfs/super.c
> > >+++ b/fs/squashfs/super.c
> > >@@ -84,7 +84,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
> > > unsigned short flags;
> > > unsigned int fragments;
> > > u64 lookup_table_start, xattr_id_table_start, next_table;
> > >- int err;
> > >+ int err, i;
> > >
> > > TRACE("Entered squashfs_fill_superblock\n");
> > >
> > >@@ -98,7 +98,9 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
> > > msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
> > > msblk->devblksize_log2 = ffz(~msblk->devblksize);
> > >
> > >- mutex_init(&msblk->read_data_mutex);
> > >+ INIT_LIST_HEAD(&msblk->strm_list);
> > >+ mutex_init(&msblk->comp_strm_mutex);
> > >+ init_waitqueue_head(&msblk->decomp_wait_queue);
> >
> > This should be done via a helper in decompressor.c, i.e. the implementation
> > shouldn't be visible here.
> >
> > When I added the decompressor framework, I should have done this.
> >
> > > mutex_init(&msblk->meta_index_mutex);
> > >
> > > /*
> > >@@ -176,6 +178,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
> > > msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
> > > msblk->inodes = le32_to_cpu(sblk->inodes);
> > > flags = le16_to_cpu(sblk->flags);
> > >+ msblk->flags = flags;
> > >
> >
> > ha, the superblock flags should only be needed at mount time. After
> > mount time there shouldn't be anything in flags we need to look at.
> >
> > You need to do this because flags is needed for the decompressor init
> > function (COMP_OPTS(flags)) which is now called after mount time.
> >
> > Once the compressor options parsing is moved back to being only
> > at mount time, you won't need to store the flags anymore.
>
> Hmm, I'd like to clarify your point further.
> I agree it's an unnecessary performance degradation to read the compressor
> options from storage every time we create a new stream buffer.
> But we still need those options to create a new stream buffer dynamically
> (e.g. xz_dec_init), so they have to be kept in memory somewhere, even if we
> remove the flags field from msblk.
> Am I missing something?
I should have been more specific.
My point is that we could remove the flags from msblk, but we still need
comp_opts to create a new stream buffer dynamically, so comp_opts has to be
kept somewhere. I think the best place to keep it is struct squashfs_sb_info,
like this:

struct squashfs_sb_info {
	const struct squashfs_decompressor *decompressor;
	void *comp_opts;
	...
};
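
For instance, a decompressor's alloc could then build a new stream purely from
the cached options, with no metablock read at all. A rough sketch using the xz
case (the alloc hook and the comp_opts layout are illustrative, not a final
interface):

static void *squashfs_xz_alloc(struct squashfs_sb_info *msblk)
{
	struct comp_opts *opts = msblk->comp_opts;
	int dict_size;

	/* fall back to the old default if no options were stored on disk */
	if (opts)
		dict_size = opts->dict_size;
	else
		dict_size = max_t(int, msblk->block_size,
				  SQUASHFS_METADATA_SIZE);

	/* nothing is read from storage here */
	return xz_dec_init(XZ_PREALLOC, dict_size);
}
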
What do you think about it?
--
Kind regards,
Minchan Kim
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC,4/5] squashfs: support multiple decompress stream buffer
2013-10-07 6:35 ` Minchan Kim
@ 2013-10-08 1:25 ` Minchan Kim
0 siblings, 0 replies; 12+ messages in thread
From: Minchan Kim @ 2013-10-08 1:25 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee
On Mon, Oct 07, 2013 at 03:35:03PM +0900, Minchan Kim wrote:
< snip >
> > >
> > > ha, the superblock flags should only be needed at mount time. After
> > > mount time there shouldn't be anything in flags we need to look at.
> > >
> > > You need to do this because flags is needed for the decompressor init
> > > function (COMP_OPTS(flags)) which is now called after mount time.
> > >
> > > Once the compressor options parsing is moved back to being only
> > > at mount time, you won't need to store the flags anymore.
> >
> > Hmm, I'd like to clarify your point further.
> > I agree it's an unnecessary performance degradation to read the compressor
> > options from storage every time we create a new stream buffer.
> > But we still need those options to create a new stream buffer dynamically
> > (e.g. xz_dec_init), so they have to be kept in memory somewhere, even if we
> > remove the flags field from msblk.
> > Am I missing something?
>
> I should have been more specific.
> My point is that we could remove the flags from msblk, but we still need
> comp_opts to create a new stream buffer dynamically, so comp_opts has to be
> kept somewhere. I think the best place to keep it is struct squashfs_sb_info,
> like this:
>
> struct squashfs_sb_info {
> 	const struct squashfs_decompressor *decompressor;
> 	void *comp_opts;
> 	...
> };
>
> What do you think about it?
So, here is my idea. Could you review it? I'd like to rebase the other patches
on top of it.
From 04b050357f4d6bf1dec8b324c3ab70b0cd113e6c Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Mon, 7 Oct 2013 16:54:31 +0900
Subject: [PATCH] squashfs: separate decompressor option parse logic and
stream allocation
Currently, at mount time squashfs parses the decompressor options (i.e. reads
a block from storage) and creates the stream buffer; both jobs are lumped
together into the decompressor's "initialization" phase.
But the option parsing only needs to be done at mount time, while the stream
buffer allocation could be done dynamically in the future, so this patch
separates the two.
It adds new decompressor functions (alloc and destroy) and changes the
semantics of init.
Old:
init   : parse the decompressor options + create the stream buffer
New:
init   : parse the decompressor options and keep them in memory.
alloc  : create a stream buffer.
destroy: free the decompressor options allocated in the init phase.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
fs/squashfs/decompressor.c | 30 ++++++++++-------
fs/squashfs/decompressor.h | 10 +++++-
fs/squashfs/lzo_wrapper.c | 14 +++++++-
fs/squashfs/squashfs.h | 1 +
fs/squashfs/squashfs_fs_sb.h | 1 +
fs/squashfs/super.c | 9 ++++-
fs/squashfs/xz_wrapper.c | 76 +++++++++++++++++++++++++++++++-----------
fs/squashfs/zlib_wrapper.c | 14 +++++++-
8 files changed, 119 insertions(+), 36 deletions(-)
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 3f6271d..b0004c5 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -37,29 +37,29 @@
*/
static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
- NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
+ NULL, NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
};
#ifndef CONFIG_SQUASHFS_LZO
static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
- NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
+ NULL, NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
};
#endif
#ifndef CONFIG_SQUASHFS_XZ
static const struct squashfs_decompressor squashfs_xz_comp_ops = {
- NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
+ NULL, NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
};
#endif
#ifndef CONFIG_SQUASHFS_ZLIB
static const struct squashfs_decompressor squashfs_zlib_comp_ops = {
- NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
+ NULL, NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
};
#endif
static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
- NULL, NULL, NULL, 0, "unknown", 0
+ NULL, NULL, NULL, NULL, NULL, 0, "unknown", 0
};
static const struct squashfs_decompressor *decompressor[] = {
@@ -82,11 +82,11 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
return decompressor[i];
}
-
void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- void *strm, *buffer = NULL;
+ int err;
+ void *buffer = NULL;
int length = 0;
/*
@@ -102,15 +102,21 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
PAGE_CACHE_SIZE, 1);
if (length < 0) {
- strm = ERR_PTR(length);
- goto finished;
+ kfree(buffer);
+ return ERR_PTR(length);
}
}
- strm = msblk->decompressor->init(msblk, buffer, length);
-
-finished:
+ err = msblk->decompressor->init(msblk, buffer, length);
kfree(buffer);
+ return ERR_PTR(err);
+
+}
+
+void *squashfs_decompressor_alloc(struct super_block *sb)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ void *strm = msblk->decompressor->alloc(msblk);
return strm;
}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 330073e..0c108dd 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -24,15 +24,23 @@
*/
struct squashfs_decompressor {
- void *(*init)(struct squashfs_sb_info *, void *, int);
+ int (*init)(struct squashfs_sb_info *, void *, int);
+ void *(*alloc)(struct squashfs_sb_info *);
void (*free)(void *);
int (*decompress)(struct squashfs_sb_info *, void **,
struct buffer_head **, int, int, int, int, int);
+ void (*destroy)(struct squashfs_sb_info *);
int id;
char *name;
int supported;
};
+static inline void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+ if (msblk->decompressor)
+ msblk->decompressor->destroy(msblk);
+}
+
static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
void *s)
{
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 00f4dfc..f1ad2df 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -37,7 +37,17 @@ struct squashfs_lzo {
void *output;
};
-static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
+static int lzo_init(struct squashfs_sb_info *msblk, void *buffer, int len)
+{
+ return 0;
+}
+
+static void lzo_destroy(struct squashfs_sb_info *msblk)
+{
+
+}
+
+static void *lzo_alloc(struct squashfs_sb_info *msblk)
{
int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
@@ -127,8 +137,10 @@ failed:
const struct squashfs_decompressor squashfs_lzo_comp_ops = {
.init = lzo_init,
+ .alloc = lzo_alloc,
.free = lzo_free,
.decompress = lzo_uncompress,
+ .destroy = lzo_destroy,
.id = LZO_COMPRESSION,
.name = "lzo",
.supported = 1
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index d126651..6fc329f 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -49,6 +49,7 @@ extern void *squashfs_read_table(struct super_block *, u64, int);
/* decompressor.c */
extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
+extern void *squashfs_decompressor_alloc(struct super_block *);
/* export.c */
extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 52934a2..a553ec4 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -54,6 +54,7 @@ struct squashfs_cache_entry {
struct squashfs_sb_info {
const struct squashfs_decompressor *decompressor;
+ void *decomp_opt;
int devblksize;
int devblksize_log2;
struct squashfs_cache *block_cache;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 60553a9..47180b9 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -212,7 +212,12 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- msblk->stream = squashfs_decompressor_init(sb, flags);
+ if (squashfs_decompressor_init(sb, flags)) {
+ ERROR("Failed to initialize decompressor\n");
+ goto failed_mount;
+ }
+
+ msblk->stream = squashfs_decompressor_alloc(sb);
if (IS_ERR(msblk->stream)) {
err = PTR_ERR(msblk->stream);
msblk->stream = NULL;
@@ -337,6 +342,7 @@ failed_mount:
squashfs_cache_delete(msblk->fragment_cache);
squashfs_cache_delete(msblk->read_page);
squashfs_decompressor_free(msblk, msblk->stream);
+ squashfs_decompressor_destroy(msblk);
kfree(msblk->inode_lookup_table);
kfree(msblk->fragment_index);
kfree(msblk->id_table);
@@ -384,6 +390,7 @@ static void squashfs_put_super(struct super_block *sb)
squashfs_cache_delete(sbi->fragment_cache);
squashfs_cache_delete(sbi->read_page);
squashfs_decompressor_free(sbi, sbi->stream);
+ squashfs_decompressor_destroy(sbi);
kfree(sbi->id_table);
kfree(sbi->fragment_index);
kfree(sbi->meta_index);
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 1760b7d1..60f30d7e2 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -38,38 +38,72 @@ struct squashfs_xz {
struct xz_buf buf;
};
-struct comp_opts {
+struct comp_opts_ondisk {
__le32 dictionary_size;
__le32 flags;
};
-static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
+struct comp_opts {
+ int dict_size;
+};
+
+static int squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
int len)
{
- struct comp_opts *comp_opts = buff;
- struct squashfs_xz *stream;
- int dict_size = msblk->block_size;
- int err, n;
+ int err = 0;
+ int dict_size, n;
+ struct comp_opts_ondisk *comp_opts_ondisk = buff;
+ struct comp_opts *comp_opts = NULL;
+ /* check compressor options are the expected length */
+ if (len < sizeof(*comp_opts_ondisk)) {
+ err = -EIO;
+ goto failed;
+ }
- if (comp_opts) {
- /* check compressor options are the expected length */
- if (len < sizeof(*comp_opts)) {
- err = -EIO;
- goto failed;
- }
+ comp_opts = kmalloc(sizeof(struct comp_opts), GFP_KERNEL);
+ if (!comp_opts) {
+ err = -ENOMEM;
+ goto failed;
+ }
- dict_size = le32_to_cpu(comp_opts->dictionary_size);
+ dict_size = le32_to_cpu(comp_opts_ondisk->dictionary_size);
- /* the dictionary size should be 2^n or 2^n+2^(n+1) */
- n = ffs(dict_size) - 1;
- if (dict_size != (1 << n) && dict_size != (1 << n) +
- (1 << (n + 1))) {
- err = -EIO;
- goto failed;
- }
+ /* the dictionary size should be 2^n or 2^n+2^(n+1) */
+ n = ffs(dict_size) - 1;
+ if (dict_size != (1 << n) && dict_size != (1 << n) +
+ (1 << (n + 1))) {
+ err = -EIO;
+ goto failed;
}
dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
+ comp_opts->dict_size = dict_size;
+ msblk->decomp_opt = comp_opts;
+ return 0;
+failed:
+ kfree(comp_opts);
+ return err;
+}
+
+static void squashfs_xz_destroy(struct squashfs_sb_info *msblk)
+{
+ if (msblk->decomp_opt)
+ kfree(msblk->decomp_opt);
+}
+
+static void *squashfs_xz_alloc(struct squashfs_sb_info *msblk)
+{
+ struct squashfs_xz *stream;
+ struct comp_opts *comp_opts = (struct comp_opts *)msblk->decomp_opt;
+ int err;
+ int dict_size;
+
+ if (comp_opts) {
+ dict_size = comp_opts->dict_size;
+ } else {
+ dict_size = max_t(int, msblk->block_size,
+ SQUASHFS_METADATA_SIZE);
+ }
stream = kmalloc(sizeof(*stream), GFP_KERNEL);
if (stream == NULL) {
@@ -172,8 +206,10 @@ release_mutex:
const struct squashfs_decompressor squashfs_xz_comp_ops = {
.init = squashfs_xz_init,
+ .alloc = squashfs_xz_alloc,
.free = squashfs_xz_free,
.decompress = squashfs_xz_uncompress,
+ .destroy = squashfs_xz_destroy,
.id = XZ_COMPRESSION,
.name = "xz",
.supported = 1
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 55d918f..e46e24d 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -33,7 +33,17 @@
#include "squashfs.h"
#include "decompressor.h"
-static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
+static int zlib_init(struct squashfs_sb_info *dummy, void *buffer, int len)
+{
+ return 0;
+}
+
+static void zlib_destroy(struct squashfs_sb_info *dummy)
+{
+
+}
+
+static void *zlib_alloc(struct squashfs_sb_info *dummy)
{
z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
if (stream == NULL)
@@ -140,8 +150,10 @@ release_mutex:
const struct squashfs_decompressor squashfs_zlib_comp_ops = {
.init = zlib_init,
+ .alloc = zlib_alloc,
.free = zlib_free,
.decompress = zlib_uncompress,
+ .destroy = zlib_destroy,
.id = ZLIB_COMPRESSION,
.name = "zlib",
.supported = 1
--
1.7.9.5
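
With this split in place, the multiple decompress stream patch could grow its
stream pool on demand without ever re-reading the compressor options from disk.
A rough sketch combining this patch with the earlier RFC (grow_stream_pool()
is a made-up name):

static struct squashfs_decomp_strm *grow_stream_pool(struct super_block *sb)
{
	struct squashfs_decomp_strm *strm;

	strm = kmalloc(sizeof(*strm), GFP_KERNEL);
	if (!strm)
		return NULL;

	/* only uses the options cached by init() at mount time, no I/O */
	strm->strm = squashfs_decompressor_alloc(sb);
	if (IS_ERR_OR_NULL(strm->strm)) {
		kfree(strm);
		return NULL;
	}
	return strm;
}
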
--
Kind regards,
Minchan Kim
^ permalink raw reply related [flat|nested] 12+ messages in thread