* re: [RFC,3/5] squashfs: remove cache for normal data page
@ 2013-10-04 6:13 Phillip Lougher
2013-10-07 0:53 ` Minchan Kim
0 siblings, 1 reply; 3+ messages in thread
From: Phillip Lougher @ 2013-10-04 6:13 UTC (permalink / raw)
To: minchan, linux-kernel; +Cc: linux-kernel, ch0.han, gunho.lee
Minchan Kim <minchan@kernel.org> wrote:
>Squashfs has used a cache for normal data pages but it's pointless
>because MM already has cache layer and squashfs adds extra pages
>into MM's page cache when it reads a page from compressed block.
>
>This patch removes cache usage for normal data pages so it could
>remove unnecessary one copy
1. As I mentioned last week, the use of the "unnecessary" cache is there
to prevent two or more processes simultaneously trying to read the same
pages. Without this, such racing processes will decompress the same
blocks repeatedly.
It is easy to dismiss this as a rare event, but, when it happens it
has a major impact on performance, because the racing processes
can get stuck in a lock-step arrangement, repeatedly trying to access
the same blocks until the eof. If the file is many megabytes or
gigabytes in size (such as when Squashfs is used as a container fs for
certain liveCDs, or virtual machine disk images) this will lead to
a significant reduction in performance.
So I consider this a major regression.
2. Your patch also adds another regression, which is to reintroduce
kmap() rather than kmap_atomic().
I was asked to remove this at my first submission attempt
in 2005
http://lkml.indiana.edu/hypermail/linux/kernel/0503.2/0809.html
So I'm not particularly willing to reintroduce it now.
3. Your patch potentially reintroduces this bug
http://www.spinics.net/lists/linux-fsdevel/msg02555.html
http://zaitcev.livejournal.com/86954.html
4. Your patch is unconditional. With such code changes as this
it is always essential to make this a "buy in option", with the
original behaviour retained as default. Otherwise, lots of users
potentially find their embedded/enterprise/mission critical
system unexpectedly breaks when upgrading the kernel, and I get
a lot of angry email.
Phillip
>(ie, from cache to page cache) and
>decouple normal data page path from squashfs cache layer.
>
>Signed-off-by: Minchan Kim <minchan@kernel.org>
>
>---
>fs/squashfs/file.c | 117 +++++++++++++++++++++++++++++++++-------------------
> 1 file changed, 75 insertions(+), 42 deletions(-)
>
>diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
>index d4d472f..36508c3 100644
>--- a/fs/squashfs/file.c
>+++ b/fs/squashfs/file.c
>@@ -505,11 +505,14 @@ skip_page:
> static int squashfs_regular_readpage(struct file *file, struct page *page)
> {
> u64 block = 0;
>- int bsize;
>- struct inode *inode = page->mapping->host;
>+ int bsize, i, data_len, pages, nr_pages = 0;
>+ struct address_space *mapping = page->mapping;
>+ struct inode *inode = mapping->host;
> struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
>- int bytes, i, offset = 0;
>- struct squashfs_cache_entry *buffer = NULL;
>+ gfp_t gfp_mask;
>+ struct page *push_page;
>+ struct page **page_array;
>+ void **buffer = NULL;
> void *pageaddr;
>
> int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
>@@ -519,6 +522,8 @@ static int squashfs_regular_readpage(struct file *file, struct page *page)
> TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
> page->index, squashfs_i(inode)->start);
>
>+ pages = msblk->block_size >> PAGE_CACHE_SHIFT;
>+ pages = pages ? pages : 1;
> /*
> * Reading a datablock from disk. Need to read block list
> * to get location and block size.
>@@ -527,60 +532,88 @@ static int squashfs_regular_readpage(struct file *file, struct page *page)
> if (bsize < 0)
> goto error_out;
>
>- if (bsize == 0) { /* hole */
>+ if (bsize == 0)
> return squashfs_hole_readpage(file, inode, index, page);
>- } else {
>- /*
>- * Read and decompress datablock.
>- */
>- buffer = squashfs_get_datablock(inode->i_sb,
>- block, bsize);
>- if (buffer->error) {
>- ERROR("Unable to read page, block %llx, size %x\n",
>- block, bsize);
>- squashfs_cache_put(buffer);
>- goto error_out;
>- }
>- bytes = buffer->length;
>- }
>+
> /*
>- * Loop copying datablock into pages. As the datablock likely covers
>- * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly
>- * grab the pages from the page cache, except for the page that we've
>- * been called to fill.
>+ * Read and decompress data block
> */
>- for (i = start_index; bytes > 0; i++,
>- bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
>- struct page *push_page;
>- int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
>+ gfp_mask = mapping_gfp_mask(mapping);
>+ buffer = kcalloc(1 << (msblk->block_log - PAGE_CACHE_SHIFT),
>+ sizeof(void *), GFP_KERNEL);
>+ if (!buffer)
>+ return -ENOMEM;
>
>- TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
>+ page_array = kcalloc(1 << (msblk->block_log - PAGE_CACHE_SHIFT),
>+ sizeof(struct page *), GFP_KERNEL);
>
>- push_page = (i == page->index) ? page :
>- grab_cache_page_nowait(page->mapping, i);
>+ if (!page_array)
>+ goto release_buffer;
>
>+ /* alloc buffer pages */
>+ for (i = 0; i < pages; i++) {
>+ if (page->index == start_index + i)
>+ push_page = page;
>+ else
>+ push_page = __page_cache_alloc(gfp_mask);
> if (!push_page)
>- continue;
>+ goto release_page_array;
>+ nr_pages++;
>+ buffer[i] = kmap(push_page);
>+ page_array[i] = push_page;
>+ }
>
>- if (PageUptodate(push_page))
>- goto skip_page;
>+ data_len = squashfs_read_datablock(inode->i_sb, buffer,
>+ block, bsize, msblk->block_size, pages);
>
>- pageaddr = kmap_atomic(push_page);
>- squashfs_copy_data(pageaddr, buffer, offset, avail);
>- memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
>- kunmap_atomic(pageaddr);
>+ if (data_len < 0) {
>+ ERROR("Unable to read page, block %llx, size %x\n",
>+ block, bsize);
>+ for (i = 0; i < nr_pages; i++) {
>+ kunmap(page_array[i]);
>+ page_cache_release(page_array[i]);
>+ }
>+ kfree(buffer);
>+ kfree(page_array);
>+ goto error_out;
>+ }
>+
>+ for (i = 0; i < pages; i++) {
>+ push_page = page_array[i];
> flush_dcache_page(push_page);
> SetPageUptodate(push_page);
>-skip_page:
>- unlock_page(push_page);
>- if (i != page->index)
>+ kunmap(page_array[i]);
>+ if (page->index == start_index + i) {
>+ unlock_page(push_page);
>+ continue;
>+ }
>+
>+ if (add_to_page_cache_lru(push_page, mapping,
>+ start_index + i, gfp_mask)) {
> page_cache_release(push_page);
>- }
>+ continue;
>+ }
>
>- squashfs_cache_put(buffer);
>+ unlock_page(push_page);
>+ page_cache_release(push_page);
>+ }
>
>+ kfree(page_array);
>+ kfree(buffer);
> return 0;
>
>+release_page_array:
>+ for (i = 0; i < nr_pages; i++) {
>+ kunmap(page_array[i]);
>+ page_cache_release(page_array[i]);
>+ }
>+
>+ kfree(page_array);
>+
>+release_buffer:
>+ kfree(buffer);
>+ return -ENOMEM;
>+
> error_out:
> SetPageError(page);
> pageaddr = kmap_atomic(page);
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [RFC,3/5] squashfs: remove cache for normal data page
2013-10-04 6:13 [RFC,3/5] squashfs: remove cache for normal data page Phillip Lougher
@ 2013-10-07 0:53 ` Minchan Kim
0 siblings, 0 replies; 3+ messages in thread
From: Minchan Kim @ 2013-10-07 0:53 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee
On Fri, Oct 04, 2013 at 07:13:31AM +0100, Phillip Lougher wrote:
> Minchan Kim <minchan@kernel.org> wrote:
> >Squashfs has used a cache for normal data pages but it's pointless
> >because MM already has cache layer and squashfs adds extra pages
> >into MM's page cache when it reads a page from compressed block.
> >
> >This patch removes cache usage for normal data pages so it could
> >remove unnecessary one copy
>
> 1. As I mentioned last week, the use of the "unnecessary" cache is there
> to prevent two or more processes simultaneously trying to read the same
> pages. Without this, such racing processes will decompress the same
> blocks repeatedly.
>
> It is easy to dismiss this as a rare event, but, when it happens it
> has a major impact on performance, because the racing processes
> can get stuck in a lock-step arrangement, repeatedly trying to access
> the same blocks until the eof. If the file is many megabytes or
> gigabytes in size (such as when Squashfs is used as a container fs for
> certain liveCDs, or virtual machine disk images) this will lead to
> a significant reduction in performance.
>
> So I consider this a major regression.
>
> 2. Your patch also adds another regression, which is to reintroduce
> kmap() rather than kmap_atomic().
>
> I was asked to remove this at my first submission attempt
> in 2005
>
> http://lkml.indiana.edu/hypermail/linux/kernel/0503.2/0809.html
>
> So I'm not particularly willing to reintroduce it now.
>
> 3. Your patch potentially reintroduces this bug
>
> http://www.spinics.net/lists/linux-fsdevel/msg02555.html
> http://zaitcev.livejournal.com/86954.html
>
> 4. Your patch is unconditional. With such code changes as this
> it is always essential to make this a "buy in option", with the
> original behaviour retained as default. Otherwise, lots of users
> potentially find their embedded/enterprise/mission critical
> system unexpectedly breaks when upgrading the kernel, and I get
> a lot of angry email.
>
> Phillip
I will handle all your points in next patchset.
Thanks for the review!
--
Kind regards,
Minchan Kim
^ permalink raw reply [flat|nested] 3+ messages in thread
* [RFC 0/5] squashfs enhance
@ 2013-09-16 7:08 Minchan Kim
2013-09-16 7:08 ` [RFC 3/5] squashfs: remove cache for normal data page Minchan Kim
0 siblings, 1 reply; 3+ messages in thread
From: Minchan Kim @ 2013-09-16 7:08 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee, Minchan Kim
Our product has used squashfs for rootfs and it saves a few bucks
per device. Super thanks, Squashfs! You were perfect for us.
But unfortunately, our device started to become complex, so sometimes
we need better throughput for sequential I/O, but current squashfs
couldn't meet our use case.
When I dive into the code, it has some problems.
1) Too many copy
2) Only a decompression stream buffer so that concurrent read stuck with that
3) short of readpages
This patchset try to solve above problems.
Just two patches are clean-ups, so they shouldn't change any behavior.
And functions factored out will be used for later patches.
If they change some behavior, it's not what I intended. :(
3rd patch removes cache usage for (not-fragmented, no-tail-end)
data pages so that we can reduce memory copy.
4th patch supports multiple decompress stream buffer so concurrent
read could be handled at the same time. When I ran an experiment,
it obviously reduced the time by half.
5th patch tries to implement asynchronous readahead functions;
I found it can improve performance by about 35% with lots of I/O merging.
Any comments are welcome.
Thanks.
Minchan Kim (5):
squashfs: clean up squashfs_read_data
squashfs: clean up squashfs_readpage
squashfs: remove cache for normal data page
squashfs: support multiple decompress stream buffer
squashfs: support readpages
fs/squashfs/block.c | 245 +++++++++-----
fs/squashfs/cache.c | 16 +-
fs/squashfs/decompressor.c | 107 +++++-
fs/squashfs/decompressor.h | 27 +-
fs/squashfs/file.c | 738 ++++++++++++++++++++++++++++++++++++++----
fs/squashfs/lzo_wrapper.c | 12 +-
fs/squashfs/squashfs.h | 12 +-
fs/squashfs/squashfs_fs_sb.h | 11 +-
fs/squashfs/super.c | 44 ++-
fs/squashfs/xz_wrapper.c | 20 +-
fs/squashfs/zlib_wrapper.c | 12 +-
11 files changed, 1024 insertions(+), 220 deletions(-)
--
1.7.9.5
^ permalink raw reply [flat|nested] 3+ messages in thread
* [RFC 3/5] squashfs: remove cache for normal data page
2013-09-16 7:08 [RFC 0/5] squashfs enhance Minchan Kim
@ 2013-09-16 7:08 ` Minchan Kim
0 siblings, 0 replies; 3+ messages in thread
From: Minchan Kim @ 2013-09-16 7:08 UTC (permalink / raw)
To: Phillip Lougher; +Cc: linux-kernel, ch0.han, gunho.lee, Minchan Kim
Squashfs has used a cache for normal data pages but it's pointless
because MM already has cache layer and squashfs adds extra pages
into MM's page cache when it reads a page from compressed block.
This patch removes cache usage for normal data pages so it could
remove unnecessary one copy(ie, from cache to page cache) and
decouple normal data page path from squashfs cache layer.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
fs/squashfs/file.c | 117 +++++++++++++++++++++++++++++++++-------------------
1 file changed, 75 insertions(+), 42 deletions(-)
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index d4d472f..36508c3 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -505,11 +505,14 @@ skip_page:
static int squashfs_regular_readpage(struct file *file, struct page *page)
{
u64 block = 0;
- int bsize;
- struct inode *inode = page->mapping->host;
+ int bsize, i, data_len, pages, nr_pages = 0;
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int bytes, i, offset = 0;
- struct squashfs_cache_entry *buffer = NULL;
+ gfp_t gfp_mask;
+ struct page *push_page;
+ struct page **page_array;
+ void **buffer = NULL;
void *pageaddr;
int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
@@ -519,6 +522,8 @@ static int squashfs_regular_readpage(struct file *file, struct page *page)
TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
page->index, squashfs_i(inode)->start);
+ pages = msblk->block_size >> PAGE_CACHE_SHIFT;
+ pages = pages ? pages : 1;
/*
* Reading a datablock from disk. Need to read block list
* to get location and block size.
@@ -527,60 +532,88 @@ static int squashfs_regular_readpage(struct file *file, struct page *page)
if (bsize < 0)
goto error_out;
- if (bsize == 0) { /* hole */
+ if (bsize == 0)
return squashfs_hole_readpage(file, inode, index, page);
- } else {
- /*
- * Read and decompress datablock.
- */
- buffer = squashfs_get_datablock(inode->i_sb,
- block, bsize);
- if (buffer->error) {
- ERROR("Unable to read page, block %llx, size %x\n",
- block, bsize);
- squashfs_cache_put(buffer);
- goto error_out;
- }
- bytes = buffer->length;
- }
+
/*
- * Loop copying datablock into pages. As the datablock likely covers
- * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly
- * grab the pages from the page cache, except for the page that we've
- * been called to fill.
+ * Read and decompress data block
*/
- for (i = start_index; bytes > 0; i++,
- bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
- struct page *push_page;
- int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+ gfp_mask = mapping_gfp_mask(mapping);
+ buffer = kcalloc(1 << (msblk->block_log - PAGE_CACHE_SHIFT),
+ sizeof(void *), GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
- TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
+ page_array = kcalloc(1 << (msblk->block_log - PAGE_CACHE_SHIFT),
+ sizeof(struct page *), GFP_KERNEL);
- push_page = (i == page->index) ? page :
- grab_cache_page_nowait(page->mapping, i);
+ if (!page_array)
+ goto release_buffer;
+ /* alloc buffer pages */
+ for (i = 0; i < pages; i++) {
+ if (page->index == start_index + i)
+ push_page = page;
+ else
+ push_page = __page_cache_alloc(gfp_mask);
if (!push_page)
- continue;
+ goto release_page_array;
+ nr_pages++;
+ buffer[i] = kmap(push_page);
+ page_array[i] = push_page;
+ }
- if (PageUptodate(push_page))
- goto skip_page;
+ data_len = squashfs_read_datablock(inode->i_sb, buffer,
+ block, bsize, msblk->block_size, pages);
- pageaddr = kmap_atomic(push_page);
- squashfs_copy_data(pageaddr, buffer, offset, avail);
- memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
- kunmap_atomic(pageaddr);
+ if (data_len < 0) {
+ ERROR("Unable to read page, block %llx, size %x\n",
+ block, bsize);
+ for (i = 0; i < nr_pages; i++) {
+ kunmap(page_array[i]);
+ page_cache_release(page_array[i]);
+ }
+ kfree(buffer);
+ kfree(page_array);
+ goto error_out;
+ }
+
+ for (i = 0; i < pages; i++) {
+ push_page = page_array[i];
flush_dcache_page(push_page);
SetPageUptodate(push_page);
-skip_page:
- unlock_page(push_page);
- if (i != page->index)
+ kunmap(page_array[i]);
+ if (page->index == start_index + i) {
+ unlock_page(push_page);
+ continue;
+ }
+
+ if (add_to_page_cache_lru(push_page, mapping,
+ start_index + i, gfp_mask)) {
page_cache_release(push_page);
- }
+ continue;
+ }
- squashfs_cache_put(buffer);
+ unlock_page(push_page);
+ page_cache_release(push_page);
+ }
+ kfree(page_array);
+ kfree(buffer);
return 0;
+release_page_array:
+ for (i = 0; i < nr_pages; i++) {
+ kunmap(page_array[i]);
+ page_cache_release(page_array[i]);
+ }
+
+ kfree(page_array);
+
+release_buffer:
+ kfree(buffer);
+ return -ENOMEM;
+
error_out:
SetPageError(page);
pageaddr = kmap_atomic(page);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2013-10-07 0:52 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-10-04 6:13 [RFC,3/5] squashfs: remove cache for normal data page Phillip Lougher
2013-10-07 0:53 ` Minchan Kim
-- strict thread matches above, loose matches on Subject: below --
2013-09-16 7:08 [RFC 0/5] squashfs enhance Minchan Kim
2013-09-16 7:08 ` [RFC 3/5] squashfs: remove cache for normal data page Minchan Kim
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).