* [PATCH]: Improve performance of LZO hibernation
@ 2011-09-19 4:14 Bojan Smojver
0 siblings, 0 replies; 5+ messages in thread
From: Bojan Smojver @ 2011-09-19 4:14 UTC (permalink / raw)
To: linux-kernel
Hi,
Here is a patch (well, first draft anyway) that enables threading of
compression/decompression and better buffering on thaw. The hibernation
speed didn't change much in my tests (most likely because my CPU is
already fast enough to saturate I/O), but thaw speed was almost cut in
half with this approach. See what you think of it and let me know (I'm
not subscribed, so please CC me).
I did test this on my ThinkPad T510, but because I'm affected by bug
#37142, occasionally I still have trouble on thaw, which makes it hard
to distinguish what caused which problem. So, if this patch eats your
disk, I don't want to hear it. ;-)
---------------------------------------
Use threads for LZO compression/decompression on hibernate/thaw.
Improve read buffering on thaw.
Signed-off-by: Bojan Smojver <bojan@rexursive.com>
---
kernel/power/swap.c | 514 +++++++++++++++++++++++++++++++++++++++------------
1 files changed, 391 insertions(+), 123 deletions(-)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7c97c3a..d450488 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -27,6 +27,9 @@
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/vmalloc.h>
+#include <linux/cpu.h>
+#include <linux/atomic.h>
+#include <linux/kthread.h>
#include "power.h"
@@ -372,6 +375,9 @@ static int swap_writer_finish(struct swap_map_handle *handle,
LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
+/* Maximum number of threads for compression/decompression. */
+#define LZO_THREADS 2
+
/**
* save_image - save the suspend image data
*/
@@ -419,6 +425,46 @@ static int save_image(struct swap_map_handle *handle,
return ret;
}
+/**
+ * Structure used for LZO data compression.
+ */
+struct cmp_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ wait_queue_head_t go; /* start compression */
+ wait_queue_head_t done; /* compression done */
+ int ret; /* return code */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+ unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
+};
+
+/**
+ * Compression function that runs in its own thread.
+ */
+static int lzo_compress_threadfn(void *data)
+{
+ struct cmp_data *d = data;
+
+ while(1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
+ atomic_set(&d->ready, 0);
+ d->ret = lzo1x_1_compress(d->unc, d->unc_len,
+ d->cmp + LZO_HEADER, &d->cmp_len,
+ d->wrk);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+
+ return 0;
+}
/**
* save_image_lzo - Save the suspend image data compressed with LZO.
@@ -434,11 +480,26 @@ static int save_image_lzo(struct swap_map_handle *handle,
int ret = 0;
int nr_pages;
int err2;
+ int cpu;
struct bio *bio;
struct timeval start;
struct timeval stop;
- size_t off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *wrk, *page;
+ size_t off, thr, cthr, nthr;
+ unsigned char *page;
+ struct cmp_data *data;
+
+ /*
+ * Get more grunt. We don't care if this fails - we'll do it with just
+ * one core in that case.
+ */
+ enable_nonboot_cpus();
+
+ /*
+ * We'll limit the number of threads for compression to limit memory
+ * footprint.
+ */
+ nthr = num_online_cpus() - 1;
+ nthr = nthr > LZO_THREADS ? LZO_THREADS : (nthr < 1 ? 1 : nthr);
page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
if (!page) {
@@ -446,29 +507,58 @@ static int save_image_lzo(struct swap_map_handle *handle,
return -ENOMEM;
}
- wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
- if (!wrk) {
- printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
+ data = vmalloc(sizeof(*data) * nthr);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
free_page((unsigned long)page);
+ disable_nonboot_cpus();
return -ENOMEM;
}
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ /*
+ * Start the compression threads.
+ */
+ for (thr = 0; thr < nthr; thr++) {
+ atomic_set(&data[thr].ready, 0);
+ atomic_set(&data[thr].stop, 0);
+
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_create(lzo_compress_threadfn,
+ &data[thr],
+ "image_compress/%zu", thr);
+ /*
+ * On error, stop started threads, clean up, then exit.
+ */
+ if (IS_ERR(data[thr].thr)) {
+ printk(KERN_ERR
+ "PM: Cannot start compression threads\n");
+ while(thr) {
+ --thr;
+ kthread_stop(data[thr].thr);
+ wake_up(&data[thr].go);
+ }
+ vfree(data);
+ free_page((unsigned long)page);
+ disable_nonboot_cpus();
+ return -ENOMEM;
+ }
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ /*
+ * Bind the threads to CPUs and wake them up.
+ */
+ thr = 0;
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ kthread_bind(data[thr++].thr, cpu);
+ if (thr >= nthr)
+ break;
}
+ for (thr = 0; thr < nthr; thr++)
+ wake_up_process(data[thr].thr);
printk(KERN_INFO
"PM: Compressing and saving image data (%u pages) ... ",
@@ -480,54 +570,75 @@ static int save_image_lzo(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
for (;;) {
- for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
- ret = snapshot_read_next(snapshot);
- if (ret < 0)
- goto out_finish;
-
- if (!ret)
+ for (thr = 0; thr < nthr; thr++) {
+ for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
+ ret = snapshot_read_next(snapshot);
+ if (ret < 0)
+ goto out_finish;
+
+ if (!ret)
+ break;
+
+ memcpy(data[thr].unc + off,
+ data_of(*snapshot), PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk(KERN_CONT "\b\b\b\b%3d%%",
+ nr_pages / m);
+ nr_pages++;
+ }
+ if (!off)
break;
- memcpy(unc + off, data_of(*snapshot), PAGE_SIZE);
+ data[thr].unc_len = off;
- if (!(nr_pages % m))
- printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (!off)
+ if (!thr)
break;
- unc_len = off;
- ret = lzo1x_1_compress(unc, unc_len,
- cmp + LZO_HEADER, &cmp_len, wrk);
- if (ret < 0) {
- printk(KERN_ERR "PM: LZO compression failed\n");
- break;
- }
+ for (cthr = thr, thr = 0; thr < cthr; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(unc_len))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- ret = -1;
- break;
- }
-
- *(size_t *)cmp = cmp_len;
+ ret = data[thr].ret;
- /*
- * Given we are writing one page at a time to disk, we copy
- * that much from the buffer, although the last bit will likely
- * be smaller than full page. This is OK - we saved the length
- * of the compressed data, so any garbage at the end will be
- * discarded when we read it.
- */
- for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
- memcpy(page, cmp + off, PAGE_SIZE);
+ if (ret < 0) {
+ printk(KERN_ERR "PM: LZO compression failed\n");
+ goto out_finish;
+ }
- ret = swap_write_page(handle, page, &bio);
- if (ret)
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(data[thr].unc_len))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ ret = -1;
goto out_finish;
+ }
+
+ *(size_t *)data[thr].cmp = data[thr].cmp_len;
+
+ /*
+ * Given we are writing one page at a time to disk, we
+ * copy that much from the buffer, although the last
+ * bit will likely be smaller than full page. This is
+ * OK - we saved the length of the compressed data, so
+ * any garbage at the end will be discarded when we
+ * read it.
+ */
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(page, data[thr].cmp + off, PAGE_SIZE);
+
+ ret = swap_write_page(handle, page, &bio);
+ if (ret)
+ goto out_finish;
+ }
}
}
@@ -542,10 +653,13 @@ out_finish:
printk(KERN_CONT "\n");
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
- vfree(cmp);
- vfree(unc);
- vfree(wrk);
+ for (thr = 0; thr < nthr; thr++) {
+ kthread_stop(data[thr].thr);
+ wake_up(&data[thr].go);
+ }
+ vfree(data);
free_page((unsigned long)page);
+ disable_nonboot_cpus();
return ret;
}
@@ -743,6 +857,46 @@ static int load_image(struct swap_map_handle *handle,
}
/**
+ * Structure used for LZO data decompression.
+ */
+struct dec_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ wait_queue_head_t go; /* start compression */
+ wait_queue_head_t done; /* compression done */
+ int ret; /* return code */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+};
+
+/**
+ * Decompression function that runs in its own thread.
+ */
+static int lzo_decompress_threadfn(void *data)
+{
+ struct dec_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
+ atomic_set(&d->ready, 0);
+ d->unc_len = LZO_UNC_SIZE;
+ d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
+ d->unc, &d->unc_len);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+
+ return 0;
+}
+
+/**
* load_image_lzo - Load compressed image data and decompress them with LZO.
* @handle: Swap map handle to use for loading data.
* @snapshot: Image to copy uncompressed data into.
@@ -754,45 +908,99 @@ static int load_image_lzo(struct swap_map_handle *handle,
{
unsigned int m;
int error = 0;
+ int cpu;
struct bio *bio;
struct timeval start;
struct timeval stop;
unsigned nr_pages;
- size_t i, off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
+ size_t i, off, thr, cthr, nthr;
+ size_t ring = 0, pg = 0, npages,
+ have = 0, want = MAP_PAGE_ENTRIES, need, asked = 0;
+ unsigned char **page;
+ struct dec_data *data;
+
+ /*
+ * We'll limit the number of threads for decompression to limit memory
+ * footprint.
+ */
+ nthr = num_online_cpus() - 1;
+ nthr = nthr > LZO_THREADS ? LZO_THREADS : (nthr < 1 ? 1 : nthr);
+
+ page = vmalloc(sizeof(*page) * MAP_PAGE_ENTRIES);
+ if (!page) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ return -ENOMEM;
+ }
- for (i = 0; i < LZO_CMP_PAGES; i++) {
+ for (i = 0; i < MAP_PAGE_ENTRIES; i++) {
page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
if (!page[i]) {
- printk(KERN_ERR "PM: Failed to allocate LZO page\n");
-
- while (i)
- free_page((unsigned long)page[--i]);
-
- return -ENOMEM;
+ if (i < LZO_CMP_PAGES) {
+ printk(KERN_ERR
+ "PM: Failed to allocate LZO pages\n");
+ while (i)
+ free_page((unsigned long)page[--i]);
+ vfree(page);
+ return -ENOMEM;
+ }
}
}
+ npages = i;
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
-
- for (i = 0; i < LZO_CMP_PAGES; i++)
+ data = vmalloc(sizeof(*data) * nthr);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ for (i = 0; i < npages; i++)
free_page((unsigned long)page[i]);
-
+ vfree(page);
return -ENOMEM;
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
+ /*
+ * Start the decompression threads.
+ */
+ for (thr = 0; thr < nthr; thr++) {
+ atomic_set(&data[thr].ready, 0);
+ atomic_set(&data[thr].stop, 0);
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
- return -ENOMEM;
+ data[thr].thr = kthread_create(lzo_decompress_threadfn,
+ &data[thr],
+ "image_decompress/%zu", thr);
+ /*
+ * On error, stop started threads, clean up, then exit.
+ */
+ if (IS_ERR(data[thr].thr)) {
+ printk(KERN_ERR
+ "PM: Cannot start decompression threads\n");
+ while (thr) {
+ --thr;
+ kthread_stop(data[thr].thr);
+ wake_up(&data[thr].go);
+ }
+ vfree(data);
+ for (i = 0; i < npages; i++)
+ free_page((unsigned long)page[i]);
+ vfree(page);
+ return -ENOMEM;
+ }
+ }
+
+ /*
+ * Bind the threads to CPUs and wake them up.
+ */
+ thr = 0;
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ kthread_bind(data[thr++].thr, cpu);
+ if (thr >= nthr)
+ break;
}
+ for (thr = 0; thr < nthr; thr++)
+ wake_up_process(data[thr].thr);
printk(KERN_INFO
"PM: Loading and decompressing image data (%u pages) ... ",
@@ -808,61 +1016,117 @@ static int load_image_lzo(struct swap_map_handle *handle,
if (error <= 0)
goto out_finish;
- for (;;) {
- error = swap_read_page(handle, page[0], NULL); /* sync */
- if (error)
- break;
-
- cmp_len = *(size_t *)page[0];
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- error = -1;
- break;
+ for(;;) {
+ for (i = 0; have < LZO_CMP_PAGES && i < want; i++) {
+ error = swap_read_page(handle, page[ring], &bio);
+ if (error) {
+ /*
+ * On real read error, finish. On end of data,
+ * just exit the read loop.
+ */
+ if (handle->cur &&
+ handle->cur->entries[handle->k])
+ goto out_finish;
+ else
+ break;
+ }
+ if (++ring >= npages)
+ ring = 0;
}
+ asked += i;
+ want -= i;
- for (off = PAGE_SIZE, i = 1;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- error = swap_read_page(handle, page[i], &bio);
+ /*
+ * We are out of data, wait for some more.
+ */
+ if (!have) {
+ if (!asked)
+ break;
+
+ error = hib_wait_on_bio_chain(&bio);
if (error)
goto out_finish;
+ have += asked;
+ asked = 0;
}
- error = hib_wait_on_bio_chain(&bio); /* need all data now */
- if (error)
- goto out_finish;
+ for (thr = 0; have && thr < nthr; thr++) {
+ data[thr].cmp_len = *(size_t *)page[pg];
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(LZO_UNC_SIZE))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ error = -1;
+ goto out_finish;
+ }
- for (off = 0, i = 0;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- memcpy(cmp + off, page[i], PAGE_SIZE);
- }
+ need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
+ PAGE_SIZE);
+ if (need > have)
+ break;
- unc_len = LZO_UNC_SIZE;
- error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len,
- unc, &unc_len);
- if (error < 0) {
- printk(KERN_ERR "PM: LZO decompression failed\n");
- break;
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(data[thr].cmp + off,
+ page[pg], PAGE_SIZE);
+ have--;
+ want++;
+ if (++pg >= npages)
+ pg = 0;
+ }
+
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (unlikely(!unc_len ||
- unc_len > LZO_UNC_SIZE ||
- unc_len & (PAGE_SIZE - 1))) {
- printk(KERN_ERR "PM: Invalid LZO uncompressed length\n");
- error = -1;
- break;
+ /*
+ * Wait for more data while we are decompressing.
+ */
+ if (have < LZO_CMP_PAGES && asked) {
+ error = hib_wait_on_bio_chain(&bio);
+ if (error)
+ goto out_finish;
+ have += asked;
+ asked = 0;
}
- for (off = 0; off < unc_len; off += PAGE_SIZE) {
- memcpy(data_of(*snapshot), unc + off, PAGE_SIZE);
+ for (cthr = thr, thr = 0; thr < cthr; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
+
+ error = data[thr].ret;
- if (!(nr_pages % m))
- printk("\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ if (error < 0) {
+ printk(KERN_ERR
+ "PM: LZO decompression failed\n");
+ goto out_finish;
+ }
- error = snapshot_write_next(snapshot);
- if (error <= 0)
+ if (unlikely(!data[thr].unc_len ||
+ data[thr].unc_len > LZO_UNC_SIZE ||
+ data[thr].unc_len & (PAGE_SIZE - 1))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO uncompressed length\n");
+ error = -1;
goto out_finish;
+ }
+
+ for (off = 0;
+ off < data[thr].unc_len; off += PAGE_SIZE) {
+ memcpy(data_of(*snapshot),
+ data[thr].unc + off, PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk("\b\b\b\b%3d%%", nr_pages / m);
+ nr_pages++;
+
+ error = snapshot_write_next(snapshot);
+ if (error <= 0)
+ goto out_finish;
+ }
}
}
@@ -877,10 +1141,14 @@ out_finish:
printk("\n");
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
- vfree(cmp);
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
+ for (thr = 0; thr < nthr; thr++) {
+ kthread_stop(data[thr].thr);
+ wake_up(&data[thr].go);
+ }
+ vfree(data);
+ for (i = 0; i < npages; i++)
free_page((unsigned long)page[i]);
+ vfree(page);
return error;
}
---------------------------------------
--
Bojan
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH]: Improve performance of LZO hibernation
@ 2011-09-19 4:29 Bojan Smojver
0 siblings, 0 replies; 5+ messages in thread
From: Bojan Smojver @ 2011-09-19 4:29 UTC (permalink / raw)
To: linux-kernel
On Mon, 2011-09-19 at 14:14 +1000, Bojan Smojver wrote:
> + size_t ring = 0, pg = 0, npages,
> + have = 0, want = MAP_PAGE_ENTRIES, need, asked = 0;
The want = MAP_PAGE_ENTRIES part is a bug for sure (a leftover from the
previous version of the patch). The value of want should be set to
npages, once npages is set after the page allocation loop, of course.
--
Bojan
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH]: Improve performance of LZO hibernation
@ 2011-09-19 7:53 Bojan Smojver
0 siblings, 0 replies; 5+ messages in thread
From: Bojan Smojver @ 2011-09-19 7:53 UTC (permalink / raw)
To: linux-kernel
On Mon, 2011-09-19 at 14:29 +1000, Bojan Smojver wrote:
> The want = MAP_PAGE_ENTRIES part is a bug for sure (a leftover from
> the previous version of the patch). The value of want should be set to
> npages, once npages is set after the page allocation loop, of course.
OK, v2 of the patch is here. It should address the above, and the cleanup
has also been made simpler. In addition, pages required for reading the image
are allocated last, which was supposed to be the case from the start.
-----------------------------------
Use threads for LZO compression/decompression on hibernate/thaw.
Improve read buffering on thaw.
v2
Signed-off-by: Bojan Smojver <bojan@rexursive.com>
---
kernel/power/swap.c | 525 +++++++++++++++++++++++++++++++++++++--------------
1 files changed, 387 insertions(+), 138 deletions(-)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7c97c3a..6d26338 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -27,6 +27,9 @@
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/vmalloc.h>
+#include <linux/cpu.h>
+#include <linux/atomic.h>
+#include <linux/kthread.h>
#include "power.h"
@@ -372,6 +375,9 @@ static int swap_writer_finish(struct swap_map_handle *handle,
LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
+/* Maximum number of threads for compression/decompression. */
+#define LZO_THREADS 2
+
/**
* save_image - save the suspend image data
*/
@@ -419,6 +425,46 @@ static int save_image(struct swap_map_handle *handle,
return ret;
}
+/**
+ * Structure used for LZO data compression.
+ */
+struct cmp_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ wait_queue_head_t go; /* start compression */
+ wait_queue_head_t done; /* compression done */
+ int ret; /* return code */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+ unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
+};
+
+/**
+ * Compression function that runs in its own thread.
+ */
+static int lzo_compress_threadfn(void *data)
+{
+ struct cmp_data *d = data;
+
+ while(1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
+ atomic_set(&d->ready, 0);
+ d->ret = lzo1x_1_compress(d->unc, d->unc_len,
+ d->cmp + LZO_HEADER, &d->cmp_len,
+ d->wrk);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+
+ return 0;
+}
/**
* save_image_lzo - Save the suspend image data compressed with LZO.
@@ -434,41 +480,75 @@ static int save_image_lzo(struct swap_map_handle *handle,
int ret = 0;
int nr_pages;
int err2;
+ int cpu;
struct bio *bio;
struct timeval start;
struct timeval stop;
- size_t off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *wrk, *page;
+ size_t off, thr, cthr, nthr;
+ unsigned char *page = NULL;
+ struct cmp_data *data = NULL;
+
+ /*
+ * Get more grunt. We don't care if this fails - we'll do it with just
+ * one core in that case.
+ */
+ enable_nonboot_cpus();
+
+ /*
+ * We'll limit the number of threads for compression to limit memory
+ * footprint.
+ */
+ nthr = num_online_cpus() - 1;
+ nthr = nthr > LZO_THREADS ? LZO_THREADS : (nthr < 1 ? 1 : nthr);
page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
if (!page) {
printk(KERN_ERR "PM: Failed to allocate LZO page\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_clean;
}
- wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
- if (!wrk) {
- printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
- free_page((unsigned long)page);
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nthr);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ ret = -ENOMEM;
+ goto out_clean;
}
-
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ for (thr = 0; thr < nthr; thr++)
+ memset(&data[thr], 0, offsetof(struct cmp_data, go));
+
+ /*
+ * Start the compression threads.
+ */
+ for (thr = 0; thr < nthr; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_create(lzo_compress_threadfn,
+ &data[thr],
+ "image_compress/%zu", thr);
+ if (IS_ERR(data[thr].thr)) {
+ data[thr].thr = NULL;
+ printk(KERN_ERR
+ "PM: Cannot start compression threads\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ /*
+ * Bind the threads to CPUs and wake them up.
+ */
+ thr = 0;
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ kthread_bind(data[thr++].thr, cpu);
+ if (thr >= nthr)
+ break;
}
+ for (thr = 0; thr < nthr; thr++)
+ wake_up_process(data[thr].thr);
printk(KERN_INFO
"PM: Compressing and saving image data (%u pages) ... ",
@@ -480,54 +560,75 @@ static int save_image_lzo(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
for (;;) {
- for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
- ret = snapshot_read_next(snapshot);
- if (ret < 0)
- goto out_finish;
-
- if (!ret)
+ for (thr = 0; thr < nthr; thr++) {
+ for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
+ ret = snapshot_read_next(snapshot);
+ if (ret < 0)
+ goto out_finish;
+
+ if (!ret)
+ break;
+
+ memcpy(data[thr].unc + off,
+ data_of(*snapshot), PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk(KERN_CONT "\b\b\b\b%3d%%",
+ nr_pages / m);
+ nr_pages++;
+ }
+ if (!off)
break;
- memcpy(unc + off, data_of(*snapshot), PAGE_SIZE);
+ data[thr].unc_len = off;
- if (!(nr_pages % m))
- printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (!off)
+ if (!thr)
break;
- unc_len = off;
- ret = lzo1x_1_compress(unc, unc_len,
- cmp + LZO_HEADER, &cmp_len, wrk);
- if (ret < 0) {
- printk(KERN_ERR "PM: LZO compression failed\n");
- break;
- }
+ for (cthr = thr, thr = 0; thr < cthr; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(unc_len))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- ret = -1;
- break;
- }
-
- *(size_t *)cmp = cmp_len;
+ ret = data[thr].ret;
- /*
- * Given we are writing one page at a time to disk, we copy
- * that much from the buffer, although the last bit will likely
- * be smaller than full page. This is OK - we saved the length
- * of the compressed data, so any garbage at the end will be
- * discarded when we read it.
- */
- for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
- memcpy(page, cmp + off, PAGE_SIZE);
+ if (ret < 0) {
+ printk(KERN_ERR "PM: LZO compression failed\n");
+ goto out_finish;
+ }
- ret = swap_write_page(handle, page, &bio);
- if (ret)
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(data[thr].unc_len))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ ret = -1;
goto out_finish;
+ }
+
+ *(size_t *)data[thr].cmp = data[thr].cmp_len;
+
+ /*
+ * Given we are writing one page at a time to disk, we
+ * copy that much from the buffer, although the last
+ * bit will likely be smaller than full page. This is
+ * OK - we saved the length of the compressed data, so
+ * any garbage at the end will be discarded when we
+ * read it.
+ */
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(page, data[thr].cmp + off, PAGE_SIZE);
+
+ ret = swap_write_page(handle, page, &bio);
+ if (ret)
+ goto out_finish;
+ }
}
}
@@ -541,11 +642,16 @@ out_finish:
else
printk(KERN_CONT "\n");
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
-
- vfree(cmp);
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
+out_clean:
+ for (thr = 0; thr < nthr; thr++) {
+ if (data[thr].thr) {
+ kthread_stop(data[thr].thr);
+ wake_up(&data[thr].go);
+ }
+ }
+ if (data) vfree(data);
+ if (page) free_page((unsigned long)page);
+ disable_nonboot_cpus();
return ret;
}
@@ -743,6 +849,46 @@ static int load_image(struct swap_map_handle *handle,
}
/**
+ * Structure used for LZO data decompression.
+ */
+struct dec_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ wait_queue_head_t go; /* start compression */
+ wait_queue_head_t done; /* compression done */
+ int ret; /* return code */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+};
+
+/**
+ * Decompression function that runs in its own thread.
+ */
+static int lzo_decompress_threadfn(void *data)
+{
+ struct dec_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
+ atomic_set(&d->ready, 0);
+ d->unc_len = LZO_UNC_SIZE;
+ d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
+ d->unc, &d->unc_len);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+
+ return 0;
+}
+
+/**
* load_image_lzo - Load compressed image data and decompress them with LZO.
* @handle: Swap map handle to use for loading data.
* @snapshot: Image to copy uncompressed data into.
@@ -754,45 +900,85 @@ static int load_image_lzo(struct swap_map_handle *handle,
{
unsigned int m;
int error = 0;
+ int cpu;
struct bio *bio;
struct timeval start;
struct timeval stop;
unsigned nr_pages;
- size_t i, off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
-
- for (i = 0; i < LZO_CMP_PAGES; i++) {
- page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
- if (!page[i]) {
- printk(KERN_ERR "PM: Failed to allocate LZO page\n");
-
- while (i)
- free_page((unsigned long)page[--i]);
+ size_t i, off, thr, cthr, nthr;
+ size_t ring = 0, pg = 0, npages, have = 0, want, need, asked = 0;
+ unsigned char **page = NULL;
+ struct dec_data *data = NULL;
+
+ /*
+ * We'll limit the number of threads for decompression to limit memory
+ * footprint.
+ */
+ nthr = num_online_cpus() - 1;
+ nthr = nthr > LZO_THREADS ? LZO_THREADS : (nthr < 1 ? 1 : nthr);
+
+ page = vmalloc(sizeof(*page) * MAP_PAGE_ENTRIES);
+ if (!page) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ memset(page, 0, sizeof(*page) * MAP_PAGE_ENTRIES);
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nthr);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ for (thr = 0; thr < nthr; thr++)
+ memset(&data[thr], 0, offsetof(struct cmp_data, go));
+
+ /*
+ * Start the decompression threads.
+ */
+ for (thr = 0; thr < nthr; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_create(lzo_decompress_threadfn,
+ &data[thr],
+ "image_decompress/%zu", thr);
+ if (IS_ERR(data[thr].thr)) {
+ data[thr].thr = NULL;
+ printk(KERN_ERR
+ "PM: Cannot start decompression threads\n");
+ error = -ENOMEM;
+ goto out_clean;
}
}
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
-
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ for (i = 0; i < MAP_PAGE_ENTRIES; i++) {
+ page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!page[i]) {
+ if (i < LZO_CMP_PAGES) {
+ printk(KERN_ERR
+ "PM: Failed to allocate LZO pages\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ }
}
-
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
-
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ want = npages = i;
+
+ /*
+ * Bind the threads to CPUs and wake them up.
+ */
+ thr = 0;
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ kthread_bind(data[thr++].thr, cpu);
+ if (thr >= nthr)
+ break;
}
+ for (thr = 0; thr < nthr; thr++)
+ wake_up_process(data[thr].thr);
printk(KERN_INFO
"PM: Loading and decompressing image data (%u pages) ... ",
@@ -808,61 +994,117 @@ static int load_image_lzo(struct swap_map_handle *handle,
if (error <= 0)
goto out_finish;
- for (;;) {
- error = swap_read_page(handle, page[0], NULL); /* sync */
- if (error)
- break;
-
- cmp_len = *(size_t *)page[0];
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- error = -1;
- break;
+ for(;;) {
+ for (i = 0; have < LZO_CMP_PAGES && i < want; i++) {
+ error = swap_read_page(handle, page[ring], &bio);
+ if (error) {
+ /*
+ * On real read error, finish. On end of data,
+ * just exit the read loop.
+ */
+ if (handle->cur &&
+ handle->cur->entries[handle->k])
+ goto out_finish;
+ else
+ break;
+ }
+ if (++ring >= npages)
+ ring = 0;
}
+ asked += i;
+ want -= i;
+
+ /*
+ * We are out of data, wait for some more.
+ */
+ if (!have) {
+ if (!asked)
+ break;
- for (off = PAGE_SIZE, i = 1;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- error = swap_read_page(handle, page[i], &bio);
+ error = hib_wait_on_bio_chain(&bio);
if (error)
goto out_finish;
+ have += asked;
+ asked = 0;
}
- error = hib_wait_on_bio_chain(&bio); /* need all data now */
- if (error)
- goto out_finish;
+ for (thr = 0; have && thr < nthr; thr++) {
+ data[thr].cmp_len = *(size_t *)page[pg];
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(LZO_UNC_SIZE))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ error = -1;
+ goto out_finish;
+ }
- for (off = 0, i = 0;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- memcpy(cmp + off, page[i], PAGE_SIZE);
- }
+ need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
+ PAGE_SIZE);
+ if (need > have)
+ break;
- unc_len = LZO_UNC_SIZE;
- error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len,
- unc, &unc_len);
- if (error < 0) {
- printk(KERN_ERR "PM: LZO decompression failed\n");
- break;
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(data[thr].cmp + off,
+ page[pg], PAGE_SIZE);
+ have--;
+ want++;
+ if (++pg >= npages)
+ pg = 0;
+ }
+
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (unlikely(!unc_len ||
- unc_len > LZO_UNC_SIZE ||
- unc_len & (PAGE_SIZE - 1))) {
- printk(KERN_ERR "PM: Invalid LZO uncompressed length\n");
- error = -1;
- break;
+ /*
+ * Wait for more data while we are decompressing.
+ */
+ if (have < LZO_CMP_PAGES && asked) {
+ error = hib_wait_on_bio_chain(&bio);
+ if (error)
+ goto out_finish;
+ have += asked;
+ asked = 0;
}
- for (off = 0; off < unc_len; off += PAGE_SIZE) {
- memcpy(data_of(*snapshot), unc + off, PAGE_SIZE);
+ for (cthr = thr, thr = 0; thr < cthr; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
- if (!(nr_pages % m))
- printk("\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ error = data[thr].ret;
- error = snapshot_write_next(snapshot);
- if (error <= 0)
+ if (error < 0) {
+ printk(KERN_ERR
+ "PM: LZO decompression failed\n");
goto out_finish;
+ }
+
+ if (unlikely(!data[thr].unc_len ||
+ data[thr].unc_len > LZO_UNC_SIZE ||
+ data[thr].unc_len & (PAGE_SIZE - 1))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO uncompressed length\n");
+ error = -1;
+ goto out_finish;
+ }
+
+ for (off = 0;
+ off < data[thr].unc_len; off += PAGE_SIZE) {
+ memcpy(data_of(*snapshot),
+ data[thr].unc + off, PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk("\b\b\b\b%3d%%", nr_pages / m);
+ nr_pages++;
+
+ error = snapshot_write_next(snapshot);
+ if (error <= 0)
+ goto out_finish;
+ }
}
}
@@ -876,11 +1118,18 @@ out_finish:
} else
printk("\n");
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
-
- vfree(cmp);
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
+out_clean:
+ for (i = 0; i < npages; i++)
+ if (page[i])
+ free_page((unsigned long)page[i]);
+ for (thr = 0; thr < nthr; thr++) {
+ if (data[thr].thr) {
+ kthread_stop(data[thr].thr);
+ wake_up(&data[thr].go);
+ }
+ }
+ if (data) vfree(data);
+ if (page) vfree(page);
return error;
}
-----------------------------------
--
Bojan
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH]: Improve performance of LZO hibernation
@ 2011-09-22 1:39 Bojan Smojver
0 siblings, 0 replies; 5+ messages in thread
From: Bojan Smojver @ 2011-09-22 1:39 UTC (permalink / raw)
To: linux-kernel; +Cc: Rafael J. Wysocki
On Mon, 2011-09-19 at 17:53 +1000, Bojan Smojver wrote:
> OK, v2 of the patch is here.
Version 3 follows. Thanks to Rafael for pointing out that there is no
need to enable nonboot cpus - they are already enabled. I also fixed a
potential problem where we could get wedged in an infinite loop on image
load, if the image is not correct.
-----------------------------------
Use threads for LZO compression/decompression on hibernate/thaw.
Improve read buffering on thaw.
v3
Signed-off-by: Bojan Smojver <bojan@rexursive.com>
---
kernel/power/swap.c | 535 +++++++++++++++++++++++++++++++++++++--------------
1 files changed, 395 insertions(+), 140 deletions(-)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7c97c3a..1f88d34 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -27,6 +27,9 @@
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/vmalloc.h>
+#include <linux/cpu.h>
+#include <linux/atomic.h>
+#include <linux/kthread.h>
#include "power.h"
@@ -372,6 +375,9 @@ static int swap_writer_finish(struct swap_map_handle *handle,
LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
+/* Maximum number of threads for compression/decompression. */
+#define LZO_THREADS 2
+
/**
* save_image - save the suspend image data
*/
@@ -419,6 +425,46 @@ static int save_image(struct swap_map_handle *handle,
return ret;
}
+/**
+ * Structure used for LZO data compression.
+ */
+struct cmp_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ wait_queue_head_t go; /* start compression */
+ wait_queue_head_t done; /* compression done */
+ int ret; /* return code */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+ unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
+};
+
+/**
+ * Compression function that runs in its own thread.
+ */
+static int lzo_compress_threadfn(void *data)
+{
+ struct cmp_data *d = data;
+
+ while(1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+ atomic_set(&d->ready, 0);
+
+ d->ret = lzo1x_1_compress(d->unc, d->unc_len,
+ d->cmp + LZO_HEADER, &d->cmp_len,
+ d->wrk);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+
+ return 0;
+}
/**
* save_image_lzo - Save the suspend image data compressed with LZO.
@@ -434,45 +480,74 @@ static int save_image_lzo(struct swap_map_handle *handle,
int ret = 0;
int nr_pages;
int err2;
+ int cpu;
struct bio *bio;
struct timeval start;
struct timeval stop;
- size_t off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *wrk, *page;
+ size_t off, thr, cthr, nthr;
+ unsigned char *page = NULL;
+ struct cmp_data *data = NULL;
+
+ /*
+ * We'll limit the number of threads for compression to limit memory
+ * footprint.
+ */
+ nthr = num_online_cpus() - 1;
+ nthr = nthr > LZO_THREADS ? LZO_THREADS : (nthr < 1 ? 1 : nthr);
page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
if (!page) {
printk(KERN_ERR "PM: Failed to allocate LZO page\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_clean;
}
- wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
- if (!wrk) {
- printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
- free_page((unsigned long)page);
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nthr);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ ret = -ENOMEM;
+ goto out_clean;
}
-
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ for (thr = 0; thr < nthr; thr++)
+ memset(&data[thr], 0, offsetof(struct cmp_data, go));
+
+ /*
+ * Start the compression threads.
+ */
+ for (thr = 0; thr < nthr; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_create(lzo_compress_threadfn,
+ &data[thr],
+ "image_compress/%zu", thr);
+ if (IS_ERR(data[thr].thr)) {
+ data[thr].thr = NULL;
+ printk(KERN_ERR
+ "PM: Cannot start compression threads\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ /*
+ * Bind the threads to CPUs and wake them up.
+ */
+ thr = 0;
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ kthread_bind(data[thr++].thr, cpu);
+ if (thr >= nthr)
+ break;
}
+ for (thr = 0; thr < nthr; thr++)
+ wake_up_process(data[thr].thr);
printk(KERN_INFO
+ "PM: Using %zu thread(s) for compression.\n"
"PM: Compressing and saving image data (%u pages) ... ",
- nr_to_write);
+ nthr, nr_to_write);
m = nr_to_write / 100;
if (!m)
m = 1;
@@ -480,54 +555,75 @@ static int save_image_lzo(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
for (;;) {
- for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
- ret = snapshot_read_next(snapshot);
- if (ret < 0)
- goto out_finish;
-
- if (!ret)
+ for (thr = 0; thr < nthr; thr++) {
+ for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
+ ret = snapshot_read_next(snapshot);
+ if (ret < 0)
+ goto out_finish;
+
+ if (!ret)
+ break;
+
+ memcpy(data[thr].unc + off,
+ data_of(*snapshot), PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk(KERN_CONT "\b\b\b\b%3d%%",
+ nr_pages / m);
+ nr_pages++;
+ }
+ if (!off)
break;
- memcpy(unc + off, data_of(*snapshot), PAGE_SIZE);
+ data[thr].unc_len = off;
- if (!(nr_pages % m))
- printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (!off)
+ if (!thr)
break;
- unc_len = off;
- ret = lzo1x_1_compress(unc, unc_len,
- cmp + LZO_HEADER, &cmp_len, wrk);
- if (ret < 0) {
- printk(KERN_ERR "PM: LZO compression failed\n");
- break;
- }
+ for (cthr = thr, thr = 0; thr < cthr; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(unc_len))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- ret = -1;
- break;
- }
+ ret = data[thr].ret;
- *(size_t *)cmp = cmp_len;
-
- /*
- * Given we are writing one page at a time to disk, we copy
- * that much from the buffer, although the last bit will likely
- * be smaller than full page. This is OK - we saved the length
- * of the compressed data, so any garbage at the end will be
- * discarded when we read it.
- */
- for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
- memcpy(page, cmp + off, PAGE_SIZE);
+ if (ret < 0) {
+ printk(KERN_ERR "PM: LZO compression failed\n");
+ goto out_finish;
+ }
- ret = swap_write_page(handle, page, &bio);
- if (ret)
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(data[thr].unc_len))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ ret = -1;
goto out_finish;
+ }
+
+ *(size_t *)data[thr].cmp = data[thr].cmp_len;
+
+ /*
+ * Given we are writing one page at a time to disk, we
+ * copy that much from the buffer, although the last
+ * bit will likely be smaller than full page. This is
+ * OK - we saved the length of the compressed data, so
+ * any garbage at the end will be discarded when we
+ * read it.
+ */
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(page, data[thr].cmp + off, PAGE_SIZE);
+
+ ret = swap_write_page(handle, page, &bio);
+ if (ret)
+ goto out_finish;
+ }
}
}
@@ -541,11 +637,15 @@ out_finish:
else
printk(KERN_CONT "\n");
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
-
- vfree(cmp);
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
+out_clean:
+ for (thr = 0; thr < nthr; thr++) {
+ if (data[thr].thr) {
+ kthread_stop(data[thr].thr);
+ wake_up(&data[thr].go);
+ }
+ }
+ if (data) vfree(data);
+ if (page) free_page((unsigned long)page);
return ret;
}
@@ -743,6 +843,46 @@ static int load_image(struct swap_map_handle *handle,
}
/**
+ * Structure used for LZO data decompression.
+ */
+struct dec_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ wait_queue_head_t go; /* start decompression */
+ wait_queue_head_t done; /* decompression done */
+ int ret; /* return code */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+};
+
+/**
+ * Decompression function that runs in its own thread.
+ */
+static int lzo_decompress_threadfn(void *data)
+{
+ struct dec_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+ atomic_set(&d->ready, 0);
+
+ d->unc_len = LZO_UNC_SIZE;
+ d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
+ d->unc, &d->unc_len);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+
+ return 0;
+}
+
+/**
* load_image_lzo - Load compressed image data and decompress them with LZO.
* @handle: Swap map handle to use for loading data.
* @snapshot: Image to copy uncompressed data into.
@@ -754,49 +894,90 @@ static int load_image_lzo(struct swap_map_handle *handle,
{
unsigned int m;
int error = 0;
+ int cpu, eof = 0;
struct bio *bio;
struct timeval start;
struct timeval stop;
unsigned nr_pages;
- size_t i, off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
-
- for (i = 0; i < LZO_CMP_PAGES; i++) {
- page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
- if (!page[i]) {
- printk(KERN_ERR "PM: Failed to allocate LZO page\n");
-
- while (i)
- free_page((unsigned long)page[--i]);
+ size_t i, off, thr, cthr, nthr;
+ size_t ring = 0, pg = 0, npages, have = 0, want, need, asked = 0;
+ unsigned char **page = NULL;
+ struct dec_data *data = NULL;
+
+ /*
+ * We'll limit the number of threads for decompression to limit memory
+ * footprint.
+ */
+ nthr = num_online_cpus() - 1;
+ nthr = nthr > LZO_THREADS ? LZO_THREADS : (nthr < 1 ? 1 : nthr);
+
+ page = vmalloc(sizeof(*page) * MAP_PAGE_ENTRIES);
+ if (!page) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ memset(page, 0, sizeof(*page) * MAP_PAGE_ENTRIES);
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nthr);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ for (thr = 0; thr < nthr; thr++)
+ memset(&data[thr], 0, offsetof(struct cmp_data, go));
+
+ /*
+ * Start the decompression threads.
+ */
+ for (thr = 0; thr < nthr; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_create(lzo_decompress_threadfn,
+ &data[thr],
+ "image_decompress/%zu", thr);
+ if (IS_ERR(data[thr].thr)) {
+ data[thr].thr = NULL;
+ printk(KERN_ERR
+ "PM: Cannot start decompression threads\n");
+ error = -ENOMEM;
+ goto out_clean;
}
}
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
-
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ for (i = 0; i < MAP_PAGE_ENTRIES; i++) {
+ page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!page[i]) {
+ if (i < LZO_CMP_PAGES) {
+ printk(KERN_ERR
+ "PM: Failed to allocate LZO pages\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ }
}
-
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
-
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ want = npages = i;
+
+ /*
+ * Bind the threads to CPUs and wake them up.
+ */
+ thr = 0;
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ kthread_bind(data[thr++].thr, cpu);
+ if (thr >= nthr)
+ break;
}
+ for (thr = 0; thr < nthr; thr++)
+ wake_up_process(data[thr].thr);
printk(KERN_INFO
+ "PM: Using %zu thread(s) for decompression.\n"
"PM: Loading and decompressing image data (%u pages) ... ",
- nr_to_read);
+ nthr, nr_to_read);
m = nr_to_read / 100;
if (!m)
m = 1;
@@ -808,61 +989,128 @@ static int load_image_lzo(struct swap_map_handle *handle,
if (error <= 0)
goto out_finish;
- for (;;) {
- error = swap_read_page(handle, page[0], NULL); /* sync */
- if (error)
- break;
-
- cmp_len = *(size_t *)page[0];
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- error = -1;
- break;
+ for(;;) {
+ for (i = 0; !eof && i < want; i++) {
+ error = swap_read_page(handle, page[ring], &bio);
+ if (error) {
+ /*
+ * On real read error, finish. On end of data,
+ * set EOF flag and just exit the read loop.
+ */
+ if (handle->cur &&
+ handle->cur->entries[handle->k]) {
+ goto out_finish;
+ } else {
+ eof = 1;
+ break;
+ }
+ }
+ if (++ring >= npages)
+ ring = 0;
}
+ asked += i;
+ want -= i;
+
+ /*
+ * We are out of data, wait for some more.
+ */
+ if (!have) {
+ if (!asked)
+ break;
- for (off = PAGE_SIZE, i = 1;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- error = swap_read_page(handle, page[i], &bio);
+ error = hib_wait_on_bio_chain(&bio);
if (error)
goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
}
- error = hib_wait_on_bio_chain(&bio); /* need all data now */
- if (error)
- goto out_finish;
+ for (thr = 0; have && thr < nthr; thr++) {
+ data[thr].cmp_len = *(size_t *)page[pg];
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(LZO_UNC_SIZE))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ error = -1;
+ goto out_finish;
+ }
- for (off = 0, i = 0;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- memcpy(cmp + off, page[i], PAGE_SIZE);
- }
+ need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
+ PAGE_SIZE);
+ if (need > have) {
+ if (eof > 1) {
+ error = -1;
+ goto out_finish;
+ }
+ break;
+ }
- unc_len = LZO_UNC_SIZE;
- error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len,
- unc, &unc_len);
- if (error < 0) {
- printk(KERN_ERR "PM: LZO decompression failed\n");
- break;
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(data[thr].cmp + off,
+ page[pg], PAGE_SIZE);
+ have--;
+ want++;
+ if (++pg >= npages)
+ pg = 0;
+ }
+
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (unlikely(!unc_len ||
- unc_len > LZO_UNC_SIZE ||
- unc_len & (PAGE_SIZE - 1))) {
- printk(KERN_ERR "PM: Invalid LZO uncompressed length\n");
- error = -1;
- break;
+ /*
+ * Wait for more data while we are decompressing.
+ */
+ if (have < LZO_CMP_PAGES && asked) {
+ error = hib_wait_on_bio_chain(&bio);
+ if (error)
+ goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
}
- for (off = 0; off < unc_len; off += PAGE_SIZE) {
- memcpy(data_of(*snapshot), unc + off, PAGE_SIZE);
+ for (cthr = thr, thr = 0; thr < cthr; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
+
+ error = data[thr].ret;
- if (!(nr_pages % m))
- printk("\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ if (error < 0) {
+ printk(KERN_ERR
+ "PM: LZO decompression failed\n");
+ goto out_finish;
+ }
- error = snapshot_write_next(snapshot);
- if (error <= 0)
+ if (unlikely(!data[thr].unc_len ||
+ data[thr].unc_len > LZO_UNC_SIZE ||
+ data[thr].unc_len & (PAGE_SIZE - 1))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO uncompressed length\n");
+ error = -1;
goto out_finish;
+ }
+
+ for (off = 0;
+ off < data[thr].unc_len; off += PAGE_SIZE) {
+ memcpy(data_of(*snapshot),
+ data[thr].unc + off, PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk("\b\b\b\b%3d%%", nr_pages / m);
+ nr_pages++;
+
+ error = snapshot_write_next(snapshot);
+ if (error <= 0)
+ goto out_finish;
+ }
}
}
@@ -876,11 +1124,18 @@ out_finish:
} else
printk("\n");
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
-
- vfree(cmp);
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
+out_clean:
+ for (i = 0; i < npages; i++)
+ if (page[i])
+ free_page((unsigned long)page[i]);
+ for (thr = 0; thr < nthr; thr++) {
+ if (data[thr].thr) {
+ kthread_stop(data[thr].thr);
+ wake_up(&data[thr].go);
+ }
+ }
+ if (data) vfree(data);
+ if (page) vfree(page);
return error;
}
-----------------------------------
--
Bojan
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH]: Improve performance of LZO hibernation
@ 2011-09-26 9:12 Bojan Smojver
0 siblings, 0 replies; 5+ messages in thread
From: Bojan Smojver @ 2011-09-26 9:12 UTC (permalink / raw)
To: linux-kernel; +Cc: Rafael J. Wysocki
On Thu, 2011-09-22 at 11:39 +1000, Bojan Smojver wrote:
> Version 3 follows.
Here is version 4. Turns out that if we completely drop sync writes on
image save, we can really crank it. This essentially doubled the speed
on image save for me. It relies on write_page() running into memory
allocation trouble before we go sync, so it's very aggressive. Handle
with care and all that.
-----------------------------------
Use threads for LZO compression/decompression on hibernate/thaw.
Improve write/read buffering on hibernate/thaw.
v4
Signed-off-by: Bojan Smojver <bojan@rexursive.com>
---
kernel/power/swap.c | 558 +++++++++++++++++++++++++++++++++++++--------------
1 files changed, 409 insertions(+), 149 deletions(-)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7c97c3a..59a0bbf 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -27,6 +27,9 @@
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/vmalloc.h>
+#include <linux/cpu.h>
+#include <linux/atomic.h>
+#include <linux/kthread.h>
#include "power.h"
@@ -245,6 +248,7 @@ static int swsusp_swap_check(void)
static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
{
void *src;
+ int ret;
if (!offset)
return -ENOSPC;
@@ -254,9 +258,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
if (src) {
copy_page(src, buf);
} else {
- WARN_ON_ONCE(1);
- bio_chain = NULL; /* Go synchronous */
- src = buf;
+ ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
+ if (ret)
+ return ret;
+ src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (src) {
+ copy_page(src, buf);
+ } else {
+ WARN_ON_ONCE(1);
+ bio_chain = NULL; /* Go synchronous */
+ src = buf;
+ }
}
} else {
src = buf;
@@ -316,14 +328,11 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
return error;
handle->cur->entries[handle->k++] = offset;
if (handle->k >= MAP_PAGE_ENTRIES) {
- error = hib_wait_on_bio_chain(bio_chain);
- if (error)
- goto out;
offset = alloc_swapdev_block(root_swap);
if (!offset)
return -ENOSPC;
handle->cur->next_swap = offset;
- error = write_page(handle->cur, handle->cur_swap, NULL);
+ error = write_page(handle->cur, handle->cur_swap, bio_chain);
if (error)
goto out;
clear_page(handle->cur);
@@ -372,6 +381,9 @@ static int swap_writer_finish(struct swap_map_handle *handle,
LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
+/* Maximum number of threads for compression/decompression. */
+#define LZO_THREADS 2
+
/**
* save_image - save the suspend image data
*/
@@ -419,6 +431,46 @@ static int save_image(struct swap_map_handle *handle,
return ret;
}
+/**
+ * Structure used for LZO data compression.
+ */
+struct cmp_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ wait_queue_head_t go; /* start compression */
+ wait_queue_head_t done; /* compression done */
+ int ret; /* return code */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+ unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
+};
+
+/**
+ * Compression function that runs in its own thread.
+ */
+static int lzo_compress_threadfn(void *data)
+{
+ struct cmp_data *d = data;
+
+ while(1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+ atomic_set(&d->ready, 0);
+
+ d->ret = lzo1x_1_compress(d->unc, d->unc_len,
+ d->cmp + LZO_HEADER, &d->cmp_len,
+ d->wrk);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+
+ return 0;
+}
/**
* save_image_lzo - Save the suspend image data compressed with LZO.
@@ -434,45 +486,74 @@ static int save_image_lzo(struct swap_map_handle *handle,
int ret = 0;
int nr_pages;
int err2;
+ int cpu;
struct bio *bio;
struct timeval start;
struct timeval stop;
- size_t off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *wrk, *page;
+ size_t off, thr, cthr, nthr;
+ unsigned char *page = NULL;
+ struct cmp_data *data = NULL;
+
+ /*
+ * We'll limit the number of threads for compression to limit memory
+ * footprint.
+ */
+ nthr = num_online_cpus() - 1;
+ nthr = nthr > LZO_THREADS ? LZO_THREADS : (nthr < 1 ? 1 : nthr);
page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
if (!page) {
printk(KERN_ERR "PM: Failed to allocate LZO page\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_clean;
}
- wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
- if (!wrk) {
- printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
- free_page((unsigned long)page);
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nthr);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ ret = -ENOMEM;
+ goto out_clean;
}
-
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ for (thr = 0; thr < nthr; thr++)
+ memset(&data[thr], 0, offsetof(struct cmp_data, go));
+
+ /*
+ * Start the compression threads.
+ */
+ for (thr = 0; thr < nthr; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_create(lzo_compress_threadfn,
+ &data[thr],
+ "image_compress/%zu", thr);
+ if (IS_ERR(data[thr].thr)) {
+ data[thr].thr = NULL;
+ printk(KERN_ERR
+ "PM: Cannot start compression threads\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ /*
+ * Bind the threads to CPUs and wake them up.
+ */
+ thr = 0;
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ kthread_bind(data[thr++].thr, cpu);
+ if (thr >= nthr)
+ break;
}
+ for (thr = 0; thr < nthr; thr++)
+ wake_up_process(data[thr].thr);
printk(KERN_INFO
+ "PM: Using %zu thread(s) for compression.\n"
"PM: Compressing and saving image data (%u pages) ... ",
- nr_to_write);
+ nthr, nr_to_write);
m = nr_to_write / 100;
if (!m)
m = 1;
@@ -480,54 +561,75 @@ static int save_image_lzo(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
for (;;) {
- for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
- ret = snapshot_read_next(snapshot);
- if (ret < 0)
- goto out_finish;
-
- if (!ret)
+ for (thr = 0; thr < nthr; thr++) {
+ for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
+ ret = snapshot_read_next(snapshot);
+ if (ret < 0)
+ goto out_finish;
+
+ if (!ret)
+ break;
+
+ memcpy(data[thr].unc + off,
+ data_of(*snapshot), PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk(KERN_CONT "\b\b\b\b%3d%%",
+ nr_pages / m);
+ nr_pages++;
+ }
+ if (!off)
break;
- memcpy(unc + off, data_of(*snapshot), PAGE_SIZE);
+ data[thr].unc_len = off;
- if (!(nr_pages % m))
- printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (!off)
- break;
-
- unc_len = off;
- ret = lzo1x_1_compress(unc, unc_len,
- cmp + LZO_HEADER, &cmp_len, wrk);
- if (ret < 0) {
- printk(KERN_ERR "PM: LZO compression failed\n");
+ if (!thr)
break;
- }
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(unc_len))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- ret = -1;
- break;
- }
+ for (cthr = thr, thr = 0; thr < cthr; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
- *(size_t *)cmp = cmp_len;
+ ret = data[thr].ret;
- /*
- * Given we are writing one page at a time to disk, we copy
- * that much from the buffer, although the last bit will likely
- * be smaller than full page. This is OK - we saved the length
- * of the compressed data, so any garbage at the end will be
- * discarded when we read it.
- */
- for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
- memcpy(page, cmp + off, PAGE_SIZE);
+ if (ret < 0) {
+ printk(KERN_ERR "PM: LZO compression failed\n");
+ goto out_finish;
+ }
- ret = swap_write_page(handle, page, &bio);
- if (ret)
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(data[thr].unc_len))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ ret = -1;
goto out_finish;
+ }
+
+ *(size_t *)data[thr].cmp = data[thr].cmp_len;
+
+ /*
+ * Given we are writing one page at a time to disk, we
+ * copy that much from the buffer, although the last
+ * bit will likely be smaller than full page. This is
+ * OK - we saved the length of the compressed data, so
+ * any garbage at the end will be discarded when we
+ * read it.
+ */
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(page, data[thr].cmp + off, PAGE_SIZE);
+
+ ret = swap_write_page(handle, page, &bio);
+ if (ret)
+ goto out_finish;
+ }
}
}
@@ -541,11 +643,15 @@ out_finish:
else
printk(KERN_CONT "\n");
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
-
- vfree(cmp);
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
+out_clean:
+ for (thr = 0; thr < nthr; thr++) {
+ if (data[thr].thr) {
+ kthread_stop(data[thr].thr);
+ wake_up(&data[thr].go);
+ }
+ }
+ if (data) vfree(data);
+ if (page) free_page((unsigned long)page);
return ret;
}
@@ -668,12 +774,11 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
if (error)
return error;
if (++handle->k >= MAP_PAGE_ENTRIES) {
- error = hib_wait_on_bio_chain(bio_chain);
handle->k = 0;
offset = handle->cur->next_swap;
if (!offset)
release_swap_reader(handle);
- else if (!error)
+ else
error = hib_bio_read_page(offset, handle->cur, NULL);
}
return error;
@@ -743,6 +848,46 @@ static int load_image(struct swap_map_handle *handle,
}
/**
+ * Structure used for LZO data decompression.
+ */
+struct dec_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ wait_queue_head_t go; /* start decompression */
+ wait_queue_head_t done; /* decompression done */
+ int ret; /* return code */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+};
+
+/**
+ * Decompression function that runs in its own thread.
+ */
+static int lzo_decompress_threadfn(void *data)
+{
+ struct dec_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+ atomic_set(&d->ready, 0);
+
+ d->unc_len = LZO_UNC_SIZE;
+ d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
+ d->unc, &d->unc_len);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+
+ return 0;
+}
+
+/**
* load_image_lzo - Load compressed image data and decompress them with LZO.
* @handle: Swap map handle to use for loading data.
* @snapshot: Image to copy uncompressed data into.
@@ -754,49 +899,90 @@ static int load_image_lzo(struct swap_map_handle *handle,
{
unsigned int m;
int error = 0;
+ int cpu, eof = 0;
struct bio *bio;
struct timeval start;
struct timeval stop;
unsigned nr_pages;
- size_t i, off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
-
- for (i = 0; i < LZO_CMP_PAGES; i++) {
- page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
- if (!page[i]) {
- printk(KERN_ERR "PM: Failed to allocate LZO page\n");
-
- while (i)
- free_page((unsigned long)page[--i]);
+ size_t i, off, thr, cthr, nthr;
+ size_t ring = 0, pg = 0, npages, have = 0, want, need, asked = 0;
+ unsigned char **page = NULL;
+ struct dec_data *data = NULL;
+
+ /*
+ * We'll limit the number of threads for decompression to limit memory
+ * footprint.
+ */
+ nthr = num_online_cpus() - 1;
+ nthr = nthr > LZO_THREADS ? LZO_THREADS : (nthr < 1 ? 1 : nthr);
+
+ page = vmalloc(sizeof(*page) * MAP_PAGE_ENTRIES);
+ if (!page) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ memset(page, 0, sizeof(*page) * MAP_PAGE_ENTRIES);
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nthr);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ for (thr = 0; thr < nthr; thr++)
+ memset(&data[thr], 0, offsetof(struct cmp_data, go));
+
+ /*
+ * Start the decompression threads.
+ */
+ for (thr = 0; thr < nthr; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_create(lzo_decompress_threadfn,
+ &data[thr],
+ "image_decompress/%zu", thr);
+ if (IS_ERR(data[thr].thr)) {
+ data[thr].thr = NULL;
+ printk(KERN_ERR
+ "PM: Cannot start decompression threads\n");
+ error = -ENOMEM;
+ goto out_clean;
}
}
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
-
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ for (i = 0; i < MAP_PAGE_ENTRIES; i++) {
+ page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!page[i]) {
+ if (i < LZO_CMP_PAGES) {
+ printk(KERN_ERR
+ "PM: Failed to allocate LZO pages\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ }
}
-
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
-
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ want = npages = i;
+
+ /*
+ * Bind the threads to CPUs and wake them up.
+ */
+ thr = 0;
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ kthread_bind(data[thr++].thr, cpu);
+ if (thr >= nthr)
+ break;
}
+ for (thr = 0; thr < nthr; thr++)
+ wake_up_process(data[thr].thr);
printk(KERN_INFO
+ "PM: Using %zu thread(s) for decompression.\n"
"PM: Loading and decompressing image data (%u pages) ... ",
- nr_to_read);
+ nthr, nr_to_read);
m = nr_to_read / 100;
if (!m)
m = 1;
@@ -808,61 +994,128 @@ static int load_image_lzo(struct swap_map_handle *handle,
if (error <= 0)
goto out_finish;
- for (;;) {
- error = swap_read_page(handle, page[0], NULL); /* sync */
- if (error)
- break;
-
- cmp_len = *(size_t *)page[0];
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- error = -1;
- break;
+ for(;;) {
+ for (i = 0; !eof && i < want; i++) {
+ error = swap_read_page(handle, page[ring], &bio);
+ if (error) {
+ /*
+ * On real read error, finish. On end of data,
+ * set EOF flag and just exit the read loop.
+ */
+ if (handle->cur &&
+ handle->cur->entries[handle->k]) {
+ goto out_finish;
+ } else {
+ eof = 1;
+ break;
+ }
+ }
+ if (++ring >= npages)
+ ring = 0;
}
+ asked += i;
+ want -= i;
- for (off = PAGE_SIZE, i = 1;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- error = swap_read_page(handle, page[i], &bio);
+ /*
+ * We are out of data, wait for some more.
+ */
+ if (!have) {
+ if (!asked)
+ break;
+
+ error = hib_wait_on_bio_chain(&bio);
if (error)
goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
}
- error = hib_wait_on_bio_chain(&bio); /* need all data now */
- if (error)
- goto out_finish;
+ for (thr = 0; have && thr < nthr; thr++) {
+ data[thr].cmp_len = *(size_t *)page[pg];
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(LZO_UNC_SIZE))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ error = -1;
+ goto out_finish;
+ }
- for (off = 0, i = 0;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- memcpy(cmp + off, page[i], PAGE_SIZE);
- }
+ need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
+ PAGE_SIZE);
+ if (need > have) {
+ if (eof > 1) {
+ error = -1;
+ goto out_finish;
+ }
+ break;
+ }
- unc_len = LZO_UNC_SIZE;
- error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len,
- unc, &unc_len);
- if (error < 0) {
- printk(KERN_ERR "PM: LZO decompression failed\n");
- break;
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(data[thr].cmp + off,
+ page[pg], PAGE_SIZE);
+ have--;
+ want++;
+ if (++pg >= npages)
+ pg = 0;
+ }
+
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (unlikely(!unc_len ||
- unc_len > LZO_UNC_SIZE ||
- unc_len & (PAGE_SIZE - 1))) {
- printk(KERN_ERR "PM: Invalid LZO uncompressed length\n");
- error = -1;
- break;
+ /*
+ * Wait for more data while we are decompressing.
+ */
+ if (have < LZO_CMP_PAGES && asked) {
+ error = hib_wait_on_bio_chain(&bio);
+ if (error)
+ goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
}
- for (off = 0; off < unc_len; off += PAGE_SIZE) {
- memcpy(data_of(*snapshot), unc + off, PAGE_SIZE);
+ for (cthr = thr, thr = 0; thr < cthr; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
- if (!(nr_pages % m))
- printk("\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ error = data[thr].ret;
- error = snapshot_write_next(snapshot);
- if (error <= 0)
+ if (error < 0) {
+ printk(KERN_ERR
+ "PM: LZO decompression failed\n");
goto out_finish;
+ }
+
+ if (unlikely(!data[thr].unc_len ||
+ data[thr].unc_len > LZO_UNC_SIZE ||
+ data[thr].unc_len & (PAGE_SIZE - 1))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO uncompressed length\n");
+ error = -1;
+ goto out_finish;
+ }
+
+ for (off = 0;
+ off < data[thr].unc_len; off += PAGE_SIZE) {
+ memcpy(data_of(*snapshot),
+ data[thr].unc + off, PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk("\b\b\b\b%3d%%", nr_pages / m);
+ nr_pages++;
+
+ error = snapshot_write_next(snapshot);
+ if (error <= 0)
+ goto out_finish;
+ }
}
}
@@ -876,11 +1129,18 @@ out_finish:
} else
printk("\n");
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
-
- vfree(cmp);
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
+out_clean:
+ for (i = 0; i < npages; i++)
+ if (page[i])
+ free_page((unsigned long)page[i]);
+ for (thr = 0; thr < nthr; thr++) {
+ if (data[thr].thr) {
+ kthread_stop(data[thr].thr);
+ wake_up(&data[thr].go);
+ }
+ }
+ if (data) vfree(data);
+ if (page) vfree(page);
return error;
}
-----------------------------------
--
Bojan
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2011-09-26 9:12 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-19 4:29 [PATCH]: Improve performance of LZO hibernation Bojan Smojver
-- strict thread matches above, loose matches on Subject: below --
2011-09-26 9:12 Bojan Smojver
2011-09-22 1:39 Bojan Smojver
2011-09-19 7:53 Bojan Smojver
2011-09-19 4:14 Bojan Smojver
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox