* [PATCH v8]: Improve performance of LZO/plain hibernation
@ 2011-09-30 0:27 Bojan Smojver
2011-10-04 22:07 ` Rafael J. Wysocki
0 siblings, 1 reply; 24+ messages in thread
From: Bojan Smojver @ 2011-09-30 0:27 UTC (permalink / raw)
To: linux-kernel; +Cc: Rafael J. Wysocki
Pekka was right - there is no measurable difference in performance
whether we bind threads to specific CPUs or not. So I dropped the
binding: it is not required, and removing it makes the code simpler.
Other than that, just some code simplifications, variable name cleanups
and buglet removals.
Note that I changed the subject to reflect the fact that
hibernation/thaw without compression also benefits from the patch,
thanks to the improved I/O.
---------------------------------------
Use threads for LZO compression/decompression on hibernate/thaw.
Improve buffering on hibernate/thaw.
v8
In my testing, this improved write/read speed by a factor of 2 to 3.
Signed-off-by: Bojan Smojver <bojan@rexursive.com>
---
kernel/power/swap.c | 628 ++++++++++++++++++++++++++++++++++++++-------------
1 files changed, 466 insertions(+), 162 deletions(-)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7c97c3a..d692842 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -27,6 +27,9 @@
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/vmalloc.h>
+#include <linux/cpumask.h>
+#include <linux/atomic.h>
+#include <linux/kthread.h>
#include "power.h"
@@ -43,8 +46,7 @@
* allocated and populated one at a time, so we only need one memory
* page to set up the entire structure.
*
- * During resume we also only need to use one swap_map_page structure
- * at a time.
+ * During resume we pick up all swap_map_page structures into a list.
*/
#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
@@ -54,6 +56,11 @@ struct swap_map_page {
sector_t next_swap;
};
+struct swap_map_page_list {
+ struct swap_map_page *map;
+ struct swap_map_page_list *next;
+};
+
/**
* The swap_map_handle structure is used for handling swap in
* a file-alike way
@@ -61,9 +68,11 @@ struct swap_map_page {
struct swap_map_handle {
struct swap_map_page *cur;
+ struct swap_map_page_list *maps;
sector_t cur_swap;
sector_t first_sector;
unsigned int k;
+ unsigned long nr_free_pages, written;
};
struct swsusp_header {
@@ -245,6 +254,7 @@ static int swsusp_swap_check(void)
static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
{
void *src;
+ int ret;
if (!offset)
return -ENOSPC;
@@ -254,9 +264,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
if (src) {
copy_page(src, buf);
} else {
- WARN_ON_ONCE(1);
- bio_chain = NULL; /* Go synchronous */
- src = buf;
+ ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
+ if (ret)
+ return ret;
+ src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (src) {
+ copy_page(src, buf);
+ } else {
+ WARN_ON_ONCE(1);
+ bio_chain = NULL; /* Go synchronous */
+ src = buf;
+ }
}
} else {
src = buf;
@@ -293,6 +311,8 @@ static int get_swap_writer(struct swap_map_handle *handle)
goto err_rel;
}
handle->k = 0;
+ handle->nr_free_pages = nr_free_pages();
+ handle->written = 0;
handle->first_sector = handle->cur_swap;
return 0;
err_rel:
@@ -316,20 +336,23 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
return error;
handle->cur->entries[handle->k++] = offset;
if (handle->k >= MAP_PAGE_ENTRIES) {
- error = hib_wait_on_bio_chain(bio_chain);
- if (error)
- goto out;
offset = alloc_swapdev_block(root_swap);
if (!offset)
return -ENOSPC;
handle->cur->next_swap = offset;
- error = write_page(handle->cur, handle->cur_swap, NULL);
+ error = write_page(handle->cur, handle->cur_swap, bio_chain);
if (error)
goto out;
clear_page(handle->cur);
handle->cur_swap = offset;
handle->k = 0;
}
+ if (++handle->written > (handle->nr_free_pages >> 1)) {
+ error = hib_wait_on_bio_chain(bio_chain);
+ if (error)
+ goto out;
+ handle->written = 0;
+ }
out:
return error;
}
@@ -372,6 +395,13 @@ static int swap_writer_finish(struct swap_map_handle *handle,
LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
+/* Maximum number of threads for compression/decompression. */
+#define LZO_THREADS 3
+
+/* Maximum number of pages for read buffering. */
+#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 4)
+
+
/**
* save_image - save the suspend image data
*/
@@ -419,6 +449,50 @@ static int save_image(struct swap_map_handle *handle,
return ret;
}
+/**
+ * Structure used for LZO data compression.
+ */
+struct cmp_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ int ret; /* return code */
+ wait_queue_head_t go; /* start compression */
+ wait_queue_head_t done; /* compression done */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+ unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
+};
+
+/**
+ * Compression function that runs in its own thread.
+ */
+static int lzo_compress_threadfn(void *data)
+{
+ struct cmp_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop()) {
+ d->thr = NULL;
+ d->ret = -1;
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ break;
+ }
+ atomic_set(&d->ready, 0);
+
+ d->ret = lzo1x_1_compress(d->unc, d->unc_len,
+ d->cmp + LZO_HEADER, &d->cmp_len,
+ d->wrk);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+ return 0;
+}
/**
* save_image_lzo - Save the suspend image data compressed with LZO.
@@ -437,42 +511,65 @@ static int save_image_lzo(struct swap_map_handle *handle,
struct bio *bio;
struct timeval start;
struct timeval stop;
- size_t off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *wrk, *page;
+ size_t off, thr, run_threads, nr_threads;
+ unsigned char *page = NULL;
+ struct cmp_data *data = NULL;
+
+ /*
+ * We'll limit the number of threads for compression to limit memory
+ * footprint.
+ */
+ nr_threads = num_online_cpus() - 1;
+ if (nr_threads > LZO_THREADS)
+ nr_threads = LZO_THREADS;
+ else if (nr_threads < 1)
+ nr_threads = 1;
page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
if (!page) {
printk(KERN_ERR "PM: Failed to allocate LZO page\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_clean;
}
- wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
- if (!wrk) {
- printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
- free_page((unsigned long)page);
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nr_threads);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ ret = -ENOMEM;
+ goto out_clean;
}
-
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ for (thr = 0; thr < nr_threads; thr++)
+ memset(&data[thr], 0, offsetof(struct cmp_data, go));
+
+ /*
+ * Start the compression threads.
+ */
+ for (thr = 0; thr < nr_threads; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_run(lzo_compress_threadfn,
+ &data[thr],
+ "image_compress/%zu", thr);
+ if (IS_ERR(data[thr].thr)) {
+ nr_threads = thr;
+ printk(KERN_ERR
+ "PM: Cannot start compression threads\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
- }
+ /*
+ * Adjust number of free pages after all allocations have been done.
+ * We don't want to run out of pages when writing.
+ */
+ handle->nr_free_pages = nr_free_pages();
printk(KERN_INFO
+ "PM: Using %zu thread(s) for compression.\n"
"PM: Compressing and saving image data (%u pages) ... ",
- nr_to_write);
+ nr_threads, nr_to_write);
m = nr_to_write / 100;
if (!m)
m = 1;
@@ -480,54 +577,75 @@ static int save_image_lzo(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
for (;;) {
- for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
- ret = snapshot_read_next(snapshot);
- if (ret < 0)
- goto out_finish;
-
- if (!ret)
+ for (thr = 0; thr < nr_threads; thr++) {
+ for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
+ ret = snapshot_read_next(snapshot);
+ if (ret < 0)
+ goto out_finish;
+
+ if (!ret)
+ break;
+
+ memcpy(data[thr].unc + off,
+ data_of(*snapshot), PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk(KERN_CONT "\b\b\b\b%3d%%",
+ nr_pages / m);
+ nr_pages++;
+ }
+ if (!off)
break;
- memcpy(unc + off, data_of(*snapshot), PAGE_SIZE);
+ data[thr].unc_len = off;
- if (!(nr_pages % m))
- printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (!off)
+ if (!thr)
break;
- unc_len = off;
- ret = lzo1x_1_compress(unc, unc_len,
- cmp + LZO_HEADER, &cmp_len, wrk);
- if (ret < 0) {
- printk(KERN_ERR "PM: LZO compression failed\n");
- break;
- }
-
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(unc_len))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- ret = -1;
- break;
- }
+ for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
- *(size_t *)cmp = cmp_len;
+ ret = data[thr].ret;
- /*
- * Given we are writing one page at a time to disk, we copy
- * that much from the buffer, although the last bit will likely
- * be smaller than full page. This is OK - we saved the length
- * of the compressed data, so any garbage at the end will be
- * discarded when we read it.
- */
- for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
- memcpy(page, cmp + off, PAGE_SIZE);
+ if (ret < 0) {
+ printk(KERN_ERR "PM: LZO compression failed\n");
+ goto out_finish;
+ }
- ret = swap_write_page(handle, page, &bio);
- if (ret)
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(data[thr].unc_len))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ ret = -1;
goto out_finish;
+ }
+
+ *(size_t *)data[thr].cmp = data[thr].cmp_len;
+
+ /*
+ * Given we are writing one page at a time to disk, we
+ * copy that much from the buffer, although the last
+ * bit will likely be smaller than full page. This is
+ * OK - we saved the length of the compressed data, so
+ * any garbage at the end will be discarded when we
+ * read it.
+ */
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(page, data[thr].cmp + off, PAGE_SIZE);
+
+ ret = swap_write_page(handle, page, &bio);
+ if (ret)
+ goto out_finish;
+ }
}
}
@@ -541,11 +659,13 @@ out_finish:
else
printk(KERN_CONT "\n");
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
-
- vfree(cmp);
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
+out_clean:
+ for (thr = 0; thr < nr_threads; thr++) {
+ if (data[thr].thr)
+ kthread_stop(data[thr].thr);
+ }
+ if (data) vfree(data);
+ if (page) free_page((unsigned long)page);
return ret;
}
@@ -625,31 +745,65 @@ out_finish:
static void release_swap_reader(struct swap_map_handle *handle)
{
+ struct swap_map_page_list *tmp;
+
if (handle->cur)
free_page((unsigned long)handle->cur);
+ while (handle->maps) {
+ if (handle->maps->map)
+ free_page((unsigned long)handle->maps->map);
+ tmp = handle->maps;
+ handle->maps = handle->maps->next;
+ vfree(tmp);
+ }
handle->cur = NULL;
+ handle->maps = NULL;
}
static int get_swap_reader(struct swap_map_handle *handle,
unsigned int *flags_p)
{
int error;
+ struct swap_map_page_list *tmp, *last;
+ sector_t offset;
*flags_p = swsusp_header->flags;
if (!swsusp_header->image) /* how can this happen? */
return -EINVAL;
- handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
- if (!handle->cur)
- return -ENOMEM;
+ handle->cur = NULL;
+ last = handle->maps = NULL;
+ offset = swsusp_header->image;
+ while (offset) {
+ tmp = vmalloc(sizeof(*handle->maps));
+ if (!tmp) {
+ release_swap_reader(handle);
+ return -ENOMEM;
+ }
+ memset(tmp, 0, sizeof(*tmp));
+ if (!handle->maps)
+ handle->maps = tmp;
+ if (last)
+ last->next = tmp;
+ last = tmp;
+
+ tmp->map = (struct swap_map_page *)
+ __get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!tmp->map) {
+ release_swap_reader(handle);
+ return -ENOMEM;
+ }
- error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL);
- if (error) {
- release_swap_reader(handle);
- return error;
+ error = hib_bio_read_page(offset, tmp->map, NULL);
+ if (error) {
+ release_swap_reader(handle);
+ return error;
+ }
+ offset = tmp->map->next_swap;
}
handle->k = 0;
+ handle->cur = handle->maps->map;
return 0;
}
@@ -658,6 +812,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
{
sector_t offset;
int error;
+ struct swap_map_page_list *tmp;
if (!handle->cur)
return -EINVAL;
@@ -668,13 +823,15 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
if (error)
return error;
if (++handle->k >= MAP_PAGE_ENTRIES) {
- error = hib_wait_on_bio_chain(bio_chain);
handle->k = 0;
- offset = handle->cur->next_swap;
- if (!offset)
+ free_page((unsigned long)handle->maps->map);
+ tmp = handle->maps;
+ handle->maps = handle->maps->next;
+ vfree(tmp);
+ if (!handle->maps)
release_swap_reader(handle);
- else if (!error)
- error = hib_bio_read_page(offset, handle->cur, NULL);
+ else
+ handle->cur = handle->maps->map;
}
return error;
}
@@ -743,6 +900,50 @@ static int load_image(struct swap_map_handle *handle,
}
/**
+ * Structure used for LZO data decompression.
+ */
+struct dec_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ int ret; /* return code */
+ wait_queue_head_t go; /* start decompression */
+ wait_queue_head_t done; /* decompression done */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+};
+
+/**
+ * Decompression function that runs in its own thread.
+ */
+static int lzo_decompress_threadfn(void *data)
+{
+ struct dec_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop()) {
+ d->thr = NULL;
+ d->ret = -1;
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ break;
+ }
+ atomic_set(&d->ready, 0);
+
+ d->unc_len = LZO_UNC_SIZE;
+ d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
+ d->unc, &d->unc_len);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+ return 0;
+}
+
+/**
* load_image_lzo - Load compressed image data and decompress them with LZO.
* @handle: Swap map handle to use for loading data.
* @snapshot: Image to copy uncompressed data into.
@@ -754,49 +955,81 @@ static int load_image_lzo(struct swap_map_handle *handle,
{
unsigned int m;
int error = 0;
+ int eof = 0;
struct bio *bio;
struct timeval start;
struct timeval stop;
unsigned nr_pages;
- size_t i, off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
-
- for (i = 0; i < LZO_CMP_PAGES; i++) {
- page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
- if (!page[i]) {
- printk(KERN_ERR "PM: Failed to allocate LZO page\n");
-
- while (i)
- free_page((unsigned long)page[--i]);
-
- return -ENOMEM;
- }
+ size_t i, off, thr, run_threads, nr_threads;
+ size_t ring = 0, pg = 0, ring_size = 0, have = 0, want, need, asked = 0;
+ unsigned char **page = NULL;
+ struct dec_data *data = NULL;
+
+ /*
+ * We'll limit the number of threads for decompression to limit memory
+ * footprint.
+ */
+ nr_threads = num_online_cpus() - 1;
+ if (nr_threads > LZO_THREADS)
+ nr_threads = LZO_THREADS;
+ else if (nr_threads < 1)
+ nr_threads = 1;
+
+ page = vmalloc(sizeof(*page) * LZO_READ_PAGES);
+ if (!page) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ error = -ENOMEM;
+ goto out_clean;
}
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
-
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nr_threads);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ for (thr = 0; thr < nr_threads; thr++)
+ memset(&data[thr], 0, offsetof(struct dec_data, go));
+
+ /*
+ * Start the decompression threads.
+ */
+ for (thr = 0; thr < nr_threads; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_run(lzo_decompress_threadfn,
+ &data[thr],
+ "image_decompress/%zu", thr);
+ if (IS_ERR(data[thr].thr)) {
+ nr_threads = thr;
+ printk(KERN_ERR
+ "PM: Cannot start decompression threads\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
-
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ for (i = 0; i < LZO_READ_PAGES; i++) {
+ page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!page[i]) {
+ if (i < LZO_CMP_PAGES) {
+ ring_size = i;
+ printk(KERN_ERR
+ "PM: Failed to allocate LZO pages\n");
+ error = -ENOMEM;
+ goto out_clean;
+ } else {
+ break;
+ }
+ }
}
+ want = ring_size = i;
printk(KERN_INFO
+ "PM: Using %zu thread(s) for decompression.\n"
"PM: Loading and decompressing image data (%u pages) ... ",
- nr_to_read);
+ nr_threads, nr_to_read);
m = nr_to_read / 100;
if (!m)
m = 1;
@@ -808,61 +1041,128 @@ static int load_image_lzo(struct swap_map_handle *handle,
if (error <= 0)
goto out_finish;
- for (;;) {
- error = swap_read_page(handle, page[0], NULL); /* sync */
- if (error)
- break;
-
- cmp_len = *(size_t *)page[0];
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- error = -1;
- break;
+ for(;;) {
+ for (i = 0; !eof && i < want; i++) {
+ error = swap_read_page(handle, page[ring], &bio);
+ if (error) {
+ /*
+ * On real read error, finish. On end of data,
+ * set EOF flag and just exit the read loop.
+ */
+ if (handle->cur &&
+ handle->cur->entries[handle->k]) {
+ goto out_finish;
+ } else {
+ eof = 1;
+ break;
+ }
+ }
+ if (++ring >= ring_size)
+ ring = 0;
}
+ asked += i;
+ want -= i;
- for (off = PAGE_SIZE, i = 1;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- error = swap_read_page(handle, page[i], &bio);
+ /*
+ * We are out of data, wait for some more.
+ */
+ if (!have) {
+ if (!asked)
+ break;
+
+ error = hib_wait_on_bio_chain(&bio);
if (error)
goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
}
- error = hib_wait_on_bio_chain(&bio); /* need all data now */
- if (error)
- goto out_finish;
+ for (thr = 0; have && thr < nr_threads; thr++) {
+ data[thr].cmp_len = *(size_t *)page[pg];
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(LZO_UNC_SIZE))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ error = -1;
+ goto out_finish;
+ }
- for (off = 0, i = 0;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- memcpy(cmp + off, page[i], PAGE_SIZE);
- }
+ need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
+ PAGE_SIZE);
+ if (need > have) {
+ if (eof > 1) {
+ error = -1;
+ goto out_finish;
+ }
+ break;
+ }
- unc_len = LZO_UNC_SIZE;
- error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len,
- unc, &unc_len);
- if (error < 0) {
- printk(KERN_ERR "PM: LZO decompression failed\n");
- break;
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(data[thr].cmp + off,
+ page[pg], PAGE_SIZE);
+ have--;
+ want++;
+ if (++pg >= ring_size)
+ pg = 0;
+ }
+
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (unlikely(!unc_len ||
- unc_len > LZO_UNC_SIZE ||
- unc_len & (PAGE_SIZE - 1))) {
- printk(KERN_ERR "PM: Invalid LZO uncompressed length\n");
- error = -1;
- break;
+ /*
+ * Wait for more data while we are decompressing.
+ */
+ if (have < LZO_CMP_PAGES && asked) {
+ error = hib_wait_on_bio_chain(&bio);
+ if (error)
+ goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
}
- for (off = 0; off < unc_len; off += PAGE_SIZE) {
- memcpy(data_of(*snapshot), unc + off, PAGE_SIZE);
+ for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
- if (!(nr_pages % m))
- printk("\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ error = data[thr].ret;
- error = snapshot_write_next(snapshot);
- if (error <= 0)
+ if (error < 0) {
+ printk(KERN_ERR
+ "PM: LZO decompression failed\n");
goto out_finish;
+ }
+
+ if (unlikely(!data[thr].unc_len ||
+ data[thr].unc_len > LZO_UNC_SIZE ||
+ data[thr].unc_len & (PAGE_SIZE - 1))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO uncompressed length\n");
+ error = -1;
+ goto out_finish;
+ }
+
+ for (off = 0;
+ off < data[thr].unc_len; off += PAGE_SIZE) {
+ memcpy(data_of(*snapshot),
+ data[thr].unc + off, PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk("\b\b\b\b%3d%%", nr_pages / m);
+ nr_pages++;
+
+ error = snapshot_write_next(snapshot);
+ if (error <= 0)
+ goto out_finish;
+ }
}
}
@@ -876,11 +1176,15 @@ out_finish:
} else
printk("\n");
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
-
- vfree(cmp);
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
+out_clean:
+ for (i = 0; i < ring_size; i++)
free_page((unsigned long)page[i]);
+ for (thr = 0; thr < nr_threads; thr++) {
+ if (data[thr].thr)
+ kthread_stop(data[thr].thr);
+ }
+ if (data) vfree(data);
+ if (page) vfree(page);
return error;
}
---------------------------------------
--
Bojan
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-09-30 0:27 [PATCH v8]: Improve performance of LZO/plain hibernation Bojan Smojver
@ 2011-10-04 22:07 ` Rafael J. Wysocki
2011-10-05 1:07 ` Bojan Smojver
0 siblings, 1 reply; 24+ messages in thread
From: Rafael J. Wysocki @ 2011-10-04 22:07 UTC (permalink / raw)
To: Bojan Smojver; +Cc: linux-kernel, Linux PM mailing list
On Friday, September 30, 2011, Bojan Smojver wrote:
> Pekka was right - there is no measurable difference in performance
> whether we bind threads to specific CPUs or not. So I dropped the
> binding: it is not required, and removing it makes the code simpler.
>
> Other than that, just some code simplifications, variable name cleanups
> and buglet removals.
>
> Note that I changed the subject to reflect the fact that
> hibernation/thaw without compression also benefits from the patch,
> thanks to the improved I/O.
>
> ---------------------------------------
> Use threads for LZO compression/decompression on hibernate/thaw.
> Improve buffering on hibernate/thaw.
> v8
>
> In my testing, this improved write/read speed by a factor of 2 to 3.
>
> Signed-off-by: Bojan Smojver <bojan@rexursive.com>
Applied to linux-pm/linux-next.
Thanks,
Rafael
> ---
> kernel/power/swap.c | 628 ++++++++++++++++++++++++++++++++++++++-------------
> 1 files changed, 466 insertions(+), 162 deletions(-)
>
> diff --git a/kernel/power/swap.c b/kernel/power/swap.c
> index 7c97c3a..d692842 100644
> --- a/kernel/power/swap.c
> +++ b/kernel/power/swap.c
> @@ -27,6 +27,9 @@
> #include <linux/slab.h>
> #include <linux/lzo.h>
> #include <linux/vmalloc.h>
> +#include <linux/cpumask.h>
> +#include <linux/atomic.h>
> +#include <linux/kthread.h>
>
> #include "power.h"
>
> @@ -43,8 +46,7 @@
> * allocated and populated one at a time, so we only need one memory
> * page to set up the entire structure.
> *
> - * During resume we also only need to use one swap_map_page structure
> - * at a time.
> + * During resume we pick up all swap_map_page structures into a list.
> */
>
> #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
> @@ -54,6 +56,11 @@ struct swap_map_page {
> sector_t next_swap;
> };
>
> +struct swap_map_page_list {
> + struct swap_map_page *map;
> + struct swap_map_page_list *next;
> +};
> +
> /**
> * The swap_map_handle structure is used for handling swap in
> * a file-alike way
> @@ -61,9 +68,11 @@ struct swap_map_page {
>
> struct swap_map_handle {
> struct swap_map_page *cur;
> + struct swap_map_page_list *maps;
> sector_t cur_swap;
> sector_t first_sector;
> unsigned int k;
> + unsigned long nr_free_pages, written;
> };
>
> struct swsusp_header {
> @@ -245,6 +254,7 @@ static int swsusp_swap_check(void)
> static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
> {
> void *src;
> + int ret;
>
> if (!offset)
> return -ENOSPC;
> @@ -254,9 +264,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
> if (src) {
> copy_page(src, buf);
> } else {
> - WARN_ON_ONCE(1);
> - bio_chain = NULL; /* Go synchronous */
> - src = buf;
> + ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
> + if (ret)
> + return ret;
> + src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
> + if (src) {
> + copy_page(src, buf);
> + } else {
> + WARN_ON_ONCE(1);
> + bio_chain = NULL; /* Go synchronous */
> + src = buf;
> + }
> }
> } else {
> src = buf;
> @@ -293,6 +311,8 @@ static int get_swap_writer(struct swap_map_handle *handle)
> goto err_rel;
> }
> handle->k = 0;
> + handle->nr_free_pages = nr_free_pages();
> + handle->written = 0;
> handle->first_sector = handle->cur_swap;
> return 0;
> err_rel:
> @@ -316,20 +336,23 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
> return error;
> handle->cur->entries[handle->k++] = offset;
> if (handle->k >= MAP_PAGE_ENTRIES) {
> - error = hib_wait_on_bio_chain(bio_chain);
> - if (error)
> - goto out;
> offset = alloc_swapdev_block(root_swap);
> if (!offset)
> return -ENOSPC;
> handle->cur->next_swap = offset;
> - error = write_page(handle->cur, handle->cur_swap, NULL);
> + error = write_page(handle->cur, handle->cur_swap, bio_chain);
> if (error)
> goto out;
> clear_page(handle->cur);
> handle->cur_swap = offset;
> handle->k = 0;
> }
> + if (++handle->written > (handle->nr_free_pages >> 1)) {
> + error = hib_wait_on_bio_chain(bio_chain);
> + if (error)
> + goto out;
> + handle->written = 0;
> + }
> out:
> return error;
> }
> @@ -372,6 +395,13 @@ static int swap_writer_finish(struct swap_map_handle *handle,
> LZO_HEADER, PAGE_SIZE)
> #define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
>
> +/* Maximum number of threads for compression/decompression. */
> +#define LZO_THREADS 3
> +
> +/* Maximum number of pages for read buffering. */
> +#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 4)
> +
> +
> /**
> * save_image - save the suspend image data
> */
> @@ -419,6 +449,50 @@ static int save_image(struct swap_map_handle *handle,
> return ret;
> }
>
> +/**
> + * Structure used for LZO data compression.
> + */
> +struct cmp_data {
> + struct task_struct *thr; /* thread */
> + atomic_t ready; /* ready to start flag */
> + atomic_t stop; /* ready to stop flag */
> + int ret; /* return code */
> + wait_queue_head_t go; /* start compression */
> + wait_queue_head_t done; /* compression done */
> + size_t unc_len; /* uncompressed length */
> + size_t cmp_len; /* compressed length */
> + unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
> + unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
> + unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
> +};
> +
> +/**
> + * Compression function that runs in its own thread.
> + */
> +static int lzo_compress_threadfn(void *data)
> +{
> + struct cmp_data *d = data;
> +
> + while (1) {
> + wait_event(d->go, atomic_read(&d->ready) ||
> + kthread_should_stop());
> + if (kthread_should_stop()) {
> + d->thr = NULL;
> + d->ret = -1;
> + atomic_set(&d->stop, 1);
> + wake_up(&d->done);
> + break;
> + }
> + atomic_set(&d->ready, 0);
> +
> + d->ret = lzo1x_1_compress(d->unc, d->unc_len,
> + d->cmp + LZO_HEADER, &d->cmp_len,
> + d->wrk);
> + atomic_set(&d->stop, 1);
> + wake_up(&d->done);
> + }
> + return 0;
> +}
>
> /**
> * save_image_lzo - Save the suspend image data compressed with LZO.
> @@ -437,42 +511,65 @@ static int save_image_lzo(struct swap_map_handle *handle,
> struct bio *bio;
> struct timeval start;
> struct timeval stop;
> - size_t off, unc_len, cmp_len;
> - unsigned char *unc, *cmp, *wrk, *page;
> + size_t off, thr, run_threads, nr_threads;
> + unsigned char *page = NULL;
> + struct cmp_data *data = NULL;
> +
> + /*
> + * We'll limit the number of threads for compression to limit memory
> + * footprint.
> + */
> + nr_threads = num_online_cpus() - 1;
> + if (nr_threads > LZO_THREADS)
> + nr_threads = LZO_THREADS;
> + else if (nr_threads < 1)
> + nr_threads = 1;
>
> page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
> if (!page) {
> printk(KERN_ERR "PM: Failed to allocate LZO page\n");
> - return -ENOMEM;
> + ret = -ENOMEM;
> + goto out_clean;
> }
>
> - wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
> - if (!wrk) {
> - printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
> - free_page((unsigned long)page);
> - return -ENOMEM;
> + data = vmalloc(sizeof(*data) * nr_threads);
> + if (!data) {
> + printk(KERN_ERR "PM: Failed to allocate LZO data\n");
> + ret = -ENOMEM;
> + goto out_clean;
> }
> -
> - unc = vmalloc(LZO_UNC_SIZE);
> - if (!unc) {
> - printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
> - vfree(wrk);
> - free_page((unsigned long)page);
> - return -ENOMEM;
> + for (thr = 0; thr < nr_threads; thr++)
> + memset(&data[thr], 0, offsetof(struct cmp_data, go));
> +
> + /*
> + * Start the compression threads.
> + */
> + for (thr = 0; thr < nr_threads; thr++) {
> + init_waitqueue_head(&data[thr].go);
> + init_waitqueue_head(&data[thr].done);
> +
> + data[thr].thr = kthread_run(lzo_compress_threadfn,
> + &data[thr],
> + "image_compress/%zu", thr);
> + if (IS_ERR(data[thr].thr)) {
> + nr_threads = thr;
> + printk(KERN_ERR
> + "PM: Cannot start compression threads\n");
> + ret = -ENOMEM;
> + goto out_clean;
> + }
> }
>
> - cmp = vmalloc(LZO_CMP_SIZE);
> - if (!cmp) {
> - printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
> - vfree(unc);
> - vfree(wrk);
> - free_page((unsigned long)page);
> - return -ENOMEM;
> - }
> + /*
> + * Adjust number of free pages after all allocations have been done.
> + * We don't want to run out of pages when writing.
> + */
> + handle->nr_free_pages = nr_free_pages();
>
> printk(KERN_INFO
> + "PM: Using %zu thread(s) for compression.\n"
> "PM: Compressing and saving image data (%u pages) ... ",
> - nr_to_write);
> + nr_threads, nr_to_write);
> m = nr_to_write / 100;
> if (!m)
> m = 1;
> @@ -480,54 +577,75 @@ static int save_image_lzo(struct swap_map_handle *handle,
> bio = NULL;
> do_gettimeofday(&start);
> for (;;) {
> - for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
> - ret = snapshot_read_next(snapshot);
> - if (ret < 0)
> - goto out_finish;
> -
> - if (!ret)
> + for (thr = 0; thr < nr_threads; thr++) {
> + for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
> + ret = snapshot_read_next(snapshot);
> + if (ret < 0)
> + goto out_finish;
> +
> + if (!ret)
> + break;
> +
> + memcpy(data[thr].unc + off,
> + data_of(*snapshot), PAGE_SIZE);
> +
> + if (!(nr_pages % m))
> + printk(KERN_CONT "\b\b\b\b%3d%%",
> + nr_pages / m);
> + nr_pages++;
> + }
> + if (!off)
> break;
>
> - memcpy(unc + off, data_of(*snapshot), PAGE_SIZE);
> + data[thr].unc_len = off;
>
> - if (!(nr_pages % m))
> - printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
> - nr_pages++;
> + atomic_set(&data[thr].ready, 1);
> + wake_up(&data[thr].go);
> }
>
> - if (!off)
> + if (!thr)
> break;
>
> - unc_len = off;
> - ret = lzo1x_1_compress(unc, unc_len,
> - cmp + LZO_HEADER, &cmp_len, wrk);
> - if (ret < 0) {
> - printk(KERN_ERR "PM: LZO compression failed\n");
> - break;
> - }
> -
> - if (unlikely(!cmp_len ||
> - cmp_len > lzo1x_worst_compress(unc_len))) {
> - printk(KERN_ERR "PM: Invalid LZO compressed length\n");
> - ret = -1;
> - break;
> - }
> + for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
> + wait_event(data[thr].done,
> + atomic_read(&data[thr].stop));
> + atomic_set(&data[thr].stop, 0);
>
> - *(size_t *)cmp = cmp_len;
> + ret = data[thr].ret;
>
> - /*
> - * Given we are writing one page at a time to disk, we copy
> - * that much from the buffer, although the last bit will likely
> - * be smaller than full page. This is OK - we saved the length
> - * of the compressed data, so any garbage at the end will be
> - * discarded when we read it.
> - */
> - for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
> - memcpy(page, cmp + off, PAGE_SIZE);
> + if (ret < 0) {
> + printk(KERN_ERR "PM: LZO compression failed\n");
> + goto out_finish;
> + }
>
> - ret = swap_write_page(handle, page, &bio);
> - if (ret)
> + if (unlikely(!data[thr].cmp_len ||
> + data[thr].cmp_len >
> + lzo1x_worst_compress(data[thr].unc_len))) {
> + printk(KERN_ERR
> + "PM: Invalid LZO compressed length\n");
> + ret = -1;
> goto out_finish;
> + }
> +
> + *(size_t *)data[thr].cmp = data[thr].cmp_len;
> +
> + /*
> + * Given we are writing one page at a time to disk, we
> + * copy that much from the buffer, although the last
> + * bit will likely be smaller than full page. This is
> + * OK - we saved the length of the compressed data, so
> + * any garbage at the end will be discarded when we
> + * read it.
> + */
> + for (off = 0;
> + off < LZO_HEADER + data[thr].cmp_len;
> + off += PAGE_SIZE) {
> + memcpy(page, data[thr].cmp + off, PAGE_SIZE);
> +
> + ret = swap_write_page(handle, page, &bio);
> + if (ret)
> + goto out_finish;
> + }
> }
> }
>
> @@ -541,11 +659,13 @@ out_finish:
> else
> printk(KERN_CONT "\n");
> swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
> -
> - vfree(cmp);
> - vfree(unc);
> - vfree(wrk);
> - free_page((unsigned long)page);
> +out_clean:
> + for (thr = 0; thr < nr_threads; thr++) {
> + if (data[thr].thr)
> + kthread_stop(data[thr].thr);
> + }
> + if (data) vfree(data);
> + if (page) free_page((unsigned long)page);
>
> return ret;
> }
> @@ -625,31 +745,65 @@ out_finish:
>
> static void release_swap_reader(struct swap_map_handle *handle)
> {
> + struct swap_map_page_list *tmp;
> +
> if (handle->cur)
> free_page((unsigned long)handle->cur);
> + while (handle->maps) {
> + if (handle->maps->map)
> + free_page((unsigned long)handle->maps->map);
> + tmp = handle->maps;
> + handle->maps = handle->maps->next;
> + vfree(tmp);
> + }
> handle->cur = NULL;
> + handle->maps = NULL;
> }
>
> static int get_swap_reader(struct swap_map_handle *handle,
> unsigned int *flags_p)
> {
> int error;
> + struct swap_map_page_list *tmp, *last;
> + sector_t offset;
>
> *flags_p = swsusp_header->flags;
>
> if (!swsusp_header->image) /* how can this happen? */
> return -EINVAL;
>
> - handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
> - if (!handle->cur)
> - return -ENOMEM;
> + handle->cur = NULL;
> + last = handle->maps = NULL;
> + offset = swsusp_header->image;
> + while (offset) {
> + tmp = vmalloc(sizeof(*handle->maps));
> + if (!tmp) {
> + release_swap_reader(handle);
> + return -ENOMEM;
> + }
> + memset(tmp, 0, sizeof(*tmp));
> + if (!handle->maps)
> + handle->maps = tmp;
> + if (last)
> + last->next = tmp;
> + last = tmp;
> +
> + tmp->map = (struct swap_map_page *)
> + __get_free_page(__GFP_WAIT | __GFP_HIGH);
> + if (!tmp->map) {
> + release_swap_reader(handle);
> + return -ENOMEM;
> + }
>
> - error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL);
> - if (error) {
> - release_swap_reader(handle);
> - return error;
> + error = hib_bio_read_page(offset, tmp->map, NULL);
> + if (error) {
> + release_swap_reader(handle);
> + return error;
> + }
> + offset = tmp->map->next_swap;
> }
> handle->k = 0;
> + handle->cur = handle->maps->map;
> return 0;
> }
>
> @@ -658,6 +812,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
> {
> sector_t offset;
> int error;
> + struct swap_map_page_list *tmp;
>
> if (!handle->cur)
> return -EINVAL;
> @@ -668,13 +823,15 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
> if (error)
> return error;
> if (++handle->k >= MAP_PAGE_ENTRIES) {
> - error = hib_wait_on_bio_chain(bio_chain);
> handle->k = 0;
> - offset = handle->cur->next_swap;
> - if (!offset)
> + free_page((unsigned long)handle->maps->map);
> + tmp = handle->maps;
> + handle->maps = handle->maps->next;
> + vfree(tmp);
> + if (!handle->maps)
> release_swap_reader(handle);
> - else if (!error)
> - error = hib_bio_read_page(offset, handle->cur, NULL);
> + else
> + handle->cur = handle->maps->map;
> }
> return error;
> }
> @@ -743,6 +900,50 @@ static int load_image(struct swap_map_handle *handle,
> }
>
> /**
> + * Structure used for LZO data decompression.
> + */
> +struct dec_data {
> + struct task_struct *thr; /* thread */
> + atomic_t ready; /* ready to start flag */
> + atomic_t stop; /* ready to stop flag */
> + int ret; /* return code */
> + wait_queue_head_t go; /* start decompression */
> + wait_queue_head_t done; /* decompression done */
> + size_t unc_len; /* uncompressed length */
> + size_t cmp_len; /* compressed length */
> + unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
> + unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
> +};
> +
> +/**
> + * Decompression function that runs in its own thread.
> + */
> +static int lzo_decompress_threadfn(void *data)
> +{
> + struct dec_data *d = data;
> +
> + while (1) {
> + wait_event(d->go, atomic_read(&d->ready) ||
> + kthread_should_stop());
> + if (kthread_should_stop()) {
> + d->thr = NULL;
> + d->ret = -1;
> + atomic_set(&d->stop, 1);
> + wake_up(&d->done);
> + break;
> + }
> + atomic_set(&d->ready, 0);
> +
> + d->unc_len = LZO_UNC_SIZE;
> + d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
> + d->unc, &d->unc_len);
> + atomic_set(&d->stop, 1);
> + wake_up(&d->done);
> + }
> + return 0;
> +}
> +
> +/**
> * load_image_lzo - Load compressed image data and decompress them with LZO.
> * @handle: Swap map handle to use for loading data.
> * @snapshot: Image to copy uncompressed data into.
> @@ -754,49 +955,81 @@ static int load_image_lzo(struct swap_map_handle *handle,
> {
> unsigned int m;
> int error = 0;
> + int eof = 0;
> struct bio *bio;
> struct timeval start;
> struct timeval stop;
> unsigned nr_pages;
> - size_t i, off, unc_len, cmp_len;
> - unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
> -
> - for (i = 0; i < LZO_CMP_PAGES; i++) {
> - page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
> - if (!page[i]) {
> - printk(KERN_ERR "PM: Failed to allocate LZO page\n");
> -
> - while (i)
> - free_page((unsigned long)page[--i]);
> -
> - return -ENOMEM;
> - }
> + size_t i, off, thr, run_threads, nr_threads;
> + size_t ring = 0, pg = 0, ring_size = 0, have = 0, want, need, asked = 0;
> + unsigned char **page = NULL;
> + struct dec_data *data = NULL;
> +
> + /*
> + * We'll limit the number of threads for decompression to limit memory
> + * footprint.
> + */
> + nr_threads = num_online_cpus() - 1;
> + if (nr_threads > LZO_THREADS)
> + nr_threads = LZO_THREADS;
> + else if (nr_threads < 1)
> + nr_threads = 1;
> +
> + page = vmalloc(sizeof(*page) * LZO_READ_PAGES);
> + if (!page) {
> + printk(KERN_ERR "PM: Failed to allocate LZO page\n");
> + error = -ENOMEM;
> + goto out_clean;
> }
>
> - unc = vmalloc(LZO_UNC_SIZE);
> - if (!unc) {
> - printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
> -
> - for (i = 0; i < LZO_CMP_PAGES; i++)
> - free_page((unsigned long)page[i]);
> -
> - return -ENOMEM;
> + data = vmalloc(sizeof(*data) * nr_threads);
> + if (!data) {
> + printk(KERN_ERR "PM: Failed to allocate LZO data\n");
> + error = -ENOMEM;
> + goto out_clean;
> + }
> + for (thr = 0; thr < nr_threads; thr++)
> + memset(&data[thr], 0, offsetof(struct dec_data, go));
> +
> + /*
> + * Start the decompression threads.
> + */
> + for (thr = 0; thr < nr_threads; thr++) {
> + init_waitqueue_head(&data[thr].go);
> + init_waitqueue_head(&data[thr].done);
> +
> + data[thr].thr = kthread_run(lzo_decompress_threadfn,
> + &data[thr],
> + "image_decompress/%zu", thr);
> + if (IS_ERR(data[thr].thr)) {
> + nr_threads = thr;
> + printk(KERN_ERR
> + "PM: Cannot start decompression threads\n");
> + error = -ENOMEM;
> + goto out_clean;
> + }
> }
>
> - cmp = vmalloc(LZO_CMP_SIZE);
> - if (!cmp) {
> - printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
> -
> - vfree(unc);
> - for (i = 0; i < LZO_CMP_PAGES; i++)
> - free_page((unsigned long)page[i]);
> -
> - return -ENOMEM;
> + for (i = 0; i < LZO_READ_PAGES; i++) {
> + page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
> + if (!page[i]) {
> + if (i < LZO_CMP_PAGES) {
> + ring_size = i;
> + printk(KERN_ERR
> + "PM: Failed to allocate LZO pages\n");
> + error = -ENOMEM;
> + goto out_clean;
> + } else {
> + break;
> + }
> + }
> }
> + want = ring_size = i;
>
> printk(KERN_INFO
> + "PM: Using %zu thread(s) for decompression.\n"
> "PM: Loading and decompressing image data (%u pages) ... ",
> - nr_to_read);
> + nr_threads, nr_to_read);
> m = nr_to_read / 100;
> if (!m)
> m = 1;
> @@ -808,61 +1041,128 @@ static int load_image_lzo(struct swap_map_handle *handle,
> if (error <= 0)
> goto out_finish;
>
> - for (;;) {
> - error = swap_read_page(handle, page[0], NULL); /* sync */
> - if (error)
> - break;
> -
> - cmp_len = *(size_t *)page[0];
> - if (unlikely(!cmp_len ||
> - cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
> - printk(KERN_ERR "PM: Invalid LZO compressed length\n");
> - error = -1;
> - break;
> + for(;;) {
> + for (i = 0; !eof && i < want; i++) {
> + error = swap_read_page(handle, page[ring], &bio);
> + if (error) {
> + /*
> + * On real read error, finish. On end of data,
> + * set EOF flag and just exit the read loop.
> + */
> + if (handle->cur &&
> + handle->cur->entries[handle->k]) {
> + goto out_finish;
> + } else {
> + eof = 1;
> + break;
> + }
> + }
> + if (++ring >= ring_size)
> + ring = 0;
> }
> + asked += i;
> + want -= i;
>
> - for (off = PAGE_SIZE, i = 1;
> - off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
> - error = swap_read_page(handle, page[i], &bio);
> + /*
> + * We are out of data, wait for some more.
> + */
> + if (!have) {
> + if (!asked)
> + break;
> +
> + error = hib_wait_on_bio_chain(&bio);
> if (error)
> goto out_finish;
> + have += asked;
> + asked = 0;
> + if (eof)
> + eof = 2;
> }
>
> - error = hib_wait_on_bio_chain(&bio); /* need all data now */
> - if (error)
> - goto out_finish;
> + for (thr = 0; have && thr < nr_threads; thr++) {
> + data[thr].cmp_len = *(size_t *)page[pg];
> + if (unlikely(!data[thr].cmp_len ||
> + data[thr].cmp_len >
> + lzo1x_worst_compress(LZO_UNC_SIZE))) {
> + printk(KERN_ERR
> + "PM: Invalid LZO compressed length\n");
> + error = -1;
> + goto out_finish;
> + }
>
> - for (off = 0, i = 0;
> - off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
> - memcpy(cmp + off, page[i], PAGE_SIZE);
> - }
> + need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
> + PAGE_SIZE);
> + if (need > have) {
> + if (eof > 1) {
> + error = -1;
> + goto out_finish;
> + }
> + break;
> + }
>
> - unc_len = LZO_UNC_SIZE;
> - error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len,
> - unc, &unc_len);
> - if (error < 0) {
> - printk(KERN_ERR "PM: LZO decompression failed\n");
> - break;
> + for (off = 0;
> + off < LZO_HEADER + data[thr].cmp_len;
> + off += PAGE_SIZE) {
> + memcpy(data[thr].cmp + off,
> + page[pg], PAGE_SIZE);
> + have--;
> + want++;
> + if (++pg >= ring_size)
> + pg = 0;
> + }
> +
> + atomic_set(&data[thr].ready, 1);
> + wake_up(&data[thr].go);
> }
>
> - if (unlikely(!unc_len ||
> - unc_len > LZO_UNC_SIZE ||
> - unc_len & (PAGE_SIZE - 1))) {
> - printk(KERN_ERR "PM: Invalid LZO uncompressed length\n");
> - error = -1;
> - break;
> + /*
> + * Wait for more data while we are decompressing.
> + */
> + if (have < LZO_CMP_PAGES && asked) {
> + error = hib_wait_on_bio_chain(&bio);
> + if (error)
> + goto out_finish;
> + have += asked;
> + asked = 0;
> + if (eof)
> + eof = 2;
> }
>
> - for (off = 0; off < unc_len; off += PAGE_SIZE) {
> - memcpy(data_of(*snapshot), unc + off, PAGE_SIZE);
> + for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
> + wait_event(data[thr].done,
> + atomic_read(&data[thr].stop));
> + atomic_set(&data[thr].stop, 0);
>
> - if (!(nr_pages % m))
> - printk("\b\b\b\b%3d%%", nr_pages / m);
> - nr_pages++;
> + error = data[thr].ret;
>
> - error = snapshot_write_next(snapshot);
> - if (error <= 0)
> + if (error < 0) {
> + printk(KERN_ERR
> + "PM: LZO decompression failed\n");
> goto out_finish;
> + }
> +
> + if (unlikely(!data[thr].unc_len ||
> + data[thr].unc_len > LZO_UNC_SIZE ||
> + data[thr].unc_len & (PAGE_SIZE - 1))) {
> + printk(KERN_ERR
> + "PM: Invalid LZO uncompressed length\n");
> + error = -1;
> + goto out_finish;
> + }
> +
> + for (off = 0;
> + off < data[thr].unc_len; off += PAGE_SIZE) {
> + memcpy(data_of(*snapshot),
> + data[thr].unc + off, PAGE_SIZE);
> +
> + if (!(nr_pages % m))
> + printk("\b\b\b\b%3d%%", nr_pages / m);
> + nr_pages++;
> +
> + error = snapshot_write_next(snapshot);
> + if (error <= 0)
> + goto out_finish;
> + }
> }
> }
>
> @@ -876,11 +1176,15 @@ out_finish:
> } else
> printk("\n");
> swsusp_show_speed(&start, &stop, nr_to_read, "Read");
> -
> - vfree(cmp);
> - vfree(unc);
> - for (i = 0; i < LZO_CMP_PAGES; i++)
> +out_clean:
> + for (i = 0; i < ring_size; i++)
> free_page((unsigned long)page[i]);
> + for (thr = 0; thr < nr_threads; thr++) {
> + if (data[thr].thr)
> + kthread_stop(data[thr].thr);
> + }
> + if (data) vfree(data);
> + if (page) vfree(page);
>
> return error;
> }
> ---------------------------------------
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-04 22:07 ` Rafael J. Wysocki
@ 2011-10-05 1:07 ` Bojan Smojver
2011-10-06 18:37 ` Rafael J. Wysocki
0 siblings, 1 reply; 24+ messages in thread
From: Bojan Smojver @ 2011-10-05 1:07 UTC (permalink / raw)
To: rjw; +Cc: linux-kernel, linux-pm
------- Original message -------
> From: Rafael J. Wysocki
> Sent: 5.10.'11, 8:10
> Applied to linux-pm/linux-next.
Thank you.
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-05 1:07 ` Bojan Smojver
@ 2011-10-06 18:37 ` Rafael J. Wysocki
2011-10-06 18:45 ` Rafael J. Wysocki
0 siblings, 1 reply; 24+ messages in thread
From: Rafael J. Wysocki @ 2011-10-06 18:37 UTC (permalink / raw)
To: Bojan Smojver; +Cc: linux-kernel, linux-pm
On Wednesday, October 05, 2011, Bojan Smojver wrote:
> ------- Original message -------
> > From: Rafael J. Wysocki
> > Sent: 5.10.'11, 8:10
>
> > Applied to linux-pm/linux-next.
>
> Thank you.
I get a kernel panic on a test box using an x86_64 kernel with this patch
applied.
I'm going to drop it from my linux-next branch for now.
Thanks,
Rafael
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-06 18:37 ` Rafael J. Wysocki
@ 2011-10-06 18:45 ` Rafael J. Wysocki
2011-10-06 22:23 ` Bojan Smojver
` (6 more replies)
0 siblings, 7 replies; 24+ messages in thread
From: Rafael J. Wysocki @ 2011-10-06 18:45 UTC (permalink / raw)
To: Bojan Smojver; +Cc: linux-kernel, linux-pm
On Thursday, October 06, 2011, Rafael J. Wysocki wrote:
> On Wednesday, October 05, 2011, Bojan Smojver wrote:
> > ------- Original message -------
> > > From: Rafael J. Wysocki
> > > Sent: 5.10.'11, 8:10
> >
> > > Applied to linux-pm/linux-next.
> >
> > Thank you.
>
> I get a kernel panic on a test box using an x86_64 kernel with this patch
> applied.
The panic happens during late resume, when trying to switch to the hibernated
kernel.
Thanks,
Rafael
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-06 18:45 ` Rafael J. Wysocki
@ 2011-10-06 22:23 ` Bojan Smojver
2011-10-07 3:20 ` Bojan Smojver
` (5 subsequent siblings)
6 siblings, 0 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-06 22:23 UTC (permalink / raw)
To: Rafael J. Wysocki; +Cc: linux-kernel, linux-pm
On Thu, 2011-10-06 at 20:45 +0200, Rafael J. Wysocki wrote:
> > I get a kernel panic on a test box using an x86_64 kernel with this patch
> > applied.
>
> The panic happens during late resume, when trying to switch to the hibernated
> kernel.
Do you have a dump by any chance? Is it always at the same spot or
random?
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-06 18:45 ` Rafael J. Wysocki
2011-10-06 22:23 ` Bojan Smojver
@ 2011-10-07 3:20 ` Bojan Smojver
2011-10-07 3:39 ` Bojan Smojver
` (4 subsequent siblings)
6 siblings, 0 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-07 3:20 UTC (permalink / raw)
To: Rafael J. Wysocki; +Cc: linux-kernel, linux-pm
On Thu, 2011-10-06 at 20:45 +0200, Rafael J. Wysocki wrote:
> > I get a kernel panic on a test box using an x86_64 kernel with this
> patch applied.
>
> The panic happens during late resume, when trying to switch to the
> hibernated kernel.
I'm working on a version of the patch that will produce a SHA1 checksum
of the image pages on save/load. This will then eliminate problems
related to pages not being saved/loaded correctly by threads etc.
I'll send you that when I test it. If that still panics your kernel
after the checksums have been verified, then we have another problem
somewhere.
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-06 18:45 ` Rafael J. Wysocki
2011-10-06 22:23 ` Bojan Smojver
2011-10-07 3:20 ` Bojan Smojver
@ 2011-10-07 3:39 ` Bojan Smojver
2011-10-07 23:33 ` Bojan Smojver
` (3 subsequent siblings)
6 siblings, 0 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-07 3:39 UTC (permalink / raw)
To: Rafael J. Wysocki; +Cc: linux-kernel, linux-pm
On Fri, 2011-10-07 at 14:20 +1100, Bojan Smojver wrote:
> I'll send you that when I test it.
Obviously, the following is not for mass consumption (it will slow the
v8 patch by a factor of 2), but rather for you to test, so that we can
verify that what you're getting back are valid pages after
decompression.
Let me know whether this fails for you on image checksum comparison.
PS. Yes, the checksum code should be running in a separate thread. :-)
----------------------------------------
kernel/power/Kconfig | 2 +
kernel/power/hibernate.c | 3 +
kernel/power/swap.c | 695 +++++++++++++++++++++++++++++++++++-----------
3 files changed, 535 insertions(+), 165 deletions(-)
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 3744c59..0fd83f7 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -27,6 +27,8 @@ config HIBERNATION
select HIBERNATE_CALLBACKS
select LZO_COMPRESS
select LZO_DECOMPRESS
+ select CRYPTO
+ select CRYPTO_SHA1
---help---
Enable the suspend to disk (STD) functionality, which is usually
called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 8f7b1db..443df6c 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -650,6 +650,9 @@ int hibernate(void)
flags |= SF_PLATFORM_MODE;
if (nocompress)
flags |= SF_NOCOMPRESS_MODE;
+ else
+ flags |= SF_SHA1DIGEST_MODE;
+
pr_debug("PM: writing image.\n");
error = swsusp_write(flags);
swsusp_free();
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7c97c3a..bffbbe2 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -27,6 +27,12 @@
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/vmalloc.h>
+#include <linux/cpumask.h>
+#include <linux/atomic.h>
+#include <linux/kthread.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <crypto/sha.h>
#include "power.h"
@@ -43,8 +49,7 @@
* allocated and populated one at a time, so we only need one memory
* page to set up the entire structure.
*
- * During resume we also only need to use one swap_map_page structure
- * at a time.
+ * During resume we pick up all swap_map_page structures into a list.
*/
#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
@@ -54,6 +59,11 @@ struct swap_map_page {
sector_t next_swap;
};
+struct swap_map_page_list {
+ struct swap_map_page *map;
+ struct swap_map_page_list *next;
+};
+
/**
* The swap_map_handle structure is used for handling swap in
* a file-alike way
@@ -61,13 +71,21 @@ struct swap_map_page {
struct swap_map_handle {
struct swap_map_page *cur;
+ struct swap_map_page_list *maps;
sector_t cur_swap;
sector_t first_sector;
unsigned int k;
+ unsigned long nr_free_pages, written;
+ struct crypto_hash *tfm;
+ struct hash_desc desc;
+ struct scatterlist sg;
+ u8 digest[SHA1_DIGEST_SIZE];
};
struct swsusp_header {
- char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
+ char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
+ sizeof(u8) * SHA1_DIGEST_SIZE];
+ u8 digest[SHA1_DIGEST_SIZE];
sector_t image;
unsigned int flags; /* Flags to pass to the "boot" kernel */
char orig_sig[10];
@@ -199,6 +217,9 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
swsusp_header->image = handle->first_sector;
swsusp_header->flags = flags;
+ if (flags & SF_SHA1DIGEST_MODE)
+ memcpy(swsusp_header->digest,
+ handle->digest, SHA1_DIGEST_SIZE);
error = hib_bio_write_page(swsusp_resume_block,
swsusp_header, NULL);
} else {
@@ -245,6 +266,7 @@ static int swsusp_swap_check(void)
static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
{
void *src;
+ int ret;
if (!offset)
return -ENOSPC;
@@ -254,9 +276,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
if (src) {
copy_page(src, buf);
} else {
- WARN_ON_ONCE(1);
- bio_chain = NULL; /* Go synchronous */
- src = buf;
+ ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
+ if (ret)
+ return ret;
+ src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (src) {
+ copy_page(src, buf);
+ } else {
+ WARN_ON_ONCE(1);
+ bio_chain = NULL; /* Go synchronous */
+ src = buf;
+ }
}
} else {
src = buf;
@@ -269,6 +299,10 @@ static void release_swap_writer(struct swap_map_handle *handle)
if (handle->cur)
free_page((unsigned long)handle->cur);
handle->cur = NULL;
+ if (handle->tfm) {
+ crypto_free_hash(handle->tfm);
+ handle->tfm = NULL;
+ }
}
static int get_swap_writer(struct swap_map_handle *handle)
@@ -293,7 +327,19 @@ static int get_swap_writer(struct swap_map_handle *handle)
goto err_rel;
}
handle->k = 0;
+ handle->nr_free_pages = nr_free_pages();
+ handle->written = 0;
handle->first_sector = handle->cur_swap;
+ handle->tfm = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(handle->tfm)) {
+ handle->tfm = NULL;
+ ret = -ENOMEM;
+ goto err_rel;
+ }
+ handle->desc.tfm = handle->tfm;
+ handle->desc.flags = 0;
+ crypto_hash_init(&handle->desc);
+ sg_init_table(&handle->sg, 1);
return 0;
err_rel:
release_swap_writer(handle);
@@ -316,20 +362,23 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
return error;
handle->cur->entries[handle->k++] = offset;
if (handle->k >= MAP_PAGE_ENTRIES) {
- error = hib_wait_on_bio_chain(bio_chain);
- if (error)
- goto out;
offset = alloc_swapdev_block(root_swap);
if (!offset)
return -ENOSPC;
handle->cur->next_swap = offset;
- error = write_page(handle->cur, handle->cur_swap, NULL);
+ error = write_page(handle->cur, handle->cur_swap, bio_chain);
if (error)
goto out;
clear_page(handle->cur);
handle->cur_swap = offset;
handle->k = 0;
}
+ if (++handle->written > (handle->nr_free_pages >> 1)) {
+ error = hib_wait_on_bio_chain(bio_chain);
+ if (error)
+ goto out;
+ handle->written = 0;
+ }
out:
return error;
}
@@ -372,6 +421,13 @@ static int swap_writer_finish(struct swap_map_handle *handle,
LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
+/* Maximum number of threads for compression/decompression. */
+#define LZO_THREADS 3
+
+/* Maximum number of pages for read buffering. */
+#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 4)
+
+
/**
* save_image - save the suspend image data
*/
@@ -419,6 +475,50 @@ static int save_image(struct swap_map_handle *handle,
return ret;
}
+/**
+ * Structure used for LZO data compression.
+ */
+struct cmp_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ int ret; /* return code */
+ wait_queue_head_t go; /* start compression */
+ wait_queue_head_t done; /* compression done */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+ unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
+};
+
+/**
+ * Compression function that runs in its own thread.
+ */
+static int lzo_compress_threadfn(void *data)
+{
+ struct cmp_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop()) {
+ d->thr = NULL;
+ d->ret = -1;
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ break;
+ }
+ atomic_set(&d->ready, 0);
+
+ d->ret = lzo1x_1_compress(d->unc, d->unc_len,
+ d->cmp + LZO_HEADER, &d->cmp_len,
+ d->wrk);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+ return 0;
+}
/**
* save_image_lzo - Save the suspend image data compressed with LZO.
@@ -437,42 +537,65 @@ static int save_image_lzo(struct swap_map_handle *handle,
struct bio *bio;
struct timeval start;
struct timeval stop;
- size_t off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *wrk, *page;
+ size_t off, thr, run_threads, nr_threads;
+ unsigned char *page = NULL;
+ struct cmp_data *data = NULL;
+
+ /*
+ * We'll limit the number of threads for compression to limit memory
+ * footprint.
+ */
+ nr_threads = num_online_cpus() - 1;
+ if (nr_threads > LZO_THREADS)
+ nr_threads = LZO_THREADS;
+ else if (nr_threads < 1)
+ nr_threads = 1;
page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
if (!page) {
printk(KERN_ERR "PM: Failed to allocate LZO page\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_clean;
}
- wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
- if (!wrk) {
- printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
- free_page((unsigned long)page);
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nr_threads);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ ret = -ENOMEM;
+ goto out_clean;
}
-
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ for (thr = 0; thr < nr_threads; thr++)
+ memset(&data[thr], 0, offsetof(struct cmp_data, go));
+
+ /*
+ * Start the compression threads.
+ */
+ for (thr = 0; thr < nr_threads; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_run(lzo_compress_threadfn,
+ &data[thr],
+ "image_compress/%zu", thr);
+ if (IS_ERR(data[thr].thr)) {
+ nr_threads = thr;
+ printk(KERN_ERR
+ "PM: Cannot start compression threads\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
- }
+ /*
+ * Adjust number of free pages after all allocations have been done.
+ * We don't want to run out of pages when writing.
+ */
+ handle->nr_free_pages = nr_free_pages();
printk(KERN_INFO
+ "PM: Using %zu thread(s) for compression.\n"
"PM: Compressing and saving image data (%u pages) ... ",
- nr_to_write);
+ nr_threads, nr_to_write);
m = nr_to_write / 100;
if (!m)
m = 1;
@@ -480,54 +603,80 @@ static int save_image_lzo(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
for (;;) {
- for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
- ret = snapshot_read_next(snapshot);
- if (ret < 0)
- goto out_finish;
-
- if (!ret)
+ for (thr = 0; thr < nr_threads; thr++) {
+ for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
+ ret = snapshot_read_next(snapshot);
+ if (ret < 0)
+ goto out_finish;
+
+ if (!ret)
+ break;
+
+ sg_set_buf(&handle->sg,
+ data_of(*snapshot), PAGE_SIZE);
+ crypto_hash_update(&handle->desc,
+ &handle->sg, PAGE_SIZE);
+
+ memcpy(data[thr].unc + off,
+ data_of(*snapshot), PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk(KERN_CONT "\b\b\b\b%3d%%",
+ nr_pages / m);
+ nr_pages++;
+ }
+ if (!off)
break;
- memcpy(unc + off, data_of(*snapshot), PAGE_SIZE);
+ data[thr].unc_len = off;
- if (!(nr_pages % m))
- printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (!off)
+ if (!thr)
break;
- unc_len = off;
- ret = lzo1x_1_compress(unc, unc_len,
- cmp + LZO_HEADER, &cmp_len, wrk);
- if (ret < 0) {
- printk(KERN_ERR "PM: LZO compression failed\n");
- break;
- }
+ for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(unc_len))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- ret = -1;
- break;
- }
+ ret = data[thr].ret;
- *(size_t *)cmp = cmp_len;
-
- /*
- * Given we are writing one page at a time to disk, we copy
- * that much from the buffer, although the last bit will likely
- * be smaller than full page. This is OK - we saved the length
- * of the compressed data, so any garbage at the end will be
- * discarded when we read it.
- */
- for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
- memcpy(page, cmp + off, PAGE_SIZE);
+ if (ret < 0) {
+ printk(KERN_ERR "PM: LZO compression failed\n");
+ goto out_finish;
+ }
- ret = swap_write_page(handle, page, &bio);
- if (ret)
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(data[thr].unc_len))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ ret = -1;
goto out_finish;
+ }
+
+ *(size_t *)data[thr].cmp = data[thr].cmp_len;
+
+ /*
+ * Given we are writing one page at a time to disk, we
+ * copy that much from the buffer, although the last
+ * bit will likely be smaller than full page. This is
+ * OK - we saved the length of the compressed data, so
+ * any garbage at the end will be discarded when we
+ * read it.
+ */
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(page, data[thr].cmp + off, PAGE_SIZE);
+
+ ret = swap_write_page(handle, page, &bio);
+ if (ret)
+ goto out_finish;
+ }
}
}
@@ -536,16 +685,20 @@ out_finish:
do_gettimeofday(&stop);
if (!ret)
ret = err2;
- if (!ret)
+ if (!ret) {
printk(KERN_CONT "\b\b\b\bdone\n");
- else
+ crypto_hash_final(&handle->desc, handle->digest);
+ } else {
printk(KERN_CONT "\n");
+ }
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
-
- vfree(cmp);
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
+out_clean:
+ for (thr = 0; thr < nr_threads; thr++) {
+ if (data[thr].thr)
+ kthread_stop(data[thr].thr);
+ }
+ if (data) vfree(data);
+ if (page) free_page((unsigned long)page);
return ret;
}
@@ -625,31 +778,79 @@ out_finish:
static void release_swap_reader(struct swap_map_handle *handle)
{
+ struct swap_map_page_list *tmp;
+
if (handle->cur)
free_page((unsigned long)handle->cur);
+ while (handle->maps) {
+ if (handle->maps->map)
+ free_page((unsigned long)handle->maps->map);
+ tmp = handle->maps;
+ handle->maps = handle->maps->next;
+ vfree(tmp);
+ }
handle->cur = NULL;
+ handle->maps = NULL;
+ if (handle->tfm) {
+ crypto_free_hash(handle->tfm);
+ handle->tfm = NULL;
+ }
}
static int get_swap_reader(struct swap_map_handle *handle,
unsigned int *flags_p)
{
int error;
+ struct swap_map_page_list *tmp, *last;
+ sector_t offset;
*flags_p = swsusp_header->flags;
if (!swsusp_header->image) /* how can this happen? */
return -EINVAL;
- handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
- if (!handle->cur)
- return -ENOMEM;
+ handle->cur = NULL;
+ last = handle->maps = NULL;
+ offset = swsusp_header->image;
+ while (offset) {
+ tmp = vmalloc(sizeof(*handle->maps));
+ if (!tmp) {
+ release_swap_reader(handle);
+ return -ENOMEM;
+ }
+ memset(tmp, 0, sizeof(*tmp));
+ if (!handle->maps)
+ handle->maps = tmp;
+ if (last)
+ last->next = tmp;
+ last = tmp;
+
+ tmp->map = (struct swap_map_page *)
+ __get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!tmp->map) {
+ release_swap_reader(handle);
+ return -ENOMEM;
+ }
- error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL);
- if (error) {
- release_swap_reader(handle);
- return error;
+ error = hib_bio_read_page(offset, tmp->map, NULL);
+ if (error) {
+ release_swap_reader(handle);
+ return error;
+ }
+ offset = tmp->map->next_swap;
}
handle->k = 0;
+ handle->cur = handle->maps->map;
+ handle->tfm = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(handle->tfm)) {
+ handle->tfm = NULL;
+ release_swap_reader(handle);
+ return -ENOMEM;
+ }
+ handle->desc.tfm = handle->tfm;
+ handle->desc.flags = 0;
+ crypto_hash_init(&handle->desc);
+ sg_init_table(&handle->sg, 1);
return 0;
}
@@ -658,6 +859,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
{
sector_t offset;
int error;
+ struct swap_map_page_list *tmp;
if (!handle->cur)
return -EINVAL;
@@ -668,13 +870,15 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
if (error)
return error;
if (++handle->k >= MAP_PAGE_ENTRIES) {
- error = hib_wait_on_bio_chain(bio_chain);
handle->k = 0;
- offset = handle->cur->next_swap;
- if (!offset)
+ free_page((unsigned long)handle->maps->map);
+ tmp = handle->maps;
+ handle->maps = handle->maps->next;
+ vfree(tmp);
+ if (!handle->maps)
release_swap_reader(handle);
- else if (!error)
- error = hib_bio_read_page(offset, handle->cur, NULL);
+ else
+ handle->cur = handle->maps->map;
}
return error;
}
@@ -743,6 +947,50 @@ static int load_image(struct swap_map_handle *handle,
}
/**
+ * Structure used for LZO data decompression.
+ */
+struct dec_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ int ret; /* return code */
+ wait_queue_head_t go; /* start decompression */
+ wait_queue_head_t done; /* decompression done */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+};
+
+/**
+ * Decompression function that runs in its own thread.
+ */
+static int lzo_decompress_threadfn(void *data)
+{
+ struct dec_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop()) {
+ d->thr = NULL;
+ d->ret = -1;
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ break;
+ }
+ atomic_set(&d->ready, 0);
+
+ d->unc_len = LZO_UNC_SIZE;
+ d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
+ d->unc, &d->unc_len);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+ return 0;
+}
+
+/**
* load_image_lzo - Load compressed image data and decompress them with LZO.
* @handle: Swap map handle to use for loading data.
* @snapshot: Image to copy uncompressed data into.
@@ -754,49 +1002,81 @@ static int load_image_lzo(struct swap_map_handle *handle,
{
unsigned int m;
int error = 0;
+ int eof = 0;
struct bio *bio;
struct timeval start;
struct timeval stop;
unsigned nr_pages;
- size_t i, off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
-
- for (i = 0; i < LZO_CMP_PAGES; i++) {
- page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
- if (!page[i]) {
- printk(KERN_ERR "PM: Failed to allocate LZO page\n");
-
- while (i)
- free_page((unsigned long)page[--i]);
-
- return -ENOMEM;
- }
+ size_t i, off, thr, run_threads, nr_threads;
+ size_t ring = 0, pg = 0, ring_size = 0, have = 0, want, need, asked = 0;
+ unsigned char **page = NULL;
+ struct dec_data *data = NULL;
+
+ /*
+ * We'll limit the number of threads for decompression to limit memory
+ * footprint.
+ */
+ nr_threads = num_online_cpus() - 1;
+ if (nr_threads > LZO_THREADS)
+ nr_threads = LZO_THREADS;
+ else if (nr_threads < 1)
+ nr_threads = 1;
+
+ page = vmalloc(sizeof(*page) * LZO_READ_PAGES);
+ if (!page) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ error = -ENOMEM;
+ goto out_clean;
}
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
-
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nr_threads);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
+ for (thr = 0; thr < nr_threads; thr++)
+ memset(&data[thr], 0, offsetof(struct dec_data, go));
+
+ /*
+ * Start the decompression threads.
+ */
+ for (thr = 0; thr < nr_threads; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_run(lzo_decompress_threadfn,
+ &data[thr],
+ "image_decompress/%zu", thr);
+ if (IS_ERR(data[thr].thr)) {
+ nr_threads = thr;
+ printk(KERN_ERR
+ "PM: Cannot start decompression threads\n");
+ error = -ENOMEM;
+ goto out_clean;
+ }
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
-
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ for (i = 0; i < LZO_READ_PAGES; i++) {
+ page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!page[i]) {
+ if (i < LZO_CMP_PAGES) {
+ ring_size = i;
+ printk(KERN_ERR
+ "PM: Failed to allocate LZO pages\n");
+ error = -ENOMEM;
+ goto out_clean;
+ } else {
+ break;
+ }
+ }
}
+ want = ring_size = i;
printk(KERN_INFO
+ "PM: Using %zu thread(s) for decompression.\n"
"PM: Loading and decompressing image data (%u pages) ... ",
- nr_to_read);
+ nr_threads, nr_to_read);
m = nr_to_read / 100;
if (!m)
m = 1;
@@ -808,61 +1088,133 @@ static int load_image_lzo(struct swap_map_handle *handle,
if (error <= 0)
goto out_finish;
- for (;;) {
- error = swap_read_page(handle, page[0], NULL); /* sync */
- if (error)
- break;
-
- cmp_len = *(size_t *)page[0];
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- error = -1;
- break;
+ for(;;) {
+ for (i = 0; !eof && i < want; i++) {
+ error = swap_read_page(handle, page[ring], &bio);
+ if (error) {
+ /*
+ * On real read error, finish. On end of data,
+ * set EOF flag and just exit the read loop.
+ */
+ if (handle->cur &&
+ handle->cur->entries[handle->k]) {
+ goto out_finish;
+ } else {
+ eof = 1;
+ break;
+ }
+ }
+ if (++ring >= ring_size)
+ ring = 0;
}
+ asked += i;
+ want -= i;
- for (off = PAGE_SIZE, i = 1;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- error = swap_read_page(handle, page[i], &bio);
+ /*
+ * We are out of data, wait for some more.
+ */
+ if (!have) {
+ if (!asked)
+ break;
+
+ error = hib_wait_on_bio_chain(&bio);
if (error)
goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
}
- error = hib_wait_on_bio_chain(&bio); /* need all data now */
- if (error)
- goto out_finish;
+ for (thr = 0; have && thr < nr_threads; thr++) {
+ data[thr].cmp_len = *(size_t *)page[pg];
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(LZO_UNC_SIZE))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ error = -1;
+ goto out_finish;
+ }
- for (off = 0, i = 0;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- memcpy(cmp + off, page[i], PAGE_SIZE);
- }
+ need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
+ PAGE_SIZE);
+ if (need > have) {
+ if (eof > 1) {
+ error = -1;
+ goto out_finish;
+ }
+ break;
+ }
- unc_len = LZO_UNC_SIZE;
- error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len,
- unc, &unc_len);
- if (error < 0) {
- printk(KERN_ERR "PM: LZO decompression failed\n");
- break;
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(data[thr].cmp + off,
+ page[pg], PAGE_SIZE);
+ have--;
+ want++;
+ if (++pg >= ring_size)
+ pg = 0;
+ }
+
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (unlikely(!unc_len ||
- unc_len > LZO_UNC_SIZE ||
- unc_len & (PAGE_SIZE - 1))) {
- printk(KERN_ERR "PM: Invalid LZO uncompressed length\n");
- error = -1;
- break;
+ /*
+ * Wait for more data while we are decompressing.
+ */
+ if (have < LZO_CMP_PAGES && asked) {
+ error = hib_wait_on_bio_chain(&bio);
+ if (error)
+ goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
}
- for (off = 0; off < unc_len; off += PAGE_SIZE) {
- memcpy(data_of(*snapshot), unc + off, PAGE_SIZE);
+ for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
+
+ error = data[thr].ret;
- if (!(nr_pages % m))
- printk("\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ if (error < 0) {
+ printk(KERN_ERR
+ "PM: LZO decompression failed\n");
+ goto out_finish;
+ }
- error = snapshot_write_next(snapshot);
- if (error <= 0)
+ if (unlikely(!data[thr].unc_len ||
+ data[thr].unc_len > LZO_UNC_SIZE ||
+ data[thr].unc_len & (PAGE_SIZE - 1))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO uncompressed length\n");
+ error = -1;
goto out_finish;
+ }
+
+ for (off = 0;
+ off < data[thr].unc_len; off += PAGE_SIZE) {
+ memcpy(data_of(*snapshot),
+ data[thr].unc + off, PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk("\b\b\b\b%3d%%", nr_pages / m);
+ nr_pages++;
+
+ sg_set_buf(&handle->sg,
+ data_of(*snapshot), PAGE_SIZE);
+ crypto_hash_update(&handle->desc,
+ &handle->sg, PAGE_SIZE);
+
+ error = snapshot_write_next(snapshot);
+ if (error <= 0)
+ goto out_finish;
+ }
}
}
@@ -873,14 +1225,27 @@ out_finish:
snapshot_write_finalize(snapshot);
if (!snapshot_image_loaded(snapshot))
error = -ENODATA;
+ if (!error) {
+ crypto_hash_final(&handle->desc, handle->digest);
+ if(memcmp(handle->digest,
+ swsusp_header->digest, SHA1_DIGEST_SIZE)) {
+ printk(KERN_ERR
+ "PM: Invalid image checksum!\n");
+ error = -ENODATA;
+ }
+ }
} else
printk("\n");
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
-
- vfree(cmp);
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
+out_clean:
+ for (i = 0; i < ring_size; i++)
free_page((unsigned long)page[i]);
+ for (thr = 0; thr < nr_threads; thr++) {
+ if (data[thr].thr)
+ kthread_stop(data[thr].thr);
+ }
+ if (data) vfree(data);
+ if (page) vfree(page);
return error;
}
----------------------------------------
--
Bojan
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-06 18:45 ` Rafael J. Wysocki
` (2 preceding siblings ...)
2011-10-07 3:39 ` Bojan Smojver
@ 2011-10-07 23:33 ` Bojan Smojver
2011-10-09 2:57 ` Bojan Smojver
` (2 subsequent siblings)
6 siblings, 0 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-07 23:33 UTC (permalink / raw)
To: Rafael J. Wysocki; +Cc: linux-kernel, linux-pm
On Fri, 2011-10-07 at 09:23 +1100, Bojan Smojver wrote:
> On Thu, 2011-10-06 at 20:45 +0200, Rafael J. Wysocki wrote:
>
> > > I get a kernel panic on a test box using a x86_64 kernel with this patch
> > > applied.
> >
> > The panic happens during late resume, when trying to switch to the hibernated
> > kernel.
>
> Do you have a dump by any chance? Is it always at the same spot or
> random?
Ping...
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-06 18:45 ` Rafael J. Wysocki
` (3 preceding siblings ...)
2011-10-07 23:33 ` Bojan Smojver
@ 2011-10-09 2:57 ` Bojan Smojver
2011-10-09 7:54 ` Bojan Smojver
2011-10-09 9:22 ` Pekka Enberg
2011-10-13 11:00 ` Bojan Smojver
6 siblings, 1 reply; 24+ messages in thread
From: Bojan Smojver @ 2011-10-09 2:57 UTC (permalink / raw)
To: rjw; +Cc: linux-kernel, linux-pm
------- Original message -------
> From: Rafael J. Wysocki <rjw@sisk.pl>
> To: bojan@rexursive.com
> Cc: linux-kernel@vger.kernel.org, linux-pm@lists.linux-foundation.org
> Sent: 7.10.'11, 4:48
>
> On Thursday, October 06, 2011, Rafael J. Wysocki wrote:
>> On Wednesday, October 05, 2011, Bojan Smojver wrote:
>> > ------- Original message -------
>> > > From: Rafael J. Wysocki
>> > > Sent: 5.10.'11, 8:10
>> >
>> > > Applied to linux-pm/linux-next.
>> >
>> > Thank you.
>>
>> I get a kernel panic on a test box using a x86_64 kernel with this patch
>> applied.
>
> The panic happens during late resume, when trying to switch to the
> hibernated
> kernel.
Yeah, something is not quite right. My CRC32 checks keep failing on resume.
I don't get a panic, but still, this should not happen. Looking...
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-09 2:57 ` Bojan Smojver
@ 2011-10-09 7:54 ` Bojan Smojver
0 siblings, 0 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-09 7:54 UTC (permalink / raw)
To: rjw; +Cc: linux-kernel, linux-pm
On Sun, 2011-10-09 at 13:57 +1100, Bojan Smojver wrote:
> My CRC32 checks keep failing on resume.
> I don't get a panic, but still, this should not happen.
False alarm, actually. My threaded CRC32 code was broken and was not
taking some of the last pages into account. I fixed that now in my code
and indeed CRC32 of the decompressed pages is the same as the one of the
pages before compression.
PS. We are going to lose about 10% of the speed improvements by
introduction of CRC32 (which will only be used with compression). I
reckon it's still worth it.
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-06 18:45 ` Rafael J. Wysocki
` (4 preceding siblings ...)
2011-10-09 2:57 ` Bojan Smojver
@ 2011-10-09 9:22 ` Pekka Enberg
2011-10-09 10:19 ` Bojan Smojver
2011-10-13 11:00 ` Bojan Smojver
6 siblings, 1 reply; 24+ messages in thread
From: Pekka Enberg @ 2011-10-09 9:22 UTC (permalink / raw)
To: Rafael J. Wysocki; +Cc: Bojan Smojver, linux-kernel, linux-pm
On Thu, Oct 6, 2011 at 9:45 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> The panic happens during late resume, when trying to switch to the hibernated
> kernel.
Is there some way to autotest hibernation? I could try to reproduce
the issue but I'm certainly not going to hibernate/resume by hand...
:-)
Pekka
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-09 9:22 ` Pekka Enberg
@ 2011-10-09 10:19 ` Bojan Smojver
2011-10-09 22:24 ` Bojan Smojver
2011-10-10 7:12 ` Bojan Smojver
0 siblings, 2 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-09 10:19 UTC (permalink / raw)
To: penberg; +Cc: rjw, linux-kernel, linux-pm
------- Original message -------
> From: Pekka Enberg <penberg@cs.helsinki.fi>
> To: rjw@sisk.pl
> Cc: bojan@rexursive.com, linux-kernel@vger.kernel.org,
> linux-pm@lists.linux-foundation.org
> Sent: 9.10.'11, 20:22
>
> On Thu, Oct 6, 2011 at 9:45 PM, Rafael J. Wysocki <rjw@sisk.pl> wrote:
>> The panic happens during late resume, when trying to switch to the
>> hibernated
>> kernel.
>
> Is there some way to autotest hibernation? I could try to reproduce
> the issue but I'm certainly not going to hibernate/resume by hand...
> :-)
I kinda remember reading in one of the bug reports for Intel graphics about
someone doing it. Not sure how, to be honest. Rafael will know for sure.
May be related, when I simulate image loading failure by returning error
from load_image(), I get bad page state error, count -1, so something is
amiss. Probably the readahead buffers or maps.
PS. I will also rewrite some of the vmalloc() calls to kmalloc(). No point
wasting whole pages for small objects.
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-09 10:19 ` Bojan Smojver
@ 2011-10-09 22:24 ` Bojan Smojver
2011-10-10 7:12 ` Bojan Smojver
1 sibling, 0 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-09 22:24 UTC (permalink / raw)
To: penberg; +Cc: rjw, linux-kernel, linux-pm
On Sun, 2011-10-09 at 21:19 +1100, Bojan Smojver wrote:
> May be related, when I simulate image loading failure by returning
> error from load_image(), I get bad page state error, count -1, so
> something is amiss. Probably the readahead buffers or maps.
Yeah, this will probably be an easy fix. I think I've forgotten that
handle->cur is no longer being allocated as a standalone page on image
load (instead, it's a pointer into maps). So, when we fail, handle->cur
is freed twice, because it's not NULL. Will test the fix shortly.
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-09 10:19 ` Bojan Smojver
2011-10-09 22:24 ` Bojan Smojver
@ 2011-10-10 7:12 ` Bojan Smojver
1 sibling, 0 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-10 7:12 UTC (permalink / raw)
To: penberg; +Cc: rjw, linux-kernel, linux-pm
On Sun, 2011-10-09 at 21:19 +1100, Bojan Smojver wrote:
> I kinda remember reading in one of the bug reports for Intel graphics
> about someone doing it. Not sure how, to be honest.
This worked for me on Fedora:
echo -n reboot > /sys/power/disk
for (( i=0; i<5; i++)); do pm-hibernate; sleep 2; done
PS. Obviously, replace pm-hibernate with whatever your distro uses.
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-06 18:45 ` Rafael J. Wysocki
` (5 preceding siblings ...)
2011-10-09 9:22 ` Pekka Enberg
@ 2011-10-13 11:00 ` Bojan Smojver
2011-10-13 21:26 ` Rafael J. Wysocki
6 siblings, 1 reply; 24+ messages in thread
From: Bojan Smojver @ 2011-10-13 11:00 UTC (permalink / raw)
To: rjw; +Cc: linux-kernel, linux-pm
------- Original message -------
> From: Rafael J. Wysocki
>> I get a kernel panic on a test box using a x86_64 kernel with this patch
>> applied.
>
> The panic happens during late resume, when trying to switch to the
> hibernated
> kernel.
In an effort to test this whole thing a bit more, I took today's kernel
from Linus' repo and applied v11 of my patch. I compiled this on a single
cpu machine this time, with 768 MB of memory (in today's terms, this laptop
would not pass as a smartphone :-), running F-16 beta. The system has
radeon graphics (my other system, where I initially developed the patch,
has intel). Also, this was a 32-bit box, as opposed to my other system,
which is 64-bit.
I was getting kernel trouble on repeated hibernate/thaw cycles on both
systems when KMS was enabled. With nomodeset passed into the kernel, both
systems would go through 50+ cycles (this was my test loop) with no
trouble. Whether my patch was applied or not didn't matter - I was getting
trouble with Fedora supplied kernels as well, after a few cycles.
So, I don't know for sure, but it seems to me something is amiss in KMS
when it comes to hibernation.
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-13 11:00 ` Bojan Smojver
@ 2011-10-13 21:26 ` Rafael J. Wysocki
2011-10-13 21:44 ` Bojan Smojver
2011-10-14 2:01 ` Bojan Smojver
0 siblings, 2 replies; 24+ messages in thread
From: Rafael J. Wysocki @ 2011-10-13 21:26 UTC (permalink / raw)
To: Bojan Smojver; +Cc: linux-kernel, Linux PM list
On Thursday, October 13, 2011, Bojan Smojver wrote:
> ------- Original message -------
> > From: Rafael J. Wysocki
>
> >> I get a kernel panic on a test box using a x86_64 kernel with this patch
> >> applied.
> >
> > The panic happens during late resume, when trying to switch to the
> > hibernated
> > kernel.
>
> In an effort to test this whole thing a bit more, I took today's kernel
> from Linus' repo and applied v11 of my patch. I compiled this on a single
> cpu machine this time, with 768 MB of memory (in today's terms, this laptop
> would not pass as a smartphone :-), running F-16 beta. The system has
> radeon graphics (my other system, where I initially developed the patch,
> has intel). Also, this was a 32-bit box, as opposed to my other system,
> which is 64-bit.
>
> I was getting kernel trouble on repeated hibernate/thaw cycles on both
> systems when KMS was enabled. With nomodeset passed into the kernel, both
> systems would go through 50+ cycles (this was my test loop) with no
> trouble. Whether my patch was applied or not didn't matter - I was getting
> trouble with Fedora supplied kernels as well, after a few cycles.
>
> So, I don't know for sure, but it seems to me something is amiss in KMS
> when it comes to hibernation.
If your system is 64-bit, then the patch below may help.
Thanks,
Rafael
---
arch/x86/mm/init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux/arch/x86/mm/init.c
===================================================================
--- linux.orig/arch/x86/mm/init.c
+++ linux/arch/x86/mm/init.c
@@ -63,9 +63,9 @@ static void __init find_early_table_spac
#ifdef CONFIG_X86_32
/* for fixmap */
tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
+#endif
good_end = max_pfn_mapped << PAGE_SHIFT;
-#endif
base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
if (base == MEMBLOCK_ERROR)
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-13 21:26 ` Rafael J. Wysocki
@ 2011-10-13 21:44 ` Bojan Smojver
2011-10-13 21:57 ` Rafael J. Wysocki
2011-10-14 2:01 ` Bojan Smojver
1 sibling, 1 reply; 24+ messages in thread
From: Bojan Smojver @ 2011-10-13 21:44 UTC (permalink / raw)
To: Rafael J. Wysocki; +Cc: linux-kernel, Linux PM list
On Thu, 2011-10-13 at 23:26 +0200, Rafael J. Wysocki wrote:
> If your system is 64-bit, then the patch below may help.
OK, I may try that on my ThinkPad.
Anyhow, you still getting panics as a direct result of my hibernation
patch?
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-13 21:57 ` Rafael J. Wysocki
@ 2011-10-13 21:56 ` Bojan Smojver
0 siblings, 0 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-13 21:56 UTC (permalink / raw)
To: Rafael J. Wysocki; +Cc: linux-kernel, Linux PM list
On Thu, 2011-10-13 at 23:57 +0200, Rafael J. Wysocki wrote:
> No, v11 doesn't panic for me any more, so I'm going to put it
> back into linux-pm/linux-next.
Nice! Thanks.
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-13 21:44 ` Bojan Smojver
@ 2011-10-13 21:57 ` Rafael J. Wysocki
2011-10-13 21:56 ` Bojan Smojver
0 siblings, 1 reply; 24+ messages in thread
From: Rafael J. Wysocki @ 2011-10-13 21:57 UTC (permalink / raw)
To: Bojan Smojver; +Cc: linux-kernel, Linux PM list
On Thursday, October 13, 2011, Bojan Smojver wrote:
> On Thu, 2011-10-13 at 23:26 +0200, Rafael J. Wysocki wrote:
> > If your system is 64-bit, then the patch below may help.
>
> OK, I may try that on my ThinkPad.
>
> Anyhow, you still getting panics as a direct result of my hibernation
> patch?
No, v11 doesn't panic for me any more, so I'm going to put it
back into linux-pm/linux-next.
Thanks,
Rafael
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-13 21:26 ` Rafael J. Wysocki
2011-10-13 21:44 ` Bojan Smojver
@ 2011-10-14 2:01 ` Bojan Smojver
2011-10-14 6:14 ` Pekka Enberg
1 sibling, 1 reply; 24+ messages in thread
From: Bojan Smojver @ 2011-10-14 2:01 UTC (permalink / raw)
To: Rafael J. Wysocki; +Cc: linux-kernel, Linux PM list
On Thu, 2011-10-13 at 23:26 +0200, Rafael J. Wysocki wrote:
> If your system is 64-bit, then the patch below may help.
Unfortunately, it didn't. Got a hang after several hibernate/thaw just
the same, when KMS was enabled.
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-14 2:01 ` Bojan Smojver
@ 2011-10-14 6:14 ` Pekka Enberg
2011-10-14 6:50 ` Bojan Smojver
2011-10-15 6:28 ` Bojan Smojver
0 siblings, 2 replies; 24+ messages in thread
From: Pekka Enberg @ 2011-10-14 6:14 UTC (permalink / raw)
To: Bojan Smojver
Cc: Rafael J. Wysocki, linux-kernel, Linux PM list, Keith Packard
On Fri, Oct 14, 2011 at 5:01 AM, Bojan Smojver <bojan@rexursive.com> wrote:
> On Thu, 2011-10-13 at 23:26 +0200, Rafael J. Wysocki wrote:
>> If your system is 64-bit, then the patch below may help.
>
> Unfortunately, it didn't. Got a hang after several hibernate/thaw just
> the same, when KMS was enabled.
What debugging options do you have enabled? Rafael, is there some list
of suggested debugging options that should be enabled when debugging
hibernation issues?
Bojan, this is with Intel drivers, right? I'm CC'ing Keith, who might
be able to give some clues how to debug KMS related hibernate/thaw
issues.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-14 6:14 ` Pekka Enberg
@ 2011-10-14 6:50 ` Bojan Smojver
2011-10-15 6:28 ` Bojan Smojver
1 sibling, 0 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-14 6:50 UTC (permalink / raw)
To: penberg; +Cc: rjw, linux-kernel, linux-pm, keithp
------- Original message -------
> From: Pekka Enberg
> What debugging options do you have enabled?
Not sure to be honest. Just copied Fedora kernel config file and built with
that. Will check.
> Bojan, this is with Intel drivers, right?
Correct. But note that I had similar trouble on a box that has radeon
graphics. Essentially, if I pass nomodeset to the kernel, I can hibernate
thaw 100+ times with no issues (after all these cycles, the box is healthy,
runs programs with no trouble etc.).
> I'm CC'ing Keith, who might
> be able to give some clues how to debug KMS related hibernate/thaw
> issues.
I am already on intel-gfx list where I asked the same questions, got some
suggestions and eventually opened a bug to track the issue. My last message
to that list was that I think the problem may be in common KMS code, given
I get similar trouble with intel and radeon graphics, 32 and 64 bit machines
and with or without my patch.
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH v8]: Improve performance of LZO/plain hibernation
2011-10-14 6:14 ` Pekka Enberg
2011-10-14 6:50 ` Bojan Smojver
@ 2011-10-15 6:28 ` Bojan Smojver
1 sibling, 0 replies; 24+ messages in thread
From: Bojan Smojver @ 2011-10-15 6:28 UTC (permalink / raw)
To: Pekka Enberg
Cc: Rafael J. Wysocki, linux-kernel, Linux PM list, Keith Packard
On Fri, 2011-10-14 at 09:14 +0300, Pekka Enberg wrote:
> What debugging options do you have enabled?
This is what grep of DEBUG prints:
-------------------
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_SLUB_DEBUG=y
CONFIG_HAVE_DMA_API_DEBUG=y
CONFIG_XEN_DEBUG_FS=y
CONFIG_X86_DEBUGCTLMSR=y
CONFIG_PM_DEBUG=y
CONFIG_PM_ADVANCED_DEBUG=y
CONFIG_ACPI_EC_DEBUGFS=m
CONFIG_L2TP_DEBUGFS=m
CONFIG_CFG80211_DEBUGFS=y
CONFIG_MAC80211_DEBUGFS=y
CONFIG_WIMAX_DEBUG_LEVEL=8
CONFIG_DEBUG_DEVRES=y
CONFIG_CB710_DEBUG_ASSUMPTIONS=y
CONFIG_IWMC3200TOP_DEBUGFS=y
CONFIG_AIC7XXX_DEBUG_MASK=0
CONFIG_AIC79XX_DEBUG_MASK=0
CONFIG_SCSI_DEBUG=m
CONFIG_DM_DEBUG=y
CONFIG_FIREWIRE_OHCI_DEBUG=y
CONFIG_MLX4_DEBUG=y
CONFIG_ATH5K_DEBUG=y
CONFIG_ATH9K_DEBUGFS=y
CONFIG_IWLWIFI_DEBUG=y
CONFIG_IWLWIFI_DEBUGFS=y
CONFIG_IWLWIFI_LEGACY_DEBUG=y
CONFIG_IWLWIFI_LEGACY_DEBUGFS=y
CONFIG_RT2X00_LIB_DEBUGFS=y
CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8
CONFIG_SND_DEBUG=y
CONFIG_SND_PCM_XRUN_DEBUG=y
CONFIG_USB_SERIAL_DEBUG=m
CONFIG_INFINIBAND_MTHCA_DEBUG=y
CONFIG_INFINIBAND_IPOIB_DEBUG=y
CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
CONFIG_DRM_NOUVEAU_DEBUG=y
CONFIG_JFFS2_FS_DEBUG=0
CONFIG_DLM_DEBUG=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_SCHED_DEBUG=y
CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_LIST=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_RODATA=y
CONFIG_DEBUG_RODATA_TEST=y
CONFIG_DEBUG_NX_TEST=m
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
-------------------
--
Bojan
^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2011-10-15 6:28 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-09-30 0:27 [PATCH v8]: Improve performance of LZO/plain hibernation Bojan Smojver
2011-10-04 22:07 ` Rafael J. Wysocki
2011-10-05 1:07 ` Bojan Smojver
2011-10-06 18:37 ` Rafael J. Wysocki
2011-10-06 18:45 ` Rafael J. Wysocki
2011-10-06 22:23 ` Bojan Smojver
2011-10-07 3:20 ` Bojan Smojver
2011-10-07 3:39 ` Bojan Smojver
2011-10-07 23:33 ` Bojan Smojver
2011-10-09 2:57 ` Bojan Smojver
2011-10-09 7:54 ` Bojan Smojver
2011-10-09 9:22 ` Pekka Enberg
2011-10-09 10:19 ` Bojan Smojver
2011-10-09 22:24 ` Bojan Smojver
2011-10-10 7:12 ` Bojan Smojver
2011-10-13 11:00 ` Bojan Smojver
2011-10-13 21:26 ` Rafael J. Wysocki
2011-10-13 21:44 ` Bojan Smojver
2011-10-13 21:57 ` Rafael J. Wysocki
2011-10-13 21:56 ` Bojan Smojver
2011-10-14 2:01 ` Bojan Smojver
2011-10-14 6:14 ` Pekka Enberg
2011-10-14 6:50 ` Bojan Smojver
2011-10-15 6:28 ` Bojan Smojver
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).