All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
@ 2007-04-07 21:20 Rafael J. Wysocki
  2007-04-07 22:06 ` Andrew Morton
  2007-04-08 12:56 ` Pavel Machek
  0 siblings, 2 replies; 13+ messages in thread
From: Rafael J. Wysocki @ 2007-04-07 21:20 UTC (permalink / raw)
  To: Pavel Machek; +Cc: LKML, Andrew Morton, Nigel Cunningham

Hi,

Some time ago we discussed the possibility of simplifying the swsusp's approach
towards tracking the swap pages allocated by it for saving the image (so that
they can be freed if there's an error).

I think we can get back to it now, as it is a nice optimization that should
allow us to use less memory (almost always) and improve performance a bit.

Greetings,
Rafael

---
From: Rafael J. Wysocki <rjw@sisk.pl>

Make swsusp use extents instead of a bitmap to trace swap pages allocated for
saving the image (the tracking is only needed in case there's an error, so that
the allocated swap pages can be released).

This should allow us to reduce the memory usage, practically always, and
improve performance.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/power.h  |   27 +---------
 kernel/power/swap.c   |   18 +-----
 kernel/power/swsusp.c |  135 ++++++++++++++++++++++++++------------------------
 kernel/power/user.c   |   22 +-------
 4 files changed, 85 insertions(+), 117 deletions(-)

Index: linux-2.6.21-rc6/kernel/power/swsusp.c
===================================================================
--- linux-2.6.21-rc6.orig/kernel/power/swsusp.c
+++ linux-2.6.21-rc6/kernel/power/swsusp.c
@@ -50,6 +50,7 @@
 #include <linux/syscalls.h>
 #include <linux/highmem.h>
 #include <linux/time.h>
+#include <linux/rbtree.h>
 
 #include "power.h"
 
@@ -74,72 +75,69 @@ static inline unsigned int count_highmem
 /**
  *	The following functions are used for tracing the allocated
  *	swap pages, so that they can be freed in case of an error.
- *
- *	The functions operate on a linked bitmap structure defined
- *	in power.h
  */
 
-void free_bitmap(struct bitmap_page *bitmap)
-{
-	struct bitmap_page *bp;
+struct swsusp_extent {
+	struct rb_node node;
+	unsigned long start;
+	unsigned long end;
+};
 
-	while (bitmap) {
-		bp = bitmap->next;
-		free_page((unsigned long)bitmap);
-		bitmap = bp;
-	}
-}
+static struct rb_root swsusp_extents = RB_ROOT;
 
-struct bitmap_page *alloc_bitmap(unsigned int nr_bits)
+static int swsusp_extents_insert(unsigned long swap_offset)
 {
-	struct bitmap_page *bitmap, *bp;
-	unsigned int n;
-
-	if (!nr_bits)
-		return NULL;
-
-	bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
-	bp = bitmap;
-	for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) {
-		bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
-		bp = bp->next;
-		if (!bp) {
-			free_bitmap(bitmap);
-			return NULL;
+	struct rb_node **new = &(swsusp_extents.rb_node);
+	struct rb_node *parent = NULL;
+	struct swsusp_extent *ext;
+
+	/* Figure out where to put the new node */
+	while (*new) {
+		ext = container_of(*new, struct swsusp_extent, node);
+		parent = *new;
+		if (swap_offset < ext->start) {
+			/* Try to merge */
+			if (swap_offset == ext->start - 1) {
+				ext->start--;
+				return 0;
+			}
+			new = &((*new)->rb_left);
+		} else if (swap_offset > ext->end) {
+			/* Try to merge */
+			if (swap_offset == ext->end + 1) {
+				ext->end++;
+				return 0;
+			}
+			new = &((*new)->rb_right);
+		} else {
+			/* It already is in the tree */
+			return -EINVAL;
 		}
 	}
-	return bitmap;
-}
-
-static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit)
-{
-	unsigned int n;
-
-	n = BITMAP_PAGE_BITS;
-	while (bitmap && n <= bit) {
-		n += BITMAP_PAGE_BITS;
-		bitmap = bitmap->next;
-	}
-	if (!bitmap)
-		return -EINVAL;
-	n -= BITMAP_PAGE_BITS;
-	bit -= n;
-	n = 0;
-	while (bit >= BITS_PER_CHUNK) {
-		bit -= BITS_PER_CHUNK;
-		n++;
-	}
-	bitmap->chunks[n] |= (1UL << bit);
+	/* Add the new node and rebalance the tree. */
+	ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
+	if (!ext)
+		return -ENOMEM;
+
+	ext->start = swap_offset;
+	ext->end = swap_offset;
+	rb_link_node(&ext->node, parent, new);
+	rb_insert_color(&ext->node, &swsusp_extents);
 	return 0;
 }
 
-sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap)
+/**
+ *	alloc_swapdev_block - allocate a swap page and register that it has
+ *	been allocated, so that it can be freed in case of an error.
+ */
+
+sector_t alloc_swapdev_block(int swap)
 {
 	unsigned long offset;
 
 	offset = swp_offset(get_swap_page_of_type(swap));
 	if (offset) {
-		if (bitmap_set(bitmap, offset))
+		if (swsusp_extents_insert(offset))
 			swap_free(swp_entry(swap, offset));
 		else
 			return swapdev_block(swap, offset);
@@ -147,23 +145,34 @@ sector_t alloc_swapdev_block(int swap, s
 	return 0;
 }
 
-void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
+/**
+ *	free_all_swap_pages - free swap pages allocated for saving image data.
+ *	It also frees the extents used to register which swap entres had been
+ *	allocated.
+ */
+
+void free_all_swap_pages(int swap)
 {
-	unsigned int bit, n;
-	unsigned long test;
+	struct rb_node *node;
 
-	bit = 0;
-	while (bitmap) {
-		for (n = 0; n < BITMAP_PAGE_CHUNKS; n++)
-			for (test = 1UL; test; test <<= 1) {
-				if (bitmap->chunks[n] & test)
-					swap_free(swp_entry(swap, bit));
-				bit++;
-			}
-		bitmap = bitmap->next;
+	while ((node = swsusp_extents.rb_node)) {
+		struct swsusp_extent *ext;
+		unsigned long offset;
+
+		ext = container_of(node, struct swsusp_extent, node);
+		rb_erase(node, &swsusp_extents);
+		for (offset = ext->start; offset <= ext->end; offset++)
+			swap_free(swp_entry(swap, offset));
+
+		kfree(ext);
 	}
 }
 
+int swsusp_swap_in_use(void)
+{
+	return (swsusp_extents.rb_node != NULL);
+}
+
 /**
  *	swsusp_show_speed - print the time elapsed between two events represented by
  *	@start and @stop
Index: linux-2.6.21-rc6/kernel/power/power.h
===================================================================
--- linux-2.6.21-rc6.orig/kernel/power/power.h
+++ linux-2.6.21-rc6/kernel/power/power.h
@@ -144,30 +144,9 @@ struct resume_swap_area {
 /* If unset, the snapshot device cannot be open. */
 extern atomic_t snapshot_device_available;
 
-/**
- *	The bitmap is used for tracing allocated swap pages
- *
- *	The entire bitmap consists of a number of bitmap_page
- *	structures linked with the help of the .next member.
- *	Thus each page can be allocated individually, so we only
- *	need to make 0-order memory allocations to create
- *	the bitmap.
- */
-
-#define BITMAP_PAGE_SIZE	(PAGE_SIZE - sizeof(void *))
-#define BITMAP_PAGE_CHUNKS	(BITMAP_PAGE_SIZE / sizeof(long))
-#define BITS_PER_CHUNK		(sizeof(long) * 8)
-#define BITMAP_PAGE_BITS	(BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK)
-
-struct bitmap_page {
-	unsigned long		chunks[BITMAP_PAGE_CHUNKS];
-	struct bitmap_page	*next;
-};
-
-extern void free_bitmap(struct bitmap_page *bitmap);
-extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits);
-extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap);
-extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap);
+extern sector_t alloc_swapdev_block(int swap);
+extern void free_all_swap_pages(int swap);
+extern int swsusp_swap_in_use(void);
 
 extern int swsusp_check(void);
 extern int swsusp_shrink_memory(void);
Index: linux-2.6.21-rc6/kernel/power/user.c
===================================================================
--- linux-2.6.21-rc6.orig/kernel/power/user.c
+++ linux-2.6.21-rc6/kernel/power/user.c
@@ -33,7 +33,6 @@
 static struct snapshot_data {
 	struct snapshot_handle handle;
 	int swap;
-	struct bitmap_page *bitmap;
 	int mode;
 	char frozen;
 	char ready;
@@ -69,7 +68,6 @@ static int snapshot_open(struct inode *i
 		data->swap = -1;
 		data->mode = O_WRONLY;
 	}
-	data->bitmap = NULL;
 	data->frozen = 0;
 	data->ready = 0;
 	data->platform_suspend = 0;
@@ -84,8 +82,7 @@ static int snapshot_release(struct inode
 	swsusp_free();
 	free_basic_memory_bitmaps();
 	data = filp->private_data;
-	free_all_swap_pages(data->swap, data->bitmap);
-	free_bitmap(data->bitmap);
+	free_all_swap_pages(data->swap);
 	if (data->frozen) {
 		mutex_lock(&pm_mutex);
 		thaw_processes();
@@ -300,14 +297,7 @@ static int snapshot_ioctl(struct inode *
 			error = -ENODEV;
 			break;
 		}
-		if (!data->bitmap) {
-			data->bitmap = alloc_bitmap(count_swap_pages(data->swap, 0));
-			if (!data->bitmap) {
-				error = -ENOMEM;
-				break;
-			}
-		}
-		offset = alloc_swapdev_block(data->swap, data->bitmap);
+		offset = alloc_swapdev_block(data->swap);
 		if (offset) {
 			offset <<= PAGE_SHIFT;
 			error = put_user(offset, (sector_t __user *)arg);
@@ -321,13 +311,11 @@ static int snapshot_ioctl(struct inode *
 			error = -ENODEV;
 			break;
 		}
-		free_all_swap_pages(data->swap, data->bitmap);
-		free_bitmap(data->bitmap);
-		data->bitmap = NULL;
+		free_all_swap_pages(data->swap);
 		break;
 
 	case SNAPSHOT_SET_SWAP_FILE:
-		if (!data->bitmap) {
+		if (!swsusp_swap_in_use()) {
 			/*
 			 * User space encodes device types as two-byte values,
 			 * so we need to recode them
@@ -426,7 +414,7 @@ static int snapshot_ioctl(struct inode *
 		break;
 
 	case SNAPSHOT_SET_SWAP_AREA:
-		if (data->bitmap) {
+		if (swsusp_swap_in_use()) {
 			error = -EPERM;
 		} else {
 			struct resume_swap_area swap_area;
Index: linux-2.6.21-rc6/kernel/power/swap.c
===================================================================
--- linux-2.6.21-rc6.orig/kernel/power/swap.c
+++ linux-2.6.21-rc6/kernel/power/swap.c
@@ -241,7 +241,6 @@ struct swap_map_page {
 struct swap_map_handle {
 	struct swap_map_page *cur;
 	sector_t cur_swap;
-	struct bitmap_page *bitmap;
 	unsigned int k;
 };
 
@@ -250,9 +249,6 @@ static void release_swap_writer(struct s
 	if (handle->cur)
 		free_page((unsigned long)handle->cur);
 	handle->cur = NULL;
-	if (handle->bitmap)
-		free_bitmap(handle->bitmap);
-	handle->bitmap = NULL;
 }
 
 static int get_swap_writer(struct swap_map_handle *handle)
@@ -260,12 +256,7 @@ static int get_swap_writer(struct swap_m
 	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
 	if (!handle->cur)
 		return -ENOMEM;
-	handle->bitmap = alloc_bitmap(count_swap_pages(root_swap, 0));
-	if (!handle->bitmap) {
-		release_swap_writer(handle);
-		return -ENOMEM;
-	}
-	handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap);
+	handle->cur_swap = alloc_swapdev_block(root_swap);
 	if (!handle->cur_swap) {
 		release_swap_writer(handle);
 		return -ENOSPC;
@@ -282,7 +273,7 @@ static int swap_write_page(struct swap_m
 
 	if (!handle->cur)
 		return -EINVAL;
-	offset = alloc_swapdev_block(root_swap, handle->bitmap);
+	offset = alloc_swapdev_block(root_swap);
 	error = write_page(buf, offset, bio_chain);
 	if (error)
 		return error;
@@ -291,7 +282,7 @@ static int swap_write_page(struct swap_m
 		error = wait_on_bio_chain(bio_chain);
 		if (error)
 			goto out;
-		offset = alloc_swapdev_block(root_swap, handle->bitmap);
+		offset = alloc_swapdev_block(root_swap);
 		if (!offset)
 			return -ENOSPC;
 		handle->cur->next_swap = offset;
@@ -428,7 +419,8 @@ int swsusp_write(void)
 		}
 	}
 	if (error)
-		free_all_swap_pages(root_swap, handle.bitmap);
+		free_all_swap_pages(root_swap);
+
 	release_swap_writer(&handle);
  out:
 	swsusp_close();

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-07 21:20 [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap Rafael J. Wysocki
@ 2007-04-07 22:06 ` Andrew Morton
  2007-04-07 22:31   ` Nigel Cunningham
  2007-04-08 12:56 ` Pavel Machek
  1 sibling, 1 reply; 13+ messages in thread
From: Andrew Morton @ 2007-04-07 22:06 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Pavel Machek, LKML, Nigel Cunningham

On Sat, 7 Apr 2007 23:20:39 +0200 "Rafael J. Wysocki" <rjw@sisk.pl> wrote:

> This should allow us to reduce the memory usage, practically always, and
> improve performance.

And does it?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-07 22:06 ` Andrew Morton
@ 2007-04-07 22:31   ` Nigel Cunningham
  2007-04-07 23:13     ` Rafael J. Wysocki
  0 siblings, 1 reply; 13+ messages in thread
From: Nigel Cunningham @ 2007-04-07 22:31 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Rafael J. Wysocki, Pavel Machek, LKML

Hi.

On Sat, 2007-04-07 at 15:06 -0700, Andrew Morton wrote:
> On Sat, 7 Apr 2007 23:20:39 +0200 "Rafael J. Wysocki" <rjw@sisk.pl> wrote:
> 
> > This should allow us to reduce the memory usage, practically always, and
> > improve performance.
> 
> And does it?

It will. I've been using extents for ages, for the same reasons. I don't
put them in an rb_tree because I view it as less than most efficient,
but it will still be a huge step forward from bitmaps in the normal
case.

The worst case would be if every second page of swap was in use, so that
you needed one extent per swap page. In that case, it would use more
memory than the bitmap, but far, far more common will be the case where
only one extent is needed for the whole swap partition, because the
algorithm used by the swap allocator minimises fragmentation.

Regards,

Nigel


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-07 22:31   ` Nigel Cunningham
@ 2007-04-07 23:13     ` Rafael J. Wysocki
  2007-04-07 23:42       ` Nigel Cunningham
  0 siblings, 1 reply; 13+ messages in thread
From: Rafael J. Wysocki @ 2007-04-07 23:13 UTC (permalink / raw)
  To: Nigel Cunningham, Andrew Morton; +Cc: Pavel Machek, LKML

On Sunday, 8 April 2007 00:31, Nigel Cunningham wrote:
> Hi.
> 
> On Sat, 2007-04-07 at 15:06 -0700, Andrew Morton wrote:
> > On Sat, 7 Apr 2007 23:20:39 +0200 "Rafael J. Wysocki" <rjw@sisk.pl> wrote:
> > 
> > > This should allow us to reduce the memory usage, practically always, and
> > > improve performance.
> > 
> > And does it?

Yes.  There are theoretical corner cases in which it may be less efficient
than the current approach, but in the usual situation it is _much_ better.

> It will. I've been using extents for ages, for the same reasons. I don't
> put them in an rb_tree because I view it as less than most efficient,

Actually, I don't agree with that.  In the normal situation (ie. one extent is
needed) there is no difference as far as the memory usage or performance
are concerned, but if there are more extents, the rbtree should be more
efficient.

> but it will still be a huge step forward from bitmaps in the normal
> case.
> 
> The worst case would be if every second page of swap was in use, so that
> you needed one extent per swap page. In that case, it would use more
> memory than the bitmap, but far, far more common will be the case where
> only one extent is needed for the whole swap partition, because the
> algorithm used by the swap allocator minimises fragmentation.

Exactly.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-07 23:13     ` Rafael J. Wysocki
@ 2007-04-07 23:42       ` Nigel Cunningham
  2007-04-08 16:47         ` Rafael J. Wysocki
  0 siblings, 1 reply; 13+ messages in thread
From: Nigel Cunningham @ 2007-04-07 23:42 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Andrew Morton, Pavel Machek, LKML

[-- Attachment #1: Type: text/plain, Size: 2798 bytes --]

Hi.

On Sun, 2007-04-08 at 01:13 +0200, Rafael J. Wysocki wrote:
> On Sunday, 8 April 2007 00:31, Nigel Cunningham wrote:
> > Hi.
> > 
> > On Sat, 2007-04-07 at 15:06 -0700, Andrew Morton wrote:
> > > On Sat, 7 Apr 2007 23:20:39 +0200 "Rafael J. Wysocki" <rjw@sisk.pl> wrote:
> > > 
> > > > This should allow us to reduce the memory usage, practically always, and
> > > > improve performance.
> > > 
> > > And does it?
> 
> Yes.  There are theoretical corner cases in which it may be less efficient
> than the current approach, but in the usual situation it is _much_ better.
> 
> > It will. I've been using extents for ages, for the same reasons. I don't
> > put them in an rb_tree because I view it as less than most efficient,
> 
> Actually, I don't agree with that.  In the normal situation (ie. one extent is
> needed) there is no difference as far as the memory usage or performance
> are concerned, but if there are more extents, the rbtree should be more
> efficient.

I don't think it's worth having a big discussion over, but let me give
you the details, which you can then feel free to ignore :)

The rb_node struct adds an unsigned long and two struct rb_node *
pointers. My extents use one struct extent * pointer. The difference is
thus 12/24 bytes per extent (32/64 bits) vs 20/40. In the normal
situation, not worth worrying about, but I'm also using these for
recording the sectors we write too, and thinking about swap files and
multiple swap devices. Nearly double the memory use bites more as you
get more extents.

Insertion cost for rb_node includes keeping the tree balanced. For
extents, I start with the location of the last insertion to minimise the
cost, so insertion time is usually virtually zero (inc max of last
extent or append a new one). If for some reason swap was allocated out
of order, I might need to traverse the whole chain from the start.

Normal usage in both cases is simply iterating through the list, so I
guess the cost would be approximately the same.

Deletion could would include rebalancing for the rb_nodes.

Code cost is a gain for you - you're leveraging existing code, I'm
adding a bit more. extent.c is 300 lines including code for serialising
the chains in an image header and iterating through a group of chains
(multiple swap devices support).

rb_nodes seem to be the wrong solution to me because we generally don't
care about searching. We care about minimising memory usage and
maximising the speed of iteration, insertion and deletion. I believe
I've managed to do that with a singly linked, sorted list.

That said, we've agreed that we're normally talking about a small number
of extents, so it's probably not worth the bandwidth I've already
spent :)

Regards,

Nigel

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-07 21:20 [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap Rafael J. Wysocki
  2007-04-07 22:06 ` Andrew Morton
@ 2007-04-08 12:56 ` Pavel Machek
  2007-04-08 17:02   ` Rafael J. Wysocki
  1 sibling, 1 reply; 13+ messages in thread
From: Pavel Machek @ 2007-04-08 12:56 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: LKML, Andrew Morton, Nigel Cunningham

Hi!

> Some time ago we discussed the possibility of simplifying the swsusp's approach
> towards tracking the swap pages allocated by it for saving the image (so that
> they can be freed if there's an error).
> 
> I think we can get back to it now, as it is a nice optimization that should
> allow us to use less memory (almost always) and improve performance a bit.
> 

Well, I do not think you can measure the difference, but...

>  kernel/power/power.h  |   27 +---------
>  kernel/power/swap.c   |   18 +-----
>  kernel/power/swsusp.c |  135 ++++++++++++++++++++++++++------------------------
>  kernel/power/user.c   |   22 +-------
>  4 files changed, 85 insertions(+), 117 deletions(-)

....as it removes code... I think we can do that. But it is 2.6.23+
material.

							Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-07 23:42       ` Nigel Cunningham
@ 2007-04-08 16:47         ` Rafael J. Wysocki
  2007-04-08 21:07           ` Nigel Cunningham
  0 siblings, 1 reply; 13+ messages in thread
From: Rafael J. Wysocki @ 2007-04-08 16:47 UTC (permalink / raw)
  To: nigel; +Cc: Andrew Morton, Pavel Machek, LKML

On Sunday, 8 April 2007 01:42, Nigel Cunningham wrote:
> Hi.
> 
> On Sun, 2007-04-08 at 01:13 +0200, Rafael J. Wysocki wrote:
> > On Sunday, 8 April 2007 00:31, Nigel Cunningham wrote:
> > > Hi.
> > > 
> > > On Sat, 2007-04-07 at 15:06 -0700, Andrew Morton wrote:
> > > > On Sat, 7 Apr 2007 23:20:39 +0200 "Rafael J. Wysocki" <rjw@sisk.pl> wrote:
> > > > 
> > > > > This should allow us to reduce the memory usage, practically always, and
> > > > > improve performance.
> > > > 
> > > > And does it?
> > 
> > Yes.  There are theoretical corner cases in which it may be less efficient
> > than the current approach, but in the usual situation it is _much_ better.
> > 
> > > It will. I've been using extents for ages, for the same reasons. I don't
> > > put them in an rb_tree because I view it as less than most efficient,
> > 
> > Actually, I don't agree with that.  In the normal situation (ie. one extent is
> > needed) there is no difference as far as the memory usage or performance
> > are concerned, but if there are more extents, the rbtree should be more
> > efficient.
> 
> I don't think it's worth having a big discussion over, but let me give
> you the details, which you can then feel free to ignore :)
> 
> The rb_node struct adds an unsigned long and two struct rb_node *
> pointers. My extents use one struct extent * pointer. The difference is
> thus 12/24 bytes per extent (32/64 bits) vs 20/40.

Well, you use open-coded lists.  If you used list.h lists, the numbers 
would be different. :-)

> In the normal situation, not worth worrying about, but I'm also using these for
> recording the sectors we write too, and thinking about swap files and
> multiple swap devices. Nearly double the memory use bites more as you
> get more extents.
>
> Insertion cost for rb_node includes keeping the tree balanced. For
> extents, I start with the location of the last insertion to minimise the
> cost, so insertion time is usually virtually zero (inc max of last
> extent or append a new one).

Isn't the appending one actually linear worst-case?

> If for some reason swap was allocated out of order, I might need to traverse
> the whole chain from the start. 

Exactly.

> Normal usage in both cases is simply iterating through the list, so I
> guess the cost would be approximately the same.
> 
> Deletion could would include rebalancing for the rb_nodes.

In swsusp the deletions are needed only if there's an error.

> Code cost is a gain for you - you're leveraging existing code, I'm
> adding a bit more. extent.c is 300 lines including code for serialising
> the chains in an image header and iterating through a group of chains
> (multiple swap devices support).
> 
> rb_nodes seem to be the wrong solution to me because we generally don't
> care about searching. We care about minimising memory usage and
> maximising the speed of iteration, insertion and deletion. I believe
> I've managed to do that with a singly linked, sorted list.

The insertion also uses searching and in fact I don't really care for anything
else.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-08 12:56 ` Pavel Machek
@ 2007-04-08 17:02   ` Rafael J. Wysocki
  2007-04-09 12:39     ` Pavel Machek
  0 siblings, 1 reply; 13+ messages in thread
From: Rafael J. Wysocki @ 2007-04-08 17:02 UTC (permalink / raw)
  To: Pavel Machek; +Cc: LKML, Andrew Morton, Nigel Cunningham

On Sunday, 8 April 2007 14:56, Pavel Machek wrote:
> Hi!
> 
> > Some time ago we discussed the possibility of simplifying the swsusp's approach
> > towards tracking the swap pages allocated by it for saving the image (so that
> > they can be freed if there's an error).
> > 
> > I think we can get back to it now, as it is a nice optimization that should
> > allow us to use less memory (almost always) and improve performance a bit.
> > 
> 
> Well, I do not think you can measure the difference, but...

As far as the memory usage is concerned, I can. :-)  Usually, it takes 1 extent
(40 B on x86_64) to register all of the allocated swap pages.  If bitmaps are
used, we need as many bits as there are swap pages available (for 1 GB swap
and 4 KB pages that would be ~250000 bits, which gives ~8 pages, and we can
save more than 800 extents using that much memory).

> >  kernel/power/power.h  |   27 +---------
> >  kernel/power/swap.c   |   18 +-----
> >  kernel/power/swsusp.c |  135 ++++++++++++++++++++++++++------------------------
> >  kernel/power/user.c   |   22 +-------
> >  4 files changed, 85 insertions(+), 117 deletions(-)
> 
> ....as it removes code... I think we can do that. But it is 2.6.23+
> material.

Yes, I think so, but still we can ask Andrew to include it into -mm earlier? ;-)

Rafael

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-08 16:47         ` Rafael J. Wysocki
@ 2007-04-08 21:07           ` Nigel Cunningham
  2007-04-09 13:03             ` Rafael J. Wysocki
  0 siblings, 1 reply; 13+ messages in thread
From: Nigel Cunningham @ 2007-04-08 21:07 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Andrew Morton, Pavel Machek, LKML

Hi.

On Sun, 2007-04-08 at 18:47 +0200, Rafael J. Wysocki wrote:
> On Sunday, 8 April 2007 01:42, Nigel Cunningham wrote:
> > Hi.
> > 
> > On Sun, 2007-04-08 at 01:13 +0200, Rafael J. Wysocki wrote:
> > > On Sunday, 8 April 2007 00:31, Nigel Cunningham wrote:
> > > > Hi.
> > > > 
> > > > On Sat, 2007-04-07 at 15:06 -0700, Andrew Morton wrote:
> > > > > On Sat, 7 Apr 2007 23:20:39 +0200 "Rafael J. Wysocki" <rjw@sisk.pl> wrote:
> > > > > 
> > > > > > This should allow us to reduce the memory usage, practically always, and
> > > > > > improve performance.
> > > > > 
> > > > > And does it?
> > > 
> > > Yes.  There are theoretical corner cases in which it may be less efficient
> > > than the current approach, but in the usual situation it is _much_ better.
> > > 
> > > > It will. I've been using extents for ages, for the same reasons. I don't
> > > > put them in an rb_tree because I view it as less than most efficient,
> > > 
> > > Actually, I don't agree with that.  In the normal situation (ie. one extent is
> > > needed) there is no difference as far as the memory usage or performance
> > > are concerned, but if there are more extents, the rbtree should be more
> > > efficient.
> > 
> > I don't think it's worth having a big discussion over, but let me give
> > you the details, which you can then feel free to ignore :)
> > 
> > The rb_node struct adds an unsigned long and two struct rb_node *
> > pointers. My extents use one struct extent * pointer. The difference is
> > thus 12/24 bytes per extent (32/64 bits) vs 20/40.
> 
> Well, you use open-coded lists.  If you used list.h lists, the numbers 
> would be different. :-)

Yes, but I don't need doubly linked lists.

> > In the normal situation, not worth worrying about, but I'm also using these for
> > recording the sectors we write too, and thinking about swap files and
> > multiple swap devices. Nearly double the memory use bites more as you
> > get more extents.
> >
> > Insertion cost for rb_node includes keeping the tree balanced. For
> > extents, I start with the location of the last insertion to minimise the
> > cost, so insertion time is usually virtually zero (inc max of last
> > extent or append a new one).
> 
> Isn't the appending one actually linear worst-case?

Worst case would be the swap allocator returning swap pages in reverse
order. As you and I both know, that doesn't happen. I first implemented
this in 2003. If the worst case actually happened, I would have seen the
effect by now :)

> > If for some reason swap was allocated out of order, I might need to traverse
> > the whole chain from the start. 
> 
> Exactly.
> 
> > Normal usage in both cases is simply iterating through the list, so I
> > guess the cost would be approximately the same.
> > 
> > Deletion could would include rebalancing for the rb_nodes.
> 
> In swsusp the deletions are needed only if there's an error.

When freeing swap at the end of the cycle?

> > Code cost is a gain for you - you're leveraging existing code, I'm
> > adding a bit more. extent.c is 300 lines including code for serialising
> > the chains in an image header and iterating through a group of chains
> > (multiple swap devices support).
> > 
> > rb_nodes seem to be the wrong solution to me because we generally don't
> > care about searching. We care about minimising memory usage and
> > maximising the speed of iteration, insertion and deletion. I believe
> > I've managed to do that with a singly linked, sorted list.
> 
> The insertion also uses searching and in fact I don't really care for anything
> else.

Ok :)

Nigel


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-08 17:02   ` Rafael J. Wysocki
@ 2007-04-09 12:39     ` Pavel Machek
  2007-04-10 21:00       ` Rafael J. Wysocki
  0 siblings, 1 reply; 13+ messages in thread
From: Pavel Machek @ 2007-04-09 12:39 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: LKML, Andrew Morton, Nigel Cunningham

Hi!

> > > Some time ago we discussed the possibility of simplifying the swsusp's approach
> > > towards tracking the swap pages allocated by it for saving the image (so that
> > > they can be freed if there's an error).
> > > 
> > > I think we can get back to it now, as it is a nice optimization that should
> > > allow us to use less memory (almost always) and improve performance a bit.
> > > 
> > 
> > Well, I do not think you can measure the difference, but...
> 
> As far as the memory usage is concerned, I can. :-)  Usually, it takes 1 extent
> (40 B on x86_64) to register all of the allocated swap pages.  If bitmaps are
> used, we need as many bits as there are swap pages available (for 1 GB swap
> and 4 KB pages that would be ~250000 bits, which gives ~8 pages, and we can
> save more than 800 extents using that much memory).

Well... obviously it works for the best case. OTOH, for the worst, it
needs 40bytes for every 2 bits. That's 16000% worse. And for that
nightmare-fragmented 1GB swap, you'll need 5000000bytes... which is
pretty bad.

OTOH 5MB RAM per 1GB swap is not _too_ bad... so we can do it...

> > >  kernel/power/power.h  |   27 +---------
> > >  kernel/power/swap.c   |   18 +-----
> > >  kernel/power/swsusp.c |  135 ++++++++++++++++++++++++++------------------------
> > >  kernel/power/user.c   |   22 +-------
> > >  4 files changed, 85 insertions(+), 117 deletions(-)

...and call it 'cleanup' not 'speedup'.


> > ....as it removes code... I think we can do that. But it is 2.6.23+
> > material.
> 
> Yes, I think so, but still we can ask Andrew to include it into -mm earlier? ;-)

Yes, that's okay with me.

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-08 21:07           ` Nigel Cunningham
@ 2007-04-09 13:03             ` Rafael J. Wysocki
  2007-04-09 21:00               ` Nigel Cunningham
  0 siblings, 1 reply; 13+ messages in thread
From: Rafael J. Wysocki @ 2007-04-09 13:03 UTC (permalink / raw)
  To: Nigel Cunningham; +Cc: Andrew Morton, Pavel Machek, LKML

On Sunday, 8 April 2007 23:07, Nigel Cunningham wrote:
[--snip--]
> > > Normal usage in both cases is simply iterating through the list, so I
> > > guess the cost would be approximately the same.
> > > 
> > > Deletion could would include rebalancing for the rb_nodes.
> > 
> > In swsusp the deletions are needed only if there's an error.
> 
> When freeing swap at the end of the cycle?

That depends on what you mean by 'the end'. :-)

We free swap if the image saving fails only, since it's allocated after we've
created the image.  After the resume, the state of swap from before the image
creation is the current one anyway.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-09 13:03             ` Rafael J. Wysocki
@ 2007-04-09 21:00               ` Nigel Cunningham
  0 siblings, 0 replies; 13+ messages in thread
From: Nigel Cunningham @ 2007-04-09 21:00 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Andrew Morton, Pavel Machek, LKML

Hi.

On Mon, 2007-04-09 at 15:03 +0200, Rafael J. Wysocki wrote:
> On Sunday, 8 April 2007 23:07, Nigel Cunningham wrote:
> [--snip--]
> > > > Normal usage in both cases is simply iterating through the list, so I
> > > > guess the cost would be approximately the same.
> > > > 
> > > > Deletion could would include rebalancing for the rb_nodes.
> > > 
> > > In swsusp the deletions are needed only if there's an error.
> > 
> > When freeing swap at the end of the cycle?
> 
> That depends on what you mean by 'the end'. :-)
> 
> We free swap if the image saving fails only, since it's allocated after we've
> created the image.  After the resume, the state of swap from before the image
> creation is the current one anyway.

Ah, of course. I forgot that temporarily.

Nigel


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap
  2007-04-09 12:39     ` Pavel Machek
@ 2007-04-10 21:00       ` Rafael J. Wysocki
  0 siblings, 0 replies; 13+ messages in thread
From: Rafael J. Wysocki @ 2007-04-10 21:00 UTC (permalink / raw)
  To: Pavel Machek; +Cc: LKML, Andrew Morton, Nigel Cunningham

On Monday, 9 April 2007 14:39, Pavel Machek wrote:
> Hi!
> 
> > > > Some time ago we discussed the possibility of simplifying the swsusp's approach
> > > > towards tracking the swap pages allocated by it for saving the image (so that
> > > > they can be freed if there's an error).
> > > > 
> > > > I think we can get back to it now, as it is a nice optimization that should
> > > > allow us to use less memory (almost always) and improve performance a bit.
> > > > 
> > > 
> > > Well, I do not think you can measure the difference, but...
> > 
> > As far as the memory usage is concerned, I can. :-)  Usually, it takes 1 extent
> > (40 B on x86_64) to register all of the allocated swap pages.  If bitmaps are
> > used, we need as many bits as there are swap pages available (for 1 GB swap
> > and 4 KB pages that would be ~250000 bits, which gives ~8 pages, and we can
> > save more than 800 extents using that much memory).
> 
> Well... obviously it works for the best case. OTOH, for the worst, it
> needs 40bytes for every 2 bits. That's 16000% worse. And for that
> nightmare-fragmented 1GB swap, you'll need 5000000bytes... which is
> pretty bad.
> 
> OTOH 5MB RAM per 1GB swap is not _too_ bad... so we can do it...

In real-life scenarios you always need to keep free swap space enough for
suspending all the time, which IMO effectively prevents the "totally fragmented
1 GB swap" situation from happening.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2007-04-10 20:57 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-04-07 21:20 [RFC][PATCH -mm] swsusp: Use rbtree for tracking allocated swap Rafael J. Wysocki
2007-04-07 22:06 ` Andrew Morton
2007-04-07 22:31   ` Nigel Cunningham
2007-04-07 23:13     ` Rafael J. Wysocki
2007-04-07 23:42       ` Nigel Cunningham
2007-04-08 16:47         ` Rafael J. Wysocki
2007-04-08 21:07           ` Nigel Cunningham
2007-04-09 13:03             ` Rafael J. Wysocki
2007-04-09 21:00               ` Nigel Cunningham
2007-04-08 12:56 ` Pavel Machek
2007-04-08 17:02   ` Rafael J. Wysocki
2007-04-09 12:39     ` Pavel Machek
2007-04-10 21:00       ` Rafael J. Wysocki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.