* [PATCH 1/4] swap: revert special hibernation allocation
@ 2010-09-06 8:10 Hugh Dickins
2010-09-06 8:12 ` [PATCH 2/4] swap: prevent reuse during hibernation Hugh Dickins
` (3 more replies)
0 siblings, 4 replies; 11+ messages in thread
From: Hugh Dickins @ 2010-09-06 8:10 UTC (permalink / raw)
To: Andrew Morton
Cc: KAMEZAWA Hiroyuki, KOSAKI Motohiro, Rafael J. Wysocki,
Ondrej Zary, Andrea Gelmini, Balbir Singh, Andrea Arcangeli,
Nigel Cunningham, linux-mm, linux-kernel
Please revert 2.6.36-rc commit d2997b1042ec150616c1963b5e5e919ffd0b0ebf
"hibernation: freeze swap at hibernation". It complicated matters by
adding a second swap allocation path, just for hibernation; without in
any way fixing the issue that it was intended to address - page reclaim
after fixing the hibernation image might free swap from a page already
imaged as swapcache, letting its swap be reallocated to store a different
page of the image: resulting in data corruption if the imaged page were
freed as clean then swapped back in. Pages freed to si->swap_map were
still in danger of being reallocated by the alternative allocation path.
I guess it inadvertently fixed slow SSD swap allocation for hibernation,
as reported by Nigel Cunningham: by missing out the discards that occur
on the usual swap allocation path; but that was unintentional, and needs
a separate fix.
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Ondrej Zary <linux@rainbow-software.org>
Cc: Andrea Gelmini <andrea.gelmini@gmail.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Nigel Cunningham <nigel@tuxonice.net>
---
include/linux/swap.h | 8 ---
kernel/power/hibernate.c | 1
kernel/power/snapshot.c | 1
kernel/power/swap.c | 6 +-
mm/swapfile.c | 94 ++++++++-----------------------------
5 files changed, 26 insertions(+), 84 deletions(-)
--- 2.6.36-rc3/include/linux/swap.h 2010-08-16 00:17:59.000000000 -0700
+++ swap1/include/linux/swap.h 2010-09-05 22:37:07.000000000 -0700
@@ -315,6 +315,7 @@ extern long nr_swap_pages;
extern long total_swap_pages;
extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page_of_type(int);
extern int valid_swaphandles(swp_entry_t, unsigned long *);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
@@ -331,13 +332,6 @@ extern int reuse_swap_page(struct page *
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
-#ifdef CONFIG_HIBERNATION
-void hibernation_freeze_swap(void);
-void hibernation_thaw_swap(void);
-swp_entry_t get_swap_for_hibernation(int type);
-void swap_free_for_hibernation(swp_entry_t val);
-#endif
-
/* linux/mm/thrash.c */
extern struct mm_struct *swap_token_mm;
extern void grab_swap_token(struct mm_struct *);
--- 2.6.36-rc3/kernel/power/hibernate.c 2010-08-16 00:18:00.000000000 -0700
+++ swap1/kernel/power/hibernate.c 2010-09-05 22:37:07.000000000 -0700
@@ -338,7 +338,6 @@ int hibernation_snapshot(int platform_mo
goto Close;
suspend_console();
- hibernation_freeze_swap();
saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
error = dpm_suspend_start(PMSG_FREEZE);
if (error)
--- 2.6.36-rc3/kernel/power/snapshot.c 2010-08-16 00:18:00.000000000 -0700
+++ swap1/kernel/power/snapshot.c 2010-09-05 22:37:07.000000000 -0700
@@ -1086,7 +1086,6 @@ void swsusp_free(void)
buffer = NULL;
alloc_normal = 0;
alloc_highmem = 0;
- hibernation_thaw_swap();
}
/* Helper functions used for the shrinking of memory. */
--- 2.6.36-rc3/kernel/power/swap.c 2010-08-16 00:18:00.000000000 -0700
+++ swap1/kernel/power/swap.c 2010-09-05 22:37:07.000000000 -0700
@@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap)
{
unsigned long offset;
- offset = swp_offset(get_swap_for_hibernation(swap));
+ offset = swp_offset(get_swap_page_of_type(swap));
if (offset) {
if (swsusp_extents_insert(offset))
- swap_free_for_hibernation(swp_entry(swap, offset));
+ swap_free(swp_entry(swap, offset));
else
return swapdev_block(swap, offset);
}
@@ -163,7 +163,7 @@ void free_all_swap_pages(int swap)
ext = container_of(node, struct swsusp_extent, node);
rb_erase(node, &swsusp_extents);
for (offset = ext->start; offset <= ext->end; offset++)
- swap_free_for_hibernation(swp_entry(swap, offset));
+ swap_free(swp_entry(swap, offset));
kfree(ext);
}
--- 2.6.36-rc3/mm/swapfile.c 2010-08-16 00:18:01.000000000 -0700
+++ swap1/mm/swapfile.c 2010-09-05 22:37:07.000000000 -0700
@@ -47,8 +47,6 @@ long nr_swap_pages;
long total_swap_pages;
static int least_priority;
-static bool swap_for_hibernation;
-
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
@@ -453,8 +451,6 @@ swp_entry_t get_swap_page(void)
spin_lock(&swap_lock);
if (nr_swap_pages <= 0)
goto noswap;
- if (swap_for_hibernation)
- goto noswap;
nr_swap_pages--;
for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
@@ -487,6 +483,28 @@ noswap:
return (swp_entry_t) {0};
}
+/* The only caller of this function is now susupend routine */
+swp_entry_t get_swap_page_of_type(int type)
+{
+ struct swap_info_struct *si;
+ pgoff_t offset;
+
+ spin_lock(&swap_lock);
+ si = swap_info[type];
+ if (si && (si->flags & SWP_WRITEOK)) {
+ nr_swap_pages--;
+ /* This is called for allocating swap entry, not cache */
+ offset = scan_swap_map(si, 1);
+ if (offset) {
+ spin_unlock(&swap_lock);
+ return swp_entry(type, offset);
+ }
+ nr_swap_pages++;
+ }
+ spin_unlock(&swap_lock);
+ return (swp_entry_t) {0};
+}
+
static struct swap_info_struct *swap_info_get(swp_entry_t entry)
{
struct swap_info_struct *p;
@@ -746,74 +764,6 @@ int mem_cgroup_count_swap_user(swp_entry
#endif
#ifdef CONFIG_HIBERNATION
-
-static pgoff_t hibernation_offset[MAX_SWAPFILES];
-/*
- * Once hibernation starts to use swap, we freeze swap_map[]. Otherwise,
- * saved swap_map[] image to the disk will be an incomplete because it's
- * changing without synchronization with hibernation snap shot.
- * At resume, we just make swap_for_hibernation=false. We can forget
- * used maps easily.
- */
-void hibernation_freeze_swap(void)
-{
- int i;
-
- spin_lock(&swap_lock);
-
- printk(KERN_INFO "PM: Freeze Swap\n");
- swap_for_hibernation = true;
- for (i = 0; i < MAX_SWAPFILES; i++)
- hibernation_offset[i] = 1;
- spin_unlock(&swap_lock);
-}
-
-void hibernation_thaw_swap(void)
-{
- spin_lock(&swap_lock);
- if (swap_for_hibernation) {
- printk(KERN_INFO "PM: Thaw Swap\n");
- swap_for_hibernation = false;
- }
- spin_unlock(&swap_lock);
-}
-
-/*
- * Because updateing swap_map[] can make not-saved-status-change,
- * we use our own easy allocator.
- * Please see kernel/power/swap.c, Used swaps are recorded into
- * RB-tree.
- */
-swp_entry_t get_swap_for_hibernation(int type)
-{
- pgoff_t off;
- swp_entry_t val = {0};
- struct swap_info_struct *si;
-
- spin_lock(&swap_lock);
-
- si = swap_info[type];
- if (!si || !(si->flags & SWP_WRITEOK))
- goto done;
-
- for (off = hibernation_offset[type]; off < si->max; ++off) {
- if (!si->swap_map[off])
- break;
- }
- if (off < si->max) {
- val = swp_entry(type, off);
- hibernation_offset[type] = off + 1;
- }
-done:
- spin_unlock(&swap_lock);
- return val;
-}
-
-void swap_free_for_hibernation(swp_entry_t ent)
-{
- /* Nothing to do */
-}
-
/*
* Find the swap type that corresponds to given device (if any).
*
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 2/4] swap: prevent reuse during hibernation
2010-09-06 8:10 [PATCH 1/4] swap: revert special hibernation allocation Hugh Dickins
@ 2010-09-06 8:12 ` Hugh Dickins
2010-09-06 8:15 ` KAMEZAWA Hiroyuki
` (2 more replies)
2010-09-06 8:13 ` [PATCH 1/4] swap: revert special hibernation allocation KAMEZAWA Hiroyuki
` (2 subsequent siblings)
3 siblings, 3 replies; 11+ messages in thread
From: Hugh Dickins @ 2010-09-06 8:12 UTC (permalink / raw)
To: Andrew Morton
Cc: KAMEZAWA Hiroyuki, KOSAKI Motohiro, Rafael J. Wysocki,
Ondrej Zary, Andrea Gelmini, Balbir Singh, Andrea Arcangeli,
Nigel Cunningham, linux-mm, linux-kernel
Move the hibernation check from scan_swap_map() into try_to_free_swap():
to catch not only the common case when hibernation's allocation itself
triggers swap reuse, but also the less likely case when concurrent page
reclaim (shrink_page_list) might happen to try_to_free_swap from a page.
Hibernation already clears __GFP_IO from the gfp_allowed_mask, to stop
reclaim from going to swap: check that to prevent swap reuse too.
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Ondrej Zary <linux@rainbow-software.org>
Cc: Andrea Gelmini <andrea.gelmini@gmail.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Nigel Cunningham <nigel@tuxonice.net>
Cc: stable@kernel.org
---
mm/swapfile.c | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
--- swap1/mm/swapfile.c 2010-09-05 22:37:07.000000000 -0700
+++ swap2/mm/swapfile.c 2010-09-05 22:45:54.000000000 -0700
@@ -318,10 +318,8 @@ checks:
if (offset > si->highest_bit)
scan_base = offset = si->lowest_bit;
- /* reuse swap entry of cache-only swap if not hibernation. */
- if (vm_swap_full()
- && usage == SWAP_HAS_CACHE
- && si->swap_map[offset] == SWAP_HAS_CACHE) {
+ /* reuse swap entry of cache-only swap if not busy. */
+ if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
int swap_was_freed;
spin_unlock(&swap_lock);
swap_was_freed = __try_to_reclaim_swap(si, offset);
@@ -688,6 +686,24 @@ int try_to_free_swap(struct page *page)
if (page_swapcount(page))
return 0;
+ /*
+ * Once hibernation has begun to create its image of memory,
+ * there's a danger that one of the calls to try_to_free_swap()
+ * - most probably a call from __try_to_reclaim_swap() while
+ * hibernation is allocating its own swap pages for the image,
+ * but conceivably even a call from memory reclaim - will free
+ * the swap from a page which has already been recorded in the
+ * image as a clean swapcache page, and then reuse its swap for
+ * another page of the image. On waking from hibernation, the
+ * original page might be freed under memory pressure, then
+ * later read back in from swap, now with the wrong data.
+ *
+ * Hibernation clears bits from gfp_allowed_mask to prevent
+ * memory reclaim from writing to disk, so check that here.
+ */
+ if (!(gfp_allowed_mask & __GFP_IO))
+ return 0;
+
delete_from_swap_cache(page);
SetPageDirty(page);
return 1;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 2/4] swap: prevent reuse during hibernation
2010-09-06 8:12 ` [PATCH 2/4] swap: prevent reuse during hibernation Hugh Dickins
@ 2010-09-06 8:15 ` KAMEZAWA Hiroyuki
2010-09-06 18:34 ` Rafael J. Wysocki
2010-09-07 1:03 ` KOSAKI Motohiro
2010-09-07 20:20 ` Andrew Morton
2 siblings, 1 reply; 11+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-09-06 8:15 UTC (permalink / raw)
To: Hugh Dickins
Cc: Andrew Morton, KOSAKI Motohiro, Rafael J. Wysocki, Ondrej Zary,
Andrea Gelmini, Balbir Singh, Andrea Arcangeli, Nigel Cunningham,
linux-mm, linux-kernel
On Mon, 6 Sep 2010 01:12:38 -0700 (PDT)
Hugh Dickins <hughd@google.com> wrote:
> Move the hibernation check from scan_swap_map() into try_to_free_swap():
> to catch not only the common case when hibernation's allocation itself
> triggers swap reuse, but also the less likely case when concurrent page
> reclaim (shrink_page_list) might happen to try_to_free_swap from a page.
>
> Hibernation already clears __GFP_IO from the gfp_allowed_mask, to stop
> reclaim from going to swap: check that to prevent swap reuse too.
>
> Signed-off-by: Hugh Dickins <hughd@google.com>
> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
> Cc: Ondrej Zary <linux@rainbow-software.org>
> Cc: Andrea Gelmini <andrea.gelmini@gmail.com>
> Cc: Balbir Singh <balbir@in.ibm.com>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Nigel Cunningham <nigel@tuxonice.net>
> Cc: stable@kernel.org
Hmm...seems better.
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/4] swap: prevent reuse during hibernation
2010-09-06 8:15 ` KAMEZAWA Hiroyuki
@ 2010-09-06 18:34 ` Rafael J. Wysocki
0 siblings, 0 replies; 11+ messages in thread
From: Rafael J. Wysocki @ 2010-09-06 18:34 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: Hugh Dickins, Andrew Morton, KOSAKI Motohiro, Ondrej Zary,
Andrea Gelmini, Balbir Singh, Andrea Arcangeli, Nigel Cunningham,
linux-mm, linux-kernel
On Monday, September 06, 2010, KAMEZAWA Hiroyuki wrote:
> On Mon, 6 Sep 2010 01:12:38 -0700 (PDT)
> Hugh Dickins <hughd@google.com> wrote:
>
> > Move the hibernation check from scan_swap_map() into try_to_free_swap():
> > to catch not only the common case when hibernation's allocation itself
> > triggers swap reuse, but also the less likely case when concurrent page
> > reclaim (shrink_page_list) might happen to try_to_free_swap from a page.
> >
> > Hibernation already clears __GFP_IO from the gfp_allowed_mask, to stop
> > reclaim from going to swap: check that to prevent swap reuse too.
> >
> > Signed-off-by: Hugh Dickins <hughd@google.com>
> > Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> > Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> > Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
> > Cc: Ondrej Zary <linux@rainbow-software.org>
> > Cc: Andrea Gelmini <andrea.gelmini@gmail.com>
> > Cc: Balbir Singh <balbir@in.ibm.com>
> > Cc: Andrea Arcangeli <aarcange@redhat.com>
> > Cc: Nigel Cunningham <nigel@tuxonice.net>
> > Cc: stable@kernel.org
>
> Hmm...seems better.
>
> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/4] swap: prevent reuse during hibernation
2010-09-06 8:12 ` [PATCH 2/4] swap: prevent reuse during hibernation Hugh Dickins
2010-09-06 8:15 ` KAMEZAWA Hiroyuki
@ 2010-09-07 1:03 ` KOSAKI Motohiro
2010-09-07 20:20 ` Andrew Morton
2 siblings, 0 replies; 11+ messages in thread
From: KOSAKI Motohiro @ 2010-09-07 1:03 UTC (permalink / raw)
To: Hugh Dickins
Cc: kosaki.motohiro, Andrew Morton, KAMEZAWA Hiroyuki,
Rafael J. Wysocki, Ondrej Zary, Andrea Gelmini, Balbir Singh,
Andrea Arcangeli, Nigel Cunningham, linux-mm, linux-kernel
> Move the hibernation check from scan_swap_map() into try_to_free_swap():
> to catch not only the common case when hibernation's allocation itself
> triggers swap reuse, but also the less likely case when concurrent page
> reclaim (shrink_page_list) might happen to try_to_free_swap from a page.
>
> Hibernation already clears __GFP_IO from the gfp_allowed_mask, to stop
> reclaim from going to swap: check that to prevent swap reuse too.
>
> Signed-off-by: Hugh Dickins <hughd@google.com>
> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
> Cc: Ondrej Zary <linux@rainbow-software.org>
> Cc: Andrea Gelmini <andrea.gelmini@gmail.com>
> Cc: Balbir Singh <balbir@in.ibm.com>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Nigel Cunningham <nigel@tuxonice.net>
> Cc: stable@kernel.org
> ---
>
> mm/swapfile.c | 24 ++++++++++++++++++++----
> 1 file changed, 20 insertions(+), 4 deletions(-)
>
> --- swap1/mm/swapfile.c 2010-09-05 22:37:07.000000000 -0700
> +++ swap2/mm/swapfile.c 2010-09-05 22:45:54.000000000 -0700
> @@ -318,10 +318,8 @@ checks:
> if (offset > si->highest_bit)
> scan_base = offset = si->lowest_bit;
>
> - /* reuse swap entry of cache-only swap if not hibernation. */
> - if (vm_swap_full()
> - && usage == SWAP_HAS_CACHE
> - && si->swap_map[offset] == SWAP_HAS_CACHE) {
> + /* reuse swap entry of cache-only swap if not busy. */
> + if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
> int swap_was_freed;
> spin_unlock(&swap_lock);
> swap_was_freed = __try_to_reclaim_swap(si, offset);
> @@ -688,6 +686,24 @@ int try_to_free_swap(struct page *page)
> if (page_swapcount(page))
> return 0;
>
> + /*
> + * Once hibernation has begun to create its image of memory,
> + * there's a danger that one of the calls to try_to_free_swap()
> + * - most probably a call from __try_to_reclaim_swap() while
> + * hibernation is allocating its own swap pages for the image,
> + * but conceivably even a call from memory reclaim - will free
> + * the swap from a page which has already been recorded in the
> + * image as a clean swapcache page, and then reuse its swap for
> + * another page of the image. On waking from hibernation, the
> + * original page might be freed under memory pressure, then
> + * later read back in from swap, now with the wrong data.
> + *
> + * Hibernation clears bits from gfp_allowed_mask to prevent
> + * memory reclaim from writing to disk, so check that here.
> + */
> + if (!(gfp_allowed_mask & __GFP_IO))
> + return 0;
> +
I like this one.
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 2/4] swap: prevent reuse during hibernation
2010-09-06 8:12 ` [PATCH 2/4] swap: prevent reuse during hibernation Hugh Dickins
2010-09-06 8:15 ` KAMEZAWA Hiroyuki
2010-09-07 1:03 ` KOSAKI Motohiro
@ 2010-09-07 20:20 ` Andrew Morton
2010-09-07 21:38 ` Hugh Dickins
2 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2010-09-07 20:20 UTC (permalink / raw)
To: Hugh Dickins
Cc: KAMEZAWA Hiroyuki, KOSAKI Motohiro, Rafael J. Wysocki,
Ondrej Zary, Andrea Gelmini, Balbir Singh, Andrea Arcangeli,
Nigel Cunningham, linux-mm, linux-kernel, stable
On Mon, 6 Sep 2010 01:12:38 -0700 (PDT)
Hugh Dickins <hughd@google.com> wrote:
> Move the hibernation check from scan_swap_map() into try_to_free_swap():
Well, it doesn't really "move" anything. It removes one test (usage ==
SWAP_HAS_CACHE) and adds a quite different one (gfp_allowed_mask &
__GFP_IO).
> to catch not only the common case when hibernation's allocation itself
> triggers swap reuse, but also the less likely case when concurrent page
> reclaim (shrink_page_list) might happen to try_to_free_swap from a page.
>
> Hibernation already clears __GFP_IO from the gfp_allowed_mask, to stop
> reclaim from going to swap: check that to prevent swap reuse too.
>
> Signed-off-by: Hugh Dickins <hughd@google.com>
> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
> Cc: Ondrej Zary <linux@rainbow-software.org>
> Cc: Andrea Gelmini <andrea.gelmini@gmail.com>
> Cc: Balbir Singh <balbir@in.ibm.com>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Nigel Cunningham <nigel@tuxonice.net>
> Cc: stable@kernel.org
> ---
>
> mm/swapfile.c | 24 ++++++++++++++++++++----
> 1 file changed, 20 insertions(+), 4 deletions(-)
>
> --- swap1/mm/swapfile.c 2010-09-05 22:37:07.000000000 -0700
> +++ swap2/mm/swapfile.c 2010-09-05 22:45:54.000000000 -0700
> @@ -318,10 +318,8 @@ checks:
> if (offset > si->highest_bit)
> scan_base = offset = si->lowest_bit;
>
> - /* reuse swap entry of cache-only swap if not hibernation. */
> - if (vm_swap_full()
> - && usage == SWAP_HAS_CACHE
> - && si->swap_map[offset] == SWAP_HAS_CACHE) {
> + /* reuse swap entry of cache-only swap if not busy. */
> + if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
> int swap_was_freed;
> spin_unlock(&swap_lock);
> swap_was_freed = __try_to_reclaim_swap(si, offset);
This hunk is already present in 2.6.35.
> @@ -688,6 +686,24 @@ int try_to_free_swap(struct page *page)
> if (page_swapcount(page))
> return 0;
>
> + /*
> + * Once hibernation has begun to create its image of memory,
> + * there's a danger that one of the calls to try_to_free_swap()
> + * - most probably a call from __try_to_reclaim_swap() while
> + * hibernation is allocating its own swap pages for the image,
> + * but conceivably even a call from memory reclaim - will free
> + * the swap from a page which has already been recorded in the
> + * image as a clean swapcache page, and then reuse its swap for
> + * another page of the image. On waking from hibernation, the
> + * original page might be freed under memory pressure, then
> + * later read back in from swap, now with the wrong data.
> + *
> + * Hibernation clears bits from gfp_allowed_mask to prevent
> + * memory reclaim from writing to disk, so check that here.
> + */
> + if (!(gfp_allowed_mask & __GFP_IO))
> + return 0;
> +
> delete_from_swap_cache(page);
> SetPageDirty(page);
> return 1;
This is the good bit. I guess the (unCc:ed!) -stable guys would like a
standalone patch.
Also, are patches [3/4] and [4/4] really -stable material??
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 2/4] swap: prevent reuse during hibernation
2010-09-07 20:20 ` Andrew Morton
@ 2010-09-07 21:38 ` Hugh Dickins
2010-09-09 5:44 ` Nigel Cunningham
0 siblings, 1 reply; 11+ messages in thread
From: Hugh Dickins @ 2010-09-07 21:38 UTC (permalink / raw)
To: Andrew Morton
Cc: KAMEZAWA Hiroyuki, KOSAKI Motohiro, Rafael J. Wysocki,
Ondrej Zary, Andrea Gelmini, Balbir Singh, Andrea Arcangeli,
Nigel Cunningham, linux-mm, linux-kernel, stable
On Tue, 7 Sep 2010, Andrew Morton wrote:
> On Mon, 6 Sep 2010 01:12:38 -0700 (PDT)
> Hugh Dickins <hughd@google.com> wrote:
>
> > Move the hibernation check from scan_swap_map() into try_to_free_swap():
>
> Well, it doesn't really "move" anything. It removes one test (usage ==
> SWAP_HAS_CACHE) and adds a quite different one (gfp_allowed_mask &
> __GFP_IO).
Okay, replaces a peculiar check for hibernation in scan_swap_map()
by a more general check for hibernation inside try_to_free_swap().
>
> > to catch not only the common case when hibernation's allocation itself
> > triggers swap reuse, but also the less likely case when concurrent page
> > reclaim (shrink_page_list) might happen to try_to_free_swap from a page.
> >
> > Hibernation already clears __GFP_IO from the gfp_allowed_mask, to stop
> > reclaim from going to swap: check that to prevent swap reuse too.
> >
> > Signed-off-by: Hugh Dickins <hughd@google.com>
> > Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> > Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> > Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
> > Cc: Ondrej Zary <linux@rainbow-software.org>
> > Cc: Andrea Gelmini <andrea.gelmini@gmail.com>
> > Cc: Balbir Singh <balbir@in.ibm.com>
> > Cc: Andrea Arcangeli <aarcange@redhat.com>
> > Cc: Nigel Cunningham <nigel@tuxonice.net>
> > Cc: stable@kernel.org
I put Cc: stable@kernel.org in the list here, so that they'd be notified
when the patch reaches Linus's tree: I thought it would just be noise to
Cc them on earlier mails - but I've not removed the Cc you've now added!
> > ---
> >
> > mm/swapfile.c | 24 ++++++++++++++++++++----
> > 1 file changed, 20 insertions(+), 4 deletions(-)
> >
> > --- swap1/mm/swapfile.c 2010-09-05 22:37:07.000000000 -0700
> > +++ swap2/mm/swapfile.c 2010-09-05 22:45:54.000000000 -0700
> > @@ -318,10 +318,8 @@ checks:
> > if (offset > si->highest_bit)
> > scan_base = offset = si->lowest_bit;
> >
> > - /* reuse swap entry of cache-only swap if not hibernation. */
> > - if (vm_swap_full()
> > - && usage == SWAP_HAS_CACHE
> > - && si->swap_map[offset] == SWAP_HAS_CACHE) {
> > + /* reuse swap entry of cache-only swap if not busy. */
> > + if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
> > int swap_was_freed;
> > spin_unlock(&swap_lock);
> > swap_was_freed = __try_to_reclaim_swap(si, offset);
>
> This hunk is already present in 2.6.35.
That hunk is already present in 2.6.35, but it's been replaced by the
usage == SWAP_HAS_CACHE hunk in 2.6.35.4 (or earlier), so this part of
the patch reverts 2.6.35.4 back to how 2.6.35 was. (An extra test here
does no harm, and is even a more efficient way of preventing the
issue where it's most likely to occur; but I thought it better to
handle the issue just in one place, with a longish comment.)
>
> > @@ -688,6 +686,24 @@ int try_to_free_swap(struct page *page)
> > if (page_swapcount(page))
> > return 0;
> >
> > + /*
> > + * Once hibernation has begun to create its image of memory,
> > + * there's a danger that one of the calls to try_to_free_swap()
> > + * - most probably a call from __try_to_reclaim_swap() while
> > + * hibernation is allocating its own swap pages for the image,
> > + * but conceivably even a call from memory reclaim - will free
> > + * the swap from a page which has already been recorded in the
> > + * image as a clean swapcache page, and then reuse its swap for
> > + * another page of the image. On waking from hibernation, the
> > + * original page might be freed under memory pressure, then
> > + * later read back in from swap, now with the wrong data.
> > + *
> > + * Hibernation clears bits from gfp_allowed_mask to prevent
> > + * memory reclaim from writing to disk, so check that here.
> > + */
> > + if (!(gfp_allowed_mask & __GFP_IO))
> > + return 0;
> > +
> > delete_from_swap_cache(page);
> > SetPageDirty(page);
> > return 1;
>
> This is the good bit. I guess the (unCc:ed!) -stable guys would like a
> standalone patch.
They do need both hunks (well, nobody *needs* the first part, but it's
tidier to restore the original code and keep tracking mainline).
>
> Also, are patches [3/4] and [4/4] really -stable material??
I'm sure that 3/4, the one that removes the BLKDEV_IFL_BARRIER from the
BLKDEV_IFL_WAITing swap discards, is -stable material; and it's not at
all dependent on Christoph's and Tejun's ongoing barrier changes.
(An alternative for -stable might have been to remove the BLKDEV_IFL_WAITs
instead, going back more to 2.6.34: except I cannot vouch for the stability
of that change - what of intervening mods at the block layer? - and it
would go against the remove-barriers direction we're moving for 2.6.37.)
Nigel saw a terrible pause in hibernation (originally with TuxOnIce, but
he then checked swsusp too): he reported "The patch reduces the pause
from minutes to a matter of seconds (with 4GB of swap), but it is still
there (there was previously no discernable delay)".
I've not noticed such an outstanding effect in ordinary swapping, where
other things are going on; but the patch can easily halve the length of
a run when swapping to some SSDs.
But is 4/4 (SWAP_FLAG_DISCARD) really -stable material? I think so
(Nigel still sees a pause of seconds without it, and I see some swapping
tests - on some SSDs - take three times as long without it), but it's more
questionable: I was rather hoping to provoke a reaction with it. And it
will need swapon(2 and 8) manual page updates and util-linux-ng swapon
support (latter ready but waiting to see when the kernel end goes in),
if it's to amount to anything more than removing discard from swapping.
Perhaps wait for more comment on it before pushing it to stable?
Hugh
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 2/4] swap: prevent reuse during hibernation
2010-09-07 21:38 ` Hugh Dickins
@ 2010-09-09 5:44 ` Nigel Cunningham
0 siblings, 0 replies; 11+ messages in thread
From: Nigel Cunningham @ 2010-09-09 5:44 UTC (permalink / raw)
To: Hugh Dickins
Cc: Andrew Morton, KAMEZAWA Hiroyuki, KOSAKI Motohiro,
Rafael J. Wysocki, Ondrej Zary, Andrea Gelmini, Balbir Singh,
Andrea Arcangeli, linux-mm, linux-kernel, stable
Hi.
On 08/09/10 07:38, Hugh Dickins wrote:
> On Tue, 7 Sep 2010, Andrew Morton wrote:
>> On Mon, 6 Sep 2010 01:12:38 -0700 (PDT)
>> Hugh Dickins<hughd@google.com> wrote:
>>
>>> Move the hibernation check from scan_swap_map() into try_to_free_swap():
>>
>> Well, it doesn't really "move" anything. It removes one test (usage ==
>> SWAP_HAS_CACHE) and adds a quite different one (gfp_allowed_mask&
>> __GFP_IO).
>
> Okay, replaces a peculiar check for hibernation in scan_swap_map()
> by a more general check for hibernation inside try_to_free_swap().
>
>>
>>> to catch not only the common case when hibernation's allocation itself
>>> triggers swap reuse, but also the less likely case when concurrent page
>>> reclaim (shrink_page_list) might happen to try_to_free_swap from a page.
>>>
>>> Hibernation already clears __GFP_IO from the gfp_allowed_mask, to stop
>>> reclaim from going to swap: check that to prevent swap reuse too.
>>>
>>> Signed-off-by: Hugh Dickins<hughd@google.com>
>>> Cc: KAMEZAWA Hiroyuki<kamezawa.hiroyu@jp.fujitsu.com>
>>> Cc: KOSAKI Motohiro<kosaki.motohiro@jp.fujitsu.com>
>>> Cc: "Rafael J. Wysocki"<rjw@sisk.pl>
>>> Cc: Ondrej Zary<linux@rainbow-software.org>
>>> Cc: Andrea Gelmini<andrea.gelmini@gmail.com>
>>> Cc: Balbir Singh<balbir@in.ibm.com>
>>> Cc: Andrea Arcangeli<aarcange@redhat.com>
>>> Cc: Nigel Cunningham<nigel@tuxonice.net>
>>> Cc: stable@kernel.org
>
> I put Cc: stable@kernel.org in the list here, so that they'd be notified
> when the patch reaches Linus's tree: I thought it would just be noise to
> Cc them on earlier mails - but I've not removed the Cc you've now added!
>
>>> ---
>>>
>>> mm/swapfile.c | 24 ++++++++++++++++++++----
>>> 1 file changed, 20 insertions(+), 4 deletions(-)
>>>
>>> --- swap1/mm/swapfile.c 2010-09-05 22:37:07.000000000 -0700
>>> +++ swap2/mm/swapfile.c 2010-09-05 22:45:54.000000000 -0700
>>> @@ -318,10 +318,8 @@ checks:
>>> if (offset> si->highest_bit)
>>> scan_base = offset = si->lowest_bit;
>>>
>>> - /* reuse swap entry of cache-only swap if not hibernation. */
>>> - if (vm_swap_full()
>>> - && usage == SWAP_HAS_CACHE
>>> - && si->swap_map[offset] == SWAP_HAS_CACHE) {
>>> + /* reuse swap entry of cache-only swap if not busy. */
>>> + if (vm_swap_full()&& si->swap_map[offset] == SWAP_HAS_CACHE) {
>>> int swap_was_freed;
>>> spin_unlock(&swap_lock);
>>> swap_was_freed = __try_to_reclaim_swap(si, offset);
>>
>> This hunk is already present in 2.6.35.
>
> That hunk is already present in 2.6.35, but it's been replaced by the
> usage == SWAP_HAS_CACHE hunk in 2.6.35.4 (or earlier), so this part of
> the patch reverts 2.6.35.4 back to how 2.6.35 was. (An extra test here
> does no harm, and is even a more efficient way of preventing the
> issue where it's most likely to occur; but I thought it better to
> handle the issue just in one place, with a longish comment.)
>
>>
>>> @@ -688,6 +686,24 @@ int try_to_free_swap(struct page *page)
>>> if (page_swapcount(page))
>>> return 0;
>>>
>>> + /*
>>> + * Once hibernation has begun to create its image of memory,
>>> + * there's a danger that one of the calls to try_to_free_swap()
>>> + * - most probably a call from __try_to_reclaim_swap() while
>>> + * hibernation is allocating its own swap pages for the image,
>>> + * but conceivably even a call from memory reclaim - will free
>>> + * the swap from a page which has already been recorded in the
>>> + * image as a clean swapcache page, and then reuse its swap for
>>> + * another page of the image. On waking from hibernation, the
>>> + * original page might be freed under memory pressure, then
>>> + * later read back in from swap, now with the wrong data.
>>> + *
>>> + * Hibernation clears bits from gfp_allowed_mask to prevent
>>> + * memory reclaim from writing to disk, so check that here.
>>> + */
>>> + if (!(gfp_allowed_mask& __GFP_IO))
>>> + return 0;
>>> +
>>> delete_from_swap_cache(page);
>>> SetPageDirty(page);
>>> return 1;
>>
>> This is the good bit. I guess the (unCc:ed!) -stable guys would like a
>> standalone patch.
>
> They do need both hunks (well, nobody *needs* the first part, but it's
> tidier to restore the original code and keep tracking mainline).
>
>>
>> Also, are patches [3/4] and [4/4] really -stable material??
>
> I'm sure that 3/4, the one that removes the BLKDEV_IFL_BARRIER from the
> BLKDEV_IFL_WAITing swap discards, is -stable material; and it's not at
> all dependent on Christoph's and Tejun's ongoing barrier changes.
>
> (An alternative for -stable might have been to remove the BLKDEV_IFL_WAITs
> instead, going back more to 2.6.34: except I cannot vouch for the stability
> of that change - what of intervening mods at the block layer? - and it
> would go against the remove-barriers direction we're moving for 2.6.37.)
>
> Nigel saw a terrible pause in hibernation (originally with TuxOnIce, but
> he then checked swsusp too): he reported "The patch reduces the pause
> from minutes to a matter of seconds (with 4GB of swap), but it is still
> there (there was previously no discernable delay)".
>
> I've not noticed such an outstanding effect in ordinary swapping, where
> other things are going on; but the patch can easily halve the length of
> a run when swapping to some SSDs.
>
> But is 4/4 (SWAP_FLAG_DISCARD) really -stable material? I think so
> (Nigel still sees a pause of seconds without it, and I see some swapping
> tests - on some SSDs - take three times as long without it), but it's more
> questionable: I was rather hoping to provoke a reaction with it. And it
> will need swapon(2 and 8) manual page updates and util-linux-ng swapon
> support (latter ready but waiting to see when the kernel end goes in),
> if it's to amount to anything more than removing discard from swapping.
> Perhaps wait for more comment on it before pushing it to stable?
Just to back up what Hugh's saying, here's part of the original report:
Adding a printk in discard swap cluster gives the following:
[ 46.758330] Discarding 256 pages from bdev 800003 beginning at page
640377.
[ 47.003363] Discarding 256 pages from bdev 800003 beginning at page
640633.
[ 47.246514] Discarding 256 pages from bdev 800003 beginning at page
640889.
...
[ 221.877465] Discarding 256 pages from bdev 800003 beginning at page
826745.
[ 222.121284] Discarding 256 pages from bdev 800003 beginning at page
827001.
[ 222.365908] Discarding 256 pages from bdev 800003 beginning at page
827257.
[ 222.610311] Discarding 256 pages from bdev 800003 beginning at page
827513.
So allocating 4GB of swap on my SSD now takes 176 seconds instead of
virtually no time at all.
Regards,
Nigel
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/4] swap: revert special hibernation allocation
2010-09-06 8:10 [PATCH 1/4] swap: revert special hibernation allocation Hugh Dickins
2010-09-06 8:12 ` [PATCH 2/4] swap: prevent reuse during hibernation Hugh Dickins
@ 2010-09-06 8:13 ` KAMEZAWA Hiroyuki
2010-09-06 8:17 ` [PATCH 3/4] swap: do not send discards as barriers Hugh Dickins
2010-09-06 8:21 ` [PATCH 4/4] swap: discard while swapping only if SWAP_FLAG_DISCARD Hugh Dickins
3 siblings, 0 replies; 11+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-09-06 8:13 UTC (permalink / raw)
To: Hugh Dickins
Cc: Andrew Morton, KOSAKI Motohiro, Rafael J. Wysocki, Ondrej Zary,
Andrea Gelmini, Balbir Singh, Andrea Arcangeli, Nigel Cunningham,
linux-mm, linux-kernel
On Mon, 6 Sep 2010 01:10:55 -0700 (PDT)
Hugh Dickins <hughd@google.com> wrote:
> Please revert 2.6.36-rc commit d2997b1042ec150616c1963b5e5e919ffd0b0ebf
> "hibernation: freeze swap at hibernation". It complicated matters by
> adding a second swap allocation path, just for hibernation; without in
> any way fixing the issue that it was intended to address - page reclaim
> after fixing the hibernation image might free swap from a page already
> imaged as swapcache, letting its swap be reallocated to store a different
> page of the image: resulting in data corruption if the imaged page were
> freed as clean then swapped back in. Pages freed to si->swap_map were
> still in danger of being reallocated by the alternative allocation path.
>
> I guess it inadvertently fixed slow SSD swap allocation for hibernation,
> as reported by Nigel Cunningham: by missing out the discards that occur
> on the usual swap allocation path; but that was unintentional, and needs
> a separate fix.
>
> Signed-off-by: Hugh Dickins <hughd@google.com>
> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
> Cc: Ondrej Zary <linux@rainbow-software.org>
> Cc: Andrea Gelmini <andrea.gelmini@gmail.com>
> Cc: Balbir Singh <balbir@in.ibm.com>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Nigel Cunningham <nigel@tuxonice.net>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 3/4] swap: do not send discards as barriers
2010-09-06 8:10 [PATCH 1/4] swap: revert special hibernation allocation Hugh Dickins
2010-09-06 8:12 ` [PATCH 2/4] swap: prevent reuse during hibernation Hugh Dickins
2010-09-06 8:13 ` [PATCH 1/4] swap: revert special hibernation allocation KAMEZAWA Hiroyuki
@ 2010-09-06 8:17 ` Hugh Dickins
2010-09-06 8:21 ` [PATCH 4/4] swap: discard while swapping only if SWAP_FLAG_DISCARD Hugh Dickins
3 siblings, 0 replies; 11+ messages in thread
From: Hugh Dickins @ 2010-09-06 8:17 UTC (permalink / raw)
To: Andrew Morton
Cc: Christoph Hellwig, Tejun Heo, Jens Axboe, James Bottomley,
Martin K. Petersen, Nigel Cunningham, linux-mm, linux-kernel
From: Christoph Hellwig <hch@infradead.org>
The swap code already uses synchronous discards, no need to add I/O
barriers.
This fixes the worst of the terrible slowdown in swap allocation for
hibernation, reported on 2.6.35 by Nigel Cunningham; but does not
entirely eliminate that regression.
tj: superflous newlines removed.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Nigel Cunningham <nigel@tuxonice.net>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Jens Axboe <jaxboe@fusionio.com>
Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: stable@kernel.org
---
mm/swapfile.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
--- swap2/mm/swapfile.c 2010-09-05 22:45:54.000000000 -0700
+++ swap3/mm/swapfile.c 2010-09-05 22:47:21.000000000 -0700
@@ -139,8 +139,7 @@ static int discard_swap(struct swap_info
nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
if (nr_blocks) {
err = blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_KERNEL,
- BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+ nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT);
if (err)
return err;
cond_resched();
@@ -151,8 +150,7 @@ static int discard_swap(struct swap_info
nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
err = blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_KERNEL,
- BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+ nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT);
if (err)
break;
@@ -191,8 +189,7 @@ static void discard_swap_cluster(struct
start_block <<= PAGE_SHIFT - 9;
nr_blocks <<= PAGE_SHIFT - 9;
if (blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT |
- BLKDEV_IFL_BARRIER))
+ nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT))
break;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 4/4] swap: discard while swapping only if SWAP_FLAG_DISCARD
2010-09-06 8:10 [PATCH 1/4] swap: revert special hibernation allocation Hugh Dickins
` (2 preceding siblings ...)
2010-09-06 8:17 ` [PATCH 3/4] swap: do not send discards as barriers Hugh Dickins
@ 2010-09-06 8:21 ` Hugh Dickins
3 siblings, 0 replies; 11+ messages in thread
From: Hugh Dickins @ 2010-09-06 8:21 UTC (permalink / raw)
To: Andrew Morton
Cc: Christoph Hellwig, Tejun Heo, Jens Axboe, James Bottomley,
Martin K. Petersen, Nigel Cunningham, linux-mm, linux-kernel
Tests with recent firmware on Intel X25-M 80GB and OCZ Vertex 60GB SSDs
show a shift since I last tested in December: in part because of firmware
updates, in part because of the necessary move from barriers to awaiting
completion at the block layer. While discard at swapon still shows as
slightly beneficial on both, discarding 1MB swap cluster when allocating
is now disadvanteous: adds 25% overhead on Intel, adds 230% on OCZ (YMMV).
Surrender: discard as presently implemented is more hindrance than help
for swap; but might prove useful on other devices, or with improvements.
So continue to do the discard at swapon, but make discard while swapping
conditional on a SWAP_FLAG_DISCARD to sys_swapon() (which has been using
only the lower 16 bits of int flags).
We can add a --discard or -d to swapon(8), and a "discard" to swap in
/etc/fstab: matching the mount option for btrfs, ext4, fat, gfs2, nilfs2.
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Nigel Cunningham <nigel@tuxonice.net>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <jaxboe@fusionio.com>
Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: stable@kernel.org
---
include/linux/swap.h | 3 ++-
mm/swapfile.c | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
--- swap3/include/linux/swap.h 2010-09-05 22:37:07.000000000 -0700
+++ swap4/include/linux/swap.h 2010-09-05 22:49:06.000000000 -0700
@@ -19,6 +19,7 @@ struct bio;
#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK 0x7fff
#define SWAP_FLAG_PRIO_SHIFT 0
+#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */
static inline int current_is_kswapd(void)
{
@@ -142,7 +143,7 @@ struct swap_extent {
enum {
SWP_USED = (1 << 0), /* is slot in swap_info[] used? */
SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */
- SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */
+ SWP_DISCARDABLE = (1 << 2), /* swapon+blkdev support discard */
SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */
SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */
SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */
--- swap3/mm/swapfile.c 2010-09-05 22:47:21.000000000 -0700
+++ swap4/mm/swapfile.c 2010-09-05 22:49:06.000000000 -0700
@@ -2047,7 +2047,7 @@ SYSCALL_DEFINE2(swapon, const char __use
p->flags |= SWP_SOLIDSTATE;
p->cluster_next = 1 + (random32() % p->highest_bit);
}
- if (discard_swap(p) == 0)
+ if (discard_swap(p) == 0 && (swap_flags & SWAP_FLAG_DISCARD))
p->flags |= SWP_DISCARDABLE;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2010-09-09 5:44 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-09-06 8:10 [PATCH 1/4] swap: revert special hibernation allocation Hugh Dickins
2010-09-06 8:12 ` [PATCH 2/4] swap: prevent reuse during hibernation Hugh Dickins
2010-09-06 8:15 ` KAMEZAWA Hiroyuki
2010-09-06 18:34 ` Rafael J. Wysocki
2010-09-07 1:03 ` KOSAKI Motohiro
2010-09-07 20:20 ` Andrew Morton
2010-09-07 21:38 ` Hugh Dickins
2010-09-09 5:44 ` Nigel Cunningham
2010-09-06 8:13 ` [PATCH 1/4] swap: revert special hibernation allocation KAMEZAWA Hiroyuki
2010-09-06 8:17 ` [PATCH 3/4] swap: do not send discards as barriers Hugh Dickins
2010-09-06 8:21 ` [PATCH 4/4] swap: discard while swapping only if SWAP_FLAG_DISCARD Hugh Dickins
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).