* [RFC][PATCH][Fix] swsusp: Yet another attempt to fix Bug #4959
@ 2005-10-01 16:13 Rafael J. Wysocki
2005-10-01 19:45 ` Andi Kleen
2005-10-04 17:09 ` [discuss] " Andi Kleen
0 siblings, 2 replies; 11+ messages in thread
From: Rafael J. Wysocki @ 2005-10-01 16:13 UTC (permalink / raw)
To: Discuss x86-64; +Cc: Andi Kleen, Andrew Morton, LKML, Pavel Machek
Hi,
The following is yet another attempt to fix Bug #4959.
This one uses the code in mm/init.c directly. For this purpose
it introduces a modified version of init_memory_mapping()
which is compiled if CONFIG_SOFTWARE_SUSPEND is set.
This function allocates twice as much memory as needed for the direct
mapping page tables and assigns the second half of it to the resume page
tables. This area is later marked with PG_nosave by swsusp, so that it is
not overwritten during resume.
Your comments, criticisms and (preferably) suggestions will be appreciated.
Greetings,
Rafael
Index: linux-2.6.14-rc3/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.14-rc3.orig/arch/x86_64/kernel/suspend.c 2005-10-01 10:37:53.000000000 +0200
+++ linux-2.6.14-rc3/arch/x86_64/kernel/suspend.c 2005-10-01 14:29:48.000000000 +0200
@@ -9,6 +9,7 @@
#include <linux/config.h>
#include <linux/smp.h>
+#include <linux/mm.h>
#include <linux/suspend.h>
#include <asm/proto.h>
@@ -140,4 +141,15 @@
}
+#ifdef CONFIG_SOFTWARE_SUSPEND
+extern unsigned long resume_table_start, resume_table_end;
+int arch_prepare_suspend(void)
+{
+ unsigned long pfn;
+
+ for (pfn = resume_table_start; pfn < resume_table_end; pfn++)
+ SetPageNosave(pfn_to_page(pfn));
+ return 0;
+}
+#endif /* CONFIG_SOFTWARE_SUSPEND */
Index: linux-2.6.14-rc3/arch/x86_64/kernel/suspend_asm.S
===================================================================
--- linux-2.6.14-rc3.orig/arch/x86_64/kernel/suspend_asm.S 2005-10-01 10:37:53.000000000 +0200
+++ linux-2.6.14-rc3/arch/x86_64/kernel/suspend_asm.S 2005-10-01 14:29:48.000000000 +0200
@@ -40,11 +40,11 @@
ret
ENTRY(swsusp_arch_resume)
- /* set up cr3 */
- leaq init_level4_pgt(%rip),%rax
- subq $__START_KERNEL_map,%rax
- movq %rax,%cr3
-
+ /* switch to the resume page tables */
+ leaq resume_level4_pgt(%rip), %rax
+ subq $__START_KERNEL_map, %rax
+ movq %rax, %cr3
+ /* Flush TLB */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
andq $~(1<<7), %rdx # PGE
@@ -69,6 +69,10 @@
movq pbe_next(%rdx), %rdx
jmp loop
done:
+ /* go back to the original page tables */
+ leaq init_level4_pgt(%rip), %rax
+ subq $__START_KERNEL_map, %rax
+ movq %rax, %cr3
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
@@ -102,3 +106,13 @@
xorq %rax, %rax
ret
+
+ .section ".data.nosave"
+ .align PAGE_SIZE
+ENTRY(resume_level4_pgt)
+ .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */
+ .fill 255,8,0
+ .quad 0x000000000000a007 + __PHYSICAL_START
+ .fill 254,8,0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */
Index: linux-2.6.14-rc3/include/asm-x86_64/suspend.h
===================================================================
--- linux-2.6.14-rc3.orig/include/asm-x86_64/suspend.h 2005-08-29 01:41:01.000000000 +0200
+++ linux-2.6.14-rc3/include/asm-x86_64/suspend.h 2005-10-01 11:38:47.000000000 +0200
@@ -6,11 +6,15 @@
#include <asm/desc.h>
#include <asm/i387.h>
+#ifdef CONFIG_SOFTWARE_SUSPEND
+extern int arch_prepare_suspend(void);
+#else
static inline int
arch_prepare_suspend(void)
{
return 0;
}
+#endif
/* Image of the saved processor state. If you touch this, fix acpi_wakeup.S. */
struct saved_context {
Index: linux-2.6.14-rc3/arch/x86_64/mm/init.c
===================================================================
--- linux-2.6.14-rc3.orig/arch/x86_64/mm/init.c 2005-10-01 10:40:03.000000000 +0200
+++ linux-2.6.14-rc3/arch/x86_64/mm/init.c 2005-10-01 14:31:34.000000000 +0200
@@ -260,6 +260,9 @@
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+#ifdef CONFIG_SOFTWARE_SUSPEND
+ tables += tables;
+#endif
table_start = find_e820_area(0x8000, __pa_symbol(&_text), tables);
if (table_start == -1UL)
@@ -272,6 +275,7 @@
/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
This runs before bootmem is initialized and gets pages directly from the
physical memory. To access them they are temporarily mapped. */
+#ifndef CONFIG_SOFTWARE_SUSPEND
void __init init_memory_mapping(unsigned long start, unsigned long end)
{
unsigned long next;
@@ -307,6 +311,69 @@
table_start<<PAGE_SHIFT,
table_end<<PAGE_SHIFT);
}
+#else
+
+extern pgd_t resume_level4_pgt[];
+
+#define pgd_offset_resume(address) (resume_level4_pgt + pgd_index(address))
+
+unsigned long resume_table_start, resume_table_end;
+
+void __init init_memory_mapping(unsigned long start, unsigned long end)
+{
+ unsigned long next, start_phys;
+ int map;
+ pud_t *pud;
+ unsigned long pud_phys;
+
+ Dprintk("init_memory_mapping\n");
+
+ /*
+ * Find space for the kernel direct mapping tables.
+ * Later we should allocate these tables in the local node of the memory
+ * mapped. Unfortunately this is done currently before the nodes are
+ * discovered.
+ */
+ find_early_table_space(end);
+
+ start_phys = start;
+
+ start = (unsigned long)__va(start_phys);
+ end = (unsigned long)__va(end);
+
+ for (; start < end; start = next) {
+ pud = alloc_low_page(&map, &pud_phys);
+ next = start + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+ phys_pud_init(pud, __pa(start), __pa(next));
+ set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
+ unmap_low_page(map);
+ }
+
+ resume_table_start = table_end;
+
+ start = (unsigned long)__va(start_phys);
+
+ for (; start < end; start = next) {
+ pud = alloc_low_page(&map, &pud_phys);
+ next = start + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+ phys_pud_init(pud, __pa(start), __pa(next));
+ set_pgd(pgd_offset_resume(start), mk_kernel_pgd(pud_phys));
+ unmap_low_page(map);
+ }
+
+ resume_table_end = table_end;
+
+ asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
+ __flush_tlb_all();
+ early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
+ table_start<<PAGE_SHIFT,
+ resume_table_start<<PAGE_SHIFT);
+}
+#endif /* CONFIG_SOFTWARE_SUSPEND */
extern struct x8664_pda cpu_pda[NR_CPUS];
Index: linux-2.6.14-rc3/kernel/power/swsusp.c
===================================================================
--- linux-2.6.14-rc3.orig/kernel/power/swsusp.c 2005-10-01 10:40:02.000000000 +0200
+++ linux-2.6.14-rc3/kernel/power/swsusp.c 2005-10-01 11:38:47.000000000 +0200
@@ -672,7 +672,6 @@
return 0;
page = pfn_to_page(pfn);
- BUG_ON(PageReserved(page) && PageNosave(page));
if (PageNosave(page))
return 0;
if (PageReserved(page) && pfn_is_nosave(pfn)) {
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC][PATCH][Fix] swsusp: Yet another attempt to fix Bug #4959
2005-10-01 16:13 [RFC][PATCH][Fix] swsusp: Yet another attempt to fix Bug #4959 Rafael J. Wysocki
@ 2005-10-01 19:45 ` Andi Kleen
2005-10-02 10:25 ` Rafael J. Wysocki
2005-10-04 14:11 ` Rafael J. Wysocki
2005-10-04 17:09 ` [discuss] " Andi Kleen
1 sibling, 2 replies; 11+ messages in thread
From: Andi Kleen @ 2005-10-01 19:45 UTC (permalink / raw)
To: Rafael J. Wysocki; +Cc: Discuss x86-64, Andrew Morton, LKML, Pavel Machek
On Saturday 01 October 2005 18:13, Rafael J. Wysocki wrote:
>
> This function allocates twice as much memory as needed for the direct
> mapping page tables and assigns the second half of it to the resume page
> tables. This area is later marked with PG_nosave by swsusp, so that it is
> not overwritten during resume.
>
I preferred it when the additional page tables were allocated only on demand.
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC][PATCH][Fix] swsusp: Yet another attempt to fix Bug #4959
2005-10-01 19:45 ` Andi Kleen
@ 2005-10-02 10:25 ` Rafael J. Wysocki
2005-10-04 14:11 ` Rafael J. Wysocki
1 sibling, 0 replies; 11+ messages in thread
From: Rafael J. Wysocki @ 2005-10-02 10:25 UTC (permalink / raw)
To: Andi Kleen; +Cc: Discuss x86-64, Andrew Morton, LKML, Pavel Machek
On Saturday, 1 of October 2005 21:45, Andi Kleen wrote:
> On Saturday 01 October 2005 18:13, Rafael J. Wysocki wrote:
>
> >
> > This function allocates twice as much memory as needed for the direct
> > mapping page tables and assigns the second half of it to the resume page
> > tables. This area is later marked with PG_nosave by swsusp, so that it is
> > not overwritten during resume.
> >
> I prefered it when the additional page tables were allocated only on demand.
Me too. Let's get back to that patch, then. :-)
Comments etc. will be appreciated.
Greetings,
Rafael
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Index: linux-2.6.14-rc3-git1/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.14-rc3-git1.orig/arch/x86_64/kernel/suspend.c 2005-10-02 10:39:41.000000000 +0200
+++ linux-2.6.14-rc3-git1/arch/x86_64/kernel/suspend.c 2005-10-02 12:12:27.000000000 +0200
@@ -11,6 +11,8 @@
#include <linux/smp.h>
#include <linux/suspend.h>
#include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
struct saved_context saved_context;
@@ -140,4 +142,132 @@
}
+#ifdef CONFIG_SOFTWARE_SUSPEND
+/* Defined in kernel/power/swsusp.c */
+extern unsigned long get_usable_page(unsigned gfp_mask);
+extern void free_eaten_memory(void);
+/* Defined in arch/x86_64/kernel/suspend_asm.S */
+extern int restore_image(void);
+pgd_t *temp_level4_pgt;
+
+static void **pages;
+
+static inline void *__add_page(void)
+{
+ void **c;
+
+ c = (void **)get_usable_page(GFP_ATOMIC);
+ if (c) {
+ *c = pages;
+ pages = c;
+ }
+ return c;
+}
+
+static inline void *__next_page(void)
+{
+ void **c;
+
+ c = pages;
+ if (c) {
+ pages = *c;
+ *c = NULL;
+ }
+ return c;
+}
+
+/*
+ * Try to allocate as many usable pages as needed and daisy chain them.
+ * If one allocation fails, free the pages allocated so far
+ */
+static int alloc_usable_pages(unsigned long n)
+{
+ void *p;
+
+ pages = NULL;
+ do
+ if (!__add_page())
+ break;
+ while (--n);
+ if (n) {
+ p = __next_page();
+ while (p) {
+ free_page((unsigned long)p);
+ p = __next_page();
+ }
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+{
+ long i, j;
+
+ i = pud_index(address);
+ pud = pud + i;
+ for (; i < PTRS_PER_PUD; pud++, i++) {
+ unsigned long paddr;
+ pmd_t *pmd;
+
+ paddr = address + i*PUD_SIZE;
+ if (paddr >= end)
+ break;
+
+ pmd = (pmd_t *)__next_page();
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
+ unsigned long pe;
+
+ if (paddr >= end)
+ break;
+ pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
+ pe &= __supported_pte_mask;
+ set_pmd(pmd, __pmd(pe));
+ }
+ }
+}
+
+static void set_up_temporary_mappings(void)
+{
+ unsigned long start, end, next;
+
+ temp_level4_pgt = (pgd_t *)__next_page();
+
+ /* It is safe to reuse the original kernel mapping */
+ set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
+ init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+
+ /* Set up the direct mapping from scratch */
+ start = (unsigned long)pfn_to_kaddr(0);
+ end = (unsigned long)pfn_to_kaddr(end_pfn);
+
+ for (; start < end; start = next) {
+ pud_t *pud = (pud_t *)__next_page();
+ next = start + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+ phys_pud_init(pud, __pa(start), __pa(next));
+ set_pgd(temp_level4_pgt + pgd_index(start),
+ mk_kernel_pgd(__pa(pud)));
+ }
+}
+
+int swsusp_arch_resume(void)
+{
+ unsigned long n;
+
+ n = ((end_pfn << PAGE_SHIFT) + PUD_SIZE - 1) >> PUD_SHIFT;
+ n += (n + PTRS_PER_PUD - 1) / PTRS_PER_PUD + 1;
+ pr_debug("swsusp_arch_resume(): pages needed = %lu\n", n);
+ if (alloc_usable_pages(n)) {
+ free_eaten_memory();
+ return -ENOMEM;
+ }
+ /* We have got enough memory and from now on we cannot recover */
+ set_up_temporary_mappings();
+ restore_image();
+ return 0;
+}
+#endif /* CONFIG_SOFTWARE_SUSPEND */
Index: linux-2.6.14-rc3-git1/kernel/power/swsusp.c
===================================================================
--- linux-2.6.14-rc3-git1.orig/kernel/power/swsusp.c 2005-10-02 10:39:41.000000000 +0200
+++ linux-2.6.14-rc3-git1/kernel/power/swsusp.c 2005-10-02 12:11:08.000000000 +0200
@@ -1095,7 +1095,7 @@
*eaten_memory = c;
}
-static unsigned long get_usable_page(unsigned gfp_mask)
+unsigned long get_usable_page(unsigned gfp_mask)
{
unsigned long m;
@@ -1109,7 +1109,7 @@
return m;
}
-static void free_eaten_memory(void)
+void free_eaten_memory(void)
{
unsigned long m;
void **c;
@@ -1481,11 +1481,12 @@
/* Allocate memory for the image and read the data from swap */
error = check_pagedir(pagedir_nosave);
- free_eaten_memory();
+
if (!error)
error = data_read(pagedir_nosave);
if (error) { /* We fail cleanly */
+ free_eaten_memory();
for_each_pbe (p, pagedir_nosave)
if (p->address) {
free_page(p->address);
Index: linux-2.6.14-rc3-git1/arch/x86_64/kernel/suspend_asm.S
===================================================================
--- linux-2.6.14-rc3-git1.orig/arch/x86_64/kernel/suspend_asm.S 2005-10-02 10:39:41.000000000 +0200
+++ linux-2.6.14-rc3-git1/arch/x86_64/kernel/suspend_asm.S 2005-10-02 11:30:55.000000000 +0200
@@ -39,12 +39,13 @@
call swsusp_save
ret
-ENTRY(swsusp_arch_resume)
- /* set up cr3 */
- leaq init_level4_pgt(%rip),%rax
- subq $__START_KERNEL_map,%rax
- movq %rax,%cr3
-
+ENTRY(restore_image)
+ /* switch to temporary page tables */
+ movq $__PAGE_OFFSET, %rdx
+ movq temp_level4_pgt(%rip), %rax
+ subq %rdx, %rax
+ movq %rax, %cr3
+ /* Flush TLB */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
andq $~(1<<7), %rdx # PGE
@@ -69,6 +70,10 @@
movq pbe_next(%rdx), %rdx
jmp loop
done:
+ /* go back to the original page tables */
+ leaq init_level4_pgt(%rip), %rax
+ subq $__START_KERNEL_map, %rax
+ movq %rax, %cr3
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC][PATCH][Fix] swsusp: Yet another attempt to fix Bug #4959
2005-10-01 19:45 ` Andi Kleen
2005-10-02 10:25 ` Rafael J. Wysocki
@ 2005-10-04 14:11 ` Rafael J. Wysocki
1 sibling, 0 replies; 11+ messages in thread
From: Rafael J. Wysocki @ 2005-10-04 14:11 UTC (permalink / raw)
To: Andi Kleen; +Cc: Discuss x86-64, Andrew Morton, LKML, Pavel Machek
On Sunday, 2 of October 2005 12:06, Rafael J. Wysocki wrote:
> On Saturday, 1 of October 2005 21:45, Andi Kleen wrote:
> > On Saturday 01 October 2005 18:13, Rafael J. Wysocki wrote:
> >
> > >
> > > This function allocates twice as much memory as needed for the direct
> > > mapping page tables and assigns the second half of it to the resume page
> > > tables. This area is later marked with PG_nosave by swsusp, so that it is
> > > not overwritten during resume.
> > >
> > I prefered it when the additional page tables were allocated only on demand.
>
> Me too. Let's get back to that patch, then. :-)
>
> Comments etc. will be appreciated.
I haven't got any comments since I posted it on Saturday, so I gather there are
no objections. Or are there any?
Greetings,
Rafael
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [discuss] [RFC][PATCH][Fix] swsusp: Yet another attempt to fix Bug #4959
2005-10-01 16:13 [RFC][PATCH][Fix] swsusp: Yet another attempt to fix Bug #4959 Rafael J. Wysocki
2005-10-01 19:45 ` Andi Kleen
@ 2005-10-04 17:09 ` Andi Kleen
2005-10-04 21:31 ` Rafael J. Wysocki
2005-10-05 21:44 ` [PATCH][Fix] swsusp: avoid possible page tables corruption during resume on x86-64 Rafael J. Wysocki
1 sibling, 2 replies; 11+ messages in thread
From: Andi Kleen @ 2005-10-04 17:09 UTC (permalink / raw)
To: discuss; +Cc: Rafael J. Wysocki, Andrew Morton, LKML, Pavel Machek
On Saturday 01 October 2005 18:13, Rafael J. Wysocki wrote:
> Your comments, criticisms and (preferably) suggestions will be appreciated.
First always write a full description of the problem and the rationale
of the change and an overview of what it changes. Also please add Signed-off-by
lines.
> +#ifdef CONFIG_SOFTWARE_SUSPEND
> +extern unsigned long resume_table_start, resume_table_end;
These should all be in some include. Adding externs in C files is nearly
always wrong because it avoids cross-file type checking.
Also the convention is to add _pfn to variables that are in PFNs,
otherwise it's full addresses.
> 10:40:03.000000000 +0200 +++
> linux-2.6.14-rc3/arch/x86_64/mm/init.c 2005-10-01 14:31:34.000000000 +0200
> @@ -260,6 +260,9 @@
> pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
> tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
> round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
> +#ifdef CONFIG_SOFTWARE_SUSPEND
> + tables += tables;
> +#endif
This needs a comment. Also I would still prefer if it was allocated
only when suspend is actually attempted.
> table_start = find_e820_area(0x8000, __pa_symbol(&_text), tables);
> if (table_start == -1UL)
> @@ -272,6 +275,7 @@
> /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
> This runs before bootmem is initialized and gets pages directly from
> the physical memory. To access them they are temporarily mapped. */
> +#ifndef CONFIG_SOFTWARE_SUSPEND
> void __init init_memory_mapping(unsigned long start, unsigned long end)
> {
> unsigned long next;
> @@ -307,6 +311,69 @@
> table_start<<PAGE_SHIFT,
> table_end<<PAGE_SHIFT);
> }
> +#else
> +
> +extern pgd_t resume_level4_pgt[];
These should be in some include again.
I don't like it that you duplicated the function fully. Is that really
needed?
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [discuss] [RFC][PATCH][Fix] swsusp: Yet another attempt to fix Bug #4959
2005-10-04 17:09 ` [discuss] " Andi Kleen
@ 2005-10-04 21:31 ` Rafael J. Wysocki
2005-10-05 21:44 ` [PATCH][Fix] swsusp: avoid possible page tables corruption during resume on x86-64 Rafael J. Wysocki
1 sibling, 0 replies; 11+ messages in thread
From: Rafael J. Wysocki @ 2005-10-04 21:31 UTC (permalink / raw)
To: Andi Kleen; +Cc: discuss, Andrew Morton, LKML, Pavel Machek
Hi,
Thanks a lot for the comments.
On Tuesday, 4 of October 2005 19:09, Andi Kleen wrote:
>
> On Saturday 01 October 2005 18:13, Rafael J. Wysocki wrote:
>
> > Your comments, criticisms and (preferably) suggestions will be appreciated.
>
> First always write a full description of the problem and the rationale
> of the change and a overview what it changes. Also please add Signed-off-by
> lines.
I forgot to add the Signed-off-by line, sorry.
I will add the full problem description etc. to the next iteration of the patch.
>
> > +#ifdef CONFIG_SOFTWARE_SUSPEND
> > +extern unsigned long resume_table_start, resume_table_end;
>
> These should be all in some include. Adding externs in C files is near
> always wrong because it avoids cross file type checking.
I will do that.
> Also the convention is to add _pfn to variables that are in PFNs,
> otherwise it's full addresses.
I will do that too.
> > 10:40:03.000000000 +0200 +++
> > linux-2.6.14-rc3/arch/x86_64/mm/init.c 2005-10-01 14:31:34.000000000 +0200
> > @@ -260,6 +260,9 @@
> > pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
> > tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
> > round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
> > +#ifdef CONFIG_SOFTWARE_SUSPEND
> > + tables += tables;
> > +#endif
>
> This needs a comment. Also I would still prefer if it was allocated
> only when suspend is actually attempted.
I will add a comment here.
The problem with allocating the resume page tables is that I have to make sure
they won't be overwritten by swsusp during resume. There are three possible
solutions to it:
(1) to use static arrays of predefined size marked as __nosavedata,
(2) to allocate the page tables as early as in the init code so that they always
end up at the same physical addresses and (later on) mark them as nosave
for swsusp,
(3) to place the page tables in the areas of memory that won't be modified
by swsusp.
The downsides of (1) are that by using it I'd artificially limit the allowed
size of the suspend image and I'd waste some page frames on systems with
relatively small RAM. Also I'd have to modify phys_pud_init() to use it for
populating these page tables.
The downside of (3) is that I do not know which pages will be overwritten by
swsusp until the image is loaded into memory, because the addresses that
will be affected by swsusp are stored within the image. Thus in that case I
can only allocate the resume page tables right before copying the image
pages to their original locations. This requires the use of GFP_ATOMIC
allocations that may fail, and the suitable pages have to be cherry picked
from the pages returned by the kernel (some of them will be overwritten
by swsusp). The failure of a memory allocation at that time, although not
very probable, would lead to a resume failure and the loss of the system's
state from before suspend, so this is not an ideal solution. Moreover,
in that case I'd have to modify phys_pud_init() to use it for populating the
resume page tables.
The only downside of (2) is that it keeps some 4KB page frames permanently
unavailable to the system. However, for typical systems the number of
them is limited (it's (1 page per every started 1GB of RAM) + 2, or 3 pages
for a system with no more than 1GB of RAM). On the other hand, in that
case I can use the original phys_pud_init() to populate the resume page
tables and the required code changes are quite limited in scope. Therefore
I've chosen this one.
> > table_start = find_e820_area(0x8000, __pa_symbol(&_text), tables);
> > if (table_start == -1UL)
> > @@ -272,6 +275,7 @@
> > /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
> > This runs before bootmem is initialized and gets pages directly from
> > the physical memory. To access them they are temporarily mapped. */
> > +#ifndef CONFIG_SOFTWARE_SUSPEND
> > void __init init_memory_mapping(unsigned long start, unsigned long end)
> > {
> > unsigned long next;
> > @@ -307,6 +311,69 @@
> > table_start<<PAGE_SHIFT,
> > table_end<<PAGE_SHIFT);
> > }
> > +#else
> > +
> > +extern pgd_t resume_level4_pgt[];
>
> These should be in some include again.
OK
> I don't like it that you duplicated the function fully. Is that really
> needed?
Not really, but otherwise I'll have to use a bunch of #ifdefs in the function
body. I'll do that in the next iteration of the patch.
Thanks again,
Rafael
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH][Fix] swsusp: avoid possible page tables corruption during resume on x86-64
2005-10-04 17:09 ` [discuss] " Andi Kleen
2005-10-04 21:31 ` Rafael J. Wysocki
@ 2005-10-05 21:44 ` Rafael J. Wysocki
2005-10-05 22:49 ` Pavel Machek
1 sibling, 1 reply; 11+ messages in thread
From: Rafael J. Wysocki @ 2005-10-05 21:44 UTC (permalink / raw)
To: discuss; +Cc: Andi Kleen, Andrew Morton, LKML, Pavel Machek
Summary =========
The following patch makes swsusp avoid the possible temporary corruption of
page translation tables during resume on x86-64. This is achieved by creating
a copy of the relevant page tables that will not be modified by swsusp and can
be safely used by it on resume.
Problem description ==========
The problem is that during resume on x86-64 swsusp may temporarily corrupt
the page tables used for the direct mapping of RAM. If that happens, a page
fault occurs and cannot be handled properly, which leads to a
solid hang of the affected system. This leads to the loss of the system's state
from before suspend and may result in the loss of data or the corruption
of filesystems, so it is a serious issue. Also, it appears to happen quite often
(for me, as often as 50% of the time).
The problem is related to the fact that (at least) one of the PMD entries used in
the direct memory mapping (starting at PAGE_OFFSET) points to a page table the
physical address of which is much greater than the physical address of the PMD
entry itself. Moreover, unfortunately, the physical address of the page table
before suspend (i.e. the one stored in the suspend image) happens to be
different to the physical address of the corresponding page table used during
resume (i.e. the one that is valid right before swsusp_arch_resume() in
arch/x86_64/kernel/suspend_asm.S is executed). Thus while the image is
restored, the "offending" PMD entry gets overwritten, so it does not point to
the right physical address any more (i.e. there's no page table at the address
pointed to by it, because it points to the address the page table has been at
during suspend). Consequently, if the PMD entry is used later on, and it _is_
used in the process of copying the image pages, a page fault occurs, but it
cannot be handled in the normal way and the system hangs.
Proposed solution =========
To avoid the corruption of the page tables during resume the patch creates
a copy of them that will not be overwritten by swsusp and can be used by it
safely during resume. This copy is created during the initialization of the
system along with the original page tables, because it has to be located
in the same page frames on every boot (otherwise swsusp could
overwrite it).
Alternatively, we could create such a copy of the page translation tables
on demand, before swsusp starts to restore the original state of the system
from the suspend image, but this would require that atomic memory allocations
be used while there's almost no free RAM. Then, theoretically the memory
allocations could fail leading to the failure of the entire resume process.
To avoid that risk I have decided to use the preallocated resume page tables,
although this makes some 4KB page frames be permanently reserved
(eg. 3 page frames for a system with no more than 1GB of RAM).
The additional advantage of the proposed approach is that the code used for
populating the original page tables can also be used for populating the resume
page tables (any alternative solution would require the use of some additional
code for this purpose).
Please consider the patch for applying.
Greetings,
Rafael
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Index: linux-2.6.14-rc3-git5/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.14-rc3-git5.orig/arch/x86_64/kernel/suspend.c 2005-10-05 21:12:41.000000000 +0200
+++ linux-2.6.14-rc3-git5/arch/x86_64/kernel/suspend.c 2005-10-05 22:24:13.000000000 +0200
@@ -9,6 +9,7 @@
#include <linux/config.h>
#include <linux/smp.h>
+#include <linux/mm.h>
#include <linux/suspend.h>
#include <asm/proto.h>
@@ -140,4 +141,15 @@
}
+#ifdef CONFIG_SOFTWARE_SUSPEND
+unsigned long resume_table_start_pfn, resume_table_end_pfn;
+int arch_prepare_suspend(void)
+{
+ unsigned long pfn;
+
+ for (pfn = resume_table_start_pfn; pfn < resume_table_end_pfn; pfn++)
+ SetPageNosave(pfn_to_page(pfn));
+ return 0;
+}
+#endif /* CONFIG_SOFTWARE_SUSPEND */
Index: linux-2.6.14-rc3-git5/arch/x86_64/kernel/suspend_asm.S
===================================================================
--- linux-2.6.14-rc3-git5.orig/arch/x86_64/kernel/suspend_asm.S 2005-10-05 21:12:41.000000000 +0200
+++ linux-2.6.14-rc3-git5/arch/x86_64/kernel/suspend_asm.S 2005-10-05 22:24:13.000000000 +0200
@@ -40,11 +40,11 @@
ret
ENTRY(swsusp_arch_resume)
- /* set up cr3 */
- leaq init_level4_pgt(%rip),%rax
- subq $__START_KERNEL_map,%rax
- movq %rax,%cr3
-
+ /* switch to the resume page tables */
+ leaq resume_level4_pgt(%rip), %rax
+ subq $__START_KERNEL_map, %rax
+ movq %rax, %cr3
+ /* Flush TLB */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
andq $~(1<<7), %rdx # PGE
@@ -69,6 +69,10 @@
movq pbe_next(%rdx), %rdx
jmp loop
done:
+ /* go back to the original page tables */
+ leaq init_level4_pgt(%rip), %rax
+ subq $__START_KERNEL_map, %rax
+ movq %rax, %cr3
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
@@ -102,3 +106,13 @@
xorq %rax, %rax
ret
+
+ .section ".data.nosave"
+ .align PAGE_SIZE
+ENTRY(resume_level4_pgt)
+ .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */
+ .fill 255,8,0
+ .quad 0x000000000000a007 + __PHYSICAL_START
+ .fill 254,8,0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */
Index: linux-2.6.14-rc3-git5/include/asm-x86_64/suspend.h
===================================================================
--- linux-2.6.14-rc3-git5.orig/include/asm-x86_64/suspend.h 2005-08-29 01:41:01.000000000 +0200
+++ linux-2.6.14-rc3-git5/include/asm-x86_64/suspend.h 2005-10-05 21:32:05.000000000 +0200
@@ -6,11 +6,20 @@
#include <asm/desc.h>
#include <asm/i387.h>
+#ifdef CONFIG_SOFTWARE_SUSPEND
+extern unsigned long resume_table_start_pfn, resume_table_end_pfn;
+extern pgd_t resume_level4_pgt[];
+
+#define pgd_offset_resume(address) (resume_level4_pgt + pgd_index(address))
+
+extern int arch_prepare_suspend(void);
+#else
static inline int
arch_prepare_suspend(void)
{
return 0;
}
+#endif
/* Image of the saved processor state. If you touch this, fix acpi_wakeup.S. */
struct saved_context {
Index: linux-2.6.14-rc3-git5/arch/x86_64/mm/init.c
===================================================================
--- linux-2.6.14-rc3-git5.orig/arch/x86_64/mm/init.c 2005-10-05 21:14:45.000000000 +0200
+++ linux-2.6.14-rc3-git5/arch/x86_64/mm/init.c 2005-10-05 22:24:28.000000000 +0200
@@ -36,6 +36,7 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
+#include <asm/suspend.h>
#ifndef Dprintk
#define Dprintk(x...)
@@ -260,6 +261,14 @@
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+#ifdef CONFIG_SOFTWARE_SUSPEND
+ /*
+ * We need to create a copy of the direct mapping page tables
+ * that will be used during resume from disk, so we allocate
+ * twice as much room as needed for the direct mapping alone
+ */
+ tables += tables;
+#endif
table_start = find_e820_area(0x8000, __pa_symbol(&_text), tables);
if (table_start == -1UL)
@@ -275,6 +284,9 @@
void __init init_memory_mapping(unsigned long start, unsigned long end)
{
unsigned long next;
+#ifdef CONFIG_SOFTWARE_SUSPEND
+ unsigned long start_phys = start;
+#endif
Dprintk("init_memory_mapping\n");
@@ -306,6 +318,26 @@
early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
table_start<<PAGE_SHIFT,
table_end<<PAGE_SHIFT);
+#ifdef CONFIG_SOFTWARE_SUSPEND
+
+ resume_table_start_pfn = table_end;
+
+ start = (unsigned long)__va(start_phys);
+
+ for (; start < end; start = next) {
+ int map;
+ unsigned long pud_phys;
+ pud_t *pud = alloc_low_page(&map, &pud_phys);
+ next = start + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+ phys_pud_init(pud, __pa(start), __pa(next));
+ set_pgd(pgd_offset_resume(start), mk_kernel_pgd(pud_phys));
+ unmap_low_page(map);
+ }
+
+ resume_table_end_pfn = table_end;
+#endif
}
extern struct x8664_pda cpu_pda[NR_CPUS];
Index: linux-2.6.14-rc3-git5/kernel/power/swsusp.c
===================================================================
--- linux-2.6.14-rc3-git5.orig/kernel/power/swsusp.c 2005-10-05 21:12:41.000000000 +0200
+++ linux-2.6.14-rc3-git5/kernel/power/swsusp.c 2005-10-05 21:24:50.000000000 +0200
@@ -672,7 +672,6 @@
return 0;
page = pfn_to_page(pfn);
- BUG_ON(PageReserved(page) && PageNosave(page));
if (PageNosave(page))
return 0;
if (PageReserved(page) && pfn_is_nosave(pfn)) {
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH][Fix] swsusp: avoid possible page tables corruption during resume on x86-64
2005-10-05 21:44 ` [PATCH][Fix] swsusp: avoid possible page tables corruption during resume on x86-64 Rafael J. Wysocki
@ 2005-10-05 22:49 ` Pavel Machek
2005-10-06 8:07 ` [discuss] " Rafael J. Wysocki
0 siblings, 1 reply; 11+ messages in thread
From: Pavel Machek @ 2005-10-05 22:49 UTC (permalink / raw)
To: Rafael J. Wysocki; +Cc: discuss, Andi Kleen, Andrew Morton, LKML
Hi!
> Summary =========
> The following patch makes swsusp avoid the possible temporary corruption of
> page translation tables during resume on x86-64. This is achieved by creating
> a copy of the relevant page tables that will not be modified by swsusp and can
> be safely used by it on resume.
Andi, this means swsusp fails 50% of time on x86-64. I believe we even
have one report in suse bugzilla by now... Could we get this somehow
merged?
> Index: linux-2.6.14-rc3-git5/kernel/power/swsusp.c
> ===================================================================
> --- linux-2.6.14-rc3-git5.orig/kernel/power/swsusp.c 2005-10-05 21:12:41.000000000 +0200
> +++ linux-2.6.14-rc3-git5/kernel/power/swsusp.c 2005-10-05 21:24:50.000000000 +0200
> @@ -672,7 +672,6 @@
> return 0;
>
> page = pfn_to_page(pfn);
> - BUG_ON(PageReserved(page) && PageNosave(page));
> if (PageNosave(page))
> return 0;
> if (PageReserved(page) && pfn_is_nosave(pfn)) {
Rafael, are you sure? This will clash with snapshot.c split and
probably belongs to some other patch.
Pavel
--
if you have sharp zaurus hardware you don't need... you know my address
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [discuss] Re: [PATCH][Fix] swsusp: avoid possible page tables corruption during resume on x86-64
2005-10-05 22:49 ` Pavel Machek
@ 2005-10-06 8:07 ` Rafael J. Wysocki
2005-10-08 10:30 ` Andi Kleen
0 siblings, 1 reply; 11+ messages in thread
From: Rafael J. Wysocki @ 2005-10-06 8:07 UTC (permalink / raw)
To: discuss; +Cc: Pavel Machek, Andi Kleen, Andrew Morton, LKML
Hi,
On Thursday, 6 of October 2005 00:49, Pavel Machek wrote:
> Hi!
>
> > Summary =========
> > The following patch makes swsusp avoid the possible temporary corruption of
> > page translation tables during resume on x86-64. This is achieved by creating
> > a copy of the relevant page tables that will not be modified by swsusp and can
> > be safely used by it on resume.
>
> Andi, this means swsusp fails 50% of time on x86-64. I believe we even
> have one report in suse bugzilla by now... Could we get this somehow
> merged?
>
>
> > Index: linux-2.6.14-rc3-git5/kernel/power/swsusp.c
> > ===================================================================
> > --- linux-2.6.14-rc3-git5.orig/kernel/power/swsusp.c 2005-10-05 21:12:41.000000000 +0200
> > +++ linux-2.6.14-rc3-git5/kernel/power/swsusp.c 2005-10-05 21:24:50.000000000 +0200
> > @@ -672,7 +672,6 @@
> > return 0;
> >
> > page = pfn_to_page(pfn);
> > - BUG_ON(PageReserved(page) && PageNosave(page));
> > if (PageNosave(page))
> > return 0;
> > if (PageReserved(page) && pfn_is_nosave(pfn)) {
>
> Rafael, are you sure?
Yes, I am. The pages allocated in init_memory_mapping() are marked with
PG_reserved by the init code.
> This will clash with snapshot.c split and probably belongs to some other patch.
I am aware of that. This will conflict with Nigel's patch, so we can probably
arrange to apply that patch before this one, if you prefer.
As far as the split is concerned, if you recall my doubts wrt it, the "bugfixes
pending" is the first point on the list. :-)
Greetings,
Rafael
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [discuss] Re: [PATCH][Fix] swsusp: avoid possible page tables corruption during resume on x86-64
2005-10-06 8:07 ` [discuss] " Rafael J. Wysocki
@ 2005-10-08 10:30 ` Andi Kleen
2005-10-08 12:23 ` Rafael J. Wysocki
0 siblings, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2005-10-08 10:30 UTC (permalink / raw)
To: discuss; +Cc: Rafael J. Wysocki, Pavel Machek, Andrew Morton, LKML
I reworked the patch a bit to do on demand page table setup. Does it still
work for you?
-Andi
ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt-current/patches/suspend-pgtables
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [discuss] Re: [PATCH][Fix] swsusp: avoid possible page tables corruption during resume on x86-64
2005-10-08 10:30 ` Andi Kleen
@ 2005-10-08 12:23 ` Rafael J. Wysocki
0 siblings, 0 replies; 11+ messages in thread
From: Rafael J. Wysocki @ 2005-10-08 12:23 UTC (permalink / raw)
To: Andi Kleen; +Cc: discuss, Pavel Machek, Andrew Morton, LKML
Hi Andi,
Thanks a lot for looking into this!
On Saturday, 8 of October 2005 12:30, Andi Kleen wrote:
>
> I reworked the patch a bit to do on demand page table setup. Does it still
> work for you?
No, it doesn't, because arch_prepare_suspend() is only called during suspend
(ie. by the kernel that creates the image) and the temporary page tables have
to exist during resume (ie. be available to the kernel that reads the image and
restores the saved state of the system).
In principle we can call create_resume_mapping() from swsusp_arch_resume()
(ie. from suspend_asm.S), but then the memory allocations in
create_resume_mapping(), resume_pud_mapping(), and resume_pmd_mapping()
must be made carefully so that we use _only_ NosaveFree pages in them
(the other pages are overwritten by the loop in swsusp_arch_resume()).
Additionally, we are in atomic context at that time, so we cannot use
GFP_KERNEL. Moreover, if one of the allocations fails, we should
free all of the allocated pages, so we need to trace them somehow.
All of this is done in the appended patch, except that the functions populating
the page tables are located in arch/x86_64/kernel/suspend.c rather than
in init.c. It may be done in a more elegant way in the future, with the help
of some swsusp patches that are in the works now.
Greetings,
Rafael
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Index: linux-2.6.14-rc3-git1/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.14-rc3-git1.orig/arch/x86_64/kernel/suspend.c 2005-10-02 10:39:41.000000000 +0200
+++ linux-2.6.14-rc3-git1/arch/x86_64/kernel/suspend.c 2005-10-02 12:12:27.000000000 +0200
@@ -11,6 +11,8 @@
#include <linux/smp.h>
#include <linux/suspend.h>
#include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
struct saved_context saved_context;
@@ -140,4 +142,132 @@
}
+#ifdef CONFIG_SOFTWARE_SUSPEND
+/* Defined in kernel/power/swsusp.c */
+extern unsigned long get_usable_page(unsigned gfp_mask);
+extern void free_eaten_memory(void);
+/* Defined in arch/x86_64/kernel/suspend_asm.S */
+extern int restore_image(void);
+pgd_t *temp_level4_pgt;
+
+static void **pages;
+
+static inline void *__add_page(void)
+{
+ void **c;
+
+ c = (void **)get_usable_page(GFP_ATOMIC);
+ if (c) {
+ *c = pages;
+ pages = c;
+ }
+ return c;
+}
+
+static inline void *__next_page(void)
+{
+ void **c;
+
+ c = pages;
+ if (c) {
+ pages = *c;
+ *c = NULL;
+ }
+ return c;
+}
+
+/*
+ * Try to allocate as many usable pages as needed and daisy chain them.
+ * If one allocation fails, free the pages allocated so far
+ */
+static int alloc_usable_pages(unsigned long n)
+{
+ void *p;
+
+ pages = NULL;
+ do
+ if (!__add_page())
+ break;
+ while (--n);
+ if (n) {
+ p = __next_page();
+ while (p) {
+ free_page((unsigned long)p);
+ p = __next_page();
+ }
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+{
+ long i, j;
+
+ i = pud_index(address);
+ pud = pud + i;
+ for (; i < PTRS_PER_PUD; pud++, i++) {
+ unsigned long paddr;
+ pmd_t *pmd;
+
+ paddr = address + i*PUD_SIZE;
+ if (paddr >= end)
+ break;
+
+ pmd = (pmd_t *)__next_page();
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
+ unsigned long pe;
+
+ if (paddr >= end)
+ break;
+ pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr;
+ pe &= __supported_pte_mask;
+ set_pmd(pmd, __pmd(pe));
+ }
+ }
+}
+
+static void set_up_temporary_mappings(void)
+{
+ unsigned long start, end, next;
+
+ temp_level4_pgt = (pgd_t *)__next_page();
+
+ /* It is safe to reuse the original kernel mapping */
+ set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
+ init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+
+ /* Set up the direct mapping from scratch */
+ start = (unsigned long)pfn_to_kaddr(0);
+ end = (unsigned long)pfn_to_kaddr(end_pfn);
+
+ for (; start < end; start = next) {
+ pud_t *pud = (pud_t *)__next_page();
+ next = start + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+ phys_pud_init(pud, __pa(start), __pa(next));
+ set_pgd(temp_level4_pgt + pgd_index(start),
+ mk_kernel_pgd(__pa(pud)));
+ }
+}
+
+int swsusp_arch_resume(void)
+{
+ unsigned long n;
+
+ n = ((end_pfn << PAGE_SHIFT) + PUD_SIZE - 1) >> PUD_SHIFT;
+ n += (n + PTRS_PER_PUD - 1) / PTRS_PER_PUD + 1;
+ pr_debug("swsusp_arch_resume(): pages needed = %lu\n", n);
+ if (alloc_usable_pages(n)) {
+ free_eaten_memory();
+ return -ENOMEM;
+ }
+ /* We have got enough memory and from now on we cannot recover */
+ set_up_temporary_mappings();
+ restore_image();
+ return 0;
+}
+#endif /* CONFIG_SOFTWARE_SUSPEND */
Index: linux-2.6.14-rc3-git1/kernel/power/swsusp.c
===================================================================
--- linux-2.6.14-rc3-git1.orig/kernel/power/swsusp.c 2005-10-02 10:39:41.000000000 +0200
+++ linux-2.6.14-rc3-git1/kernel/power/swsusp.c 2005-10-02 12:11:08.000000000 +0200
@@ -1095,7 +1095,7 @@
*eaten_memory = c;
}
-static unsigned long get_usable_page(unsigned gfp_mask)
+unsigned long get_usable_page(unsigned gfp_mask)
{
unsigned long m;
@@ -1109,7 +1109,7 @@
return m;
}
-static void free_eaten_memory(void)
+void free_eaten_memory(void)
{
unsigned long m;
void **c;
@@ -1481,11 +1481,12 @@
/* Allocate memory for the image and read the data from swap */
error = check_pagedir(pagedir_nosave);
- free_eaten_memory();
+
if (!error)
error = data_read(pagedir_nosave);
if (error) { /* We fail cleanly */
+ free_eaten_memory();
for_each_pbe (p, pagedir_nosave)
if (p->address) {
free_page(p->address);
Index: linux-2.6.14-rc3-git1/arch/x86_64/kernel/suspend_asm.S
===================================================================
--- linux-2.6.14-rc3-git1.orig/arch/x86_64/kernel/suspend_asm.S 2005-10-02 10:39:41.000000000 +0200
+++ linux-2.6.14-rc3-git1/arch/x86_64/kernel/suspend_asm.S 2005-10-02 11:30:55.000000000 +0200
@@ -39,12 +39,13 @@
call swsusp_save
ret
-ENTRY(swsusp_arch_resume)
- /* set up cr3 */
- leaq init_level4_pgt(%rip),%rax
- subq $__START_KERNEL_map,%rax
- movq %rax,%cr3
-
+ENTRY(restore_image)
+ /* switch to temporary page tables */
+ movq $__PAGE_OFFSET, %rdx
+ movq temp_level4_pgt(%rip), %rax
+ subq %rdx, %rax
+ movq %rax, %cr3
+ /* Flush TLB */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
andq $~(1<<7), %rdx # PGE
@@ -69,6 +70,10 @@
movq pbe_next(%rdx), %rdx
jmp loop
done:
+ /* go back to the original page tables */
+ leaq init_level4_pgt(%rip), %rax
+ subq $__START_KERNEL_map, %rax
+ movq %rax, %cr3
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2005-10-08 12:22 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-10-01 16:13 [RFC][PATCH][Fix] swsusp: Yet another attempt to fix Bug #4959 Rafael J. Wysocki
2005-10-01 19:45 ` Andi Kleen
2005-10-02 10:25 ` Rafael J. Wysocki
2005-10-04 14:11 ` Rafael J. Wysocki
2005-10-04 17:09 ` [discuss] " Andi Kleen
2005-10-04 21:31 ` Rafael J. Wysocki
2005-10-05 21:44 ` [PATCH][Fix] swsusp: avoid possible page tables corruption during resume on x86-64 Rafael J. Wysocki
2005-10-05 22:49 ` Pavel Machek
2005-10-06 8:07 ` [discuss] " Rafael J. Wysocki
2005-10-08 10:30 ` Andi Kleen
2005-10-08 12:23 ` Rafael J. Wysocki
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox