* [PATCH] Dirty page tracking for physical system migration
@ 2011-06-07 20:28 Paradis, James
2011-06-07 20:33 ` Randy Dunlap
0 siblings, 1 reply; 2+ messages in thread
From: Paradis, James @ 2011-06-07 20:28 UTC (permalink / raw)
To: linux-mm
[-- Attachment #1: Type: text/plain, Size: 12771 bytes --]
This patch implements a system to track re-dirtied pages and modified
PTEs. It is used by Stratus Technologies for both our ftLinux product and
our new GPL Live Kernel Self Migration project (lksm.sourceforge.net).
In both cases, we bring a backup server online by copying the primary
server's state while it is running. We start by copying all of memory
top to bottom. We then go back and re-copy any pages that were changed
during the first copy pass. After several such passes we momentarily
suspend processing so we can copy the last few pages over and bring up
the secondary system. This patch keeps track of which pages need to be
copied during these passes.
arch/x86/Kconfig | 11 +++++++++++
arch/x86/include/asm/hugetlb.h | 3 +++
arch/x86/include/asm/pgtable-2level.h | 4 ++++
arch/x86/include/asm/pgtable-3level.h | 11 +++++++++++
arch/x86/include/asm/pgtable.h | 4 ++--
arch/x86/include/asm/pgtable_32.h | 1 +
arch/x86/include/asm/pgtable_64.h | 7 +++++++
arch/x86/include/asm/pgtable_types.h | 5 ++++-
arch/x86/mm/Makefile | 2 ++
mm/huge_memory.c | 4 ++--
11 files changed, 48 insertions(+), 6 deletions(-)
Signed-off-by: "James Paradis" <james.paradis@stratus.com>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a..cc778a4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1146,6 +1146,17 @@ config DIRECT_GBPAGES
support it. This can improve the kernel's performance
a tiny bit by
reducing TLB pressure. If in doubt, say "Y".
+config TRACK_DIRTY_PAGES
+ bool "Enable dirty page tracking"
+ default n
+ depends on !KMEMCHECK
+ ---help---
+ Turning this on enables tracking of re-dirtied and
+ changed pages. This is needed by the Live Kernel
+ Self Migration project (lksm.sourceforge.net) to perform
+ live copying of memory and system state to another system.
+ Most users will say n here.
+
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
diff --git a/arch/x86/include/asm/hugetlb.h
b/arch/x86/include/asm/hugetlb.h
index 439a9ac..8266873 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -2,6 +2,7 @@
#define _ASM_X86_HUGETLB_H
#include <asm/page.h>
+#include <asm/mm_track.h>
static inline int is_hugepage_only_range(struct mm_struct *mm,
@@ -39,12 +40,14 @@ static inline void hugetlb_free_pgd_range(struct
mmu_gather *tlb,
static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long
addr,
pte_t
*ptep, pte_t pte)
{
+ mm_track_pmd((pmd_t *)ptep);
set_pte_at(mm, addr, ptep, pte);
}
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
+ mm_track_pmd((pmd_t *)ptep);
return ptep_get_and_clear(mm, addr, ptep);
}
diff --git a/arch/x86/include/asm/pgtable-2level.h
b/arch/x86/include/asm/pgtable-2level.h
index 98391db..a59deb5 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -13,11 +13,13 @@
*/
static inline void native_set_pte(pte_t *ptep , pte_t pte)
{
+ mm_track_pte(ptep);
*ptep = pte;
}
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
+ mm_track_pmd(pmdp);
*pmdp = pmd;
}
@@ -34,12 +36,14 @@ static inline void native_pmd_clear(pmd_t *pmdp)
static inline void native_pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *xp)
{
+ mm_track_pte(xp);
*xp = native_make_pte(0);
}
#ifdef CONFIG_SMP
static inline pte_t native_ptep_get_and_clear(pte_t *xp)
{
+ mm_track_pte(xp);
return __pte(xchg(&xp->pte_low, 0));
}
#else
diff --git a/arch/x86/include/asm/pgtable-3level.h
b/arch/x86/include/asm/pgtable-3level.h
index effff47..b75d753 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -26,6 +26,7 @@
*/
static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
+ mm_track_pte(ptep);
ptep->pte_high = pte.pte_high;
smp_wmb();
ptep->pte_low = pte.pte_low;
@@ -33,16 +34,19 @@ static inline void native_set_pte(pte_t *ptep, pte_t
pte)
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
+ mm_track_pte(ptep);
set_64bit((unsigned long long *)(ptep),
native_pte_val(pte));
}
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
+ mm_track_pmd(pmdp);
set_64bit((unsigned long long *)(pmdp),
native_pmd_val(pmd));
}
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
+ mm_track_pud(pudp);
set_64bit((unsigned long long *)(pudp),
native_pud_val(pud));
}
@@ -54,6 +58,7 @@ static inline void native_set_pud(pud_t *pudp, pud_t
pud)
static inline void native_pte_clear(struct mm_struct *mm, unsigned long
addr,
pte_t *ptep)
{
+ mm_track_pte(ptep);
ptep->pte_low = 0;
smp_wmb();
ptep->pte_high = 0;
@@ -62,6 +67,9 @@ static inline void native_pte_clear(struct mm_struct
*mm, unsigned long addr,
static inline void native_pmd_clear(pmd_t *pmd)
{
u32 *tmp = (u32 *)pmd;
+
+ mm_track_pmd(pmd);
+
*tmp = 0;
smp_wmb();
*(tmp + 1) = 0;
@@ -69,6 +77,7 @@ static inline void native_pmd_clear(pmd_t *pmd)
static inline void pud_clear(pud_t *pudp)
{
+ mm_track_pud(pudp);
set_pud(pudp, __pud(0));
/*
@@ -88,6 +97,8 @@ static inline pte_t native_ptep_get_and_clear(pte_t
*ptep)
{
pte_t res;
+ mm_track_pte(ptep);
+
/* xchg acts as a barrier before the setting of the high
bits */
res.pte_low = xchg(&ptep->pte_low, 0);
res.pte_high = ptep->pte_high;
diff --git a/arch/x86/include/asm/pgtable.h
b/arch/x86/include/asm/pgtable.h
index 18601c8..30bb916 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -89,7 +89,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page
*page);
*/
static inline int pte_dirty(pte_t pte)
{
- return pte_flags(pte) & _PAGE_DIRTY;
+ return pte_flags(pte) & (_PAGE_DIRTY | _PAGE_SOFTDIRTY);
}
static inline int pte_young(pte_t pte)
@@ -183,7 +183,7 @@ static inline pte_t pte_clear_flags(pte_t pte,
pteval_t clear)
static inline pte_t pte_mkclean(pte_t pte)
{
- return pte_clear_flags(pte, _PAGE_DIRTY);
+ return pte_clear_flags(pte, (_PAGE_DIRTY |
_PAGE_SOFTDIRTY));
}
static inline pte_t pte_mkold(pte_t pte)
diff --git a/arch/x86/include/asm/pgtable_32.h
b/arch/x86/include/asm/pgtable_32.h
index 0c92113..78415fb 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -21,6 +21,7 @@
#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <asm/mm_track.h>
struct mm_struct;
struct vm_area_struct;
diff --git a/arch/x86/include/asm/pgtable_64.h
b/arch/x86/include/asm/pgtable_64.h
index 975f709..0848e9e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -13,6 +13,7 @@
#include <asm/processor.h>
#include <linux/bitops.h>
#include <linux/threads.h>
+#include <asm/mm_track.h>
extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
@@ -46,11 +47,13 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned
long vaddr, pte_t new_pte);
static inline void native_pte_clear(struct mm_struct *mm, unsigned long
addr,
pte_t *ptep)
{
+ mm_track_pte(ptep);
*ptep = native_make_pte(0);
}
static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
+ mm_track_pte(ptep);
*ptep = pte;
}
@@ -61,6 +64,7 @@ static inline void native_set_pte_atomic(pte_t *ptep,
pte_t pte)
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
+ mm_track_pmd(pmdp);
*pmdp = pmd;
}
@@ -71,6 +75,7 @@ static inline void native_pmd_clear(pmd_t *pmd)
static inline pte_t native_ptep_get_and_clear(pte_t *xp)
{
+ mm_track_pte(xp);
#ifdef CONFIG_SMP
return native_make_pte(xchg(&xp->pte, 0));
#else
@@ -97,6 +102,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t
*xp)
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
+ mm_track_pud(pudp);
*pudp = pud;
}
@@ -107,6 +113,7 @@ static inline void native_pud_clear(pud_t *pud)
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
+ mm_track_pgd(pgdp);
*pgdp = pgd;
}
diff --git a/arch/x86/include/asm/pgtable_types.h
b/arch/x86/include/asm/pgtable_types.h
index d56187c..7f366d0 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -23,6 +23,7 @@
#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
#define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE
pmd */
+#define _PAGE_BIT_SOFTDIRTY _PAGE_BIT_HIDDEN
#define _PAGE_BIT_NX 63 /* No execute: only valid after
cpuid check */
/* If _PAGE_BIT_PRESENT is clear, we use these: */
@@ -47,6 +48,7 @@
#define _PAGE_SPECIAL (_AT(pteval_t, 1) <<
_PAGE_BIT_SPECIAL)
#define _PAGE_CPA_TEST (_AT(pteval_t, 1) <<
_PAGE_BIT_CPA_TEST)
#define _PAGE_SPLITTING (_AT(pteval_t, 1) <<
_PAGE_BIT_SPLITTING)
+#define _PAGE_SOFTDIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFTDIRTY)
#define __HAVE_ARCH_PTE_SPECIAL
#ifdef CONFIG_KMEMCHECK
@@ -71,7 +73,8 @@
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |
\
- _PAGE_SPECIAL |
_PAGE_ACCESSED | _PAGE_DIRTY)
+ _PAGE_SPECIAL |
_PAGE_ACCESSED | _PAGE_DIRTY | \
+ _PAGE_SOFTDIRTY)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3e608ed..a416317 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -30,3 +30,5 @@ obj-$(CONFIG_NUMA_EMU) +=
numa_emulation.o
obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
obj-$(CONFIG_MEMTEST) += memtest.o
+
+obj-$(CONFIG_TRACK_DIRTY_PAGES) += track.o
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 83326ad..b94aad6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -795,7 +795,7 @@ static int do_huge_pmd_wp_page_fallback(struct
mm_struct *mm,
unsigned long haddr)
{
pgtable_t pgtable;
- pmd_t _pmd;
+ pmd_t _pmd = {0};
int ret = 0, i;
struct page **pages;
@@ -1265,7 +1265,7 @@ static int __split_huge_page_map(struct page
*page,
unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
- pmd_t *pmd, _pmd;
+ pmd_t *pmd, _pmd = {0};
int ret = 0, i;
pgtable_t pgtable;
unsigned long haddr;
[-- Attachment #2: Type: text/html, Size: 37229 bytes --]
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Dirty page tracking for physical system migration
2011-06-07 20:28 [PATCH] Dirty page tracking for physical system migration Paradis, James
@ 2011-06-07 20:33 ` Randy Dunlap
0 siblings, 0 replies; 2+ messages in thread
From: Randy Dunlap @ 2011-06-07 20:33 UTC (permalink / raw)
To: Paradis, James; +Cc: linux-mm
On Tue, 7 Jun 2011 16:28:27 -0400 Paradis, James wrote:
>
>
> This patch implements a system to track re-dirtied pages and modified
>
> PTEs. It is used by Stratus Technologies for both our ftLinux product
> and
>
> our new GPL Live Kernel Self Migration project (lksm.sourceforge.net).
>
> In both cases, we bring a backup server online by copying the primary
>
> server's state while it is running. We start by copying all of memory
>
> top to bottom. We then go back and re-copy any pages that were changed
>
> during the first copy pass. After several such passes we momentarily
>
> suspend processing so we can copy the last few pages over and bring up
>
> the secondary system. This patch keeps track of which pages need to be
>
> copied during these passes.
>
>
>
> arch/x86/Kconfig | 11 +++++++++++
>
> arch/x86/include/asm/hugetlb.h | 3 +++
>
> arch/x86/include/asm/pgtable-2level.h | 4 ++++
>
> arch/x86/include/asm/pgtable-3level.h | 11 +++++++++++
>
> arch/x86/include/asm/pgtable.h | 4 ++--
>
> arch/x86/include/asm/pgtable_32.h | 1 +
>
> arch/x86/include/asm/pgtable_64.h | 7 +++++++
>
> arch/x86/include/asm/pgtable_types.h | 5 ++++-
>
> arch/x86/mm/Makefile | 2 ++
>
> mm/huge_memory.c | 4 ++--
>
> 11 files changed, 48 insertions(+), 6 deletions(-)
>
>
>
> Signed-off-by: "James Paradis" <james.paradis@stratus.com>
>
>
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>
> index cc6c53a..cc778a4 100644
>
> --- a/arch/x86/Kconfig
>
> +++ b/arch/x86/Kconfig
>
> @@ -1146,6 +1146,17 @@ config DIRECT_GBPAGES
>
> support it. This can improve the kernel's performance
> a tiny bit by
>
> reducing TLB pressure. If in doubt, say "Y".
>
>
>
> +config TRACK_DIRTY_PAGES
>
> + bool "Enable dirty page tracking"
>
> + default n
>
> + depends on !KMEMCHECK
>
> + ---help---
>
> + Turning this on enables tracking of re-dirtied and
>
> + changed pages. This is needed by the Live Kernel
>
> + Self Migration project (lksm.sourceforge.net) to
> perform
>
> + live copying of memory and system state to another
> system.
>
> + Most users will say n here.
>
> +
>
> # Common NUMA Features
>
> config NUMA
>
> bool "Numa Memory Allocation and Scheduler Support"
>
[rest is snipped]
a. Please don't send html.
b. What caused the double-spaced lines? maybe CR/LF?
I haven't tested it, but I doubt that this patch will apply cleanly as is.
c. There's lots of whitespace damage, i.e., spaces instead of tabs at the
beginning of many lines.
You probably need to try again.
---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2011-06-07 20:33 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2011-06-07 20:28 [PATCH] Dirty page tracking for physical system migration Paradis, James
2011-06-07 20:33 ` Randy Dunlap
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).