* [PATCH 01 of 12] xen: make phys_to_machine structure dynamic
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 02 of 12] xen: add configurable max domain size Jeremy Fitzhardinge
` (10 subsequent siblings)
11 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
We now support the use of memory hotplug, so the physical to machine
page mapping structure must be dynamic. This is implemented as a
two-level radix tree structure, which allows us to efficiently
incrementally allocate memory for the p2m table as new pages are
added.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/enlighten.c | 2 -
arch/x86/xen/mmu.c | 85 ++++++++++++++++++++++++++++++++++++++++++++
arch/x86/xen/setup.c | 2 -
arch/x86/xen/xen-ops.h | 2 +
include/asm-x86/xen/page.h | 20 +++-------
5 files changed, 94 insertions(+), 17 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1222,7 +1222,7 @@
/* Get mfn list */
if (!xen_feature(XENFEAT_auto_translated_physmap))
- phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
+ xen_build_dynamic_phys_to_machine();
pgd = (pgd_t *)xen_start_info->pt_base;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -55,6 +55,91 @@
#include "multicalls.h"
#include "mmu.h"
+
+/*
+ * This should probably be a config option. On 32-bit, it costs 1
+ * page/gig of memory; on 64-bit its 2 pages/gig. If we want it to be
+ * completely unbounded we can add another level to the p2m structure.
+ */
+#define MAX_GUEST_PAGES (16ull * 1024*1024*1024 / PAGE_SIZE)
+#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
+
+static unsigned long *p2m_top[MAX_GUEST_PAGES / P2M_ENTRIES_PER_PAGE];
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{
+ BUG_ON(pfn >= MAX_GUEST_PAGES);
+ return pfn / P2M_ENTRIES_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{
+ return pfn % P2M_ENTRIES_PER_PAGE;
+}
+
+void __init xen_build_dynamic_phys_to_machine(void)
+{
+ unsigned pfn;
+ unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+
+ BUG_ON(xen_start_info->nr_pages >= MAX_GUEST_PAGES);
+
+ for(pfn = 0;
+ pfn < xen_start_info->nr_pages;
+ pfn += P2M_ENTRIES_PER_PAGE) {
+ unsigned topidx = p2m_top_index(pfn);
+
+ p2m_top[topidx] = &mfn_list[pfn];
+ }
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{
+ unsigned topidx, idx;
+
+ topidx = p2m_top_index(pfn);
+ if (p2m_top[topidx] == NULL)
+ return INVALID_P2M_ENTRY;
+
+ idx = p2m_index(pfn);
+ return p2m_top[topidx][idx];
+}
+
+static void alloc_p2m(unsigned long **pp)
+{
+ unsigned long *p;
+ unsigned i;
+
+ p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+ BUG_ON(p == NULL);
+
+ for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+ p[i] = INVALID_P2M_ENTRY;
+
+ if (cmpxchg(pp, NULL, p) != NULL)
+ free_page((unsigned long)p);
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+ unsigned topidx, idx;
+
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ return;
+ }
+
+ topidx = p2m_top_index(pfn);
+ if (p2m_top[topidx] == NULL) {
+ /* no need to allocate a page to store an invalid entry */
+ if (mfn == INVALID_P2M_ENTRY)
+ return;
+ alloc_p2m(&p2m_top[topidx]);
+ }
+
+ idx = p2m_index(pfn);
+ p2m_top[topidx][idx] = mfn;
+}
xmaddr_t arbitrary_virt_to_machine(unsigned long address)
{
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -27,8 +27,6 @@
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
-unsigned long *phys_to_machine_mapping;
-EXPORT_SYMBOL(phys_to_machine_mapping);
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -21,6 +21,8 @@
void __init xen_arch_setup(void);
void __init xen_init_IRQ(void);
void xen_enable_sysenter(void);
+
+void __init xen_build_dynamic_phys_to_machine(void);
void xen_setup_timer(int cpu);
void xen_setup_cpu_clockevents(void);
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -26,15 +26,15 @@
#define FOREIGN_FRAME_BIT (1UL<<31)
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
-extern unsigned long *phys_to_machine_mapping;
+extern unsigned long get_phys_to_machine(unsigned long pfn);
+extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return pfn;
- return phys_to_machine_mapping[(unsigned int)(pfn)] &
- ~FOREIGN_FRAME_BIT;
+ return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
}
static inline int phys_to_machine_mapping_valid(unsigned long pfn)
@@ -42,7 +42,7 @@
if (xen_feature(XENFEAT_auto_translated_physmap))
return 1;
- return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
+ return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
}
static inline unsigned long mfn_to_pfn(unsigned long mfn)
@@ -106,18 +106,10 @@
unsigned long pfn = mfn_to_pfn(mfn);
if ((pfn < max_mapnr)
&& !xen_feature(XENFEAT_auto_translated_physmap)
- && (phys_to_machine_mapping[pfn] != mfn))
+ && (get_phys_to_machine(pfn) != mfn))
return max_mapnr; /* force !pfn_valid() */
+ /* XXX fixme; not true with sparsemem */
return pfn;
-}
-
-static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
- return;
- }
- phys_to_machine_mapping[pfn] = mfn;
}
/* VIRT <-> MACHINE conversion */
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 02 of 12] xen: add configurable max domain size
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 01 of 12] xen: make phys_to_machine structure dynamic Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 03 of 12] xen: efficiently support a holey p2m table Jeremy Fitzhardinge
` (9 subsequent siblings)
11 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
Add a config option to set the max size of a Xen domain. This is used
to scale the size of the physical-to-machine array; it ends up using
around 1 page/GByte, so there's no reason to be very restrictive.
For a 32-bit guest, the default value of 8GB is probably sufficient;
there's not much point in giving a 32-bit machine much more memory
than that.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/Kconfig | 10 ++++++++++
arch/x86/xen/mmu.c | 25 ++++++++++++-------------
arch/x86/xen/setup.c | 3 +++
include/asm-x86/xen/page.h | 5 +++++
4 files changed, 30 insertions(+), 13 deletions(-)
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -11,3 +11,13 @@
This is the Linux Xen port. Enabling this will allow the
kernel to boot in a paravirtualized environment under the
Xen hypervisor.
+
+config XEN_MAX_DOMAIN_MEMORY
+ int "Maximum allowed size of a domain in gigabytes"
+ default 8
+ depends on XEN
+ help
+ The pseudo-physical to machine address array is sized
+ according to the maximum possible memory size of a Xen
+ domain. This array uses 1 page per gigabyte, so there's no
+ need to be too stingy here.
\ No newline at end of file
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,19 +56,13 @@
#include "multicalls.h"
#include "mmu.h"
-/*
- * This should probably be a config option. On 32-bit, it costs 1
- * page/gig of memory; on 64-bit its 2 pages/gig. If we want it to be
- * completely unbounded we can add another level to the p2m structure.
- */
-#define MAX_GUEST_PAGES (16ull * 1024*1024*1024 / PAGE_SIZE)
#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
-static unsigned long *p2m_top[MAX_GUEST_PAGES / P2M_ENTRIES_PER_PAGE];
+static unsigned long *p2m_top[MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE];
static inline unsigned p2m_top_index(unsigned long pfn)
{
- BUG_ON(pfn >= MAX_GUEST_PAGES);
+ BUG_ON(pfn >= MAX_DOMAIN_PAGES);
return pfn / P2M_ENTRIES_PER_PAGE;
}
@@ -81,12 +75,9 @@
{
unsigned pfn;
unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+ unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
- BUG_ON(xen_start_info->nr_pages >= MAX_GUEST_PAGES);
-
- for(pfn = 0;
- pfn < xen_start_info->nr_pages;
- pfn += P2M_ENTRIES_PER_PAGE) {
+ for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
unsigned topidx = p2m_top_index(pfn);
p2m_top[topidx] = &mfn_list[pfn];
@@ -96,6 +87,9 @@
unsigned long get_phys_to_machine(unsigned long pfn)
{
unsigned topidx, idx;
+
+ if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+ return INVALID_P2M_ENTRY;
topidx = p2m_top_index(pfn);
if (p2m_top[topidx] == NULL)
@@ -126,6 +120,11 @@
if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ return;
+ }
+
+ if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+ BUG_ON(mfn != INVALID_P2M_ENTRY);
return;
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
+#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
@@ -35,6 +36,8 @@
char * __init xen_memory_setup(void)
{
unsigned long max_pfn = xen_start_info->nr_pages;
+
+ max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
e820.nr_map = 0;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -25,6 +25,11 @@
#define INVALID_P2M_ENTRY (~0UL)
#define FOREIGN_FRAME_BIT (1UL<<31)
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
+
+/* Maximum amount of memory we can handle in a domain in pages */
+#define MAX_DOMAIN_PAGES \
+ ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
+
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 03 of 12] xen: efficiently support a holey p2m table
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 01 of 12] xen: make phys_to_machine structure dynamic Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 02 of 12] xen: add configurable max domain size Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 04 of 12] xen: make dummy_shared_info non-static Jeremy Fitzhardinge
` (8 subsequent siblings)
11 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
When using sparsemem and memory hotplug, the kernel's pseudo-physical
address space can be discontigious. Previously this was dealt with by
having the upper parts of the radix tree stubbed off. Unfortunately,
this is incompatible with save/restore, which requires a complete p2m
table.
The solution is to have a special distinguished all-invalid p2m leaf
page, which we can point all the hole areas at. This allows the tools
to see a complete p2m table, but it only costs a page for all memory
holes.
It also simplifies the code since it removes a few special cases.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/mmu.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -57,8 +57,17 @@
#include "mmu.h"
#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
+#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
-static unsigned long *p2m_top[MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE];
+/* Placeholder for holes in the address space */
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
+ __attribute__((section(".data.page_aligned"))) =
+ { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+
+ /* Array of pointers to pages containing p2m entries */
+static unsigned long *p2m_top[TOP_ENTRIES]
+ __attribute__((section(".data.page_aligned"))) =
+ { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
static inline unsigned p2m_top_index(unsigned long pfn)
{
@@ -92,9 +101,6 @@
return INVALID_P2M_ENTRY;
topidx = p2m_top_index(pfn);
- if (p2m_top[topidx] == NULL)
- return INVALID_P2M_ENTRY;
-
idx = p2m_index(pfn);
return p2m_top[topidx][idx];
}
@@ -110,7 +116,7 @@
for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
p[i] = INVALID_P2M_ENTRY;
- if (cmpxchg(pp, NULL, p) != NULL)
+ if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
free_page((unsigned long)p);
}
@@ -129,7 +135,7 @@
}
topidx = p2m_top_index(pfn);
- if (p2m_top[topidx] == NULL) {
+ if (p2m_top[topidx] == p2m_missing) {
/* no need to allocate a page to store an invalid entry */
if (mfn == INVALID_P2M_ENTRY)
return;
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 04 of 12] xen: make dummy_shared_info non-static
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
` (2 preceding siblings ...)
2008-05-23 13:41 ` [PATCH 03 of 12] xen: efficiently support a holey p2m table Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 05 of 12] xen: add p2m mfn_list_list Jeremy Fitzhardinge
` (7 subsequent siblings)
11 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
Rename dummy_shared_info to xen_dummy_shared_info and make it
non-static, in anticipation of users outside of enlighten.c
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/enlighten.c | 6 +++---
arch/x86/xen/xen-ops.h | 1 +
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,13 +75,13 @@
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
-static /* __initdata */ struct shared_info dummy_shared_info;
+struct shared_info xen_dummy_shared_info;
/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
*/
-struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
+struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
/*
* Flag to determine whether vcpu info placement is available on all
@@ -104,7 +104,7 @@
int err;
struct vcpu_info *vcpup;
- BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info);
+ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
if (!have_vcpu_info_placement)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -15,6 +15,7 @@
DECLARE_PER_CPU(unsigned long, xen_current_cr3);
extern struct start_info *xen_start_info;
+extern struct shared_info xen_dummy_shared_info;
extern struct shared_info *HYPERVISOR_shared_info;
char * __init xen_memory_setup(void);
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 05 of 12] xen: add p2m mfn_list_list
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
` (3 preceding siblings ...)
2008-05-23 13:41 ` [PATCH 04 of 12] xen: make dummy_shared_info non-static Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-05-28 12:28 ` [bisected] " Ingo Molnar
2008-05-23 13:41 ` [PATCH 06 of 12] xen: add rebind_evtchn_irq Jeremy Fitzhardinge
` (6 subsequent siblings)
11 siblings, 1 reply; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
When saving a domain, the Xen tools need to remap all our mfns to
portable pfns. In order to remap our p2m table, it needs to know
where all its pages are, so maintain the references to the p2m table
for it to use.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/enlighten.c | 2 ++
arch/x86/xen/mmu.c | 39 ++++++++++++++++++++++++++++++++++++---
arch/x86/xen/xen-ops.h | 2 ++
3 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -880,6 +880,8 @@
/* In UP this is as good a place as any to set up shared info */
xen_setup_vcpu_info_placement();
#endif
+
+ xen_setup_mfn_list_list();
}
static __init void xen_pagetable_setup_done(pgd_t *base)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -69,6 +69,13 @@
__attribute__((section(".data.page_aligned"))) =
{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+/* Arrays of p2m arrays expressed in mfns used for save/restore */
+static unsigned long p2m_top_mfn[TOP_ENTRIES]
+ __attribute__((section(".bss.page_aligned")));
+
+static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
+ __attribute__((section(".bss.page_aligned")));
+
static inline unsigned p2m_top_index(unsigned long pfn)
{
BUG_ON(pfn >= MAX_DOMAIN_PAGES);
@@ -80,11 +87,35 @@
return pfn % P2M_ENTRIES_PER_PAGE;
}
+/* Build the parallel p2m_top_mfn structures */
+void xen_setup_mfn_list_list(void)
+{
+ unsigned pfn, idx;
+
+ for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+ unsigned topidx = p2m_top_index(pfn);
+
+ p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
+ }
+
+ for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+ unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
+ p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
+ }
+
+ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+ HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+ virt_to_mfn(p2m_top_mfn_list);
+ HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
+}
+
+/* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
- unsigned pfn;
unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+ unsigned pfn;
for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
unsigned topidx = p2m_top_index(pfn);
@@ -105,7 +136,7 @@
return p2m_top[topidx][idx];
}
-static void alloc_p2m(unsigned long **pp)
+static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
{
unsigned long *p;
unsigned i;
@@ -118,6 +149,8 @@
if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
free_page((unsigned long)p);
+ else
+ *mfnp = virt_to_mfn(p);
}
void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
@@ -139,7 +172,7 @@
/* no need to allocate a page to store an invalid entry */
if (mfn == INVALID_P2M_ENTRY)
return;
- alloc_p2m(&p2m_top[topidx]);
+ alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
}
idx = p2m_index(pfn);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -17,6 +17,8 @@
extern struct start_info *xen_start_info;
extern struct shared_info xen_dummy_shared_info;
extern struct shared_info *HYPERVISOR_shared_info;
+
+void xen_setup_mfn_list_list(void);
char * __init xen_memory_setup(void);
void __init xen_arch_setup(void);
^ permalink raw reply [flat|nested] 33+ messages in thread* [bisected] Re: [PATCH 05 of 12] xen: add p2m mfn_list_list
2008-05-23 13:41 ` [PATCH 05 of 12] xen: add p2m mfn_list_list Jeremy Fitzhardinge
@ 2008-05-28 12:28 ` Ingo Molnar
2008-05-28 14:02 ` Jeremy Fitzhardinge
2008-05-30 7:40 ` Gerd Hoffmann
0 siblings, 2 replies; 33+ messages in thread
From: Ingo Molnar @ 2008-05-28 12:28 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki, x86,
Sam Ravnborg
* Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> When saving a domain, the Xen tools need to remap all our mfns to
> portable pfns. In order to remap our p2m table, it needs to know
> where all its pages are, so maintain the references to the p2m table
> for it to use.
-tip tree auto-testing found the following early bootup hang:
-------------->
get_memcfg_from_srat: assigning address to rsdp
RSD PTR v0 [Nvidia]
BUG: Int 14: CR2 ffd00040
EDI 8092fbfe ESI ffd00040 EBP 80b0aee8 ESP 80b0aed0
EBX 000f76f0 EDX 0000000e ECX 00000003 EAX ffd00040
err 00000000 EIP 802c055a CS 00000060 flg 00010006
Stack: ffd00040 80bc78d0 80b0af6c 80b1dbfe 8093d8ba 00000008 80b42810 80b4ddb4
80b42842 00000000 80b0af1c 801079c8 808e724e 00000000 80b42871 802c0531
00000100 00000000 0003fff0 80b0af40 80129999 00040100 00040100 00000000
Pid: 0, comm: swapper Not tainted 2.6.26-rc4-sched-devel.git #570
[<802c055a>] ? strncmp+0x11/0x25
[<80b1dbfe>] ? get_memcfg_from_srat+0xb4/0x568
[<801079c8>] ? mcount_call+0x5/0x9
[<802c0531>] ? strcmp+0xa/0x22
[<80129999>] ? printk+0x38/0x3a
[<80129999>] ? printk+0x38/0x3a
[<8011b122>] ? memory_present+0x66/0x6f
[<80b216b4>] ? setup_memory+0x13/0x40c
[<80b16b47>] ? propagate_e820_map+0x80/0x97
[<80b1622a>] ? setup_arch+0x248/0x477
[<80129999>] ? printk+0x38/0x3a
[<80b11759>] ? start_kernel+0x6e/0x2eb
[<80b110fc>] ? i386_start_kernel+0xeb/0xf2
=======================
<------
with this config:
http://redhat.com/~mingo/misc/config-Wed_May_28_01_33_33_CEST_2008.bad
The thing is, the crash makes little sense at first sight. We crash on a
benign-looking printk. The code around it got changed in -tip but
checking those topic branches individually did not reproduce the bug.
Bisection led to this commit:
| d5edbc1f75420935b1ec7e65df10c8f81cea82de is first bad commit
| commit d5edbc1f75420935b1ec7e65df10c8f81cea82de
| Author: Jeremy Fitzhardinge <jeremy@goop.org>
| Date: Mon May 26 23:31:22 2008 +0100
|
| xen: add p2m mfn_list_list
Which is somewhat surprising, as on native hardware Xen client side
should have little to no side-effects.
After some head scratching, it turns out the following happened:
randconfig enabled the following Xen options:
CONFIG_XEN=y
CONFIG_XEN_MAX_DOMAIN_MEMORY=8
# CONFIG_XEN_BLKDEV_FRONTEND is not set
# CONFIG_XEN_NETDEV_FRONTEND is not set
CONFIG_HVC_XEN=y
# CONFIG_XEN_BALLOON is not set
which activated this piece of code in arch/x86/xen/mmu.c:
> @@ -69,6 +69,13 @@
> __attribute__((section(".data.page_aligned"))) =
> { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
>
> +/* Arrays of p2m arrays expressed in mfns used for save/restore */
> +static unsigned long p2m_top_mfn[TOP_ENTRIES]
> + __attribute__((section(".bss.page_aligned")));
> +
> +static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
> + __attribute__((section(".bss.page_aligned")));
The problem is, you must only put variables into .bss.page_aligned that
have a _size_ that is _exactly_ page aligned. In this case the size of
p2m_top_mfn_list is not page aligned:
80b8d000 b p2m_top_mfn
80b8f000 b p2m_top_mfn_list
80b8f008 b softirq_stack
80b97008 b hardirq_stack
80b9f008 b bm_pte
So all subsequent variables get unaligned which, depending on luck,
breaks the kernel in various funny ways. In this case what killed the
kernel first was the misaligned bootmap pte page, resulting in that
creative crash above.
Anyway, this was a fun bug to track down :-)
I think the moral is that .bss.page_aligned is a dangerous construct in
its current form, and the symptoms of breakage are very non-trivial, so
i think we need build-time checks to make sure all symbols in
.bss.page_aligned are truly page aligned.
Sam, any ideas how to accomplish that best?
The Xen fix below gets the kernel booting again. I suspect we really
need this list to stay in its separate page due to Xen assumptions, even
though it's only 8 bytes large?
Ingo
---
arch/x86/xen/mmu.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
Index: linux/arch/x86/xen/mmu.c
===================================================================
--- linux.orig/arch/x86/xen/mmu.c
+++ linux/arch/x86/xen/mmu.c
@@ -73,7 +73,8 @@ static unsigned long *p2m_top[TOP_ENTRIE
static unsigned long p2m_top_mfn[TOP_ENTRIES]
__attribute__((section(".bss.page_aligned")));
-static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
+static unsigned long p2m_top_mfn_list[
+ PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
__attribute__((section(".bss.page_aligned")));
static inline unsigned p2m_top_index(unsigned long pfn)
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [bisected] Re: [PATCH 05 of 12] xen: add p2m mfn_list_list
2008-05-28 12:28 ` [bisected] " Ingo Molnar
@ 2008-05-28 14:02 ` Jeremy Fitzhardinge
2008-05-28 14:10 ` Sam Ravnborg
2008-06-02 10:10 ` Ingo Molnar
2008-05-30 7:40 ` Gerd Hoffmann
1 sibling, 2 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-28 14:02 UTC (permalink / raw)
To: Ingo Molnar
Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki, x86,
Sam Ravnborg
Ingo Molnar wrote:
> * Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>
>
>> When saving a domain, the Xen tools need to remap all our mfns to
>> portable pfns. In order to remap our p2m table, it needs to know
>> where all its pages are, so maintain the references to the p2m table
>> for it to use.
>>
>
> -tip tree auto-testing found the following early bootup hang:
>
> -------------->
> get_memcfg_from_srat: assigning address to rsdp
> RSD PTR v0 [Nvidia]
> BUG: Int 14: CR2 ffd00040
> EDI 8092fbfe ESI ffd00040 EBP 80b0aee8 ESP 80b0aed0
> EBX 000f76f0 EDX 0000000e ECX 00000003 EAX ffd00040
> err 00000000 EIP 802c055a CS 00000060 flg 00010006
> Stack: ffd00040 80bc78d0 80b0af6c 80b1dbfe 8093d8ba 00000008 80b42810 80b4ddb4
> 80b42842 00000000 80b0af1c 801079c8 808e724e 00000000 80b42871 802c0531
> 00000100 00000000 0003fff0 80b0af40 80129999 00040100 00040100 00000000
> Pid: 0, comm: swapper Not tainted 2.6.26-rc4-sched-devel.git #570
> [<802c055a>] ? strncmp+0x11/0x25
> [<80b1dbfe>] ? get_memcfg_from_srat+0xb4/0x568
> [<801079c8>] ? mcount_call+0x5/0x9
> [<802c0531>] ? strcmp+0xa/0x22
> [<80129999>] ? printk+0x38/0x3a
> [<80129999>] ? printk+0x38/0x3a
> [<8011b122>] ? memory_present+0x66/0x6f
> [<80b216b4>] ? setup_memory+0x13/0x40c
> [<80b16b47>] ? propagate_e820_map+0x80/0x97
> [<80b1622a>] ? setup_arch+0x248/0x477
> [<80129999>] ? printk+0x38/0x3a
> [<80b11759>] ? start_kernel+0x6e/0x2eb
> [<80b110fc>] ? i386_start_kernel+0xeb/0xf2
> =======================
> <------
>
> with this config:
>
> http://redhat.com/~mingo/misc/config-Wed_May_28_01_33_33_CEST_2008.bad
>
> The thing is, the crash makes little sense at first sight. We crash on a
> benign-looking printk. The code around it got changed in -tip but
> checking those topic branches individually did not reproduce the bug.
>
> Bisection led to this commit:
>
> | d5edbc1f75420935b1ec7e65df10c8f81cea82de is first bad commit
> | commit d5edbc1f75420935b1ec7e65df10c8f81cea82de
> | Author: Jeremy Fitzhardinge <jeremy@goop.org>
> | Date: Mon May 26 23:31:22 2008 +0100
> |
> | xen: add p2m mfn_list_list
>
> Which is somewhat surprising, as on native hardware Xen client side
> should have little to no side-effects.
>
> After some head scratching, it turns out the following happened:
> randconfig enabled the following Xen options:
>
> CONFIG_XEN=y
> CONFIG_XEN_MAX_DOMAIN_MEMORY=8
> # CONFIG_XEN_BLKDEV_FRONTEND is not set
> # CONFIG_XEN_NETDEV_FRONTEND is not set
> CONFIG_HVC_XEN=y
> # CONFIG_XEN_BALLOON is not set
>
> which activated this piece of code in arch/x86/xen/mmu.c:
>
>
>> @@ -69,6 +69,13 @@
>> __attribute__((section(".data.page_aligned"))) =
>> { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
>>
>> +/* Arrays of p2m arrays expressed in mfns used for save/restore */
>> +static unsigned long p2m_top_mfn[TOP_ENTRIES]
>> + __attribute__((section(".bss.page_aligned")));
>> +
>> +static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
>> + __attribute__((section(".bss.page_aligned")));
>>
>
> The problem is, you must only put variables into .bss.page_aligned that
> have a _size_ that is _exactly_ page aligned. In this case the size of
> p2m_top_mfn_list is not page aligned:
>
> 80b8d000 b p2m_top_mfn
> 80b8f000 b p2m_top_mfn_list
> 80b8f008 b softirq_stack
> 80b97008 b hardirq_stack
> 80b9f008 b bm_pte
>
> So all subsequent variables get unaligned which, depending on luck,
> breaks the kernel in various funny ways. In this case what killed the
> kernel first was the misaligned bootmap pte page, resulting in that
> creative crash above.
>
> Anyway, this was a fun bug to track down :-)
>
Thanks for that. That's pretty subtle...
> I think the moral is that .bss.page_aligned is a dangerous construct in
> its current form, and the symptoms of breakage are very non-trivial, so
> i think we need build-time checks to make sure all symbols in
> .bss.page_aligned are truly page aligned.
>
> Sam, any ideas how to accomplish that best?
>
The use of __section(.data.page_aligned) (or worse
__attribute__((section(".data.page_aligned"))) is fairly verbose and
brittle. I've got a (totally untested) proposed patch below, to
introduce __page_aligned_data|bss which sets the section and the
alignment. This will work, but it requires that all page-aligned
variables also have an alignment associated with them, so that mis-sized
ones don't push the others around.
There aren't very many users of .data|bss.page_aligned, so it should be
easy enough to fix them all up.
A link-time warning would be good too, of course.
> The Xen fix below gets the kernel booting again. I suspect we really
> need this list to stay in its separate page due to Xen assumptions, even
> though it's only 8 bytes large?
>
Yes, that's right. It can never get to page size, but it must be on its
own page because its only ever referred to by page number; these pages
are read from outside the VM when doing save/restore.
> Ingo
>
> ---
> arch/x86/xen/mmu.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> Index: linux/arch/x86/xen/mmu.c
> ===================================================================
> --- linux.orig/arch/x86/xen/mmu.c
> +++ linux/arch/x86/xen/mmu.c
> @@ -73,7 +73,8 @@ static unsigned long *p2m_top[TOP_ENTRIE
> static unsigned long p2m_top_mfn[TOP_ENTRIES]
> __attribute__((section(".bss.page_aligned")));
>
> -static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
> +static unsigned long p2m_top_mfn_list[
> + PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
> __attribute__((section(".bss.page_aligned")));
>
That's a bit severe - it expands it to 4 pages ;)
J
Subject: make page-aligned data and bss less fragile
Making a variable page-aligned by using
__attribute__((section(".data.page_aligned"))) is fragile because if
sizeof(variable) is not also a multiple of page size, it leaves
variables in the remainder of the section unaligned.
This patch introduces two new qualifiers, __page_aligned_data and
__page_aligned_bss to set the section *and* the alignment of
variables. This makes page-aligned variables more robust because the
linker will make sure they're aligned properly. Unfortunately it
requires *all* page-aligned data to use these macros...
---
arch/x86/kernel/irq_32.c | 7 ++-----
arch/x86/kernel/setup64.c | 2 +-
arch/x86/mm/ioremap.c | 3 +--
arch/x86/xen/mmu.c | 12 +++++-------
include/linux/linkage.h | 4 ++++
5 files changed, 13 insertions(+), 15 deletions(-)
===================================================================
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -83,11 +83,8 @@
static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__section__(".bss.page_aligned")));
-
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__section__(".bss.page_aligned")));
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
static void call_on_stack(void *func, void *stack)
{
===================================================================
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -40,7 +40,7 @@
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
+char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
unsigned long __supported_pte_mask __read_mostly = ~0UL;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
===================================================================
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -403,8 +403,7 @@
early_param("early_ioremap_debug", early_ioremap_debug_setup);
static __initdata int after_paging_init;
-static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
- __section(.bss.page_aligned);
+static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
===================================================================
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/paravirt.h>
+#include <asm/linkage.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -60,21 +61,18 @@
#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
/* Placeholder for holes in the address space */
-static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
- __attribute__((section(".data.page_aligned"))) =
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
/* Array of pointers to pages containing p2m entries */
-static unsigned long *p2m_top[TOP_ENTRIES]
- __attribute__((section(".data.page_aligned"))) =
+static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
/* Arrays of p2m arrays expressed in mfns used for save/restore */
-static unsigned long p2m_top_mfn[TOP_ENTRIES]
- __attribute__((section(".bss.page_aligned")));
+static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
- __attribute__((section(".bss.page_aligned")));
+ __page_aligned_bss;
static inline unsigned p2m_top_index(unsigned long pfn)
{
===================================================================
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -1,6 +1,7 @@
#ifndef _LINUX_LINKAGE_H
#define _LINUX_LINKAGE_H
+#include <linux/compiler.h>
#include <asm/linkage.h>
#define notrace __attribute__((no_instrument_function))
@@ -18,6 +19,9 @@
#ifndef asmregparm
# define asmregparm
#endif
+
+#define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE)
+#define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE)
/*
* This is used by architectures to keep arguments on the stack
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [bisected] Re: [PATCH 05 of 12] xen: add p2m mfn_list_list
2008-05-28 14:02 ` Jeremy Fitzhardinge
@ 2008-05-28 14:10 ` Sam Ravnborg
2008-06-02 10:10 ` Ingo Molnar
1 sibling, 0 replies; 33+ messages in thread
From: Sam Ravnborg @ 2008-05-28 14:10 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki,
x86
On Wed, May 28, 2008 at 03:02:14PM +0100, Jeremy Fitzhardinge wrote:
>
> The use of __section(.data.page_aligned) (or worse
> __attribute__((section(".data.page_aligned"))) is fairly verbose and
> brittle. I've got a (totally untested) proposed patch below, to
> introduce __page_aligned_data|bss which sets the section and the
> alignment. This will work, but it requires that all page-aligned
> variables also have an alignment associated with them, so that mis-sized
> ones don't push the others around.
>
> There aren't very many users of .data|bss.page_aligned, so it should be
> easy enough to fix them all up.
>
> A link-time warning would be good too, of course.
I cooked up this:
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index ce5ed08..963b2ae 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -40,6 +40,7 @@ SECTIONS
.text : AT(ADDR(.text) - LOAD_OFFSET) {
. = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */
*(.text.page_aligned)
+ end_text_page_aligned = .;
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
@@ -49,6 +50,9 @@ SECTIONS
_etext = .; /* End of text section */
} :text = 0x9090
+ ASSERT((end_text_page_aligned == ALIGN((end_text_page_aligned), PAGE_SIZE)),
+ "Text in .text.page_aligned are not modulo PAGE_SIZE")
+
. = ALIGN(16); /* Exception table */
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
__start___ex_table = .;
@@ -89,6 +93,8 @@ SECTIONS
*(.data.page_aligned)
*(.data.idt)
}
+ ASSERT((. == ALIGN(PAGE_SIZE)),
+ "Data in .data.page_aligned are not modulo PAGE_SIZE")
. = ALIGN(32);
.data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
But we should try to do it so all archs can benefit.
And it failed in the second ASSERT - I dunno why.
Soccer duties - so I have to run.
Sam
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [bisected] Re: [PATCH 05 of 12] xen: add p2m mfn_list_list
2008-05-28 14:02 ` Jeremy Fitzhardinge
2008-05-28 14:10 ` Sam Ravnborg
@ 2008-06-02 10:10 ` Ingo Molnar
2008-06-02 13:12 ` Jeremy Fitzhardinge
1 sibling, 1 reply; 33+ messages in thread
From: Ingo Molnar @ 2008-06-02 10:10 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki, x86,
Sam Ravnborg
* Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Subject: make page-aligned data and bss less fragile
>
> Making a variable page-aligned by using
> __attribute__((section(".data.page_aligned"))) is fragile because if
> sizeof(variable) is not also a multiple of page size, it leaves
> variables in the remainder of the section unaligned.
>
> This patch introduces two new qualifiers, __page_aligned_data and
> __page_aligned_bss to set the section *and* the alignment of
> variables. This makes page-aligned variables more robust because the
> linker will make sure they're aligned properly. Unfortunately it
> requires *all* page-aligned data to use these macros...
applied to -tip, thanks Jeremy. Created a new topic branch for the core
bits of it: tip/build. Sam might want to pull from that topic branch
eventually, once these changes pass testing in -tip.
Ingo
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [bisected] Re: [PATCH 05 of 12] xen: add p2m mfn_list_list
2008-06-02 10:10 ` Ingo Molnar
@ 2008-06-02 13:12 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-02 13:12 UTC (permalink / raw)
To: Ingo Molnar
Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki, x86,
Sam Ravnborg
Ingo Molnar wrote:
> * Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>
>
>> Subject: make page-aligned data and bss less fragile
>>
>> Making a variable page-aligned by using
>> __attribute__((section(".data.page_aligned"))) is fragile because if
>> sizeof(variable) is not also a multiple of page size, it leaves
>> variables in the remainder of the section unaligned.
>>
>> This patch introduces two new qualifiers, __page_aligned_data and
>> __page_aligned_bss to set the section *and* the alignment of
>> variables. This makes page-aligned variables more robust because the
>> linker will make sure they're aligned properly. Unfortunately it
>> requires *all* page-aligned data to use these macros...
>>
>
> applied to -tip, thanks Jeremy. Created a new topic branch for the core
> bits of it: tip/build. Sam might want to pull from that topic branch
> eventually, once these changes pass testing in -tip.
>
Can you also drop "xen: fix early bootup crash on native hardware" and
replace it with "x86: use __page_aligned_data/bss"? (Which seems to
have been partially applied?)
Thanks,
J
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [bisected] Re: [PATCH 05 of 12] xen: add p2m mfn_list_list
2008-05-28 12:28 ` [bisected] " Ingo Molnar
2008-05-28 14:02 ` Jeremy Fitzhardinge
@ 2008-05-30 7:40 ` Gerd Hoffmann
2008-05-30 8:04 ` Jeremy Fitzhardinge
1 sibling, 1 reply; 33+ messages in thread
From: Gerd Hoffmann @ 2008-05-30 7:40 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, LKML, xen-devel, Thomas Gleixner,
Rafael J. Wysocki, x86, Sam Ravnborg
Ingo Molnar wrote:
>> +static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
>> + __attribute__((section(".bss.page_aligned")));
> Sam, any ideas how to accomplish that best?
Have a define like this for them all?
#define page_aligned \
__attribute__((aligned(4096))) \
__attribute__((__section__ (".bss.page_aligned")))
cheers,
Gerd
--
http://kraxel.fedorapeople.org/xenner/
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [bisected] Re: [PATCH 05 of 12] xen: add p2m mfn_list_list
2008-05-30 7:40 ` Gerd Hoffmann
@ 2008-05-30 8:04 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-30 8:04 UTC (permalink / raw)
To: Gerd Hoffmann
Cc: Ingo Molnar, LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki,
x86, Sam Ravnborg
Gerd Hoffmann wrote:
> Have a define like this for them all?
>
> #define page_aligned \
> __attribute__((aligned(4096))) \
> __attribute__((__section__ (".bss.page_aligned")))
>
>
Yes, yesterday I posted some patches to do exactly that.
J
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 06 of 12] xen: add rebind_evtchn_irq
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
` (4 preceding siblings ...)
2008-05-23 13:41 ` [PATCH 05 of 12] xen: add p2m mfn_list_list Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 07 of 12] xen: fix unbind_from_irq() Jeremy Fitzhardinge
` (5 subsequent siblings)
11 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
Add rebind_evtchn_irq(), which will rebind an device driver's existing
irq to a new event channel on restore. Since the new event channel
will be masked and bound to vcpu0, we update the state accordingly and
unmask the irq once everything is set up.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/events.c | 27 +++++++++++++++++++++++++++
include/xen/events.h | 1 +
2 files changed, 28 insertions(+)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -557,6 +557,33 @@
put_cpu();
}
+/* Rebind a new event channel to an existing irq. */
+void rebind_evtchn_irq(int evtchn, int irq)
+{
+ /* Make sure the irq is masked, since the new event channel
+ will also be masked. */
+ disable_irq(irq);
+
+ spin_lock(&irq_mapping_update_lock);
+
+ /* After resume the irq<->evtchn mappings are all cleared out */
+ BUG_ON(evtchn_to_irq[evtchn] != -1);
+ /* Expect irq to have been bound before,
+ so the bindcount should be non-0 */
+ BUG_ON(irq_bindcount[irq] == 0);
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
+
+ spin_unlock(&irq_mapping_update_lock);
+
+ /* new event channels are always bound to cpu 0 */
+ irq_set_affinity(irq, cpumask_of_cpu(0));
+
+ /* Unmask the event channel. */
+ enable_irq(irq);
+}
+
/* Rebind an evtchn so that it gets delivered to a specific cpu */
static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
{
diff --git a/include/xen/events.h b/include/xen/events.h
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -32,6 +32,7 @@
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
int resend_irq_on_evtchn(unsigned int irq);
+void rebind_evtchn_irq(int evtchn, int irq);
static inline void notify_remote_via_evtchn(int port)
{
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 07 of 12] xen: fix unbind_from_irq()
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
` (5 preceding siblings ...)
2008-05-23 13:41 ` [PATCH 06 of 12] xen: add rebind_evtchn_irq Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 08 of 12] xen-console: add save/restore Jeremy Fitzhardinge
` (4 subsequent siblings)
11 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
Rearrange the tests in unbind_from_irq() so that we can still unbind
an irq even if the underlying event channel is bad. This allows a
device driver to shuffle its irqs on save/restore before the
underlying event channels have been fixed up.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/events.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -355,7 +355,7 @@
spin_lock(&irq_mapping_update_lock);
- if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
+ if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
close.port = evtchn;
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
BUG();
@@ -375,7 +375,7 @@
evtchn_to_irq[evtchn] = -1;
irq_info[irq] = IRQ_UNBOUND;
- dynamic_irq_init(irq);
+ dynamic_irq_cleanup(irq);
}
spin_unlock(&irq_mapping_update_lock);
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 08 of 12] xen-console: add save/restore
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
` (6 preceding siblings ...)
2008-05-23 13:41 ` [PATCH 07 of 12] xen: fix unbind_from_irq() Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-06-02 11:17 ` Ingo Molnar
2008-05-23 13:41 ` [PATCH 09 of 12] xenbus: rebind irq on restore Jeremy Fitzhardinge
` (3 subsequent siblings)
11 siblings, 1 reply; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
Add code to:
1. Deal with the console page being canonicalized. During save, the
console's mfn in the start_info structure is canonicalized to a pfn.
In order to deal with that, we always use a copy of the pfn and
indirect off that all the time. However, we fall back to using the
mfn if the pfn hasn't been initialized yet.
2. Restore the console event channel, and rebind it to the existing irq.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/char/hvc_xen.c | 25 +++++++++++++++++++++----
include/xen/hvc-console.h | 2 ++
2 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -39,9 +39,14 @@
/* ------------------------------------------------------------------ */
+static unsigned long console_pfn = ~0ul;
+
static inline struct xencons_interface *xencons_interface(void)
{
- return mfn_to_virt(xen_start_info->console.domU.mfn);
+ if (console_pfn == ~0ul)
+ return mfn_to_virt(xen_start_info->console.domU.mfn);
+ else
+ return __va(console_pfn << PAGE_SHIFT);
}
static inline void notify_daemon(void)
@@ -101,18 +106,30 @@
{
struct hvc_struct *hp;
- if (!is_running_on_xen())
- return 0;
+ if (!is_running_on_xen() ||
+ is_initial_xendomain() ||
+ !xen_start_info->console.domU.evtchn)
+ return -ENODEV;
xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
if (xencons_irq < 0)
- xencons_irq = 0 /* NO_IRQ */;
+ xencons_irq = 0; /* NO_IRQ */
+
hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256);
if (IS_ERR(hp))
return PTR_ERR(hp);
hvc = hp;
+
+ console_pfn = mfn_to_pfn(xen_start_info->console.domU.mfn);
+
return 0;
+}
+
+void xen_console_resume(void)
+{
+ if (xencons_irq)
+ rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq);
}
static void __exit xen_fini(void)
diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h
--- a/include/xen/hvc-console.h
+++ b/include/xen/hvc-console.h
@@ -3,6 +3,8 @@
extern struct console xenboot_console;
+void xen_console_resume(void);
+
void xen_raw_console_write(const char *str);
void xen_raw_printk(const char *fmt, ...);
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 08 of 12] xen-console: add save/restore
2008-05-23 13:41 ` [PATCH 08 of 12] xen-console: add save/restore Jeremy Fitzhardinge
@ 2008-06-02 11:17 ` Ingo Molnar
2008-06-02 11:18 ` Ingo Molnar
` (2 more replies)
0 siblings, 3 replies; 33+ messages in thread
From: Ingo Molnar @ 2008-06-02 11:17 UTC (permalink / raw)
To: Jeremy Fitzhardinge; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
-tip testing found the following xen-console symbols trouble:
ERROR: "console_drivers" [drivers/video/xen-fbfront.ko] undefined!
ERROR: "get_phys_to_machine" [drivers/video/xen-fbfront.ko] undefined!
ERROR: "get_phys_to_machine" [drivers/net/xen-netfront.ko] undefined!
ERROR: "get_phys_to_machine" [drivers/input/xen-kbdfront.ko] undefined!
with:
http://redhat.com/~mingo/misc/config-Mon_Jun__2_12_26_38_CEST_2008.bad
get_phys_to_machine can indeed be exported but i'm less sure about the
console_drivers export ... The temporary fix below resolves it for now.
Ingo
------------->
Subject: xen: build fixes
From: Ingo Molnar <mingo@elte.hu>
Date: Mon Jun 02 13:13:04 CEST 2008
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
arch/x86/xen/mmu.c | 1 +
kernel/printk.c | 2 ++
2 files changed, 3 insertions(+)
Index: linux/arch/x86/xen/mmu.c
===================================================================
--- linux.orig/arch/x86/xen/mmu.c
+++ linux/arch/x86/xen/mmu.c
@@ -136,6 +136,7 @@ unsigned long get_phys_to_machine(unsign
idx = p2m_index(pfn);
return p2m_top[topidx][idx];
}
+EXPORT_SYMBOL_GPL(get_phys_to_machine);
static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
{
Index: linux/kernel/printk.c
===================================================================
--- linux.orig/kernel/printk.c
+++ linux/kernel/printk.c
@@ -76,6 +76,8 @@ EXPORT_SYMBOL(oops_in_progress);
static DECLARE_MUTEX(console_sem);
static DECLARE_MUTEX(secondary_console_sem);
struct console *console_drivers;
+EXPORT_SYMBOL_GPL(console_drivers);
+
/*
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 08 of 12] xen-console: add save/restore
2008-06-02 11:17 ` Ingo Molnar
@ 2008-06-02 11:18 ` Ingo Molnar
2008-06-02 11:50 ` Jeremy Fitzhardinge
2008-06-02 12:13 ` [Xen-devel] " Markus Armbruster
2 siblings, 0 replies; 33+ messages in thread
From: Ingo Molnar @ 2008-06-02 11:18 UTC (permalink / raw)
To: Jeremy Fitzhardinge; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
* Ingo Molnar <mingo@elte.hu> wrote:
> -tip testing found the following xen-console symbols trouble:
>
> ERROR: "console_drivers" [drivers/video/xen-fbfront.ko] undefined!
> ERROR: "get_phys_to_machine" [drivers/video/xen-fbfront.ko] undefined!
> ERROR: "get_phys_to_machine" [drivers/net/xen-netfront.ko] undefined!
> ERROR: "get_phys_to_machine" [drivers/input/xen-kbdfront.ko] undefined!
>
> with:
>
> http://redhat.com/~mingo/misc/config-Mon_Jun__2_12_26_38_CEST_2008.bad
that would be:
http://redhat.com/~mingo/misc/config-Mon_Jun__2_12_25_13_CEST_2008.bad
Ingo
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 08 of 12] xen-console: add save/restore
2008-06-02 11:17 ` Ingo Molnar
2008-06-02 11:18 ` Ingo Molnar
@ 2008-06-02 11:50 ` Jeremy Fitzhardinge
2008-06-02 12:13 ` [Xen-devel] " Markus Armbruster
2 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-02 11:50 UTC (permalink / raw)
To: Ingo Molnar
Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki,
Markus Armbruster
Ingo Molnar wrote:
> -tip testing found the following xen-console symbols trouble:
>
> ERROR: "console_drivers" [drivers/video/xen-fbfront.ko] undefined!
> ERROR: "get_phys_to_machine" [drivers/video/xen-fbfront.ko] undefined!
> ERROR: "get_phys_to_machine" [drivers/net/xen-netfront.ko] undefined!
> ERROR: "get_phys_to_machine" [drivers/input/xen-kbdfront.ko] undefined!
>
> with:
>
> http://redhat.com/~mingo/misc/config-Mon_Jun__2_12_26_38_CEST_2008.bad
>
> get_phys_to_machine can indeed be exported but i'm less sure about the
> console_drivers export ... The temporary fix below resolves it for now.
>
Exporting "get_phys_to_machine" is definitely the right thing to do,
though it might be an idea to put it in the xen_ namespace.
The console changes are Markus' territory. Markus?
Thanks,
J
> Ingo
>
> ------------->
> Subject: xen: build fixes
> From: Ingo Molnar <mingo@elte.hu>
> Date: Mon Jun 02 13:13:04 CEST 2008
>
> Signed-off-by: Ingo Molnar <mingo@elte.hu>
> ---
> arch/x86/xen/mmu.c | 1 +
> kernel/printk.c | 2 ++
> 2 files changed, 3 insertions(+)
>
> Index: linux/arch/x86/xen/mmu.c
> ===================================================================
> --- linux.orig/arch/x86/xen/mmu.c
> +++ linux/arch/x86/xen/mmu.c
> @@ -136,6 +136,7 @@ unsigned long get_phys_to_machine(unsign
> idx = p2m_index(pfn);
> return p2m_top[topidx][idx];
> }
> +EXPORT_SYMBOL_GPL(get_phys_to_machine);
>
> static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
> {
> Index: linux/kernel/printk.c
> ===================================================================
> --- linux.orig/kernel/printk.c
> +++ linux/kernel/printk.c
> @@ -76,6 +76,8 @@ EXPORT_SYMBOL(oops_in_progress);
> static DECLARE_MUTEX(console_sem);
> static DECLARE_MUTEX(secondary_console_sem);
> struct console *console_drivers;
> +EXPORT_SYMBOL_GPL(console_drivers);
> +
> /*
> * This is used for debugging the mess that is the VT code by
> * keeping track if we have the console semaphore held. It's
>
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [Xen-devel] Re: [PATCH 08 of 12] xen-console: add save/restore
2008-06-02 11:17 ` Ingo Molnar
2008-06-02 11:18 ` Ingo Molnar
2008-06-02 11:50 ` Jeremy Fitzhardinge
@ 2008-06-02 12:13 ` Markus Armbruster
2 siblings, 0 replies; 33+ messages in thread
From: Markus Armbruster @ 2008-06-02 12:13 UTC (permalink / raw)
To: Ingo Molnar
Cc: Jeremy Fitzhardinge, Rafael J. Wysocki, Thomas Gleixner,
xen-devel, LKML
Ingo Molnar <mingo@elte.hu> writes:
> -tip testing found the following xen-console symbols trouble:
>
> ERROR: "console_drivers" [drivers/video/xen-fbfront.ko] undefined!
> ERROR: "get_phys_to_machine" [drivers/video/xen-fbfront.ko] undefined!
> ERROR: "get_phys_to_machine" [drivers/net/xen-netfront.ko] undefined!
> ERROR: "get_phys_to_machine" [drivers/input/xen-kbdfront.ko] undefined!
>
> with:
>
> http://redhat.com/~mingo/misc/config-Mon_Jun__2_12_26_38_CEST_2008.bad
>
> get_phys_to_machine can indeed be exported but i'm less sure about the
> console_drivers export ... The temporary fix below resolves it for now.
>
> Ingo
xen-fbfront makes itself the preferred console when it is actually
enabled. It does that by re-registering itself with CON_CONSDEV set,
and for that it needs to find its struct console. Simple, works.
Perhaps you can think of a better solution for this problem. I'm all
ears!
Details are in this commit:
xen: Enable console tty by default in domU if it's not a dummy
Without console= arguments on the kernel command line, the first
console to register becomes enabled and the preferred console (the one
behind /dev/console). This is normally tty (assuming
CONFIG_VT_CONSOLE is enabled, which it commonly is).
This is okay as long tty is a useful console. But unless we have the
PV framebuffer, and it is enabled for this domain, tty0 in domU is
merely a dummy. In that case, we want the preferred console to be the
Xen console hvc0, and we want it without having to fiddle with the
kernel command line. Commit b8c2d3dfbc117dff26058fbac316b8acfc2cb5f7
did that for us.
Since we now have the PV framebuffer, we want to enable and prefer tty
again, but only when PVFB is enabled. But even then we still want to
enable the Xen console as well.
Problem: when tty registers, we can't yet know whether the PVFB is
enabled. By the time we can know (xenstore is up), the console setup
game is over.
Solution: enable console tty by default, but keep hvc as the preferred
console. Change the preferred console to tty when PVFB probes
successfully, unless we've been given console kernel parameters.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
[...]
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 09 of 12] xenbus: rebind irq on restore
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
` (7 preceding siblings ...)
2008-05-23 13:41 ` [PATCH 08 of 12] xen-console: add save/restore Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 10 of 12] xen: implement save/restore Jeremy Fitzhardinge
` (2 subsequent siblings)
11 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
When restoring, rebind the existing xenbus irq to the new xenbus event
channel. (It turns out in practice that this is always the same, and
is never updated on restore. That's a bug, but Xeno-linux has been
like this for a long time, so it can't really be fixed.)
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/xenbus/xenbus_comms.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -203,7 +203,6 @@
int xb_init_comms(void)
{
struct xenstore_domain_interface *intf = xen_store_interface;
- int err;
if (intf->req_prod != intf->req_cons)
printk(KERN_ERR "XENBUS request ring is not quiescent "
@@ -216,18 +215,20 @@
intf->rsp_cons = intf->rsp_prod;
}
- if (xenbus_irq)
- unbind_from_irqhandler(xenbus_irq, &xb_waitq);
+ if (xenbus_irq) {
+ /* Already have an irq; assume we're resuming */
+ rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
+ } else {
+ int err;
+ err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
+ 0, "xenbus", &xb_waitq);
+ if (err <= 0) {
+ printk(KERN_ERR "XENBUS request irq failed %i\n", err);
+ return err;
+ }
- err = bind_evtchn_to_irqhandler(
- xen_store_evtchn, wake_waiting,
- 0, "xenbus", &xb_waitq);
- if (err <= 0) {
- printk(KERN_ERR "XENBUS request irq failed %i\n", err);
- return err;
+ xenbus_irq = err;
}
-
- xenbus_irq = err;
return 0;
}
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 10 of 12] xen: implement save/restore
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
` (8 preceding siblings ...)
2008-05-23 13:41 ` [PATCH 09 of 12] xenbus: rebind irq on restore Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-05-29 7:31 ` Ingo Molnar
2008-06-02 9:21 ` [Xen-devel] [PATCH 10 of 12] xen: implement save/restore Isaku Yamahata
2008-05-23 13:41 ` [PATCH 11 of 12] xen: maintain clock offset over save/restore Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 12 of 12] hrtimer: remove warning in hres_timers_resume Jeremy Fitzhardinge
11 siblings, 2 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
This patch implements Xen save/restore and migration.
Saving is triggered via xenbus, which is polled in
drivers/xen/manage.c. When a suspend request comes in, the kernel
prepares itself for saving by:
1 - Freeze all processes. This is primarily to prevent any
partially-completed pagetable updates from confusing the suspend
process. If CONFIG_PREEMPT isn't defined, then this isn't necessary.
2 - Suspend xenbus and other devices
3 - Stop_machine, to make sure all the other vcpus are quiescent. The
Xen tools require the domain to run its save off vcpu0.
4 - Within the stop_machine state, it pins any unpinned pgds (under
construction or destruction), performs canonicalizes various other
pieces of state (mostly converting mfns to pfns), and finally
5 - Suspend the domain
Restore reverses the steps used to save the domain, ending when all
the frozen processes are thawed.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/Makefile | 2
arch/x86/xen/enlighten.c | 6 +-
arch/x86/xen/mmu.c | 46 ++++++++++++++++
arch/x86/xen/smp.c | 2
arch/x86/xen/suspend.c | 42 ++++++++++++++
arch/x86/xen/time.c | 8 ++
arch/x86/xen/xen-ops.h | 4 +
drivers/xen/events.c | 83 +++++++++++++++++++++++++++++
drivers/xen/grant-table.c | 4 -
drivers/xen/manage.c | 124 +++++++++++++++++++++++++++++++++++++++-----
include/linux/page-flags.h | 1
include/xen/events.h | 3 +
include/xen/grant_table.h | 3 +
include/xen/xen-ops.h | 9 +++
14 files changed, 317 insertions(+), 20 deletions(-)
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
obj-y := enlighten.o setup.o multicalls.o mmu.o \
- time.o xen-asm.o grant-table.o
+ time.o xen-asm.o grant-table.o suspend.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -857,7 +857,7 @@
PFN_DOWN(__pa(xen_start_info->pt_base)));
}
-static __init void setup_shared_info(void)
+void xen_setup_shared_info(void)
{
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
@@ -894,7 +894,7 @@
pv_mmu_ops.release_pmd = xen_release_pmd;
pv_mmu_ops.set_pte = xen_set_pte;
- setup_shared_info();
+ xen_setup_shared_info();
/* Actually pin the pagetable down, but we can't set PG_pinned
yet because the page structures don't exist yet. */
@@ -902,7 +902,7 @@
}
/* This is called once we have the cpu_possible_map */
-void __init xen_setup_vcpu_info_placement(void)
+void xen_setup_vcpu_info_placement(void)
{
int cpu;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -560,6 +560,29 @@
xen_mc_issue(0);
}
+/*
+ * On save, we need to pin all pagetables to make sure they get their
+ * mfns turned into pfns. Search the list for any unpinned pgds and pin
+ * them (unpinned pgds are not currently in use, probably because the
+ * process is under construction or destruction).
+ */
+void xen_mm_pin_all(void)
+{
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+
+ list_for_each_entry(page, &pgd_list, lru) {
+ if (!PagePinned(page)) {
+ xen_pgd_pin((pgd_t *)page_address(page));
+ SetPageSavePinned(page);
+ }
+ }
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
/* The init_mm pagetable is really pinned as soon as its created, but
that's before we have page structures to store the bits. So do all
the book-keeping now. */
@@ -615,6 +638,29 @@
pgd_walk(pgd, unpin_page, TASK_SIZE);
xen_mc_issue(0);
+}
+
+/*
+ * On resume, undo any pinning done at save, so that the rest of the
+ * kernel doesn't see any unexpected pinned pagetables.
+ */
+void xen_mm_unpin_all(void)
+{
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+
+ list_for_each_entry(page, &pgd_list, lru) {
+ if (PageSavePinned(page)) {
+ BUG_ON(!PagePinned(page));
+ printk("unpinning pinned %p\n", page_address(page));
+ xen_pgd_unpin((pgd_t *)page_address(page));
+ ClearPageSavePinned(page);
+ }
+ }
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
}
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -35,7 +35,7 @@
#include "xen-ops.h"
#include "mmu.h"
-static cpumask_t xen_cpu_initialized_map;
+cpumask_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq) = -1;
static DEFINE_PER_CPU(int, callfunc_irq) = -1;
static DEFINE_PER_CPU(int, debug_irq) = -1;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
new file mode 100644
--- /dev/null
+++ b/arch/x86/xen/suspend.c
@@ -0,0 +1,42 @@
+#include <linux/types.h>
+
+#include <xen/interface/xen.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+#include "xen-ops.h"
+#include "mmu.h"
+
+void xen_pre_suspend(void)
+{
+ xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+ xen_start_info->console.domU.mfn =
+ mfn_to_pfn(xen_start_info->console.domU.mfn);
+
+ BUG_ON(!irqs_disabled());
+
+ HYPERVISOR_shared_info = &xen_dummy_shared_info;
+ if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
+ __pte_ma(0), 0))
+ BUG();
+}
+
+void xen_post_suspend(int suspend_cancelled)
+{
+ if (suspend_cancelled) {
+ xen_start_info->store_mfn =
+ pfn_to_mfn(xen_start_info->store_mfn);
+ xen_start_info->console.domU.mfn =
+ pfn_to_mfn(xen_start_info->console.domU.mfn);
+ } else {
+#ifdef CONFIG_SMP
+ xen_cpu_initialized_map = cpu_online_map;
+#endif
+ }
+
+ xen_setup_shared_info();
+}
+
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -565,6 +565,14 @@
clockevents_register_device(&__get_cpu_var(xen_clock_events));
}
+void xen_time_suspend(void)
+{
+}
+
+void xen_time_resume(void)
+{
+}
+
__init void xen_time_init(void)
{
int cpu = smp_processor_id();
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -9,6 +9,7 @@
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
+struct trap_info;
void xen_copy_trap_info(struct trap_info *traps);
DECLARE_PER_CPU(unsigned long, xen_cr3);
@@ -19,6 +20,7 @@
extern struct shared_info *HYPERVISOR_shared_info;
void xen_setup_mfn_list_list(void);
+void xen_setup_shared_info(void);
char * __init xen_memory_setup(void);
void __init xen_arch_setup(void);
@@ -59,6 +61,8 @@
int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
void *info, int wait);
+extern cpumask_t xen_cpu_initialized_map;
+
/* Declare an asm function, along with symbols needed to make it
inlineable */
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -674,6 +674,89 @@
return ret;
}
+static void restore_cpu_virqs(unsigned int cpu)
+{
+ struct evtchn_bind_virq bind_virq;
+ int virq, irq, evtchn;
+
+ for (virq = 0; virq < NR_VIRQS; virq++) {
+ if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
+ continue;
+
+ BUG_ON(irq_info[irq].type != IRQT_VIRQ);
+ BUG_ON(irq_info[irq].index != virq);
+
+ /* Get a new binding from Xen. */
+ bind_virq.virq = virq;
+ bind_virq.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq) != 0)
+ BUG();
+ evtchn = bind_virq.port;
+
+ /* Record the new mapping. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+ bind_evtchn_to_cpu(evtchn, cpu);
+
+ /* Ready for use. */
+ unmask_evtchn(evtchn);
+ }
+}
+
+static void restore_cpu_ipis(unsigned int cpu)
+{
+ struct evtchn_bind_ipi bind_ipi;
+ int ipi, irq, evtchn;
+
+ for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
+ if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
+ continue;
+
+ BUG_ON(irq_info[irq].type != IRQT_IPI);
+ BUG_ON(irq_info[irq].index != ipi);
+
+ /* Get a new binding from Xen. */
+ bind_ipi.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+ &bind_ipi) != 0)
+ BUG();
+ evtchn = bind_ipi.port;
+
+ /* Record the new mapping. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+ bind_evtchn_to_cpu(evtchn, cpu);
+
+ /* Ready for use. */
+ unmask_evtchn(evtchn);
+
+ }
+}
+
+void xen_irq_resume(void)
+{
+ unsigned int cpu, irq, evtchn;
+
+ init_evtchn_cpu_bindings();
+
+ /* New event-channel space is not 'live' yet. */
+ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+ mask_evtchn(evtchn);
+
+ /* No IRQ <-> event-channel mappings. */
+ for (irq = 0; irq < NR_IRQS; irq++)
+ irq_info[irq].evtchn = 0; /* zap event-channel binding */
+
+ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+ evtchn_to_irq[evtchn] = -1;
+
+ for_each_possible_cpu(cpu) {
+ restore_cpu_virqs(cpu);
+ restore_cpu_ipis(cpu);
+ }
+}
+
static struct irq_chip xen_dynamic_chip __read_mostly = {
.name = "xen-dyn",
.mask = disable_dynirq,
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -471,14 +471,14 @@
return 0;
}
-static int gnttab_resume(void)
+int gnttab_resume(void)
{
if (max_nr_grant_frames() < nr_grant_frames)
return -ENOSYS;
return gnttab_map(0, nr_grant_frames - 1);
}
-static int gnttab_suspend(void)
+int gnttab_suspend(void)
{
arch_gnttab_unmap_shared(shared, nr_grant_frames);
return 0;
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -5,21 +5,113 @@
#include <linux/err.h>
#include <linux/reboot.h>
#include <linux/sysrq.h>
+#include <linux/stop_machine.h>
+#include <linux/freezer.h>
#include <xen/xenbus.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <xen/hvc-console.h>
+#include <xen/xen-ops.h>
-#define SHUTDOWN_INVALID -1
-#define SHUTDOWN_POWEROFF 0
-#define SHUTDOWN_SUSPEND 2
-/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
- * report a crash, not be instructed to crash!
- * HALT is the same as POWEROFF, as far as we're concerned. The tools use
- * the distinction when we return the reason code to them.
- */
-#define SHUTDOWN_HALT 4
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+enum shutdown_state {
+ SHUTDOWN_INVALID = -1,
+ SHUTDOWN_POWEROFF = 0,
+ SHUTDOWN_SUSPEND = 2,
+ /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
+ report a crash, not be instructed to crash!
+ HALT is the same as POWEROFF, as far as we're concerned. The tools use
+ the distinction when we return the reason code to them. */
+ SHUTDOWN_HALT = 4,
+};
/* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
+static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
+
+static int xen_suspend(void *data)
+{
+ int *cancelled = data;
+
+ BUG_ON(!irqs_disabled());
+
+ load_cr3(swapper_pg_dir);
+
+ xen_mm_pin_all();
+ gnttab_suspend();
+ xen_time_suspend();
+ xen_pre_suspend();
+
+ /*
+ * This hypercall returns 1 if suspend was cancelled
+ * or the domain was merely checkpointed, and 0 if it
+ * is resuming in a new domain.
+ */
+ *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+ xen_post_suspend(*cancelled);
+ xen_time_resume();
+ gnttab_resume();
+ xen_mm_unpin_all();
+
+ if (!*cancelled) {
+ xen_irq_resume();
+ xen_console_resume();
+ }
+
+ return 0;
+}
+
+static void do_suspend(void)
+{
+ int err;
+ int cancelled = 1;
+
+ shutting_down = SHUTDOWN_SUSPEND;
+
+#ifdef CONFIG_PREEMPT
+ /* If the kernel is preemptible, we need to freeze all the processes
+ to prevent them from being in the middle of a pagetable update
+ during suspend. */
+ err = freeze_processes();
+ if (err) {
+ printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
+ return;
+ }
+#endif
+
+ err = device_suspend(PMSG_SUSPEND);
+ if (err) {
+ printk(KERN_ERR "xen suspend: device_suspend %d\n", err);
+ goto out;
+ }
+
+ printk("suspending xenbus...\n");
+ /* XXX use normal device tree? */
+ xenbus_suspend();
+
+ err = stop_machine_run(xen_suspend, &cancelled, 0);
+ if (err) {
+ printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
+ goto out;
+ }
+
+ if (!cancelled)
+ xenbus_resume();
+ else
+ xenbus_suspend_cancel();
+
+ device_resume();
+
+
+out:
+#ifdef CONFIG_PREEMPT
+ thaw_processes();
+#endif
+ shutting_down = SHUTDOWN_INVALID;
+}
static void shutdown_handler(struct xenbus_watch *watch,
const char **vec, unsigned int len)
@@ -52,11 +144,17 @@
}
if (strcmp(str, "poweroff") == 0 ||
- strcmp(str, "halt") == 0)
+ strcmp(str, "halt") == 0) {
+ shutting_down = SHUTDOWN_POWEROFF;
orderly_poweroff(false);
- else if (strcmp(str, "reboot") == 0)
+ } else if (strcmp(str, "reboot") == 0) {
+ shutting_down = SHUTDOWN_POWEROFF; /* ? */
ctrl_alt_del();
- else {
+#ifdef CONFIG_PM_SLEEP
+ } else if (strcmp(str, "suspend") == 0) {
+ do_suspend();
+#endif
+ } else {
printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
shutting_down = SHUTDOWN_INVALID;
}
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -157,6 +157,7 @@
__PAGEFLAG(Slab, slab)
PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */
PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */
+PAGEFLAG(SavePinned, dirty); /* Xen */
PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
__SETPAGEFLAG(Private, private)
diff --git a/include/xen/events.h b/include/xen/events.h
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -41,4 +41,7 @@
}
extern void notify_remote_via_irq(int irq);
+
+extern void xen_irq_resume(void);
+
#endif /* _XEN_EVENTS_H */
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -50,6 +50,9 @@
void *arg;
u16 count;
};
+
+int gnttab_suspend(void);
+int gnttab_resume(void);
int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
int readonly);
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,4 +5,13 @@
DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
+void xen_pre_suspend(void);
+void xen_post_suspend(int suspend_cancelled);
+
+void xen_mm_pin_all(void);
+void xen_mm_unpin_all(void);
+
+void xen_time_suspend(void);
+void xen_time_resume(void);
+
#endif /* INCLUDE_XEN_OPS_H */
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 10 of 12] xen: implement save/restore
2008-05-23 13:41 ` [PATCH 10 of 12] xen: implement save/restore Jeremy Fitzhardinge
@ 2008-05-29 7:31 ` Ingo Molnar
2008-05-29 8:00 ` Jeremy Fitzhardinge
2008-05-29 8:02 ` [PATCH] xen: fix compilation when CONFIG_PM_SLEEP is disabled Jeremy Fitzhardinge
2008-06-02 9:21 ` [Xen-devel] [PATCH 10 of 12] xen: implement save/restore Isaku Yamahata
1 sibling, 2 replies; 33+ messages in thread
From: Ingo Molnar @ 2008-05-29 7:31 UTC (permalink / raw)
To: Jeremy Fitzhardinge; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
* Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> This patch implements Xen save/restore and migration.
-tip testing found the following build breakage:
drivers/built-in.o: In function `xen_suspend':
manage.c:(.text+0x4390f): undefined reference to `xen_console_resume'
with this config:
http://redhat.com/~mingo/misc/config-Thu_May_29_09_23_16_CEST_2008.bad
i have bisected it down to:
| commit 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58
| Author: Jeremy Fitzhardinge <jeremy@goop.org>
| Date: Mon May 26 23:31:27 2008 +0100
|
| xen: implement save/restore
the problem is that drivers/xen/manage.c is built unconditionally if
CONFIG_XEN is enabled and makes use of xen_suspend(), but
drivers/char/hvc_xen.c, where the xen_suspend() method is implemented,
is only build if CONFIG_HVC_XEN=y as well.
i have solved this by providing a NOP implementation for xen_suspend()
in the !CONFIG_HVC_XEN case.
Ingo
---
include/xen/hvc-console.h | 4 ++++
1 file changed, 4 insertions(+)
Index: linux/include/xen/hvc-console.h
===================================================================
--- linux.orig/include/xen/hvc-console.h
+++ linux/include/xen/hvc-console.h
@@ -3,7 +3,11 @@
extern struct console xenboot_console;
+#ifdef CONFIG_HVC_XEN
void xen_console_resume(void);
+#else
+static inline void xen_console_resume(void) { }
+#endif
void xen_raw_console_write(const char *str);
void xen_raw_printk(const char *fmt, ...);
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 10 of 12] xen: implement save/restore
2008-05-29 7:31 ` Ingo Molnar
@ 2008-05-29 8:00 ` Jeremy Fitzhardinge
2008-05-29 8:02 ` [PATCH] xen: fix compilation when CONFIG_PM_SLEEP is disabled Jeremy Fitzhardinge
1 sibling, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-29 8:00 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
Ingo Molnar wrote:
> * Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>
>
>> This patch implements Xen save/restore and migration.
>>
>
> -tip testing found the following build breakage:
>
> drivers/built-in.o: In function `xen_suspend':
> manage.c:(.text+0x4390f): undefined reference to `xen_console_resume'
>
> with this config:
>
> http://redhat.com/~mingo/misc/config-Thu_May_29_09_23_16_CEST_2008.bad
>
> i have bisected it down to:
>
> | commit 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58
> | Author: Jeremy Fitzhardinge <jeremy@goop.org>
> | Date: Mon May 26 23:31:27 2008 +0100
> |
> | xen: implement save/restore
>
> the problem is that drivers/xen/manage.c is built unconditionally if
> CONFIG_XEN is enabled and makes use of xen_suspend(), but
> drivers/char/hvc_xen.c, where the xen_suspend() method is implemented,
> is only build if CONFIG_HVC_XEN=y as well.
>
> i have solved this by providing a NOP implementation for xen_suspend()
> in the !CONFIG_HVC_XEN case.
>
Thanks Ingo, looks good.
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
J
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH] xen: fix compilation when CONFIG_PM_SLEEP is disabled
2008-05-29 7:31 ` Ingo Molnar
2008-05-29 8:00 ` Jeremy Fitzhardinge
@ 2008-05-29 8:02 ` Jeremy Fitzhardinge
2008-05-30 0:24 ` Randy Dunlap
1 sibling, 1 reply; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-29 8:02 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Randy Dunlap
Xen save/restore depends on CONFIG_PM_SLEEP being set for device_power_up/down.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
---
drivers/xen/manage.c | 2 ++
1 file changed, 2 insertions(+)
===================================================================
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -31,6 +31,7 @@
/* Ignore multiple shutdown requests. */
static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
+#ifdef CONFIG_PM_SLEEP
static int xen_suspend(void *data)
{
int *cancelled = data;
@@ -121,6 +122,7 @@
#endif
shutting_down = SHUTDOWN_INVALID;
}
+#endif /* CONFIG_PM_SLEEP */
static void shutdown_handler(struct xenbus_watch *watch,
const char **vec, unsigned int len)
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH] xen: fix compilation when CONFIG_PM_SLEEP is disabled
2008-05-29 8:02 ` [PATCH] xen: fix compilation when CONFIG_PM_SLEEP is disabled Jeremy Fitzhardinge
@ 2008-05-30 0:24 ` Randy Dunlap
2008-06-02 10:13 ` Ingo Molnar
0 siblings, 1 reply; 33+ messages in thread
From: Randy Dunlap @ 2008-05-30 0:24 UTC (permalink / raw)
To: Jeremy Fitzhardinge; +Cc: Ingo Molnar, LKML, xen-devel, Thomas Gleixner
On Thu, 29 May 2008 09:02:19 +0100 Jeremy Fitzhardinge wrote:
> Xen save/restore depends on CONFIG_PM_SLEEP being set for device_power_up/down.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> Cc: Randy Dunlap <randy.dunlap@oracle.com>
Ack. Thanks.
> ---
> drivers/xen/manage.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> ===================================================================
> --- a/drivers/xen/manage.c
> +++ b/drivers/xen/manage.c
> @@ -31,6 +31,7 @@
> /* Ignore multiple shutdown requests. */
> static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
>
> +#ifdef CONFIG_PM_SLEEP
> static int xen_suspend(void *data)
> {
> int *cancelled = data;
> @@ -121,6 +122,7 @@
> #endif
> shutting_down = SHUTDOWN_INVALID;
> }
> +#endif /* CONFIG_PM_SLEEP */
>
> static void shutdown_handler(struct xenbus_watch *watch,
> const char **vec, unsigned int len)
---
~Randy
"He closes his eyes and drops the goggles. You can't get hurt
by looking at a bitmap. Or can you?"
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH] xen: fix compilation when CONFIG_PM_SLEEP is disabled
2008-05-30 0:24 ` Randy Dunlap
@ 2008-06-02 10:13 ` Ingo Molnar
0 siblings, 0 replies; 33+ messages in thread
From: Ingo Molnar @ 2008-06-02 10:13 UTC (permalink / raw)
To: Randy Dunlap; +Cc: Jeremy Fitzhardinge, LKML, xen-devel, Thomas Gleixner
* Randy Dunlap <randy.dunlap@oracle.com> wrote:
> On Thu, 29 May 2008 09:02:19 +0100 Jeremy Fitzhardinge wrote:
>
> > Xen save/restore depends on CONFIG_PM_SLEEP being set for device_power_up/down.
> >
> > Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> > Cc: Randy Dunlap <randy.dunlap@oracle.com>
>
> Ack. Thanks.
applied to tip/x86/xen. Thanks,
Ingo
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Xen-devel] [PATCH 10 of 12] xen: implement save/restore
2008-05-23 13:41 ` [PATCH 10 of 12] xen: implement save/restore Jeremy Fitzhardinge
2008-05-29 7:31 ` Ingo Molnar
@ 2008-06-02 9:21 ` Isaku Yamahata
2008-06-02 10:03 ` Jeremy Fitzhardinge
1 sibling, 1 reply; 33+ messages in thread
From: Isaku Yamahata @ 2008-06-02 9:21 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Rafael J. Wysocki, Thomas Gleixner, xen-devel, LKML
On Fri, May 23, 2008 at 02:41:17PM +0100, Jeremy Fitzhardinge wrote:
> diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
> --- a/drivers/xen/manage.c
> +++ b/drivers/xen/manage.c
...
> +static int xen_suspend(void *data)
> +{
> + int *cancelled = data;
> +
> + BUG_ON(!irqs_disabled());
> +
> + load_cr3(swapper_pg_dir);
What is the purpose of load_cr3() here?
I'd like to make this load_cr3() more arch generic for ia64 support.
(or eliminate it if possible)
--
yamahata
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [Xen-devel] [PATCH 10 of 12] xen: implement save/restore
2008-06-02 9:21 ` [Xen-devel] [PATCH 10 of 12] xen: implement save/restore Isaku Yamahata
@ 2008-06-02 10:03 ` Jeremy Fitzhardinge
2008-06-02 10:47 ` Isaku Yamahata
0 siblings, 1 reply; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-02 10:03 UTC (permalink / raw)
To: Isaku Yamahata
Cc: Ingo Molnar, Rafael J. Wysocki, Thomas Gleixner, xen-devel, LKML
Isaku Yamahata wrote:
> What is the purpose of load_cr3() here?
> I'd like to make this load_cr3() more arch generic for ia64 support.
> (or eliminate it if possible)
>
I think it's an unnecessary left-over from when I was trying to get that
stuff to work. I can't think of a good reason not to just remove it if
it causes you problems.
BTW, I tried to split the suspend/resume stuff into common and things
which were definitely x86-specific with you in mind. How close did I get?
J
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [Xen-devel] [PATCH 10 of 12] xen: implement save/restore
2008-06-02 10:03 ` Jeremy Fitzhardinge
@ 2008-06-02 10:47 ` Isaku Yamahata
2008-06-02 10:52 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 33+ messages in thread
From: Isaku Yamahata @ 2008-06-02 10:47 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Rafael J. Wysocki, Ingo Molnar, xen-devel, Thomas Gleixner, LKML
On Mon, Jun 02, 2008 at 11:03:36AM +0100, Jeremy Fitzhardinge wrote:
> Isaku Yamahata wrote:
> >What is the purpose of load_cr3() here?
> >I'd like to make this load_cr3() more arch generic for ia64 support.
> >(or eliminate it if possible)
> >
>
> I think it's an unnecessary left-over from when I was trying to get that
> stuff to work. I can't think of a good reason not to just remove it if
> it causes you problems.
Here is the patch. I did only compile test.
> BTW, I tried to split the suspend/resume stuff into common and things
> which were definitely x86-specific with you in mind. How close did I get?
Almost complete. Your effort made my task easier.
I haven't yet succeeded to save/restore, though.
Is CONFIG_PM_SLEEP necessary?
Since ia64 doesn't define ARCH_HIBERNATION_POSSIBLE nor
ARCH_SUSPEND_POSSIBLE.
Although I can define them in ia64/xen/Kconfig, I'd like to leave
them untouched if possible.
>From 93906ae3785206cdd1e25e27b3797914f24f55aa Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 2 Jun 2008 19:36:20 +0900
Subject: [PATCH] xen: clean up of manage.c
remove load_cr3() from manage.c. It is unncessary and x86 dependent.
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
drivers/xen/manage.c | 2 --
1 files changed, 0 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 675c3dd..28bb618 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -38,8 +38,6 @@ static int xen_suspend(void *data)
BUG_ON(!irqs_disabled());
- load_cr3(swapper_pg_dir);
-
err = device_power_down(PMSG_SUSPEND);
if (err) {
printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n",
--
1.5.3
--
yamahata
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [Xen-devel] [PATCH 10 of 12] xen: implement save/restore
2008-06-02 10:47 ` Isaku Yamahata
@ 2008-06-02 10:52 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-02 10:52 UTC (permalink / raw)
To: Isaku Yamahata
Cc: Rafael J. Wysocki, Ingo Molnar, xen-devel, Thomas Gleixner, LKML
Isaku Yamahata wrote:
> On Mon, Jun 02, 2008 at 11:03:36AM +0100, Jeremy Fitzhardinge wrote:
>
>> Isaku Yamahata wrote:
>>
>>> What is the purpose of load_cr3() here?
>>> I'd like to make this load_cr3() more arch generic for ia64 support.
>>> (or eliminate it if possible)
>>>
>>>
>> I think it's an unnecessary left-over from when I was trying to get that
>> stuff to work. I can't think of a good reason not to just remove it if
>> it causes you problems.
>>
>
> Here is the patch. I did only compile test.
>
>
>
>> BTW, I tried to split the suspend/resume stuff into common and things
>> which were definitely x86-specific with you in mind. How close did I get?
>>
>
> Almost complete. Your effort made my task easier.
> I haven't yet succeeded to save/restore, though.
>
> Is CONFIG_PM_SLEEP necessary?
>
Yes.
It's necessary because the Xen save/restore code calls into various core
functions to handle things like resuming timekeeping; that code is
compiled controlled by CONFIG_PM_SLEEP.
> Since ia64 doesn't define ARCH_HIBERNATION_POSSIBLE nor
> ARCH_SUSPEND_POSSIBLE.
> Although I can define them in ia64/xen/Kconfig, I'd like to leave
> them untouched if possible.
>
Hm, not sure how can address that then.
I'll confirm test that the load_cr3() is unnecessary later today.
J
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 11 of 12] xen: maintain clock offset over save/restore
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
` (9 preceding siblings ...)
2008-05-23 13:41 ` [PATCH 10 of 12] xen: implement save/restore Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
2008-05-23 13:41 ` [PATCH 12 of 12] hrtimer: remove warning in hres_timers_resume Jeremy Fitzhardinge
11 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
Hook into the device model to make sure that timekeeping's resume handler
is called. This deals with our clocksource's non-monotonicity over the
save/restore. Explicitly call clock_has_changed() to make sure that
all the timers get retriggered properly.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/time.c | 8 --------
drivers/xen/manage.c | 15 ++++++++++++---
include/xen/xen-ops.h | 3 ---
3 files changed, 12 insertions(+), 14 deletions(-)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -565,14 +565,6 @@
clockevents_register_device(&__get_cpu_var(xen_clock_events));
}
-void xen_time_suspend(void)
-{
-}
-
-void xen_time_resume(void)
-{
-}
-
__init void xen_time_init(void)
{
int cpu = smp_processor_id();
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -34,14 +34,21 @@
static int xen_suspend(void *data)
{
int *cancelled = data;
+ int err;
BUG_ON(!irqs_disabled());
load_cr3(swapper_pg_dir);
+ err = device_power_down(PMSG_SUSPEND);
+ if (err) {
+ printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n",
+ err);
+ return err;
+ }
+
xen_mm_pin_all();
gnttab_suspend();
- xen_time_suspend();
xen_pre_suspend();
/*
@@ -52,9 +59,10 @@
*cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
xen_post_suspend(*cancelled);
- xen_time_resume();
gnttab_resume();
xen_mm_unpin_all();
+
+ device_power_up();
if (!*cancelled) {
xen_irq_resume();
@@ -105,7 +113,8 @@
device_resume();
-
+ /* Make sure timer events get retriggered on all CPUs */
+ clock_was_set();
out:
#ifdef CONFIG_PREEMPT
thaw_processes();
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -11,7 +11,4 @@
void xen_mm_pin_all(void);
void xen_mm_unpin_all(void);
-void xen_time_suspend(void);
-void xen_time_resume(void);
-
#endif /* INCLUDE_XEN_OPS_H */
^ permalink raw reply [flat|nested] 33+ messages in thread* [PATCH 12 of 12] hrtimer: remove warning in hres_timers_resume
2008-05-23 13:41 [PATCH 00 of 12] xen: add save/restore/migrate for Xen domains Jeremy Fitzhardinge
` (10 preceding siblings ...)
2008-05-23 13:41 ` [PATCH 11 of 12] xen: maintain clock offset over save/restore Jeremy Fitzhardinge
@ 2008-05-23 13:41 ` Jeremy Fitzhardinge
11 siblings, 0 replies; 33+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, xen-devel, Thomas Gleixner, Rafael J. Wysocki
hres_timers_resume() warns if there appears to be more than one cpu
online. This warning makes sense when the suspend/resume mechanism
offlines all cpus but one during the suspend/resume process.
However, Xen suspend does not need to offline the other cpus; it
merely keeps them tied up in stop_machine() while the virtual machine
is suspended. The warning hres_timers_resume issues is therefore
spurious.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
kernel/hrtimer.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -632,8 +632,6 @@
*/
void hres_timers_resume(void)
{
- WARN_ON_ONCE(num_online_cpus() > 1);
-
/* Retrigger the CPU local events: */
retrigger_next_event(NULL);
}
^ permalink raw reply [flat|nested] 33+ messages in thread