* [PATCH v2 1/5] x86/shadow: tidy up fragmentary page lists in multi‑page shadows
2015-02-02 11:09 [PATCH v2 0/5] x86: shadow adjustments / allow for more memory to be used Jan Beulich
@ 2015-02-02 11:18 ` Jan Beulich
2015-02-02 11:43 ` Andrew Cooper
2015-02-02 11:19 ` [PATCH v2 2/5] x86/shadow: don't needlessly expose internal functions Jan Beulich
` (4 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2015-02-02 11:18 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Tim Deegan
[-- Attachment #1: Type: text/plain, Size: 14814 bytes --]
From: Tim Deegan <tim@xen.org>
Multi-page shadows are linked together using the 'list' field. When
those shadows are in the pinned list, the list fragments are spliced
into the pinned list; otherwise they have no associated list head.
Rework the code that handles these fragments to use the page_list
interface rather than manipulating the fields directly. This makes
the code cleaner, and allows the 'list' field to be either the
compact pdx form or a normal list_entry.
Signed-off-by: Tim Deegan <tim@xen.org>
Introduce sh_terminate_list() and make it use LIST_POISON*.
Move helper array of shadow_size() into common.c.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1196,6 +1196,26 @@ int shadow_cmpxchg_guest_entry(struct vc
* the free pool.
*/
+const u32 sh_type_to_size[] = {
+ 1, /* SH_type_none */
+ 2, /* SH_type_l1_32_shadow */
+ 2, /* SH_type_fl1_32_shadow */
+ 4, /* SH_type_l2_32_shadow */
+ 1, /* SH_type_l1_pae_shadow */
+ 1, /* SH_type_fl1_pae_shadow */
+ 1, /* SH_type_l2_pae_shadow */
+ 1, /* SH_type_l2h_pae_shadow */
+ 1, /* SH_type_l1_64_shadow */
+ 1, /* SH_type_fl1_64_shadow */
+ 1, /* SH_type_l2_64_shadow */
+ 1, /* SH_type_l2h_64_shadow */
+ 1, /* SH_type_l3_64_shadow */
+ 1, /* SH_type_l4_64_shadow */
+ 1, /* SH_type_p2m_table */
+ 1, /* SH_type_monitor_table */
+ 1 /* SH_type_oos_snapshot */
+};
+
/* Figure out the least acceptable quantity of shadow memory.
* The minimum memory requirement for always being able to free up a
* chunk of memory is very small -- only three max-order chunks per
@@ -1218,33 +1238,6 @@ static unsigned int shadow_min_acceptabl
return (vcpu_count * 128);
}
-/* Figure out the size (in pages) of a given shadow type */
-static inline u32
-shadow_size(unsigned int shadow_type)
-{
- static const u32 type_to_size[SH_type_unused] = {
- 1, /* SH_type_none */
- 2, /* SH_type_l1_32_shadow */
- 2, /* SH_type_fl1_32_shadow */
- 4, /* SH_type_l2_32_shadow */
- 1, /* SH_type_l1_pae_shadow */
- 1, /* SH_type_fl1_pae_shadow */
- 1, /* SH_type_l2_pae_shadow */
- 1, /* SH_type_l2h_pae_shadow */
- 1, /* SH_type_l1_64_shadow */
- 1, /* SH_type_fl1_64_shadow */
- 1, /* SH_type_l2_64_shadow */
- 1, /* SH_type_l2h_64_shadow */
- 1, /* SH_type_l3_64_shadow */
- 1, /* SH_type_l4_64_shadow */
- 1, /* SH_type_p2m_table */
- 1, /* SH_type_monitor_table */
- 1 /* SH_type_oos_snapshot */
- };
- ASSERT(shadow_type < SH_type_unused);
- return type_to_size[shadow_type];
-}
-
/* Dispatcher function: call the per-mode function that will unhook the
* non-Xen mappings in this top-level shadow mfn. With user_only == 1,
* unhooks only the user-mode mappings. */
@@ -1487,9 +1480,6 @@ mfn_t shadow_alloc(struct domain *d,
break;
}
- /* Page lists don't have pointers back to the head structure, so
- * it's safe to use a head structure on the stack to link the pages
- * together. */
INIT_PAGE_LIST_HEAD(&tmp_list);
/* Init page info fields and clear the pages */
@@ -1523,6 +1513,9 @@ mfn_t shadow_alloc(struct domain *d,
if ( shadow_type >= SH_type_min_shadow
&& shadow_type <= SH_type_max_shadow )
sp->u.sh.head = 1;
+
+ sh_terminate_list(&tmp_list);
+
return page_to_mfn(sp);
}
@@ -1531,6 +1524,7 @@ mfn_t shadow_alloc(struct domain *d,
void shadow_free(struct domain *d, mfn_t smfn)
{
struct page_info *next = NULL, *sp = mfn_to_page(smfn);
+ struct page_list_head *pin_list;
unsigned int pages;
u32 shadow_type;
int i;
@@ -1542,6 +1536,7 @@ void shadow_free(struct domain *d, mfn_t
ASSERT(shadow_type != SH_type_none);
ASSERT(sp->u.sh.head || (shadow_type > SH_type_max_shadow));
pages = shadow_size(shadow_type);
+ pin_list = &d->arch.paging.shadow.pinned_shadows;
for ( i = 0; i < pages; i++ )
{
@@ -1562,7 +1557,7 @@ void shadow_free(struct domain *d, mfn_t
#endif
/* Get the next page before we overwrite the list header */
if ( i < pages - 1 )
- next = pdx_to_page(sp->list.next);
+ next = page_list_next(sp, pin_list);
/* Strip out the type: this is now a free shadow page */
sp->u.sh.type = sp->u.sh.head = 0;
/* Remember the TLB timestamp so we will know whether to flush
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -428,20 +428,20 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns
/* From one page of a multi-page shadow, find the next one */
static inline mfn_t sh_next_page(mfn_t smfn)
{
- mfn_t next;
- struct page_info *pg = mfn_to_page(smfn);
+ struct page_info *pg = mfn_to_page(smfn), *next;
+ struct page_list_head h = PAGE_LIST_HEAD_INIT(h);
ASSERT(pg->u.sh.type == SH_type_l1_32_shadow
|| pg->u.sh.type == SH_type_fl1_32_shadow
|| pg->u.sh.type == SH_type_l2_32_shadow);
ASSERT(pg->u.sh.type == SH_type_l2_32_shadow || pg->u.sh.head);
- ASSERT(pg->list.next != PAGE_LIST_NULL);
- next = _mfn(pdx_to_pfn(pg->list.next));
+ next = page_list_next(pg, &h);
- ASSERT(mfn_to_page(next)->u.sh.type == pg->u.sh.type);
- ASSERT(!mfn_to_page(next)->u.sh.head);
- return next;
+ ASSERT(next);
+ ASSERT(next->u.sh.type == pg->u.sh.type);
+ ASSERT(!next->u.sh.head);
+ return page_to_mfn(next);
}
static inline u32
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -231,6 +231,17 @@ static inline int sh_type_has_up_pointer
return !sh_type_is_pinnable(v, t);
}
+static inline void sh_terminate_list(struct page_list_head *tmp_list)
+{
+#ifndef PAGE_LIST_NULL
+ /* The temporary list-head is on our stack. Invalidate the
+ * pointers to it in the shadows, just to get a clean failure if
+ * we accidentally follow them. */
+ tmp_list->prev->next = LIST_POISON1;
+ tmp_list->next->prev = LIST_POISON2;
+#endif
+}
+
/*
* Definitions for the shadow_flags field in page_info.
* These flags are stored on *guest* pages...
@@ -318,6 +329,15 @@ static inline int mfn_oos_may_write(mfn_
}
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
+/* Figure out the size (in pages) of a given shadow type */
+extern const u32 sh_type_to_size[SH_type_unused];
+static inline u32
+shadow_size(unsigned int shadow_type)
+{
+ ASSERT(shadow_type < ARRAY_SIZE(sh_type_to_size));
+ return sh_type_to_size[shadow_type];
+}
+
/******************************************************************************
* Various function declarations
*/
@@ -586,22 +606,25 @@ prev_pinned_shadow(const struct page_inf
const struct domain *d)
{
struct page_info *p;
+ const struct page_list_head *pin_list;
+
+ pin_list = &d->arch.paging.shadow.pinned_shadows;
- if ( page == d->arch.paging.shadow.pinned_shadows.next )
+ if ( page_list_empty(pin_list) || page == page_list_first(pin_list) )
return NULL;
-
+
if ( page == NULL ) /* If no current place, start at the tail */
- p = d->arch.paging.shadow.pinned_shadows.tail;
+ p = page_list_last(pin_list);
else
- p = pdx_to_page(page->list.prev);
+ p = page_list_prev(page, pin_list);
/* Skip over the non-tail parts of multi-page shadows */
if ( p && p->u.sh.type == SH_type_l2_32_shadow )
{
- p = pdx_to_page(p->list.prev);
+ p = page_list_prev(p, pin_list);
ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
- p = pdx_to_page(p->list.prev);
+ p = page_list_prev(p, pin_list);
ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
- p = pdx_to_page(p->list.prev);
+ p = page_list_prev(p, pin_list);
ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
}
ASSERT(!p || p->u.sh.head);
@@ -618,49 +641,48 @@ prev_pinned_shadow(const struct page_inf
* Returns 0 for failure, 1 for success. */
static inline int sh_pin(struct vcpu *v, mfn_t smfn)
{
- struct page_info *sp;
- struct page_list_head h, *pin_list;
-
+ struct page_info *sp[4];
+ struct page_list_head *pin_list;
+ unsigned int i, pages;
+ bool_t already_pinned;
+
ASSERT(mfn_valid(smfn));
- sp = mfn_to_page(smfn);
- ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
- ASSERT(sp->u.sh.head);
+ sp[0] = mfn_to_page(smfn);
+ pages = shadow_size(sp[0]->u.sh.type);
+ already_pinned = sp[0]->u.sh.pinned;
+ ASSERT(sh_type_is_pinnable(v, sp[0]->u.sh.type));
+ ASSERT(sp[0]->u.sh.head);
+
+ pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
+ if ( already_pinned && sp[0] == page_list_first(pin_list) )
+ return 1;
/* Treat the up-to-four pages of the shadow as a unit in the list ops */
- h.next = h.tail = sp;
- if ( sp->u.sh.type == SH_type_l2_32_shadow )
+ for ( i = 1; i < pages; i++ )
{
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- ASSERT(h.tail->u.sh.type == SH_type_l2_32_shadow);
+ sp[i] = page_list_next(sp[i - 1], pin_list);
+ ASSERT(sp[i]->u.sh.type == sp[0]->u.sh.type);
+ ASSERT(!sp[i]->u.sh.head);
}
- pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
- if ( sp->u.sh.pinned )
+ if ( already_pinned )
{
- /* Already pinned: take it out of the pinned-list so it can go
- * at the front */
- if ( pin_list->next == h.next )
- return 1;
- page_list_prev(h.next, pin_list)->list.next = h.tail->list.next;
- if ( pin_list->tail == h.tail )
- pin_list->tail = page_list_prev(h.next, pin_list);
- else
- page_list_next(h.tail, pin_list)->list.prev = h.next->list.prev;
- h.tail->list.next = h.next->list.prev = PAGE_LIST_NULL;
+ /* Take it out of the pinned-list so it can go at the front */
+ for ( i = 0; i < pages; i++ )
+ page_list_del(sp[i], pin_list);
}
else
{
/* Not pinned: pin it! */
if ( !sh_get_ref(v, smfn, 0) )
return 0;
- sp->u.sh.pinned = 1;
- ASSERT(h.next->list.prev == PAGE_LIST_NULL);
- ASSERT(h.tail->list.next == PAGE_LIST_NULL);
+ sp[0]->u.sh.pinned = 1;
}
+
/* Put it at the head of the list of pinned shadows */
- page_list_splice(&h, pin_list);
+ for ( i = pages; i > 0; i-- )
+ page_list_add(sp[i - 1], pin_list);
+
return 1;
}
@@ -668,46 +690,35 @@ static inline int sh_pin(struct vcpu *v,
* of pinned shadows, and release the extra ref. */
static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
{
- struct page_list_head h, *pin_list;
- struct page_info *sp;
-
+ struct page_list_head tmp_list, *pin_list;
+ struct page_info *sp, *next;
+ unsigned int i, head_type;
+
ASSERT(mfn_valid(smfn));
sp = mfn_to_page(smfn);
+ head_type = sp->u.sh.type;
ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
ASSERT(sp->u.sh.head);
- /* Treat the up-to-four pages of the shadow as a unit in the list ops */
- h.next = h.tail = sp;
- if ( sp->u.sh.type == SH_type_l2_32_shadow )
- {
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- ASSERT(h.tail->u.sh.type == SH_type_l2_32_shadow);
- }
- pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
-
if ( !sp->u.sh.pinned )
return;
-
sp->u.sh.pinned = 0;
- /* Cut the sub-list out of the list of pinned shadows */
- if ( pin_list->next == h.next && pin_list->tail == h.tail )
- pin_list->next = pin_list->tail = NULL;
- else
+ /* Cut the sub-list out of the list of pinned shadows,
+ * stitching it back into a list fragment of its own. */
+ pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
+ INIT_PAGE_LIST_HEAD(&tmp_list);
+ for ( i = 0; i < shadow_size(head_type); i++ )
{
- if ( pin_list->next == h.next )
- pin_list->next = page_list_next(h.tail, pin_list);
- else
- page_list_prev(h.next, pin_list)->list.next = h.tail->list.next;
- if ( pin_list->tail == h.tail )
- pin_list->tail = page_list_prev(h.next, pin_list);
- else
- page_list_next(h.tail, pin_list)->list.prev = h.next->list.prev;
+ ASSERT(sp->u.sh.type == head_type);
+ ASSERT(!i || !sp->u.sh.head);
+ next = page_list_next(sp, pin_list);
+ page_list_del(sp, pin_list);
+ page_list_add_tail(sp, &tmp_list);
+ sp = next;
}
- h.tail->list.next = h.next->list.prev = PAGE_LIST_NULL;
-
+ sh_terminate_list(&tmp_list);
+
sh_put_ref(v, smfn, 0);
}
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -172,6 +172,11 @@ page_list_first(const struct page_list_h
return head->next;
}
static inline struct page_info *
+page_list_last(const struct page_list_head *head)
+{
+ return head->tail;
+}
+static inline struct page_info *
page_list_next(const struct page_info *page,
const struct page_list_head *head)
{
@@ -331,8 +336,12 @@ page_list_splice(struct page_list_head *
# define page_list_empty list_empty
# define page_list_first(hd) list_entry((hd)->next, \
struct page_info, list)
+# define page_list_last(hd) list_entry((hd)->prev, \
+ struct page_info, list)
# define page_list_next(pg, hd) list_entry((pg)->list.next, \
struct page_info, list)
+# define page_list_prev(pg, hd) list_entry((pg)->list.prev, \
+ struct page_info, list)
# define page_list_add(pg, hd) list_add(&(pg)->list, hd)
# define page_list_add_tail(pg, hd) list_add_tail(&(pg)->list, hd)
# define page_list_del(pg, hd) list_del(&(pg)->list)
[-- Attachment #2: x86-shadow-page-list.patch --]
[-- Type: text/plain, Size: 14876 bytes --]
From: Tim Deegan <tim@xen.org>
x86/shadow: tidy up fragmentary page lists in multi-page shadows
Multi-page shadows are linked together using the 'list' field. When
those shadows are in the pinned list, the list fragments are spliced
into the pinned list; otherwise they have no associated list head.
Rework the code that handles these fragments to use the page_list
interface rather than manipulating the fields directly. This makes
the code cleaner, and allows the 'list' field to be either the
compact pdx form or a normal list_entry.
Signed-off-by: Tim Deegan <tim@xen.org>
Introduce sh_terminate_list() and make it use LIST_POISON*.
Move helper array of shadow_size() into common.c.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1196,6 +1196,26 @@ int shadow_cmpxchg_guest_entry(struct vc
* the free pool.
*/
+const u32 sh_type_to_size[] = {
+ 1, /* SH_type_none */
+ 2, /* SH_type_l1_32_shadow */
+ 2, /* SH_type_fl1_32_shadow */
+ 4, /* SH_type_l2_32_shadow */
+ 1, /* SH_type_l1_pae_shadow */
+ 1, /* SH_type_fl1_pae_shadow */
+ 1, /* SH_type_l2_pae_shadow */
+ 1, /* SH_type_l2h_pae_shadow */
+ 1, /* SH_type_l1_64_shadow */
+ 1, /* SH_type_fl1_64_shadow */
+ 1, /* SH_type_l2_64_shadow */
+ 1, /* SH_type_l2h_64_shadow */
+ 1, /* SH_type_l3_64_shadow */
+ 1, /* SH_type_l4_64_shadow */
+ 1, /* SH_type_p2m_table */
+ 1, /* SH_type_monitor_table */
+ 1 /* SH_type_oos_snapshot */
+};
+
/* Figure out the least acceptable quantity of shadow memory.
* The minimum memory requirement for always being able to free up a
* chunk of memory is very small -- only three max-order chunks per
@@ -1218,33 +1238,6 @@ static unsigned int shadow_min_acceptabl
return (vcpu_count * 128);
}
-/* Figure out the size (in pages) of a given shadow type */
-static inline u32
-shadow_size(unsigned int shadow_type)
-{
- static const u32 type_to_size[SH_type_unused] = {
- 1, /* SH_type_none */
- 2, /* SH_type_l1_32_shadow */
- 2, /* SH_type_fl1_32_shadow */
- 4, /* SH_type_l2_32_shadow */
- 1, /* SH_type_l1_pae_shadow */
- 1, /* SH_type_fl1_pae_shadow */
- 1, /* SH_type_l2_pae_shadow */
- 1, /* SH_type_l2h_pae_shadow */
- 1, /* SH_type_l1_64_shadow */
- 1, /* SH_type_fl1_64_shadow */
- 1, /* SH_type_l2_64_shadow */
- 1, /* SH_type_l2h_64_shadow */
- 1, /* SH_type_l3_64_shadow */
- 1, /* SH_type_l4_64_shadow */
- 1, /* SH_type_p2m_table */
- 1, /* SH_type_monitor_table */
- 1 /* SH_type_oos_snapshot */
- };
- ASSERT(shadow_type < SH_type_unused);
- return type_to_size[shadow_type];
-}
-
/* Dispatcher function: call the per-mode function that will unhook the
* non-Xen mappings in this top-level shadow mfn. With user_only == 1,
* unhooks only the user-mode mappings. */
@@ -1487,9 +1480,6 @@ mfn_t shadow_alloc(struct domain *d,
break;
}
- /* Page lists don't have pointers back to the head structure, so
- * it's safe to use a head structure on the stack to link the pages
- * together. */
INIT_PAGE_LIST_HEAD(&tmp_list);
/* Init page info fields and clear the pages */
@@ -1523,6 +1513,9 @@ mfn_t shadow_alloc(struct domain *d,
if ( shadow_type >= SH_type_min_shadow
&& shadow_type <= SH_type_max_shadow )
sp->u.sh.head = 1;
+
+ sh_terminate_list(&tmp_list);
+
return page_to_mfn(sp);
}
@@ -1531,6 +1524,7 @@ mfn_t shadow_alloc(struct domain *d,
void shadow_free(struct domain *d, mfn_t smfn)
{
struct page_info *next = NULL, *sp = mfn_to_page(smfn);
+ struct page_list_head *pin_list;
unsigned int pages;
u32 shadow_type;
int i;
@@ -1542,6 +1536,7 @@ void shadow_free(struct domain *d, mfn_t
ASSERT(shadow_type != SH_type_none);
ASSERT(sp->u.sh.head || (shadow_type > SH_type_max_shadow));
pages = shadow_size(shadow_type);
+ pin_list = &d->arch.paging.shadow.pinned_shadows;
for ( i = 0; i < pages; i++ )
{
@@ -1562,7 +1557,7 @@ void shadow_free(struct domain *d, mfn_t
#endif
/* Get the next page before we overwrite the list header */
if ( i < pages - 1 )
- next = pdx_to_page(sp->list.next);
+ next = page_list_next(sp, pin_list);
/* Strip out the type: this is now a free shadow page */
sp->u.sh.type = sp->u.sh.head = 0;
/* Remember the TLB timestamp so we will know whether to flush
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -428,20 +428,20 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns
/* From one page of a multi-page shadow, find the next one */
static inline mfn_t sh_next_page(mfn_t smfn)
{
- mfn_t next;
- struct page_info *pg = mfn_to_page(smfn);
+ struct page_info *pg = mfn_to_page(smfn), *next;
+ struct page_list_head h = PAGE_LIST_HEAD_INIT(h);
ASSERT(pg->u.sh.type == SH_type_l1_32_shadow
|| pg->u.sh.type == SH_type_fl1_32_shadow
|| pg->u.sh.type == SH_type_l2_32_shadow);
ASSERT(pg->u.sh.type == SH_type_l2_32_shadow || pg->u.sh.head);
- ASSERT(pg->list.next != PAGE_LIST_NULL);
- next = _mfn(pdx_to_pfn(pg->list.next));
+ next = page_list_next(pg, &h);
- ASSERT(mfn_to_page(next)->u.sh.type == pg->u.sh.type);
- ASSERT(!mfn_to_page(next)->u.sh.head);
- return next;
+ ASSERT(next);
+ ASSERT(next->u.sh.type == pg->u.sh.type);
+ ASSERT(!next->u.sh.head);
+ return page_to_mfn(next);
}
static inline u32
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -231,6 +231,17 @@ static inline int sh_type_has_up_pointer
return !sh_type_is_pinnable(v, t);
}
+static inline void sh_terminate_list(struct page_list_head *tmp_list)
+{
+#ifndef PAGE_LIST_NULL
+ /* The temporary list-head is on our stack. Invalidate the
+ * pointers to it in the shadows, just to get a clean failure if
+ * we accidentally follow them. */
+ tmp_list->prev->next = LIST_POISON1;
+ tmp_list->next->prev = LIST_POISON2;
+#endif
+}
+
/*
* Definitions for the shadow_flags field in page_info.
* These flags are stored on *guest* pages...
@@ -318,6 +329,15 @@ static inline int mfn_oos_may_write(mfn_
}
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
+/* Figure out the size (in pages) of a given shadow type */
+extern const u32 sh_type_to_size[SH_type_unused];
+static inline u32
+shadow_size(unsigned int shadow_type)
+{
+ ASSERT(shadow_type < ARRAY_SIZE(sh_type_to_size));
+ return sh_type_to_size[shadow_type];
+}
+
/******************************************************************************
* Various function declarations
*/
@@ -586,22 +606,25 @@ prev_pinned_shadow(const struct page_inf
const struct domain *d)
{
struct page_info *p;
+ const struct page_list_head *pin_list;
+
+ pin_list = &d->arch.paging.shadow.pinned_shadows;
- if ( page == d->arch.paging.shadow.pinned_shadows.next )
+ if ( page_list_empty(pin_list) || page == page_list_first(pin_list) )
return NULL;
-
+
if ( page == NULL ) /* If no current place, start at the tail */
- p = d->arch.paging.shadow.pinned_shadows.tail;
+ p = page_list_last(pin_list);
else
- p = pdx_to_page(page->list.prev);
+ p = page_list_prev(page, pin_list);
/* Skip over the non-tail parts of multi-page shadows */
if ( p && p->u.sh.type == SH_type_l2_32_shadow )
{
- p = pdx_to_page(p->list.prev);
+ p = page_list_prev(p, pin_list);
ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
- p = pdx_to_page(p->list.prev);
+ p = page_list_prev(p, pin_list);
ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
- p = pdx_to_page(p->list.prev);
+ p = page_list_prev(p, pin_list);
ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
}
ASSERT(!p || p->u.sh.head);
@@ -618,49 +641,48 @@ prev_pinned_shadow(const struct page_inf
* Returns 0 for failure, 1 for success. */
static inline int sh_pin(struct vcpu *v, mfn_t smfn)
{
- struct page_info *sp;
- struct page_list_head h, *pin_list;
-
+ struct page_info *sp[4];
+ struct page_list_head *pin_list;
+ unsigned int i, pages;
+ bool_t already_pinned;
+
ASSERT(mfn_valid(smfn));
- sp = mfn_to_page(smfn);
- ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
- ASSERT(sp->u.sh.head);
+ sp[0] = mfn_to_page(smfn);
+ pages = shadow_size(sp[0]->u.sh.type);
+ already_pinned = sp[0]->u.sh.pinned;
+ ASSERT(sh_type_is_pinnable(v, sp[0]->u.sh.type));
+ ASSERT(sp[0]->u.sh.head);
+
+ pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
+ if ( already_pinned && sp[0] == page_list_first(pin_list) )
+ return 1;
/* Treat the up-to-four pages of the shadow as a unit in the list ops */
- h.next = h.tail = sp;
- if ( sp->u.sh.type == SH_type_l2_32_shadow )
+ for ( i = 1; i < pages; i++ )
{
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- ASSERT(h.tail->u.sh.type == SH_type_l2_32_shadow);
+ sp[i] = page_list_next(sp[i - 1], pin_list);
+ ASSERT(sp[i]->u.sh.type == sp[0]->u.sh.type);
+ ASSERT(!sp[i]->u.sh.head);
}
- pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
- if ( sp->u.sh.pinned )
+ if ( already_pinned )
{
- /* Already pinned: take it out of the pinned-list so it can go
- * at the front */
- if ( pin_list->next == h.next )
- return 1;
- page_list_prev(h.next, pin_list)->list.next = h.tail->list.next;
- if ( pin_list->tail == h.tail )
- pin_list->tail = page_list_prev(h.next, pin_list);
- else
- page_list_next(h.tail, pin_list)->list.prev = h.next->list.prev;
- h.tail->list.next = h.next->list.prev = PAGE_LIST_NULL;
+ /* Take it out of the pinned-list so it can go at the front */
+ for ( i = 0; i < pages; i++ )
+ page_list_del(sp[i], pin_list);
}
else
{
/* Not pinned: pin it! */
if ( !sh_get_ref(v, smfn, 0) )
return 0;
- sp->u.sh.pinned = 1;
- ASSERT(h.next->list.prev == PAGE_LIST_NULL);
- ASSERT(h.tail->list.next == PAGE_LIST_NULL);
+ sp[0]->u.sh.pinned = 1;
}
+
/* Put it at the head of the list of pinned shadows */
- page_list_splice(&h, pin_list);
+ for ( i = pages; i > 0; i-- )
+ page_list_add(sp[i - 1], pin_list);
+
return 1;
}
@@ -668,46 +690,35 @@ static inline int sh_pin(struct vcpu *v,
* of pinned shadows, and release the extra ref. */
static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
{
- struct page_list_head h, *pin_list;
- struct page_info *sp;
-
+ struct page_list_head tmp_list, *pin_list;
+ struct page_info *sp, *next;
+ unsigned int i, head_type;
+
ASSERT(mfn_valid(smfn));
sp = mfn_to_page(smfn);
+ head_type = sp->u.sh.type;
ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
ASSERT(sp->u.sh.head);
- /* Treat the up-to-four pages of the shadow as a unit in the list ops */
- h.next = h.tail = sp;
- if ( sp->u.sh.type == SH_type_l2_32_shadow )
- {
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- ASSERT(h.tail->u.sh.type == SH_type_l2_32_shadow);
- }
- pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
-
if ( !sp->u.sh.pinned )
return;
-
sp->u.sh.pinned = 0;
- /* Cut the sub-list out of the list of pinned shadows */
- if ( pin_list->next == h.next && pin_list->tail == h.tail )
- pin_list->next = pin_list->tail = NULL;
- else
+ /* Cut the sub-list out of the list of pinned shadows,
+ * stitching it back into a list fragment of its own. */
+ pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
+ INIT_PAGE_LIST_HEAD(&tmp_list);
+ for ( i = 0; i < shadow_size(head_type); i++ )
{
- if ( pin_list->next == h.next )
- pin_list->next = page_list_next(h.tail, pin_list);
- else
- page_list_prev(h.next, pin_list)->list.next = h.tail->list.next;
- if ( pin_list->tail == h.tail )
- pin_list->tail = page_list_prev(h.next, pin_list);
- else
- page_list_next(h.tail, pin_list)->list.prev = h.next->list.prev;
+ ASSERT(sp->u.sh.type == head_type);
+ ASSERT(!i || !sp->u.sh.head);
+ next = page_list_next(sp, pin_list);
+ page_list_del(sp, pin_list);
+ page_list_add_tail(sp, &tmp_list);
+ sp = next;
}
- h.tail->list.next = h.next->list.prev = PAGE_LIST_NULL;
-
+ sh_terminate_list(&tmp_list);
+
sh_put_ref(v, smfn, 0);
}
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -172,6 +172,11 @@ page_list_first(const struct page_list_h
return head->next;
}
static inline struct page_info *
+page_list_last(const struct page_list_head *head)
+{
+ return head->tail;
+}
+static inline struct page_info *
page_list_next(const struct page_info *page,
const struct page_list_head *head)
{
@@ -331,8 +336,12 @@ page_list_splice(struct page_list_head *
# define page_list_empty list_empty
# define page_list_first(hd) list_entry((hd)->next, \
struct page_info, list)
+# define page_list_last(hd) list_entry((hd)->prev, \
+ struct page_info, list)
# define page_list_next(pg, hd) list_entry((pg)->list.next, \
struct page_info, list)
+# define page_list_prev(pg, hd) list_entry((pg)->list.prev, \
+ struct page_info, list)
# define page_list_add(pg, hd) list_add(&(pg)->list, hd)
# define page_list_add_tail(pg, hd) list_add_tail(&(pg)->list, hd)
# define page_list_del(pg, hd) list_del(&(pg)->list)
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 1/5] x86/shadow: tidy up fragmentary page lists in multi‑page shadows
2015-02-02 11:18 ` [PATCH v2 1/5] x86/shadow: tidy up fragmentary page lists in multi‑page shadows Jan Beulich
@ 2015-02-02 11:43 ` Andrew Cooper
2015-02-02 11:47 ` Jan Beulich
0 siblings, 1 reply; 15+ messages in thread
From: Andrew Cooper @ 2015-02-02 11:43 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: Tim Deegan, Keir Fraser
On 02/02/15 11:18, Jan Beulich wrote:
> From: Tim Deegan <tim@xen.org>
>
> Multi-page shadows are linked together using the 'list' field. When
> those shadows are in the pinned list, the list fragments are spliced
> into the pinned list; otherwise they have no associated list head.
>
> Rework the code that handles these fragments to use the page_list
> interface rather than manipulating the fields directly. This makes
> the code cleaner, and allows the 'list' field to be either the
> compact pdx form or a normal list_entry.
>
> Signed-off-by: Tim Deegan <tim@xen.org>
I put v1 of the patch through XenRT over the weekend, and nothing
appeared to blow up spectacularly.
Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>
> Introduce sh_terminate_list() and make it use LIST_POISON*.
>
> Move helper array of shadow_size() into common.c.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>, with 1 small comment
>
> --- a/xen/arch/x86/mm/shadow/common.c
> +++ b/xen/arch/x86/mm/shadow/common.c
> @@ -1196,6 +1196,26 @@ int shadow_cmpxchg_guest_entry(struct vc
> * the free pool.
> */
>
> +const u32 sh_type_to_size[] = {
u8 instead? The maximum value held is 4.
~Andrew
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 1/5] x86/shadow: tidy up fragmentary page lists in multi‑page shadows
2015-02-02 11:43 ` Andrew Cooper
@ 2015-02-02 11:47 ` Jan Beulich
2015-02-02 11:55 ` [PATCH v2 1/5] x86/shadow: tidy up fragmentary page lists in multi‑page shadows Tim Deegan
0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2015-02-02 11:47 UTC (permalink / raw)
To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Tim Deegan
>>> On 02.02.15 at 12:43, <andrew.cooper3@citrix.com> wrote:
> On 02/02/15 11:18, Jan Beulich wrote:
>> --- a/xen/arch/x86/mm/shadow/common.c
>> +++ b/xen/arch/x86/mm/shadow/common.c
>> @@ -1196,6 +1196,26 @@ int shadow_cmpxchg_guest_entry(struct vc
>> * the free pool.
>> */
>>
>> +const u32 sh_type_to_size[] = {
>
> u8 instead? The maximum value held is 4.
Good point, but it was u32 before, so I'll leave it to Tim to decide.
Jan
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 1/5] x86/shadow: tidy up fragmentary page lists in multi‑page shadows
2015-02-02 11:47 ` Jan Beulich
@ 2015-02-02 11:55 ` Tim Deegan
0 siblings, 0 replies; 15+ messages in thread
From: Tim Deegan @ 2015-02-02 11:55 UTC (permalink / raw)
To: Jan Beulich; +Cc: Andrew Cooper, Keir Fraser, xen-devel
At 11:47 +0000 on 02 Feb (1422874047), Jan Beulich wrote:
> >>> On 02.02.15 at 12:43, <andrew.cooper3@citrix.com> wrote:
> > On 02/02/15 11:18, Jan Beulich wrote:
> >> --- a/xen/arch/x86/mm/shadow/common.c
> >> +++ b/xen/arch/x86/mm/shadow/common.c
> >> @@ -1196,6 +1196,26 @@ int shadow_cmpxchg_guest_entry(struct vc
> >> * the free pool.
> >> */
> >>
> >> +const u32 sh_type_to_size[] = {
> >
> > u8 instead? The maximum value held is 4.
>
> Good point, but it was u32 before, so I'll leave it to Tim to decide.
Yes, u8 would be better.
Cheers,
Tim.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v2 2/5] x86/shadow: don't needlessly expose internal functions
2015-02-02 11:09 [PATCH v2 0/5] x86: shadow adjustments / allow for more memory to be used Jan Beulich
2015-02-02 11:18 ` [PATCH v2 1/5] x86/shadow: tidy up fragmentary page lists in multi‑page shadows Jan Beulich
@ 2015-02-02 11:19 ` Jan Beulich
2015-02-02 11:47 ` Andrew Cooper
2015-02-02 11:20 ` [PATCH v2 3/5] x86/mm: allow for building without shadow mode support Jan Beulich
` (3 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2015-02-02 11:19 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Tim Deegan
[-- Attachment #1: Type: text/plain, Size: 2456 bytes --]
... and drop an unused one.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -2383,7 +2383,7 @@ int sh_remove_write_access_from_sl1p(str
/* Remove all mappings of a guest frame from the shadow tables.
* Returns non-zero if we need to flush TLBs. */
-int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
+static int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
{
struct page_info *page = mfn_to_page(gmfn);
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -387,6 +387,11 @@ int shadow_write_guest_entry(struct vcpu
int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
intpte_t *old, intpte_t new, mfn_t gmfn);
+/* Update all the things that are derived from the guest's CR0/CR3/CR4.
+ * Called to initialize paging structures if the paging mode
+ * has changed, and when bringing up a VCPU for the first time. */
+void shadow_update_paging_modes(struct vcpu *v);
+
/* Unhook the non-Xen mappings in this top-level shadow mfn.
* With user_only == 1, unhooks only the user-mode mappings. */
void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn, int user_only);
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -77,23 +77,6 @@ void shadow_teardown(struct domain *d);
/* Call once all of the references to the domain have gone away */
void shadow_final_teardown(struct domain *d);
-/* Update all the things that are derived from the guest's CR0/CR3/CR4.
- * Called to initialize paging structures if the paging mode
- * has changed, and when bringing up a VCPU for the first time. */
-void shadow_update_paging_modes(struct vcpu *v);
-
-
-/* Remove all mappings of the guest page from the shadows.
- * This is called from common code. It does not flush TLBs. */
-int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
-static inline void
-shadow_drop_references(struct domain *d, struct page_info *p)
-{
- if ( unlikely(shadow_mode_enabled(d)) )
- /* See the comment about locking in sh_remove_all_mappings */
- sh_remove_all_mappings(d->vcpu[0], _mfn(page_to_mfn(p)));
-}
-
/* Remove all shadows of the guest mfn. */
void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
[-- Attachment #2: x86-shadow-private.patch --]
[-- Type: text/plain, Size: 2508 bytes --]
x86/shadow: don't needlessly expose internal functions
... and drop an unused one.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -2383,7 +2383,7 @@ int sh_remove_write_access_from_sl1p(str
/* Remove all mappings of a guest frame from the shadow tables.
* Returns non-zero if we need to flush TLBs. */
-int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
+static int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
{
struct page_info *page = mfn_to_page(gmfn);
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -387,6 +387,11 @@ int shadow_write_guest_entry(struct vcpu
int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
intpte_t *old, intpte_t new, mfn_t gmfn);
+/* Update all the things that are derived from the guest's CR0/CR3/CR4.
+ * Called to initialize paging structures if the paging mode
+ * has changed, and when bringing up a VCPU for the first time. */
+void shadow_update_paging_modes(struct vcpu *v);
+
/* Unhook the non-Xen mappings in this top-level shadow mfn.
* With user_only == 1, unhooks only the user-mode mappings. */
void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn, int user_only);
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -77,23 +77,6 @@ void shadow_teardown(struct domain *d);
/* Call once all of the references to the domain have gone away */
void shadow_final_teardown(struct domain *d);
-/* Update all the things that are derived from the guest's CR0/CR3/CR4.
- * Called to initialize paging structures if the paging mode
- * has changed, and when bringing up a VCPU for the first time. */
-void shadow_update_paging_modes(struct vcpu *v);
-
-
-/* Remove all mappings of the guest page from the shadows.
- * This is called from common code. It does not flush TLBs. */
-int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
-static inline void
-shadow_drop_references(struct domain *d, struct page_info *p)
-{
- if ( unlikely(shadow_mode_enabled(d)) )
- /* See the comment about locking in sh_remove_all_mappings */
- sh_remove_all_mappings(d->vcpu[0], _mfn(page_to_mfn(p)));
-}
-
/* Remove all shadows of the guest mfn. */
void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 2/5] x86/shadow: don't needlessly expose internal functions
2015-02-02 11:19 ` [PATCH v2 2/5] x86/shadow: don't needlessly expose internal functions Jan Beulich
@ 2015-02-02 11:47 ` Andrew Cooper
0 siblings, 0 replies; 15+ messages in thread
From: Andrew Cooper @ 2015-02-02 11:47 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: Tim Deegan, Keir Fraser
On 02/02/15 11:19, Jan Beulich wrote:
> ... and drop an unused one.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
>
> --- a/xen/arch/x86/mm/shadow/common.c
> +++ b/xen/arch/x86/mm/shadow/common.c
> @@ -2383,7 +2383,7 @@ int sh_remove_write_access_from_sl1p(str
> /* Remove all mappings of a guest frame from the shadow tables.
> * Returns non-zero if we need to flush TLBs. */
>
> -int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
> +static int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
> {
> struct page_info *page = mfn_to_page(gmfn);
>
> --- a/xen/arch/x86/mm/shadow/private.h
> +++ b/xen/arch/x86/mm/shadow/private.h
> @@ -387,6 +387,11 @@ int shadow_write_guest_entry(struct vcpu
> int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
> intpte_t *old, intpte_t new, mfn_t gmfn);
>
> +/* Update all the things that are derived from the guest's CR0/CR3/CR4.
> + * Called to initialize paging structures if the paging mode
> + * has changed, and when bringing up a VCPU for the first time. */
> +void shadow_update_paging_modes(struct vcpu *v);
> +
> /* Unhook the non-Xen mappings in this top-level shadow mfn.
> * With user_only == 1, unhooks only the user-mode mappings. */
> void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn, int user_only);
> --- a/xen/include/asm-x86/shadow.h
> +++ b/xen/include/asm-x86/shadow.h
> @@ -77,23 +77,6 @@ void shadow_teardown(struct domain *d);
> /* Call once all of the references to the domain have gone away */
> void shadow_final_teardown(struct domain *d);
>
> -/* Update all the things that are derived from the guest's CR0/CR3/CR4.
> - * Called to initialize paging structures if the paging mode
> - * has changed, and when bringing up a VCPU for the first time. */
> -void shadow_update_paging_modes(struct vcpu *v);
> -
> -
> -/* Remove all mappings of the guest page from the shadows.
> - * This is called from common code. It does not flush TLBs. */
> -int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
> -static inline void
> -shadow_drop_references(struct domain *d, struct page_info *p)
> -{
> - if ( unlikely(shadow_mode_enabled(d)) )
> - /* See the comment about locking in sh_remove_all_mappings */
> - sh_remove_all_mappings(d->vcpu[0], _mfn(page_to_mfn(p)));
> -}
> -
> /* Remove all shadows of the guest mfn. */
> void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
> static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
>
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v2 3/5] x86/mm: allow for building without shadow mode support
2015-02-02 11:09 [PATCH v2 0/5] x86: shadow adjustments / allow for more memory to be used Jan Beulich
2015-02-02 11:18 ` [PATCH v2 1/5] x86/shadow: tidy up fragmentary page lists in multi-page shadows Jan Beulich
2015-02-02 11:19 ` [PATCH v2 2/5] x86/shadow: don't needlessly expose internal functions Jan Beulich
@ 2015-02-02 11:20 ` Jan Beulich
2015-02-02 11:56 ` Andrew Cooper
2015-02-02 11:20 ` [PATCH v2 4/5] IOMMU/x86: correct page_list_first() use Jan Beulich
` (2 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2015-02-02 11:20 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Tim Deegan
[-- Attachment #1: Type: text/plain, Size: 11157 bytes --]
Considering the complexity of the code, it seems to be a reasonable
thing to allow people to disable that code entirely even outside the
immediate need for this by the next patch.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Set mode table in shadow_vcpu_init() stub. Convert BUG()/BUG_ON()
to ASSERT()/ASSERT_UNREACHABLE() and make various of the stub
functions macros or inline. Hide opt_dom0_shadow when
!CONFIG_SHADOW_PAGING. Adjust Makefile modification.
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -32,9 +32,13 @@ x86 := y
x86_32 := n
x86_64 := y
+shadow-paging ?= y
+
CFLAGS += -mno-red-zone -mno-sse -fpic
CFLAGS += -fno-asynchronous-unwind-tables
# -fvisibility=hidden reduces -fpic cost, if it's available
ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n)
CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
endif
+
+CFLAGS-$(shadow-paging) += -DCONFIG_SHADOW_PAGING
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -128,8 +128,10 @@ struct vcpu *__init alloc_dom0_vcpu0(str
return alloc_vcpu(dom0, 0, 0);
}
+#ifdef CONFIG_SHADOW_PAGING
static bool_t __initdata opt_dom0_shadow;
boolean_param("dom0_shadow", opt_dom0_shadow);
+#endif
static char __initdata opt_dom0_ioports_disable[200] = "";
string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
@@ -1399,6 +1401,7 @@ int __init construct_dom0(
regs->esi = vstartinfo_start;
regs->eflags = X86_EFLAGS_IF;
+#ifdef CONFIG_SHADOW_PAGING
if ( opt_dom0_shadow )
{
if ( is_pvh_domain(d) )
@@ -1409,6 +1412,7 @@ int __init construct_dom0(
if ( paging_enable(d, PG_SH_enable) == 0 )
paging_update_paging_modes(v);
}
+#endif
/*
* PVH Fixme: XENFEAT_supervisor_mode_kernel has been reused in PVH with a
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -635,16 +635,16 @@ int paging_domain_init(struct domain *d,
* don't want to leak any active log-dirty bitmaps */
d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
- /* The order of the *_init calls below is important, as the later
- * ones may rewrite some common fields. Shadow pagetables are the
- * default... */
- shadow_domain_init(d, domcr_flags);
-
- /* ... but we will use hardware assistance if it's available. */
+ /*
+ * Shadow pagetables are the default, but we will use
+ * hardware assistance if it's available and enabled.
+ */
if ( hap_enabled(d) )
hap_domain_init(d);
+ else
+ rc = shadow_domain_init(d, domcr_flags);
- return 0;
+ return rc;
}
/* vcpu paging struct initialization goes here */
@@ -822,12 +822,16 @@ int paging_enable(struct domain *d, u32
* and therefore its pagetables will soon be discarded */
void pagetable_dying(struct domain *d, paddr_t gpa)
{
+#ifdef CONFIG_SHADOW_PAGING
struct vcpu *v;
ASSERT(paging_mode_shadow(d));
v = d->vcpu[0];
v->arch.paging.mode->shadow.pagetable_dying(v, gpa);
+#else
+ BUG();
+#endif
}
/* Print paging-assistance info to the console */
--- a/xen/arch/x86/mm/shadow/Makefile
+++ b/xen/arch/x86/mm/shadow/Makefile
@@ -1,4 +1,8 @@
-obj-$(x86_64) += common.o guest_2.o guest_3.o guest_4.o
+ifeq ($(shadow-paging),y)
+obj-y += common.o guest_2.o guest_3.o guest_4.o
+else
+obj-y += none.o
+endif
guest_%.o: multi.c Makefile
$(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -47,7 +47,7 @@ static void sh_clean_dirty_bitmap(struct
/* Set up the shadow-specific parts of a domain struct at start of day.
* Called for every domain from arch_domain_create() */
-void shadow_domain_init(struct domain *d, unsigned int domcr_flags)
+int shadow_domain_init(struct domain *d, unsigned int domcr_flags)
{
INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelist);
INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
@@ -61,6 +61,8 @@ void shadow_domain_init(struct domain *d
d->arch.paging.shadow.oos_off = (domcr_flags & DOMCRF_oos_off) ? 1 : 0;
#endif
d->arch.paging.shadow.pagetable_dying_op = 0;
+
+ return 0;
}
/* Setup the shadow-specfic parts of a vcpu struct. Note: The most important
--- /dev/null
+++ b/xen/arch/x86/mm/shadow/none.c
@@ -0,0 +1,78 @@
+#include <xen/mm.h>
+#include <asm/shadow.h>
+
+static int _enable_log_dirty(struct domain *d, bool_t log_global)
+{
+ ASSERT(is_pv_domain(d));
+ return -EOPNOTSUPP;
+}
+
+static int _disable_log_dirty(struct domain *d)
+{
+ ASSERT(is_pv_domain(d));
+ return -EOPNOTSUPP;
+}
+
+static void _clean_dirty_bitmap(struct domain *d)
+{
+ ASSERT(is_pv_domain(d));
+}
+
+int shadow_domain_init(struct domain *d, unsigned int domcr_flags)
+{
+ paging_log_dirty_init(d, _enable_log_dirty,
+ _disable_log_dirty, _clean_dirty_bitmap);
+ return is_pv_domain(d) ? 0 : -EOPNOTSUPP;
+}
+
+static int _page_fault(struct vcpu *v, unsigned long va,
+ struct cpu_user_regs *regs)
+{
+ ASSERT_UNREACHABLE();
+ return 0;
+}
+
+static int _invlpg(struct vcpu *v, unsigned long va)
+{
+ ASSERT_UNREACHABLE();
+ return -EOPNOTSUPP;
+}
+
+static unsigned long _gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
+ unsigned long va, uint32_t *pfec)
+{
+ ASSERT_UNREACHABLE();
+ return INVALID_GFN;
+}
+
+static void _update_cr3(struct vcpu *v, int do_locking)
+{
+ ASSERT_UNREACHABLE();
+}
+
+static void _update_paging_modes(struct vcpu *v)
+{
+ ASSERT_UNREACHABLE();
+}
+
+static void _write_p2m_entry(struct domain *d, unsigned long gfn,
+ l1_pgentry_t *p, l1_pgentry_t new,
+ unsigned int level)
+{
+ ASSERT_UNREACHABLE();
+}
+
+static const struct paging_mode sh_paging_none = {
+ .page_fault = _page_fault,
+ .invlpg = _invlpg,
+ .gva_to_gfn = _gva_to_gfn,
+ .update_cr3 = _update_cr3,
+ .update_paging_modes = _update_paging_modes,
+ .write_p2m_entry = _write_p2m_entry,
+};
+
+void shadow_vcpu_init(struct vcpu *v)
+{
+ ASSERT(is_pv_domain(v->domain));
+ v->arch.paging.mode = &sh_paging_none;
+}
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -87,6 +87,7 @@ void hypercall_page_initialise(struct do
/* shadow paging extension */
/************************************************/
struct shadow_domain {
+#ifdef CONFIG_SHADOW_PAGING
unsigned int opt_flags; /* runtime tunable optimizations on/off */
struct page_list_head pinned_shadows;
@@ -116,9 +117,11 @@ struct shadow_domain {
/* Has this domain ever used HVMOP_pagetable_dying? */
bool_t pagetable_dying_op;
+#endif
};
struct shadow_vcpu {
+#ifdef CONFIG_SHADOW_PAGING
/* PAE guests: per-vcpu shadow top-level table */
l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
/* PAE guests: per-vcpu cache of the top-level *guest* entries */
@@ -144,6 +147,7 @@ struct shadow_vcpu {
} oos_fixup[SHADOW_OOS_PAGES];
bool_t pagetable_dying;
+#endif
};
/************************************************/
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -39,7 +39,11 @@
#define PG_SH_shift 20
#define PG_HAP_shift 21
/* We're in one of the shadow modes */
+#ifdef CONFIG_SHADOW_PAGING
#define PG_SH_enable (1U << PG_SH_shift)
+#else
+#define PG_SH_enable 0
+#endif
#define PG_HAP_enable (1U << PG_HAP_shift)
/* common paging mode bits */
@@ -74,6 +78,7 @@
struct sh_emulate_ctxt;
struct shadow_paging_mode {
+#ifdef CONFIG_SHADOW_PAGING
void (*detach_old_tables )(struct vcpu *v);
int (*x86_emulate_write )(struct vcpu *v, unsigned long va,
void *src, u32 bytes,
@@ -88,6 +93,7 @@ struct shadow_paging_mode {
int (*guess_wrmap )(struct vcpu *v,
unsigned long vaddr, mfn_t gmfn);
void (*pagetable_dying )(struct vcpu *v, paddr_t gpa);
+#endif
/* For outsiders to tell what mode we're in */
unsigned int shadow_levels;
};
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -49,12 +49,14 @@
/* Set up the shadow-specific parts of a domain struct at start of day.
* Called from paging_domain_init(). */
-void shadow_domain_init(struct domain *d, unsigned int domcr_flags);
+int shadow_domain_init(struct domain *d, unsigned int domcr_flags);
/* Setup the shadow-specific parts of a vcpu struct. It is called by
* paging_vcpu_init() in paging.c */
void shadow_vcpu_init(struct vcpu *v);
+#ifdef CONFIG_SHADOW_PAGING
+
/* Enable an arbitrary shadow mode. Call once at domain creation. */
int shadow_enable(struct domain *d, u32 mode);
@@ -77,17 +79,40 @@ void shadow_teardown(struct domain *d);
/* Call once all of the references to the domain have gone away */
void shadow_final_teardown(struct domain *d);
-/* Remove all shadows of the guest mfn. */
void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
+
+/* Discard _all_ mappings from the domain's shadows. */
+void shadow_blow_tables_per_domain(struct domain *d);
+
+#else /* !CONFIG_SHADOW_PAGING */
+
+#define shadow_teardown(d) ASSERT(is_pv_domain(d))
+#define shadow_final_teardown(d) ASSERT(is_pv_domain(d))
+#define shadow_enable(d, mode) \
+ ({ ASSERT(is_pv_domain(d)); -EOPNOTSUPP; })
+#define shadow_track_dirty_vram(d, begin_pfn, nr, bitmap) \
+ ({ ASSERT_UNREACHABLE(); -EOPNOTSUPP; })
+
+static inline void sh_remove_shadows(struct vcpu *v, mfn_t gmfn,
+ bool_t fast, bool_t all) {}
+
+static inline void shadow_blow_tables_per_domain(struct domain *d) {}
+
+static inline int shadow_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE_PARAM(void) u_domctl)
+{
+ return -EINVAL;
+}
+
+#endif /* CONFIG_SHADOW_PAGING */
+
+/* Remove all shadows of the guest mfn. */
static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
{
/* See the comment about locking in sh_remove_shadows */
sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
}
-/* Discard _all_ mappings from the domain's shadows. */
-void shadow_blow_tables_per_domain(struct domain *d);
-
#endif /* _XEN_SHADOW_H */
/*
--- a/xen/include/xen/paging.h
+++ b/xen/include/xen/paging.h
@@ -7,7 +7,7 @@
#include <asm/paging.h>
#include <asm/p2m.h>
-#elif defined CONFIG_SHADOW
+#elif defined CONFIG_SHADOW_PAGING
#include <asm/shadow.h>
[-- Attachment #2: x86-no-shadow.patch --]
[-- Type: text/plain, Size: 11211 bytes --]
x86/mm: allow for building without shadow mode support
Considering the complexity of the code, it seems to be a reasonable
thing to allow people to disable that code entirely even outside the
immediate need for this by the next patch.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Set mode table in shadow_vcpu_init() stub. Convert BUG()/BUG_ON()
to ASSERT()/ASSERT_UNREACHABLE() and make various of the stub
functions macros or inline. Hide opt_dom0_shadow when
!CONFIG_SHADOW_PAGING. Adjust Makefile modification.
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -32,9 +32,13 @@ x86 := y
x86_32 := n
x86_64 := y
+shadow-paging ?= y
+
CFLAGS += -mno-red-zone -mno-sse -fpic
CFLAGS += -fno-asynchronous-unwind-tables
# -fvisibility=hidden reduces -fpic cost, if it's available
ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n)
CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
endif
+
+CFLAGS-$(shadow-paging) += -DCONFIG_SHADOW_PAGING
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -128,8 +128,10 @@ struct vcpu *__init alloc_dom0_vcpu0(str
return alloc_vcpu(dom0, 0, 0);
}
+#ifdef CONFIG_SHADOW_PAGING
static bool_t __initdata opt_dom0_shadow;
boolean_param("dom0_shadow", opt_dom0_shadow);
+#endif
static char __initdata opt_dom0_ioports_disable[200] = "";
string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
@@ -1399,6 +1401,7 @@ int __init construct_dom0(
regs->esi = vstartinfo_start;
regs->eflags = X86_EFLAGS_IF;
+#ifdef CONFIG_SHADOW_PAGING
if ( opt_dom0_shadow )
{
if ( is_pvh_domain(d) )
@@ -1409,6 +1412,7 @@ int __init construct_dom0(
if ( paging_enable(d, PG_SH_enable) == 0 )
paging_update_paging_modes(v);
}
+#endif
/*
* PVH Fixme: XENFEAT_supervisor_mode_kernel has been reused in PVH with a
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -635,16 +635,16 @@ int paging_domain_init(struct domain *d,
* don't want to leak any active log-dirty bitmaps */
d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
- /* The order of the *_init calls below is important, as the later
- * ones may rewrite some common fields. Shadow pagetables are the
- * default... */
- shadow_domain_init(d, domcr_flags);
-
- /* ... but we will use hardware assistance if it's available. */
+ /*
+ * Shadow pagetables are the default, but we will use
+ * hardware assistance if it's available and enabled.
+ */
if ( hap_enabled(d) )
hap_domain_init(d);
+ else
+ rc = shadow_domain_init(d, domcr_flags);
- return 0;
+ return rc;
}
/* vcpu paging struct initialization goes here */
@@ -822,12 +822,16 @@ int paging_enable(struct domain *d, u32
* and therefore its pagetables will soon be discarded */
void pagetable_dying(struct domain *d, paddr_t gpa)
{
+#ifdef CONFIG_SHADOW_PAGING
struct vcpu *v;
ASSERT(paging_mode_shadow(d));
v = d->vcpu[0];
v->arch.paging.mode->shadow.pagetable_dying(v, gpa);
+#else
+ BUG();
+#endif
}
/* Print paging-assistance info to the console */
--- a/xen/arch/x86/mm/shadow/Makefile
+++ b/xen/arch/x86/mm/shadow/Makefile
@@ -1,4 +1,8 @@
-obj-$(x86_64) += common.o guest_2.o guest_3.o guest_4.o
+ifeq ($(shadow-paging),y)
+obj-y += common.o guest_2.o guest_3.o guest_4.o
+else
+obj-y += none.o
+endif
guest_%.o: multi.c Makefile
$(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -47,7 +47,7 @@ static void sh_clean_dirty_bitmap(struct
/* Set up the shadow-specific parts of a domain struct at start of day.
* Called for every domain from arch_domain_create() */
-void shadow_domain_init(struct domain *d, unsigned int domcr_flags)
+int shadow_domain_init(struct domain *d, unsigned int domcr_flags)
{
INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelist);
INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
@@ -61,6 +61,8 @@ void shadow_domain_init(struct domain *d
d->arch.paging.shadow.oos_off = (domcr_flags & DOMCRF_oos_off) ? 1 : 0;
#endif
d->arch.paging.shadow.pagetable_dying_op = 0;
+
+ return 0;
}
/* Setup the shadow-specfic parts of a vcpu struct. Note: The most important
--- /dev/null
+++ b/xen/arch/x86/mm/shadow/none.c
@@ -0,0 +1,78 @@
+#include <xen/mm.h>
+#include <asm/shadow.h>
+
+static int _enable_log_dirty(struct domain *d, bool_t log_global)
+{
+ ASSERT(is_pv_domain(d));
+ return -EOPNOTSUPP;
+}
+
+static int _disable_log_dirty(struct domain *d)
+{
+ ASSERT(is_pv_domain(d));
+ return -EOPNOTSUPP;
+}
+
+static void _clean_dirty_bitmap(struct domain *d)
+{
+ ASSERT(is_pv_domain(d));
+}
+
+int shadow_domain_init(struct domain *d, unsigned int domcr_flags)
+{
+ paging_log_dirty_init(d, _enable_log_dirty,
+ _disable_log_dirty, _clean_dirty_bitmap);
+ return is_pv_domain(d) ? 0 : -EOPNOTSUPP;
+}
+
+static int _page_fault(struct vcpu *v, unsigned long va,
+ struct cpu_user_regs *regs)
+{
+ ASSERT_UNREACHABLE();
+ return 0;
+}
+
+static int _invlpg(struct vcpu *v, unsigned long va)
+{
+ ASSERT_UNREACHABLE();
+ return -EOPNOTSUPP;
+}
+
+static unsigned long _gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
+ unsigned long va, uint32_t *pfec)
+{
+ ASSERT_UNREACHABLE();
+ return INVALID_GFN;
+}
+
+static void _update_cr3(struct vcpu *v, int do_locking)
+{
+ ASSERT_UNREACHABLE();
+}
+
+static void _update_paging_modes(struct vcpu *v)
+{
+ ASSERT_UNREACHABLE();
+}
+
+static void _write_p2m_entry(struct domain *d, unsigned long gfn,
+ l1_pgentry_t *p, l1_pgentry_t new,
+ unsigned int level)
+{
+ ASSERT_UNREACHABLE();
+}
+
+static const struct paging_mode sh_paging_none = {
+ .page_fault = _page_fault,
+ .invlpg = _invlpg,
+ .gva_to_gfn = _gva_to_gfn,
+ .update_cr3 = _update_cr3,
+ .update_paging_modes = _update_paging_modes,
+ .write_p2m_entry = _write_p2m_entry,
+};
+
+void shadow_vcpu_init(struct vcpu *v)
+{
+ ASSERT(is_pv_domain(v->domain));
+ v->arch.paging.mode = &sh_paging_none;
+}
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -87,6 +87,7 @@ void hypercall_page_initialise(struct do
/* shadow paging extension */
/************************************************/
struct shadow_domain {
+#ifdef CONFIG_SHADOW_PAGING
unsigned int opt_flags; /* runtime tunable optimizations on/off */
struct page_list_head pinned_shadows;
@@ -116,9 +117,11 @@ struct shadow_domain {
/* Has this domain ever used HVMOP_pagetable_dying? */
bool_t pagetable_dying_op;
+#endif
};
struct shadow_vcpu {
+#ifdef CONFIG_SHADOW_PAGING
/* PAE guests: per-vcpu shadow top-level table */
l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
/* PAE guests: per-vcpu cache of the top-level *guest* entries */
@@ -144,6 +147,7 @@ struct shadow_vcpu {
} oos_fixup[SHADOW_OOS_PAGES];
bool_t pagetable_dying;
+#endif
};
/************************************************/
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -39,7 +39,11 @@
#define PG_SH_shift 20
#define PG_HAP_shift 21
/* We're in one of the shadow modes */
+#ifdef CONFIG_SHADOW_PAGING
#define PG_SH_enable (1U << PG_SH_shift)
+#else
+#define PG_SH_enable 0
+#endif
#define PG_HAP_enable (1U << PG_HAP_shift)
/* common paging mode bits */
@@ -74,6 +78,7 @@
struct sh_emulate_ctxt;
struct shadow_paging_mode {
+#ifdef CONFIG_SHADOW_PAGING
void (*detach_old_tables )(struct vcpu *v);
int (*x86_emulate_write )(struct vcpu *v, unsigned long va,
void *src, u32 bytes,
@@ -88,6 +93,7 @@ struct shadow_paging_mode {
int (*guess_wrmap )(struct vcpu *v,
unsigned long vaddr, mfn_t gmfn);
void (*pagetable_dying )(struct vcpu *v, paddr_t gpa);
+#endif
/* For outsiders to tell what mode we're in */
unsigned int shadow_levels;
};
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -49,12 +49,14 @@
/* Set up the shadow-specific parts of a domain struct at start of day.
* Called from paging_domain_init(). */
-void shadow_domain_init(struct domain *d, unsigned int domcr_flags);
+int shadow_domain_init(struct domain *d, unsigned int domcr_flags);
/* Setup the shadow-specific parts of a vcpu struct. It is called by
* paging_vcpu_init() in paging.c */
void shadow_vcpu_init(struct vcpu *v);
+#ifdef CONFIG_SHADOW_PAGING
+
/* Enable an arbitrary shadow mode. Call once at domain creation. */
int shadow_enable(struct domain *d, u32 mode);
@@ -77,17 +79,40 @@ void shadow_teardown(struct domain *d);
/* Call once all of the references to the domain have gone away */
void shadow_final_teardown(struct domain *d);
-/* Remove all shadows of the guest mfn. */
void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
+
+/* Discard _all_ mappings from the domain's shadows. */
+void shadow_blow_tables_per_domain(struct domain *d);
+
+#else /* !CONFIG_SHADOW_PAGING */
+
+#define shadow_teardown(d) ASSERT(is_pv_domain(d))
+#define shadow_final_teardown(d) ASSERT(is_pv_domain(d))
+#define shadow_enable(d, mode) \
+ ({ ASSERT(is_pv_domain(d)); -EOPNOTSUPP; })
+#define shadow_track_dirty_vram(d, begin_pfn, nr, bitmap) \
+ ({ ASSERT_UNREACHABLE(); -EOPNOTSUPP; })
+
+static inline void sh_remove_shadows(struct vcpu *v, mfn_t gmfn,
+ bool_t fast, bool_t all) {}
+
+static inline void shadow_blow_tables_per_domain(struct domain *d) {}
+
+static inline int shadow_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE_PARAM(void) u_domctl)
+{
+ return -EINVAL;
+}
+
+#endif /* CONFIG_SHADOW_PAGING */
+
+/* Remove all shadows of the guest mfn. */
static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
{
/* See the comment about locking in sh_remove_shadows */
sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
}
-/* Discard _all_ mappings from the domain's shadows. */
-void shadow_blow_tables_per_domain(struct domain *d);
-
#endif /* _XEN_SHADOW_H */
/*
--- a/xen/include/xen/paging.h
+++ b/xen/include/xen/paging.h
@@ -7,7 +7,7 @@
#include <asm/paging.h>
#include <asm/p2m.h>
-#elif defined CONFIG_SHADOW
+#elif defined CONFIG_SHADOW_PAGING
#include <asm/shadow.h>
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 3/5] x86/mm: allow for building without shadow mode support
2015-02-02 11:20 ` [PATCH v2 3/5] x86/mm: allow for building without shadow mode support Jan Beulich
@ 2015-02-02 11:56 ` Andrew Cooper
0 siblings, 0 replies; 15+ messages in thread
From: Andrew Cooper @ 2015-02-02 11:56 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: Tim Deegan, Keir Fraser
On 02/02/15 11:20, Jan Beulich wrote:
> Considering the complexity of the code, it seems to be a reasonable
> thing to allow people to disable that code entirely even outside the
> immediate need for this by the next patch.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> v2: Set mode table in shadow_vcpu_init() stub. Convert BUG()/BUG_ON()
> to ASSERT()/ASSERT_UNREACHABLE() and make various of the stub
> functions macros or inline. Hide opt_dom0_shadow when
> !CONFIG_SHADOW_PAGING. Adjust Makefile modification.
>
> --- a/xen/arch/x86/Rules.mk
> +++ b/xen/arch/x86/Rules.mk
> @@ -32,9 +32,13 @@ x86 := y
> x86_32 := n
> x86_64 := y
>
> +shadow-paging ?= y
> +
> CFLAGS += -mno-red-zone -mno-sse -fpic
> CFLAGS += -fno-asynchronous-unwind-tables
> # -fvisibility=hidden reduces -fpic cost, if it's available
> ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n)
> CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
> endif
> +
> +CFLAGS-$(shadow-paging) += -DCONFIG_SHADOW_PAGING
> --- a/xen/arch/x86/domain_build.c
> +++ b/xen/arch/x86/domain_build.c
> @@ -128,8 +128,10 @@ struct vcpu *__init alloc_dom0_vcpu0(str
> return alloc_vcpu(dom0, 0, 0);
> }
>
> +#ifdef CONFIG_SHADOW_PAGING
> static bool_t __initdata opt_dom0_shadow;
> boolean_param("dom0_shadow", opt_dom0_shadow);
> +#endif
>
> static char __initdata opt_dom0_ioports_disable[200] = "";
> string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
> @@ -1399,6 +1401,7 @@ int __init construct_dom0(
> regs->esi = vstartinfo_start;
> regs->eflags = X86_EFLAGS_IF;
>
> +#ifdef CONFIG_SHADOW_PAGING
> if ( opt_dom0_shadow )
> {
> if ( is_pvh_domain(d) )
> @@ -1409,6 +1412,7 @@ int __init construct_dom0(
> if ( paging_enable(d, PG_SH_enable) == 0 )
> paging_update_paging_modes(v);
> }
> +#endif
>
> /*
> * PVH Fixme: XENFEAT_supervisor_mode_kernel has been reused in PVH with a
> --- a/xen/arch/x86/mm/paging.c
> +++ b/xen/arch/x86/mm/paging.c
> @@ -635,16 +635,16 @@ int paging_domain_init(struct domain *d,
> * don't want to leak any active log-dirty bitmaps */
> d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
>
> - /* The order of the *_init calls below is important, as the later
> - * ones may rewrite some common fields. Shadow pagetables are the
> - * default... */
> - shadow_domain_init(d, domcr_flags);
> -
> - /* ... but we will use hardware assistance if it's available. */
> + /*
> + * Shadow pagetables are the default, but we will use
> + * hardware assistance if it's available and enabled.
> + */
> if ( hap_enabled(d) )
> hap_domain_init(d);
> + else
> + rc = shadow_domain_init(d, domcr_flags);
>
> - return 0;
> + return rc;
> }
>
> /* vcpu paging struct initialization goes here */
> @@ -822,12 +822,16 @@ int paging_enable(struct domain *d, u32
> * and therefore its pagetables will soon be discarded */
> void pagetable_dying(struct domain *d, paddr_t gpa)
> {
> +#ifdef CONFIG_SHADOW_PAGING
> struct vcpu *v;
>
> ASSERT(paging_mode_shadow(d));
>
> v = d->vcpu[0];
> v->arch.paging.mode->shadow.pagetable_dying(v, gpa);
> +#else
> + BUG();
> +#endif
> }
>
> /* Print paging-assistance info to the console */
> --- a/xen/arch/x86/mm/shadow/Makefile
> +++ b/xen/arch/x86/mm/shadow/Makefile
> @@ -1,4 +1,8 @@
> -obj-$(x86_64) += common.o guest_2.o guest_3.o guest_4.o
> +ifeq ($(shadow-paging),y)
> +obj-y += common.o guest_2.o guest_3.o guest_4.o
> +else
> +obj-y += none.o
> +endif
>
> guest_%.o: multi.c Makefile
> $(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
> --- a/xen/arch/x86/mm/shadow/common.c
> +++ b/xen/arch/x86/mm/shadow/common.c
> @@ -47,7 +47,7 @@ static void sh_clean_dirty_bitmap(struct
>
> /* Set up the shadow-specific parts of a domain struct at start of day.
> * Called for every domain from arch_domain_create() */
> -void shadow_domain_init(struct domain *d, unsigned int domcr_flags)
> +int shadow_domain_init(struct domain *d, unsigned int domcr_flags)
> {
> INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelist);
> INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
> @@ -61,6 +61,8 @@ void shadow_domain_init(struct domain *d
> d->arch.paging.shadow.oos_off = (domcr_flags & DOMCRF_oos_off) ? 1 : 0;
> #endif
> d->arch.paging.shadow.pagetable_dying_op = 0;
> +
> + return 0;
> }
>
> /* Setup the shadow-specfic parts of a vcpu struct. Note: The most important
> --- /dev/null
> +++ b/xen/arch/x86/mm/shadow/none.c
> @@ -0,0 +1,78 @@
> +#include <xen/mm.h>
> +#include <asm/shadow.h>
> +
> +static int _enable_log_dirty(struct domain *d, bool_t log_global)
> +{
> + ASSERT(is_pv_domain(d));
> + return -EOPNOTSUPP;
> +}
> +
> +static int _disable_log_dirty(struct domain *d)
> +{
> + ASSERT(is_pv_domain(d));
> + return -EOPNOTSUPP;
> +}
> +
> +static void _clean_dirty_bitmap(struct domain *d)
> +{
> + ASSERT(is_pv_domain(d));
> +}
> +
> +int shadow_domain_init(struct domain *d, unsigned int domcr_flags)
> +{
> + paging_log_dirty_init(d, _enable_log_dirty,
> + _disable_log_dirty, _clean_dirty_bitmap);
> + return is_pv_domain(d) ? 0 : -EOPNOTSUPP;
> +}
> +
> +static int _page_fault(struct vcpu *v, unsigned long va,
> + struct cpu_user_regs *regs)
> +{
> + ASSERT_UNREACHABLE();
> + return 0;
> +}
> +
> +static int _invlpg(struct vcpu *v, unsigned long va)
> +{
> + ASSERT_UNREACHABLE();
> + return -EOPNOTSUPP;
> +}
> +
> +static unsigned long _gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
> + unsigned long va, uint32_t *pfec)
> +{
> + ASSERT_UNREACHABLE();
> + return INVALID_GFN;
> +}
> +
> +static void _update_cr3(struct vcpu *v, int do_locking)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +
> +static void _update_paging_modes(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +
> +static void _write_p2m_entry(struct domain *d, unsigned long gfn,
> + l1_pgentry_t *p, l1_pgentry_t new,
> + unsigned int level)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +
> +static const struct paging_mode sh_paging_none = {
> + .page_fault = _page_fault,
> + .invlpg = _invlpg,
> + .gva_to_gfn = _gva_to_gfn,
> + .update_cr3 = _update_cr3,
> + .update_paging_modes = _update_paging_modes,
> + .write_p2m_entry = _write_p2m_entry,
> +};
> +
> +void shadow_vcpu_init(struct vcpu *v)
> +{
> + ASSERT(is_pv_domain(v->domain));
> + v->arch.paging.mode = &sh_paging_none;
> +}
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -87,6 +87,7 @@ void hypercall_page_initialise(struct do
> /* shadow paging extension */
> /************************************************/
> struct shadow_domain {
> +#ifdef CONFIG_SHADOW_PAGING
> unsigned int opt_flags; /* runtime tunable optimizations on/off */
> struct page_list_head pinned_shadows;
>
> @@ -116,9 +117,11 @@ struct shadow_domain {
>
> /* Has this domain ever used HVMOP_pagetable_dying? */
> bool_t pagetable_dying_op;
> +#endif
> };
>
> struct shadow_vcpu {
> +#ifdef CONFIG_SHADOW_PAGING
> /* PAE guests: per-vcpu shadow top-level table */
> l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
> /* PAE guests: per-vcpu cache of the top-level *guest* entries */
> @@ -144,6 +147,7 @@ struct shadow_vcpu {
> } oos_fixup[SHADOW_OOS_PAGES];
>
> bool_t pagetable_dying;
> +#endif
> };
>
> /************************************************/
> --- a/xen/include/asm-x86/paging.h
> +++ b/xen/include/asm-x86/paging.h
> @@ -39,7 +39,11 @@
> #define PG_SH_shift 20
> #define PG_HAP_shift 21
> /* We're in one of the shadow modes */
> +#ifdef CONFIG_SHADOW_PAGING
> #define PG_SH_enable (1U << PG_SH_shift)
> +#else
> +#define PG_SH_enable 0
> +#endif
> #define PG_HAP_enable (1U << PG_HAP_shift)
>
> /* common paging mode bits */
> @@ -74,6 +78,7 @@
>
> struct sh_emulate_ctxt;
> struct shadow_paging_mode {
> +#ifdef CONFIG_SHADOW_PAGING
> void (*detach_old_tables )(struct vcpu *v);
> int (*x86_emulate_write )(struct vcpu *v, unsigned long va,
> void *src, u32 bytes,
> @@ -88,6 +93,7 @@ struct shadow_paging_mode {
> int (*guess_wrmap )(struct vcpu *v,
> unsigned long vaddr, mfn_t gmfn);
> void (*pagetable_dying )(struct vcpu *v, paddr_t gpa);
> +#endif
> /* For outsiders to tell what mode we're in */
> unsigned int shadow_levels;
> };
> --- a/xen/include/asm-x86/shadow.h
> +++ b/xen/include/asm-x86/shadow.h
> @@ -49,12 +49,14 @@
>
> /* Set up the shadow-specific parts of a domain struct at start of day.
> * Called from paging_domain_init(). */
> -void shadow_domain_init(struct domain *d, unsigned int domcr_flags);
> +int shadow_domain_init(struct domain *d, unsigned int domcr_flags);
>
> /* Setup the shadow-specific parts of a vcpu struct. It is called by
> * paging_vcpu_init() in paging.c */
> void shadow_vcpu_init(struct vcpu *v);
>
> +#ifdef CONFIG_SHADOW_PAGING
> +
> /* Enable an arbitrary shadow mode. Call once at domain creation. */
> int shadow_enable(struct domain *d, u32 mode);
>
> @@ -77,17 +79,40 @@ void shadow_teardown(struct domain *d);
> /* Call once all of the references to the domain have gone away */
> void shadow_final_teardown(struct domain *d);
>
> -/* Remove all shadows of the guest mfn. */
> void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
> +
> +/* Discard _all_ mappings from the domain's shadows. */
> +void shadow_blow_tables_per_domain(struct domain *d);
> +
> +#else /* !CONFIG_SHADOW_PAGING */
> +
> +#define shadow_teardown(d) ASSERT(is_pv_domain(d))
> +#define shadow_final_teardown(d) ASSERT(is_pv_domain(d))
> +#define shadow_enable(d, mode) \
> + ({ ASSERT(is_pv_domain(d)); -EOPNOTSUPP; })
> +#define shadow_track_dirty_vram(d, begin_pfn, nr, bitmap) \
> + ({ ASSERT_UNREACHABLE(); -EOPNOTSUPP; })
> +
> +static inline void sh_remove_shadows(struct vcpu *v, mfn_t gmfn,
> + bool_t fast, bool_t all) {}
> +
> +static inline void shadow_blow_tables_per_domain(struct domain *d) {}
> +
> +static inline int shadow_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
> + XEN_GUEST_HANDLE_PARAM(void) u_domctl)
> +{
> + return -EINVAL;
> +}
> +
> +#endif /* CONFIG_SHADOW_PAGING */
> +
> +/* Remove all shadows of the guest mfn. */
> static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
> {
> /* See the comment about locking in sh_remove_shadows */
> sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
> }
>
> -/* Discard _all_ mappings from the domain's shadows. */
> -void shadow_blow_tables_per_domain(struct domain *d);
> -
> #endif /* _XEN_SHADOW_H */
>
> /*
> --- a/xen/include/xen/paging.h
> +++ b/xen/include/xen/paging.h
> @@ -7,7 +7,7 @@
> #include <asm/paging.h>
> #include <asm/p2m.h>
>
> -#elif defined CONFIG_SHADOW
> +#elif defined CONFIG_SHADOW_PAGING
>
> #include <asm/shadow.h>
>
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v2 4/5] IOMMU/x86: correct page_list_first() use
2015-02-02 11:09 [PATCH v2 0/5] x86: shadow adjustments / allow for more memory to be used Jan Beulich
` (2 preceding siblings ...)
2015-02-02 11:20 ` [PATCH v2 3/5] x86/mm: allow for building without shadow mode support Jan Beulich
@ 2015-02-02 11:20 ` Jan Beulich
2015-02-02 11:52 ` Andrew Cooper
2015-02-02 11:21 ` [PATCH v2 5/5] x86: provide build time option to support up to 123Tb of memory Jan Beulich
2015-02-02 11:54 ` [PATCH v2 0/5] x86: shadow adjustments / allow for more memory to be used Tim Deegan
5 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2015-02-02 11:20 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Tim Deegan
[-- Attachment #1: Type: text/plain, Size: 901 bytes --]
Comparing its result against NULL is unsafe when page lists use normal
list entries for linking together - page_list_empty() needs to be used
instead.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -85,8 +85,9 @@ int arch_iommu_populate_page_table(struc
* first few entries.
*/
page_list_move(&d->page_list, &d->arch.relmem_list);
- while ( (page = page_list_first(&d->page_list)) != NULL &&
- (page->count_info & (PGC_state|PGC_broken)) )
+ while ( !page_list_empty(&d->page_list) &&
+ (page = page_list_first(&d->page_list),
+ (page->count_info & (PGC_state|PGC_broken))) )
{
page_list_del(page, &d->page_list);
page_list_add_tail(page, &d->arch.relmem_list);
[-- Attachment #2: x86-IOMMU-page-list.patch --]
[-- Type: text/plain, Size: 939 bytes --]
IOMMU/x86: correct page_list_first() use
Comparing its result against NULL is unsafe when page lists use normal
list entries for linking together - page_list_empty() needs to be used
instead.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -85,8 +85,9 @@ int arch_iommu_populate_page_table(struc
* first few entries.
*/
page_list_move(&d->page_list, &d->arch.relmem_list);
- while ( (page = page_list_first(&d->page_list)) != NULL &&
- (page->count_info & (PGC_state|PGC_broken)) )
+ while ( !page_list_empty(&d->page_list) &&
+ (page = page_list_first(&d->page_list),
+ (page->count_info & (PGC_state|PGC_broken))) )
{
page_list_del(page, &d->page_list);
page_list_add_tail(page, &d->arch.relmem_list);
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 4/5] IOMMU/x86: correct page_list_first() use
2015-02-02 11:20 ` [PATCH v2 4/5] IOMMU/x86: correct page_list_first() use Jan Beulich
@ 2015-02-02 11:52 ` Andrew Cooper
0 siblings, 0 replies; 15+ messages in thread
From: Andrew Cooper @ 2015-02-02 11:52 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: Tim Deegan, Keir Fraser
On 02/02/15 11:20, Jan Beulich wrote:
> Comparing its result against NULL is unsafe when page lists use normal
> list entries for linking together - page_list_empty() needs to be used
> instead.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
>
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -85,8 +85,9 @@ int arch_iommu_populate_page_table(struc
> * first few entries.
> */
> page_list_move(&d->page_list, &d->arch.relmem_list);
> - while ( (page = page_list_first(&d->page_list)) != NULL &&
> - (page->count_info & (PGC_state|PGC_broken)) )
> + while ( !page_list_empty(&d->page_list) &&
> + (page = page_list_first(&d->page_list),
> + (page->count_info & (PGC_state|PGC_broken))) )
> {
> page_list_del(page, &d->page_list);
> page_list_add_tail(page, &d->arch.relmem_list);
>
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v2 5/5] x86: provide build time option to support up to 123Tb of memory
2015-02-02 11:09 [PATCH v2 0/5] x86: shadow adjustments / allow for more memory to be used Jan Beulich
` (3 preceding siblings ...)
2015-02-02 11:20 ` [PATCH v2 4/5] IOMMU/x86: correct page_list_first() use Jan Beulich
@ 2015-02-02 11:21 ` Jan Beulich
2015-02-02 11:58 ` Andrew Cooper
2015-02-02 11:54 ` [PATCH v2 0/5] x86: shadow adjustments / allow for more memory to be used Tim Deegan
5 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2015-02-02 11:21 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Tim Deegan
[-- Attachment #1: Type: text/plain, Size: 6120 bytes --]
As this requires growing struct page_info from 32 to 48 bytes as well
as shrinking the always accessible direct mapped memory range from 5Tb
to 3.5Tb, this isn't being introduced as a general or default enabled
feature.
For now setting "bigmem=y" implies "shadow-paging=n", as the shadow
paging code otherwise fails to build (see
http://lists.xenproject.org/archives/html/xen-devel/2015-01/msg03165.html).
A side effect of the change to x86's mm.h is that asm/mm.h may no
longer be included directly. Hence in the few places where this was done,
xen/mm.h is being substituted (indirectly in the hvm/mtrr.h case).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: "bigmem=y" no longer implies "shadow-paging=n".
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -33,6 +33,7 @@ x86 := y
x86_64 := y
shadow-paging ?= y
+bigmem ?= n
CFLAGS += -mno-red-zone -mno-sse -fpic
CFLAGS += -fno-asynchronous-unwind-tables
@@ -42,3 +43,4 @@ CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
endif
CFLAGS-$(shadow-paging) += -DCONFIG_SHADOW_PAGING
+CFLAGS-$(bigmem) += -DCONFIG_BIGMEM
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -18,13 +18,11 @@
*/
#include <public/hvm/e820.h>
-#include <xen/types.h>
+#include <xen/domain_page.h>
#include <asm/e820.h>
#include <asm/iocap.h>
-#include <asm/mm.h>
#include <asm/paging.h>
#include <asm/p2m.h>
-#include <xen/domain_page.h>
#include <asm/mtrr.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -386,8 +386,13 @@ static void __init setup_max_pdx(unsigne
if ( max_pdx > FRAMETABLE_NR )
max_pdx = FRAMETABLE_NR;
+ if ( max_pdx > MPT_VIRT_SIZE / sizeof(unsigned long) )
+ max_pdx = MPT_VIRT_SIZE / sizeof(unsigned long);
+
+#ifdef PAGE_LIST_NULL
if ( max_pdx >= PAGE_LIST_NULL )
max_pdx = PAGE_LIST_NULL - 1;
+#endif
max_page = pdx_to_pfn(max_pdx - 1) + 1;
}
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -161,6 +161,7 @@ extern unsigned char boot_edid_info[128]
* High read-only compatibility machine-to-phys translation table.
* 0xffff82d080000000 - 0xffff82d0bfffffff [1GB, 2^30 bytes, PML4:261]
* Xen text, static data, bss.
+#ifndef CONFIG_BIGMEM
* 0xffff82d0c0000000 - 0xffff82dffbffffff [61GB - 64MB, PML4:261]
* Reserved for future use.
* 0xffff82dffc000000 - 0xffff82dfffffffff [64MB, 2^26 bytes, PML4:261]
@@ -169,6 +170,16 @@ extern unsigned char boot_edid_info[128]
* Page-frame information array.
* 0xffff830000000000 - 0xffff87ffffffffff [5TB, 5*2^40 bytes, PML4:262-271]
* 1:1 direct mapping of all physical memory.
+#else
+ * 0xffff82d0c0000000 - 0xffff82ffdfffffff [188.5GB, PML4:261]
+ * Reserved for future use.
+ * 0xffff82ffe0000000 - 0xffff82ffffffffff [512MB, 2^29 bytes, PML4:261]
+ * Super-page information array.
+ * 0xffff830000000000 - 0xffff847fffffffff [1.5TB, 3*2^39 bytes, PML4:262-264]
+ * Page-frame information array.
+ * 0xffff848000000000 - 0xffff87ffffffffff [3.5TB, 7*2^39 bytes, PML4:265-271]
+ * 1:1 direct mapping of all physical memory.
+#endif
* 0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
* PV: Guest-defined use.
* 0xffff880000000000 - 0xffffff7fffffffff [119.5TB, PML4:272-510]
@@ -237,21 +248,35 @@ extern unsigned char boot_edid_info[128]
/* Slot 261: xen text, static data and bss (1GB). */
#define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END)
#define XEN_VIRT_END (XEN_VIRT_START + GB(1))
-/* Slot 261: superpage information array (64MB). */
+
+/* Slot 261: superpage information array (64MB or 512MB). */
#define SPAGETABLE_VIRT_END FRAMETABLE_VIRT_START
#define SPAGETABLE_NR (((FRAMETABLE_NR - 1) >> (SUPERPAGE_SHIFT - \
PAGE_SHIFT)) + 1)
#define SPAGETABLE_SIZE (SPAGETABLE_NR * sizeof(struct spage_info))
#define SPAGETABLE_VIRT_START ((SPAGETABLE_VIRT_END - SPAGETABLE_SIZE) & \
(_AC(-1,UL) << SUPERPAGE_SHIFT))
+
+#ifndef CONFIG_BIGMEM
/* Slot 261: page-frame information array (128GB). */
-#define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START
#define FRAMETABLE_SIZE GB(128)
+#else
+/* Slot 262-264: page-frame information array (1.5TB). */
+#define FRAMETABLE_SIZE GB(1536)
+#endif
+#define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START
#define FRAMETABLE_NR (FRAMETABLE_SIZE / sizeof(*frame_table))
#define FRAMETABLE_VIRT_START (FRAMETABLE_VIRT_END - FRAMETABLE_SIZE)
+
+#ifndef CONFIG_BIGMEM
/* Slot 262-271/510: A direct 1:1 mapping of all of physical memory. */
#define DIRECTMAP_VIRT_START (PML4_ADDR(262))
#define DIRECTMAP_SIZE (PML4_ENTRY_BYTES * (511 - 262))
+#else
+/* Slot 265-271/510: A direct 1:1 mapping of all of physical memory. */
+#define DIRECTMAP_VIRT_START (PML4_ADDR(265))
+#define DIRECTMAP_SIZE (PML4_ENTRY_BYTES * (511 - 265))
+#endif
#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + DIRECTMAP_SIZE)
#ifndef __ASSEMBLY__
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -17,6 +17,7 @@
*/
#define PFN_ORDER(_pfn) ((_pfn)->v.free.order)
+#ifndef CONFIG_BIGMEM
/*
* This definition is solely for the use in struct page_info (and
* struct page_list_head), intended to allow easy adjustment once x86-64
@@ -30,6 +31,9 @@ struct page_list_entry
{
__pdx_t next, prev;
};
+#else
+#define __pdx_t unsigned long
+#endif
struct page_sharing_info;
--- a/xen/include/asm-x86/mtrr.h
+++ b/xen/include/asm-x86/mtrr.h
@@ -1,8 +1,7 @@
#ifndef __ASM_X86_MTRR_H__
#define __ASM_X86_MTRR_H__
-#include <xen/config.h>
-#include <asm/mm.h>
+#include <xen/mm.h>
/* These are the region types. They match the architectural specification. */
#define MTRR_TYPE_UNCACHABLE 0
[-- Attachment #2: x86-bigmem.patch --]
[-- Type: text/plain, Size: 6183 bytes --]
x86: provide build time option to support up to 123Tb of memory
As this requires growing struct page_info from 32 to 48 bytes as well
as shrinking the always accessible direct mapped memory range from 5Tb
to 3.5Tb, this isn't being introduced as a general or default enabled
feature.
For now setting "bigmem=y" implies "shadow-paging=n", as the shadow
paging code otherwise fails to build (see
http://lists.xenproject.org/archives/html/xen-devel/2015-01/msg03165.html).
A side effect of the change to x86's mm.h is that asm/mm.h may no
longer be included directly. Hence in the few places where this was done,
xen/mm.h is being substituted (indirectly in the hvm/mtrr.h case).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: "bigmem=y" no longer implies "shadow-paging=n".
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -33,6 +33,7 @@ x86 := y
x86_64 := y
shadow-paging ?= y
+bigmem ?= n
CFLAGS += -mno-red-zone -mno-sse -fpic
CFLAGS += -fno-asynchronous-unwind-tables
@@ -42,3 +43,4 @@ CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
endif
CFLAGS-$(shadow-paging) += -DCONFIG_SHADOW_PAGING
+CFLAGS-$(bigmem) += -DCONFIG_BIGMEM
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -18,13 +18,11 @@
*/
#include <public/hvm/e820.h>
-#include <xen/types.h>
+#include <xen/domain_page.h>
#include <asm/e820.h>
#include <asm/iocap.h>
-#include <asm/mm.h>
#include <asm/paging.h>
#include <asm/p2m.h>
-#include <xen/domain_page.h>
#include <asm/mtrr.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -386,8 +386,13 @@ static void __init setup_max_pdx(unsigne
if ( max_pdx > FRAMETABLE_NR )
max_pdx = FRAMETABLE_NR;
+ if ( max_pdx > MPT_VIRT_SIZE / sizeof(unsigned long) )
+ max_pdx = MPT_VIRT_SIZE / sizeof(unsigned long);
+
+#ifdef PAGE_LIST_NULL
if ( max_pdx >= PAGE_LIST_NULL )
max_pdx = PAGE_LIST_NULL - 1;
+#endif
max_page = pdx_to_pfn(max_pdx - 1) + 1;
}
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -161,6 +161,7 @@ extern unsigned char boot_edid_info[128]
* High read-only compatibility machine-to-phys translation table.
* 0xffff82d080000000 - 0xffff82d0bfffffff [1GB, 2^30 bytes, PML4:261]
* Xen text, static data, bss.
+#ifndef CONFIG_BIGMEM
* 0xffff82d0c0000000 - 0xffff82dffbffffff [61GB - 64MB, PML4:261]
* Reserved for future use.
* 0xffff82dffc000000 - 0xffff82dfffffffff [64MB, 2^26 bytes, PML4:261]
@@ -169,6 +170,16 @@ extern unsigned char boot_edid_info[128]
* Page-frame information array.
* 0xffff830000000000 - 0xffff87ffffffffff [5TB, 5*2^40 bytes, PML4:262-271]
* 1:1 direct mapping of all physical memory.
+#else
+ * 0xffff82d0c0000000 - 0xffff82ffdfffffff [188.5GB, PML4:261]
+ * Reserved for future use.
+ * 0xffff82ffe0000000 - 0xffff82ffffffffff [512MB, 2^29 bytes, PML4:261]
+ * Super-page information array.
+ * 0xffff830000000000 - 0xffff847fffffffff [1.5TB, 3*2^39 bytes, PML4:262-264]
+ * Page-frame information array.
+ * 0xffff848000000000 - 0xffff87ffffffffff [3.5TB, 7*2^39 bytes, PML4:265-271]
+ * 1:1 direct mapping of all physical memory.
+#endif
* 0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
* PV: Guest-defined use.
* 0xffff880000000000 - 0xffffff7fffffffff [119.5TB, PML4:272-510]
@@ -237,21 +248,35 @@ extern unsigned char boot_edid_info[128]
/* Slot 261: xen text, static data and bss (1GB). */
#define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END)
#define XEN_VIRT_END (XEN_VIRT_START + GB(1))
-/* Slot 261: superpage information array (64MB). */
+
+/* Slot 261: superpage information array (64MB or 512MB). */
#define SPAGETABLE_VIRT_END FRAMETABLE_VIRT_START
#define SPAGETABLE_NR (((FRAMETABLE_NR - 1) >> (SUPERPAGE_SHIFT - \
PAGE_SHIFT)) + 1)
#define SPAGETABLE_SIZE (SPAGETABLE_NR * sizeof(struct spage_info))
#define SPAGETABLE_VIRT_START ((SPAGETABLE_VIRT_END - SPAGETABLE_SIZE) & \
(_AC(-1,UL) << SUPERPAGE_SHIFT))
+
+#ifndef CONFIG_BIGMEM
/* Slot 261: page-frame information array (128GB). */
-#define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START
#define FRAMETABLE_SIZE GB(128)
+#else
+/* Slot 262-264: page-frame information array (1.5TB). */
+#define FRAMETABLE_SIZE GB(1536)
+#endif
+#define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START
#define FRAMETABLE_NR (FRAMETABLE_SIZE / sizeof(*frame_table))
#define FRAMETABLE_VIRT_START (FRAMETABLE_VIRT_END - FRAMETABLE_SIZE)
+
+#ifndef CONFIG_BIGMEM
/* Slot 262-271/510: A direct 1:1 mapping of all of physical memory. */
#define DIRECTMAP_VIRT_START (PML4_ADDR(262))
#define DIRECTMAP_SIZE (PML4_ENTRY_BYTES * (511 - 262))
+#else
+/* Slot 265-271/510: A direct 1:1 mapping of all of physical memory. */
+#define DIRECTMAP_VIRT_START (PML4_ADDR(265))
+#define DIRECTMAP_SIZE (PML4_ENTRY_BYTES * (511 - 265))
+#endif
#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + DIRECTMAP_SIZE)
#ifndef __ASSEMBLY__
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -17,6 +17,7 @@
*/
#define PFN_ORDER(_pfn) ((_pfn)->v.free.order)
+#ifndef CONFIG_BIGMEM
/*
* This definition is solely for the use in struct page_info (and
* struct page_list_head), intended to allow easy adjustment once x86-64
@@ -30,6 +31,9 @@ struct page_list_entry
{
__pdx_t next, prev;
};
+#else
+#define __pdx_t unsigned long
+#endif
struct page_sharing_info;
--- a/xen/include/asm-x86/mtrr.h
+++ b/xen/include/asm-x86/mtrr.h
@@ -1,8 +1,7 @@
#ifndef __ASM_X86_MTRR_H__
#define __ASM_X86_MTRR_H__
-#include <xen/config.h>
-#include <asm/mm.h>
+#include <xen/mm.h>
/* These are the region types. They match the architectural specification. */
#define MTRR_TYPE_UNCACHABLE 0
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 5/5] x86: provide build time option to support up to 123Tb of memory
2015-02-02 11:21 ` [PATCH v2 5/5] x86: provide build time option to support up to 123Tb of memory Jan Beulich
@ 2015-02-02 11:58 ` Andrew Cooper
2015-02-02 13:03 ` Jan Beulich
0 siblings, 1 reply; 15+ messages in thread
From: Andrew Cooper @ 2015-02-02 11:58 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: Tim Deegan, Keir Fraser
On 02/02/15 11:21, Jan Beulich wrote:
> As this requires growing struct page_info from 32 to 48 bytes as well
> as shrinking the always accessible direct mapped memory range from 5Tb
> to 3.5Tb, this isn't being introduced as a general or default enabled
> feature.
>
> For now setting "bigmem=y" implies "shadow-paging=n", as the shadow
> paging code otherwise fails to build (see
> http://lists.xenproject.org/archives/html/xen-devel/2015-01/msg03165.html).
Stale commit message?
>
> A side effect of the change to x86's mm.h is that asm/mm.h may no
> longer be included directly. Hence in the few places where this was done,
> xen/mm.h is being substituted (indirectly in the hvm/mtrr.h case).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Content Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> v2: "bigmem=y" no longer implies "shadow-paging=n".
>
> --- a/xen/arch/x86/Rules.mk
> +++ b/xen/arch/x86/Rules.mk
> @@ -33,6 +33,7 @@ x86 := y
> x86_64 := y
>
> shadow-paging ?= y
> +bigmem ?= n
>
> CFLAGS += -mno-red-zone -mno-sse -fpic
> CFLAGS += -fno-asynchronous-unwind-tables
> @@ -42,3 +43,4 @@ CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
> endif
>
> CFLAGS-$(shadow-paging) += -DCONFIG_SHADOW_PAGING
> +CFLAGS-$(bigmem) += -DCONFIG_BIGMEM
> --- a/xen/arch/x86/hvm/mtrr.c
> +++ b/xen/arch/x86/hvm/mtrr.c
> @@ -18,13 +18,11 @@
> */
>
> #include <public/hvm/e820.h>
> -#include <xen/types.h>
> +#include <xen/domain_page.h>
> #include <asm/e820.h>
> #include <asm/iocap.h>
> -#include <asm/mm.h>
> #include <asm/paging.h>
> #include <asm/p2m.h>
> -#include <xen/domain_page.h>
> #include <asm/mtrr.h>
> #include <asm/hvm/support.h>
> #include <asm/hvm/cacheattr.h>
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -386,8 +386,13 @@ static void __init setup_max_pdx(unsigne
> if ( max_pdx > FRAMETABLE_NR )
> max_pdx = FRAMETABLE_NR;
>
> + if ( max_pdx > MPT_VIRT_SIZE / sizeof(unsigned long) )
> + max_pdx = MPT_VIRT_SIZE / sizeof(unsigned long);
> +
> +#ifdef PAGE_LIST_NULL
> if ( max_pdx >= PAGE_LIST_NULL )
> max_pdx = PAGE_LIST_NULL - 1;
> +#endif
>
> max_page = pdx_to_pfn(max_pdx - 1) + 1;
> }
> --- a/xen/include/asm-x86/config.h
> +++ b/xen/include/asm-x86/config.h
> @@ -161,6 +161,7 @@ extern unsigned char boot_edid_info[128]
> * High read-only compatibility machine-to-phys translation table.
> * 0xffff82d080000000 - 0xffff82d0bfffffff [1GB, 2^30 bytes, PML4:261]
> * Xen text, static data, bss.
> +#ifndef CONFIG_BIGMEM
> * 0xffff82d0c0000000 - 0xffff82dffbffffff [61GB - 64MB, PML4:261]
> * Reserved for future use.
> * 0xffff82dffc000000 - 0xffff82dfffffffff [64MB, 2^26 bytes, PML4:261]
> @@ -169,6 +170,16 @@ extern unsigned char boot_edid_info[128]
> * Page-frame information array.
> * 0xffff830000000000 - 0xffff87ffffffffff [5TB, 5*2^40 bytes, PML4:262-271]
> * 1:1 direct mapping of all physical memory.
> +#else
> + * 0xffff82d0c0000000 - 0xffff82ffdfffffff [188.5GB, PML4:261]
> + * Reserved for future use.
> + * 0xffff82ffe0000000 - 0xffff82ffffffffff [512MB, 2^29 bytes, PML4:261]
> + * Super-page information array.
> + * 0xffff830000000000 - 0xffff847fffffffff [1.5TB, 3*2^39 bytes, PML4:262-264]
> + * Page-frame information array.
> + * 0xffff848000000000 - 0xffff87ffffffffff [3.5TB, 7*2^39 bytes, PML4:265-271]
> + * 1:1 direct mapping of all physical memory.
> +#endif
> * 0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
> * PV: Guest-defined use.
> * 0xffff880000000000 - 0xffffff7fffffffff [119.5TB, PML4:272-510]
> @@ -237,21 +248,35 @@ extern unsigned char boot_edid_info[128]
> /* Slot 261: xen text, static data and bss (1GB). */
> #define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END)
> #define XEN_VIRT_END (XEN_VIRT_START + GB(1))
> -/* Slot 261: superpage information array (64MB). */
> +
> +/* Slot 261: superpage information array (64MB or 512MB). */
> #define SPAGETABLE_VIRT_END FRAMETABLE_VIRT_START
> #define SPAGETABLE_NR (((FRAMETABLE_NR - 1) >> (SUPERPAGE_SHIFT - \
> PAGE_SHIFT)) + 1)
> #define SPAGETABLE_SIZE (SPAGETABLE_NR * sizeof(struct spage_info))
> #define SPAGETABLE_VIRT_START ((SPAGETABLE_VIRT_END - SPAGETABLE_SIZE) & \
> (_AC(-1,UL) << SUPERPAGE_SHIFT))
> +
> +#ifndef CONFIG_BIGMEM
> /* Slot 261: page-frame information array (128GB). */
> -#define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START
> #define FRAMETABLE_SIZE GB(128)
> +#else
> +/* Slot 262-264: page-frame information array (1.5TB). */
> +#define FRAMETABLE_SIZE GB(1536)
> +#endif
> +#define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START
> #define FRAMETABLE_NR (FRAMETABLE_SIZE / sizeof(*frame_table))
> #define FRAMETABLE_VIRT_START (FRAMETABLE_VIRT_END - FRAMETABLE_SIZE)
> +
> +#ifndef CONFIG_BIGMEM
> /* Slot 262-271/510: A direct 1:1 mapping of all of physical memory. */
> #define DIRECTMAP_VIRT_START (PML4_ADDR(262))
> #define DIRECTMAP_SIZE (PML4_ENTRY_BYTES * (511 - 262))
> +#else
> +/* Slot 265-271/510: A direct 1:1 mapping of all of physical memory. */
> +#define DIRECTMAP_VIRT_START (PML4_ADDR(265))
> +#define DIRECTMAP_SIZE (PML4_ENTRY_BYTES * (511 - 265))
> +#endif
> #define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + DIRECTMAP_SIZE)
>
> #ifndef __ASSEMBLY__
> --- a/xen/include/asm-x86/mm.h
> +++ b/xen/include/asm-x86/mm.h
> @@ -17,6 +17,7 @@
> */
> #define PFN_ORDER(_pfn) ((_pfn)->v.free.order)
>
> +#ifndef CONFIG_BIGMEM
> /*
> * This definition is solely for the use in struct page_info (and
> * struct page_list_head), intended to allow easy adjustment once x86-64
> @@ -30,6 +31,9 @@ struct page_list_entry
> {
> __pdx_t next, prev;
> };
> +#else
> +#define __pdx_t unsigned long
> +#endif
>
> struct page_sharing_info;
>
> --- a/xen/include/asm-x86/mtrr.h
> +++ b/xen/include/asm-x86/mtrr.h
> @@ -1,8 +1,7 @@
> #ifndef __ASM_X86_MTRR_H__
> #define __ASM_X86_MTRR_H__
>
> -#include <xen/config.h>
> -#include <asm/mm.h>
> +#include <xen/mm.h>
>
> /* These are the region types. They match the architectural specification. */
> #define MTRR_TYPE_UNCACHABLE 0
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 5/5] x86: provide build time option to support up to 123Tb of memory
2015-02-02 11:58 ` Andrew Cooper
@ 2015-02-02 13:03 ` Jan Beulich
0 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2015-02-02 13:03 UTC (permalink / raw)
To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Tim Deegan
>>> On 02.02.15 at 12:58, <andrew.cooper3@citrix.com> wrote:
> On 02/02/15 11:21, Jan Beulich wrote:
>> As this requires growing struct page_info from 32 to 48 bytes as well
>> as shrinking the always accessible direct mapped memory range from 5Tb
>> to 3.5Tb, this isn't being introduced as a general or default enabled
>> feature.
>>
>> For now setting "bigmem=y" implies "shadow-paging=n", as the shadow
>> paging code otherwise fails to build (see
>> http://lists.xenproject.org/archives/html/xen-devel/2015-01/msg03165.html).
>
> Stale commit message?
Indeed - thanks for noticing. Dropped.
Jan
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 0/5] x86: shadow adjustments / allow for more memory to be used
2015-02-02 11:09 [PATCH v2 0/5] x86: shadow adjustments / allow for more memory to be used Jan Beulich
` (4 preceding siblings ...)
2015-02-02 11:21 ` [PATCH v2 5/5] x86: provide build time option to support up to 123Tb of memory Jan Beulich
@ 2015-02-02 11:54 ` Tim Deegan
5 siblings, 0 replies; 15+ messages in thread
From: Tim Deegan @ 2015-02-02 11:54 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Andrew Cooper
At 11:09 +0000 on 02 Feb (1422871743), Jan Beulich wrote:
> 1: shadow: tidy up fragmentary page lists in multi-page shadows
> 2: shadow: don't needlessly expose internal functions
> 3: mm: allow for building without shadow mode support
> 4: IOMMU: correct page_list_first() use
> 5: provide build time option to support up to 123Tb of memory
>
> Note that thanks to Tim's patch 1 above the functional dependency
> of patch 5 on shadow mode being disabled is now gone. Still the
> latter only applies cleanly with the former in place.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
Reviewed-by: Tim Deegan <tim@xen.org>
^ permalink raw reply [flat|nested] 15+ messages in thread